Bug Summary

File:llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Warning:line 1926, column 15
Value stored to 'I' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name PPCISelDAGToDAG.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/PowerPC -I /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/lib/Target/PowerPC -I include -I /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-26-233846-219801-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
14#include "MCTargetDesc/PPCMCTargetDesc.h"
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCISelLowering.h"
18#include "PPCMachineFunctionInfo.h"
19#include "PPCSubtarget.h"
20#include "PPCTargetMachine.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SmallPtrSet.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/BranchProbabilityInfo.h"
28#include "llvm/CodeGen/FunctionLoweringInfo.h"
29#include "llvm/CodeGen/ISDOpcodes.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/SelectionDAG.h"
35#include "llvm/CodeGen/SelectionDAGISel.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetInstrInfo.h"
38#include "llvm/CodeGen/TargetRegisterInfo.h"
39#include "llvm/CodeGen/ValueTypes.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/Function.h"
43#include "llvm/IR/GlobalValue.h"
44#include "llvm/IR/InlineAsm.h"
45#include "llvm/IR/InstrTypes.h"
46#include "llvm/IR/IntrinsicsPowerPC.h"
47#include "llvm/IR/Module.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CodeGen.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/Compiler.h"
52#include "llvm/Support/Debug.h"
53#include "llvm/Support/ErrorHandling.h"
54#include "llvm/Support/KnownBits.h"
55#include "llvm/Support/MachineValueType.h"
56#include "llvm/Support/MathExtras.h"
57#include "llvm/Support/raw_ostream.h"
58#include <algorithm>
59#include <cassert>
60#include <cstdint>
61#include <iterator>
62#include <limits>
63#include <memory>
64#include <new>
65#include <tuple>
66#include <utility>
67
68using namespace llvm;
69
70#define DEBUG_TYPE"ppc-codegen" "ppc-codegen"
71
// Statistics counters for this pass, all registered under the "ppc-codegen"
// debug type. Each STATISTIC use below is shown interleaved with the
// analyzer's expansion of the macro into a static llvm::Statistic object.
72STATISTIC(NumSextSetcc,static llvm::Statistic NumSextSetcc = {"ppc-codegen", "NumSextSetcc"
, "Number of (sext(setcc)) nodes expanded into GPR sequence."
}
73 "Number of (sext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumSextSetcc = {"ppc-codegen", "NumSextSetcc"
, "Number of (sext(setcc)) nodes expanded into GPR sequence."
}
;
74STATISTIC(NumZextSetcc,static llvm::Statistic NumZextSetcc = {"ppc-codegen", "NumZextSetcc"
, "Number of (zext(setcc)) nodes expanded into GPR sequence."
}
75 "Number of (zext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumZextSetcc = {"ppc-codegen", "NumZextSetcc"
, "Number of (zext(setcc)) nodes expanded into GPR sequence."
}
;
76STATISTIC(SignExtensionsAdded,static llvm::Statistic SignExtensionsAdded = {"ppc-codegen", "SignExtensionsAdded"
, "Number of sign extensions for compare inputs added."}
77 "Number of sign extensions for compare inputs added.")static llvm::Statistic SignExtensionsAdded = {"ppc-codegen", "SignExtensionsAdded"
, "Number of sign extensions for compare inputs added."}
;
78STATISTIC(ZeroExtensionsAdded,static llvm::Statistic ZeroExtensionsAdded = {"ppc-codegen", "ZeroExtensionsAdded"
, "Number of zero extensions for compare inputs added."}
79 "Number of zero extensions for compare inputs added.")static llvm::Statistic ZeroExtensionsAdded = {"ppc-codegen", "ZeroExtensionsAdded"
, "Number of zero extensions for compare inputs added."}
;
80STATISTIC(NumLogicOpsOnComparison,static llvm::Statistic NumLogicOpsOnComparison = {"ppc-codegen"
, "NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR."
}
81 "Number of logical ops on i1 values calculated in GPR.")static llvm::Statistic NumLogicOpsOnComparison = {"ppc-codegen"
, "NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR."
}
;
82STATISTIC(OmittedForNonExtendUses,static llvm::Statistic OmittedForNonExtendUses = {"ppc-codegen"
, "OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses."
}
83 "Number of compares not eliminated as they have non-extending uses.")static llvm::Statistic OmittedForNonExtendUses = {"ppc-codegen"
, "OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses."
}
;
84STATISTIC(NumP9Setb,static llvm::Statistic NumP9Setb = {"ppc-codegen", "NumP9Setb"
, "Number of compares lowered to setb."}
85 "Number of compares lowered to setb.")static llvm::Statistic NumP9Setb = {"ppc-codegen", "NumP9Setb"
, "Number of compares lowered to setb."}
;
86
// Hidden boolean command-line flags for this file. ANDIGlueBug is the only
// one not declared static.
87// FIXME: Remove this option once the ANDI glue bug has been fixed!
88cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
89cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
90
// -ppc-use-bit-perm-rewriter: explicitly initialized to true.
91static cl::opt<bool>
92 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
93 cl::desc("use aggressive ppc isel for bit permutations"),
94 cl::Hidden);
// -ppc-bit-perm-rewriter-stress-rotates: has no explicit cl::init value.
95static cl::opt<bool> BPermRewriterNoMasking(
96 "ppc-bit-perm-rewriter-stress-rotates",
97 cl::desc("stress rotate selection in aggressive ppc isel for "
98 "bit permutations"),
99 cl::Hidden);
100
// -ppc-use-branch-hint: explicitly initialized to true.
101static cl::opt<bool> EnableBranchHint(
102 "ppc-use-branch-hint", cl::init(true),
103 cl::desc("Enable static hinting of branches on ppc"),
104 cl::Hidden);
105
// -ppc-tls-opt: explicitly initialized to true.
106static cl::opt<bool> EnableTLSOpt(
107 "ppc-tls-opt", cl::init(true),
108 cl::desc("Enable tls optimization peephole"),
109 cl::Hidden);
110
// ICmpInGPRType - Value type of the "ppc-gpr-icmps" option (CmpInGPR): which
// kinds of integer comparisons GPR-only code is emitted for. The option's
// clEnumValN entries carry the per-enumerator descriptions.
111enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
112 ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
113 ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
114
// -ppc-gpr-icmps: hidden option selecting the types of comparisons to emit
// GPR-only code for; initialized to ICGPR_All. Each clEnumValN entry below is
// shown interleaved with the analyzer's expansion of the macro into an
// llvm::cl::OptionEnumValue.
115static cl::opt<ICmpInGPRType> CmpInGPR(
116 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
117 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
118 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons.")llvm::cl::OptionEnumValue { "none", int(ICGPR_None), "Do not modify integer comparisons."
}
,
119 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs.")llvm::cl::OptionEnumValue { "all", int(ICGPR_All), "All possible int comparisons in GPRs."
}
,
120 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs.")llvm::cl::OptionEnumValue { "i32", int(ICGPR_I32), "Only i32 comparisons in GPRs."
}
,
121 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs.")llvm::cl::OptionEnumValue { "i64", int(ICGPR_I64), "Only i64 comparisons in GPRs."
}
,
122 clEnumValN(ICGPR_NonExtIn, "nonextin",llvm::cl::OptionEnumValue { "nonextin", int(ICGPR_NonExtIn), "Only comparisons where inputs don't need [sz]ext."
}
123 "Only comparisons where inputs don't need [sz]ext.")llvm::cl::OptionEnumValue { "nonextin", int(ICGPR_NonExtIn), "Only comparisons where inputs don't need [sz]ext."
}
,
124 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result.")llvm::cl::OptionEnumValue { "zext", int(ICGPR_Zext), "Only comparisons with zext result."
}
,
125 clEnumValN(ICGPR_ZextI32, "zexti32",llvm::cl::OptionEnumValue { "zexti32", int(ICGPR_ZextI32), "Only i32 comparisons with zext result."
}
126 "Only i32 comparisons with zext result.")llvm::cl::OptionEnumValue { "zexti32", int(ICGPR_ZextI32), "Only i32 comparisons with zext result."
}
,
127 clEnumValN(ICGPR_ZextI64, "zexti64",llvm::cl::OptionEnumValue { "zexti64", int(ICGPR_ZextI64), "Only i64 comparisons with zext result."
}
128 "Only i64 comparisons with zext result.")llvm::cl::OptionEnumValue { "zexti64", int(ICGPR_ZextI64), "Only i64 comparisons with zext result."
}
,
129 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result.")llvm::cl::OptionEnumValue { "sext", int(ICGPR_Sext), "Only comparisons with sext result."
}
,
130 clEnumValN(ICGPR_SextI32, "sexti32",llvm::cl::OptionEnumValue { "sexti32", int(ICGPR_SextI32), "Only i32 comparisons with sext result."
}
131 "Only i32 comparisons with sext result.")llvm::cl::OptionEnumValue { "sexti32", int(ICGPR_SextI32), "Only i32 comparisons with sext result."
}
,
132 clEnumValN(ICGPR_SextI64, "sexti64",llvm::cl::OptionEnumValue { "sexti64", int(ICGPR_SextI64), "Only i64 comparisons with sext result."
}
133 "Only i64 comparisons with sext result.")llvm::cl::OptionEnumValue { "sexti64", int(ICGPR_SextI64), "Only i64 comparisons with sext result."
}
));
134namespace {
135
136 //===--------------------------------------------------------------------===//
137 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
138 /// instructions for SelectionDAG operations.
139 ///
140 class PPCDAGToDAGISel : public SelectionDAGISel {
141 const PPCTargetMachine &TM;
142 const PPCSubtarget *Subtarget = nullptr;
143 const PPCTargetLowering *PPCLowering = nullptr;
144 unsigned GlobalBaseReg = 0;
145
146 public:
/// Construct the selector: forwards \p tm and \p OptLevel to the
/// SelectionDAGISel base class and binds the TM reference member to \p tm.
147 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
148 : SelectionDAGISel(tm, OptLevel), TM(tm) {}
149
/// runOnMachineFunction - Reset the per-function state (GlobalBaseReg,
/// Subtarget, PPCLowering), create the ROP protection hash stack slot when
/// the subtarget has ROP protection, then run the SelectionDAGISel
/// implementation on \p MF. Always returns true.
150 bool runOnMachineFunction(MachineFunction &MF) override {
151 // Make sure we re-emit a set of the global base reg if necessary
152 GlobalBaseReg = 0;
153 Subtarget = &MF.getSubtarget<PPCSubtarget>();
154 PPCLowering = Subtarget->getTargetLowering();
155 if (Subtarget->hasROPProtect()) {
156 // Create a place on the stack for the ROP Protection Hash.
157 // The ROP Protection Hash will always be 8 bytes and aligned to 8
158 // bytes. The resulting frame index is recorded in PPCFunctionInfo.
159 MachineFrameInfo &MFI = MF.getFrameInfo();
160 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
161 const int Result = MFI.CreateStackObject(8, Align(8), false);
162 FI->setROPProtectionHashSaveIndex(Result);
163 }
// The base-class return value is not used; this override reports true
// unconditionally.
164 SelectionDAGISel::runOnMachineFunction(MF);
165
166 return true;
167 }
168
169 void PreprocessISelDAG() override;
170 void PostprocessISelDAG() override;
171
172 /// getI16Imm - Return a target constant of type i16 holding \p Imm,
173 /// created through CurDAG with \p dl as its SDLoc.
174 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
175 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
176 }
177
178 /// getI32Imm - Return a target constant of type i32 holding \p Imm,
179 /// created through CurDAG with \p dl as its SDLoc.
180 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
181 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
182 }
183
184 /// getI64Imm - Return a target constant of type i64 holding the 64-bit
185 /// value \p Imm, created through CurDAG with \p dl as its SDLoc.
186 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
187 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
188 }
189
190 /// getSmallIPtrImm - Return a target constant holding \p Imm whose type is
/// the pointer type PPCLowering reports for CurDAG's data layout.
191 inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
192 return CurDAG->getTargetConstant(
193 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
194 }
195
196 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
197 /// rotate and mask opcode and mask operation.
198 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
199 unsigned &SH, unsigned &MB, unsigned &ME);
200
201 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
202 /// base register. Return the virtual register that holds this value.
203 SDNode *getGlobalBaseReg();
204
205 void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
206
207 // Select - Convert the specified operand from a target-independent to a
208 // target-specific node if it hasn't already been changed.
209 void Select(SDNode *N) override;
210
211 bool tryBitfieldInsert(SDNode *N);
212 bool tryBitPermutation(SDNode *N);
213 bool tryIntCompareInGPR(SDNode *N);
214
215 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
216 // an X-Form load instruction with the offset being a relocation coming from
217 // the PPCISD::ADD_TLS.
218 bool tryTLSXFormLoad(LoadSDNode *N);
219 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
220 // an X-Form store instruction with the offset being a relocation coming from
221 // the PPCISD::ADD_TLS.
222 bool tryTLSXFormStore(StoreSDNode *N);
223 /// SelectCC - Select a comparison of the specified values with the
224 /// specified condition code, returning the CR# of the expression.
225 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
226 const SDLoc &dl, SDValue Chain = SDValue());
227
228 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
229 /// immediate field. Note that the operand at this point is already the
230 /// result of a prior SelectAddressRegImm call.
/// Only ISD::TargetConstant and ISD::TargetGlobalAddress nodes are accepted;
/// on success \p N is copied to \p Out, otherwise \p Out is left unmodified.
231 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
232 if (N.getOpcode() == ISD::TargetConstant ||
233 N.getOpcode() == ISD::TargetGlobalAddress) {
234 Out = N;
235 return true;
236 }
237
238 return false;
239 }
240
241 /// SelectDSForm - Returns true if address N can be represented by the
242 /// addressing mode of DSForm instructions (a base register, plus a signed
243 /// 16-bit displacement that is a multiple of 4).
/// True exactly when SelectOptimalAddrMode, called with Align(4), returns
/// PPC::AM_DSForm.
244 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
245 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
246 Align(4)) == PPC::AM_DSForm;
247 }
248
249 /// SelectDQForm - Returns true if address N can be represented by the
250 /// addressing mode of DQForm instructions (a base register, plus a signed
251 /// 16-bit displacement that is a multiple of 16).
/// True exactly when SelectOptimalAddrMode, called with Align(16), returns
/// PPC::AM_DQForm.
252 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
253 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
254 Align(16)) == PPC::AM_DQForm;
255 }
256
257 /// SelectDForm - Returns true if address N can be represented by
258 /// the addressing mode of DForm instructions (a base register, plus a
259 /// signed 16-bit immediate).
/// True exactly when SelectOptimalAddrMode, called with None as its last
/// argument, returns PPC::AM_DForm.
260 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
261 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
262 None) == PPC::AM_DForm;
263 }
264
265 /// SelectPCRelForm - Returns true if address N can be represented by
266 /// PC-Relative addressing mode, i.e. when SelectOptimalAddrMode (called
/// with None as its last argument) returns PPC::AM_PCRel.
267 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
268 SDValue &Base) {
269 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
270 None) == PPC::AM_PCRel;
271 }
272
273 /// SelectPDForm - Returns true if address N can be represented by Prefixed
274 /// DForm addressing mode (a base register, plus a signed 34-bit immediate).
/// True exactly when SelectOptimalAddrMode, called with None as its last
/// argument, returns PPC::AM_PrefixDForm.
275 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
276 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
277 None) == PPC::AM_PrefixDForm;
278 }
279
280 /// SelectXForm - Returns true if address N can be represented by the
281 /// addressing mode of XForm instructions (an indexed [r+r] operation).
/// True exactly when SelectOptimalAddrMode, called with None as its last
/// argument, returns PPC::AM_XForm.
282 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
283 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
284 None) == PPC::AM_XForm;
285 }
286
287 /// SelectForceXForm - Given the specified address, force it to be
288 /// represented as an indexed [r+r] operation (an XForm instruction).
/// Returns true when SelectForceXFormMode reports PPC::AM_XForm. \p Parent
/// is accepted but not forwarded to that call.
289 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
290 SDValue &Base) {
291 return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
292 PPC::AM_XForm;
293 }
294
295 /// SelectAddrIdx - Given the specified address, check to see if it can be
296 /// represented as an indexed [r+r] operation.
297 /// This is for xform instructions whose associated displacement form is D.
298 /// The last argument, None, means the associated D form places no
299 /// multiple-of constraint on its 16-bit signed displacement.
300 /// Returns false if it can be represented by [r+imm], which are preferred.
301 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
302 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
303 }
304
305 /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
306 /// represented as an indexed [r+r] operation.
307 /// This is for xform instructions whose associated displacement form is DS.
308 /// The last argument, Align(4), means the associated DS form's 16-bit signed
309 /// displacement must be a multiple of 4.
310 /// Returns false if it can be represented by [r+imm], which are preferred.
311 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
312 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
313 Align(4));
314 }
315
316 /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
317 /// represented as an indexed [r+r] operation.
318 /// This is for xform instructions whose associated displacement form is DQ.
319 /// The last argument, Align(16), means the associated DQ form's 16-bit signed
320 /// displacement must be a multiple of 16.
321 /// Returns false if it can be represented by [r+imm], which are preferred.
322 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
323 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
324 Align(16));
325 }
326
327 /// SelectAddrIdxOnly - Given the specified address, force it to be
328 /// represented as an indexed [r+r] operation. Returns the result of
/// PPCLowering->SelectAddressRegRegOnly, which fills \p Base and \p Index.
329 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
330 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
331 }
332
333 /// SelectAddrImm - Returns true if the address N can be represented by
334 /// a base register plus a signed 16-bit displacement [r+imm].
335 /// The last argument, None, means the D form places no multiple-of
336 /// constraint on its 16-bit signed displacement.
337 bool SelectAddrImm(SDValue N, SDValue &Disp,
338 SDValue &Base) {
339 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
340 }
341
342 /// SelectAddrImmX4 - Returns true if the address N can be represented by
343 /// a base register plus a signed 16-bit displacement that is a multiple of
344 /// 4 (the Align(4) last argument). Suitable for use by STD and friends.
345 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
346 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
347 }
348
349 /// SelectAddrImmX16 - Returns true if the address N can be represented by
350 /// a base register plus a signed 16-bit displacement that is a multiple of
351 /// 16 (the Align(16) last argument). Suitable for use by STXV and friends.
352 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
353 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
354 Align(16));
355 }
356
357 /// SelectAddrImmX34 - Returns true if the address N can be represented by
358 /// a base register plus a signed 34-bit displacement. Suitable for use by
359 /// PSTXVP and friends. Delegates to PPCLowering->SelectAddressRegImm34.
360 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
361 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
362 }
363
364 // Select an address into a single register. N itself becomes Base, so
// this always returns true.
365 bool SelectAddr(SDValue N, SDValue &Base) {
366 Base = N;
367 return true;
368 }
369
/// SelectAddrPCRel - Forward to PPCLowering->SelectAddressPCRel with \p N
/// and \p Base and return its result.
370 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
371 return PPCLowering->SelectAddressPCRel(N, Base);
372 }
373
374 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
375 /// inline asm expressions. It is always correct to compute the value into
376 /// a register. The case of adding a (possibly relocatable) constant to a
377 /// register can be improved, but it is wrong to substitute Reg+Reg for
378 /// Reg in an asm, because the load or store opcode would have to change.
/// For each handled constraint (es, m, o, Q, Z, Zy) the operand is wrapped in
/// a COPY_TO_REGCLASS to the pointer register class, appended to \p OutOps,
/// and false is returned. Any other \p ConstraintID is printed to errs() and
/// reaches llvm_unreachable.
379 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
380 unsigned ConstraintID,
381 std::vector<SDValue> &OutOps) override {
382 switch(ConstraintID) {
383 default:
384 errs() << "ConstraintID: " << ConstraintID << "\n";
385 llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 385)
;
386 case InlineAsm::Constraint_es:
387 case InlineAsm::Constraint_m:
388 case InlineAsm::Constraint_o:
389 case InlineAsm::Constraint_Q:
390 case InlineAsm::Constraint_Z:
391 case InlineAsm::Constraint_Zy:
392 // We need to make sure that this one operand does not end up in r0
393 // (because we might end up lowering this as 0(%op)).
394 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
395 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
396 SDLoc dl(Op);
// The register class ID is passed as an i32 target constant operand of the
// COPY_TO_REGCLASS node.
397 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
398 SDValue NewOp =
399 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
400 dl, Op.getValueType(),
401 Op, RC), 0);
402
403 OutOps.push_back(NewOp);
404 return false;
405 }
406 return true;
407 }
408
/// getPassName - Return the fixed descriptive name string for this pass.
409 StringRef getPassName() const override {
410 return "PowerPC DAG->DAG Pattern Instruction Selection";
411 }
412
413// Include the pieces autogenerated from the target description.
414#include "PPCGenDAGISel.inc"
415
416private:
417 bool trySETCC(SDNode *N);
418 bool tryFoldSWTestBRCC(SDNode *N);
419 bool tryAsSingleRLDICL(SDNode *N);
420 bool tryAsSingleRLDICR(SDNode *N);
421 bool tryAsSingleRLWINM(SDNode *N);
422 bool tryAsSingleRLWINM8(SDNode *N);
423 bool tryAsSingleRLWIMI(SDNode *N);
424 bool tryAsPairOfRLDICL(SDNode *N);
425 bool tryAsSingleRLDIMI(SDNode *N);
426
427 void PeepholePPC64();
428 void PeepholePPC64ZExt();
429 void PeepholeCROps();
430
431 SDValue combineToCMPB(SDNode *N);
432 void foldBoolExts(SDValue &Res, SDNode *&N);
433
434 bool AllUsersSelectZero(SDNode *N);
435 void SwapAllSelectUsers(SDNode *N);
436
437 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
438 void transferMemOperands(SDNode *N, SDNode *Result);
439 };
440
441} // end anonymous namespace
442
443/// getGlobalBaseReg - Output the instructions required to put the
444/// base address to use for accessing globals into a register.
/// The instructions are inserted at the beginning of the function's first
/// basic block, and only while GlobalBaseReg is still 0; once it is set,
/// calls simply return a register node of pointer type for it.
445///
446SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
447 if (!GlobalBaseReg) {
448 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
449 // Insert the set of GlobalBaseReg into the first MBB of the function
450 MachineBasicBlock &FirstMBB = MF->front();
451 MachineBasicBlock::iterator MBBI = FirstMBB.begin();
452 const Module *M = MF->getFunction().getParent();
453 DebugLoc dl;
454
// Pointer type is i32: ELF targets use the fixed register R30, other
// targets get a fresh virtual register.
455 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
456 if (Subtarget->isTargetELF()) {
457 GlobalBaseReg = PPC::R30;
// Without secure PLT and at small PIC level, MoveGOTtoLR + MFLR is emitted;
// otherwise MovePCtoLR + MFLR followed by UpdateGBR with a temporary GPRC
// register. Both paths mark the function as using the PIC base.
458 if (!Subtarget->isSecurePlt() &&
459 M->getPICLevel() == PICLevel::SmallPIC) {
460 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
461 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
462 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
463 } else {
464 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
465 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
466 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
467 BuildMI(FirstMBB, MBBI, dl,
468 TII.get(PPC::UpdateGBR), GlobalBaseReg)
469 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
470 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
471 }
472 } else {
473 GlobalBaseReg =
474 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
475 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
476 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
477 }
478 } else {
479 // We must ensure that this sequence is dominated by the prologue.
480 // FIXME: This is a bit of a big hammer since we don't get the benefits
481 // of shrink-wrapping whenever we emit this instruction. Considering
482 // this is used in any function where we emit a jump table, this may be
483 // a significant limitation. We should consider inserting this in the
484 // block where it is used and then commoning this sequence up if it
485 // appears in multiple places.
486 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
487 // MovePCtoLR8.
488 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
489 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
490 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
491 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
492 }
493 }
// Wrap the register in a DAG register node typed as the target's pointer
// type and hand back the underlying SDNode.
494 return CurDAG->getRegister(GlobalBaseReg,
495 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
496 .getNode();
497}
498
499// Check if a SDValue has the toc-data attribute.
500static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
501 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
502 if (!GA)
503 return false;
504
505 const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
506 if (!GV)
507 return false;
508
509 if (!GV->hasAttribute("toc-data"))
510 return false;
511
512 // TODO: These asserts should be updated as more support for the toc data
513 // transformation is added (struct support, etc.).
514
515 assert((static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.") ? void
(0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter than TOC entry \" \"size not supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 518, __extension__
__PRETTY_FUNCTION__))
516 PointerSize >= GV->getAlign().valueOrOne().value() &&(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.") ? void
(0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter than TOC entry \" \"size not supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 518, __extension__
__PRETTY_FUNCTION__))
517 "GlobalVariables with an alignment requirement stricter than TOC entry "(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.") ? void
(0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter than TOC entry \" \"size not supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 518, __extension__
__PRETTY_FUNCTION__))
518 "size not supported by the toc data transformation.")(static_cast <bool> (PointerSize >= GV->getAlign(
).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter than TOC entry "
"size not supported by the toc data transformation.") ? void
(0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter than TOC entry \" \"size not supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 518, __extension__
__PRETTY_FUNCTION__))
;
519
520 Type *GVType = GV->getValueType();
521
522 assert(GVType->isSized() && "A GlobalVariable's size must be known to be "(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 523, __extension__
__PRETTY_FUNCTION__))
523 "supported by the toc data transformation.")(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 523, __extension__
__PRETTY_FUNCTION__))
;
524
525 if (GVType->isVectorTy())
526 report_fatal_error("A GlobalVariable of Vector type is not currently "
527 "supported by the toc data transformation.");
528
529 if (GVType->isArrayTy())
530 report_fatal_error("A GlobalVariable of Array type is not currently "
531 "supported by the toc data transformation.");
532
533 if (GVType->isStructTy())
534 report_fatal_error("A GlobalVariable of Struct type is not currently "
535 "supported by the toc data transformation.");
536
537 assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than a TOC entry is not currently \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 539, __extension__
__PRETTY_FUNCTION__))
538 "A GlobalVariable with size larger than a TOC entry is not currently "(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than a TOC entry is not currently \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 539, __extension__
__PRETTY_FUNCTION__))
539 "supported by the toc data transformation.")(static_cast <bool> (GVType->getPrimitiveSizeInBits(
) <= PointerSize * 8 && "A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.") ? void (0) : __assert_fail
("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than a TOC entry is not currently \" \"supported by the toc data transformation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 539, __extension__
__PRETTY_FUNCTION__))
;
540
541 if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
542 report_fatal_error("A GlobalVariable with private or local linkage is not "
543 "currently supported by the toc data transformation.");
544
545 assert(!GV->hasCommonLinkage() &&(static_cast <bool> (!GV->hasCommonLinkage() &&
"Tentative definitions cannot have the mapping class XMC_TD."
) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 546, __extension__
__PRETTY_FUNCTION__))
546 "Tentative definitions cannot have the mapping class XMC_TD.")(static_cast <bool> (!GV->hasCommonLinkage() &&
"Tentative definitions cannot have the mapping class XMC_TD."
) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 546, __extension__
__PRETTY_FUNCTION__))
;
547
548 return true;
549}
550
551/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
552/// operand. If so Imm will receive the 32-bit value.
553static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
554 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
555 Imm = cast<ConstantSDNode>(N)->getZExtValue();
556 return true;
557 }
558 return false;
559}
560
561/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
562/// operand. If so Imm will receive the 64-bit value.
563static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
564 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
565 Imm = cast<ConstantSDNode>(N)->getZExtValue();
566 return true;
567 }
568 return false;
569}
570
571// isInt32Immediate - This method tests to see if a constant operand.
572// If so Imm will receive the 32 bit value.
573static bool isInt32Immediate(SDValue N, unsigned &Imm) {
574 return isInt32Immediate(N.getNode(), Imm);
575}
576
577/// isInt64Immediate - This method tests to see if the value is a 64-bit
578/// constant operand. If so Imm will receive the 64-bit value.
579static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
580 return isInt64Immediate(N.getNode(), Imm);
581}
582
583static unsigned getBranchHint(unsigned PCC,
584 const FunctionLoweringInfo &FuncInfo,
585 const SDValue &DestMBB) {
586 assert(isa<BasicBlockSDNode>(DestMBB))(static_cast <bool> (isa<BasicBlockSDNode>(DestMBB
)) ? void (0) : __assert_fail ("isa<BasicBlockSDNode>(DestMBB)"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 586, __extension__
__PRETTY_FUNCTION__))
;
587
588 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
589
590 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
591 const Instruction *BBTerm = BB->getTerminator();
592
593 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
594
595 const BasicBlock *TBB = BBTerm->getSuccessor(0);
596 const BasicBlock *FBB = BBTerm->getSuccessor(1);
597
598 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
599 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
600
601 // We only want to handle cases which are easy to predict at static time, e.g.
602 // C++ throw statement, that is very likely not taken, or calling never
603 // returned function, e.g. stdlib exit(). So we set Threshold to filter
604 // unwanted cases.
605 //
606 // Below is LLVM branch weight table, we only want to handle case 1, 2
607 //
608 // Case Taken:Nontaken Example
609 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
610 // 2. Invoke-terminating 1:1048575
611 // 3. Coldblock 4:64 __builtin_expect
612 // 4. Loop Branch 124:4 For loop
613 // 5. PH/ZH/FPH 20:12
614 const uint32_t Threshold = 10000;
615
616 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
617 return PPC::BR_NO_HINT;
618
619 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)
620 << "::" << BB->getName() << "'\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)
621 << " -> " << TBB->getName() << ": " << TProb << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)
622 << " -> " << FBB->getName() << ": " << FProb << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Use branch hint for '" <<
FuncInfo.Fn->getName() << "::" << BB->getName
() << "'\n" << " -> " << TBB->getName
() << ": " << TProb << "\n" << " -> "
<< FBB->getName() << ": " << FProb <<
"\n"; } } while (false)
;
623
624 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
625
626 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
627 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
628 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
629 std::swap(TProb, FProb);
630
631 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
632}
633
634// isOpcWithIntImmediate - This method tests to see if the node is a specific
635// opcode and that it has a immediate integer right operand.
636// If so Imm will receive the 32 bit value.
637static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
638 return N->getOpcode() == Opc
639 && isInt32Immediate(N->getOperand(1).getNode(), Imm);
640}
641
642void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
643 SDLoc dl(SN);
644 int FI = cast<FrameIndexSDNode>(N)->getIndex();
645 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
646 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
647 if (SN->hasOneUse())
648 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
649 getSmallIPtrImm(Offset, dl));
650 else
651 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
652 getSmallIPtrImm(Offset, dl)));
653}
654
655bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
656 bool isShiftMask, unsigned &SH,
657 unsigned &MB, unsigned &ME) {
658 // Don't even go down this path for i64, since different logic will be
659 // necessary for rldicl/rldicr/rldimi.
660 if (N->getValueType(0) != MVT::i32)
661 return false;
662
663 unsigned Shift = 32;
664 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
665 unsigned Opcode = N->getOpcode();
666 if (N->getNumOperands() != 2 ||
667 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
668 return false;
669
670 if (Opcode == ISD::SHL) {
671 // apply shift left to mask if it comes first
672 if (isShiftMask) Mask = Mask << Shift;
673 // determine which bits are made indeterminant by shift
674 Indeterminant = ~(0xFFFFFFFFu << Shift);
675 } else if (Opcode == ISD::SRL) {
676 // apply shift right to mask if it comes first
677 if (isShiftMask) Mask = Mask >> Shift;
678 // determine which bits are made indeterminant by shift
679 Indeterminant = ~(0xFFFFFFFFu >> Shift);
680 // adjust for the left rotate
681 Shift = 32 - Shift;
682 } else if (Opcode == ISD::ROTL) {
683 Indeterminant = 0;
684 } else {
685 return false;
686 }
687
688 // if the mask doesn't intersect any Indeterminant bits
689 if (Mask && !(Mask & Indeterminant)) {
690 SH = Shift & 31;
691 // make sure the mask is still a mask (wrap arounds may not be)
692 return isRunOfOnes(Mask, MB, ME);
693 }
694 return false;
695}
696
697bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
698 SDValue Base = ST->getBasePtr();
699 if (Base.getOpcode() != PPCISD::ADD_TLS)
700 return false;
701 SDValue Offset = ST->getOffset();
702 if (!Offset.isUndef())
703 return false;
704 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
705 return false;
706
707 SDLoc dl(ST);
708 EVT MemVT = ST->getMemoryVT();
709 EVT RegVT = ST->getValue().getValueType();
710
711 unsigned Opcode;
712 switch (MemVT.getSimpleVT().SimpleTy) {
713 default:
714 return false;
715 case MVT::i8: {
716 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
717 break;
718 }
719 case MVT::i16: {
720 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
721 break;
722 }
723 case MVT::i32: {
724 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
725 break;
726 }
727 case MVT::i64: {
728 Opcode = PPC::STDXTLS;
729 break;
730 }
731 }
732 SDValue Chain = ST->getChain();
733 SDVTList VTs = ST->getVTList();
734 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
735 Chain};
736 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
737 transferMemOperands(ST, MN);
738 ReplaceNode(ST, MN);
739 return true;
740}
741
742bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
743 SDValue Base = LD->getBasePtr();
744 if (Base.getOpcode() != PPCISD::ADD_TLS)
745 return false;
746 SDValue Offset = LD->getOffset();
747 if (!Offset.isUndef())
748 return false;
749 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
750 return false;
751
752 SDLoc dl(LD);
753 EVT MemVT = LD->getMemoryVT();
754 EVT RegVT = LD->getValueType(0);
755 unsigned Opcode;
756 switch (MemVT.getSimpleVT().SimpleTy) {
757 default:
758 return false;
759 case MVT::i8: {
760 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
761 break;
762 }
763 case MVT::i16: {
764 Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
765 break;
766 }
767 case MVT::i32: {
768 Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
769 break;
770 }
771 case MVT::i64: {
772 Opcode = PPC::LDXTLS;
773 break;
774 }
775 }
776 SDValue Chain = LD->getChain();
777 SDVTList VTs = LD->getVTList();
778 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
779 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
780 transferMemOperands(LD, MN);
781 ReplaceNode(LD, MN);
782 return true;
783}
784
785/// Turn an or of two masked values into the rotate left word immediate then
786/// mask insert (rlwimi) instruction.
787bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
788 SDValue Op0 = N->getOperand(0);
789 SDValue Op1 = N->getOperand(1);
790 SDLoc dl(N);
791
792 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
793 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
794
795 unsigned TargetMask = LKnown.Zero.getZExtValue();
796 unsigned InsertMask = RKnown.Zero.getZExtValue();
797
798 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
799 unsigned Op0Opc = Op0.getOpcode();
800 unsigned Op1Opc = Op1.getOpcode();
801 unsigned Value, SH = 0;
802 TargetMask = ~TargetMask;
803 InsertMask = ~InsertMask;
804
805 // If the LHS has a foldable shift and the RHS does not, then swap it to the
806 // RHS so that we can fold the shift into the insert.
807 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
808 if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
809 Op0.getOperand(0).getOpcode() == ISD::SRL) {
810 if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
811 Op1.getOperand(0).getOpcode() != ISD::SRL) {
812 std::swap(Op0, Op1);
813 std::swap(Op0Opc, Op1Opc);
814 std::swap(TargetMask, InsertMask);
815 }
816 }
817 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
818 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
819 Op1.getOperand(0).getOpcode() != ISD::SRL) {
820 std::swap(Op0, Op1);
821 std::swap(Op0Opc, Op1Opc);
822 std::swap(TargetMask, InsertMask);
823 }
824 }
825
826 unsigned MB, ME;
827 if (isRunOfOnes(InsertMask, MB, ME)) {
828 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
829 isInt32Immediate(Op1.getOperand(1), Value)) {
830 Op1 = Op1.getOperand(0);
831 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
832 }
833 if (Op1Opc == ISD::AND) {
834 // The AND mask might not be a constant, and we need to make sure that
835 // if we're going to fold the masking with the insert, all bits not
836 // know to be zero in the mask are known to be one.
837 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
838 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
839
840 unsigned SHOpc = Op1.getOperand(0).getOpcode();
841 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
842 isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
843 // Note that Value must be in range here (less than 32) because
844 // otherwise there would not be any bits set in InsertMask.
845 Op1 = Op1.getOperand(0).getOperand(0);
846 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
847 }
848 }
849
850 SH &= 31;
851 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
852 getI32Imm(ME, dl) };
853 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
854 return true;
855 }
856 }
857 return false;
858}
859
860static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
861 unsigned MaxTruncation = 0;
862 // Cannot use range-based for loop here as we need the actual use (i.e. we
863 // need the operand number corresponding to the use). A range-based for
864 // will unbox the use and provide an SDNode*.
865 for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
866 Use != UseEnd; ++Use) {
867 unsigned Opc =
868 Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
869 switch (Opc) {
870 default: return 0;
871 case ISD::TRUNCATE:
872 if (Use->isMachineOpcode())
873 return 0;
874 MaxTruncation =
875 std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
876 continue;
877 case ISD::STORE: {
878 if (Use->isMachineOpcode())
879 return 0;
880 StoreSDNode *STN = cast<StoreSDNode>(*Use);
881 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
882 if (MemVTSize == 64 || Use.getOperandNo() != 0)
883 return 0;
884 MaxTruncation = std::max(MaxTruncation, MemVTSize);
885 continue;
886 }
887 case PPC::STW8:
888 case PPC::STWX8:
889 case PPC::STWU8:
890 case PPC::STWUX8:
891 if (Use.getOperandNo() != 0)
892 return 0;
893 MaxTruncation = std::max(MaxTruncation, 32u);
894 continue;
895 case PPC::STH8:
896 case PPC::STHX8:
897 case PPC::STHU8:
898 case PPC::STHUX8:
899 if (Use.getOperandNo() != 0)
900 return 0;
901 MaxTruncation = std::max(MaxTruncation, 16u);
902 continue;
903 case PPC::STB8:
904 case PPC::STBX8:
905 case PPC::STBU8:
906 case PPC::STBUX8:
907 if (Use.getOperandNo() != 0)
908 return 0;
909 MaxTruncation = std::max(MaxTruncation, 8u);
910 continue;
911 }
912 }
913 return MaxTruncation;
914}
915
916// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
917// zeros and return the number of bits by the left of these consecutive zeros.
918static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
919 unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
920 unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
921 if ((HiTZ + LoLZ) >= Num)
922 return (32 + HiTZ);
923 return 0;
924}
925
926// Direct materialization of 64-bit constants by enumerated patterns.
//
// Tries a fixed list of bit patterns, cheapest first, and emits the matching
// LI8/LIS8/ORI8/ORIS8 + rotate sequence for the first one that fits.
//   CurDAG  - DAG in which the machine nodes are created.
//   dl      - location attached to every created node.
//   Imm     - the 64-bit constant to materialize.
//   InstCnt - out: number of instructions in the returned sequence (1, 2 or
//             3), or 0 when no pattern matched.
// Returns the last node of the sequence (the one producing Imm), or nullptr
// when none of the enumerated patterns applies.
927static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
928 uint64_t Imm, unsigned &InstCnt) {
929 unsigned TZ = countTrailingZeros<uint64_t>(Imm);
930 unsigned LZ = countLeadingZeros<uint64_t>(Imm);
931 unsigned TO = countTrailingOnes<uint64_t>(Imm);
932 unsigned LO = countLeadingOnes<uint64_t>(Imm);
933 unsigned Hi32 = Hi_32(Imm);
934 unsigned Lo32 = Lo_32(Imm);
935 SDNode *Result = nullptr;
936 unsigned Shift = 0;
937
 // Helper: wrap a value as an i32 target constant operand.
938 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
939 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
940 };
941
942 // Following patterns use 1 instruction to materialize the Imm.
943 InstCnt = 1;
944 // 1-1) Patterns : {zeros}{15-bit value}
945 // {ones}{15-bit value}
946 if (isInt<16>(Imm)) {
947 SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
948 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
949 }
950 // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
951 // {ones}{15-bit value}{16 zeros}
952 if (TZ > 15 && (LZ > 32 || LO > 32))
953 return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
954 getI32Imm((Imm >> 16) & 0xffff));
955
956 // Following patterns use 2 instructions to materialize the Imm.
957 InstCnt = 2;
958 assert(LZ < 64 && "Unexpected leading zeros here.")(static_cast <bool> (LZ < 64 && "Unexpected leading zeros here."
) ? void (0) : __assert_fail ("LZ < 64 && \"Unexpected leading zeros here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 958, __extension__
__PRETTY_FUNCTION__))
;
959 // Count of ones following the leading zeros.
960 unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
961 // 2-1) Patterns : {zeros}{31-bit value}
962 // {ones}{31-bit value}
963 if (isInt<32>(Imm)) {
964 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
965 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
966 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
967 return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
968 getI32Imm(Imm & 0xffff));
969 }
970 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
971 // {zeros}{15-bit value}{zeros}
972 // {zeros}{ones}{15-bit value}
973 // {ones}{15-bit value}{zeros}
974 // We can take advantage of LI's sign-extension semantics to generate leading
975 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
976 if ((LZ + FO + TZ) > 48) {
977 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
978 getI32Imm((Imm >> TZ) & 0xffff));
979 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
980 getI32Imm(TZ), getI32Imm(LZ));
981 }
982 // 2-3) Pattern : {zeros}{15-bit value}{ones}
983 // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value,
984 // therefore we can take advantage of LI's sign-extension semantics, and then
985 // mask them off after rotation.
986 //
987 // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
988 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
989 // +------------------------+ +------------------------+
990 // 63 0 63 0
991 // Imm (Imm >> (48 - LZ) & 0xffff)
992 // +----sext-----|--16-bit--+ +clear-|-----------------+
993 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
994 // +------------------------+ +------------------------+
995 // 63 0 63 0
996 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
997 if ((LZ + TO) > 48) {
998 // Since the immediates with (LZ > 32) have been handled by previous
999 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1000 // the Imm by a negative value.
1001 assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value."
) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1001, __extension__
__PRETTY_FUNCTION__))
;
1002 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1003 getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1004 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1005 getI32Imm(48 - LZ), getI32Imm(LZ));
1006 }
1007 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1008 // {ones}{15-bit value}{ones}
1009 // We can take advantage of LI's sign-extension semantics to generate leading
1010 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1011 // after rotation.
1012 //
1013 // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1014 // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1015 // +------------------------+ +------------------------+
1016 // 63 0 63 0
1017 // Imm (Imm >> TO) & 0xffff
1018 // +----sext-----|--16-bit--+ +LZ|---------------------+
1019 // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1020 // +------------------------+ +------------------------+
1021 // 63 0 63 0
1022 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1023 if ((LZ + FO + TO) > 48) {
1024 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1025 getI32Imm((Imm >> TO) & 0xffff));
1026 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1027 getI32Imm(TO), getI32Imm(LZ));
1028 }
1029 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1030 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1031 // value, we can use LI for Lo16 without generating leading ones then add the
1032 // Hi16(in Lo32).
1033 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1034 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1035 getI32Imm(Lo32 & 0xffff));
1036 return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1037 getI32Imm(Lo32 >> 16));
1038 }
1039 // 2-6) Patterns : {******}{49 zeros}{******}
1040 // {******}{49 ones}{******}
1041 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1042 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1043 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1044 // it back.
1045 //
1046 // 1) findContiguousZerosAtLeast(Imm, 49)
1047 // +------|--zeros-|------+ +--zeros--||---15 bit--+
1048 // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1049 // +----------------------+ +----------------------+
1050 // 63 0 63 0
1051 //
1052 // 2) findContiguousZerosAtLeast(~Imm, 49)
1053 // +------|--ones--|------+ +---ones--||---15 bit--+
1054 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1055 // +----------------------+ +----------------------+
1056 // 63 0 63 0
 // Shift receives the right-rotate amount; a result of 0 means "no match",
 // in which case the inverted immediate is tried next.
1057 if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1058 (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1059 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1060 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1061 getI32Imm(RotImm & 0xffff));
1062 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1063 getI32Imm(Shift), getI32Imm(0));
1064 }
1065
1066 // Following patterns use 3 instructions to materialize the Imm.
1067 InstCnt = 3;
1068 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1069 // {zeros}{31-bit value}{zeros}
1070 // {zeros}{ones}{31-bit value}
1071 // {ones}{31-bit value}{zeros}
1072 // We can take advantage of LIS's sign-extension semantics to generate leading
1073 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1074 // ones in both sides after rotation.
1075 if ((LZ + FO + TZ) > 32) {
1076 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1077 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1078 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1079 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1080 getI32Imm((Imm >> TZ) & 0xffff));
1081 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1082 getI32Imm(TZ), getI32Imm(LZ));
1083 }
1084 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1085 // Shift right the Imm by (32 - LZ) bits to construct a negative 32-bit value,
1086 // therefore we can take advantage of LIS's sign-extension semantics, add
1087 // the remaining bits with ORI, and then mask them off after rotation.
1088 // This is similar to Pattern 2-3, please refer to the diagram there.
1089 if ((LZ + TO) > 32) {
1090 // Since the immediates with (LZ > 32) have been handled by previous
1091 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1092 // the Imm by a negative value.
1093 assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value."
) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1093, __extension__
__PRETTY_FUNCTION__))
;
1094 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1095 getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1096 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1097 getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1098 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1099 getI32Imm(32 - LZ), getI32Imm(LZ));
1100 }
1101 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1102 // {ones}{31-bit value}{ones}
1103 // We can take advantage of LIS's sign-extension semantics to generate leading
1104 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1105 // ones in left sides (if required) after rotation.
1106 // This is similar to Pattern 2-4, please refer to the diagram there.
1107 if ((LZ + FO + TO) > 32) {
1108 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1109 getI32Imm((Imm >> (TO + 16)) & 0xffff));
1110 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1111 getI32Imm((Imm >> TO) & 0xffff));
1112 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1113 getI32Imm(TO), getI32Imm(LZ));
1114 }
1115 // 3-4) Patterns : High word == Low word
1116 if (Hi32 == Lo32) {
1117 // Handle the first 32 bits.
1118 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1119 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1120 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1121 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1122 getI32Imm(Lo32 & 0xffff));
1123 // Use rldimi to insert the Low word into High word.
1124 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1125 getI32Imm(0)};
1126 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1127 }
1128 // 3-5) Patterns : {******}{33 zeros}{******}
1129 // {******}{33 ones}{******}
1130 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1131 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1132 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1133 // rotate it back.
1134 // This is similar to Pattern 2-6, please refer to the diagram there.
1135 if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1136 (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1137 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1138 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1139 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1140 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1141 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1142 getI32Imm(RotImm & 0xffff));
1143 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1144 getI32Imm(Shift), getI32Imm(0));
1145 }
1146
 // None of the enumerated patterns matched: report zero instructions.
1147 InstCnt = 0;
1148 return nullptr;
1149}
1150
1151// Try to select instructions to generate a 64 bit immediate using prefix as
1152// well as non prefix instructions. The function will return the SDNode
1153// to materialize that constant or it will return nullptr if it does not
1154// find one. The variable InstCnt is set to the number of instructions that
1155// were selected.
1156static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
1157 uint64_t Imm, unsigned &InstCnt) {
1158 unsigned TZ = countTrailingZeros<uint64_t>(Imm);
1159 unsigned LZ = countLeadingZeros<uint64_t>(Imm);
1160 unsigned TO = countTrailingOnes<uint64_t>(Imm);
1161 unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1162 unsigned Hi32 = Hi_32(Imm);
1163 unsigned Lo32 = Lo_32(Imm);
1164
1165 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1166 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1167 };
1168
1169 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1170 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1171 };
1172
1173 // Following patterns use 1 instruction to materialize Imm.
1174 InstCnt = 1;
1175
1176 // The pli instruction can materialize up to 34 bits directly.
1177 // If a constant fits within 34-bits, emit the pli instruction here directly.
1178 if (isInt<34>(Imm))
1179 return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1180 CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1181
1182 // Require at least two instructions.
1183 InstCnt = 2;
1184 SDNode *Result = nullptr;
1185 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1186 // {zeros}{33-bit value}{zeros}
1187 // {zeros}{ones}{33-bit value}
1188 // {ones}{33-bit value}{zeros}
1189 // We can take advantage of PLI's sign-extension semantics to generate leading
1190 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1191 if ((LZ + FO + TZ) > 30) {
1192 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1193 APInt Extended = SignedInt34.sext(64);
1194 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1195 getI64Imm(*Extended.getRawData()));
1196 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1197 getI32Imm(TZ), getI32Imm(LZ));
1198 }
1199 // Pattern : {zeros}{33-bit value}{ones}
1200 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1201 // therefore we can take advantage of PLI's sign-extension semantics, and then
1202 // mask them off after rotation.
1203 //
1204 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1205 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1206 // +------------------------+ +------------------------+
1207 // 63 0 63 0
1208 //
1209 // +----sext-----|--34-bit--+ +clear-|-----------------+
1210 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1211 // +------------------------+ +------------------------+
1212 // 63 0 63 0
1213 if ((LZ + TO) > 30) {
1214 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1215 APInt Extended = SignedInt34.sext(64);
1216 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1217 getI64Imm(*Extended.getRawData()));
1218 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1219 getI32Imm(30 - LZ), getI32Imm(LZ));
1220 }
1221 // Patterns : {zeros}{ones}{33-bit value}{ones}
1222 // {ones}{33-bit value}{ones}
1223 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1224 // generate leading ones, and then use RLDICL to mask off the ones in left
1225 // sides (if required) after rotation.
1226 if ((LZ + FO + TO) > 30) {
1227 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1228 APInt Extended = SignedInt34.sext(64);
1229 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1230 getI64Imm(*Extended.getRawData()));
1231 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1232 getI32Imm(TO), getI32Imm(LZ));
1233 }
1234 // Patterns : {******}{31 zeros}{******}
1235 // : {******}{31 ones}{******}
1236 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1237 // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1238 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1239 //
1240 // +------|--ones--|------+ +---ones--||---33 bit--+
1241 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1242 // +----------------------+ +----------------------+
1243 // 63 0 63 0
1244 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1245 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1246 if (isInt<34>(RotImm)) {
1247 Result =
1248 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1249 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1250 SDValue(Result, 0), getI32Imm(Shift),
1251 getI32Imm(0));
1252 }
1253 }
1254
1255 // Patterns : High word == Low word
1256 // This is basically a splat of a 32 bit immediate.
1257 if (Hi32 == Lo32) {
1258 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1259 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1260 getI32Imm(0)};
1261 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1262 }
1263
1264 InstCnt = 3;
1265 // Catch-all
1266 // This pattern can form any 64 bit immediate in 3 instructions.
1267 SDNode *ResultHi =
1268 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1269 SDNode *ResultLo =
1270 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1271 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1272 getI32Imm(0)};
1273 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1274}
1275
1276static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1277 unsigned *InstCnt = nullptr) {
1278 unsigned InstCntDirect = 0;
1279 // No more than 3 instructions is used if we can select the i64 immediate
1280 // directly.
1281 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1282
1283 const PPCSubtarget &Subtarget =
1284 CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
1285
1286 // If we have prefixed instructions and there is a chance we can
1287 // materialize the constant with fewer prefixed instructions than
1288 // non-prefixed, try that.
1289 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1290 unsigned InstCntDirectP = 0;
1291 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1292 // Use the prefix case in either of two cases:
1293 // 1) We have no result from the non-prefix case to use.
1294 // 2) The non-prefix case uses more instructions than the prefix case.
1295 // If the prefix and non-prefix cases use the same number of instructions
1296 // we will prefer the non-prefix case.
1297 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1298 if (InstCnt)
1299 *InstCnt = InstCntDirectP;
1300 return ResultP;
1301 }
1302 }
1303
1304 if (Result) {
1305 if (InstCnt)
1306 *InstCnt = InstCntDirect;
1307 return Result;
1308 }
1309 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1310 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1311 };
1312 // Handle the upper 32 bit value.
1313 Result =
1314 selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1315 // Add in the last bits as required.
1316 if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
1317 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1318 SDValue(Result, 0), getI32Imm(Hi16));
1319 ++InstCntDirect;
1320 }
1321 if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
1322 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1323 getI32Imm(Lo16));
1324 ++InstCntDirect;
1325 }
1326 if (InstCnt)
1327 *InstCnt = InstCntDirect;
1328 return Result;
1329}
1330
1331// Select a 64-bit constant.
1332static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1333 SDLoc dl(N);
1334
1335 // Get 64 bit value.
1336 int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1337 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1338 uint64_t SextImm = SignExtend64(Imm, MinSize);
1339 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1340 if (isInt<16>(SextImm))
1341 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1342 }
1343 return selectI64Imm(CurDAG, dl, Imm);
1344}
1345
1346namespace {
1347
1348class BitPermutationSelector {
1349 struct ValueBit {
1350 SDValue V;
1351
1352 // The bit number in the value, using a convention where bit 0 is the
1353 // lowest-order bit.
1354 unsigned Idx;
1355
1356 // ConstZero means a bit we need to mask off.
1357 // Variable is a bit comes from an input variable.
1358 // VariableKnownToBeZero is also a bit comes from an input variable,
1359 // but it is known to be already zero. So we do not need to mask them.
1360 enum Kind {
1361 ConstZero,
1362 Variable,
1363 VariableKnownToBeZero
1364 } K;
1365
1366 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1367 : V(V), Idx(I), K(K) {}
1368 ValueBit(Kind K = Variable) : Idx(UINT32_MAX(4294967295U)), K(K) {}
1369
1370 bool isZero() const {
1371 return K == ConstZero || K == VariableKnownToBeZero;
1372 }
1373
1374 bool hasValue() const {
1375 return K == Variable || K == VariableKnownToBeZero;
1376 }
1377
1378 SDValue getValue() const {
1379 assert(hasValue() && "Cannot get the value of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value of a constant bit"
) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value of a constant bit\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1379, __extension__
__PRETTY_FUNCTION__))
;
1380 return V;
1381 }
1382
1383 unsigned getValueBitIndex() const {
1384 assert(hasValue() && "Cannot get the value bit index of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value bit index of a constant bit"
) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value bit index of a constant bit\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1384, __extension__
__PRETTY_FUNCTION__))
;
1385 return Idx;
1386 }
1387 };
1388
1389 // A bit group has the same underlying value and the same rotate factor.
1390 struct BitGroup {
1391 SDValue V;
1392 unsigned RLAmt;
1393 unsigned StartIdx, EndIdx;
1394
1395 // This rotation amount assumes that the lower 32 bits of the quantity are
1396 // replicated in the high 32 bits by the rotation operator (which is done
1397 // by rlwinm and friends in 64-bit mode).
1398 bool Repl32;
1399 // Did converting to Repl32 == true change the rotation factor? If it did,
1400 // it decreased it by 32.
1401 bool Repl32CR;
1402 // Was this group coalesced after setting Repl32 to true?
1403 bool Repl32Coalesced;
1404
1405 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1406 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1407 Repl32Coalesced(false) {
1408 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << Rdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tbit group for " <<
V.getNode() << " RLAmt = " << R << " [" <<
S << ", " << E << "]\n"; } } while (false)
1409 << " [" << S << ", " << E << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tbit group for " <<
V.getNode() << " RLAmt = " << R << " [" <<
S << ", " << E << "]\n"; } } while (false)
;
1410 }
1411 };
1412
1413 // Information on each (Value, RLAmt) pair (like the number of groups
1414 // associated with each) used to choose the lowering method.
1415 struct ValueRotInfo {
1416 SDValue V;
1417 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1418 unsigned NumGroups = 0;
1419 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1420 bool Repl32 = false;
1421
1422 ValueRotInfo() = default;
1423
1424 // For sorting (in reverse order) by NumGroups, and then by
1425 // FirstGroupStartIdx.
1426 bool operator < (const ValueRotInfo &Other) const {
1427 // We need to sort so that the non-Repl32 come first because, when we're
1428 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1429 // masking operation.
1430 if (Repl32 < Other.Repl32)
1431 return true;
1432 else if (Repl32 > Other.Repl32)
1433 return false;
1434 else if (NumGroups > Other.NumGroups)
1435 return true;
1436 else if (NumGroups < Other.NumGroups)
1437 return false;
1438 else if (RLAmt == 0 && Other.RLAmt != 0)
1439 return true;
1440 else if (RLAmt != 0 && Other.RLAmt == 0)
1441 return false;
1442 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1443 return true;
1444 return false;
1445 }
1446 };
1447
1448 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1449 using ValueBitsMemoizer =
1450 DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1451 ValueBitsMemoizer Memoizer;
1452
1453 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1454 // The bool is true if something interesting was deduced, otherwise if we're
1455 // providing only a generic representation of V (or something else likewise
1456 // uninteresting for instruction selection) through the SmallVector.
1457 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1458 unsigned NumBits) {
1459 auto &ValueEntry = Memoizer[V];
1460 if (ValueEntry)
1461 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1462 ValueEntry.reset(new ValueBitsMemoizedValue());
1463 bool &Interesting = ValueEntry->first;
1464 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1465 Bits.resize(NumBits);
1466
1467 switch (V.getOpcode()) {
1468 default: break;
1469 case ISD::ROTL:
1470 if (isa<ConstantSDNode>(V.getOperand(1))) {
1471 unsigned RotAmt = V.getConstantOperandVal(1);
1472
1473 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1474
1475 for (unsigned i = 0; i < NumBits; ++i)
1476 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1477
1478 return std::make_pair(Interesting = true, &Bits);
1479 }
1480 break;
1481 case ISD::SHL:
1482 case PPCISD::SHL:
1483 if (isa<ConstantSDNode>(V.getOperand(1))) {
1484 unsigned ShiftAmt = V.getConstantOperandVal(1);
1485
1486 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1487
1488 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1489 Bits[i] = LHSBits[i - ShiftAmt];
1490
1491 for (unsigned i = 0; i < ShiftAmt; ++i)
1492 Bits[i] = ValueBit(ValueBit::ConstZero);
1493
1494 return std::make_pair(Interesting = true, &Bits);
1495 }
1496 break;
1497 case ISD::SRL:
1498 case PPCISD::SRL:
1499 if (isa<ConstantSDNode>(V.getOperand(1))) {
1500 unsigned ShiftAmt = V.getConstantOperandVal(1);
1501
1502 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1503
1504 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1505 Bits[i] = LHSBits[i + ShiftAmt];
1506
1507 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1508 Bits[i] = ValueBit(ValueBit::ConstZero);
1509
1510 return std::make_pair(Interesting = true, &Bits);
1511 }
1512 break;
1513 case ISD::AND:
1514 if (isa<ConstantSDNode>(V.getOperand(1))) {
1515 uint64_t Mask = V.getConstantOperandVal(1);
1516
1517 const SmallVector<ValueBit, 64> *LHSBits;
1518 // Mark this as interesting, only if the LHS was also interesting. This
1519 // prevents the overall procedure from matching a single immediate 'and'
1520 // (which is non-optimal because such an and might be folded with other
1521 // things if we don't select it here).
1522 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1523
1524 for (unsigned i = 0; i < NumBits; ++i)
1525 if (((Mask >> i) & 1) == 1)
1526 Bits[i] = (*LHSBits)[i];
1527 else {
1528 // AND instruction masks this bit. If the input is already zero,
1529 // we have nothing to do here. Otherwise, make the bit ConstZero.
1530 if ((*LHSBits)[i].isZero())
1531 Bits[i] = (*LHSBits)[i];
1532 else
1533 Bits[i] = ValueBit(ValueBit::ConstZero);
1534 }
1535
1536 return std::make_pair(Interesting, &Bits);
1537 }
1538 break;
1539 case ISD::OR: {
1540 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1541 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1542
1543 bool AllDisjoint = true;
1544 SDValue LastVal = SDValue();
1545 unsigned LastIdx = 0;
1546 for (unsigned i = 0; i < NumBits; ++i) {
1547 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1548 // If both inputs are known to be zero and one is ConstZero and
1549 // another is VariableKnownToBeZero, we can select whichever
1550 // we like. To minimize the number of bit groups, we select
1551 // VariableKnownToBeZero if this bit is the next bit of the same
1552 // input variable from the previous bit. Otherwise, we select
1553 // ConstZero.
1554 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1555 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1556 Bits[i] = LHSBits[i];
1557 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1558 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1559 Bits[i] = RHSBits[i];
1560 else
1561 Bits[i] = ValueBit(ValueBit::ConstZero);
1562 }
1563 else if (LHSBits[i].isZero())
1564 Bits[i] = RHSBits[i];
1565 else if (RHSBits[i].isZero())
1566 Bits[i] = LHSBits[i];
1567 else {
1568 AllDisjoint = false;
1569 break;
1570 }
1571 // We remember the value and bit index of this bit.
1572 if (Bits[i].hasValue()) {
1573 LastVal = Bits[i].getValue();
1574 LastIdx = Bits[i].getValueBitIndex();
1575 }
1576 else {
1577 if (LastVal) LastVal = SDValue();
1578 LastIdx = 0;
1579 }
1580 }
1581
1582 if (!AllDisjoint)
1583 break;
1584
1585 return std::make_pair(Interesting = true, &Bits);
1586 }
1587 case ISD::ZERO_EXTEND: {
1588 // We support only the case with zero extension from i32 to i64 so far.
1589 if (V.getValueType() != MVT::i64 ||
1590 V.getOperand(0).getValueType() != MVT::i32)
1591 break;
1592
1593 const SmallVector<ValueBit, 64> *LHSBits;
1594 const unsigned NumOperandBits = 32;
1595 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1596 NumOperandBits);
1597
1598 for (unsigned i = 0; i < NumOperandBits; ++i)
1599 Bits[i] = (*LHSBits)[i];
1600
1601 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1602 Bits[i] = ValueBit(ValueBit::ConstZero);
1603
1604 return std::make_pair(Interesting, &Bits);
1605 }
1606 case ISD::TRUNCATE: {
1607 EVT FromType = V.getOperand(0).getValueType();
1608 EVT ToType = V.getValueType();
1609 // We support only the case with truncate from i64 to i32.
1610 if (FromType != MVT::i64 || ToType != MVT::i32)
1611 break;
1612 const unsigned NumAllBits = FromType.getSizeInBits();
1613 SmallVector<ValueBit, 64> *InBits;
1614 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1615 NumAllBits);
1616 const unsigned NumValidBits = ToType.getSizeInBits();
1617
1618 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1619 // So, we cannot include this truncate.
1620 bool UseUpper32bit = false;
1621 for (unsigned i = 0; i < NumValidBits; ++i)
1622 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1623 UseUpper32bit = true;
1624 break;
1625 }
1626 if (UseUpper32bit)
1627 break;
1628
1629 for (unsigned i = 0; i < NumValidBits; ++i)
1630 Bits[i] = (*InBits)[i];
1631
1632 return std::make_pair(Interesting, &Bits);
1633 }
1634 case ISD::AssertZext: {
1635 // For AssertZext, we look through the operand and
1636 // mark the bits known to be zero.
1637 const SmallVector<ValueBit, 64> *LHSBits;
1638 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1639 NumBits);
1640
1641 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1642 const unsigned NumValidBits = FromType.getSizeInBits();
1643 for (unsigned i = 0; i < NumValidBits; ++i)
1644 Bits[i] = (*LHSBits)[i];
1645
1646 // These bits are known to be zero but the AssertZext may be from a value
1647 // that already has some constant zero bits (i.e. from a masking and).
1648 for (unsigned i = NumValidBits; i < NumBits; ++i)
1649 Bits[i] = (*LHSBits)[i].hasValue()
1650 ? ValueBit((*LHSBits)[i].getValue(),
1651 (*LHSBits)[i].getValueBitIndex(),
1652 ValueBit::VariableKnownToBeZero)
1653 : ValueBit(ValueBit::ConstZero);
1654
1655 return std::make_pair(Interesting, &Bits);
1656 }
1657 case ISD::LOAD:
1658 LoadSDNode *LD = cast<LoadSDNode>(V);
1659 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1660 EVT VT = LD->getMemoryVT();
1661 const unsigned NumValidBits = VT.getSizeInBits();
1662
1663 for (unsigned i = 0; i < NumValidBits; ++i)
1664 Bits[i] = ValueBit(V, i);
1665
1666 // These bits are known to be zero.
1667 for (unsigned i = NumValidBits; i < NumBits; ++i)
1668 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1669
1670 // Zero-extending load itself cannot be optimized. So, it is not
1671 // interesting by itself though it gives useful information.
1672 return std::make_pair(Interesting = false, &Bits);
1673 }
1674 break;
1675 }
1676
1677 for (unsigned i = 0; i < NumBits; ++i)
1678 Bits[i] = ValueBit(V, i);
1679
1680 return std::make_pair(Interesting = false, &Bits);
1681 }
1682
1683 // For each value (except the constant ones), compute the left-rotate amount
1684 // to get it from its original to final position.
1685 void computeRotationAmounts() {
1686 NeedMask = false;
1687 RLAmt.resize(Bits.size());
1688 for (unsigned i = 0; i < Bits.size(); ++i)
1689 if (Bits[i].hasValue()) {
1690 unsigned VBI = Bits[i].getValueBitIndex();
1691 if (i >= VBI)
1692 RLAmt[i] = i - VBI;
1693 else
1694 RLAmt[i] = Bits.size() - (VBI - i);
1695 } else if (Bits[i].isZero()) {
1696 NeedMask = true;
1697 RLAmt[i] = UINT32_MAX(4294967295U);
1698 } else {
1699 llvm_unreachable("Unknown value bit type")::llvm::llvm_unreachable_internal("Unknown value bit type", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1699)
;
1700 }
1701 }
1702
1703 // Collect groups of consecutive bits with the same underlying value and
1704 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1705 // they break up groups.
1706 void collectBitGroups(bool LateMask) {
1707 BitGroups.clear();
1708
1709 unsigned LastRLAmt = RLAmt[0];
1710 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1711 unsigned LastGroupStartIdx = 0;
1712 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1713 for (unsigned i = 1; i < Bits.size(); ++i) {
1714 unsigned ThisRLAmt = RLAmt[i];
1715 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1716 if (LateMask && !ThisValue) {
1717 ThisValue = LastValue;
1718 ThisRLAmt = LastRLAmt;
1719 // If we're doing late masking, then the first bit group always starts
1720 // at zero (even if the first bits were zero).
1721 if (BitGroups.empty())
1722 LastGroupStartIdx = 0;
1723 }
1724
1725 // If this bit is known to be zero and the current group is a bit group
1726 // of zeros, we do not need to terminate the current bit group even the
1727 // Value or RLAmt does not match here. Instead, we terminate this group
1728 // when the first non-zero bit appears later.
1729 if (IsGroupOfZeros && Bits[i].isZero())
1730 continue;
1731
1732 // If this bit has the same underlying value and the same rotate factor as
1733 // the last one, then they're part of the same group.
1734 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1735 // We cannot continue the current group if this bits is not known to
1736 // be zero in a bit group of zeros.
1737 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1738 continue;
1739
1740 if (LastValue.getNode())
1741 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1742 i-1));
1743 LastRLAmt = ThisRLAmt;
1744 LastValue = ThisValue;
1745 LastGroupStartIdx = i;
1746 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1747 }
1748 if (LastValue.getNode())
1749 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1750 Bits.size()-1));
1751
1752 if (BitGroups.empty())
1753 return;
1754
1755 // We might be able to combine the first and last groups.
1756 if (BitGroups.size() > 1) {
1757 // If the first and last groups are the same, then remove the first group
1758 // in favor of the last group, making the ending index of the last group
1759 // equal to the ending index of the to-be-removed first group.
1760 if (BitGroups[0].StartIdx == 0 &&
1761 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1762 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1763 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1764 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining final bit group with initial one\n"
; } } while (false)
;
1765 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1766 BitGroups.erase(BitGroups.begin());
1767 }
1768 }
1769 }
1770
1771 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1772 // associated with each. If the number of groups are same, we prefer a group
1773 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1774 // instruction. If there is a degeneracy, pick the one that occurs
1775 // first (in the final value).
1776 void collectValueRotInfo() {
1777 ValueRots.clear();
1778
1779 for (auto &BG : BitGroups) {
1780 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1781 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1782 VRI.V = BG.V;
1783 VRI.RLAmt = BG.RLAmt;
1784 VRI.Repl32 = BG.Repl32;
1785 VRI.NumGroups += 1;
1786 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1787 }
1788
1789 // Now that we've collected the various ValueRotInfo instances, we need to
1790 // sort them.
1791 ValueRotsVec.clear();
1792 for (auto &I : ValueRots) {
1793 ValueRotsVec.push_back(I.second);
1794 }
1795 llvm::sort(ValueRotsVec);
1796 }
1797
1798 // In 64-bit mode, rlwinm and friends have a rotation operator that
1799 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1800 // indices of these instructions can only be in the lower 32 bits, so they
1801 // can only represent some 64-bit bit groups. However, when they can be used,
1802 // the 32-bit replication can be used to represent, as a single bit group,
1803 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1804 // groups when possible. Returns true if any of the bit groups were
1805 // converted.
1806 void assignRepl32BitGroups() {
1807 // If we have bits like this:
1808 //
1809 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1810 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1811 // Groups: | RLAmt = 8 | RLAmt = 40 |
1812 //
1813 // But, making use of a 32-bit operation that replicates the low-order 32
1814 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1815 // of 8.
1816
1817 auto IsAllLow32 = [this](BitGroup & BG) {
1818 if (BG.StartIdx <= BG.EndIdx) {
1819 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1820 if (!Bits[i].hasValue())
1821 continue;
1822 if (Bits[i].getValueBitIndex() >= 32)
1823 return false;
1824 }
1825 } else {
1826 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1827 if (!Bits[i].hasValue())
1828 continue;
1829 if (Bits[i].getValueBitIndex() >= 32)
1830 return false;
1831 }
1832 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1833 if (!Bits[i].hasValue())
1834 continue;
1835 if (Bits[i].getValueBitIndex() >= 32)
1836 return false;
1837 }
1838 }
1839
1840 return true;
1841 };
1842
1843 for (auto &BG : BitGroups) {
1844 // If this bit group has RLAmt of 0 and will not be merged with
1845 // another bit group, we don't benefit from Repl32. We don't mark
1846 // such group to give more freedom for later instruction selection.
1847 if (BG.RLAmt == 0) {
1848 auto PotentiallyMerged = [this](BitGroup & BG) {
1849 for (auto &BG2 : BitGroups)
1850 if (&BG != &BG2 && BG.V == BG2.V &&
1851 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
1852 return true;
1853 return false;
1854 };
1855 if (!PotentiallyMerged(BG))
1856 continue;
1857 }
1858 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1859 if (IsAllLow32(BG)) {
1860 if (BG.RLAmt >= 32) {
1861 BG.RLAmt -= 32;
1862 BG.Repl32CR = true;
1863 }
1864
1865 BG.Repl32 = true;
1866
1867 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false)
1868 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false)
1869 << BG.StartIdx << ", " << BG.EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for "
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt
<< " [" << BG.StartIdx << ", " << BG
.EndIdx << "]\n"; } } while (false)
;
1870 }
1871 }
1872 }
1873
1874 // Now walk through the bit groups, consolidating where possible.
1875 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1876 // We might want to remove this bit group by merging it with the previous
1877 // group (which might be the ending group).
1878 auto IP = (I == BitGroups.begin()) ?
1879 std::prev(BitGroups.end()) : std::prev(I);
1880 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
1881 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
1882
1883 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)
1884 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)
1885 << I->StartIdx << ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)
1886 << "] with group with range [" << IP->StartIdx << ", "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)
1887 << IP->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with group with range [" << IP
->StartIdx << ", " << IP->EndIdx << "]\n"
; } } while (false)
;
1888
1889 IP->EndIdx = I->EndIdx;
1890 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
1891 IP->Repl32Coalesced = true;
1892 I = BitGroups.erase(I);
1893 continue;
1894 } else {
1895 // There is a special case worth handling: If there is a single group
1896 // covering the entire upper 32 bits, and it can be merged with both
1897 // the next and previous groups (which might be the same group), then
1898 // do so. If it is the same group (so there will be only one group in
1899 // total), then we need to reverse the order of the range so that it
1900 // covers the entire 64 bits.
1901 if (I->StartIdx == 32 && I->EndIdx == 63) {
1902 assert(std::next(I) == BitGroups.end() &&(static_cast <bool> (std::next(I) == BitGroups.end() &&
"bit group ends at index 63 but there is another?") ? void (
0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1903, __extension__
__PRETTY_FUNCTION__))
1903 "bit group ends at index 63 but there is another?")(static_cast <bool> (std::next(I) == BitGroups.end() &&
"bit group ends at index 63 but there is another?") ? void (
0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 1903, __extension__
__PRETTY_FUNCTION__))
;
1904 auto IN = BitGroups.begin();
1905
1906 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
1907 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
1908 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
1909 IsAllLow32(*I)) {
1910
1911 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)
1912 << " RLAmt = " << I->RLAmt << " [" << I->StartIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)
1913 << ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)
1914 << "] with 32-bit replicated groups with ranges ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)
1915 << IP->StartIdx << ", " << IP->EndIdx << "] and ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)
1916 << IN->StartIdx << ", " << IN->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tcombining bit group for "
<< I->V.getNode() << " RLAmt = " << I->
RLAmt << " [" << I->StartIdx << ", " <<
I->EndIdx << "] with 32-bit replicated groups with ranges ["
<< IP->StartIdx << ", " << IP->EndIdx
<< "] and [" << IN->StartIdx << ", " <<
IN->EndIdx << "]\n"; } } while (false)
;
1917
1918 if (IP == IN) {
1919 // There is only one other group; change it to cover the whole
1920 // range (backward, so that it can still be Repl32 but cover the
1921 // whole 64-bit range).
1922 IP->StartIdx = 31;
1923 IP->EndIdx = 30;
1924 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1925 IP->Repl32Coalesced = true;
1926 I = BitGroups.erase(I);
Value stored to 'I' is never read
1927 } else {
1928 // There are two separate groups, one before this group and one
1929 // after us (at the beginning). We're going to remove this group,
1930 // but also the group at the very beginning.
1931 IP->EndIdx = IN->EndIdx;
1932 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1933 IP->Repl32Coalesced = true;
1934 I = BitGroups.erase(I);
1935 BitGroups.erase(BitGroups.begin());
1936 }
1937
1938 // This must be the last group in the vector (and we might have
1939 // just invalidated the iterator above), so break here.
1940 break;
1941 }
1942 }
1943 }
1944
1945 ++I;
1946 }
1947 }
1948
1949 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1950 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1951 }
1952
1953 uint64_t getZerosMask() {
1954 uint64_t Mask = 0;
1955 for (unsigned i = 0; i < Bits.size(); ++i) {
1956 if (Bits[i].hasValue())
1957 continue;
1958 Mask |= (UINT64_C(1)1UL << i);
1959 }
1960
1961 return ~Mask;
1962 }
1963
1964 // This method extends an input value to 64 bit if input is 32-bit integer.
1965 // While selecting instructions in BitPermutationSelector in 64-bit mode,
1966 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1967 // In such case, we extend it to 64 bit to be consistent with other values.
1968 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1969 if (V.getValueSizeInBits() == 64)
1970 return V;
1971
1972 assert(V.getValueSizeInBits() == 32)(static_cast <bool> (V.getValueSizeInBits() == 32) ? void
(0) : __assert_fail ("V.getValueSizeInBits() == 32", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1972, __extension__ __PRETTY_FUNCTION__))
;
1973 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1974 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1975 MVT::i64), 0);
1976 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1977 MVT::i64, ImDef, V,
1978 SubRegIdx), 0);
1979 return ExtVal;
1980 }
1981
1982 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
1983 if (V.getValueSizeInBits() == 32)
1984 return V;
1985
1986 assert(V.getValueSizeInBits() == 64)(static_cast <bool> (V.getValueSizeInBits() == 64) ? void
(0) : __assert_fail ("V.getValueSizeInBits() == 64", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 1986, __extension__ __PRETTY_FUNCTION__))
;
1987 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1988 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
1989 MVT::i32, V, SubRegIdx), 0);
1990 return SubVal;
1991 }
1992
1993 // Depending on the number of groups for a particular value, it might be
1994 // better to rotate, mask explicitly (using andi/andis), and then or the
1995 // result. Select this part of the result first.
1996 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1997 if (BPermRewriterNoMasking)
1998 return;
1999
2000 for (ValueRotInfo &VRI : ValueRotsVec) {
2001 unsigned Mask = 0;
2002 for (unsigned i = 0; i < Bits.size(); ++i) {
2003 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2004 continue;
2005 if (RLAmt[i] != VRI.RLAmt)
2006 continue;
2007 Mask |= (1u << i);
2008 }
2009
2010 // Compute the masks for andi/andis that would be necessary.
2011 unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16;
2012 assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask for value bit groups") ? void (0) : __assert_fail
("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2013, __extension__
__PRETTY_FUNCTION__))
2013 "No set bits in mask for value bit groups")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask for value bit groups") ? void (0) : __assert_fail
("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2013, __extension__
__PRETTY_FUNCTION__))
;
2014 bool NeedsRotate = VRI.RLAmt != 0;
2015
2016 // We're trying to minimize the number of instructions. If we have one
2017 // group, using one of andi/andis can break even. If we have three
2018 // groups, we can use both andi and andis and break even (to use both
2019 // andi and andis we also need to or the results together). We need four
2020 // groups if we also need to rotate. To use andi/andis we need to do more
2021 // than break even because rotate-and-mask instructions tend to be easier
2022 // to schedule.
2023
2024 // FIXME: We've biased here against using andi/andis, which is right for
2025 // POWER cores, but not optimal everywhere. For example, on the A2,
2026 // andi/andis have single-cycle latency whereas the rotate-and-mask
2027 // instructions take two cycles, and it would be better to bias toward
2028 // andi/andis in break-even cases.
2029
2030 unsigned NumAndInsts = (unsigned) NeedsRotate +
2031 (unsigned) (ANDIMask != 0) +
2032 (unsigned) (ANDISMask != 0) +
2033 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2034 (unsigned) (bool) Res;
2035
2036 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< ":" << "\n\t\t\tisel using masking: " <<
NumAndInsts << " using rotates: " << VRI.NumGroups
<< "\n"; } } while (false)
2037 << " RL: " << VRI.RLAmt << ":"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< ":" << "\n\t\t\tisel using masking: " <<
NumAndInsts << " using rotates: " << VRI.NumGroups
<< "\n"; } } while (false)
2038 << "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< ":" << "\n\t\t\tisel using masking: " <<
NumAndInsts << " using rotates: " << VRI.NumGroups
<< "\n"; } } while (false)
2039 << " using rotates: " << VRI.NumGroups << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< ":" << "\n\t\t\tisel using masking: " <<
NumAndInsts << " using rotates: " << VRI.NumGroups
<< "\n"; } } while (false)
;
2040
2041 if (NumAndInsts >= VRI.NumGroups)
2042 continue;
2043
2044 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\t\t\tusing masking\n";
} } while (false)
;
2045
2046 if (InstCnt) *InstCnt += NumAndInsts;
2047
2048 SDValue VRot;
2049 if (VRI.RLAmt) {
2050 SDValue Ops[] =
2051 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2052 getI32Imm(0, dl), getI32Imm(31, dl) };
2053 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2054 Ops), 0);
2055 } else {
2056 VRot = TruncateToInt32(VRI.V, dl);
2057 }
2058
2059 SDValue ANDIVal, ANDISVal;
2060 if (ANDIMask != 0)
2061 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2062 VRot, getI32Imm(ANDIMask, dl)),
2063 0);
2064 if (ANDISMask != 0)
2065 ANDISVal =
2066 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2067 getI32Imm(ANDISMask, dl)),
2068 0);
2069
2070 SDValue TotalVal;
2071 if (!ANDIVal)
2072 TotalVal = ANDISVal;
2073 else if (!ANDISVal)
2074 TotalVal = ANDIVal;
2075 else
2076 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2077 ANDIVal, ANDISVal), 0);
2078
2079 if (!Res)
2080 Res = TotalVal;
2081 else
2082 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2083 Res, TotalVal), 0);
2084
2085 // Now, remove all groups with this underlying value and rotation
2086 // factor.
2087 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2088 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2089 });
2090 }
2091 }
2092
2093 // Instruction selection for the 32-bit case.
2094 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2095 SDLoc dl(N);
2096 SDValue Res;
2097
2098 if (InstCnt) *InstCnt = 0;
2099
2100 // Take care of cases that should use andi/andis first.
2101 SelectAndParts32(dl, Res, InstCnt);
2102
2103 // If we've not yet selected a 'starting' instruction, and we have no zeros
2104 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2105 // number of groups), and start with this rotated value.
2106 if ((!NeedMask || LateMask) && !Res) {
2107 ValueRotInfo &VRI = ValueRotsVec[0];
2108 if (VRI.RLAmt) {
2109 if (InstCnt) *InstCnt += 1;
2110 SDValue Ops[] =
2111 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2112 getI32Imm(0, dl), getI32Imm(31, dl) };
2113 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2114 0);
2115 } else {
2116 Res = TruncateToInt32(VRI.V, dl);
2117 }
2118
2119 // Now, remove all groups with this underlying value and rotation factor.
2120 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2121 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2122 });
2123 }
2124
2125 if (InstCnt) *InstCnt += BitGroups.size();
2126
2127 // Insert the other groups (one at a time).
2128 for (auto &BG : BitGroups) {
2129 if (!Res) {
2130 SDValue Ops[] =
2131 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2132 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2133 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2134 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2135 } else {
2136 SDValue Ops[] =
2137 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2138 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2139 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2140 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2141 }
2142 }
2143
2144 if (LateMask) {
2145 unsigned Mask = (unsigned) getZerosMask();
2146
2147 unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16;
2148 assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in zeros mask?") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in zeros mask?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2149, __extension__
__PRETTY_FUNCTION__))
2149 "No set bits in zeros mask?")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in zeros mask?") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in zeros mask?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2149, __extension__
__PRETTY_FUNCTION__))
;
2150
2151 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2152 (unsigned) (ANDISMask != 0) +
2153 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2154
2155 SDValue ANDIVal, ANDISVal;
2156 if (ANDIMask != 0)
2157 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2158 Res, getI32Imm(ANDIMask, dl)),
2159 0);
2160 if (ANDISMask != 0)
2161 ANDISVal =
2162 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2163 getI32Imm(ANDISMask, dl)),
2164 0);
2165
2166 if (!ANDIVal)
2167 Res = ANDISVal;
2168 else if (!ANDISVal)
2169 Res = ANDIVal;
2170 else
2171 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2172 ANDIVal, ANDISVal), 0);
2173 }
2174
2175 return Res.getNode();
2176 }
2177
// Report how many instructions (1 or 2) are counted for a 64-bit
// rotate-and-mask (IsIns false) or rotate-mask-and-insert (IsIns true) with
// the given rotation amount and mask bounds. Repl32 requests always count
// as a single instruction.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  if (Repl32)
    return 1;

  // The instructions number bits from the high-order end, so 'start' and
  // 'end' swap roles relative to MaskStart/MaskEnd.
  const unsigned InstMaskStart = 64 - MaskEnd - 1;
  const unsigned InstMaskEnd = 64 - MaskStart - 1;

  // Only considered for the non-insert form: the mask touches either end of
  // the register.
  const bool TouchesAnEdge = InstMaskEnd == 63 || InstMaskStart == 0;
  // Considered for both forms: the mask end is tied to the rotation amount.
  const bool EndMatchesRotation = InstMaskEnd == 63 - RLAmt;

  const bool SingleInst = (!IsIns && TouchesAnEdge) || EndMatchesRotation;
  return SingleInst ? 1 : 2;
}
2195
2196 // For 64-bit values, not all combinations of rotates and masks are
2197 // available. Produce one if it is available.
2198 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2199 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2200 unsigned *InstCnt = nullptr) {
2201 // In the notation used by the instructions, 'start' and 'end' are reversed
2202 // because bits are counted from high to low order.
2203 unsigned InstMaskStart = 64 - MaskEnd - 1,
2204 InstMaskEnd = 64 - MaskStart - 1;
2205
2206 if (InstCnt) *InstCnt += 1;
2207
2208 if (Repl32) {
2209 // This rotation amount assumes that the lower 32 bits of the quantity
2210 // are replicated in the high 32 bits by the rotation operator (which is
2211 // done by rlwinm and friends).
2212 assert(InstMaskStart >= 32 && "Mask cannot start out of range")(static_cast <bool> (InstMaskStart >= 32 && "Mask cannot start out of range"
) ? void (0) : __assert_fail ("InstMaskStart >= 32 && \"Mask cannot start out of range\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2212, __extension__
__PRETTY_FUNCTION__))
;
2213 assert(InstMaskEnd >= 32 && "Mask cannot end out of range")(static_cast <bool> (InstMaskEnd >= 32 && "Mask cannot end out of range"
) ? void (0) : __assert_fail ("InstMaskEnd >= 32 && \"Mask cannot end out of range\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2213, __extension__
__PRETTY_FUNCTION__))
;
2214 SDValue Ops[] =
2215 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2216 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2217 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2218 Ops), 0);
2219 }
2220
2221 if (InstMaskEnd == 63) {
2222 SDValue Ops[] =
2223 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2224 getI32Imm(InstMaskStart, dl) };
2225 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2226 }
2227
2228 if (InstMaskStart == 0) {
2229 SDValue Ops[] =
2230 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2231 getI32Imm(InstMaskEnd, dl) };
2232 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2233 }
2234
2235 if (InstMaskEnd == 63 - RLAmt) {
2236 SDValue Ops[] =
2237 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2238 getI32Imm(InstMaskStart, dl) };
2239 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2240 }
2241
2242 // We cannot do this with a single instruction, so we'll use two. The
2243 // problem is that we're not free to choose both a rotation amount and mask
2244 // start and end independently. We can choose an arbitrary mask start and
2245 // end, but then the rotation amount is fixed. Rotation, however, can be
2246 // inverted, and so by applying an "inverse" rotation first, we can get the
2247 // desired result.
2248 if (InstCnt) *InstCnt += 1;
2249
2250 // The rotation mask for the second instruction must be MaskStart.
2251 unsigned RLAmt2 = MaskStart;
2252 // The first instruction must rotate V so that the overall rotation amount
2253 // is RLAmt.
2254 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2255 if (RLAmt1)
2256 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2257 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2258 }
2259
2260 // For 64-bit values, not all combinations of rotates and masks are
2261 // available. Produce a rotate-mask-and-insert if one is available.
2262 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2263 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2264 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2265 // In the notation used by the instructions, 'start' and 'end' are reversed
2266 // because bits are counted from high to low order.
2267 unsigned InstMaskStart = 64 - MaskEnd - 1,
2268 InstMaskEnd = 64 - MaskStart - 1;
2269
2270 if (InstCnt) *InstCnt += 1;
2271
2272 if (Repl32) {
2273 // This rotation amount assumes that the lower 32 bits of the quantity
2274 // are replicated in the high 32 bits by the rotation operator (which is
2275 // done by rlwinm and friends).
2276 assert(InstMaskStart >= 32 && "Mask cannot start out of range")(static_cast <bool> (InstMaskStart >= 32 && "Mask cannot start out of range"
) ? void (0) : __assert_fail ("InstMaskStart >= 32 && \"Mask cannot start out of range\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2276, __extension__
__PRETTY_FUNCTION__))
;
2277 assert(InstMaskEnd >= 32 && "Mask cannot end out of range")(static_cast <bool> (InstMaskEnd >= 32 && "Mask cannot end out of range"
) ? void (0) : __assert_fail ("InstMaskEnd >= 32 && \"Mask cannot end out of range\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2277, __extension__
__PRETTY_FUNCTION__))
;
2278 SDValue Ops[] =
2279 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2280 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2281 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2282 Ops), 0);
2283 }
2284
2285 if (InstMaskEnd == 63 - RLAmt) {
2286 SDValue Ops[] =
2287 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2288 getI32Imm(InstMaskStart, dl) };
2289 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2290 }
2291
2292 // We cannot do this with a single instruction, so we'll use two. The
2293 // problem is that we're not free to choose both a rotation amount and mask
2294 // start and end independently. We can choose an arbitrary mask start and
2295 // end, but then the rotation amount is fixed. Rotation, however, can be
2296 // inverted, and so by applying an "inverse" rotation first, we can get the
2297 // desired result.
2298 if (InstCnt) *InstCnt += 1;
2299
2300 // The rotation mask for the second instruction must be MaskStart.
2301 unsigned RLAmt2 = MaskStart;
2302 // The first instruction must rotate V so that the overall rotation amount
2303 // is RLAmt.
2304 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2305 if (RLAmt1)
2306 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2307 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2308 }
2309
2310 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2311 if (BPermRewriterNoMasking)
2312 return;
2313
2314 // The idea here is the same as in the 32-bit version, but with additional
2315 // complications from the fact that Repl32 might be true. Because we
2316 // aggressively convert bit groups to Repl32 form (which, for small
2317 // rotation factors, involves no other change), and then coalesce, it might
2318 // be the case that a single 64-bit masking operation could handle both
2319 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2320 // form allowed coalescing, then we must use a 32-bit rotaton in order to
2321 // completely capture the new combined bit group.
2322
2323 for (ValueRotInfo &VRI : ValueRotsVec) {
2324 uint64_t Mask = 0;
2325
2326 // We need to add to the mask all bits from the associated bit groups.
2327 // If Repl32 is false, we need to add bits from bit groups that have
2328 // Repl32 true, but are trivially convertable to Repl32 false. Such a
2329 // group is trivially convertable if it overlaps only with the lower 32
2330 // bits, and the group has not been coalesced.
2331 auto MatchingBG = [VRI](const BitGroup &BG) {
2332 if (VRI.V != BG.V)
2333 return false;
2334
2335 unsigned EffRLAmt = BG.RLAmt;
2336 if (!VRI.Repl32 && BG.Repl32) {
2337 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2338 !BG.Repl32Coalesced) {
2339 if (BG.Repl32CR)
2340 EffRLAmt += 32;
2341 } else {
2342 return false;
2343 }
2344 } else if (VRI.Repl32 != BG.Repl32) {
2345 return false;
2346 }
2347
2348 return VRI.RLAmt == EffRLAmt;
2349 };
2350
2351 for (auto &BG : BitGroups) {
2352 if (!MatchingBG(BG))
2353 continue;
2354
2355 if (BG.StartIdx <= BG.EndIdx) {
2356 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2357 Mask |= (UINT64_C(1)1UL << i);
2358 } else {
2359 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2360 Mask |= (UINT64_C(1)1UL << i);
2361 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2362 Mask |= (UINT64_C(1)1UL << i);
2363 }
2364 }
2365
2366 // We can use the 32-bit andi/andis technique if the mask does not
2367 // require any higher-order bits. This can save an instruction compared
2368 // to always using the general 64-bit technique.
2369 bool Use32BitInsts = isUInt<32>(Mask);
2370 // Compute the masks for andi/andis that would be necessary.
2371 unsigned ANDIMask = (Mask & UINT16_MAX(65535)),
2372 ANDISMask = (Mask >> 16) & UINT16_MAX(65535);
2373
2374 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2375
2376 unsigned NumAndInsts = (unsigned) NeedsRotate +
2377 (unsigned) (bool) Res;
2378 unsigned NumOfSelectInsts = 0;
2379 selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2380 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.")(static_cast <bool> (NumOfSelectInsts > 0 &&
"Failed to select an i64 constant.") ? void (0) : __assert_fail
("NumOfSelectInsts > 0 && \"Failed to select an i64 constant.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2380, __extension__
__PRETTY_FUNCTION__))
;
2381 if (Use32BitInsts)
2382 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2383 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2384 else
2385 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2386
2387 unsigned NumRLInsts = 0;
2388 bool FirstBG = true;
2389 bool MoreBG = false;
2390 for (auto &BG : BitGroups) {
2391 if (!MatchingBG(BG)) {
2392 MoreBG = true;
2393 continue;
2394 }
2395 NumRLInsts +=
2396 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2397 !FirstBG);
2398 FirstBG = false;
2399 }
2400
2401 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)
2402 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)
2403 << "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)
2404 << " using rotates: " << NumRLInsts << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\trotation groups for "
<< VRI.V.getNode() << " RL: " << VRI.RLAmt
<< (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: "
<< NumAndInsts << " using rotates: " << NumRLInsts
<< "\n"; } } while (false)
;
2405
2406 // When we'd use andi/andis, we bias toward using the rotates (andi only
2407 // has a record form, and is cracked on POWER cores). However, when using
2408 // general 64-bit constant formation, bias toward the constant form,
2409 // because that exposes more opportunities for CSE.
2410 if (NumAndInsts > NumRLInsts)
2411 continue;
2412 // When merging multiple bit groups, instruction or is used.
2413 // But when rotate is used, rldimi can inert the rotated value into any
2414 // register, so instruction or can be avoided.
2415 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2416 continue;
2417
2418 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\t\t\tusing masking\n";
} } while (false)
;
2419
2420 if (InstCnt) *InstCnt += NumAndInsts;
2421
2422 SDValue VRot;
2423 // We actually need to generate a rotation if we have a non-zero rotation
2424 // factor or, in the Repl32 case, if we care about any of the
2425 // higher-order replicated bits. In the latter case, we generate a mask
2426 // backward so that it actually includes the entire 64 bits.
2427 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2428 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2429 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2430 else
2431 VRot = VRI.V;
2432
2433 SDValue TotalVal;
2434 if (Use32BitInsts) {
2435 assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2436, __extension__
__PRETTY_FUNCTION__))
2436 "No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2436, __extension__
__PRETTY_FUNCTION__))
;
2437
2438 SDValue ANDIVal, ANDISVal;
2439 if (ANDIMask != 0)
2440 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2441 ExtendToInt64(VRot, dl),
2442 getI32Imm(ANDIMask, dl)),
2443 0);
2444 if (ANDISMask != 0)
2445 ANDISVal =
2446 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2447 ExtendToInt64(VRot, dl),
2448 getI32Imm(ANDISMask, dl)),
2449 0);
2450
2451 if (!ANDIVal)
2452 TotalVal = ANDISVal;
2453 else if (!ANDISVal)
2454 TotalVal = ANDIVal;
2455 else
2456 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2457 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2458 } else {
2459 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2460 TotalVal =
2461 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2462 ExtendToInt64(VRot, dl), TotalVal),
2463 0);
2464 }
2465
2466 if (!Res)
2467 Res = TotalVal;
2468 else
2469 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2470 ExtendToInt64(Res, dl), TotalVal),
2471 0);
2472
2473 // Now, remove all groups with this underlying value and rotation
2474 // factor.
2475 eraseMatchingBitGroups(MatchingBG);
2476 }
2477 }
2478
2479 // Instruction selection for the 64-bit case.
2480 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2481 SDLoc dl(N);
2482 SDValue Res;
2483
2484 if (InstCnt) *InstCnt = 0;
2485
2486 // Take care of cases that should use andi/andis first.
2487 SelectAndParts64(dl, Res, InstCnt);
2488
2489 // If we've not yet selected a 'starting' instruction, and we have no zeros
2490 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2491 // number of groups), and start with this rotated value.
2492 if ((!NeedMask || LateMask) && !Res) {
2493 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2494 // groups will come first, and so the VRI representing the largest number
2495 // of groups might not be first (it might be the first Repl32 groups).
2496 unsigned MaxGroupsIdx = 0;
2497 if (!ValueRotsVec[0].Repl32) {
2498 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2499 if (ValueRotsVec[i].Repl32) {
2500 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2501 MaxGroupsIdx = i;
2502 break;
2503 }
2504 }
2505
2506 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2507 bool NeedsRotate = false;
2508 if (VRI.RLAmt) {
2509 NeedsRotate = true;
2510 } else if (VRI.Repl32) {
2511 for (auto &BG : BitGroups) {
2512 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2513 BG.Repl32 != VRI.Repl32)
2514 continue;
2515
2516 // We don't need a rotate if the bit group is confined to the lower
2517 // 32 bits.
2518 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2519 continue;
2520
2521 NeedsRotate = true;
2522 break;
2523 }
2524 }
2525
2526 if (NeedsRotate)
2527 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2528 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2529 InstCnt);
2530 else
2531 Res = VRI.V;
2532
2533 // Now, remove all groups with this underlying value and rotation factor.
2534 if (Res)
2535 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2536 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2537 BG.Repl32 == VRI.Repl32;
2538 });
2539 }
2540
2541 // Because 64-bit rotates are more flexible than inserts, we might have a
2542 // preference regarding which one we do first (to save one instruction).
2543 if (!Res)
2544 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2545 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2546 false) <
2547 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2548 true)) {
2549 if (I != BitGroups.begin()) {
2550 BitGroup BG = *I;
2551 BitGroups.erase(I);
2552 BitGroups.insert(BitGroups.begin(), BG);
2553 }
2554
2555 break;
2556 }
2557 }
2558
2559 // Insert the other groups (one at a time).
2560 for (auto &BG : BitGroups) {
2561 if (!Res)
2562 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2563 BG.EndIdx, InstCnt);
2564 else
2565 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2566 BG.StartIdx, BG.EndIdx, InstCnt);
2567 }
2568
2569 if (LateMask) {
2570 uint64_t Mask = getZerosMask();
2571
2572 // We can use the 32-bit andi/andis technique if the mask does not
2573 // require any higher-order bits. This can save an instruction compared
2574 // to always using the general 64-bit technique.
2575 bool Use32BitInsts = isUInt<32>(Mask);
2576 // Compute the masks for andi/andis that would be necessary.
2577 unsigned ANDIMask = (Mask & UINT16_MAX(65535)),
2578 ANDISMask = (Mask >> 16) & UINT16_MAX(65535);
2579
2580 if (Use32BitInsts) {
2581 assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2582, __extension__
__PRETTY_FUNCTION__))
2582 "No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) &&
"No set bits in mask when using 32-bit ands for 64-bit value"
) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2582, __extension__
__PRETTY_FUNCTION__))
;
2583
2584 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2585 (unsigned) (ANDISMask != 0) +
2586 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2587
2588 SDValue ANDIVal, ANDISVal;
2589 if (ANDIMask != 0)
2590 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2591 ExtendToInt64(Res, dl),
2592 getI32Imm(ANDIMask, dl)),
2593 0);
2594 if (ANDISMask != 0)
2595 ANDISVal =
2596 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2597 ExtendToInt64(Res, dl),
2598 getI32Imm(ANDISMask, dl)),
2599 0);
2600
2601 if (!ANDIVal)
2602 Res = ANDISVal;
2603 else if (!ANDISVal)
2604 Res = ANDIVal;
2605 else
2606 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2607 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2608 } else {
2609 unsigned NumOfSelectInsts = 0;
2610 SDValue MaskVal =
2611 SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
2612 Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2613 ExtendToInt64(Res, dl), MaskVal),
2614 0);
2615 if (InstCnt)
2616 *InstCnt += NumOfSelectInsts + /* and */ 1;
2617 }
2618 }
2619
2620 return Res.getNode();
2621 }
2622
2623 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2624 // Fill in BitGroups.
2625 collectBitGroups(LateMask);
2626 if (BitGroups.empty())
2627 return nullptr;
2628
2629 // For 64-bit values, figure out when we can use 32-bit instructions.
2630 if (Bits.size() == 64)
2631 assignRepl32BitGroups();
2632
2633 // Fill in ValueRotsVec.
2634 collectValueRotInfo();
2635
2636 if (Bits.size() == 32) {
2637 return Select32(N, LateMask, InstCnt);
2638 } else {
2639 assert(Bits.size() == 64 && "Not 64 bits here?")(static_cast <bool> (Bits.size() == 64 && "Not 64 bits here?"
) ? void (0) : __assert_fail ("Bits.size() == 64 && \"Not 64 bits here?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2639, __extension__
__PRETTY_FUNCTION__))
;
2640 return Select64(N, LateMask, InstCnt);
2641 }
2642
2643 return nullptr;
2644 }
2645
2646 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2647 erase_if(BitGroups, F);
2648 }
2649
// Per-bit description of the value being selected. Assigned in
// Select(SDNode *) from the result of getValueBits(); its size (32 or 64)
// decides between Select32 and Select64.
2650 SmallVector<ValueBit, 64> Bits;
2651
// Set by computeRotationAmounts(); when false only the early-masking
// selection is attempted.
2652 bool NeedMask = false;
// Filled in by computeRotationAmounts().
// NOTE(review): presumably one rotation amount per entry of Bits - confirm.
2653 SmallVector<unsigned, 64> RLAmt;
2654
// Filled in by collectBitGroups(); selection gives up when this is empty.
2655 SmallVector<BitGroup, 16> BitGroups;
2656
// NOTE(review): presumably keyed by (value, rotation amount) - confirm
// against collectValueRotInfo().
2657 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
// Filled in by collectValueRotInfo().
2658 SmallVector<ValueRotInfo, 16> ValueRotsVec;
2659
// DAG in which the machine nodes are created; set by the constructor.
2660 SelectionDAG *CurDAG = nullptr;
2661
2662public:
2663 BitPermutationSelector(SelectionDAG *DAG)
2664 : CurDAG(DAG) {}
2665
2666 // Here we try to match complex bit permutations into a set of
2667 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2668 // known to produce optimal code for common cases (like i32 byte swapping).
2669 SDNode *Select(SDNode *N) {
2670 Memoizer.clear();
2671 auto Result =
2672 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2673 if (!Result.first)
2674 return nullptr;
2675 Bits = std::move(*Result.second);
2676
2677 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Considering bit-permutation-based instruction"
" selection for: "; } } while (false)
2678 " selection for: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Considering bit-permutation-based instruction"
" selection for: "; } } while (false)
;
2679 LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { N->dump(CurDAG); } } while (false)
;
2680
2681 // Fill it RLAmt and set NeedMask.
2682 computeRotationAmounts();
2683
2684 if (!NeedMask)
2685 return Select(N, false);
2686
2687 // We currently have two techniques for handling results with zeros: early
2688 // masking (the default) and late masking. Late masking is sometimes more
2689 // efficient, but because the structure of the bit groups is different, it
2690 // is hard to tell without generating both and comparing the results. With
2691 // late masking, we ignore zeros in the resulting value when inserting each
2692 // set of bit groups, and then mask in the zeros at the end. With early
2693 // masking, we only insert the non-zero parts of the result at every step.
2694
2695 unsigned InstCnt = 0, InstCntLateMask = 0;
2696 LLVM_DEBUG(dbgs() << "\tEarly masking:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tEarly masking:\n"; } } while
(false)
;
2697 SDNode *RN = Select(N, false, &InstCnt);
2698 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\tisel would use " <<
InstCnt << " instructions\n"; } } while (false)
;
2699
2700 LLVM_DEBUG(dbgs() << "\tLate masking:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tLate masking:\n"; } } while
(false)
;
2701 SDNode *RNLM = Select(N, true, &InstCntLateMask);
2702 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMaskdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\tisel would use " <<
InstCntLateMask << " instructions\n"; } } while (false
)
2703 << " instructions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\t\tisel would use " <<
InstCntLateMask << " instructions\n"; } } while (false
)
;
2704
2705 if (InstCnt <= InstCntLateMask) {
2706 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tUsing early-masking for isel\n"
; } } while (false)
;
2707 return RN;
2708 }
2709
2710 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "\tUsing late-masking for isel\n"
; } } while (false)
;
2711 return RNLM;
2712 }
2713};
2714
2715class IntegerCompareEliminator {
2716 SelectionDAG *CurDAG;
2717 PPCDAGToDAGISel *S;
2718 // Conversion type for interpreting results of a 32-bit instruction as
2719 // a 64-bit value or vice versa.
2720 enum ExtOrTruncConversion { Ext, Trunc };
2721
2722 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2723 // in a GPR.
2724 // ZExtOrig - use the original condition code, zero-extend value
2725 // ZExtInvert - invert the condition code, zero-extend value
2726 // SExtOrig - use the original condition code, sign-extend value
2727 // SExtInvert - invert the condition code, sign-extend value
2728 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2729
2730 // Comparisons against zero to emit GPR code sequences for. Each of these
2731 // sequences may need to be emitted for two or more equivalent patterns.
2732 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2733 // matters as well as the extension type: sext (-1/0), zext (1/0).
2734 // GEZExt - (zext (LHS >= 0))
2735 // GESExt - (sext (LHS >= 0))
2736 // LEZExt - (zext (LHS <= 0))
2737 // LESExt - (sext (LHS <= 0))
2738 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2739
2740 SDNode *tryEXTEND(SDNode *N);
2741 SDNode *tryLogicOpOfCompares(SDNode *N);
2742 SDValue computeLogicOpInGPR(SDValue LogicOp);
2743 SDValue signExtendInputIfNeeded(SDValue Input);
2744 SDValue zeroExtendInputIfNeeded(SDValue Input);
2745 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2746 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2747 ZeroCompare CmpTy);
2748 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2749 int64_t RHSValue, SDLoc dl);
2750 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2751 int64_t RHSValue, SDLoc dl);
2752 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2753 int64_t RHSValue, SDLoc dl);
2754 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2755 int64_t RHSValue, SDLoc dl);
2756 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2757
2758public:
2759 IntegerCompareEliminator(SelectionDAG *DAG,
2760 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2761 assert(CurDAG->getTargetLoweringInfo()(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2763, __extension__
__PRETTY_FUNCTION__))
2762 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2763, __extension__
__PRETTY_FUNCTION__))
2763 "Only expecting to use this on 64 bit targets.")(static_cast <bool> (CurDAG->getTargetLoweringInfo()
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() ==
64 && "Only expecting to use this on 64 bit targets."
) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2763, __extension__
__PRETTY_FUNCTION__))
;
2764 }
2765 SDNode *Select(SDNode *N) {
2766 if (CmpInGPR == ICGPR_None)
2767 return nullptr;
2768 switch (N->getOpcode()) {
2769 default: break;
2770 case ISD::ZERO_EXTEND:
2771 if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2772 CmpInGPR == ICGPR_SextI64)
2773 return nullptr;
2774 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2775 case ISD::SIGN_EXTEND:
2776 if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2777 CmpInGPR == ICGPR_ZextI64)
2778 return nullptr;
2779 return tryEXTEND(N);
2780 case ISD::AND:
2781 case ISD::OR:
2782 case ISD::XOR:
2783 return tryLogicOpOfCompares(N);
2784 }
2785 return nullptr;
2786 }
2787};
2788
2789static bool isLogicOp(unsigned Opc) {
2790 return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2791}
2792// The obvious case for wanting to keep the value in a GPR. Namely, the
2793// result of the comparison is actually needed in a GPR.
2794SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2795 assert((N->getOpcode() == ISD::ZERO_EXTEND ||(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2797, __extension__
__PRETTY_FUNCTION__))
2796 N->getOpcode() == ISD::SIGN_EXTEND) &&(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2797, __extension__
__PRETTY_FUNCTION__))
2797 "Expecting a zero/sign extend node!")(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND
|| N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"
) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2797, __extension__
__PRETTY_FUNCTION__))
;
2798 SDValue WideRes;
2799 // If we are zero-extending the result of a logical operation on i1
2800 // values, we can keep the values in GPRs.
2801 if (isLogicOp(N->getOperand(0).getOpcode()) &&
2802 N->getOperand(0).getValueType() == MVT::i1 &&
2803 N->getOpcode() == ISD::ZERO_EXTEND)
2804 WideRes = computeLogicOpInGPR(N->getOperand(0));
2805 else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2806 return nullptr;
2807 else
2808 WideRes =
2809 getSETCCInGPR(N->getOperand(0),
2810 N->getOpcode() == ISD::SIGN_EXTEND ?
2811 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2812
2813 if (!WideRes)
2814 return nullptr;
2815
2816 SDLoc dl(N);
2817 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2818 bool Output32Bit = N->getValueType(0) == MVT::i32;
2819
2820 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2821 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2822
2823 SDValue ConvOp = WideRes;
2824 if (Input32Bit != Output32Bit)
2825 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2826 ExtOrTruncConversion::Trunc);
2827 return ConvOp.getNode();
2828}
2829
2830// Attempt to perform logical operations on the results of comparisons while
2831// keeping the values in GPRs. Without doing so, these would end up being
2832// lowered to CR-logical operations which suffer from significant latency and
2833// low ILP.
2834SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2835 if (N->getValueType(0) != MVT::i1)
2836 return nullptr;
2837 assert(isLogicOp(N->getOpcode()) &&(static_cast <bool> (isLogicOp(N->getOpcode()) &&
"Expected a logic operation on setcc results.") ? void (0) :
__assert_fail ("isLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2838, __extension__
__PRETTY_FUNCTION__))
2838 "Expected a logic operation on setcc results.")(static_cast <bool> (isLogicOp(N->getOpcode()) &&
"Expected a logic operation on setcc results.") ? void (0) :
__assert_fail ("isLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2838, __extension__
__PRETTY_FUNCTION__))
;
2839 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2840 if (!LoweredLogical)
2841 return nullptr;
2842
2843 SDLoc dl(N);
2844 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
2845 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
2846 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2847 SDValue LHS = LoweredLogical.getOperand(0);
2848 SDValue RHS = LoweredLogical.getOperand(1);
2849 SDValue WideOp;
2850 SDValue OpToConvToRecForm;
2851
2852 // Look through any 32-bit to 64-bit implicit extend nodes to find the
2853 // opcode that is input to the XORI.
2854 if (IsBitwiseNegate &&
2855 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
2856 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
2857 else if (IsBitwiseNegate)
2858 // If the input to the XORI isn't an extension, that's what we're after.
2859 OpToConvToRecForm = LoweredLogical.getOperand(0);
2860 else
2861 // If this is not an XORI, it is a reg-reg logical op and we can convert
2862 // it to record-form.
2863 OpToConvToRecForm = LoweredLogical;
2864
2865 // Get the record-form version of the node we're looking to use to get the
2866 // CR result from.
2867 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
2868 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
2869
2870 // Convert the right node to record-form. This is either the logical we're
2871 // looking at or it is the input node to the negation (if we're looking at
2872 // a bitwise negation).
2873 if (NewOpc != -1 && IsBitwiseNegate) {
2874 // The input to the XORI has a record-form. Use it.
2875 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&(static_cast <bool> (LoweredLogical.getConstantOperandVal
(1) == 1 && "Expected a PPC::XORI8 only for bitwise negation."
) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2876, __extension__
__PRETTY_FUNCTION__))
2876 "Expected a PPC::XORI8 only for bitwise negation.")(static_cast <bool> (LoweredLogical.getConstantOperandVal
(1) == 1 && "Expected a PPC::XORI8 only for bitwise negation."
) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2876, __extension__
__PRETTY_FUNCTION__))
;
2877 // Emit the record-form instruction.
2878 std::vector<SDValue> Ops;
2879 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
2880 Ops.push_back(OpToConvToRecForm.getOperand(i));
2881
2882 WideOp =
2883 SDValue(CurDAG->getMachineNode(NewOpc, dl,
2884 OpToConvToRecForm.getValueType(),
2885 MVT::Glue, Ops), 0);
2886 } else {
2887 assert((NewOpc != -1 || !IsBitwiseNegate) &&(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate)
&& "No record form available for AND8/OR8/XOR8?") ? void
(0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2888, __extension__
__PRETTY_FUNCTION__))
2888 "No record form available for AND8/OR8/XOR8?")(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate)
&& "No record form available for AND8/OR8/XOR8?") ? void
(0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2888, __extension__
__PRETTY_FUNCTION__))
;
2889 WideOp =
2890 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
2891 dl, MVT::i64, MVT::Glue, LHS, RHS),
2892 0);
2893 }
2894
2895 // Select this node to a single bit from CR0 set by the record-form node
2896 // just created. For bitwise negation, use the EQ bit which is the equivalent
2897 // of negating the result (i.e. it is a bit set when the result of the
2898 // operation is zero).
2899 SDValue SRIdxVal =
2900 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
2901 SDValue CRBit =
2902 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2903 MVT::i1, CR0Reg, SRIdxVal,
2904 WideOp.getValue(1)), 0);
2905 return CRBit.getNode();
2906}
2907
2908// Lower a logical operation on i1 values into a GPR sequence if possible.
2909// The result can be kept in a GPR if requested.
2910// Three types of inputs can be handled:
2911// - SETCC
2912// - TRUNCATE
2913// - Logical operation (AND/OR/XOR)
2914// There is also a special case that is handled (namely a complement operation
2915// achieved with xor %a, -1).
2916SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
2917 assert(isLogicOp(LogicOp.getOpcode()) &&(static_cast <bool> (isLogicOp(LogicOp.getOpcode()) &&
"Can only handle logic operations here.") ? void (0) : __assert_fail
("isLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2918, __extension__
__PRETTY_FUNCTION__))
2918 "Can only handle logic operations here.")(static_cast <bool> (isLogicOp(LogicOp.getOpcode()) &&
"Can only handle logic operations here.") ? void (0) : __assert_fail
("isLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2918, __extension__
__PRETTY_FUNCTION__))
;
2919 assert(LogicOp.getValueType() == MVT::i1 &&(static_cast <bool> (LogicOp.getValueType() == MVT::i1 &&
"Can only handle logic operations on i1 values here.") ? void
(0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2920, __extension__
__PRETTY_FUNCTION__))
2920 "Can only handle logic operations on i1 values here.")(static_cast <bool> (LogicOp.getValueType() == MVT::i1 &&
"Can only handle logic operations on i1 values here.") ? void
(0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2920, __extension__
__PRETTY_FUNCTION__))
;
2921 SDLoc dl(LogicOp);
2922 SDValue LHS, RHS;
2923
2924 // Special case: xor %a, -1
2925 bool IsBitwiseNegation = isBitwiseNot(LogicOp);
2926
2927 // Produces a GPR sequence for each operand of the binary logic operation.
2928 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2929 // the value in a GPR and for logic operations, it will recursively produce
2930 // a GPR sequence for the operation.
2931 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
2932 unsigned OperandOpcode = Operand.getOpcode();
2933 if (OperandOpcode == ISD::SETCC)
2934 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
2935 else if (OperandOpcode == ISD::TRUNCATE) {
2936 SDValue InputOp = Operand.getOperand(0);
2937 EVT InVT = InputOp.getValueType();
2938 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
2939 PPC::RLDICL, dl, InVT, InputOp,
2940 S->getI64Imm(0, dl),
2941 S->getI64Imm(63, dl)), 0);
2942 } else if (isLogicOp(OperandOpcode))
2943 return computeLogicOpInGPR(Operand);
2944 return SDValue();
2945 };
2946 LHS = getLogicOperand(LogicOp.getOperand(0));
2947 RHS = getLogicOperand(LogicOp.getOperand(1));
2948
2949 // If a GPR sequence can't be produced for the LHS we can't proceed.
2950 // Not producing a GPR sequence for the RHS is only a problem if this isn't
2951 // a bitwise negation operation.
2952 if (!LHS || (!RHS && !IsBitwiseNegation))
2953 return SDValue();
2954
2955 NumLogicOpsOnComparison++;
2956
2957 // We will use the inputs as 64-bit values.
2958 if (LHS.getValueType() == MVT::i32)
2959 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
2960 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
2961 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
2962
2963 unsigned NewOpc;
2964 switch (LogicOp.getOpcode()) {
2965 default: llvm_unreachable("Unknown logic operation.")::llvm::llvm_unreachable_internal("Unknown logic operation.",
"llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2965)
;
2966 case ISD::AND: NewOpc = PPC::AND8; break;
2967 case ISD::OR: NewOpc = PPC::OR8; break;
2968 case ISD::XOR: NewOpc = PPC::XOR8; break;
2969 }
2970
2971 if (IsBitwiseNegation) {
2972 RHS = S->getI64Imm(1, dl);
2973 NewOpc = PPC::XORI8;
2974 }
2975
2976 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
2977
2978}
2979
2980/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2981/// Otherwise just reinterpret it as a 64-bit value.
2982/// Useful when emitting comparison code for 32-bit values without using
2983/// the compare instruction (which only considers the lower 32-bits).
2984SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2985 assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2986, __extension__
__PRETTY_FUNCTION__))
2986 "Can only sign-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 2986, __extension__
__PRETTY_FUNCTION__))
;
2987 unsigned Opc = Input.getOpcode();
2988
2989 // The value was sign extended and then truncated to 32-bits. No need to
2990 // sign extend it again.
2991 if (Opc == ISD::TRUNCATE &&
2992 (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2993 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2994 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2995
2996 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2997 // The input is a sign-extending load. All ppc sign-extending loads
2998 // sign-extend to the full 64-bits.
2999 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3000 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3001
3002 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3003 // We don't sign-extend constants.
3004 if (InputConst)
3005 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3006
3007 SDLoc dl(Input);
3008 SignExtensionsAdded++;
3009 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3010 MVT::i64, Input), 0);
3011}
3012
3013/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3014/// Otherwise just reinterpret it as a 64-bit value.
3015/// Useful when emitting comparison code for 32-bit values without using
3016/// the compare instruction (which only considers the lower 32-bits).
3017SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3018 assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3019, __extension__
__PRETTY_FUNCTION__))
3019 "Can only zero-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 &&
"Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail
("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3019, __extension__
__PRETTY_FUNCTION__))
;
3020 unsigned Opc = Input.getOpcode();
3021
3022 // The only condition under which we can omit the actual extend instruction:
3023 // - The value is a positive constant
3024 // - The value comes from a load that isn't a sign-extending load
3025 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3026 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3027 (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3028 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3029 if (IsTruncateOfZExt)
3030 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3031
3032 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3033 if (InputConst && InputConst->getSExtValue() >= 0)
3034 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3035
3036 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3037 // The input is a load that doesn't sign-extend (it will be zero-extended).
3038 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3039 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3040
3041 // None of the above, need to zero-extend.
3042 SDLoc dl(Input);
3043 ZeroExtensionsAdded++;
3044 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3045 S->getI64Imm(0, dl),
3046 S->getI64Imm(32, dl)), 0);
3047}
3048
3049// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3050// course not actual zero/sign extensions that will generate machine code,
3051// they're just a way to reinterpret a 32 bit value in a register as a
3052// 64 bit value and vice-versa.
3053SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3054 ExtOrTruncConversion Conv) {
3055 SDLoc dl(NatWidthRes);
3056
3057 // For reinterpreting 32-bit values as 64 bit values, we generate
3058 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3059 if (Conv == ExtOrTruncConversion::Ext) {
3060 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3061 SDValue SubRegIdx =
3062 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3063 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3064 ImDef, NatWidthRes, SubRegIdx), 0);
3065 }
3066
3067 assert(Conv == ExtOrTruncConversion::Trunc &&(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc
&& "Unknown convertion between 32 and 64 bit values."
) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3068, __extension__
__PRETTY_FUNCTION__))
3068 "Unknown convertion between 32 and 64 bit values.")(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc
&& "Unknown convertion between 32 and 64 bit values."
) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3068, __extension__
__PRETTY_FUNCTION__))
;
3069 // For reinterpreting 64-bit values as 32-bit values, we just need to
3070 // EXTRACT_SUBREG (i.e. extract the low word).
3071 SDValue SubRegIdx =
3072 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3073 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3074 NatWidthRes, SubRegIdx), 0);
3075}
3076
3077// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3078// Handle both zero-extensions and sign-extensions.
3079SDValue
3080IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3081 ZeroCompare CmpTy) {
3082 EVT InVT = LHS.getValueType();
3083 bool Is32Bit = InVT == MVT::i32;
3084 SDValue ToExtend;
3085
3086 // Produce the value that needs to be either zero or sign extended.
3087 switch (CmpTy) {
3088 case ZeroCompare::GEZExt:
3089 case ZeroCompare::GESExt:
3090 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3091 dl, InVT, LHS, LHS), 0);
3092 break;
3093 case ZeroCompare::LEZExt:
3094 case ZeroCompare::LESExt: {
3095 if (Is32Bit) {
3096 // Upper 32 bits cannot be undefined for this sequence.
3097 LHS = signExtendInputIfNeeded(LHS);
3098 SDValue Neg =
3099 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3100 ToExtend =
3101 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3102 Neg, S->getI64Imm(1, dl),
3103 S->getI64Imm(63, dl)), 0);
3104 } else {
3105 SDValue Addi =
3106 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3107 S->getI64Imm(~0ULL, dl)), 0);
3108 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3109 Addi, LHS), 0);
3110 }
3111 break;
3112 }
3113 }
3114
3115 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3116 if (!Is32Bit &&
3117 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3118 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3119 ToExtend, S->getI64Imm(1, dl),
3120 S->getI64Imm(63, dl)), 0);
3121 if (!Is32Bit &&
3122 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3123 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3124 S->getI64Imm(63, dl)), 0);
3125
3126 assert(Is32Bit && "Should have handled the 32-bit sequences above.")(static_cast <bool> (Is32Bit && "Should have handled the 32-bit sequences above."
) ? void (0) : __assert_fail ("Is32Bit && \"Should have handled the 32-bit sequences above.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3126, __extension__
__PRETTY_FUNCTION__))
;
3127 // For 32-bit sequences, the extensions differ between GE/LE cases.
3128 switch (CmpTy) {
3129 case ZeroCompare::GEZExt: {
3130 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3131 S->getI32Imm(31, dl) };
3132 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3133 ShiftOps), 0);
3134 }
3135 case ZeroCompare::GESExt:
3136 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3137 S->getI32Imm(31, dl)), 0);
3138 case ZeroCompare::LEZExt:
3139 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3140 S->getI32Imm(1, dl)), 0);
3141 case ZeroCompare::LESExt:
3142 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3143 S->getI32Imm(-1, dl)), 0);
3144 }
3145
3146 // The above case covers all the enumerators so it can't have a default clause
3147 // to avoid compiler warnings.
3148 llvm_unreachable("Unknown zero-comparison type.")::llvm::llvm_unreachable_internal("Unknown zero-comparison type."
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3148)
;
3149}
3150
3151/// Produces a zero-extended result of comparing two 32-bit values according to
3152/// the passed condition code.
3153SDValue
3154IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3155 ISD::CondCode CC,
3156 int64_t RHSValue, SDLoc dl) {
3157 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3158 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
3159 return SDValue();
3160 bool IsRHSZero = RHSValue == 0;
3161 bool IsRHSOne = RHSValue == 1;
3162 bool IsRHSNegOne = RHSValue == -1LL;
3163 switch (CC) {
3164 default: return SDValue();
3165 case ISD::SETEQ: {
3166 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3167 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3168 SDValue Xor = IsRHSZero ? LHS :
3169 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3170 SDValue Clz =
3171 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3172 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3173 S->getI32Imm(31, dl) };
3174 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3175 ShiftOps), 0);
3176 }
3177 case ISD::SETNE: {
3178 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3179 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3180 SDValue Xor = IsRHSZero ? LHS :
3181 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3182 SDValue Clz =
3183 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3184 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3185 S->getI32Imm(31, dl) };
3186 SDValue Shift =
3187 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3188 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3189 S->getI32Imm(1, dl)), 0);
3190 }
3191 case ISD::SETGE: {
3192 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3193 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3194 if(IsRHSZero)
3195 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3196
3197 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3198 // by swapping inputs and falling through.
3199 std::swap(LHS, RHS);
3200 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3201 IsRHSZero = RHSConst && RHSConst->isZero();
3202 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3203 }
3204 case ISD::SETLE: {
3205 if (CmpInGPR == ICGPR_NonExtIn)
3206 return SDValue();
3207 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3208 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3209 if(IsRHSZero) {
3210 if (CmpInGPR == ICGPR_NonExtIn)
3211 return SDValue();
3212 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3213 }
3214
3215 // The upper 32-bits of the register can't be undefined for this sequence.
3216 LHS = signExtendInputIfNeeded(LHS);
3217 RHS = signExtendInputIfNeeded(RHS);
3218 SDValue Sub =
3219 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3220 SDValue Shift =
3221 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3222 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3223 0);
3224 return
3225 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3226 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3227 }
3228 case ISD::SETGT: {
3229 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3230 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3231 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3232 // Handle SETLT -1 (which is equivalent to SETGE 0).
3233 if (IsRHSNegOne)
3234 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3235
3236 if (IsRHSZero) {
3237 if (CmpInGPR == ICGPR_NonExtIn)
3238 return SDValue();
3239 // The upper 32-bits of the register can't be undefined for this sequence.
3240 LHS = signExtendInputIfNeeded(LHS);
3241 RHS = signExtendInputIfNeeded(RHS);
3242 SDValue Neg =
3243 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3244 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3245 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3246 }
3247 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3248 // (%b < %a) by swapping inputs and falling through.
3249 std::swap(LHS, RHS);
3250 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3251 IsRHSZero = RHSConst && RHSConst->isZero();
3252 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3253 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3254 }
3255 case ISD::SETLT: {
3256 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3257 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3258 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3259 // Handle SETLT 1 (which is equivalent to SETLE 0).
3260 if (IsRHSOne) {
3261 if (CmpInGPR == ICGPR_NonExtIn)
3262 return SDValue();
3263 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3264 }
3265
3266 if (IsRHSZero) {
3267 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3268 S->getI32Imm(31, dl) };
3269 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3270 ShiftOps), 0);
3271 }
3272
3273 if (CmpInGPR == ICGPR_NonExtIn)
3274 return SDValue();
3275 // The upper 32-bits of the register can't be undefined for this sequence.
3276 LHS = signExtendInputIfNeeded(LHS);
3277 RHS = signExtendInputIfNeeded(RHS);
3278 SDValue SUBFNode =
3279 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3280 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3281 SUBFNode, S->getI64Imm(1, dl),
3282 S->getI64Imm(63, dl)), 0);
3283 }
3284 case ISD::SETUGE:
3285 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3286 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3287 std::swap(LHS, RHS);
3288 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3289 case ISD::SETULE: {
3290 if (CmpInGPR == ICGPR_NonExtIn)
3291 return SDValue();
3292 // The upper 32-bits of the register can't be undefined for this sequence.
3293 LHS = zeroExtendInputIfNeeded(LHS);
3294 RHS = zeroExtendInputIfNeeded(RHS);
3295 SDValue Subtract =
3296 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3297 SDValue SrdiNode =
3298 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3299 Subtract, S->getI64Imm(1, dl),
3300 S->getI64Imm(63, dl)), 0);
3301 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3302 S->getI32Imm(1, dl)), 0);
3303 }
3304 case ISD::SETUGT:
3305 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3306 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3307 std::swap(LHS, RHS);
3308 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3309 case ISD::SETULT: {
3310 if (CmpInGPR == ICGPR_NonExtIn)
3311 return SDValue();
3312 // The upper 32-bits of the register can't be undefined for this sequence.
3313 LHS = zeroExtendInputIfNeeded(LHS);
3314 RHS = zeroExtendInputIfNeeded(RHS);
3315 SDValue Subtract =
3316 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3317 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3318 Subtract, S->getI64Imm(1, dl),
3319 S->getI64Imm(63, dl)), 0);
3320 }
3321 }
3322}
3323
3324/// Produces a sign-extended result of comparing two 32-bit values according to
3325/// the passed condition code.
3326SDValue
3327IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3328 ISD::CondCode CC,
3329 int64_t RHSValue, SDLoc dl) {
3330 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3331 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
3332 return SDValue();
3333 bool IsRHSZero = RHSValue == 0;
3334 bool IsRHSOne = RHSValue == 1;
3335 bool IsRHSNegOne = RHSValue == -1LL;
3336
3337 switch (CC) {
3338 default: return SDValue();
3339 case ISD::SETEQ: {
3340 // (sext (setcc %a, %b, seteq)) ->
3341 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3342 // (sext (setcc %a, 0, seteq)) ->
3343 // (ashr (shl (ctlz %a), 58), 63)
3344 SDValue CountInput = IsRHSZero ? LHS :
3345 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3346 SDValue Cntlzw =
3347 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3348 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3349 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3350 SDValue Slwi =
3351 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3352 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3353 }
3354 case ISD::SETNE: {
3355 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3356 // flip the bit, finally take 2's complement.
3357 // (sext (setcc %a, %b, setne)) ->
3358 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3359 // Same as above, but the first xor is not needed.
3360 // (sext (setcc %a, 0, setne)) ->
3361 // (neg (xor (lshr (ctlz %a), 5), 1))
3362 SDValue Xor = IsRHSZero ? LHS :
3363 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3364 SDValue Clz =
3365 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3366 SDValue ShiftOps[] =
3367 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3368 SDValue Shift =
3369 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3370 SDValue Xori =
3371 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3372 S->getI32Imm(1, dl)), 0);
3373 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3374 }
3375 case ISD::SETGE: {
3376 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3377 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3378 if (IsRHSZero)
3379 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3380
3381 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3382 // by swapping inputs and falling through.
3383 std::swap(LHS, RHS);
3384 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3385 IsRHSZero = RHSConst && RHSConst->isZero();
3386 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3387 }
3388 case ISD::SETLE: {
3389 if (CmpInGPR == ICGPR_NonExtIn)
3390 return SDValue();
3391 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3392 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3393 if (IsRHSZero)
3394 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3395
3396 // The upper 32-bits of the register can't be undefined for this sequence.
3397 LHS = signExtendInputIfNeeded(LHS);
3398 RHS = signExtendInputIfNeeded(RHS);
3399 SDValue SUBFNode =
3400 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3401 LHS, RHS), 0);
3402 SDValue Srdi =
3403 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3404 SUBFNode, S->getI64Imm(1, dl),
3405 S->getI64Imm(63, dl)), 0);
3406 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3407 S->getI32Imm(-1, dl)), 0);
3408 }
3409 case ISD::SETGT: {
3410 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3411 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3412 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3413 if (IsRHSNegOne)
3414 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3415 if (IsRHSZero) {
3416 if (CmpInGPR == ICGPR_NonExtIn)
3417 return SDValue();
3418 // The upper 32-bits of the register can't be undefined for this sequence.
3419 LHS = signExtendInputIfNeeded(LHS);
3420 RHS = signExtendInputIfNeeded(RHS);
3421 SDValue Neg =
3422 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3423 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3424 S->getI64Imm(63, dl)), 0);
3425 }
3426 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3427 // (%b < %a) by swapping inputs and falling through.
3428 std::swap(LHS, RHS);
3429 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3430 IsRHSZero = RHSConst && RHSConst->isZero();
3431 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3432 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3433 }
3434 case ISD::SETLT: {
3435 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3436 // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3437 // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3438 if (IsRHSOne) {
3439 if (CmpInGPR == ICGPR_NonExtIn)
3440 return SDValue();
3441 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3442 }
3443 if (IsRHSZero)
3444 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3445 S->getI32Imm(31, dl)), 0);
3446
3447 if (CmpInGPR == ICGPR_NonExtIn)
3448 return SDValue();
3449 // The upper 32-bits of the register can't be undefined for this sequence.
3450 LHS = signExtendInputIfNeeded(LHS);
3451 RHS = signExtendInputIfNeeded(RHS);
3452 SDValue SUBFNode =
3453 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3454 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3455 SUBFNode, S->getI64Imm(63, dl)), 0);
3456 }
3457 case ISD::SETUGE:
3458 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3459 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3460 std::swap(LHS, RHS);
3461 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3462 case ISD::SETULE: {
3463 if (CmpInGPR == ICGPR_NonExtIn)
3464 return SDValue();
3465 // The upper 32-bits of the register can't be undefined for this sequence.
3466 LHS = zeroExtendInputIfNeeded(LHS);
3467 RHS = zeroExtendInputIfNeeded(RHS);
3468 SDValue Subtract =
3469 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3470 SDValue Shift =
3471 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3472 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3473 0);
3474 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3475 S->getI32Imm(-1, dl)), 0);
3476 }
3477 case ISD::SETUGT:
3478 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3479 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3480 std::swap(LHS, RHS);
3481 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3482 case ISD::SETULT: {
3483 if (CmpInGPR == ICGPR_NonExtIn)
3484 return SDValue();
3485 // The upper 32-bits of the register can't be undefined for this sequence.
3486 LHS = zeroExtendInputIfNeeded(LHS);
3487 RHS = zeroExtendInputIfNeeded(RHS);
3488 SDValue Subtract =
3489 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3490 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3491 Subtract, S->getI64Imm(63, dl)), 0);
3492 }
3493 }
3494}
3495
3496/// Produces a zero-extended result of comparing two 64-bit values according to
3497/// the passed condition code.
3498SDValue
3499IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3500 ISD::CondCode CC,
3501 int64_t RHSValue, SDLoc dl) {
3502 if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3503 CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
3504 return SDValue();
3505 bool IsRHSZero = RHSValue == 0;
3506 bool IsRHSOne = RHSValue == 1;
3507 bool IsRHSNegOne = RHSValue == -1LL;
3508 switch (CC) {
3509 default: return SDValue();
3510 case ISD::SETEQ: {
3511 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3512 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3513 SDValue Xor = IsRHSZero ? LHS :
3514 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3515 SDValue Clz =
3516 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3517 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3518 S->getI64Imm(58, dl),
3519 S->getI64Imm(63, dl)), 0);
3520 }
3521 case ISD::SETNE: {
3522 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3523 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3524 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3525 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3526 SDValue Xor = IsRHSZero ? LHS :
3527 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3528 SDValue AC =
3529 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3530 Xor, S->getI32Imm(~0U, dl)), 0);
3531 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3532 Xor, AC.getValue(1)), 0);
3533 }
3534 case ISD::SETGE: {
3535 // {subc.reg, subc.CA} = (subcarry %a, %b)
3536 // (zext (setcc %a, %b, setge)) ->
3537 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3538 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3539 if (IsRHSZero)
3540 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3541 std::swap(LHS, RHS);
3542 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3543 IsRHSZero = RHSConst && RHSConst->isZero();
3544 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3545 }
3546 case ISD::SETLE: {
3547 // {subc.reg, subc.CA} = (subcarry %b, %a)
3548 // (zext (setcc %a, %b, setge)) ->
3549 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3550 // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3551 if (IsRHSZero)
3552 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3553 SDValue ShiftL =
3554 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3555 S->getI64Imm(1, dl),
3556 S->getI64Imm(63, dl)), 0);
3557 SDValue ShiftR =
3558 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3559 S->getI64Imm(63, dl)), 0);
3560 SDValue SubtractCarry =
3561 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3562 LHS, RHS), 1);
3563 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3564 ShiftR, ShiftL, SubtractCarry), 0);
3565 }
3566 case ISD::SETGT: {
3567 // {subc.reg, subc.CA} = (subcarry %b, %a)
3568 // (zext (setcc %a, %b, setgt)) ->
3569 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3570 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3571 if (IsRHSNegOne)
3572 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3573 if (IsRHSZero) {
3574 SDValue Addi =
3575 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3576 S->getI64Imm(~0ULL, dl)), 0);
3577 SDValue Nor =
3578 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3579 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3580 S->getI64Imm(1, dl),
3581 S->getI64Imm(63, dl)), 0);
3582 }
3583 std::swap(LHS, RHS);
3584 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3585 IsRHSZero = RHSConst && RHSConst->isZero();
3586 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3587 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3588 }
3589 case ISD::SETLT: {
3590 // {subc.reg, subc.CA} = (subcarry %a, %b)
3591 // (zext (setcc %a, %b, setlt)) ->
3592 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3593 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3594 if (IsRHSOne)
3595 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3596 if (IsRHSZero)
3597 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3598 S->getI64Imm(1, dl),
3599 S->getI64Imm(63, dl)), 0);
3600 SDValue SRADINode =
3601 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3602 LHS, S->getI64Imm(63, dl)), 0);
3603 SDValue SRDINode =
3604 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3605 RHS, S->getI64Imm(1, dl),
3606 S->getI64Imm(63, dl)), 0);
3607 SDValue SUBFC8Carry =
3608 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3609 RHS, LHS), 1);
3610 SDValue ADDE8Node =
3611 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3612 SRDINode, SRADINode, SUBFC8Carry), 0);
3613 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3614 ADDE8Node, S->getI64Imm(1, dl)), 0);
3615 }
3616 case ISD::SETUGE:
3617 // {subc.reg, subc.CA} = (subcarry %a, %b)
3618 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3619 std::swap(LHS, RHS);
3620 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3621 case ISD::SETULE: {
3622 // {subc.reg, subc.CA} = (subcarry %b, %a)
3623 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3624 SDValue SUBFC8Carry =
3625 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3626 LHS, RHS), 1);
3627 SDValue SUBFE8Node =
3628 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3629 LHS, LHS, SUBFC8Carry), 0);
3630 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3631 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3632 }
3633 case ISD::SETUGT:
3634 // {subc.reg, subc.CA} = (subcarry %b, %a)
3635 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3636 std::swap(LHS, RHS);
3637 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3638 case ISD::SETULT: {
3639 // {subc.reg, subc.CA} = (subcarry %a, %b)
3640 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3641 SDValue SubtractCarry =
3642 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3643 RHS, LHS), 1);
3644 SDValue ExtSub =
3645 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3646 LHS, LHS, SubtractCarry), 0);
3647 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3648 ExtSub), 0);
3649 }
3650 }
3651}
3652
3653/// Produces a sign-extended result of comparing two 64-bit values according to
3654/// the passed condition code.
3655SDValue
3656IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3657 ISD::CondCode CC,
3658 int64_t RHSValue, SDLoc dl) {
3659 if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3660 CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
3661 return SDValue();
3662 bool IsRHSZero = RHSValue == 0;
3663 bool IsRHSOne = RHSValue == 1;
3664 bool IsRHSNegOne = RHSValue == -1LL;
3665 switch (CC) {
3666 default: return SDValue();
3667 case ISD::SETEQ: {
3668 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3669 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3670 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3671 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3672 SDValue AddInput = IsRHSZero ? LHS :
3673 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3674 SDValue Addic =
3675 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3676 AddInput, S->getI32Imm(~0U, dl)), 0);
3677 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3678 Addic, Addic.getValue(1)), 0);
3679 }
3680 case ISD::SETNE: {
3681 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3682 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3683 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3684 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3685 SDValue Xor = IsRHSZero ? LHS :
3686 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3687 SDValue SC =
3688 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3689 Xor, S->getI32Imm(0, dl)), 0);
3690 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3691 SC, SC.getValue(1)), 0);
3692 }
3693 case ISD::SETGE: {
3694 // {subc.reg, subc.CA} = (subcarry %a, %b)
3695 // (zext (setcc %a, %b, setge)) ->
3696 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3697 // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3698 if (IsRHSZero)
3699 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3700 std::swap(LHS, RHS);
3701 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3702 IsRHSZero = RHSConst && RHSConst->isZero();
3703 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3704 }
3705 case ISD::SETLE: {
3706 // {subc.reg, subc.CA} = (subcarry %b, %a)
3707 // (zext (setcc %a, %b, setge)) ->
3708 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3709 // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3710 if (IsRHSZero)
3711 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3712 SDValue ShiftR =
3713 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3714 S->getI64Imm(63, dl)), 0);
3715 SDValue ShiftL =
3716 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3717 S->getI64Imm(1, dl),
3718 S->getI64Imm(63, dl)), 0);
3719 SDValue SubtractCarry =
3720 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3721 LHS, RHS), 1);
3722 SDValue Adde =
3723 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3724 ShiftR, ShiftL, SubtractCarry), 0);
3725 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3726 }
3727 case ISD::SETGT: {
3728 // {subc.reg, subc.CA} = (subcarry %b, %a)
3729 // (zext (setcc %a, %b, setgt)) ->
3730 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3731 // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3732 if (IsRHSNegOne)
3733 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3734 if (IsRHSZero) {
3735 SDValue Add =
3736 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3737 S->getI64Imm(-1, dl)), 0);
3738 SDValue Nor =
3739 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3740 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3741 S->getI64Imm(63, dl)), 0);
3742 }
3743 std::swap(LHS, RHS);
3744 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3745 IsRHSZero = RHSConst && RHSConst->isZero();
3746 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3747 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3748 }
3749 case ISD::SETLT: {
3750 // {subc.reg, subc.CA} = (subcarry %a, %b)
3751 // (zext (setcc %a, %b, setlt)) ->
3752 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3753 // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3754 if (IsRHSOne)
3755 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3756 if (IsRHSZero) {
3757 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3758 S->getI64Imm(63, dl)), 0);
3759 }
3760 SDValue SRADINode =
3761 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3762 LHS, S->getI64Imm(63, dl)), 0);
3763 SDValue SRDINode =
3764 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3765 RHS, S->getI64Imm(1, dl),
3766 S->getI64Imm(63, dl)), 0);
3767 SDValue SUBFC8Carry =
3768 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3769 RHS, LHS), 1);
3770 SDValue ADDE8Node =
3771 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3772 SRDINode, SRADINode, SUBFC8Carry), 0);
3773 SDValue XORI8Node =
3774 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3775 ADDE8Node, S->getI64Imm(1, dl)), 0);
3776 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3777 XORI8Node), 0);
3778 }
3779 case ISD::SETUGE:
3780 // {subc.reg, subc.CA} = (subcarry %a, %b)
3781 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3782 std::swap(LHS, RHS);
3783 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3784 case ISD::SETULE: {
3785 // {subc.reg, subc.CA} = (subcarry %b, %a)
3786 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3787 SDValue SubtractCarry =
3788 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3789 LHS, RHS), 1);
3790 SDValue ExtSub =
3791 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3792 LHS, SubtractCarry), 0);
3793 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3794 ExtSub, ExtSub), 0);
3795 }
3796 case ISD::SETUGT:
3797 // {subc.reg, subc.CA} = (subcarry %b, %a)
3798 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3799 std::swap(LHS, RHS);
3800 LLVM_FALLTHROUGH[[gnu::fallthrough]];
3801 case ISD::SETULT: {
3802 // {subc.reg, subc.CA} = (subcarry %a, %b)
3803 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3804 SDValue SubCarry =
3805 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3806 RHS, LHS), 1);
3807 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3808 LHS, LHS, SubCarry), 0);
3809 }
3810 }
3811}
3812
3813/// Do all uses of this SDValue need the result in a GPR?
3814/// This is meant to be used on values that have type i1 since
3815/// it is somewhat meaningless to ask if values of other types
3816/// should be kept in GPR's.
3817static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3818 assert(Compare.getOpcode() == ISD::SETCC &&(static_cast <bool> (Compare.getOpcode() == ISD::SETCC &&
"An ISD::SETCC node required here.") ? void (0) : __assert_fail
("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3819, __extension__
__PRETTY_FUNCTION__))
3819 "An ISD::SETCC node required here.")(static_cast <bool> (Compare.getOpcode() == ISD::SETCC &&
"An ISD::SETCC node required here.") ? void (0) : __assert_fail
("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3819, __extension__
__PRETTY_FUNCTION__))
;
3820
3821 // For values that have a single use, the caller should obviously already have
3822 // checked if that use is an extending use. We check the other uses here.
3823 if (Compare.hasOneUse())
3824 return true;
3825 // We want the value in a GPR if it is being extended, used for a select, or
3826 // used in logical operations.
3827 for (auto CompareUse : Compare.getNode()->uses())
3828 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3829 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3830 CompareUse->getOpcode() != ISD::SELECT &&
3831 !isLogicOp(CompareUse->getOpcode())) {
3832 OmittedForNonExtendUses++;
3833 return false;
3834 }
3835 return true;
3836}
3837
3838/// Returns an equivalent of a SETCC node but with the result the same width as
3839/// the inputs. This can also be used for SELECT_CC if either the true or false
3840/// values is a power of two while the other is zero.
3841SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3842 SetccInGPROpts ConvOpts) {
3843 assert((Compare.getOpcode() == ISD::SETCC ||(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3845, __extension__
__PRETTY_FUNCTION__))
3844 Compare.getOpcode() == ISD::SELECT_CC) &&(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3845, __extension__
__PRETTY_FUNCTION__))
3845 "An ISD::SETCC node required here.")(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC
|| Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."
) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 3845, __extension__
__PRETTY_FUNCTION__))
;
3846
3847 // Don't convert this comparison to a GPR sequence because there are uses
3848 // of the i1 result (i.e. uses that require the result in the CR).
3849 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3850 return SDValue();
3851
3852 SDValue LHS = Compare.getOperand(0);
3853 SDValue RHS = Compare.getOperand(1);
3854
3855 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3856 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
3857 ISD::CondCode CC =
3858 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3859 EVT InputVT = LHS.getValueType();
3860 if (InputVT != MVT::i32 && InputVT != MVT::i64)
3861 return SDValue();
3862
3863 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3864 ConvOpts == SetccInGPROpts::SExtInvert)
3865 CC = ISD::getSetCCInverse(CC, InputVT);
3866
3867 bool Inputs32Bit = InputVT == MVT::i32;
3868
3869 SDLoc dl(Compare);
3870 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3871 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX(9223372036854775807L);
3872 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3873 ConvOpts == SetccInGPROpts::SExtInvert;
3874
3875 if (IsSext && Inputs32Bit)
3876 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3877 else if (Inputs32Bit)
3878 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3879 else if (IsSext)
3880 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3881 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3882}
3883
3884} // end anonymous namespace
3885
3886bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3887 if (N->getValueType(0) != MVT::i32 &&
3888 N->getValueType(0) != MVT::i64)
3889 return false;
3890
3891 // This optimization will emit code that assumes 64-bit registers
3892 // so we don't want to run it in 32-bit mode. Also don't run it
3893 // on functions that are not to be optimized.
3894 if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3895 return false;
3896
3897 // For POWER10, it is more profitable to use the set boolean extension
3898 // instructions rather than the integer compare elimination codegen.
3899 // Users can override this via the command line option, `--ppc-gpr-icmps`.
3900 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
3901 return false;
3902
3903 switch (N->getOpcode()) {
3904 default: break;
3905 case ISD::ZERO_EXTEND:
3906 case ISD::SIGN_EXTEND:
3907 case ISD::AND:
3908 case ISD::OR:
3909 case ISD::XOR: {
3910 IntegerCompareEliminator ICmpElim(CurDAG, this);
3911 if (SDNode *New = ICmpElim.Select(N)) {
3912 ReplaceNode(N, New);
3913 return true;
3914 }
3915 }
3916 }
3917 return false;
3918}
3919
3920bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3921 if (N->getValueType(0) != MVT::i32 &&
3922 N->getValueType(0) != MVT::i64)
3923 return false;
3924
3925 if (!UseBitPermRewriter)
3926 return false;
3927
3928 switch (N->getOpcode()) {
3929 default: break;
3930 case ISD::ROTL:
3931 case ISD::SHL:
3932 case ISD::SRL:
3933 case ISD::AND:
3934 case ISD::OR: {
3935 BitPermutationSelector BPS(CurDAG);
3936 if (SDNode *New = BPS.Select(N)) {
3937 ReplaceNode(N, New);
3938 return true;
3939 }
3940 return false;
3941 }
3942 }
3943
3944 return false;
3945}
3946
3947/// SelectCC - Select a comparison of the specified values with the specified
3948/// condition code, returning the CR# of the expression.
3949SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3950 const SDLoc &dl, SDValue Chain) {
3951 // Always select the LHS.
3952 unsigned Opc;
3953
3954 if (LHS.getValueType() == MVT::i32) {
3955 unsigned Imm;
3956 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3957 if (isInt32Immediate(RHS, Imm)) {
3958 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3959 if (isUInt<16>(Imm))
3960 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3961 getI32Imm(Imm & 0xFFFF, dl)),
3962 0);
3963 // If this is a 16-bit signed immediate, fold it.
3964 if (isInt<16>((int)Imm))
3965 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3966 getI32Imm(Imm & 0xFFFF, dl)),
3967 0);
3968
3969 // For non-equality comparisons, the default code would materialize the
3970 // constant, then compare against it, like this:
3971 // lis r2, 4660
3972 // ori r2, r2, 22136
3973 // cmpw cr0, r3, r2
3974 // Since we are just comparing for equality, we can emit this instead:
3975 // xoris r0,r3,0x1234
3976 // cmplwi cr0,r0,0x5678
3977 // beq cr0,L6
3978 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
3979 getI32Imm(Imm >> 16, dl)), 0);
3980 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
3981 getI32Imm(Imm & 0xFFFF, dl)), 0);
3982 }
3983 Opc = PPC::CMPLW;
3984 } else if (ISD::isUnsignedIntSetCC(CC)) {
3985 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
3986 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3987 getI32Imm(Imm & 0xFFFF, dl)), 0);
3988 Opc = PPC::CMPLW;
3989 } else {
3990 int16_t SImm;
3991 if (isIntS16Immediate(RHS, SImm))
3992 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3993 getI32Imm((int)SImm & 0xFFFF,
3994 dl)),
3995 0);
3996 Opc = PPC::CMPW;
3997 }
3998 } else if (LHS.getValueType() == MVT::i64) {
3999 uint64_t Imm;
4000 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4001 if (isInt64Immediate(RHS.getNode(), Imm)) {
4002 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4003 if (isUInt<16>(Imm))
4004 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4005 getI32Imm(Imm & 0xFFFF, dl)),
4006 0);
4007 // If this is a 16-bit signed immediate, fold it.
4008 if (isInt<16>(Imm))
4009 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4010 getI32Imm(Imm & 0xFFFF, dl)),
4011 0);
4012
4013 // For non-equality comparisons, the default code would materialize the
4014 // constant, then compare against it, like this:
4015 // lis r2, 4660
4016 // ori r2, r2, 22136
4017 // cmpd cr0, r3, r2
4018 // Since we are just comparing for equality, we can emit this instead:
4019 // xoris r0,r3,0x1234
4020 // cmpldi cr0,r0,0x5678
4021 // beq cr0,L6
4022 if (isUInt<32>(Imm)) {
4023 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4024 getI64Imm(Imm >> 16, dl)), 0);
4025 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4026 getI64Imm(Imm & 0xFFFF, dl)),
4027 0);
4028 }
4029 }
4030 Opc = PPC::CMPLD;
4031 } else if (ISD::isUnsignedIntSetCC(CC)) {
4032 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4033 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4034 getI64Imm(Imm & 0xFFFF, dl)), 0);
4035 Opc = PPC::CMPLD;
4036 } else {
4037 int16_t SImm;
4038 if (isIntS16Immediate(RHS, SImm))
4039 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4040 getI64Imm(SImm & 0xFFFF, dl)),
4041 0);
4042 Opc = PPC::CMPD;
4043 }
4044 } else if (LHS.getValueType() == MVT::f32) {
4045 if (Subtarget->hasSPE()) {
4046 switch (CC) {
4047 default:
4048 case ISD::SETEQ:
4049 case ISD::SETNE:
4050 Opc = PPC::EFSCMPEQ;
4051 break;
4052 case ISD::SETLT:
4053 case ISD::SETGE:
4054 case ISD::SETOLT:
4055 case ISD::SETOGE:
4056 case ISD::SETULT:
4057 case ISD::SETUGE:
4058 Opc = PPC::EFSCMPLT;
4059 break;
4060 case ISD::SETGT:
4061 case ISD::SETLE:
4062 case ISD::SETOGT:
4063 case ISD::SETOLE:
4064 case ISD::SETUGT:
4065 case ISD::SETULE:
4066 Opc = PPC::EFSCMPGT;
4067 break;
4068 }
4069 } else
4070 Opc = PPC::FCMPUS;
4071 } else if (LHS.getValueType() == MVT::f64) {
4072 if (Subtarget->hasSPE()) {
4073 switch (CC) {
4074 default:
4075 case ISD::SETEQ:
4076 case ISD::SETNE:
4077 Opc = PPC::EFDCMPEQ;
4078 break;
4079 case ISD::SETLT:
4080 case ISD::SETGE:
4081 case ISD::SETOLT:
4082 case ISD::SETOGE:
4083 case ISD::SETULT:
4084 case ISD::SETUGE:
4085 Opc = PPC::EFDCMPLT;
4086 break;
4087 case ISD::SETGT:
4088 case ISD::SETLE:
4089 case ISD::SETOGT:
4090 case ISD::SETOLE:
4091 case ISD::SETUGT:
4092 case ISD::SETULE:
4093 Opc = PPC::EFDCMPGT;
4094 break;
4095 }
4096 } else
4097 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4098 } else {
4099 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!")(static_cast <bool> (LHS.getValueType() == MVT::f128 &&
"Unknown vt!") ? void (0) : __assert_fail ("LHS.getValueType() == MVT::f128 && \"Unknown vt!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4099, __extension__
__PRETTY_FUNCTION__))
;
4100 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector")(static_cast <bool> (Subtarget->hasP9Vector() &&
"XSCMPUQP requires Power9 Vector") ? void (0) : __assert_fail
("Subtarget->hasP9Vector() && \"XSCMPUQP requires Power9 Vector\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4100, __extension__
__PRETTY_FUNCTION__))
;
4101 Opc = PPC::XSCMPUQP;
4102 }
4103 if (Chain)
4104 return SDValue(
4105 CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4106 0);
4107 else
4108 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4109}
4110
4111static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
4112 const PPCSubtarget *Subtarget) {
4113 // For SPE instructions, the result is in GT bit of the CR
4114 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4115
4116 switch (CC) {
4117 case ISD::SETUEQ:
4118 case ISD::SETONE:
4119 case ISD::SETOLE:
4120 case ISD::SETOGE:
4121 llvm_unreachable("Should be lowered by legalize!")::llvm::llvm_unreachable_internal("Should be lowered by legalize!"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4121)
;
4122 default: llvm_unreachable("Unknown condition!")::llvm::llvm_unreachable_internal("Unknown condition!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4122)
;
4123 case ISD::SETOEQ:
4124 case ISD::SETEQ:
4125 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4126 case ISD::SETUNE:
4127 case ISD::SETNE:
4128 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4129 case ISD::SETOLT:
4130 case ISD::SETLT:
4131 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4132 case ISD::SETULE:
4133 case ISD::SETLE:
4134 return PPC::PRED_LE;
4135 case ISD::SETOGT:
4136 case ISD::SETGT:
4137 return PPC::PRED_GT;
4138 case ISD::SETUGE:
4139 case ISD::SETGE:
4140 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4141 case ISD::SETO: return PPC::PRED_NU;
4142 case ISD::SETUO: return PPC::PRED_UN;
4143 // These two are invalid for floating point. Assume we have int.
4144 case ISD::SETULT: return PPC::PRED_LT;
4145 case ISD::SETUGT: return PPC::PRED_GT;
4146 }
4147}
4148
4149/// getCRIdxForSetCC - Return the index of the condition register field
4150/// associated with the SetCC condition, and whether or not the field is
4151/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4152static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4153 Invert = false;
4154 switch (CC) {
4155 default: llvm_unreachable("Unknown condition!")::llvm::llvm_unreachable_internal("Unknown condition!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 4155)
;
4156 case ISD::SETOLT:
4157 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4158 case ISD::SETOGT:
4159 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4160 case ISD::SETOEQ:
4161 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4162 case ISD::SETUO: return 3; // Bit #3 = SETUO
4163 case ISD::SETUGE:
4164 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4165 case ISD::SETULE:
4166 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4167 case ISD::SETUNE:
4168 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4169 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4170 case ISD::SETUEQ:
4171 case ISD::SETOGE:
4172 case ISD::SETOLE:
4173 case ISD::SETONE:
4174 llvm_unreachable("Invalid branch code: should be expanded by legalize")::llvm::llvm_unreachable_internal("Invalid branch code: should be expanded by legalize"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4174)
;
4175 // These are invalid for floating point. Assume integer.
4176 case ISD::SETULT: return 0;
4177 case ISD::SETUGT: return 1;
4178 }
4179}
4180
4181// getVCmpInst: return the vector compare instruction for the specified
4182// vector type and condition code. Since this is for altivec specific code,
4183// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4184// and v4f32).
4185static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4186 bool HasVSX, bool &Swap, bool &Negate) {
4187 Swap = false;
4188 Negate = false;
4189
4190 if (VecVT.isFloatingPoint()) {
4191 /* Handle some cases by swapping input operands. */
4192 switch (CC) {
4193 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4194 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4195 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4196 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4197 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4198 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4199 default: break;
4200 }
4201 /* Handle some cases by negating the result. */
4202 switch (CC) {
4203 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4204 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4205 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4206 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4207 default: break;
4208 }
4209 /* We have instructions implementing the remaining cases. */
4210 switch (CC) {
4211 case ISD::SETEQ:
4212 case ISD::SETOEQ:
4213 if (VecVT == MVT::v4f32)
4214 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4215 else if (VecVT == MVT::v2f64)
4216 return PPC::XVCMPEQDP;
4217 break;
4218 case ISD::SETGT:
4219 case ISD::SETOGT:
4220 if (VecVT == MVT::v4f32)
4221 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4222 else if (VecVT == MVT::v2f64)
4223 return PPC::XVCMPGTDP;
4224 break;
4225 case ISD::SETGE:
4226 case ISD::SETOGE:
4227 if (VecVT == MVT::v4f32)
4228 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4229 else if (VecVT == MVT::v2f64)
4230 return PPC::XVCMPGEDP;
4231 break;
4232 default:
4233 break;
4234 }
4235 llvm_unreachable("Invalid floating-point vector compare condition")::llvm::llvm_unreachable_internal("Invalid floating-point vector compare condition"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4235)
;
4236 } else {
4237 /* Handle some cases by swapping input operands. */
4238 switch (CC) {
4239 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4240 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4241 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4242 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4243 default: break;
4244 }
4245 /* Handle some cases by negating the result. */
4246 switch (CC) {
4247 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4248 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4249 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4250 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4251 default: break;
4252 }
4253 /* We have instructions implementing the remaining cases. */
4254 switch (CC) {
4255 case ISD::SETEQ:
4256 case ISD::SETUEQ:
4257 if (VecVT == MVT::v16i8)
4258 return PPC::VCMPEQUB;
4259 else if (VecVT == MVT::v8i16)
4260 return PPC::VCMPEQUH;
4261 else if (VecVT == MVT::v4i32)
4262 return PPC::VCMPEQUW;
4263 else if (VecVT == MVT::v2i64)
4264 return PPC::VCMPEQUD;
4265 else if (VecVT == MVT::v1i128)
4266 return PPC::VCMPEQUQ;
4267 break;
4268 case ISD::SETGT:
4269 if (VecVT == MVT::v16i8)
4270 return PPC::VCMPGTSB;
4271 else if (VecVT == MVT::v8i16)
4272 return PPC::VCMPGTSH;
4273 else if (VecVT == MVT::v4i32)
4274 return PPC::VCMPGTSW;
4275 else if (VecVT == MVT::v2i64)
4276 return PPC::VCMPGTSD;
4277 else if (VecVT == MVT::v1i128)
4278 return PPC::VCMPGTSQ;
4279 break;
4280 case ISD::SETUGT:
4281 if (VecVT == MVT::v16i8)
4282 return PPC::VCMPGTUB;
4283 else if (VecVT == MVT::v8i16)
4284 return PPC::VCMPGTUH;
4285 else if (VecVT == MVT::v4i32)
4286 return PPC::VCMPGTUW;
4287 else if (VecVT == MVT::v2i64)
4288 return PPC::VCMPGTUD;
4289 else if (VecVT == MVT::v1i128)
4290 return PPC::VCMPGTUQ;
4291 break;
4292 default:
4293 break;
4294 }
4295 llvm_unreachable("Invalid integer vector compare condition")::llvm::llvm_unreachable_internal("Invalid integer vector compare condition"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4295)
;
4296 }
4297}
4298
4299bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4300 SDLoc dl(N);
4301 unsigned Imm;
4302 bool IsStrict = N->isStrictFPOpcode();
4303 ISD::CondCode CC =
4304 cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
4305 EVT PtrVT =
4306 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4307 bool isPPC64 = (PtrVT == MVT::i64);
4308 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4309
4310 SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
4311 SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
4312
4313 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
4314 // We can codegen setcc op, imm very efficiently compared to a brcond.
4315 // Check for those cases here.
4316 // setcc op, 0
4317 if (Imm == 0) {
4318 SDValue Op = LHS;
4319 switch (CC) {
4320 default: break;
4321 case ISD::SETEQ: {
4322 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
4323 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
4324 getI32Imm(31, dl) };
4325 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4326 return true;
4327 }
4328 case ISD::SETNE: {
4329 if (isPPC64) break;
4330 SDValue AD =
4331 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4332 Op, getI32Imm(~0U, dl)), 0);
4333 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4334 return true;
4335 }
4336 case ISD::SETLT: {
4337 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4338 getI32Imm(31, dl) };
4339 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4340 return true;
4341 }
4342 case ISD::SETGT: {
4343 SDValue T =
4344 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4345 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4346 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4347 getI32Imm(31, dl) };
4348 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4349 return true;
4350 }
4351 }
4352 } else if (Imm == ~0U) { // setcc op, -1
4353 SDValue Op = LHS;
4354 switch (CC) {
4355 default: break;
4356 case ISD::SETEQ:
4357 if (isPPC64) break;
4358 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4359 Op, getI32Imm(1, dl)), 0);
4360 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4361 SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4362 MVT::i32,
4363 getI32Imm(0, dl)),
4364 0), Op.getValue(1));
4365 return true;
4366 case ISD::SETNE: {
4367 if (isPPC64) break;
4368 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4369 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4370 Op, getI32Imm(~0U, dl));
4371 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4372 SDValue(AD, 1));
4373 return true;
4374 }
4375 case ISD::SETLT: {
4376 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4377 getI32Imm(1, dl)), 0);
4378 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4379 Op), 0);
4380 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4381 getI32Imm(31, dl) };
4382 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4383 return true;
4384 }
4385 case ISD::SETGT: {
4386 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4387 getI32Imm(31, dl) };
4388 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4389 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4390 return true;
4391 }
4392 }
4393 }
4394 }
4395
4396 // Altivec Vector compare instructions do not set any CR register by default and
4397 // vector compare operations return the same type as the operands.
4398 if (!IsStrict && LHS.getValueType().isVector()) {
4399 if (Subtarget->hasSPE())
4400 return false;
4401
4402 EVT VecVT = LHS.getValueType();
4403 bool Swap, Negate;
4404 unsigned int VCmpInst =
4405 getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4406 if (Swap)
4407 std::swap(LHS, RHS);
4408
4409 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4410 if (Negate) {
4411 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4412 CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4413 ResVT, VCmp, VCmp);
4414 return true;
4415 }
4416
4417 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4418 return true;
4419 }
4420
4421 if (Subtarget->useCRBits())
4422 return false;
4423
4424 bool Inv;
4425 unsigned Idx = getCRIdxForSetCC(CC, Inv);
4426 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
4427 if (IsStrict)
4428 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
4429 SDValue IntCR;
4430
4431 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4432 // The correct compare instruction is already set by SelectCC()
4433 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4434 Idx = 1;
4435 }
4436
4437 // Force the ccreg into CR7.
4438 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4439
4440 SDValue InFlag; // Null incoming flag value.
4441 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4442 InFlag).getValue(1);
4443
4444 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4445 CCReg), 0);
4446
4447 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4448 getI32Imm(31, dl), getI32Imm(31, dl) };
4449 if (!Inv) {
4450 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4451 return true;
4452 }
4453
4454 // Get the specified bit.
4455 SDValue Tmp =
4456 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4457 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4458 return true;
4459}
4460
4461/// Does this node represent a load/store node whose address can be represented
4462/// with a register plus an immediate that's a multiple of \p Val:
4463bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4464 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4465 StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4466 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4467 SDValue AddrOp;
4468 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4469 AddrOp = N->getOperand(1);
4470 else if (STN)
4471 AddrOp = STN->getOperand(2);
4472
4473 // If the address points a frame object or a frame object with an offset,
4474 // we need to check the object alignment.
4475 short Imm = 0;
4476 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4477 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4478 AddrOp)) {
4479 // If op0 is a frame index that is under aligned, we can't do it either,
4480 // because it is translated to r31 or r1 + slot + offset. We won't know the
4481 // slot number until the stack frame is finalized.
4482 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4483 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4484 if ((SlotAlign % Val) != 0)
4485 return false;
4486
4487 // If we have an offset, we need further check on the offset.
4488 if (AddrOp.getOpcode() != ISD::ADD)
4489 return true;
4490 }
4491
4492 if (AddrOp.getOpcode() == ISD::ADD)
4493 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4494
4495 // If the address comes from the outside, the offset will be zero.
4496 return AddrOp.getOpcode() == ISD::CopyFromReg;
4497}
4498
4499void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4500 // Transfer memoperands.
4501 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4502 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4503}
4504
4505static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4506 bool &NeedSwapOps, bool &IsUnCmp) {
4507
4508 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.")(static_cast <bool> (N->getOpcode() == ISD::SELECT_CC
&& "Expecting a SELECT_CC here.") ? void (0) : __assert_fail
("N->getOpcode() == ISD::SELECT_CC && \"Expecting a SELECT_CC here.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4508, __extension__
__PRETTY_FUNCTION__))
;
4509
4510 SDValue LHS = N->getOperand(0);
4511 SDValue RHS = N->getOperand(1);
4512 SDValue TrueRes = N->getOperand(2);
4513 SDValue FalseRes = N->getOperand(3);
4514 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4515 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4516 N->getSimpleValueType(0) != MVT::i32))
4517 return false;
4518
4519 // We are looking for any of:
4520 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4521 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4522 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4523 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4524 int64_t TrueResVal = TrueConst->getSExtValue();
4525 if ((TrueResVal < -1 || TrueResVal > 1) ||
4526 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4527 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4528 (TrueResVal == 0 &&
4529 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4530 return false;
4531
4532 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4533 ? FalseRes
4534 : FalseRes.getOperand(0);
4535 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4536 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4537 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4538 return false;
4539
4540 // Without this setb optimization, the outer SELECT_CC will be manually
4541 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4542 // transforms pseudo instruction to isel instruction. When there are more than
4543 // one use for result like zext/sext, with current optimization we only see
4544 // isel is replaced by setb but can't see any significant gain. Since
4545 // setb has longer latency than original isel, we should avoid this. Another
4546 // point is that setb requires comparison always kept, it can break the
4547 // opportunity to get the comparison away if we have in future.
4548 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4549 return false;
4550
4551 SDValue InnerLHS = SetOrSelCC.getOperand(0);
4552 SDValue InnerRHS = SetOrSelCC.getOperand(1);
4553 ISD::CondCode InnerCC =
4554 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4555 // If the inner comparison is a select_cc, make sure the true/false values are
4556 // 1/-1 and canonicalize it if needed.
4557 if (InnerIsSel) {
4558 ConstantSDNode *SelCCTrueConst =
4559 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4560 ConstantSDNode *SelCCFalseConst =
4561 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4562 if (!SelCCTrueConst || !SelCCFalseConst)
4563 return false;
4564 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4565 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4566 // The values must be -1/1 (requiring a swap) or 1/-1.
4567 if (SelCCTVal == -1 && SelCCFVal == 1) {
4568 std::swap(InnerLHS, InnerRHS);
4569 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4570 return false;
4571 }
4572
4573 // Canonicalize unsigned case
4574 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4575 IsUnCmp = true;
4576 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4577 }
4578
4579 bool InnerSwapped = false;
4580 if (LHS == InnerRHS && RHS == InnerLHS)
4581 InnerSwapped = true;
4582 else if (LHS != InnerLHS || RHS != InnerRHS)
4583 return false;
4584
4585 switch (CC) {
4586 // (select_cc lhs, rhs, 0, \
4587 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4588 case ISD::SETEQ:
4589 if (!InnerIsSel)
4590 return false;
4591 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4592 return false;
4593 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4594 break;
4595
4596 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4597 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4598 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4599 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4600 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4601 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4602 case ISD::SETULT:
4603 if (!IsUnCmp && InnerCC != ISD::SETNE)
4604 return false;
4605 IsUnCmp = true;
4606 LLVM_FALLTHROUGH[[gnu::fallthrough]];
4607 case ISD::SETLT:
4608 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4609 (InnerCC == ISD::SETLT && InnerSwapped))
4610 NeedSwapOps = (TrueResVal == 1);
4611 else
4612 return false;
4613 break;
4614
4615 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4616 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4617 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4618 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4619 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4620 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4621 case ISD::SETUGT:
4622 if (!IsUnCmp && InnerCC != ISD::SETNE)
4623 return false;
4624 IsUnCmp = true;
4625 LLVM_FALLTHROUGH[[gnu::fallthrough]];
4626 case ISD::SETGT:
4627 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4628 (InnerCC == ISD::SETGT && InnerSwapped))
4629 NeedSwapOps = (TrueResVal == -1);
4630 else
4631 return false;
4632 break;
4633
4634 default:
4635 return false;
4636 }
4637
4638 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { dbgs() << "Found a node that can be lowered to a SETB: "
; } } while (false)
;
4639 LLVM_DEBUG(N->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("ppc-codegen")) { N->dump(); } } while (false)
;
4640
4641 return true;
4642}
4643
4644// Return true if it's a software square-root/divide operand.
4645static bool isSWTestOp(SDValue N) {
4646 if (N.getOpcode() == PPCISD::FTSQRT)
4647 return true;
4648 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
4649 return false;
4650 switch (N.getConstantOperandVal(0)) {
4651 case Intrinsic::ppc_vsx_xvtdivdp:
4652 case Intrinsic::ppc_vsx_xvtdivsp:
4653 case Intrinsic::ppc_vsx_xvtsqrtdp:
4654 case Intrinsic::ppc_vsx_xvtsqrtsp:
4655 return true;
4656 }
4657 return false;
4658}
4659
4660bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4661 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.")(static_cast <bool> (N->getOpcode() == ISD::BR_CC &&
"ISD::BR_CC is expected.") ? void (0) : __assert_fail ("N->getOpcode() == ISD::BR_CC && \"ISD::BR_CC is expected.\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4661, __extension__
__PRETTY_FUNCTION__))
;
4662 // We are looking for following patterns, where `truncate to i1` actually has
4663 // the same semantic with `and 1`.
4664 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4665 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4666 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4667 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4668 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4669 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4670 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4671 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4672 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4673 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4674 return false;
4675
4676 SDValue CmpRHS = N->getOperand(3);
4677 if (!isa<ConstantSDNode>(CmpRHS) ||
4678 cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
4679 return false;
4680
4681 SDValue CmpLHS = N->getOperand(2);
4682 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4683 return false;
4684
4685 unsigned PCC = 0;
4686 bool IsCCNE = CC == ISD::SETNE;
4687 if (CmpLHS.getOpcode() == ISD::AND &&
4688 isa<ConstantSDNode>(CmpLHS.getOperand(1)))
4689 switch (CmpLHS.getConstantOperandVal(1)) {
4690 case 1:
4691 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4692 break;
4693 case 2:
4694 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4695 break;
4696 case 4:
4697 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4698 break;
4699 case 8:
4700 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4701 break;
4702 default:
4703 return false;
4704 }
4705 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4706 CmpLHS.getValueType() == MVT::i1)
4707 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4708
4709 if (PCC) {
4710 SDLoc dl(N);
4711 SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4712 N->getOperand(0)};
4713 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4714 return true;
4715 }
4716 return false;
4717}
4718
4719bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4720 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND &&
"ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4720, __extension__
__PRETTY_FUNCTION__))
;
4721 unsigned Imm;
4722 if (!isInt32Immediate(N->getOperand(1), Imm))
4723 return false;
4724
4725 SDLoc dl(N);
4726 SDValue Val = N->getOperand(0);
4727 unsigned SH, MB, ME;
4728 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4729 // with a mask, emit rlwinm
4730 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4731 Val = Val.getOperand(0);
4732 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4733 getI32Imm(ME, dl)};
4734 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4735 return true;
4736 }
4737
4738 // If this is just a masked value where the input is not handled, and
4739 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4740 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4741 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4742 getI32Imm(ME, dl)};
4743 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4744 return true;
4745 }
4746
4747 // AND X, 0 -> 0, not "rlwinm 32".
4748 if (Imm == 0) {
4749 ReplaceUses(SDValue(N, 0), N->getOperand(1));
4750 return true;
4751 }
4752
4753 return false;
4754}
4755
4756bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4757 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND &&
"ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4757, __extension__
__PRETTY_FUNCTION__))
;
4758 uint64_t Imm64;
4759 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4760 return false;
4761
4762 unsigned MB, ME;
4763 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
4764 // MB ME
4765 // +----------------------+
4766 // |xxxxxxxxxxx00011111000|
4767 // +----------------------+
4768 // 0 32 64
4769 // We can only do it if the MB is larger than 32 and MB <= ME
4770 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
4771 // we didn't rotate it.
4772 SDLoc dl(N);
4773 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
4774 getI64Imm(ME - 32, dl)};
4775 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
4776 return true;
4777 }
4778
4779 return false;
4780}
4781
4782bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
4783 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND &&
"ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4783, __extension__
__PRETTY_FUNCTION__))
;
4784 uint64_t Imm64;
4785 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4786 return false;
4787
4788 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
4789 // it well with "andi.".
4790 if (isUInt<16>(Imm64))
4791 return false;
4792
4793 SDLoc Loc(N);
4794 SDValue Val = N->getOperand(0);
4795
4796 // Optimized with two rldicl's as follows:
4797 // Add missing bits on left to the mask and check that the mask is a
4798 // wrapped run of ones, i.e.
4799 // Change pattern |0001111100000011111111|
4800 // to |1111111100000011111111|.
4801 unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
4802 if (NumOfLeadingZeros != 0)
4803 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
4804
4805 unsigned MB, ME;
4806 if (!isRunOfOnes64(Imm64, MB, ME))
4807 return false;
4808
4809 // ME MB MB-ME+63
4810 // +----------------------+ +----------------------+
4811 // |1111111100000011111111| -> |0000001111111111111111|
4812 // +----------------------+ +----------------------+
4813 // 0 63 0 63
4814 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
4815 unsigned OnesOnLeft = ME + 1;
4816 unsigned ZerosInBetween = (MB - ME + 63) & 63;
4817 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
4818 // on the left the bits that are already zeros in the mask.
4819 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
4820 getI64Imm(OnesOnLeft, Loc),
4821 getI64Imm(ZerosInBetween, Loc)),
4822 0);
4823 // MB-ME+63 ME MB
4824 // +----------------------+ +----------------------+
4825 // |0000001111111111111111| -> |0001111100000011111111|
4826 // +----------------------+ +----------------------+
4827 // 0 63 0 63
4828 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
4829 // left the number of ones we previously added.
4830 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
4831 getI64Imm(NumOfLeadingZeros, Loc)};
4832 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4833 return true;
4834}
4835
4836bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
4837 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND &&
"ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4837, __extension__
__PRETTY_FUNCTION__))
;
4838 unsigned Imm;
4839 if (!isInt32Immediate(N->getOperand(1), Imm))
4840 return false;
4841
4842 SDValue Val = N->getOperand(0);
4843 unsigned Imm2;
4844 // ISD::OR doesn't get all the bitfield insertion fun.
4845 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4846 // bitfield insert.
4847 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
4848 return false;
4849
4850 // The idea here is to check whether this is equivalent to:
4851 // (c1 & m) | (x & ~m)
4852 // where m is a run-of-ones mask. The logic here is that, for each bit in
4853 // c1 and c2:
4854 // - if both are 1, then the output will be 1.
4855 // - if both are 0, then the output will be 0.
4856 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4857 // come from x.
4858 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4859 // be 0.
4860 // If that last condition is never the case, then we can form m from the
4861 // bits that are the same between c1 and c2.
4862 unsigned MB, ME;
4863 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
4864 SDLoc dl(N);
4865 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
4866 getI32Imm(MB, dl), getI32Imm(ME, dl)};
4867 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4868 return true;
4869 }
4870
4871 return false;
4872}
4873
4874bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
4875 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND &&
"ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4875, __extension__
__PRETTY_FUNCTION__))
;
4876 uint64_t Imm64;
4877 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
4878 return false;
4879
4880 // If this is a 64-bit zero-extension mask, emit rldicl.
4881 unsigned MB = 64 - countTrailingOnes(Imm64);
4882 unsigned SH = 0;
4883 unsigned Imm;
4884 SDValue Val = N->getOperand(0);
4885 SDLoc dl(N);
4886
4887 if (Val.getOpcode() == ISD::ANY_EXTEND) {
4888 auto Op0 = Val.getOperand(0);
4889 if (Op0.getOpcode() == ISD::SRL &&
4890 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
4891
4892 auto ResultType = Val.getNode()->getValueType(0);
4893 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
4894 SDValue IDVal(ImDef, 0);
4895
4896 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
4897 IDVal, Op0.getOperand(0),
4898 getI32Imm(1, dl)),
4899 0);
4900 SH = 64 - Imm;
4901 }
4902 }
4903
4904 // If the operand is a logical right shift, we can fold it into this
4905 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4906 // for n <= mb. The right shift is really a left rotate followed by a
4907 // mask, and this mask is a more-restrictive sub-mask of the mask implied
4908 // by the shift.
4909 if (Val.getOpcode() == ISD::SRL &&
4910 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
4911 assert(Imm < 64 && "Illegal shift amount")(static_cast <bool> (Imm < 64 && "Illegal shift amount"
) ? void (0) : __assert_fail ("Imm < 64 && \"Illegal shift amount\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4911, __extension__
__PRETTY_FUNCTION__))
;
4912 Val = Val.getOperand(0);
4913 SH = 64 - Imm;
4914 }
4915
4916 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
4917 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4918 return true;
4919}
4920
4921bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
4922 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND &&
"ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4922, __extension__
__PRETTY_FUNCTION__))
;
4923 uint64_t Imm64;
4924 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
4925 !isMask_64(~Imm64))
4926 return false;
4927
4928 // If this is a negated 64-bit zero-extension mask,
4929 // i.e. the immediate is a sequence of ones from most significant side
4930 // and all zero for reminder, we should use rldicr.
4931 unsigned MB = 63 - countTrailingOnes(~Imm64);
4932 unsigned SH = 0;
4933 SDLoc dl(N);
4934 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
4935 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4936 return true;
4937}
4938
4939bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
4940 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::OR &&
"ISD::OR SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"ISD::OR SDNode expected\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4940, __extension__
__PRETTY_FUNCTION__))
;
4941 uint64_t Imm64;
4942 unsigned MB, ME;
4943 SDValue N0 = N->getOperand(0);
4944
4945 // We won't get fewer instructions if the imm is 32-bit integer.
4946 // rldimi requires the imm to have consecutive ones with both sides zero.
4947 // Also, make sure the first Op has only one use, otherwise this may increase
4948 // register pressure since rldimi is destructive.
4949 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
4950 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
4951 return false;
4952
4953 unsigned SH = 63 - ME;
4954 SDLoc Dl(N);
4955 // Use select64Imm for making LI instr instead of directly putting Imm64
4956 SDValue Ops[] = {
4957 N->getOperand(0),
4958 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
4959 getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
4960 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
4961 return true;
4962}
4963
4964// Select - Convert the specified operand from a target-independent to a
4965// target-specific node if it hasn't already been changed.
4966void PPCDAGToDAGISel::Select(SDNode *N) {
4967 SDLoc dl(N);
4968 if (N->isMachineOpcode()) {
4969 N->setNodeId(-1);
4970 return; // Already selected.
4971 }
4972
4973 // In case any misguided DAG-level optimizations form an ADD with a
4974 // TargetConstant operand, crash here instead of miscompiling (by selecting
4975 // an r+r add instead of some kind of r+i add).
4976 if (N->getOpcode() == ISD::ADD &&
4977 N->getOperand(1).getOpcode() == ISD::TargetConstant)
4978 llvm_unreachable("Invalid ADD with TargetConstant operand")::llvm::llvm_unreachable_internal("Invalid ADD with TargetConstant operand"
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 4978)
;
4979
4980 // Try matching complex bit permutations before doing anything else.
4981 if (tryBitPermutation(N))
4982 return;
4983
4984 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4985 if (tryIntCompareInGPR(N))
4986 return;
4987
4988 switch (N->getOpcode()) {
4989 default: break;
4990
4991 case ISD::Constant:
4992 if (N->getValueType(0) == MVT::i64) {
4993 ReplaceNode(N, selectI64Imm(CurDAG, N));
4994 return;
4995 }
4996 break;
4997
4998 case ISD::INTRINSIC_VOID: {
4999 auto IntrinsicID = N->getConstantOperandVal(1);
5000 if (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_tw) {
5001 unsigned Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI;
5002 SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5003 int16_t SImmOperand2;
5004 int16_t SImmOperand3;
5005 int16_t SImmOperand4;
5006 bool isOperand2IntS16Immediate =
5007 isIntS16Immediate(N->getOperand(2), SImmOperand2);
5008 bool isOperand3IntS16Immediate =
5009 isIntS16Immediate(N->getOperand(3), SImmOperand3);
5010 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5011 // reg or imm + imm. The imm + imm form will be optimized to either an
5012 // unconditional trap or a nop in a later pass.
5013 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5014 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5015 else if (isOperand3IntS16Immediate)
5016 // The 2nd and 3rd operands are reg + imm.
5017 Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5018 else {
5019 // The 2nd and 3rd operands are imm + reg.
5020 bool isOperand4IntS16Immediate =
5021 isIntS16Immediate(N->getOperand(4), SImmOperand4);
5022 (void)isOperand4IntS16Immediate;
5023 assert(isOperand4IntS16Immediate &&(static_cast <bool> (isOperand4IntS16Immediate &&
"The 4th operand is not an Immediate") ? void (0) : __assert_fail
("isOperand4IntS16Immediate && \"The 4th operand is not an Immediate\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5024, __extension__
__PRETTY_FUNCTION__))
5024 "The 4th operand is not an Immediate")(static_cast <bool> (isOperand4IntS16Immediate &&
"The 4th operand is not an Immediate") ? void (0) : __assert_fail
("isOperand4IntS16Immediate && \"The 4th operand is not an Immediate\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5024, __extension__
__PRETTY_FUNCTION__))
;
5025 // We need to flip the condition immediate TO.
5026 int16_t TO = int(SImmOperand4) & 0x1F;
5027 // We swap the first and second bit of TO if they are not same.
5028 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5029 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5030 // We swap the fourth and fifth bit of TO if they are not same.
5031 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5032 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5033 Ops[0] = getI32Imm(TO, dl);
5034 Ops[1] = N->getOperand(3);
5035 Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5036 }
5037 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, Ops);
5038 return;
5039 }
5040 break;
5041 }
5042
5043 case ISD::INTRINSIC_WO_CHAIN: {
5044 // We emit the PPC::FSELS instruction here because of type conflicts with
5045 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5046 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5047 // value for the comparison. When selecting through a .td file, a type
5048 // error is raised. Must check this first so we never break on the
5049 // !Subtarget->isISA3_1() check.
5050 auto IntID = N->getConstantOperandVal(0);
5051 if (IntID == Intrinsic::ppc_fsels) {
5052 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5053 CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5054 return;
5055 }
5056
5057 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5058 auto Pred = N->getConstantOperandVal(1);
5059 unsigned Opcode =
5060 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5061 unsigned SubReg = 0;
5062 unsigned ShiftVal = 0;
5063 bool Reverse = false;
5064 switch (Pred) {
5065 case 0:
5066 SubReg = PPC::sub_eq;
5067 ShiftVal = 1;
5068 break;
5069 case 1:
5070 SubReg = PPC::sub_eq;
5071 ShiftVal = 1;
5072 Reverse = true;
5073 break;
5074 case 2:
5075 SubReg = PPC::sub_lt;
5076 ShiftVal = 3;
5077 break;
5078 case 3:
5079 SubReg = PPC::sub_lt;
5080 ShiftVal = 3;
5081 Reverse = true;
5082 break;
5083 case 4:
5084 SubReg = PPC::sub_gt;
5085 ShiftVal = 2;
5086 break;
5087 case 5:
5088 SubReg = PPC::sub_gt;
5089 ShiftVal = 2;
5090 Reverse = true;
5091 break;
5092 case 6:
5093 SubReg = PPC::sub_un;
5094 break;
5095 case 7:
5096 SubReg = PPC::sub_un;
5097 Reverse = true;
5098 break;
5099 }
5100
5101 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5102 SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5103 CurDAG->getTargetConstant(0, dl, MVT::i32)};
5104 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5105 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5106 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5107 // MFOCRF and shift/negate the value.
5108 if (Subtarget->isISA3_1()) {
5109 SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5110 SDValue CRBit = SDValue(
5111 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5112 CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5113 0);
5114 CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5115 CRBit);
5116 } else {
5117 SDValue Move =
5118 SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5119 BCDOp.getValue(1)),
5120 0);
5121 SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5122 getI32Imm(31, dl), getI32Imm(31, dl)};
5123 if (!Reverse)
5124 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5125 else {
5126 SDValue Shift = SDValue(
5127 CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5128 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5129 }
5130 }
5131 return;
5132 }
5133
5134 if (!Subtarget->isISA3_1())
5135 break;
5136 unsigned Opcode = 0;
5137 switch (IntID) {
5138 default:
5139 break;
5140 case Intrinsic::ppc_altivec_vstribr_p:
5141 Opcode = PPC::VSTRIBR_rec;
5142 break;
5143 case Intrinsic::ppc_altivec_vstribl_p:
5144 Opcode = PPC::VSTRIBL_rec;
5145 break;
5146 case Intrinsic::ppc_altivec_vstrihr_p:
5147 Opcode = PPC::VSTRIHR_rec;
5148 break;
5149 case Intrinsic::ppc_altivec_vstrihl_p:
5150 Opcode = PPC::VSTRIHL_rec;
5151 break;
5152 }
5153 if (!Opcode)
5154 break;
5155
5156 // Generate the appropriate vector string isolate intrinsic to match.
5157 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5158 SDValue VecStrOp =
5159 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5160 // Vector string isolate instructions update the EQ bit of CR6.
5161 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5162 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5163 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5164 SDValue CRBit = SDValue(
5165 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5166 CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5167 0);
5168 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5169 return;
5170 }
5171
5172 case ISD::SETCC:
5173 case ISD::STRICT_FSETCC:
5174 case ISD::STRICT_FSETCCS:
5175 if (trySETCC(N))
5176 return;
5177 break;
5178 // These nodes will be transformed into GETtlsADDR32 node, which
5179 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5180 case PPCISD::ADDI_TLSLD_L_ADDR:
5181 case PPCISD::ADDI_TLSGD_L_ADDR: {
5182 const Module *Mod = MF->getFunction().getParent();
5183 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5184 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5185 Mod->getPICLevel() == PICLevel::SmallPIC)
5186 break;
5187 // Attach global base pointer on GETtlsADDR32 node in order to
5188 // generate secure plt code for TLS symbols.
5189 getGlobalBaseReg();
5190 } break;
5191 case PPCISD::CALL: {
5192 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5193 !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
5194 !Subtarget->isTargetELF())
5195 break;
5196
5197 SDValue Op = N->getOperand(1);
5198
5199 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5200 if (GA->getTargetFlags() == PPCII::MO_PLT)
5201 getGlobalBaseReg();
5202 }
5203 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5204 if (ES->getTargetFlags() == PPCII::MO_PLT)
5205 getGlobalBaseReg();
5206 }
5207 }
5208 break;
5209
5210 case PPCISD::GlobalBaseReg:
5211 ReplaceNode(N, getGlobalBaseReg());
5212 return;
5213
5214 case ISD::FrameIndex:
5215 selectFrameIndex(N, N);
5216 return;
5217
5218 case PPCISD::MFOCRF: {
5219 SDValue InFlag = N->getOperand(1);
5220 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5221 N->getOperand(0), InFlag));
5222 return;
5223 }
5224
5225 case PPCISD::READ_TIME_BASE:
5226 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5227 MVT::Other, N->getOperand(0)));
5228 return;
5229
5230 case PPCISD::SRA_ADDZE: {
5231 SDValue N0 = N->getOperand(0);
5232 SDValue ShiftAmt =
5233 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5234 getConstantIntValue(), dl,
5235 N->getValueType(0));
5236 if (N->getValueType(0) == MVT::i64) {
5237 SDNode *Op =
5238 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5239 N0, ShiftAmt);
5240 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5241 SDValue(Op, 1));
5242 return;
5243 } else {
5244 assert(N->getValueType(0) == MVT::i32 &&(static_cast <bool> (N->getValueType(0) == MVT::i32 &&
"Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail
("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5245, __extension__
__PRETTY_FUNCTION__))
5245 "Expecting i64 or i32 in PPCISD::SRA_ADDZE")(static_cast <bool> (N->getValueType(0) == MVT::i32 &&
"Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail
("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5245, __extension__
__PRETTY_FUNCTION__))
;
5246 SDNode *Op =
5247 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5248 N0, ShiftAmt);
5249 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5250 SDValue(Op, 1));
5251 return;
5252 }
5253 }
5254
5255 case ISD::STORE: {
5256 // Change TLS initial-exec D-form stores to X-form stores.
5257 StoreSDNode *ST = cast<StoreSDNode>(N);
5258 if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
5259 ST->getAddressingMode() != ISD::PRE_INC)
5260 if (tryTLSXFormStore(ST))
5261 return;
5262 break;
5263 }
5264 case ISD::LOAD: {
5265 // Handle preincrement loads.
5266 LoadSDNode *LD = cast<LoadSDNode>(N);
5267 EVT LoadedVT = LD->getMemoryVT();
5268
5269 // Normal loads are handled by code generated from the .td file.
5270 if (LD->getAddressingMode() != ISD::PRE_INC) {
5271 // Change TLS initial-exec D-form loads to X-form loads.
5272 if (EnableTLSOpt && Subtarget->isELFv2ABI())
5273 if (tryTLSXFormLoad(LD))
5274 return;
5275 break;
5276 }
5277
5278 SDValue Offset = LD->getOffset();
5279 if (Offset.getOpcode() == ISD::TargetConstant ||
5280 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5281
5282 unsigned Opcode;
5283 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5284 if (LD->getValueType(0) != MVT::i64) {
5285 // Handle PPC32 integer and normal FP loads.
5286 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5286, __extension__
__PRETTY_FUNCTION__))
;
5287 switch (LoadedVT.getSimpleVT().SimpleTy) {
5288 default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5288)
;
5289 case MVT::f64: Opcode = PPC::LFDU; break;
5290 case MVT::f32: Opcode = PPC::LFSU; break;
5291 case MVT::i32: Opcode = PPC::LWZU; break;
5292 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5293 case MVT::i1:
5294 case MVT::i8: Opcode = PPC::LBZU; break;
5295 }
5296 } else {
5297 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64
&& "Unknown load result type!") ? void (0) : __assert_fail
("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5297, __extension__
__PRETTY_FUNCTION__))
;
5298 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5298, __extension__
__PRETTY_FUNCTION__))
;
5299 switch (LoadedVT.getSimpleVT().SimpleTy) {
5300 default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5300)
;
5301 case MVT::i64: Opcode = PPC::LDU; break;
5302 case MVT::i32: Opcode = PPC::LWZU8; break;
5303 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5304 case MVT::i1:
5305 case MVT::i8: Opcode = PPC::LBZU8; break;
5306 }
5307 }
5308
5309 SDValue Chain = LD->getChain();
5310 SDValue Base = LD->getBasePtr();
5311 SDValue Ops[] = { Offset, Base, Chain };
5312 SDNode *MN = CurDAG->getMachineNode(
5313 Opcode, dl, LD->getValueType(0),
5314 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5315 transferMemOperands(N, MN);
5316 ReplaceNode(N, MN);
5317 return;
5318 } else {
5319 unsigned Opcode;
5320 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5321 if (LD->getValueType(0) != MVT::i64) {
5322 // Handle PPC32 integer and normal FP loads.
5323 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) &&
"Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5323, __extension__
__PRETTY_FUNCTION__))
;
5324 switch (LoadedVT.getSimpleVT().SimpleTy) {
5325 default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5325)
;
5326 case MVT::f64: Opcode = PPC::LFDUX; break;
5327 case MVT::f32: Opcode = PPC::LFSUX; break;
5328 case MVT::i32: Opcode = PPC::LWZUX; break;
5329 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5330 case MVT::i1:
5331 case MVT::i8: Opcode = PPC::LBZUX; break;
5332 }
5333 } else {
5334 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64
&& "Unknown load result type!") ? void (0) : __assert_fail
("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5334, __extension__
__PRETTY_FUNCTION__))
;
5335 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 ||
LoadedVT == MVT::i32) && "Invalid sext update load")
? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5336, __extension__
__PRETTY_FUNCTION__))
5336 "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 ||
LoadedVT == MVT::i32) && "Invalid sext update load")
? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5336, __extension__
__PRETTY_FUNCTION__))
;
5337 switch (LoadedVT.getSimpleVT().SimpleTy) {
5338 default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5338)
;
5339 case MVT::i64: Opcode = PPC::LDUX; break;
5340 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5341 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5342 case MVT::i1:
5343 case MVT::i8: Opcode = PPC::LBZUX8; break;
5344 }
5345 }
5346
5347 SDValue Chain = LD->getChain();
5348 SDValue Base = LD->getBasePtr();
5349 SDValue Ops[] = { Base, Offset, Chain };
5350 SDNode *MN = CurDAG->getMachineNode(
5351 Opcode, dl, LD->getValueType(0),
5352 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5353 transferMemOperands(N, MN);
5354 ReplaceNode(N, MN);
5355 return;
5356 }
5357 }
5358
5359 case ISD::AND:
5360 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5361 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
5362 tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
5363 return;
5364
5365 // Other cases are autogenerated.
5366 break;
5367 case ISD::OR: {
5368 if (N->getValueType(0) == MVT::i32)
5369 if (tryBitfieldInsert(N))
5370 return;
5371
5372 int16_t Imm;
5373 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5374 isIntS16Immediate(N->getOperand(1), Imm)) {
5375 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5376
5377 // If this is equivalent to an add, then we can fold it with the
5378 // FrameIndex calculation.
5379 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5380 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
5381 return;
5382 }
5383 }
5384
5385 // If this is 'or' against an imm with consecutive ones and both sides zero,
5386 // try to emit rldimi
5387 if (tryAsSingleRLDIMI(N))
5388 return;
5389
5390 // OR with a 32-bit immediate can be handled by ori + oris
5391 // without creating an immediate in a GPR.
5392 uint64_t Imm64 = 0;
5393 bool IsPPC64 = Subtarget->isPPC64();
5394 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5395 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5396 // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
5397 uint64_t ImmHi = Imm64 >> 16;
5398 uint64_t ImmLo = Imm64 & 0xFFFF;
5399 if (ImmHi != 0 && ImmLo != 0) {
5400 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5401 N->getOperand(0),
5402 getI16Imm(ImmLo, dl));
5403 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5404 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5405 return;
5406 }
5407 }
5408
5409 // Other cases are autogenerated.
5410 break;
5411 }
5412 case ISD::XOR: {
5413 // XOR with a 32-bit immediate can be handled by xori + xoris
5414 // without creating an immediate in a GPR.
5415 uint64_t Imm64 = 0;
5416 bool IsPPC64 = Subtarget->isPPC64();
5417 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5418 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5419 // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
5420 uint64_t ImmHi = Imm64 >> 16;
5421 uint64_t ImmLo = Imm64 & 0xFFFF;
5422 if (ImmHi != 0 && ImmLo != 0) {
5423 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5424 N->getOperand(0),
5425 getI16Imm(ImmLo, dl));
5426 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5427 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5428 return;
5429 }
5430 }
5431
5432 break;
5433 }
5434 case ISD::ADD: {
5435 int16_t Imm;
5436 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5437 isIntS16Immediate(N->getOperand(1), Imm)) {
5438 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
5439 return;
5440 }
5441
5442 break;
5443 }
5444 case ISD::SHL: {
5445 unsigned Imm, SH, MB, ME;
5446 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5447 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5448 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5449 getI32Imm(SH, dl), getI32Imm(MB, dl),
5450 getI32Imm(ME, dl) };
5451 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5452 return;
5453 }
5454
5455 // Other cases are autogenerated.
5456 break;
5457 }
5458 case ISD::SRL: {
5459 unsigned Imm, SH, MB, ME;
5460 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5461 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5462 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5463 getI32Imm(SH, dl), getI32Imm(MB, dl),
5464 getI32Imm(ME, dl) };
5465 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5466 return;
5467 }
5468
5469 // Other cases are autogenerated.
5470 break;
5471 }
5472 case ISD::MUL: {
5473 SDValue Op1 = N->getOperand(1);
5474 if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)
5475 break;
5476
5477 // If the multiplier fits int16, we can handle it with mulli.
5478 int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
5479 unsigned Shift = countTrailingZeros<uint64_t>(Imm);
5480 if (isInt<16>(Imm) || !Shift)
5481 break;
5482
5483 // If the shifted value fits int16, we can do this transformation:
5484 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5485 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5486 uint64_t ImmSh = Imm >> Shift;
5487 if (isInt<16>(ImmSh)) {
5488 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5489 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5490 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5491 N->getOperand(0), SDImm);
5492 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),
5493 getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));
5494 return;
5495 }
5496 break;
5497 }
5498 // FIXME: Remove this once the ANDI glue bug is fixed:
5499 case PPCISD::ANDI_rec_1_EQ_BIT:
5500 case PPCISD::ANDI_rec_1_GT_BIT: {
5501 if (!ANDIGlueBug)
5502 break;
5503
5504 EVT InVT = N->getOperand(0).getValueType();
5505 assert((InVT == MVT::i64 || InVT == MVT::i32) &&(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT::
i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ?
void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5506, __extension__
__PRETTY_FUNCTION__))
5506 "Invalid input type for ANDI_rec_1_EQ_BIT")(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT::
i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ?
void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\""
, "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp", 5506, __extension__
__PRETTY_FUNCTION__))
;
5507
5508 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5509 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5510 N->getOperand(0),
5511 CurDAG->getTargetConstant(1, dl, InVT)),
5512 0);
5513 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5514 SDValue SRIdxVal = CurDAG->getTargetConstant(
5515 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5516 dl, MVT::i32);
5517
5518 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5519 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5520 return;
5521 }
5522 case ISD::SELECT_CC: {
5523 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5524 EVT PtrVT =
5525 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
5526 bool isPPC64 = (PtrVT == MVT::i64);
5527
5528 // If this is a select of i1 operands, we'll pattern match it.
5529 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5530 break;
5531
5532 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5533 bool NeedSwapOps = false;
5534 bool IsUnCmp = false;
5535 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5536 SDValue LHS = N->getOperand(0);
5537 SDValue RHS = N->getOperand(1);
5538 if (NeedSwapOps)
5539 std::swap(LHS, RHS);
5540
5541 // Use SelectCC to generate the comparison that sets the CR bits. For
5542 // equality comparisons with one literal operand, SelectCC probably
5543 // does not materialize the whole literal and just uses xoris to check
5544 // it first, in which case the resulting comparison cannot exactly
5545 // represent the GT/LT relationship. To avoid this, we specify
5546 // SETGT/SETUGT here instead of SETEQ.
5547 SDValue GenCC =
5548 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5549 CurDAG->SelectNodeTo(
5550 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5551 N->getValueType(0), GenCC);
5552 NumP9Setb++;
5553 return;
5554 }
5555 }
5556
5557 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5558 if (!isPPC64)
5559 if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
5560 if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
5561 if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
5562 if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL &&
5563 CC == ISD::SETNE &&
5564 // FIXME: Implement this optzn for PPC64.
5565 N->getValueType(0) == MVT::i32) {
5566 SDNode *Tmp =
5567 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5568 N->getOperand(0), getI32Imm(~0U, dl));
5569 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5570 N->getOperand(0), SDValue(Tmp, 1));
5571 return;
5572 }
5573
5574 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5575
5576 if (N->getValueType(0) == MVT::i1) {
5577 // An i1 select is: (c & t) | (!c & f).
5578 bool Inv;
5579 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5580
5581 unsigned SRI;
5582 switch (Idx) {
5583 default: llvm_unreachable("Invalid CC index")::llvm::llvm_unreachable_internal("Invalid CC index", "llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp"
, 5583)
;
5584 case 0: SRI = PPC::sub_lt; break;
5585 case 1: SRI = PPC::sub_gt; break;
5586 case 2: SRI = PPC::sub_eq; break;
5587 case 3: SRI = PPC::sub_un; break;
5588 }
5589
5590 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5591
5592 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5593 CCBit, CCBit), 0);
5594 SDValue C = Inv ? NotCCBit : CCBit,
5595 NotC = Inv ? CCBit : NotCCBit;
5596
5597 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5598 C, N->getOperand(2)), 0);
5599 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5600 NotC, N->getOperand(3)), 0);
5601
5602 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5603 return;
5604 }
5605
5606 unsigned BROpc =
5607 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5608
5609 unsigned SelectCCOp;
5610 if (N->getValueType(0) == MVT::i32)
5611 SelectCCOp = PPC::SELECT_CC_I4;
5612 else if (N->getValueType(0) == MVT::i64)
5613 SelectCCOp = PPC::SELECT_CC_I8;
5614 else if (N->getValueType(0) == MVT::f32) {
5615 if (Subtarget->hasP8Vector())
5616 SelectCCOp = PPC::SELECT_CC_VSSRC;
5617 else if (Subtarget->hasSPE())
5618 SelectCCOp = PPC::SELECT_CC_SPE4;
5619 else
5620 SelectCCOp = PPC::SELECT_CC_F4;
5621 } else if (N->getValueType(0) == MVT::f64) {
5622 if (Subtarget->hasVSX())
5623 SelectCCOp = PPC::SELECT_CC_VSFRC;
5624 else if (Subtarget->hasSPE())
5625 SelectCCOp = PPC::SELECT_CC_SPE;
5626 else
5627 SelectCCOp = PPC::SELECT_CC_F8;
5628 } else if (N->getValueType(0) == MVT::f128)
5629 SelectCCOp = PPC::SELECT_CC_F16;
5630 else if (Subtarget->hasSPE())
5631 SelectCCOp = PPC::SELECT_CC_SPE;
5632 else if (N->getValueType(0) == MVT::v2f64 ||
5633 N->getValueType(0) == MVT::v2i64)
5634 SelectCCOp = PPC::SELECT_CC_VSRC;
5635 else
5636 SelectCCOp = PPC::SELECT_CC_VRRC;
5637
5638 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5639 getI32Imm(BROpc, dl) };
5640 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5641 return;
5642 }
5643 case ISD::VECTOR_SHUFFLE:
5644 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5645 N->getValueType(0) == MVT::v2i64)) {
5646 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5647
5648 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5649 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5650 unsigned DM[2];
5651
5652 for (int i = 0; i < 2; ++i)
5653 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5654 DM[i] = 0;
5655 else
5656 DM[i] = 1;
5657
5658 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5659 Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5660 isa<LoadSDNode>(Op1.getOperand(0))) {
5661 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5662 SDValue Base, Offset;
5663
5664 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5665 (LD->getMemoryVT() == MVT::f64 ||
5666 LD->getMemoryVT() == MVT::i64) &&
5667 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5668 SDValue Chain = LD->getChain();
5669 SDValue Ops[] = { Base, Offset, Chain };
5670 MachineMemOperand *MemOp = LD->getMemOperand();
5671 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5672 N->getValueType(0), Ops);
5673 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
5674 return;
5675 }
5676 }
5677
5678 // For little endian, we must swap the input operands and adjust
5679 // the mask elements (reverse and invert them).
5680 if (Subtarget->isLittleEndian()) {
5681 std::swap(Op1, Op2);
5682 unsigned tmp = DM[0];
5683 DM[0] = 1 - DM[1];
5684 DM[1] = 1 - tmp;
5685 }
5686
5687 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
5688 MVT::i32);
5689 SDValue Ops[] = { Op1, Op2, DMV };
5690 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
5691 return;
5692 }
5693
5694 break;
5695 case PPCISD::BDNZ:
5696 case PPCISD::BDZ: {
5697 bool IsPPC64 = Subtarget->isPPC64();
5698 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
5699 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
5700 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
5701 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
5702 MVT::Other, Ops);
5703 return;
5704 }
5705 case PPCISD::COND_BRANCH: {