File: | llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp |
Warning: | line 511, column 18 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This file defines a pattern matching instruction selector for PowerPC, | ||||
10 | // converting from a legalized dag to a PPC dag. | ||||
11 | // | ||||
12 | //===----------------------------------------------------------------------===// | ||||
13 | |||||
14 | #include "MCTargetDesc/PPCMCTargetDesc.h" | ||||
15 | #include "MCTargetDesc/PPCPredicates.h" | ||||
16 | #include "PPC.h" | ||||
17 | #include "PPCISelLowering.h" | ||||
18 | #include "PPCMachineFunctionInfo.h" | ||||
19 | #include "PPCSubtarget.h" | ||||
20 | #include "PPCTargetMachine.h" | ||||
21 | #include "llvm/ADT/APInt.h" | ||||
22 | #include "llvm/ADT/DenseMap.h" | ||||
23 | #include "llvm/ADT/STLExtras.h" | ||||
24 | #include "llvm/ADT/SmallPtrSet.h" | ||||
25 | #include "llvm/ADT/SmallVector.h" | ||||
26 | #include "llvm/ADT/Statistic.h" | ||||
27 | #include "llvm/Analysis/BranchProbabilityInfo.h" | ||||
28 | #include "llvm/CodeGen/FunctionLoweringInfo.h" | ||||
29 | #include "llvm/CodeGen/ISDOpcodes.h" | ||||
30 | #include "llvm/CodeGen/MachineBasicBlock.h" | ||||
31 | #include "llvm/CodeGen/MachineFunction.h" | ||||
32 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||||
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||
34 | #include "llvm/CodeGen/SelectionDAG.h" | ||||
35 | #include "llvm/CodeGen/SelectionDAGISel.h" | ||||
36 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||
37 | #include "llvm/CodeGen/TargetInstrInfo.h" | ||||
38 | #include "llvm/CodeGen/TargetRegisterInfo.h" | ||||
39 | #include "llvm/CodeGen/ValueTypes.h" | ||||
40 | #include "llvm/IR/BasicBlock.h" | ||||
41 | #include "llvm/IR/DebugLoc.h" | ||||
42 | #include "llvm/IR/Function.h" | ||||
43 | #include "llvm/IR/GlobalValue.h" | ||||
44 | #include "llvm/IR/InlineAsm.h" | ||||
45 | #include "llvm/IR/InstrTypes.h" | ||||
46 | #include "llvm/IR/IntrinsicsPowerPC.h" | ||||
47 | #include "llvm/IR/Module.h" | ||||
48 | #include "llvm/Support/Casting.h" | ||||
49 | #include "llvm/Support/CodeGen.h" | ||||
50 | #include "llvm/Support/CommandLine.h" | ||||
51 | #include "llvm/Support/Compiler.h" | ||||
52 | #include "llvm/Support/Debug.h" | ||||
53 | #include "llvm/Support/ErrorHandling.h" | ||||
54 | #include "llvm/Support/KnownBits.h" | ||||
55 | #include "llvm/Support/MachineValueType.h" | ||||
56 | #include "llvm/Support/MathExtras.h" | ||||
57 | #include "llvm/Support/raw_ostream.h" | ||||
58 | #include <algorithm> | ||||
59 | #include <cassert> | ||||
60 | #include <cstdint> | ||||
61 | #include <iterator> | ||||
62 | #include <limits> | ||||
63 | #include <memory> | ||||
64 | #include <new> | ||||
65 | #include <tuple> | ||||
66 | #include <utility> | ||||
67 | |||||
68 | using namespace llvm; | ||||
69 | |||||
// Debug/statistics category for this file. Each STATISTIC counter below is
// registered under "ppc-codegen"; the listing shows every macro use fused with
// its expansion (a static llvm::Statistic initialised with the category, the
// counter name and the description string).
70 | #define DEBUG_TYPE"ppc-codegen" "ppc-codegen" | ||||
71 | |||||
72 | STATISTIC(NumSextSetcc,static llvm::Statistic NumSextSetcc = {"ppc-codegen", "NumSextSetcc" , "Number of (sext(setcc)) nodes expanded into GPR sequence." } | ||||
73 | "Number of (sext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumSextSetcc = {"ppc-codegen", "NumSextSetcc" , "Number of (sext(setcc)) nodes expanded into GPR sequence." }; | ||||
74 | STATISTIC(NumZextSetcc,static llvm::Statistic NumZextSetcc = {"ppc-codegen", "NumZextSetcc" , "Number of (zext(setcc)) nodes expanded into GPR sequence." } | ||||
75 | "Number of (zext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumZextSetcc = {"ppc-codegen", "NumZextSetcc" , "Number of (zext(setcc)) nodes expanded into GPR sequence." }; | ||||
76 | STATISTIC(SignExtensionsAdded,static llvm::Statistic SignExtensionsAdded = {"ppc-codegen", "SignExtensionsAdded" , "Number of sign extensions for compare inputs added."} | ||||
77 | "Number of sign extensions for compare inputs added.")static llvm::Statistic SignExtensionsAdded = {"ppc-codegen", "SignExtensionsAdded" , "Number of sign extensions for compare inputs added."}; | ||||
78 | STATISTIC(ZeroExtensionsAdded,static llvm::Statistic ZeroExtensionsAdded = {"ppc-codegen", "ZeroExtensionsAdded" , "Number of zero extensions for compare inputs added."} | ||||
79 | "Number of zero extensions for compare inputs added.")static llvm::Statistic ZeroExtensionsAdded = {"ppc-codegen", "ZeroExtensionsAdded" , "Number of zero extensions for compare inputs added."}; | ||||
80 | STATISTIC(NumLogicOpsOnComparison,static llvm::Statistic NumLogicOpsOnComparison = {"ppc-codegen" , "NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR." } | ||||
81 | "Number of logical ops on i1 values calculated in GPR.")static llvm::Statistic NumLogicOpsOnComparison = {"ppc-codegen" , "NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR." }; | ||||
82 | STATISTIC(OmittedForNonExtendUses,static llvm::Statistic OmittedForNonExtendUses = {"ppc-codegen" , "OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses." } | ||||
83 | "Number of compares not eliminated as they have non-extending uses.")static llvm::Statistic OmittedForNonExtendUses = {"ppc-codegen" , "OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses." }; | ||||
84 | STATISTIC(NumP9Setb,static llvm::Statistic NumP9Setb = {"ppc-codegen", "NumP9Setb" , "Number of compares lowered to setb."} | ||||
85 | "Number of compares lowered to setb.")static llvm::Statistic NumP9Setb = {"ppc-codegen", "NumP9Setb" , "Number of compares lowered to setb."}; | ||||
86 | |||||
87 | // FIXME: Remove this option once the ANDI glue bug it exposes has been fixed! | ||||
// NOTE(review): unlike the options below this one is not declared static;
// presumably it is referenced from another translation unit -- confirm.
88 | cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", | ||||
89 | cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); | ||||
90 | |||||
// Hidden flag "ppc-use-bit-perm-rewriter"; initialised to true.
91 | static cl::opt<bool> | ||||
92 | UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), | ||||
93 | cl::desc("use aggressive ppc isel for bit permutations"), | ||||
94 | cl::Hidden); | ||||
// Hidden flag "ppc-bit-perm-rewriter-stress-rotates"; no explicit cl::init.
95 | static cl::opt<bool> BPermRewriterNoMasking( | ||||
96 | "ppc-bit-perm-rewriter-stress-rotates", | ||||
97 | cl::desc("stress rotate selection in aggressive ppc isel for " | ||||
98 | "bit permutations"), | ||||
99 | cl::Hidden); | ||||
100 | |||||
// Hidden flag "ppc-use-branch-hint"; initialised to true.
101 | static cl::opt<bool> EnableBranchHint( | ||||
102 | "ppc-use-branch-hint", cl::init(true), | ||||
103 | cl::desc("Enable static hinting of branches on ppc"), | ||||
104 | cl::Hidden); | ||||
105 | |||||
// Hidden flag "ppc-tls-opt"; initialised to true.
106 | static cl::opt<bool> EnableTLSOpt( | ||||
107 | "ppc-tls-opt", cl::init(true), | ||||
108 | cl::desc("Enable tls optimization peephole"), | ||||
109 | cl::Hidden); | ||||
110 | |||||
// Categories of integer comparisons for which GPR-only code may be emitted.
// These are the values accepted by the "ppc-gpr-icmps" option (CmpInGPR).
111 | enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64, | ||||
112 | ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32, | ||||
113 | ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 }; | ||||
114 | |||||
// Hidden option "ppc-gpr-icmps": chooses which ICmpInGPRType category of
// comparisons gets GPR-only code. Initialised to ICGPR_All. Each clEnumValN
// entry maps a command-line spelling to an enumerator; the listing shows every
// macro use fused with its llvm::cl::OptionEnumValue expansion.
115 | static cl::opt<ICmpInGPRType> CmpInGPR( | ||||
116 | "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), | ||||
117 | cl::desc("Specify the types of comparisons to emit GPR-only code for."), | ||||
118 | cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons.")llvm::cl::OptionEnumValue { "none", int(ICGPR_None), "Do not modify integer comparisons." }, | ||||
119 | clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs.")llvm::cl::OptionEnumValue { "all", int(ICGPR_All), "All possible int comparisons in GPRs." }, | ||||
120 | clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs.")llvm::cl::OptionEnumValue { "i32", int(ICGPR_I32), "Only i32 comparisons in GPRs." }, | ||||
121 | clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs.")llvm::cl::OptionEnumValue { "i64", int(ICGPR_I64), "Only i64 comparisons in GPRs." }, | ||||
122 | clEnumValN(ICGPR_NonExtIn, "nonextin",llvm::cl::OptionEnumValue { "nonextin", int(ICGPR_NonExtIn), "Only comparisons where inputs don't need [sz]ext." } | ||||
123 | "Only comparisons where inputs don't need [sz]ext.")llvm::cl::OptionEnumValue { "nonextin", int(ICGPR_NonExtIn), "Only comparisons where inputs don't need [sz]ext." }, | ||||
124 | clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result.")llvm::cl::OptionEnumValue { "zext", int(ICGPR_Zext), "Only comparisons with zext result." }, | ||||
125 | clEnumValN(ICGPR_ZextI32, "zexti32",llvm::cl::OptionEnumValue { "zexti32", int(ICGPR_ZextI32), "Only i32 comparisons with zext result." } | ||||
126 | "Only i32 comparisons with zext result.")llvm::cl::OptionEnumValue { "zexti32", int(ICGPR_ZextI32), "Only i32 comparisons with zext result." }, | ||||
127 | clEnumValN(ICGPR_ZextI64, "zexti64",llvm::cl::OptionEnumValue { "zexti64", int(ICGPR_ZextI64), "Only i64 comparisons with zext result." } | ||||
128 | "Only i64 comparisons with zext result.")llvm::cl::OptionEnumValue { "zexti64", int(ICGPR_ZextI64), "Only i64 comparisons with zext result." }, | ||||
129 | clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result.")llvm::cl::OptionEnumValue { "sext", int(ICGPR_Sext), "Only comparisons with sext result." }, | ||||
130 | clEnumValN(ICGPR_SextI32, "sexti32",llvm::cl::OptionEnumValue { "sexti32", int(ICGPR_SextI32), "Only i32 comparisons with sext result." } | ||||
131 | "Only i32 comparisons with sext result.")llvm::cl::OptionEnumValue { "sexti32", int(ICGPR_SextI32), "Only i32 comparisons with sext result." }, | ||||
132 | clEnumValN(ICGPR_SextI64, "sexti64",llvm::cl::OptionEnumValue { "sexti64", int(ICGPR_SextI64), "Only i64 comparisons with sext result." } | ||||
133 | "Only i64 comparisons with sext result.")llvm::cl::OptionEnumValue { "sexti64", int(ICGPR_SextI64), "Only i64 comparisons with sext result." })); | ||||
134 | namespace { | ||||
135 | |||||
136 | //===--------------------------------------------------------------------===// | ||||
137 | /// PPCDAGToDAGISel - PPC specific code to select PPC machine | ||||
138 | /// instructions for SelectionDAG operations. | ||||
139 | /// Subtarget, PPCLowering and GlobalBaseReg are refreshed by runOnMachineFunction. | ||||
140 | class PPCDAGToDAGISel : public SelectionDAGISel { | ||||
141 | const PPCTargetMachine &TM; | ||||
142 | const PPCSubtarget *Subtarget = nullptr; | ||||
143 | const PPCTargetLowering *PPCLowering = nullptr; | ||||
144 | unsigned GlobalBaseReg = 0; | ||||
145 | |||||
146 | public: | ||||
147 | explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) | ||||
148 | : SelectionDAGISel(tm, OptLevel), TM(tm) {} | ||||
149 | |||||
      /// Resets GlobalBaseReg, caches the subtarget and its target lowering for
      /// \p MF, creates an 8-byte, 8-byte-aligned stack object for the ROP
      /// protection hash when the subtarget reports hasROPProtect(), and then
      /// runs the generic SelectionDAGISel. Always returns true; the value
      /// returned by the base class is not forwarded.
150 | bool runOnMachineFunction(MachineFunction &MF) override { | ||||
151 | // Make sure we re-emit a set of the global base reg if necessary | ||||
152 | GlobalBaseReg = 0; | ||||
153 | Subtarget = &MF.getSubtarget<PPCSubtarget>(); | ||||
154 | PPCLowering = Subtarget->getTargetLowering(); | ||||
155 | if (Subtarget->hasROPProtect()) { | ||||
156 | // Create a place on the stack for the ROP Protection Hash. | ||||
157 | // The ROP Protection Hash will always be 8 bytes and aligned to 8 | ||||
158 | // bytes. | ||||
159 | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
160 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); | ||||
161 | const int Result = MFI.CreateStackObject(8, Align(8), false); | ||||
162 | FI->setROPProtectionHashSaveIndex(Result); | ||||
163 | } | ||||
164 | SelectionDAGISel::runOnMachineFunction(MF); | ||||
165 | |||||
166 | return true; | ||||
167 | } | ||||
168 | |||||
169 | void PreprocessISelDAG() override; | ||||
170 | void PostprocessISelDAG() override; | ||||
171 | |||||
172 | /// getI16Imm - Return a target constant with the specified value, of type | ||||
173 | /// i16. | ||||
174 | inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) { | ||||
175 | return CurDAG->getTargetConstant(Imm, dl, MVT::i16); | ||||
176 | } | ||||
177 | |||||
178 | /// getI32Imm - Return a target constant with the specified value, of type | ||||
179 | /// i32. | ||||
180 | inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { | ||||
181 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
182 | } | ||||
183 | |||||
184 | /// getI64Imm - Return a target constant with the specified value, of type | ||||
185 | /// i64. | ||||
186 | inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { | ||||
187 | return CurDAG->getTargetConstant(Imm, dl, MVT::i64); | ||||
188 | } | ||||
189 | |||||
190 | /// getSmallIPtrImm - Return a target constant of the target's pointer type. | ||||
191 | inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { | ||||
192 | return CurDAG->getTargetConstant( | ||||
193 | Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); | ||||
194 | } | ||||
195 | |||||
196 | /// isRotateAndMask - Returns true if Mask and Shift can be folded into a | ||||
197 | /// rotate and mask opcode and mask operation. | ||||
198 | static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, | ||||
199 | unsigned &SH, unsigned &MB, unsigned &ME); | ||||
200 | |||||
201 | /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC | ||||
202 | /// base register. Return the virtual register that holds this value. | ||||
203 | SDNode *getGlobalBaseReg(); | ||||
204 | |||||
205 | void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); | ||||
206 | |||||
207 | // Select - Convert the specified operand from a target-independent to a | ||||
208 | // target-specific node if it hasn't already been changed. | ||||
209 | void Select(SDNode *N) override; | ||||
210 | |||||
211 | bool tryBitfieldInsert(SDNode *N); | ||||
212 | bool tryBitPermutation(SDNode *N); | ||||
213 | bool tryIntCompareInGPR(SDNode *N); | ||||
214 | |||||
215 | // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into | ||||
216 | // an X-Form load instruction with the offset being a relocation coming from | ||||
217 | // the PPCISD::ADD_TLS. | ||||
218 | bool tryTLSXFormLoad(LoadSDNode *N); | ||||
219 | // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into | ||||
220 | // an X-Form store instruction with the offset being a relocation coming from | ||||
221 | // the PPCISD::ADD_TLS. | ||||
222 | bool tryTLSXFormStore(StoreSDNode *N); | ||||
223 | /// SelectCC - Select a comparison of the specified values with the | ||||
224 | /// specified condition code, returning the CR# of the expression. | ||||
225 | SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
226 | const SDLoc &dl, SDValue Chain = SDValue()); | ||||
227 | |||||
228 | /// SelectAddrImmOffs - Return true if the operand is valid for a preinc | ||||
229 | /// immediate field. Note that the operand at this point is already the | ||||
230 | /// result of a prior SelectAddressRegImm call. | ||||
231 | bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { | ||||
232 | if (N.getOpcode() == ISD::TargetConstant || | ||||
233 | N.getOpcode() == ISD::TargetGlobalAddress) { | ||||
234 | Out = N; | ||||
235 | return true; | ||||
236 | } | ||||
237 | |||||
238 | return false; | ||||
239 | } | ||||
240 | |||||
241 | /// SelectDSForm - Returns true if address N can be represented by the | ||||
242 | /// addressing mode of DSForm instructions (a base register, plus a signed | ||||
243 | /// 16-bit displacement that is a multiple of 4). | ||||
244 | bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { | ||||
245 | return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, | ||||
246 | Align(4)) == PPC::AM_DSForm; | ||||
247 | } | ||||
248 | |||||
249 | /// SelectDQForm - Returns true if address N can be represented by the | ||||
250 | /// addressing mode of DQForm instructions (a base register, plus a signed | ||||
251 | /// 16-bit displacement that is a multiple of 16). | ||||
252 | bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { | ||||
253 | return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, | ||||
254 | Align(16)) == PPC::AM_DQForm; | ||||
255 | } | ||||
256 | |||||
257 | /// SelectDForm - Returns true if address N can be represented by | ||||
258 | /// the addressing mode of DForm instructions (a base register, plus a | ||||
259 | /// signed 16-bit immediate). | ||||
260 | bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { | ||||
261 | return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, | ||||
262 | None) == PPC::AM_DForm; | ||||
263 | } | ||||
264 | |||||
265 | /// SelectXForm - Returns true if address N can be represented by the | ||||
266 | /// addressing mode of XForm instructions (an indexed [r+r] operation). | ||||
267 | bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { | ||||
268 | return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, | ||||
269 | None) == PPC::AM_XForm; | ||||
270 | } | ||||
271 | |||||
272 | /// SelectForceXForm - Given the specified address, force it to be | ||||
273 | /// represented as an indexed [r+r] operation (an XForm instruction). \p Parent is not used. | ||||
274 | bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp, | ||||
275 | SDValue &Base) { | ||||
276 | return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) == | ||||
277 | PPC::AM_XForm; | ||||
278 | } | ||||
279 | |||||
280 | /// SelectAddrIdx - Given the specified address, check to see if it can be | ||||
281 | /// represented as an indexed [r+r] operation. | ||||
282 | /// This is for xform instructions whose associated displacement form is D. | ||||
283 | /// The last argument (None) means the associated D form places no alignment | ||||
284 | /// requirement on its 16-bit signed displacement. | ||||
285 | /// Returns false if it can be represented by [r+imm], which are preferred. | ||||
286 | bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { | ||||
287 | return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None); | ||||
288 | } | ||||
289 | |||||
290 | /// SelectAddrIdxX4 - Given the specified address, check to see if it can be | ||||
291 | /// represented as an indexed [r+r] operation. | ||||
292 | /// This is for xform instructions whose associated displacement form is DS. | ||||
293 | /// The last argument (Align(4)) means the associated DS form's 16-bit signed | ||||
294 | /// displacement must be a multiple of 4. | ||||
295 | /// Returns false if it can be represented by [r+imm], which are preferred. | ||||
296 | bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) { | ||||
297 | return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, | ||||
298 | Align(4)); | ||||
299 | } | ||||
300 | |||||
301 | /// SelectAddrIdxX16 - Given the specified address, check to see if it can be | ||||
302 | /// represented as an indexed [r+r] operation. | ||||
303 | /// This is for xform instructions whose associated displacement form is DQ. | ||||
304 | /// The last argument (Align(16)) means the associated DQ form's 16-bit signed | ||||
305 | /// displacement must be a multiple of 16. | ||||
306 | /// Returns false if it can be represented by [r+imm], which are preferred. | ||||
307 | bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) { | ||||
308 | return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, | ||||
309 | Align(16)); | ||||
310 | } | ||||
311 | |||||
312 | /// SelectAddrIdxOnly - Given the specified address, force it to be | ||||
313 | /// represented as an indexed [r+r] operation. | ||||
314 | bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { | ||||
315 | return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); | ||||
316 | } | ||||
317 | |||||
318 | /// SelectAddrImm - Returns true if the address N can be represented by | ||||
319 | /// a base register plus a signed 16-bit displacement [r+imm]. | ||||
320 | /// The last argument (None) means the D form places no alignment requirement | ||||
321 | /// on the 16-bit signed displacement. | ||||
322 | bool SelectAddrImm(SDValue N, SDValue &Disp, | ||||
323 | SDValue &Base) { | ||||
324 | return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None); | ||||
325 | } | ||||
326 | |||||
327 | /// SelectAddrImmX4 - Returns true if the address N can be represented by | ||||
328 | /// a base register plus a signed 16-bit displacement that is a multiple of | ||||
329 | /// 4 (last parameter). Suitable for use by STD and friends. | ||||
330 | bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { | ||||
331 | return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4)); | ||||
332 | } | ||||
333 | |||||
334 | /// SelectAddrImmX16 - Returns true if the address N can be represented by | ||||
335 | /// a base register plus a signed 16-bit displacement that is a multiple of | ||||
336 | /// 16 (last parameter). Suitable for use by STXV and friends. | ||||
337 | bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { | ||||
338 | return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, | ||||
339 | Align(16)); | ||||
340 | } | ||||
341 | |||||
342 | /// SelectAddrImmX34 - Returns true if the address N can be represented by | ||||
343 | /// a base register plus a signed 34-bit displacement. Suitable for use by | ||||
344 | /// PSTXVP and friends. | ||||
345 | bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) { | ||||
346 | return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG); | ||||
347 | } | ||||
348 | |||||
349 | // Select an address into a single register: always succeeds with Base = N. | ||||
350 | bool SelectAddr(SDValue N, SDValue &Base) { | ||||
351 | Base = N; | ||||
352 | return true; | ||||
353 | } | ||||
354 | |||||
355 | bool SelectAddrPCRel(SDValue N, SDValue &Base) { | ||||
356 | return PPCLowering->SelectAddressPCRel(N, Base); | ||||
357 | } | ||||
358 | |||||
359 | /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for | ||||
360 | /// inline asm expressions. It is always correct to compute the value into | ||||
361 | /// a register. The case of adding a (possibly relocatable) constant to a | ||||
362 | /// register can be improved, but it is wrong to substitute Reg+Reg for | ||||
363 | /// Reg in an asm, because the load or store opcode would have to change. | ||||
      /// For each handled constraint the operand is wrapped in a
      /// COPY_TO_REGCLASS to the register class returned by
      /// getPointerRegClass(*MF, /*Kind=*/1), appended to \p OutOps, and false
      /// is returned. Any other constraint ID is printed to errs() and reaches
      /// llvm_unreachable.
364 | bool SelectInlineAsmMemoryOperand(const SDValue &Op, | ||||
365 | unsigned ConstraintID, | ||||
366 | std::vector<SDValue> &OutOps) override { | ||||
367 | switch(ConstraintID) { | ||||
368 | default: | ||||
369 | errs() << "ConstraintID: " << ConstraintID << "\n"; | ||||
370 | llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 370); | ||||
371 | case InlineAsm::Constraint_es: | ||||
372 | case InlineAsm::Constraint_m: | ||||
373 | case InlineAsm::Constraint_o: | ||||
374 | case InlineAsm::Constraint_Q: | ||||
375 | case InlineAsm::Constraint_Z: | ||||
376 | case InlineAsm::Constraint_Zy: | ||||
377 | // We need to make sure that this one operand does not end up in r0 | ||||
378 | // (because we might end up lowering this as 0(%op)). | ||||
379 | const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); | ||||
380 | const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); | ||||
381 | SDLoc dl(Op); | ||||
382 | SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); | ||||
383 | SDValue NewOp = | ||||
384 | SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, | ||||
385 | dl, Op.getValueType(), | ||||
386 | Op, RC), 0); | ||||
387 | |||||
388 | OutOps.push_back(NewOp); | ||||
389 | return false; | ||||
390 | } | ||||
      // Every path through the switch above either returns or is marked
      // unreachable, so this return only follows the switch syntactically.
391 | return true; | ||||
392 | } | ||||
393 | |||||
394 | StringRef getPassName() const override { | ||||
395 | return "PowerPC DAG->DAG Pattern Instruction Selection"; | ||||
396 | } | ||||
397 | |||||
398 | // Include the pieces autogenerated from the target description. | ||||
399 | #include "PPCGenDAGISel.inc" | ||||
400 | |||||
401 | private: | ||||
402 | bool trySETCC(SDNode *N); | ||||
403 | bool tryFoldSWTestBRCC(SDNode *N); | ||||
404 | bool tryAsSingleRLDICL(SDNode *N); | ||||
405 | bool tryAsSingleRLDICR(SDNode *N); | ||||
406 | bool tryAsSingleRLWINM(SDNode *N); | ||||
407 | bool tryAsSingleRLWINM8(SDNode *N); | ||||
408 | bool tryAsSingleRLWIMI(SDNode *N); | ||||
409 | bool tryAsPairOfRLDICL(SDNode *N); | ||||
410 | bool tryAsSingleRLDIMI(SDNode *N); | ||||
411 | |||||
412 | void PeepholePPC64(); | ||||
413 | void PeepholePPC64ZExt(); | ||||
414 | void PeepholeCROps(); | ||||
415 | |||||
416 | SDValue combineToCMPB(SDNode *N); | ||||
417 | void foldBoolExts(SDValue &Res, SDNode *&N); | ||||
418 | |||||
419 | bool AllUsersSelectZero(SDNode *N); | ||||
420 | void SwapAllSelectUsers(SDNode *N); | ||||
421 | |||||
422 | bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; | ||||
423 | void transferMemOperands(SDNode *N, SDNode *Result); | ||||
424 | }; | ||||
425 | |||||
426 | } // end anonymous namespace | ||||
427 | |||||
428 | /// getGlobalBaseReg - Output the instructions required to put the | ||||
429 | /// base address to use for accessing globals into a register. | ||||
430 | /// The setup code is only emitted while GlobalBaseReg is still 0; the register node (pointer type) is always returned. | ||||
431 | SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { | ||||
432 | if (!GlobalBaseReg) { | ||||
433 | const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); | ||||
434 | // Insert the set of GlobalBaseReg into the first MBB of the function | ||||
435 | MachineBasicBlock &FirstMBB = MF->front(); | ||||
436 | MachineBasicBlock::iterator MBBI = FirstMBB.begin(); | ||||
437 | const Module *M = MF->getFunction().getParent(); | ||||
438 | DebugLoc dl; | ||||
439 | |||||
440 | if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { | ||||
441 | if (Subtarget->isTargetELF()) { | ||||
      // ELF with i32 pointers: the base is the fixed register R30.
442 | GlobalBaseReg = PPC::R30; | ||||
443 | if (!Subtarget->isSecurePlt() && | ||||
444 | M->getPICLevel() == PICLevel::SmallPIC) { | ||||
      // Not secure-PLT and small PIC level: MoveGOTtoLR followed by MFLR.
445 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); | ||||
446 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); | ||||
447 | MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); | ||||
448 | } else { | ||||
      // Otherwise: MovePCtoLR + MFLR, then UpdateGBR, which also defines a
      // fresh GPRC virtual register (TempReg).
449 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); | ||||
450 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); | ||||
451 | Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); | ||||
452 | BuildMI(FirstMBB, MBBI, dl, | ||||
453 | TII.get(PPC::UpdateGBR), GlobalBaseReg) | ||||
454 | .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); | ||||
455 | MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); | ||||
456 | } | ||||
457 | } else { | ||||
      // Non-ELF with i32 pointers: the base lives in a new virtual register.
458 | GlobalBaseReg = | ||||
459 | RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); | ||||
460 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); | ||||
461 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); | ||||
462 | } | ||||
463 | } else { | ||||
      // Pointer type is not i32: use the G8RC register class and the
      // MovePCtoLR8 / MFLR8 opcodes.
464 | // We must ensure that this sequence is dominated by the prologue. | ||||
465 | // FIXME: This is a bit of a big hammer since we don't get the benefits | ||||
466 | // of shrink-wrapping whenever we emit this instruction. Considering | ||||
467 | // this is used in any function where we emit a jump table, this may be | ||||
468 | // a significant limitation. We should consider inserting this in the | ||||
469 | // block where it is used and then commoning this sequence up if it | ||||
470 | // appears in multiple places. | ||||
471 | // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of | ||||
472 | // MovePCtoLR8. | ||||
473 | MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true); | ||||
474 | GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); | ||||
475 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); | ||||
476 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); | ||||
477 | } | ||||
478 | } | ||||
479 | return CurDAG->getRegister(GlobalBaseReg, | ||||
480 | PPCLowering->getPointerTy(CurDAG->getDataLayout())) | ||||
481 | .getNode(); | ||||
482 | } | ||||
483 | |||||
484 | // Check if a SDValue has the toc-data attribute. | ||||
485 | static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) { | ||||
486 | GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val); | ||||
| |||||
487 | if (!GA) | ||||
488 | return false; | ||||
489 | |||||
490 | const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal()); | ||||
491 | if (!GV) | ||||
492 | return false; | ||||
493 | |||||
494 | if (!GV->hasAttribute("toc-data")) | ||||
495 | return false; | ||||
496 | |||||
497 | // TODO: These asserts should be updated as more support for the toc data | ||||
498 | // transformation is added (64 bit, struct support, etc.). | ||||
499 | |||||
500 | assert(PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "(static_cast <bool> (PointerSize == 4 && "Only 32 Bit Codegen is currently supported by " "the toc data transformation.") ? void (0) : __assert_fail ( "PointerSize == 4 && \"Only 32 Bit Codegen is currently supported by \" \"the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 501, __extension__ __PRETTY_FUNCTION__)) | ||||
501 | "the toc data transformation.")(static_cast <bool> (PointerSize == 4 && "Only 32 Bit Codegen is currently supported by " "the toc data transformation.") ? void (0) : __assert_fail ( "PointerSize == 4 && \"Only 32 Bit Codegen is currently supported by \" \"the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 501, __extension__ __PRETTY_FUNCTION__)); | ||||
502 | |||||
503 | assert(PointerSize >= GV->getAlign().valueOrOne().value() &&(static_cast <bool> (PointerSize >= GV->getAlign( ).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes " "not supported by the toc data transformation.") ? void (0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 505, __extension__ __PRETTY_FUNCTION__)) | ||||
504 | "GlobalVariables with an alignment requirement stricter then 4-bytes "(static_cast <bool> (PointerSize >= GV->getAlign( ).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes " "not supported by the toc data transformation.") ? void (0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 505, __extension__ __PRETTY_FUNCTION__)) | ||||
505 | "not supported by the toc data transformation.")(static_cast <bool> (PointerSize >= GV->getAlign( ).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes " "not supported by the toc data transformation.") ? void (0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 505, __extension__ __PRETTY_FUNCTION__)); | ||||
506 | |||||
507 | Type *PtrType = GV->getType(); | ||||
508 | assert(PtrType->isPointerTy() &&(static_cast <bool> (PtrType->isPointerTy() && "GlobalVariables always have pointer type!.") ? void (0) : __assert_fail ("PtrType->isPointerTy() && \"GlobalVariables always have pointer type!.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 509, __extension__ __PRETTY_FUNCTION__)) | ||||
509 | "GlobalVariables always have pointer type!.")(static_cast <bool> (PtrType->isPointerTy() && "GlobalVariables always have pointer type!.") ? void (0) : __assert_fail ("PtrType->isPointerTy() && \"GlobalVariables always have pointer type!.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 509, __extension__ __PRETTY_FUNCTION__)); | ||||
510 | |||||
511 | Type *GVType = dyn_cast<PointerType>(PtrType)->getElementType(); | ||||
| |||||
512 | |||||
513 | assert(GVType->isSized() && "A GlobalVariable's size must be known to be "(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 514, __extension__ __PRETTY_FUNCTION__)) | ||||
514 | "supported by the toc data transformation.")(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 514, __extension__ __PRETTY_FUNCTION__)); | ||||
515 | |||||
516 | if (GVType->isVectorTy()) | ||||
517 | report_fatal_error("A GlobalVariable of Vector type is not currently " | ||||
518 | "supported by the toc data transformation."); | ||||
519 | |||||
520 | if (GVType->isArrayTy()) | ||||
521 | report_fatal_error("A GlobalVariable of Array type is not currently " | ||||
522 | "supported by the toc data transformation."); | ||||
523 | |||||
524 | if (GVType->isStructTy()) | ||||
525 | report_fatal_error("A GlobalVariable of Struct type is not currently " | ||||
526 | "supported by the toc data transformation."); | ||||
527 | |||||
528 | assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&(static_cast <bool> (GVType->getPrimitiveSizeInBits( ) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 530, __extension__ __PRETTY_FUNCTION__)) | ||||
529 | "A GlobalVariable with size larger than 32 bits is not currently "(static_cast <bool> (GVType->getPrimitiveSizeInBits( ) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 530, __extension__ __PRETTY_FUNCTION__)) | ||||
530 | "supported by the toc data transformation.")(static_cast <bool> (GVType->getPrimitiveSizeInBits( ) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 530, __extension__ __PRETTY_FUNCTION__)); | ||||
531 | |||||
532 | if (GV->hasLocalLinkage() || GV->hasPrivateLinkage()) | ||||
533 | report_fatal_error("A GlobalVariable with private or local linkage is not " | ||||
534 | "currently supported by the toc data transformation."); | ||||
535 | |||||
536 | assert(!GV->hasCommonLinkage() &&(static_cast <bool> (!GV->hasCommonLinkage() && "Tentative definitions cannot have the mapping class XMC_TD." ) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 537, __extension__ __PRETTY_FUNCTION__)) | ||||
537 | "Tentative definitions cannot have the mapping class XMC_TD.")(static_cast <bool> (!GV->hasCommonLinkage() && "Tentative definitions cannot have the mapping class XMC_TD." ) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 537, __extension__ __PRETTY_FUNCTION__)); | ||||
538 | |||||
539 | return true; | ||||
540 | } | ||||
541 | |||||
542 | /// isInt32Immediate - This method tests to see if the node is a 32-bit constant | ||||
543 | /// operand. If so Imm will receive the 32-bit value. | ||||
544 | static bool isInt32Immediate(SDNode *N, unsigned &Imm) { | ||||
545 | if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { | ||||
546 | Imm = cast<ConstantSDNode>(N)->getZExtValue(); | ||||
547 | return true; | ||||
548 | } | ||||
549 | return false; | ||||
550 | } | ||||
551 | |||||
552 | /// isInt64Immediate - This method tests to see if the node is a 64-bit constant | ||||
553 | /// operand. If so Imm will receive the 64-bit value. | ||||
554 | static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { | ||||
555 | if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { | ||||
556 | Imm = cast<ConstantSDNode>(N)->getZExtValue(); | ||||
557 | return true; | ||||
558 | } | ||||
559 | return false; | ||||
560 | } | ||||
561 | |||||
562 | // isInt32Immediate - This method tests to see if a constant operand. | ||||
563 | // If so Imm will receive the 32 bit value. | ||||
564 | static bool isInt32Immediate(SDValue N, unsigned &Imm) { | ||||
565 | return isInt32Immediate(N.getNode(), Imm); | ||||
566 | } | ||||
567 | |||||
568 | /// isInt64Immediate - This method tests to see if the value is a 64-bit | ||||
569 | /// constant operand. If so Imm will receive the 64-bit value. | ||||
570 | static bool isInt64Immediate(SDValue N, uint64_t &Imm) { | ||||
571 | return isInt64Immediate(N.getNode(), Imm); | ||||
572 | } | ||||
573 | |||||
574 | static unsigned getBranchHint(unsigned PCC, | ||||
575 | const FunctionLoweringInfo &FuncInfo, | ||||
576 | const SDValue &DestMBB) { | ||||
577 | assert(isa<BasicBlockSDNode>(DestMBB))(static_cast <bool> (isa<BasicBlockSDNode>(DestMBB )) ? void (0) : __assert_fail ("isa<BasicBlockSDNode>(DestMBB)" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 577, __extension__ __PRETTY_FUNCTION__)); | ||||
578 | |||||
579 | if (!FuncInfo.BPI) return PPC::BR_NO_HINT; | ||||
580 | |||||
581 | const BasicBlock *BB = FuncInfo.MBB->getBasicBlock(); | ||||
582 | const Instruction *BBTerm = BB->getTerminator(); | ||||
583 | |||||
584 | if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; | ||||
585 | |||||
586 | const BasicBlock *TBB = BBTerm->getSuccessor(0); | ||||
587 | const BasicBlock *FBB = BBTerm->getSuccessor(1); | ||||
588 | |||||
589 | auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB); | ||||
590 | auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB); | ||||
591 | |||||
592 | // We only want to handle cases which are easy to predict at static time, e.g. | ||||
593 | // C++ throw statement, that is very likely not taken, or calling never | ||||
594 | // returned function, e.g. stdlib exit(). So we set Threshold to filter | ||||
595 | // unwanted cases. | ||||
596 | // | ||||
597 | // Below is LLVM branch weight table, we only want to handle case 1, 2 | ||||
598 | // | ||||
599 | // Case Taken:Nontaken Example | ||||
600 | // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), | ||||
601 | // 2. Invoke-terminating 1:1048575 | ||||
602 | // 3. Coldblock 4:64 __builtin_expect | ||||
603 | // 4. Loop Branch 124:4 For loop | ||||
604 | // 5. PH/ZH/FPH 20:12 | ||||
605 | const uint32_t Threshold = 10000; | ||||
606 | |||||
607 | if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) | ||||
608 | return PPC::BR_NO_HINT; | ||||
609 | |||||
610 | LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName () << "'\n" << " -> " << TBB->getName () << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"; } } while (false) | ||||
611 | << "::" << BB->getName() << "'\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName () << "'\n" << " -> " << TBB->getName () << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"; } } while (false) | ||||
612 | << " -> " << TBB->getName() << ": " << TProb << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName () << "'\n" << " -> " << TBB->getName () << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"; } } while (false) | ||||
613 | << " -> " << FBB->getName() << ": " << FProb << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName () << "'\n" << " -> " << TBB->getName () << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"; } } while (false); | ||||
614 | |||||
615 | const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB); | ||||
616 | |||||
617 | // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities, | ||||
618 | // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock | ||||
619 | if (BBDN->getBasicBlock()->getBasicBlock() != TBB) | ||||
620 | std::swap(TProb, FProb); | ||||
621 | |||||
622 | return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; | ||||
623 | } | ||||
624 | |||||
625 | // isOpcWithIntImmediate - This method tests to see if the node is a specific | ||||
626 | // opcode and that it has a immediate integer right operand. | ||||
627 | // If so Imm will receive the 32 bit value. | ||||
628 | static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { | ||||
629 | return N->getOpcode() == Opc | ||||
630 | && isInt32Immediate(N->getOperand(1).getNode(), Imm); | ||||
631 | } | ||||
632 | |||||
633 | void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { | ||||
634 | SDLoc dl(SN); | ||||
635 | int FI = cast<FrameIndexSDNode>(N)->getIndex(); | ||||
636 | SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); | ||||
637 | unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; | ||||
638 | if (SN->hasOneUse()) | ||||
639 | CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, | ||||
640 | getSmallIPtrImm(Offset, dl)); | ||||
641 | else | ||||
642 | ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, | ||||
643 | getSmallIPtrImm(Offset, dl))); | ||||
644 | } | ||||
645 | |||||
646 | bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, | ||||
647 | bool isShiftMask, unsigned &SH, | ||||
648 | unsigned &MB, unsigned &ME) { | ||||
649 | // Don't even go down this path for i64, since different logic will be | ||||
650 | // necessary for rldicl/rldicr/rldimi. | ||||
651 | if (N->getValueType(0) != MVT::i32) | ||||
652 | return false; | ||||
653 | |||||
654 | unsigned Shift = 32; | ||||
655 | unsigned Indeterminant = ~0; // bit mask marking indeterminant results | ||||
656 | unsigned Opcode = N->getOpcode(); | ||||
657 | if (N->getNumOperands() != 2 || | ||||
658 | !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) | ||||
659 | return false; | ||||
660 | |||||
661 | if (Opcode == ISD::SHL) { | ||||
662 | // apply shift left to mask if it comes first | ||||
663 | if (isShiftMask) Mask = Mask << Shift; | ||||
664 | // determine which bits are made indeterminant by shift | ||||
665 | Indeterminant = ~(0xFFFFFFFFu << Shift); | ||||
666 | } else if (Opcode == ISD::SRL) { | ||||
667 | // apply shift right to mask if it comes first | ||||
668 | if (isShiftMask) Mask = Mask >> Shift; | ||||
669 | // determine which bits are made indeterminant by shift | ||||
670 | Indeterminant = ~(0xFFFFFFFFu >> Shift); | ||||
671 | // adjust for the left rotate | ||||
672 | Shift = 32 - Shift; | ||||
673 | } else if (Opcode == ISD::ROTL) { | ||||
674 | Indeterminant = 0; | ||||
675 | } else { | ||||
676 | return false; | ||||
677 | } | ||||
678 | |||||
679 | // if the mask doesn't intersect any Indeterminant bits | ||||
680 | if (Mask && !(Mask & Indeterminant)) { | ||||
681 | SH = Shift & 31; | ||||
682 | // make sure the mask is still a mask (wrap arounds may not be) | ||||
683 | return isRunOfOnes(Mask, MB, ME); | ||||
684 | } | ||||
685 | return false; | ||||
686 | } | ||||
687 | |||||
688 | bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { | ||||
689 | SDValue Base = ST->getBasePtr(); | ||||
690 | if (Base.getOpcode() != PPCISD::ADD_TLS) | ||||
691 | return false; | ||||
692 | SDValue Offset = ST->getOffset(); | ||||
693 | if (!Offset.isUndef()) | ||||
694 | return false; | ||||
695 | if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) | ||||
696 | return false; | ||||
697 | |||||
698 | SDLoc dl(ST); | ||||
699 | EVT MemVT = ST->getMemoryVT(); | ||||
700 | EVT RegVT = ST->getValue().getValueType(); | ||||
701 | |||||
702 | unsigned Opcode; | ||||
703 | switch (MemVT.getSimpleVT().SimpleTy) { | ||||
704 | default: | ||||
705 | return false; | ||||
706 | case MVT::i8: { | ||||
707 | Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS; | ||||
708 | break; | ||||
709 | } | ||||
710 | case MVT::i16: { | ||||
711 | Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS; | ||||
712 | break; | ||||
713 | } | ||||
714 | case MVT::i32: { | ||||
715 | Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS; | ||||
716 | break; | ||||
717 | } | ||||
718 | case MVT::i64: { | ||||
719 | Opcode = PPC::STDXTLS; | ||||
720 | break; | ||||
721 | } | ||||
722 | } | ||||
723 | SDValue Chain = ST->getChain(); | ||||
724 | SDVTList VTs = ST->getVTList(); | ||||
725 | SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1), | ||||
726 | Chain}; | ||||
727 | SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); | ||||
728 | transferMemOperands(ST, MN); | ||||
729 | ReplaceNode(ST, MN); | ||||
730 | return true; | ||||
731 | } | ||||
732 | |||||
733 | bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { | ||||
734 | SDValue Base = LD->getBasePtr(); | ||||
735 | if (Base.getOpcode() != PPCISD::ADD_TLS) | ||||
736 | return false; | ||||
737 | SDValue Offset = LD->getOffset(); | ||||
738 | if (!Offset.isUndef()) | ||||
739 | return false; | ||||
740 | if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) | ||||
741 | return false; | ||||
742 | |||||
743 | SDLoc dl(LD); | ||||
744 | EVT MemVT = LD->getMemoryVT(); | ||||
745 | EVT RegVT = LD->getValueType(0); | ||||
746 | unsigned Opcode; | ||||
747 | switch (MemVT.getSimpleVT().SimpleTy) { | ||||
748 | default: | ||||
749 | return false; | ||||
750 | case MVT::i8: { | ||||
751 | Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS; | ||||
752 | break; | ||||
753 | } | ||||
754 | case MVT::i16: { | ||||
755 | Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS; | ||||
756 | break; | ||||
757 | } | ||||
758 | case MVT::i32: { | ||||
759 | Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS; | ||||
760 | break; | ||||
761 | } | ||||
762 | case MVT::i64: { | ||||
763 | Opcode = PPC::LDXTLS; | ||||
764 | break; | ||||
765 | } | ||||
766 | } | ||||
767 | SDValue Chain = LD->getChain(); | ||||
768 | SDVTList VTs = LD->getVTList(); | ||||
769 | SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain}; | ||||
770 | SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); | ||||
771 | transferMemOperands(LD, MN); | ||||
772 | ReplaceNode(LD, MN); | ||||
773 | return true; | ||||
774 | } | ||||
775 | |||||
776 | /// Turn an or of two masked values into the rotate left word immediate then | ||||
777 | /// mask insert (rlwimi) instruction. | ||||
778 | bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { | ||||
779 | SDValue Op0 = N->getOperand(0); | ||||
780 | SDValue Op1 = N->getOperand(1); | ||||
781 | SDLoc dl(N); | ||||
782 | |||||
783 | KnownBits LKnown = CurDAG->computeKnownBits(Op0); | ||||
784 | KnownBits RKnown = CurDAG->computeKnownBits(Op1); | ||||
785 | |||||
786 | unsigned TargetMask = LKnown.Zero.getZExtValue(); | ||||
787 | unsigned InsertMask = RKnown.Zero.getZExtValue(); | ||||
788 | |||||
789 | if ((TargetMask | InsertMask) == 0xFFFFFFFF) { | ||||
790 | unsigned Op0Opc = Op0.getOpcode(); | ||||
791 | unsigned Op1Opc = Op1.getOpcode(); | ||||
792 | unsigned Value, SH = 0; | ||||
793 | TargetMask = ~TargetMask; | ||||
794 | InsertMask = ~InsertMask; | ||||
795 | |||||
796 | // If the LHS has a foldable shift and the RHS does not, then swap it to the | ||||
797 | // RHS so that we can fold the shift into the insert. | ||||
798 | if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { | ||||
799 | if (Op0.getOperand(0).getOpcode() == ISD::SHL || | ||||
800 | Op0.getOperand(0).getOpcode() == ISD::SRL) { | ||||
801 | if (Op1.getOperand(0).getOpcode() != ISD::SHL && | ||||
802 | Op1.getOperand(0).getOpcode() != ISD::SRL) { | ||||
803 | std::swap(Op0, Op1); | ||||
804 | std::swap(Op0Opc, Op1Opc); | ||||
805 | std::swap(TargetMask, InsertMask); | ||||
806 | } | ||||
807 | } | ||||
808 | } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { | ||||
809 | if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && | ||||
810 | Op1.getOperand(0).getOpcode() != ISD::SRL) { | ||||
811 | std::swap(Op0, Op1); | ||||
812 | std::swap(Op0Opc, Op1Opc); | ||||
813 | std::swap(TargetMask, InsertMask); | ||||
814 | } | ||||
815 | } | ||||
816 | |||||
817 | unsigned MB, ME; | ||||
818 | if (isRunOfOnes(InsertMask, MB, ME)) { | ||||
819 | if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && | ||||
820 | isInt32Immediate(Op1.getOperand(1), Value)) { | ||||
821 | Op1 = Op1.getOperand(0); | ||||
822 | SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; | ||||
823 | } | ||||
824 | if (Op1Opc == ISD::AND) { | ||||
825 | // The AND mask might not be a constant, and we need to make sure that | ||||
826 | // if we're going to fold the masking with the insert, all bits not | ||||
827 | // know to be zero in the mask are known to be one. | ||||
828 | KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1)); | ||||
829 | bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); | ||||
830 | |||||
831 | unsigned SHOpc = Op1.getOperand(0).getOpcode(); | ||||
832 | if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && | ||||
833 | isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { | ||||
834 | // Note that Value must be in range here (less than 32) because | ||||
835 | // otherwise there would not be any bits set in InsertMask. | ||||
836 | Op1 = Op1.getOperand(0).getOperand(0); | ||||
837 | SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; | ||||
838 | } | ||||
839 | } | ||||
840 | |||||
841 | SH &= 31; | ||||
842 | SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), | ||||
843 | getI32Imm(ME, dl) }; | ||||
844 | ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); | ||||
845 | return true; | ||||
846 | } | ||||
847 | } | ||||
848 | return false; | ||||
849 | } | ||||
850 | |||||
851 | static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { | ||||
852 | unsigned MaxTruncation = 0; | ||||
853 | // Cannot use range-based for loop here as we need the actual use (i.e. we | ||||
854 | // need the operand number corresponding to the use). A range-based for | ||||
855 | // will unbox the use and provide an SDNode*. | ||||
856 | for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); | ||||
857 | Use != UseEnd; ++Use) { | ||||
858 | unsigned Opc = | ||||
859 | Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode(); | ||||
860 | switch (Opc) { | ||||
861 | default: return 0; | ||||
862 | case ISD::TRUNCATE: | ||||
863 | if (Use->isMachineOpcode()) | ||||
864 | return 0; | ||||
865 | MaxTruncation = | ||||
866 | std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits()); | ||||
867 | continue; | ||||
868 | case ISD::STORE: { | ||||
869 | if (Use->isMachineOpcode()) | ||||
870 | return 0; | ||||
871 | StoreSDNode *STN = cast<StoreSDNode>(*Use); | ||||
872 | unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); | ||||
873 | if (MemVTSize == 64 || Use.getOperandNo() != 0) | ||||
874 | return 0; | ||||
875 | MaxTruncation = std::max(MaxTruncation, MemVTSize); | ||||
876 | continue; | ||||
877 | } | ||||
878 | case PPC::STW8: | ||||
879 | case PPC::STWX8: | ||||
880 | case PPC::STWU8: | ||||
881 | case PPC::STWUX8: | ||||
882 | if (Use.getOperandNo() != 0) | ||||
883 | return 0; | ||||
884 | MaxTruncation = std::max(MaxTruncation, 32u); | ||||
885 | continue; | ||||
886 | case PPC::STH8: | ||||
887 | case PPC::STHX8: | ||||
888 | case PPC::STHU8: | ||||
889 | case PPC::STHUX8: | ||||
890 | if (Use.getOperandNo() != 0) | ||||
891 | return 0; | ||||
892 | MaxTruncation = std::max(MaxTruncation, 16u); | ||||
893 | continue; | ||||
894 | case PPC::STB8: | ||||
895 | case PPC::STBX8: | ||||
896 | case PPC::STBU8: | ||||
897 | case PPC::STBUX8: | ||||
898 | if (Use.getOperandNo() != 0) | ||||
899 | return 0; | ||||
900 | MaxTruncation = std::max(MaxTruncation, 8u); | ||||
901 | continue; | ||||
902 | } | ||||
903 | } | ||||
904 | return MaxTruncation; | ||||
905 | } | ||||
906 | |||||
907 | // For any 32 < Num < 64, check if the Imm contains at least Num consecutive | ||||
908 | // zeros and return the number of bits by the left of these consecutive zeros. | ||||
909 | static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) { | ||||
910 | unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm)); | ||||
911 | unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm)); | ||||
912 | if ((HiTZ + LoLZ) >= Num) | ||||
913 | return (32 + HiTZ); | ||||
914 | return 0; | ||||
915 | } | ||||
916 | |||||
917 | // Direct materialization of 64-bit constants by enumerated patterns. | ||||
918 | static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, | ||||
919 | uint64_t Imm, unsigned &InstCnt) { | ||||
920 | unsigned TZ = countTrailingZeros<uint64_t>(Imm); | ||||
921 | unsigned LZ = countLeadingZeros<uint64_t>(Imm); | ||||
922 | unsigned TO = countTrailingOnes<uint64_t>(Imm); | ||||
923 | unsigned LO = countLeadingOnes<uint64_t>(Imm); | ||||
924 | unsigned Hi32 = Hi_32(Imm); | ||||
925 | unsigned Lo32 = Lo_32(Imm); | ||||
926 | SDNode *Result = nullptr; | ||||
927 | unsigned Shift = 0; | ||||
928 | |||||
929 | auto getI32Imm = [CurDAG, dl](unsigned Imm) { | ||||
930 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
931 | }; | ||||
932 | |||||
933 | // Following patterns use 1 instructions to materialize the Imm. | ||||
934 | InstCnt = 1; | ||||
935 | // 1-1) Patterns : {zeros}{15-bit valve} | ||||
936 | // {ones}{15-bit valve} | ||||
937 | if (isInt<16>(Imm)) { | ||||
938 | SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64); | ||||
939 | return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); | ||||
940 | } | ||||
941 | // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros} | ||||
942 | // {ones}{15-bit valve}{16 zeros} | ||||
943 | if (TZ > 15 && (LZ > 32 || LO > 32)) | ||||
944 | return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, | ||||
945 | getI32Imm((Imm >> 16) & 0xffff)); | ||||
946 | |||||
947 | // Following patterns use 2 instructions to materialize the Imm. | ||||
948 | InstCnt = 2; | ||||
949 | assert(LZ < 64 && "Unexpected leading zeros here.")(static_cast <bool> (LZ < 64 && "Unexpected leading zeros here." ) ? void (0) : __assert_fail ("LZ < 64 && \"Unexpected leading zeros here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 949, __extension__ __PRETTY_FUNCTION__)); | ||||
950 | // Count of ones follwing the leading zeros. | ||||
951 | unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ); | ||||
952 | // 2-1) Patterns : {zeros}{31-bit value} | ||||
953 | // {ones}{31-bit value} | ||||
954 | if (isInt<32>(Imm)) { | ||||
955 | uint64_t ImmHi16 = (Imm >> 16) & 0xffff; | ||||
956 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; | ||||
957 | Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); | ||||
958 | return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
959 | getI32Imm(Imm & 0xffff)); | ||||
960 | } | ||||
961 | // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros} | ||||
962 | // {zeros}{15-bit value}{zeros} | ||||
963 | // {zeros}{ones}{15-bit value} | ||||
964 | // {ones}{15-bit value}{zeros} | ||||
965 | // We can take advantage of LI's sign-extension semantics to generate leading | ||||
966 | // ones, and then use RLDIC to mask off the ones in both sides after rotation. | ||||
967 | if ((LZ + FO + TZ) > 48) { | ||||
968 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
969 | getI32Imm((Imm >> TZ) & 0xffff)); | ||||
970 | return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), | ||||
971 | getI32Imm(TZ), getI32Imm(LZ)); | ||||
972 | } | ||||
973 | // 2-3) Pattern : {zeros}{15-bit value}{ones} | ||||
974 | // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value, | ||||
975 | // therefore we can take advantage of LI's sign-extension semantics, and then | ||||
976 | // mask them off after rotation. | ||||
977 | // | ||||
978 | // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+ | ||||
979 | // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| | ||||
980 | // +------------------------+ +------------------------+ | ||||
981 | // 63 0 63 0 | ||||
982 | // Imm (Imm >> (48 - LZ) & 0xffff) | ||||
983 | // +----sext-----|--16-bit--+ +clear-|-----------------+ | ||||
984 | // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| | ||||
985 | // +------------------------+ +------------------------+ | ||||
986 | // 63 0 63 0 | ||||
987 | // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ | ||||
988 | if ((LZ + TO) > 48) { | ||||
989 | // Since the immediates with (LZ > 32) have been handled by previous | ||||
990 | // patterns, here we have (LZ <= 32) to make sure we will not shift right | ||||
991 | // the Imm by a negative value. | ||||
992 | assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value." ) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 992, __extension__ __PRETTY_FUNCTION__)); | ||||
993 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
994 | getI32Imm((Imm >> (48 - LZ) & 0xffff))); | ||||
995 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
996 | getI32Imm(48 - LZ), getI32Imm(LZ)); | ||||
997 | } | ||||
998 | // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones} | ||||
999 | // {ones}{15-bit value}{ones} | ||||
1000 | // We can take advantage of LI's sign-extension semantics to generate leading | ||||
1001 | // ones, and then use RLDICL to mask off the ones in left sides (if required) | ||||
1002 | // after rotation. | ||||
1003 | // | ||||
1004 | // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+ | ||||
1005 | // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb| | ||||
1006 | // +------------------------+ +------------------------+ | ||||
1007 | // 63 0 63 0 | ||||
1008 | // Imm (Imm >> TO) & 0xffff | ||||
1009 | // +----sext-----|--16-bit--+ +LZ|---------------------+ | ||||
1010 | // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111| | ||||
1011 | // +------------------------+ +------------------------+ | ||||
1012 | // 63 0 63 0 | ||||
1013 | // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ | ||||
1014 | if ((LZ + FO + TO) > 48) { | ||||
1015 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
1016 | getI32Imm((Imm >> TO) & 0xffff)); | ||||
1017 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1018 | getI32Imm(TO), getI32Imm(LZ)); | ||||
1019 | } | ||||
1020 | // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value} | ||||
1021 | // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit | ||||
1022 | // value, we can use LI for Lo16 without generating leading ones then add the | ||||
1023 | // Hi16(in Lo32). | ||||
1024 | if (LZ == 32 && ((Lo32 & 0x8000) == 0)) { | ||||
1025 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
1026 | getI32Imm(Lo32 & 0xffff)); | ||||
1027 | return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), | ||||
1028 | getI32Imm(Lo32 >> 16)); | ||||
1029 | } | ||||
1030 | // 2-6) Patterns : {******}{49 zeros}{******} | ||||
1031 | // {******}{49 ones}{******} | ||||
1032 | // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15 | ||||
1033 | // bits remain on both sides. Rotate right the Imm to construct an int<16> | ||||
1034 | // value, use LI for int<16> value and then use RLDICL without mask to rotate | ||||
1035 | // it back. | ||||
1036 | // | ||||
1037 | // 1) findContiguousZerosAtLeast(Imm, 49) | ||||
1038 | // +------|--zeros-|------+ +---ones--||---15 bit--+ | ||||
1039 | // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb| | ||||
1040 | // +----------------------+ +----------------------+ | ||||
1041 | // 63 0 63 0 | ||||
1042 | // | ||||
1043 | // 2) findContiguousZerosAtLeast(~Imm, 49) | ||||
1044 | // +------|--ones--|------+ +---ones--||---15 bit--+ | ||||
1045 | // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| | ||||
1046 | // +----------------------+ +----------------------+ | ||||
1047 | // 63 0 63 0 | ||||
1048 | if ((Shift = findContiguousZerosAtLeast(Imm, 49)) || | ||||
1049 | (Shift = findContiguousZerosAtLeast(~Imm, 49))) { | ||||
1050 | uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); | ||||
1051 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
1052 | getI32Imm(RotImm & 0xffff)); | ||||
1053 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1054 | getI32Imm(Shift), getI32Imm(0)); | ||||
1055 | } | ||||
1056 | |||||
1057 | // Following patterns use 3 instructions to materialize the Imm. | ||||
1058 | InstCnt = 3; | ||||
1059 | // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros} | ||||
1060 | // {zeros}{31-bit value}{zeros} | ||||
1061 | // {zeros}{ones}{31-bit value} | ||||
1062 | // {ones}{31-bit value}{zeros} | ||||
1063 | // We can take advantage of LIS's sign-extension semantics to generate leading | ||||
1064 | // ones, add the remaining bits with ORI, and then use RLDIC to mask off the | ||||
1065 | // ones in both sides after rotation. | ||||
1066 | if ((LZ + FO + TZ) > 32) { | ||||
1067 | uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff; | ||||
1068 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; | ||||
1069 | Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); | ||||
1070 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1071 | getI32Imm((Imm >> TZ) & 0xffff)); | ||||
1072 | return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), | ||||
1073 | getI32Imm(TZ), getI32Imm(LZ)); | ||||
1074 | } | ||||
1075 | // 3-2) Pattern : {zeros}{31-bit value}{ones} | ||||
1076 | // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits value, | ||||
1077 | // therefore we can take advantage of LIS's sign-extension semantics, add | ||||
1078 | // the remaining bits with ORI, and then mask them off after rotation. | ||||
1079 | // This is similar to Pattern 2-3, please refer to the diagram there. | ||||
1080 | if ((LZ + TO) > 32) { | ||||
1081 | // Since the immediates with (LZ > 32) have been handled by previous | ||||
1082 | // patterns, here we have (LZ <= 32) to make sure we will not shift right | ||||
1083 | // the Imm by a negative value. | ||||
1084 | assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value." ) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1084, __extension__ __PRETTY_FUNCTION__)); | ||||
1085 | Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, | ||||
1086 | getI32Imm((Imm >> (48 - LZ)) & 0xffff)); | ||||
1087 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1088 | getI32Imm((Imm >> (32 - LZ)) & 0xffff)); | ||||
1089 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1090 | getI32Imm(32 - LZ), getI32Imm(LZ)); | ||||
1091 | } | ||||
1092 | // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones} | ||||
1093 | // {ones}{31-bit value}{ones} | ||||
1094 | // We can take advantage of LIS's sign-extension semantics to generate leading | ||||
1095 | // ones, add the remaining bits with ORI, and then use RLDICL to mask off the | ||||
1096 | // ones in left sides (if required) after rotation. | ||||
1097 | // This is similar to Pattern 2-4, please refer to the diagram there. | ||||
1098 | if ((LZ + FO + TO) > 32) { | ||||
1099 | Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, | ||||
1100 | getI32Imm((Imm >> (TO + 16)) & 0xffff)); | ||||
1101 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1102 | getI32Imm((Imm >> TO) & 0xffff)); | ||||
1103 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1104 | getI32Imm(TO), getI32Imm(LZ)); | ||||
1105 | } | ||||
1106 | // 3-4) Patterns : High word == Low word | ||||
1107 | if (Hi32 == Lo32) { | ||||
1108 | // Handle the first 32 bits. | ||||
1109 | uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff; | ||||
1110 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; | ||||
1111 | Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); | ||||
1112 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1113 | getI32Imm(Lo32 & 0xffff)); | ||||
1114 | // Use rldimi to insert the Low word into High word. | ||||
1115 | SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), | ||||
1116 | getI32Imm(0)}; | ||||
1117 | return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); | ||||
1118 | } | ||||
1119 | // 3-5) Patterns : {******}{33 zeros}{******} | ||||
1120 | // {******}{33 ones}{******} | ||||
1121 | // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31 | ||||
1122 | // bits remain on both sides. Rotate right the Imm to construct an int<32> | ||||
1123 | // value, use LIS + ORI for int<32> value and then use RLDICL without mask to | ||||
1124 | // rotate it back. | ||||
1125 | // This is similar to Pattern 2-6, please refer to the diagram there. | ||||
1126 | if ((Shift = findContiguousZerosAtLeast(Imm, 33)) || | ||||
1127 | (Shift = findContiguousZerosAtLeast(~Imm, 33))) { | ||||
1128 | uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); | ||||
1129 | uint64_t ImmHi16 = (RotImm >> 16) & 0xffff; | ||||
1130 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; | ||||
1131 | Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); | ||||
1132 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1133 | getI32Imm(RotImm & 0xffff)); | ||||
1134 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1135 | getI32Imm(Shift), getI32Imm(0)); | ||||
1136 | } | ||||
1137 | |||||
1138 | InstCnt = 0; | ||||
1139 | return nullptr; | ||||
1140 | } | ||||
1141 | |||||
1142 | // Try to select instructions to generate a 64 bit immediate using prefix as | ||||
1143 | // well as non prefix instructions. The function will return the SDNode | ||||
1144 | // to materialize that constant or it will return nullptr if it does not | ||||
1145 | // find one. The variable InstCnt is set to the number of instructions that | ||||
1146 | // were selected. | ||||
1147 | static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, | ||||
1148 | uint64_t Imm, unsigned &InstCnt) { | ||||
1149 | unsigned TZ = countTrailingZeros<uint64_t>(Imm); | ||||
1150 | unsigned LZ = countLeadingZeros<uint64_t>(Imm); | ||||
1151 | unsigned TO = countTrailingOnes<uint64_t>(Imm); | ||||
1152 | unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ)); | ||||
1153 | unsigned Hi32 = Hi_32(Imm); | ||||
1154 | unsigned Lo32 = Lo_32(Imm); | ||||
1155 | |||||
1156 | auto getI32Imm = [CurDAG, dl](unsigned Imm) { | ||||
1157 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
1158 | }; | ||||
1159 | |||||
1160 | auto getI64Imm = [CurDAG, dl](uint64_t Imm) { | ||||
1161 | return CurDAG->getTargetConstant(Imm, dl, MVT::i64); | ||||
1162 | }; | ||||
1163 | |||||
1164 | // Following patterns use 1 instruction to materialize Imm. | ||||
1165 | InstCnt = 1; | ||||
1166 | |||||
1167 | // The pli instruction can materialize up to 34 bits directly. | ||||
1168 | // If a constant fits within 34-bits, emit the pli instruction here directly. | ||||
1169 | if (isInt<34>(Imm)) | ||||
1170 | return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, | ||||
1171 | CurDAG->getTargetConstant(Imm, dl, MVT::i64)); | ||||
1172 | |||||
1173 | // Require at least two instructions. | ||||
1174 | InstCnt = 2; | ||||
1175 | SDNode *Result = nullptr; | ||||
1176 | // Patterns : {zeros}{ones}{33-bit value}{zeros} | ||||
1177 | // {zeros}{33-bit value}{zeros} | ||||
1178 | // {zeros}{ones}{33-bit value} | ||||
1179 | // {ones}{33-bit value}{zeros} | ||||
1180 | // We can take advantage of PLI's sign-extension semantics to generate leading | ||||
1181 | // ones, and then use RLDIC to mask off the ones on both sides after rotation. | ||||
1182 | if ((LZ + FO + TZ) > 30) { | ||||
1183 | APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff); | ||||
1184 | APInt Extended = SignedInt34.sext(64); | ||||
1185 | Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, | ||||
1186 | getI64Imm(*Extended.getRawData())); | ||||
1187 | return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), | ||||
1188 | getI32Imm(TZ), getI32Imm(LZ)); | ||||
1189 | } | ||||
1190 | // Pattern : {zeros}{33-bit value}{ones} | ||||
1191 | // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value, | ||||
1192 | // therefore we can take advantage of PLI's sign-extension semantics, and then | ||||
1193 | // mask them off after rotation. | ||||
1194 | // | ||||
1195 | // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+ | ||||
1196 | // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| | ||||
1197 | // +------------------------+ +------------------------+ | ||||
1198 | // 63 0 63 0 | ||||
1199 | // | ||||
1200 | // +----sext-----|--34-bit--+ +clear-|-----------------+ | ||||
1201 | // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| | ||||
1202 | // +------------------------+ +------------------------+ | ||||
1203 | // 63 0 63 0 | ||||
1204 | if ((LZ + TO) > 30) { | ||||
1205 | APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff); | ||||
1206 | APInt Extended = SignedInt34.sext(64); | ||||
1207 | Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, | ||||
1208 | getI64Imm(*Extended.getRawData())); | ||||
1209 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1210 | getI32Imm(30 - LZ), getI32Imm(LZ)); | ||||
1211 | } | ||||
1212 | // Patterns : {zeros}{ones}{33-bit value}{ones} | ||||
1213 | // {ones}{33-bit value}{ones} | ||||
1214 | // Similar to LI we can take advantage of PLI's sign-extension semantics to | ||||
1215 | // generate leading ones, and then use RLDICL to mask off the ones in left | ||||
1216 | // sides (if required) after rotation. | ||||
1217 | if ((LZ + FO + TO) > 30) { | ||||
1218 | APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff); | ||||
1219 | APInt Extended = SignedInt34.sext(64); | ||||
1220 | Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, | ||||
1221 | getI64Imm(*Extended.getRawData())); | ||||
1222 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1223 | getI32Imm(TO), getI32Imm(LZ)); | ||||
1224 | } | ||||
1225 | // Patterns : {******}{31 zeros}{******} | ||||
1226 | // : {******}{31 ones}{******} | ||||
1227 | // If Imm contains 31 consecutive zeros/ones then the remaining bit count | ||||
1228 | // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI | ||||
1229 | // for the int<33> value and then use RLDICL without a mask to rotate it back. | ||||
1230 | // | ||||
1231 | // +------|--ones--|------+ +---ones--||---33 bit--+ | ||||
1232 | // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| | ||||
1233 | // +----------------------+ +----------------------+ | ||||
1234 | // 63 0 63 0 | ||||
1235 | for (unsigned Shift = 0; Shift < 63; ++Shift) { | ||||
1236 | uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); | ||||
1237 | if (isInt<34>(RotImm)) { | ||||
1238 | Result = | ||||
1239 | CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm)); | ||||
1240 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
1241 | SDValue(Result, 0), getI32Imm(Shift), | ||||
1242 | getI32Imm(0)); | ||||
1243 | } | ||||
1244 | } | ||||
1245 | |||||
1246 | // Patterns : High word == Low word | ||||
1247 | // This is basically a splat of a 32 bit immediate. | ||||
1248 | if (Hi32 == Lo32) { | ||||
1249 | Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); | ||||
1250 | SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), | ||||
1251 | getI32Imm(0)}; | ||||
1252 | return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); | ||||
1253 | } | ||||
1254 | |||||
1255 | InstCnt = 3; | ||||
1256 | // Catch-all | ||||
1257 | // This pattern can form any 64 bit immediate in 3 instructions. | ||||
1258 | SDNode *ResultHi = | ||||
1259 | CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); | ||||
1260 | SDNode *ResultLo = | ||||
1261 | CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32)); | ||||
1262 | SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32), | ||||
1263 | getI32Imm(0)}; | ||||
1264 | return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); | ||||
1265 | } | ||||
1266 | |||||
1267 | static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, | ||||
1268 | unsigned *InstCnt = nullptr) { | ||||
1269 | unsigned InstCntDirect = 0; | ||||
1270 | // No more than 3 instructions is used if we can select the i64 immediate | ||||
1271 | // directly. | ||||
1272 | SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect); | ||||
1273 | |||||
1274 | const PPCSubtarget &Subtarget = | ||||
1275 | CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>(); | ||||
1276 | |||||
1277 | // If we have prefixed instructions and there is a chance we can | ||||
1278 | // materialize the constant with fewer prefixed instructions than | ||||
1279 | // non-prefixed, try that. | ||||
1280 | if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) { | ||||
1281 | unsigned InstCntDirectP = 0; | ||||
1282 | SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP); | ||||
1283 | // Use the prefix case in either of two cases: | ||||
1284 | // 1) We have no result from the non-prefix case to use. | ||||
1285 | // 2) The non-prefix case uses more instructions than the prefix case. | ||||
1286 | // If the prefix and non-prefix cases use the same number of instructions | ||||
1287 | // we will prefer the non-prefix case. | ||||
1288 | if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) { | ||||
1289 | if (InstCnt) | ||||
1290 | *InstCnt = InstCntDirectP; | ||||
1291 | return ResultP; | ||||
1292 | } | ||||
1293 | } | ||||
1294 | |||||
1295 | if (Result) { | ||||
1296 | if (InstCnt) | ||||
1297 | *InstCnt = InstCntDirect; | ||||
1298 | return Result; | ||||
1299 | } | ||||
1300 | auto getI32Imm = [CurDAG, dl](unsigned Imm) { | ||||
1301 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
1302 | }; | ||||
1303 | // Handle the upper 32 bit value. | ||||
1304 | Result = | ||||
1305 | selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect); | ||||
1306 | // Add in the last bits as required. | ||||
1307 | if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) { | ||||
1308 | Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, | ||||
1309 | SDValue(Result, 0), getI32Imm(Hi16)); | ||||
1310 | ++InstCntDirect; | ||||
1311 | } | ||||
1312 | if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) { | ||||
1313 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1314 | getI32Imm(Lo16)); | ||||
1315 | ++InstCntDirect; | ||||
1316 | } | ||||
1317 | if (InstCnt) | ||||
1318 | *InstCnt = InstCntDirect; | ||||
1319 | return Result; | ||||
1320 | } | ||||
1321 | |||||
1322 | // Select a 64-bit constant. | ||||
1323 | static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { | ||||
1324 | SDLoc dl(N); | ||||
1325 | |||||
1326 | // Get 64 bit value. | ||||
1327 | int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); | ||||
1328 | if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { | ||||
1329 | uint64_t SextImm = SignExtend64(Imm, MinSize); | ||||
1330 | SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); | ||||
1331 | if (isInt<16>(SextImm)) | ||||
1332 | return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); | ||||
1333 | } | ||||
1334 | return selectI64Imm(CurDAG, dl, Imm); | ||||
1335 | } | ||||
1336 | |||||
1337 | namespace { | ||||
1338 | |||||
1339 | class BitPermutationSelector { | ||||
1340 | struct ValueBit { | ||||
1341 | SDValue V; | ||||
1342 | |||||
1343 | // The bit number in the value, using a convention where bit 0 is the | ||||
1344 | // lowest-order bit. | ||||
1345 | unsigned Idx; | ||||
1346 | |||||
1347 | // ConstZero means a bit we need to mask off. | ||||
1348 | // Variable is a bit comes from an input variable. | ||||
1349 | // VariableKnownToBeZero is also a bit comes from an input variable, | ||||
1350 | // but it is known to be already zero. So we do not need to mask them. | ||||
1351 | enum Kind { | ||||
1352 | ConstZero, | ||||
1353 | Variable, | ||||
1354 | VariableKnownToBeZero | ||||
1355 | } K; | ||||
1356 | |||||
1357 | ValueBit(SDValue V, unsigned I, Kind K = Variable) | ||||
1358 | : V(V), Idx(I), K(K) {} | ||||
1359 | ValueBit(Kind K = Variable) | ||||
1360 | : V(SDValue(nullptr, 0)), Idx(UINT32_MAX(4294967295U)), K(K) {} | ||||
1361 | |||||
1362 | bool isZero() const { | ||||
1363 | return K == ConstZero || K == VariableKnownToBeZero; | ||||
1364 | } | ||||
1365 | |||||
1366 | bool hasValue() const { | ||||
1367 | return K == Variable || K == VariableKnownToBeZero; | ||||
1368 | } | ||||
1369 | |||||
1370 | SDValue getValue() const { | ||||
1371 | assert(hasValue() && "Cannot get the value of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value of a constant bit" ) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value of a constant bit\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1371, __extension__ __PRETTY_FUNCTION__)); | ||||
1372 | return V; | ||||
1373 | } | ||||
1374 | |||||
1375 | unsigned getValueBitIndex() const { | ||||
1376 | assert(hasValue() && "Cannot get the value bit index of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value bit index of a constant bit" ) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value bit index of a constant bit\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1376, __extension__ __PRETTY_FUNCTION__)); | ||||
1377 | return Idx; | ||||
1378 | } | ||||
1379 | }; | ||||
1380 | |||||
1381 | // A bit group has the same underlying value and the same rotate factor. | ||||
1382 | struct BitGroup { | ||||
1383 | SDValue V; | ||||
1384 | unsigned RLAmt; | ||||
1385 | unsigned StartIdx, EndIdx; | ||||
1386 | |||||
1387 | // This rotation amount assumes that the lower 32 bits of the quantity are | ||||
1388 | // replicated in the high 32 bits by the rotation operator (which is done | ||||
1389 | // by rlwinm and friends in 64-bit mode). | ||||
1390 | bool Repl32; | ||||
1391 | // Did converting to Repl32 == true change the rotation factor? If it did, | ||||
1392 | // it decreased it by 32. | ||||
1393 | bool Repl32CR; | ||||
1394 | // Was this group coalesced after setting Repl32 to true? | ||||
1395 | bool Repl32Coalesced; | ||||
1396 | |||||
1397 | BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) | ||||
1398 | : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), | ||||
1399 | Repl32Coalesced(false) { | ||||
1400 | LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << Rdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << " [" << S << ", " << E << "]\n"; } } while (false) | ||||
1401 | << " [" << S << ", " << E << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << " [" << S << ", " << E << "]\n"; } } while (false); | ||||
1402 | } | ||||
1403 | }; | ||||
1404 | |||||
1405 | // Information on each (Value, RLAmt) pair (like the number of groups | ||||
1406 | // associated with each) used to choose the lowering method. | ||||
1407 | struct ValueRotInfo { | ||||
1408 | SDValue V; | ||||
1409 | unsigned RLAmt = std::numeric_limits<unsigned>::max(); | ||||
1410 | unsigned NumGroups = 0; | ||||
1411 | unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max(); | ||||
1412 | bool Repl32 = false; | ||||
1413 | |||||
1414 | ValueRotInfo() = default; | ||||
1415 | |||||
1416 | // For sorting (in reverse order) by NumGroups, and then by | ||||
1417 | // FirstGroupStartIdx. | ||||
1418 | bool operator < (const ValueRotInfo &Other) const { | ||||
1419 | // We need to sort so that the non-Repl32 come first because, when we're | ||||
1420 | // doing masking, the Repl32 bit groups might be subsumed into the 64-bit | ||||
1421 | // masking operation. | ||||
1422 | if (Repl32 < Other.Repl32) | ||||
1423 | return true; | ||||
1424 | else if (Repl32 > Other.Repl32) | ||||
1425 | return false; | ||||
1426 | else if (NumGroups > Other.NumGroups) | ||||
1427 | return true; | ||||
1428 | else if (NumGroups < Other.NumGroups) | ||||
1429 | return false; | ||||
1430 | else if (RLAmt == 0 && Other.RLAmt != 0) | ||||
1431 | return true; | ||||
1432 | else if (RLAmt != 0 && Other.RLAmt == 0) | ||||
1433 | return false; | ||||
1434 | else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) | ||||
1435 | return true; | ||||
1436 | return false; | ||||
1437 | } | ||||
1438 | }; | ||||
1439 | |||||
1440 | using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>; | ||||
1441 | using ValueBitsMemoizer = | ||||
1442 | DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>; | ||||
1443 | ValueBitsMemoizer Memoizer; | ||||
1444 | |||||
1445 | // Return a pair of bool and a SmallVector pointer to a memoization entry. | ||||
1446 | // The bool is true if something interesting was deduced, otherwise if we're | ||||
1447 | // providing only a generic representation of V (or something else likewise | ||||
1448 | // uninteresting for instruction selection) through the SmallVector. | ||||
1449 | std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V, | ||||
1450 | unsigned NumBits) { | ||||
1451 | auto &ValueEntry = Memoizer[V]; | ||||
1452 | if (ValueEntry) | ||||
1453 | return std::make_pair(ValueEntry->first, &ValueEntry->second); | ||||
1454 | ValueEntry.reset(new ValueBitsMemoizedValue()); | ||||
1455 | bool &Interesting = ValueEntry->first; | ||||
1456 | SmallVector<ValueBit, 64> &Bits = ValueEntry->second; | ||||
1457 | Bits.resize(NumBits); | ||||
1458 | |||||
1459 | switch (V.getOpcode()) { | ||||
1460 | default: break; | ||||
1461 | case ISD::ROTL: | ||||
1462 | if (isa<ConstantSDNode>(V.getOperand(1))) { | ||||
1463 | unsigned RotAmt = V.getConstantOperandVal(1); | ||||
1464 | |||||
1465 | const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; | ||||
1466 | |||||
1467 | for (unsigned i = 0; i < NumBits; ++i) | ||||
1468 | Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt]; | ||||
1469 | |||||
1470 | return std::make_pair(Interesting = true, &Bits); | ||||
1471 | } | ||||
1472 | break; | ||||
1473 | case ISD::SHL: | ||||
1474 | case PPCISD::SHL: | ||||
1475 | if (isa<ConstantSDNode>(V.getOperand(1))) { | ||||
1476 | unsigned ShiftAmt = V.getConstantOperandVal(1); | ||||
1477 | |||||
1478 | const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; | ||||
1479 | |||||
1480 | for (unsigned i = ShiftAmt; i < NumBits; ++i) | ||||
1481 | Bits[i] = LHSBits[i - ShiftAmt]; | ||||
1482 | |||||
1483 | for (unsigned i = 0; i < ShiftAmt; ++i) | ||||
1484 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1485 | |||||
1486 | return std::make_pair(Interesting = true, &Bits); | ||||
1487 | } | ||||
1488 | break; | ||||
1489 | case ISD::SRL: | ||||
1490 | case PPCISD::SRL: | ||||
1491 | if (isa<ConstantSDNode>(V.getOperand(1))) { | ||||
1492 | unsigned ShiftAmt = V.getConstantOperandVal(1); | ||||
1493 | |||||
1494 | const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; | ||||
1495 | |||||
1496 | for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) | ||||
1497 | Bits[i] = LHSBits[i + ShiftAmt]; | ||||
1498 | |||||
1499 | for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) | ||||
1500 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1501 | |||||
1502 | return std::make_pair(Interesting = true, &Bits); | ||||
1503 | } | ||||
1504 | break; | ||||
1505 | case ISD::AND: | ||||
1506 | if (isa<ConstantSDNode>(V.getOperand(1))) { | ||||
1507 | uint64_t Mask = V.getConstantOperandVal(1); | ||||
1508 | |||||
1509 | const SmallVector<ValueBit, 64> *LHSBits; | ||||
1510 | // Mark this as interesting, only if the LHS was also interesting. This | ||||
1511 | // prevents the overall procedure from matching a single immediate 'and' | ||||
1512 | // (which is non-optimal because such an and might be folded with other | ||||
1513 | // things if we don't select it here). | ||||
1514 | std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); | ||||
1515 | |||||
1516 | for (unsigned i = 0; i < NumBits; ++i) | ||||
1517 | if (((Mask >> i) & 1) == 1) | ||||
1518 | Bits[i] = (*LHSBits)[i]; | ||||
1519 | else { | ||||
1520 | // AND instruction masks this bit. If the input is already zero, | ||||
1521 | // we have nothing to do here. Otherwise, make the bit ConstZero. | ||||
1522 | if ((*LHSBits)[i].isZero()) | ||||
1523 | Bits[i] = (*LHSBits)[i]; | ||||
1524 | else | ||||
1525 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1526 | } | ||||
1527 | |||||
1528 | return std::make_pair(Interesting, &Bits); | ||||
1529 | } | ||||
1530 | break; | ||||
1531 | case ISD::OR: { | ||||
1532 | const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; | ||||
1533 | const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; | ||||
1534 | |||||
1535 | bool AllDisjoint = true; | ||||
1536 | SDValue LastVal = SDValue(); | ||||
1537 | unsigned LastIdx = 0; | ||||
1538 | for (unsigned i = 0; i < NumBits; ++i) { | ||||
1539 | if (LHSBits[i].isZero() && RHSBits[i].isZero()) { | ||||
1540 | // If both inputs are known to be zero and one is ConstZero and | ||||
1541 | // another is VariableKnownToBeZero, we can select whichever | ||||
1542 | // we like. To minimize the number of bit groups, we select | ||||
1543 | // VariableKnownToBeZero if this bit is the next bit of the same | ||||
1544 | // input variable from the previous bit. Otherwise, we select | ||||
1545 | // ConstZero. | ||||
1546 | if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && | ||||
1547 | LHSBits[i].getValueBitIndex() == LastIdx + 1) | ||||
1548 | Bits[i] = LHSBits[i]; | ||||
1549 | else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && | ||||
1550 | RHSBits[i].getValueBitIndex() == LastIdx + 1) | ||||
1551 | Bits[i] = RHSBits[i]; | ||||
1552 | else | ||||
1553 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1554 | } | ||||
1555 | else if (LHSBits[i].isZero()) | ||||
1556 | Bits[i] = RHSBits[i]; | ||||
1557 | else if (RHSBits[i].isZero()) | ||||
1558 | Bits[i] = LHSBits[i]; | ||||
1559 | else { | ||||
1560 | AllDisjoint = false; | ||||
1561 | break; | ||||
1562 | } | ||||
1563 | // We remember the value and bit index of this bit. | ||||
1564 | if (Bits[i].hasValue()) { | ||||
1565 | LastVal = Bits[i].getValue(); | ||||
1566 | LastIdx = Bits[i].getValueBitIndex(); | ||||
1567 | } | ||||
1568 | else { | ||||
1569 | if (LastVal) LastVal = SDValue(); | ||||
1570 | LastIdx = 0; | ||||
1571 | } | ||||
1572 | } | ||||
1573 | |||||
1574 | if (!AllDisjoint) | ||||
1575 | break; | ||||
1576 | |||||
1577 | return std::make_pair(Interesting = true, &Bits); | ||||
1578 | } | ||||
1579 | case ISD::ZERO_EXTEND: { | ||||
1580 | // We support only the case with zero extension from i32 to i64 so far. | ||||
1581 | if (V.getValueType() != MVT::i64 || | ||||
1582 | V.getOperand(0).getValueType() != MVT::i32) | ||||
1583 | break; | ||||
1584 | |||||
1585 | const SmallVector<ValueBit, 64> *LHSBits; | ||||
1586 | const unsigned NumOperandBits = 32; | ||||
1587 | std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), | ||||
1588 | NumOperandBits); | ||||
1589 | |||||
1590 | for (unsigned i = 0; i < NumOperandBits; ++i) | ||||
1591 | Bits[i] = (*LHSBits)[i]; | ||||
1592 | |||||
1593 | for (unsigned i = NumOperandBits; i < NumBits; ++i) | ||||
1594 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1595 | |||||
1596 | return std::make_pair(Interesting, &Bits); | ||||
1597 | } | ||||
1598 | case ISD::TRUNCATE: { | ||||
1599 | EVT FromType = V.getOperand(0).getValueType(); | ||||
1600 | EVT ToType = V.getValueType(); | ||||
1601 | // We support only the case with truncate from i64 to i32. | ||||
1602 | if (FromType != MVT::i64 || ToType != MVT::i32) | ||||
1603 | break; | ||||
1604 | const unsigned NumAllBits = FromType.getSizeInBits(); | ||||
1605 | SmallVector<ValueBit, 64> *InBits; | ||||
1606 | std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), | ||||
1607 | NumAllBits); | ||||
1608 | const unsigned NumValidBits = ToType.getSizeInBits(); | ||||
1609 | |||||
1610 | // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. | ||||
1611 | // So, we cannot include this truncate. | ||||
1612 | bool UseUpper32bit = false; | ||||
1613 | for (unsigned i = 0; i < NumValidBits; ++i) | ||||
1614 | if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { | ||||
1615 | UseUpper32bit = true; | ||||
1616 | break; | ||||
1617 | } | ||||
1618 | if (UseUpper32bit) | ||||
1619 | break; | ||||
1620 | |||||
1621 | for (unsigned i = 0; i < NumValidBits; ++i) | ||||
1622 | Bits[i] = (*InBits)[i]; | ||||
1623 | |||||
1624 | return std::make_pair(Interesting, &Bits); | ||||
1625 | } | ||||
1626 | case ISD::AssertZext: { | ||||
1627 | // For AssertZext, we look through the operand and | ||||
1628 | // mark the bits known to be zero. | ||||
1629 | const SmallVector<ValueBit, 64> *LHSBits; | ||||
1630 | std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), | ||||
1631 | NumBits); | ||||
1632 | |||||
1633 | EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT(); | ||||
1634 | const unsigned NumValidBits = FromType.getSizeInBits(); | ||||
1635 | for (unsigned i = 0; i < NumValidBits; ++i) | ||||
1636 | Bits[i] = (*LHSBits)[i]; | ||||
1637 | |||||
1638 | // These bits are known to be zero but the AssertZext may be from a value | ||||
1639 | // that already has some constant zero bits (i.e. from a masking and). | ||||
1640 | for (unsigned i = NumValidBits; i < NumBits; ++i) | ||||
1641 | Bits[i] = (*LHSBits)[i].hasValue() | ||||
1642 | ? ValueBit((*LHSBits)[i].getValue(), | ||||
1643 | (*LHSBits)[i].getValueBitIndex(), | ||||
1644 | ValueBit::VariableKnownToBeZero) | ||||
1645 | : ValueBit(ValueBit::ConstZero); | ||||
1646 | |||||
1647 | return std::make_pair(Interesting, &Bits); | ||||
1648 | } | ||||
1649 | case ISD::LOAD: | ||||
1650 | LoadSDNode *LD = cast<LoadSDNode>(V); | ||||
1651 | if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { | ||||
1652 | EVT VT = LD->getMemoryVT(); | ||||
1653 | const unsigned NumValidBits = VT.getSizeInBits(); | ||||
1654 | |||||
1655 | for (unsigned i = 0; i < NumValidBits; ++i) | ||||
1656 | Bits[i] = ValueBit(V, i); | ||||
1657 | |||||
1658 | // These bits are known to be zero. | ||||
1659 | for (unsigned i = NumValidBits; i < NumBits; ++i) | ||||
1660 | Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); | ||||
1661 | |||||
1662 | // Zero-extending load itself cannot be optimized. So, it is not | ||||
1663 | // interesting by itself though it gives useful information. | ||||
1664 | return std::make_pair(Interesting = false, &Bits); | ||||
1665 | } | ||||
1666 | break; | ||||
1667 | } | ||||
1668 | |||||
1669 | for (unsigned i = 0; i < NumBits; ++i) | ||||
1670 | Bits[i] = ValueBit(V, i); | ||||
1671 | |||||
1672 | return std::make_pair(Interesting = false, &Bits); | ||||
1673 | } | ||||
1674 | |||||
1675 | // For each value (except the constant ones), compute the left-rotate amount | ||||
1676 | // to get it from its original to final position. | ||||
1677 | void computeRotationAmounts() { | ||||
1678 | NeedMask = false; | ||||
1679 | RLAmt.resize(Bits.size()); | ||||
1680 | for (unsigned i = 0; i < Bits.size(); ++i) | ||||
1681 | if (Bits[i].hasValue()) { | ||||
1682 | unsigned VBI = Bits[i].getValueBitIndex(); | ||||
1683 | if (i >= VBI) | ||||
1684 | RLAmt[i] = i - VBI; | ||||
1685 | else | ||||
1686 | RLAmt[i] = Bits.size() - (VBI - i); | ||||
1687 | } else if (Bits[i].isZero()) { | ||||
1688 | NeedMask = true; | ||||
1689 | RLAmt[i] = UINT32_MAX(4294967295U); | ||||
1690 | } else { | ||||
1691 | llvm_unreachable("Unknown value bit type")::llvm::llvm_unreachable_internal("Unknown value bit type", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1691); | ||||
1692 | } | ||||
1693 | } | ||||
1694 | |||||
1695 | // Collect groups of consecutive bits with the same underlying value and | ||||
1696 | // rotation factor. If we're doing late masking, we ignore zeros, otherwise | ||||
1697 | // they break up groups. | ||||
1698 | void collectBitGroups(bool LateMask) { | ||||
1699 | BitGroups.clear(); | ||||
1700 | |||||
1701 | unsigned LastRLAmt = RLAmt[0]; | ||||
1702 | SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); | ||||
1703 | unsigned LastGroupStartIdx = 0; | ||||
1704 | bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); | ||||
1705 | for (unsigned i = 1; i < Bits.size(); ++i) { | ||||
1706 | unsigned ThisRLAmt = RLAmt[i]; | ||||
1707 | SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); | ||||
1708 | if (LateMask && !ThisValue) { | ||||
1709 | ThisValue = LastValue; | ||||
1710 | ThisRLAmt = LastRLAmt; | ||||
1711 | // If we're doing late masking, then the first bit group always starts | ||||
1712 | // at zero (even if the first bits were zero). | ||||
1713 | if (BitGroups.empty()) | ||||
1714 | LastGroupStartIdx = 0; | ||||
1715 | } | ||||
1716 | |||||
1717 | // If this bit is known to be zero and the current group is a bit group | ||||
1718 | // of zeros, we do not need to terminate the current bit group even the | ||||
1719 | // Value or RLAmt does not match here. Instead, we terminate this group | ||||
1720 | // when the first non-zero bit appears later. | ||||
1721 | if (IsGroupOfZeros && Bits[i].isZero()) | ||||
1722 | continue; | ||||
1723 | |||||
1724 | // If this bit has the same underlying value and the same rotate factor as | ||||
1725 | // the last one, then they're part of the same group. | ||||
1726 | if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) | ||||
1727 | // We cannot continue the current group if this bits is not known to | ||||
1728 | // be zero in a bit group of zeros. | ||||
1729 | if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) | ||||
1730 | continue; | ||||
1731 | |||||
1732 | if (LastValue.getNode()) | ||||
1733 | BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, | ||||
1734 | i-1)); | ||||
1735 | LastRLAmt = ThisRLAmt; | ||||
1736 | LastValue = ThisValue; | ||||
1737 | LastGroupStartIdx = i; | ||||
1738 | IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); | ||||
1739 | } | ||||
1740 | if (LastValue.getNode()) | ||||
1741 | BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, | ||||
1742 | Bits.size()-1)); | ||||
1743 | |||||
1744 | if (BitGroups.empty()) | ||||
1745 | return; | ||||
1746 | |||||
1747 | // We might be able to combine the first and last groups. | ||||
1748 | if (BitGroups.size() > 1) { | ||||
1749 | // If the first and last groups are the same, then remove the first group | ||||
1750 | // in favor of the last group, making the ending index of the last group | ||||
1751 | // equal to the ending index of the to-be-removed first group. | ||||
1752 | if (BitGroups[0].StartIdx == 0 && | ||||
1753 | BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && | ||||
1754 | BitGroups[0].V == BitGroups[BitGroups.size()-1].V && | ||||
1755 | BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { | ||||
1756 | LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining final bit group with initial one\n" ; } } while (false); | ||||
1757 | BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; | ||||
1758 | BitGroups.erase(BitGroups.begin()); | ||||
1759 | } | ||||
1760 | } | ||||
1761 | } | ||||
1762 | |||||
1763 | // Take all (SDValue, RLAmt) pairs and sort them by the number of groups | ||||
1764 | // associated with each. If the number of groups are same, we prefer a group | ||||
1765 | // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate | ||||
1766 | // instruction. If there is a degeneracy, pick the one that occurs | ||||
1767 | // first (in the final value). | ||||
1768 | void collectValueRotInfo() { | ||||
1769 | ValueRots.clear(); | ||||
1770 | |||||
1771 | for (auto &BG : BitGroups) { | ||||
1772 | unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0); | ||||
1773 | ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)]; | ||||
1774 | VRI.V = BG.V; | ||||
1775 | VRI.RLAmt = BG.RLAmt; | ||||
1776 | VRI.Repl32 = BG.Repl32; | ||||
1777 | VRI.NumGroups += 1; | ||||
1778 | VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx); | ||||
1779 | } | ||||
1780 | |||||
1781 | // Now that we've collected the various ValueRotInfo instances, we need to | ||||
1782 | // sort them. | ||||
1783 | ValueRotsVec.clear(); | ||||
1784 | for (auto &I : ValueRots) { | ||||
1785 | ValueRotsVec.push_back(I.second); | ||||
1786 | } | ||||
1787 | llvm::sort(ValueRotsVec); | ||||
1788 | } | ||||
1789 | |||||
1790 | // In 64-bit mode, rlwinm and friends have a rotation operator that | ||||
1791 | // replicates the low-order 32 bits into the high-order 32-bits. The mask | ||||
1792 | // indices of these instructions can only be in the lower 32 bits, so they | ||||
1793 | // can only represent some 64-bit bit groups. However, when they can be used, | ||||
1794 | // the 32-bit replication can be used to represent, as a single bit group, | ||||
1795 | // otherwise separate bit groups. We'll convert to replicated-32-bit bit | ||||
1796 | // groups when possible. Returns true if any of the bit groups were | ||||
1797 | // converted. | ||||
1798 | void assignRepl32BitGroups() { | ||||
1799 | // If we have bits like this: | ||||
1800 | // | ||||
1801 | // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 | ||||
1802 | // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24 | ||||
1803 | // Groups: | RLAmt = 8 | RLAmt = 40 | | ||||
1804 | // | ||||
1805 | // But, making use of a 32-bit operation that replicates the low-order 32 | ||||
1806 | // bits into the high-order 32 bits, this can be one bit group with a RLAmt | ||||
1807 | // of 8. | ||||
1808 | |||||
1809 | auto IsAllLow32 = [this](BitGroup & BG) { | ||||
1810 | if (BG.StartIdx <= BG.EndIdx) { | ||||
1811 | for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { | ||||
1812 | if (!Bits[i].hasValue()) | ||||
1813 | continue; | ||||
1814 | if (Bits[i].getValueBitIndex() >= 32) | ||||
1815 | return false; | ||||
1816 | } | ||||
1817 | } else { | ||||
1818 | for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { | ||||
1819 | if (!Bits[i].hasValue()) | ||||
1820 | continue; | ||||
1821 | if (Bits[i].getValueBitIndex() >= 32) | ||||
1822 | return false; | ||||
1823 | } | ||||
1824 | for (unsigned i = 0; i <= BG.EndIdx; ++i) { | ||||
1825 | if (!Bits[i].hasValue()) | ||||
1826 | continue; | ||||
1827 | if (Bits[i].getValueBitIndex() >= 32) | ||||
1828 | return false; | ||||
1829 | } | ||||
1830 | } | ||||
1831 | |||||
1832 | return true; | ||||
1833 | }; | ||||
1834 | |||||
1835 | for (auto &BG : BitGroups) { | ||||
1836 | // If this bit group has RLAmt of 0 and will not be merged with | ||||
1837 | // another bit group, we don't benefit from Repl32. We don't mark | ||||
1838 | // such group to give more freedom for later instruction selection. | ||||
1839 | if (BG.RLAmt == 0) { | ||||
1840 | auto PotentiallyMerged = [this](BitGroup & BG) { | ||||
1841 | for (auto &BG2 : BitGroups) | ||||
1842 | if (&BG != &BG2 && BG.V == BG2.V && | ||||
1843 | (BG2.RLAmt == 0 || BG2.RLAmt == 32)) | ||||
1844 | return true; | ||||
1845 | return false; | ||||
1846 | }; | ||||
1847 | if (!PotentiallyMerged(BG)) | ||||
1848 | continue; | ||||
1849 | } | ||||
1850 | if (BG.StartIdx < 32 && BG.EndIdx < 32) { | ||||
1851 | if (IsAllLow32(BG)) { | ||||
1852 | if (BG.RLAmt >= 32) { | ||||
1853 | BG.RLAmt -= 32; | ||||
1854 | BG.Repl32CR = true; | ||||
1855 | } | ||||
1856 | |||||
1857 | BG.Repl32 = true; | ||||
1858 | |||||
1859 | LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for " << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" << BG.StartIdx << ", " << BG .EndIdx << "]\n"; } } while (false) | ||||
1860 | << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for " << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" << BG.StartIdx << ", " << BG .EndIdx << "]\n"; } } while (false) | ||||
1861 | << BG.StartIdx << ", " << BG.EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for " << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" << BG.StartIdx << ", " << BG .EndIdx << "]\n"; } } while (false); | ||||
1862 | } | ||||
1863 | } | ||||
1864 | } | ||||
1865 | |||||
1866 | // Now walk through the bit groups, consolidating where possible. | ||||
1867 | for (auto I = BitGroups.begin(); I != BitGroups.end();) { | ||||
1868 | // We might want to remove this bit group by merging it with the previous | ||||
1869 | // group (which might be the ending group). | ||||
1870 | auto IP = (I == BitGroups.begin()) ? | ||||
1871 | std::prev(BitGroups.end()) : std::prev(I); | ||||
1872 | if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && | ||||
1873 | I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { | ||||
1874 | |||||
1875 | LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false) | ||||
1876 | << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false) | ||||
1877 | << I->StartIdx << ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false) | ||||
1878 | << "] with group with range [" << IP->StartIdx << ", "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false) | ||||
1879 | << IP->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false); | ||||
1880 | |||||
1881 | IP->EndIdx = I->EndIdx; | ||||
1882 | IP->Repl32CR = IP->Repl32CR || I->Repl32CR; | ||||
1883 | IP->Repl32Coalesced = true; | ||||
1884 | I = BitGroups.erase(I); | ||||
1885 | continue; | ||||
1886 | } else { | ||||
1887 | // There is a special case worth handling: If there is a single group | ||||
1888 | // covering the entire upper 32 bits, and it can be merged with both | ||||
1889 | // the next and previous groups (which might be the same group), then | ||||
1890 | // do so. If it is the same group (so there will be only one group in | ||||
1891 | // total), then we need to reverse the order of the range so that it | ||||
1892 | // covers the entire 64 bits. | ||||
1893 | if (I->StartIdx == 32 && I->EndIdx == 63) { | ||||
1894 | assert(std::next(I) == BitGroups.end() &&(static_cast <bool> (std::next(I) == BitGroups.end() && "bit group ends at index 63 but there is another?") ? void ( 0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1895, __extension__ __PRETTY_FUNCTION__)) | ||||
1895 | "bit group ends at index 63 but there is another?")(static_cast <bool> (std::next(I) == BitGroups.end() && "bit group ends at index 63 but there is another?") ? void ( 0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1895, __extension__ __PRETTY_FUNCTION__)); | ||||
1896 | auto IN = BitGroups.begin(); | ||||
1897 | |||||
1898 | if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && | ||||
1899 | (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && | ||||
1900 | IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && | ||||
1901 | IsAllLow32(*I)) { | ||||
1902 | |||||
1903 | LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1904 | << " RLAmt = " << I->RLAmt << " [" << I->StartIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1905 | << ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1906 | << "] with 32-bit replicated groups with ranges ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1907 | << IP->StartIdx << ", " << IP->EndIdx << "] and ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1908 | << IN->StartIdx << ", " << IN->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false); | ||||
1909 | |||||
1910 | if (IP == IN) { | ||||
1911 | // There is only one other group; change it to cover the whole | ||||
1912 | // range (backward, so that it can still be Repl32 but cover the | ||||
1913 | // whole 64-bit range). | ||||
1914 | IP->StartIdx = 31; | ||||
1915 | IP->EndIdx = 30; | ||||
1916 | IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; | ||||
1917 | IP->Repl32Coalesced = true; | ||||
1918 | I = BitGroups.erase(I); | ||||
1919 | } else { | ||||
1920 | // There are two separate groups, one before this group and one | ||||
1921 | // after us (at the beginning). We're going to remove this group, | ||||
1922 | // but also the group at the very beginning. | ||||
1923 | IP->EndIdx = IN->EndIdx; | ||||
1924 | IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; | ||||
1925 | IP->Repl32Coalesced = true; | ||||
1926 | I = BitGroups.erase(I); | ||||
1927 | BitGroups.erase(BitGroups.begin()); | ||||
1928 | } | ||||
1929 | |||||
1930 | // This must be the last group in the vector (and we might have | ||||
1931 | // just invalidated the iterator above), so break here. | ||||
1932 | break; | ||||
1933 | } | ||||
1934 | } | ||||
1935 | } | ||||
1936 | |||||
1937 | ++I; | ||||
1938 | } | ||||
1939 | } | ||||
1940 | |||||
1941 | SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { | ||||
1942 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
1943 | } | ||||
1944 | |||||
1945 | uint64_t getZerosMask() { | ||||
1946 | uint64_t Mask = 0; | ||||
1947 | for (unsigned i = 0; i < Bits.size(); ++i) { | ||||
1948 | if (Bits[i].hasValue()) | ||||
1949 | continue; | ||||
1950 | Mask |= (UINT64_C(1)1UL << i); | ||||
1951 | } | ||||
1952 | |||||
1953 | return ~Mask; | ||||
1954 | } | ||||
1955 | |||||
1956 | // This method extends an input value to 64 bit if input is 32-bit integer. | ||||
1957 | // While selecting instructions in BitPermutationSelector in 64-bit mode, | ||||
1958 | // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. | ||||
1959 | // In such case, we extend it to 64 bit to be consistent with other values. | ||||
1960 | SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { | ||||
1961 | if (V.getValueSizeInBits() == 64) | ||||
1962 | return V; | ||||
1963 | |||||
1964 | assert(V.getValueSizeInBits() == 32)(static_cast <bool> (V.getValueSizeInBits() == 32) ? void (0) : __assert_fail ("V.getValueSizeInBits() == 32", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1964, __extension__ __PRETTY_FUNCTION__)); | ||||
1965 | SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); | ||||
1966 | SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, | ||||
1967 | MVT::i64), 0); | ||||
1968 | SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, | ||||
1969 | MVT::i64, ImDef, V, | ||||
1970 | SubRegIdx), 0); | ||||
1971 | return ExtVal; | ||||
1972 | } | ||||
1973 | |||||
1974 | SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { | ||||
1975 | if (V.getValueSizeInBits() == 32) | ||||
1976 | return V; | ||||
1977 | |||||
1978 | assert(V.getValueSizeInBits() == 64)(static_cast <bool> (V.getValueSizeInBits() == 64) ? void (0) : __assert_fail ("V.getValueSizeInBits() == 64", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1978, __extension__ __PRETTY_FUNCTION__)); | ||||
1979 | SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); | ||||
1980 | SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, | ||||
1981 | MVT::i32, V, SubRegIdx), 0); | ||||
1982 | return SubVal; | ||||
1983 | } | ||||
1984 | |||||
1985 | // Depending on the number of groups for a particular value, it might be | ||||
1986 | // better to rotate, mask explicitly (using andi/andis), and then or the | ||||
1987 | // result. Select this part of the result first. | ||||
1988 | void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { | ||||
1989 | if (BPermRewriterNoMasking) | ||||
1990 | return; | ||||
1991 | |||||
1992 | for (ValueRotInfo &VRI : ValueRotsVec) { | ||||
1993 | unsigned Mask = 0; | ||||
1994 | for (unsigned i = 0; i < Bits.size(); ++i) { | ||||
1995 | if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V) | ||||
1996 | continue; | ||||
1997 | if (RLAmt[i] != VRI.RLAmt) | ||||
1998 | continue; | ||||
1999 | Mask |= (1u << i); | ||||
2000 | } | ||||
2001 | |||||
2002 | // Compute the masks for andi/andis that would be necessary. | ||||
2003 | unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16; | ||||
2004 | assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask for value bit groups") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2005, __extension__ __PRETTY_FUNCTION__)) | ||||
2005 | "No set bits in mask for value bit groups")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask for value bit groups") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2005, __extension__ __PRETTY_FUNCTION__)); | ||||
2006 | bool NeedsRotate = VRI.RLAmt != 0; | ||||
2007 | |||||
2008 | // We're trying to minimize the number of instructions. If we have one | ||||
2009 | // group, using one of andi/andis can break even. If we have three | ||||
2010 | // groups, we can use both andi and andis and break even (to use both | ||||
2011 | // andi and andis we also need to or the results together). We need four | ||||
2012 | // groups if we also need to rotate. To use andi/andis we need to do more | ||||
2013 | // than break even because rotate-and-mask instructions tend to be easier | ||||
2014 | // to schedule. | ||||
2015 | |||||
2016 | // FIXME: We've biased here against using andi/andis, which is right for | ||||
2017 | // POWER cores, but not optimal everywhere. For example, on the A2, | ||||
2018 | // andi/andis have single-cycle latency whereas the rotate-and-mask | ||||
2019 | // instructions take two cycles, and it would be better to bias toward | ||||
2020 | // andi/andis in break-even cases. | ||||
2021 | |||||
2022 | unsigned NumAndInsts = (unsigned) NeedsRotate + | ||||
2023 | (unsigned) (ANDIMask != 0) + | ||||
2024 | (unsigned) (ANDISMask != 0) + | ||||
2025 | (unsigned) (ANDIMask != 0 && ANDISMask != 0) + | ||||
2026 | (unsigned) (bool) Res; | ||||
2027 | |||||
2028 | LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"; } } while (false) | ||||
2029 | << " RL: " << VRI.RLAmt << ":"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"; } } while (false) | ||||
2030 | << "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"; } } while (false) | ||||
2031 | << " using rotates: " << VRI.NumGroups << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"; } } while (false); | ||||
2032 | |||||
2033 | if (NumAndInsts >= VRI.NumGroups) | ||||
2034 | continue; | ||||
2035 | |||||
2036 | LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\t\t\tusing masking\n"; } } while (false); | ||||
2037 | |||||
2038 | if (InstCnt) *InstCnt += NumAndInsts; | ||||
2039 | |||||
2040 | SDValue VRot; | ||||
2041 | if (VRI.RLAmt) { | ||||
2042 | SDValue Ops[] = | ||||
2043 | { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), | ||||
2044 | getI32Imm(0, dl), getI32Imm(31, dl) }; | ||||
2045 | VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, | ||||
2046 | Ops), 0); | ||||
2047 | } else { | ||||
2048 | VRot = TruncateToInt32(VRI.V, dl); | ||||
2049 | } | ||||
2050 | |||||
2051 | SDValue ANDIVal, ANDISVal; | ||||
2052 | if (ANDIMask != 0) | ||||
2053 | ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, | ||||
2054 | VRot, getI32Imm(ANDIMask, dl)), | ||||
2055 | 0); | ||||
2056 | if (ANDISMask != 0) | ||||
2057 | ANDISVal = | ||||
2058 | SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot, | ||||
2059 | getI32Imm(ANDISMask, dl)), | ||||
2060 | 0); | ||||
2061 | |||||
2062 | SDValue TotalVal; | ||||
2063 | if (!ANDIVal) | ||||
2064 | TotalVal = ANDISVal; | ||||
2065 | else if (!ANDISVal) | ||||
2066 | TotalVal = ANDIVal; | ||||
2067 | else | ||||
2068 | TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, | ||||
2069 | ANDIVal, ANDISVal), 0); | ||||
2070 | |||||
2071 | if (!Res) | ||||
2072 | Res = TotalVal; | ||||
2073 | else | ||||
2074 | Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, | ||||
2075 | Res, TotalVal), 0); | ||||
2076 | |||||
2077 | // Now, remove all groups with this underlying value and rotation | ||||
2078 | // factor. | ||||
2079 | eraseMatchingBitGroups([VRI](const BitGroup &BG) { | ||||
2080 | return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; | ||||
2081 | }); | ||||
2082 | } | ||||
2083 | } | ||||
2084 | |||||
2085 | // Instruction selection for the 32-bit case. | ||||
2086 | SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) { | ||||
2087 | SDLoc dl(N); | ||||
2088 | SDValue Res; | ||||
2089 | |||||
2090 | if (InstCnt) *InstCnt = 0; | ||||
2091 | |||||
2092 | // Take care of cases that should use andi/andis first. | ||||
2093 | SelectAndParts32(dl, Res, InstCnt); | ||||
2094 | |||||
2095 | // If we've not yet selected a 'starting' instruction, and we have no zeros | ||||
2096 | // to fill in, select the (Value, RLAmt) with the highest priority (largest | ||||
2097 | // number of groups), and start with this rotated value. | ||||
2098 | if ((!NeedMask || LateMask) && !Res) { | ||||
2099 | ValueRotInfo &VRI = ValueRotsVec[0]; | ||||
2100 | if (VRI.RLAmt) { | ||||
2101 | if (InstCnt) *InstCnt += 1; | ||||
2102 | SDValue Ops[] = | ||||
2103 | { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), | ||||
2104 | getI32Imm(0, dl), getI32Imm(31, dl) }; | ||||
2105 | Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), | ||||
2106 | 0); | ||||
2107 | } else { | ||||
2108 | Res = TruncateToInt32(VRI.V, dl); | ||||
2109 | } | ||||
2110 | |||||
2111 | // Now, remove all groups with this underlying value and rotation factor. | ||||
2112 | eraseMatchingBitGroups([VRI](const BitGroup &BG) { | ||||
2113 | return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; | ||||
2114 | }); | ||||
2115 | } | ||||
2116 | |||||
2117 | if (InstCnt) *InstCnt += BitGroups.size(); | ||||
2118 | |||||
2119 | // Insert the other groups (one at a time). | ||||
2120 | for (auto &BG : BitGroups) { | ||||
2121 | if (!Res) { | ||||
2122 | SDValue Ops[] = | ||||
2123 | { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), | ||||
2124 | getI32Imm(Bits.size() - BG.EndIdx - 1, dl), | ||||
2125 | getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; | ||||
2126 | Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); | ||||
2127 | } else { | ||||
2128 | SDValue Ops[] = | ||||
2129 | { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), | ||||
2130 | getI32Imm(Bits.size() - BG.EndIdx - 1, dl), | ||||
2131 | getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; | ||||
2132 | Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); | ||||
2133 | } | ||||
2134 | } | ||||
2135 | |||||
2136 | if (LateMask) { | ||||
2137 | unsigned Mask = (unsigned) getZerosMask(); | ||||
2138 | |||||
2139 | unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16; | ||||
2140 | assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in zeros mask?") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in zeros mask?\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2141, __extension__ __PRETTY_FUNCTION__)) | ||||
2141 | "No set bits in zeros mask?")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in zeros mask?") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in zeros mask?\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2141, __extension__ __PRETTY_FUNCTION__)); | ||||
2142 | |||||
2143 | if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + | ||||
2144 | (unsigned) (ANDISMask != 0) + | ||||
2145 | (unsigned) (ANDIMask != 0 && ANDISMask != 0); | ||||
2146 | |||||
2147 | SDValue ANDIVal, ANDISVal; | ||||
2148 | if (ANDIMask != 0) | ||||
2149 | ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, | ||||
2150 | Res, getI32Imm(ANDIMask, dl)), | ||||
2151 | 0); | ||||
2152 | if (ANDISMask != 0) | ||||
2153 | ANDISVal = | ||||
2154 | SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res, | ||||
2155 | getI32Imm(ANDISMask, dl)), | ||||
2156 | 0); | ||||
2157 | |||||
2158 | if (!ANDIVal) | ||||
2159 | Res = ANDISVal; | ||||
2160 | else if (!ANDISVal) | ||||
2161 | Res = ANDIVal; | ||||
2162 | else | ||||
2163 | Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, | ||||
2164 | ANDIVal, ANDISVal), 0); | ||||
2165 | } | ||||
2166 | |||||
2167 | return Res.getNode(); | ||||
2168 | } | ||||
2169 | |||||
// Count the instructions needed for a 64-bit rotate-and-mask (IsIns == false)
// or rotate-and-insert (IsIns == true) of the bit range [MaskStart, MaskEnd]
// with left-rotate amount RLAmt. Returns 1 when a single instruction is
// enough and 2 otherwise; a Repl32 group always costs 1.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  // A plain (non-insert) mask that reaches either end of the doubleword.
  const bool ReachesWordEdge =
      !IsIns && (InstMaskEnd == 63 || InstMaskStart == 0);
  // A mask whose end lines up with the rotation amount.
  const bool EndMatchesRotate = InstMaskEnd == 63 - RLAmt;

  return (ReachesWordEdge || EndMatchesRotate) ? 1 : 2;
}
2187 | |||||
2188 | // For 64-bit values, not all combinations of rotates and masks are | ||||
2189 | // available. Produce one if it is available. | ||||
2190 | SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, | ||||
2191 | bool Repl32, unsigned MaskStart, unsigned MaskEnd, | ||||
2192 | unsigned *InstCnt = nullptr) { | ||||
2193 | // In the notation used by the instructions, 'start' and 'end' are reversed | ||||
2194 | // because bits are counted from high to low order. | ||||
2195 | unsigned InstMaskStart = 64 - MaskEnd - 1, | ||||
2196 | InstMaskEnd = 64 - MaskStart - 1; | ||||
2197 | |||||
2198 | if (InstCnt) *InstCnt += 1; | ||||
2199 | |||||
2200 | if (Repl32) { | ||||
2201 | // This rotation amount assumes that the lower 32 bits of the quantity | ||||
2202 | // are replicated in the high 32 bits by the rotation operator (which is | ||||
2203 | // done by rlwinm and friends). | ||||
2204 | assert(InstMaskStart >= 32 && "Mask cannot start out of range")(static_cast <bool> (InstMaskStart >= 32 && "Mask cannot start out of range" ) ? void (0) : __assert_fail ("InstMaskStart >= 32 && \"Mask cannot start out of range\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2204, __extension__ __PRETTY_FUNCTION__)); | ||||
2205 | assert(InstMaskEnd >= 32 && "Mask cannot end out of range")(static_cast <bool> (InstMaskEnd >= 32 && "Mask cannot end out of range" ) ? void (0) : __assert_fail ("InstMaskEnd >= 32 && \"Mask cannot end out of range\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2205, __extension__ __PRETTY_FUNCTION__)); | ||||
2206 | SDValue Ops[] = | ||||
2207 | { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2208 | getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; | ||||
2209 | return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, | ||||
2210 | Ops), 0); | ||||
2211 | } | ||||
2212 | |||||
2213 | if (InstMaskEnd == 63) { | ||||
2214 | SDValue Ops[] = | ||||
2215 | { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2216 | getI32Imm(InstMaskStart, dl) }; | ||||
2217 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); | ||||
2218 | } | ||||
2219 | |||||
2220 | if (InstMaskStart == 0) { | ||||
2221 | SDValue Ops[] = | ||||
2222 | { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2223 | getI32Imm(InstMaskEnd, dl) }; | ||||
2224 | return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); | ||||
2225 | } | ||||
2226 | |||||
2227 | if (InstMaskEnd == 63 - RLAmt) { | ||||
2228 | SDValue Ops[] = | ||||
2229 | { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2230 | getI32Imm(InstMaskStart, dl) }; | ||||
2231 | return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); | ||||
2232 | } | ||||
2233 | |||||
2234 | // We cannot do this with a single instruction, so we'll use two. The | ||||
2235 | // problem is that we're not free to choose both a rotation amount and mask | ||||
2236 | // start and end independently. We can choose an arbitrary mask start and | ||||
2237 | // end, but then the rotation amount is fixed. Rotation, however, can be | ||||
2238 | // inverted, and so by applying an "inverse" rotation first, we can get the | ||||
2239 | // desired result. | ||||
2240 | if (InstCnt) *InstCnt += 1; | ||||
2241 | |||||
2242 | // The rotation mask for the second instruction must be MaskStart. | ||||
2243 | unsigned RLAmt2 = MaskStart; | ||||
2244 | // The first instruction must rotate V so that the overall rotation amount | ||||
2245 | // is RLAmt. | ||||
2246 | unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; | ||||
2247 | if (RLAmt1) | ||||
2248 | V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); | ||||
2249 | return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd); | ||||
2250 | } | ||||
2251 | |||||
2252 | // For 64-bit values, not all combinations of rotates and masks are | ||||
2253 | // available. Produce a rotate-mask-and-insert if one is available. | ||||
2254 | SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, | ||||
2255 | unsigned RLAmt, bool Repl32, unsigned MaskStart, | ||||
2256 | unsigned MaskEnd, unsigned *InstCnt = nullptr) { | ||||
2257 | // In the notation used by the instructions, 'start' and 'end' are reversed | ||||
2258 | // because bits are counted from high to low order. | ||||
2259 | unsigned InstMaskStart = 64 - MaskEnd - 1, | ||||
2260 | InstMaskEnd = 64 - MaskStart - 1; | ||||
2261 | |||||
2262 | if (InstCnt) *InstCnt += 1; | ||||
2263 | |||||
2264 | if (Repl32) { | ||||
2265 | // This rotation amount assumes that the lower 32 bits of the quantity | ||||
2266 | // are replicated in the high 32 bits by the rotation operator (which is | ||||
2267 | // done by rlwinm and friends). | ||||
2268 | assert(InstMaskStart >= 32 && "Mask cannot start out of range")(static_cast <bool> (InstMaskStart >= 32 && "Mask cannot start out of range" ) ? void (0) : __assert_fail ("InstMaskStart >= 32 && \"Mask cannot start out of range\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2268, __extension__ __PRETTY_FUNCTION__)); | ||||
2269 | assert(InstMaskEnd >= 32 && "Mask cannot end out of range")(static_cast <bool> (InstMaskEnd >= 32 && "Mask cannot end out of range" ) ? void (0) : __assert_fail ("InstMaskEnd >= 32 && \"Mask cannot end out of range\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2269, __extension__ __PRETTY_FUNCTION__)); | ||||
2270 | SDValue Ops[] = | ||||
2271 | { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2272 | getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; | ||||
2273 | return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, | ||||
2274 | Ops), 0); | ||||
2275 | } | ||||
2276 | |||||
2277 | if (InstMaskEnd == 63 - RLAmt) { | ||||
2278 | SDValue Ops[] = | ||||
2279 | { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2280 | getI32Imm(InstMaskStart, dl) }; | ||||
2281 | return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); | ||||
2282 | } | ||||
2283 | |||||
2284 | // We cannot do this with a single instruction, so we'll use two. The | ||||
2285 | // problem is that we're not free to choose both a rotation amount and mask | ||||
2286 | // start and end independently. We can choose an arbitrary mask start and | ||||
2287 | // end, but then the rotation amount is fixed. Rotation, however, can be | ||||
2288 | // inverted, and so by applying an "inverse" rotation first, we can get the | ||||
2289 | // desired result. | ||||
2290 | if (InstCnt) *InstCnt += 1; | ||||
2291 | |||||
2292 | // The rotation mask for the second instruction must be MaskStart. | ||||
2293 | unsigned RLAmt2 = MaskStart; | ||||
2294 | // The first instruction must rotate V so that the overall rotation amount | ||||
2295 | // is RLAmt. | ||||
2296 | unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; | ||||
2297 | if (RLAmt1) | ||||
2298 | V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); | ||||
2299 | return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); | ||||
2300 | } | ||||
2301 | |||||
2302 | void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { | ||||
2303 | if (BPermRewriterNoMasking) | ||||
2304 | return; | ||||
2305 | |||||
2306 | // The idea here is the same as in the 32-bit version, but with additional | ||||
2307 | // complications from the fact that Repl32 might be true. Because we | ||||
2308 | // aggressively convert bit groups to Repl32 form (which, for small | ||||
2309 | // rotation factors, involves no other change), and then coalesce, it might | ||||
2310 | // be the case that a single 64-bit masking operation could handle both | ||||
2311 | // some Repl32 groups and some non-Repl32 groups. If converting to Repl32 | ||||
2312 | // form allowed coalescing, then we must use a 32-bit rotaton in order to | ||||
2313 | // completely capture the new combined bit group. | ||||
2314 | |||||
2315 | for (ValueRotInfo &VRI : ValueRotsVec) { | ||||
2316 | uint64_t Mask = 0; | ||||
2317 | |||||
2318 | // We need to add to the mask all bits from the associated bit groups. | ||||
2319 | // If Repl32 is false, we need to add bits from bit groups that have | ||||
2320 | // Repl32 true, but are trivially convertable to Repl32 false. Such a | ||||
2321 | // group is trivially convertable if it overlaps only with the lower 32 | ||||
2322 | // bits, and the group has not been coalesced. | ||||
2323 | auto MatchingBG = [VRI](const BitGroup &BG) { | ||||
2324 | if (VRI.V != BG.V) | ||||
2325 | return false; | ||||
2326 | |||||
2327 | unsigned EffRLAmt = BG.RLAmt; | ||||
2328 | if (!VRI.Repl32 && BG.Repl32) { | ||||
2329 | if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && | ||||
2330 | !BG.Repl32Coalesced) { | ||||
2331 | if (BG.Repl32CR) | ||||
2332 | EffRLAmt += 32; | ||||
2333 | } else { | ||||
2334 | return false; | ||||
2335 | } | ||||
2336 | } else if (VRI.Repl32 != BG.Repl32) { | ||||
2337 | return false; | ||||
2338 | } | ||||
2339 | |||||
2340 | return VRI.RLAmt == EffRLAmt; | ||||
2341 | }; | ||||
2342 | |||||
2343 | for (auto &BG : BitGroups) { | ||||
2344 | if (!MatchingBG(BG)) | ||||
2345 | continue; | ||||
2346 | |||||
2347 | if (BG.StartIdx <= BG.EndIdx) { | ||||
2348 | for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) | ||||
2349 | Mask |= (UINT64_C(1)1UL << i); | ||||
2350 | } else { | ||||
2351 | for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) | ||||
2352 | Mask |= (UINT64_C(1)1UL << i); | ||||
2353 | for (unsigned i = 0; i <= BG.EndIdx; ++i) | ||||
2354 | Mask |= (UINT64_C(1)1UL << i); | ||||
2355 | } | ||||
2356 | } | ||||
2357 | |||||
2358 | // We can use the 32-bit andi/andis technique if the mask does not | ||||
2359 | // require any higher-order bits. This can save an instruction compared | ||||
2360 | // to always using the general 64-bit technique. | ||||
2361 | bool Use32BitInsts = isUInt<32>(Mask); | ||||
2362 | // Compute the masks for andi/andis that would be necessary. | ||||
2363 | unsigned ANDIMask = (Mask & UINT16_MAX(65535)), | ||||
2364 | ANDISMask = (Mask >> 16) & UINT16_MAX(65535); | ||||
2365 | |||||
2366 | bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); | ||||
2367 | |||||
2368 | unsigned NumAndInsts = (unsigned) NeedsRotate + | ||||
2369 | (unsigned) (bool) Res; | ||||
2370 | unsigned NumOfSelectInsts = 0; | ||||
2371 | selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts); | ||||
2372 | assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.")(static_cast <bool> (NumOfSelectInsts > 0 && "Failed to select an i64 constant.") ? void (0) : __assert_fail ("NumOfSelectInsts > 0 && \"Failed to select an i64 constant.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2372, __extension__ __PRETTY_FUNCTION__)); | ||||
2373 | if (Use32BitInsts) | ||||
2374 | NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + | ||||
2375 | (unsigned) (ANDIMask != 0 && ANDISMask != 0); | ||||
2376 | else | ||||
2377 | NumAndInsts += NumOfSelectInsts + /* and */ 1; | ||||
2378 | |||||
2379 | unsigned NumRLInsts = 0; | ||||
2380 | bool FirstBG = true; | ||||
2381 | bool MoreBG = false; | ||||
2382 | for (auto &BG : BitGroups) { | ||||
2383 | if (!MatchingBG(BG)) { | ||||
2384 | MoreBG = true; | ||||
2385 | continue; | ||||
2386 | } | ||||
2387 | NumRLInsts += | ||||
2388 | SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, | ||||
2389 | !FirstBG); | ||||
2390 | FirstBG = false; | ||||
2391 | } | ||||
2392 | |||||
2393 | LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"; } } while (false) | ||||
2394 | << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"; } } while (false) | ||||
2395 | << "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"; } } while (false) | ||||
2396 | << " using rotates: " << NumRLInsts << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"; } } while (false); | ||||
2397 | |||||
2398 | // When we'd use andi/andis, we bias toward using the rotates (andi only | ||||
2399 | // has a record form, and is cracked on POWER cores). However, when using | ||||
2400 | // general 64-bit constant formation, bias toward the constant form, | ||||
2401 | // because that exposes more opportunities for CSE. | ||||
2402 | if (NumAndInsts > NumRLInsts) | ||||
2403 | continue; | ||||
2404 | // When merging multiple bit groups, instruction or is used. | ||||
2405 | // But when rotate is used, rldimi can inert the rotated value into any | ||||
2406 | // register, so instruction or can be avoided. | ||||
2407 | if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) | ||||
2408 | continue; | ||||
2409 | |||||
2410 | LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\t\t\tusing masking\n"; } } while (false); | ||||
2411 | |||||
2412 | if (InstCnt) *InstCnt += NumAndInsts; | ||||
2413 | |||||
2414 | SDValue VRot; | ||||
2415 | // We actually need to generate a rotation if we have a non-zero rotation | ||||
2416 | // factor or, in the Repl32 case, if we care about any of the | ||||
2417 | // higher-order replicated bits. In the latter case, we generate a mask | ||||
2418 | // backward so that it actually includes the entire 64 bits. | ||||
2419 | if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) | ||||
2420 | VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, | ||||
2421 | VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63); | ||||
2422 | else | ||||
2423 | VRot = VRI.V; | ||||
2424 | |||||
2425 | SDValue TotalVal; | ||||
2426 | if (Use32BitInsts) { | ||||
2427 | assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value" ) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2428, __extension__ __PRETTY_FUNCTION__)) | ||||
2428 | "No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value" ) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2428, __extension__ __PRETTY_FUNCTION__)); | ||||
2429 | |||||
2430 | SDValue ANDIVal, ANDISVal; | ||||
2431 | if (ANDIMask != 0) | ||||
2432 | ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, | ||||
2433 | ExtendToInt64(VRot, dl), | ||||
2434 | getI32Imm(ANDIMask, dl)), | ||||
2435 | 0); | ||||
2436 | if (ANDISMask != 0) | ||||
2437 | ANDISVal = | ||||
2438 | SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, | ||||
2439 | ExtendToInt64(VRot, dl), | ||||
2440 | getI32Imm(ANDISMask, dl)), | ||||
2441 | 0); | ||||
2442 | |||||
2443 | if (!ANDIVal) | ||||
2444 | TotalVal = ANDISVal; | ||||
2445 | else if (!ANDISVal) | ||||
2446 | TotalVal = ANDIVal; | ||||
2447 | else | ||||
2448 | TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, | ||||
2449 | ExtendToInt64(ANDIVal, dl), ANDISVal), 0); | ||||
2450 | } else { | ||||
2451 | TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); | ||||
2452 | TotalVal = | ||||
2453 | SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, | ||||
2454 | ExtendToInt64(VRot, dl), TotalVal), | ||||
2455 | 0); | ||||
2456 | } | ||||
2457 | |||||
2458 | if (!Res) | ||||
2459 | Res = TotalVal; | ||||
2460 | else | ||||
2461 | Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, | ||||
2462 | ExtendToInt64(Res, dl), TotalVal), | ||||
2463 | 0); | ||||
2464 | |||||
2465 | // Now, remove all groups with this underlying value and rotation | ||||
2466 | // factor. | ||||
2467 | eraseMatchingBitGroups(MatchingBG); | ||||
2468 | } | ||||
2469 | } | ||||
2470 | |||||
2471 | // Instruction selection for the 64-bit case. | ||||
2472 | SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { | ||||
2473 | SDLoc dl(N); | ||||
2474 | SDValue Res; | ||||
2475 | |||||
2476 | if (InstCnt) *InstCnt = 0; | ||||
2477 | |||||
2478 | // Take care of cases that should use andi/andis first. | ||||
2479 | SelectAndParts64(dl, Res, InstCnt); | ||||
2480 | |||||
2481 | // If we've not yet selected a 'starting' instruction, and we have no zeros | ||||
2482 | // to fill in, select the (Value, RLAmt) with the highest priority (largest | ||||
2483 | // number of groups), and start with this rotated value. | ||||
2484 | if ((!NeedMask || LateMask) && !Res) { | ||||
2485 | // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 | ||||
2486 | // groups will come first, and so the VRI representing the largest number | ||||
2487 | // of groups might not be first (it might be the first Repl32 groups). | ||||
2488 | unsigned MaxGroupsIdx = 0; | ||||
2489 | if (!ValueRotsVec[0].Repl32) { | ||||
2490 | for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) | ||||
2491 | if (ValueRotsVec[i].Repl32) { | ||||
2492 | if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) | ||||
2493 | MaxGroupsIdx = i; | ||||
2494 | break; | ||||
2495 | } | ||||
2496 | } | ||||
2497 | |||||
2498 | ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; | ||||
2499 | bool NeedsRotate = false; | ||||
2500 | if (VRI.RLAmt) { | ||||
2501 | NeedsRotate = true; | ||||
2502 | } else if (VRI.Repl32) { | ||||
2503 | for (auto &BG : BitGroups) { | ||||
2504 | if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || | ||||
2505 | BG.Repl32 != VRI.Repl32) | ||||
2506 | continue; | ||||
2507 | |||||
2508 | // We don't need a rotate if the bit group is confined to the lower | ||||
2509 | // 32 bits. | ||||
2510 | if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) | ||||
2511 | continue; | ||||
2512 | |||||
2513 | NeedsRotate = true; | ||||
2514 | break; | ||||
2515 | } | ||||
2516 | } | ||||
2517 | |||||
2518 | if (NeedsRotate) | ||||
2519 | Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, | ||||
2520 | VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, | ||||
2521 | InstCnt); | ||||
2522 | else | ||||
2523 | Res = VRI.V; | ||||
2524 | |||||
2525 | // Now, remove all groups with this underlying value and rotation factor. | ||||
2526 | if (Res) | ||||
2527 | eraseMatchingBitGroups([VRI](const BitGroup &BG) { | ||||
2528 | return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && | ||||
2529 | BG.Repl32 == VRI.Repl32; | ||||
2530 | }); | ||||
2531 | } | ||||
2532 | |||||
2533 | // Because 64-bit rotates are more flexible than inserts, we might have a | ||||
2534 | // preference regarding which one we do first (to save one instruction). | ||||
2535 | if (!Res) | ||||
2536 | for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { | ||||
2537 | if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, | ||||
2538 | false) < | ||||
2539 | SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, | ||||
2540 | true)) { | ||||
2541 | if (I != BitGroups.begin()) { | ||||
2542 | BitGroup BG = *I; | ||||
2543 | BitGroups.erase(I); | ||||
2544 | BitGroups.insert(BitGroups.begin(), BG); | ||||
2545 | } | ||||
2546 | |||||
2547 | break; | ||||
2548 | } | ||||
2549 | } | ||||
2550 | |||||
2551 | // Insert the other groups (one at a time). | ||||
2552 | for (auto &BG : BitGroups) { | ||||
2553 | if (!Res) | ||||
2554 | Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, | ||||
2555 | BG.EndIdx, InstCnt); | ||||
2556 | else | ||||
2557 | Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, | ||||
2558 | BG.StartIdx, BG.EndIdx, InstCnt); | ||||
2559 | } | ||||
2560 | |||||
2561 | if (LateMask) { | ||||
2562 | uint64_t Mask = getZerosMask(); | ||||
2563 | |||||
2564 | // We can use the 32-bit andi/andis technique if the mask does not | ||||
2565 | // require any higher-order bits. This can save an instruction compared | ||||
2566 | // to always using the general 64-bit technique. | ||||
2567 | bool Use32BitInsts = isUInt<32>(Mask); | ||||
2568 | // Compute the masks for andi/andis that would be necessary. | ||||
2569 | unsigned ANDIMask = (Mask & UINT16_MAX(65535)), | ||||
2570 | ANDISMask = (Mask >> 16) & UINT16_MAX(65535); | ||||
2571 | |||||
2572 | if (Use32BitInsts) { | ||||
2573 | assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value" ) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2574, __extension__ __PRETTY_FUNCTION__)) | ||||
2574 | "No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value" ) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2574, __extension__ __PRETTY_FUNCTION__)); | ||||
2575 | |||||
2576 | if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + | ||||
2577 | (unsigned) (ANDISMask != 0) + | ||||
2578 | (unsigned) (ANDIMask != 0 && ANDISMask != 0); | ||||
2579 | |||||
2580 | SDValue ANDIVal, ANDISVal; | ||||
2581 | if (ANDIMask != 0) | ||||
2582 | ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, | ||||
2583 | ExtendToInt64(Res, dl), | ||||
2584 | getI32Imm(ANDIMask, dl)), | ||||
2585 | 0); | ||||
2586 | if (ANDISMask != 0) | ||||
2587 | ANDISVal = | ||||
2588 | SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, | ||||
2589 | ExtendToInt64(Res, dl), | ||||
2590 | getI32Imm(ANDISMask, dl)), | ||||
2591 | 0); | ||||
2592 | |||||
2593 | if (!ANDIVal) | ||||
2594 | Res = ANDISVal; | ||||
2595 | else if (!ANDISVal) | ||||
2596 | Res = ANDIVal; | ||||
2597 | else | ||||
2598 | Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, | ||||
2599 | ExtendToInt64(ANDIVal, dl), ANDISVal), 0); | ||||
2600 | } else { | ||||
2601 | unsigned NumOfSelectInsts = 0; | ||||
2602 | SDValue MaskVal = | ||||
2603 | SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0); | ||||
2604 | Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, | ||||
2605 | ExtendToInt64(Res, dl), MaskVal), | ||||
2606 | 0); | ||||
2607 | if (InstCnt) | ||||
2608 | *InstCnt += NumOfSelectInsts + /* and */ 1; | ||||
2609 | } | ||||
2610 | } | ||||
2611 | |||||
2612 | return Res.getNode(); | ||||
2613 | } | ||||
2614 | |||||
2615 | SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { | ||||
2616 | // Fill in BitGroups. | ||||
2617 | collectBitGroups(LateMask); | ||||
2618 | if (BitGroups.empty()) | ||||
2619 | return nullptr; | ||||
2620 | |||||
2621 | // For 64-bit values, figure out when we can use 32-bit instructions. | ||||
2622 | if (Bits.size() == 64) | ||||
2623 | assignRepl32BitGroups(); | ||||
2624 | |||||
2625 | // Fill in ValueRotsVec. | ||||
2626 | collectValueRotInfo(); | ||||
2627 | |||||
2628 | if (Bits.size() == 32) { | ||||
2629 | return Select32(N, LateMask, InstCnt); | ||||
2630 | } else { | ||||
2631 | assert(Bits.size() == 64 && "Not 64 bits here?")(static_cast <bool> (Bits.size() == 64 && "Not 64 bits here?" ) ? void (0) : __assert_fail ("Bits.size() == 64 && \"Not 64 bits here?\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2631, __extension__ __PRETTY_FUNCTION__)); | ||||
2632 | return Select64(N, LateMask, InstCnt); | ||||
2633 | } | ||||
2634 | |||||
2635 | return nullptr; | ||||
2636 | } | ||||
2637 | |||||
2638 | void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { | ||||
2639 | erase_if(BitGroups, F); | ||||
2640 | } | ||||
2641 | |||||
2642 | SmallVector<ValueBit, 64> Bits; | ||||
2643 | |||||
2644 | bool NeedMask = false; | ||||
2645 | SmallVector<unsigned, 64> RLAmt; | ||||
2646 | |||||
2647 | SmallVector<BitGroup, 16> BitGroups; | ||||
2648 | |||||
2649 | DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots; | ||||
2650 | SmallVector<ValueRotInfo, 16> ValueRotsVec; | ||||
2651 | |||||
2652 | SelectionDAG *CurDAG = nullptr; | ||||
2653 | |||||
2654 | public: | ||||
2655 | BitPermutationSelector(SelectionDAG *DAG) | ||||
2656 | : CurDAG(DAG) {} | ||||
2657 | |||||
2658 | // Here we try to match complex bit permutations into a set of | ||||
2659 | // rotate-and-shift/shift/and/or instructions, using a set of heuristics | ||||
2660 | // known to produce optimal code for common cases (like i32 byte swapping). | ||||
2661 | SDNode *Select(SDNode *N) { | ||||
2662 | Memoizer.clear(); | ||||
2663 | auto Result = | ||||
2664 | getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); | ||||
2665 | if (!Result.first) | ||||
2666 | return nullptr; | ||||
2667 | Bits = std::move(*Result.second); | ||||
2668 | |||||
2669 | LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Considering bit-permutation-based instruction" " selection for: "; } } while (false) | ||||
2670 | " selection for: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Considering bit-permutation-based instruction" " selection for: "; } } while (false); | ||||
2671 | LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(CurDAG); } } while (false); | ||||
2672 | |||||
2673 | // Fill it RLAmt and set NeedMask. | ||||
2674 | computeRotationAmounts(); | ||||
2675 | |||||
2676 | if (!NeedMask) | ||||
2677 | return Select(N, false); | ||||
2678 | |||||
2679 | // We currently have two techniques for handling results with zeros: early | ||||
2680 | // masking (the default) and late masking. Late masking is sometimes more | ||||
2681 | // efficient, but because the structure of the bit groups is different, it | ||||
2682 | // is hard to tell without generating both and comparing the results. With | ||||
2683 | // late masking, we ignore zeros in the resulting value when inserting each | ||||
2684 | // set of bit groups, and then mask in the zeros at the end. With early | ||||
2685 | // masking, we only insert the non-zero parts of the result at every step. | ||||
2686 | |||||
2687 | unsigned InstCnt = 0, InstCntLateMask = 0; | ||||
2688 | LLVM_DEBUG(dbgs() << "\tEarly masking:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tEarly masking:\n"; } } while (false); | ||||
2689 | SDNode *RN = Select(N, false, &InstCnt); | ||||
2690 | LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"; } } while (false); | ||||
2691 | |||||
2692 | LLVM_DEBUG(dbgs() << "\tLate masking:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tLate masking:\n"; } } while (false); | ||||
2693 | SDNode *RNLM = Select(N, true, &InstCntLateMask); | ||||
2694 | LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMaskdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\tisel would use " << InstCntLateMask << " instructions\n"; } } while (false ) | ||||
2695 | << " instructions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\tisel would use " << InstCntLateMask << " instructions\n"; } } while (false ); | ||||
2696 | |||||
2697 | if (InstCnt <= InstCntLateMask) { | ||||
2698 | LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tUsing early-masking for isel\n" ; } } while (false); | ||||
2699 | return RN; | ||||
2700 | } | ||||
2701 | |||||
2702 | LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tUsing late-masking for isel\n" ; } } while (false); | ||||
2703 | return RNLM; | ||||
2704 | } | ||||
2705 | }; | ||||
2706 | |||||
2707 | class IntegerCompareEliminator { | ||||
2708 | SelectionDAG *CurDAG; | ||||
2709 | PPCDAGToDAGISel *S; | ||||
2710 | // Conversion type for interpreting results of a 32-bit instruction as | ||||
2711 | // a 64-bit value or vice versa. | ||||
2712 | enum ExtOrTruncConversion { Ext, Trunc }; | ||||
2713 | |||||
2714 | // Modifiers to guide how an ISD::SETCC node's result is to be computed | ||||
2715 | // in a GPR. | ||||
2716 | // ZExtOrig - use the original condition code, zero-extend value | ||||
2717 | // ZExtInvert - invert the condition code, zero-extend value | ||||
2718 | // SExtOrig - use the original condition code, sign-extend value | ||||
2719 | // SExtInvert - invert the condition code, sign-extend value | ||||
2720 | enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; | ||||
2721 | |||||
2722 | // Comparisons against zero to emit GPR code sequences for. Each of these | ||||
2723 | // sequences may need to be emitted for two or more equivalent patterns. | ||||
2724 | // For example (a >= 0) == (a > -1). The direction of the comparison (</>) | ||||
2725 | // matters as well as the extension type: sext (-1/0), zext (1/0). | ||||
2726 | // GEZExt - (zext (LHS >= 0)) | ||||
2727 | // GESExt - (sext (LHS >= 0)) | ||||
2728 | // LEZExt - (zext (LHS <= 0)) | ||||
2729 | // LESExt - (sext (LHS <= 0)) | ||||
2730 | enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; | ||||
2731 | |||||
2732 | SDNode *tryEXTEND(SDNode *N); | ||||
2733 | SDNode *tryLogicOpOfCompares(SDNode *N); | ||||
2734 | SDValue computeLogicOpInGPR(SDValue LogicOp); | ||||
2735 | SDValue signExtendInputIfNeeded(SDValue Input); | ||||
2736 | SDValue zeroExtendInputIfNeeded(SDValue Input); | ||||
2737 | SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); | ||||
2738 | SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, | ||||
2739 | ZeroCompare CmpTy); | ||||
2740 | SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
2741 | int64_t RHSValue, SDLoc dl); | ||||
2742 | SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
2743 | int64_t RHSValue, SDLoc dl); | ||||
2744 | SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
2745 | int64_t RHSValue, SDLoc dl); | ||||
2746 | SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
2747 | int64_t RHSValue, SDLoc dl); | ||||
2748 | SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); | ||||
2749 | |||||
2750 | public: | ||||
2751 | IntegerCompareEliminator(SelectionDAG *DAG, | ||||
2752 | PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { | ||||
2753 | assert(CurDAG->getTargetLoweringInfo()(static_cast <bool> (CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && "Only expecting to use this on 64 bit targets." ) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2755, __extension__ __PRETTY_FUNCTION__)) | ||||
2754 | .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&(static_cast <bool> (CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && "Only expecting to use this on 64 bit targets." ) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2755, __extension__ __PRETTY_FUNCTION__)) | ||||
2755 | "Only expecting to use this on 64 bit targets.")(static_cast <bool> (CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && "Only expecting to use this on 64 bit targets." ) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2755, __extension__ __PRETTY_FUNCTION__)); | ||||
2756 | } | ||||
2757 | SDNode *Select(SDNode *N) { | ||||
2758 | if (CmpInGPR == ICGPR_None) | ||||
2759 | return nullptr; | ||||
2760 | switch (N->getOpcode()) { | ||||
2761 | default: break; | ||||
2762 | case ISD::ZERO_EXTEND: | ||||
2763 | if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || | ||||
2764 | CmpInGPR == ICGPR_SextI64) | ||||
2765 | return nullptr; | ||||
2766 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
2767 | case ISD::SIGN_EXTEND: | ||||
2768 | if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || | ||||
2769 | CmpInGPR == ICGPR_ZextI64) | ||||
2770 | return nullptr; | ||||
2771 | return tryEXTEND(N); | ||||
2772 | case ISD::AND: | ||||
2773 | case ISD::OR: | ||||
2774 | case ISD::XOR: | ||||
2775 | return tryLogicOpOfCompares(N); | ||||
2776 | } | ||||
2777 | return nullptr; | ||||
2778 | } | ||||
2779 | }; | ||||
2780 | |||||
2781 | static bool isLogicOp(unsigned Opc) { | ||||
2782 | return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; | ||||
2783 | } | ||||
2784 | // The obvious case for wanting to keep the value in a GPR. Namely, the | ||||
2785 | // result of the comparison is actually needed in a GPR. | ||||
2786 | SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { | ||||
2787 | assert((N->getOpcode() == ISD::ZERO_EXTEND ||(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2789, __extension__ __PRETTY_FUNCTION__)) | ||||
2788 | N->getOpcode() == ISD::SIGN_EXTEND) &&(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2789, __extension__ __PRETTY_FUNCTION__)) | ||||
2789 | "Expecting a zero/sign extend node!")(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2789, __extension__ __PRETTY_FUNCTION__)); | ||||
2790 | SDValue WideRes; | ||||
2791 | // If we are zero-extending the result of a logical operation on i1 | ||||
2792 | // values, we can keep the values in GPRs. | ||||
2793 | if (isLogicOp(N->getOperand(0).getOpcode()) && | ||||
2794 | N->getOperand(0).getValueType() == MVT::i1 && | ||||
2795 | N->getOpcode() == ISD::ZERO_EXTEND) | ||||
2796 | WideRes = computeLogicOpInGPR(N->getOperand(0)); | ||||
2797 | else if (N->getOperand(0).getOpcode() != ISD::SETCC) | ||||
2798 | return nullptr; | ||||
2799 | else | ||||
2800 | WideRes = | ||||
2801 | getSETCCInGPR(N->getOperand(0), | ||||
2802 | N->getOpcode() == ISD::SIGN_EXTEND ? | ||||
2803 | SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); | ||||
2804 | |||||
2805 | if (!WideRes) | ||||
2806 | return nullptr; | ||||
2807 | |||||
2808 | SDLoc dl(N); | ||||
2809 | bool Input32Bit = WideRes.getValueType() == MVT::i32; | ||||
2810 | bool Output32Bit = N->getValueType(0) == MVT::i32; | ||||
2811 | |||||
2812 | NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; | ||||
2813 | NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; | ||||
2814 | |||||
2815 | SDValue ConvOp = WideRes; | ||||
2816 | if (Input32Bit != Output32Bit) | ||||
2817 | ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : | ||||
2818 | ExtOrTruncConversion::Trunc); | ||||
2819 | return ConvOp.getNode(); | ||||
2820 | } | ||||
2821 | |||||
2822 | // Attempt to perform logical operations on the results of comparisons while | ||||
2823 | // keeping the values in GPRs. Without doing so, these would end up being | ||||
2824 | // lowered to CR-logical operations which suffer from significant latency and | ||||
2825 | // low ILP. | ||||
2826 | SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { | ||||
2827 | if (N->getValueType(0) != MVT::i1) | ||||
2828 | return nullptr; | ||||
2829 | assert(isLogicOp(N->getOpcode()) &&(static_cast <bool> (isLogicOp(N->getOpcode()) && "Expected a logic operation on setcc results.") ? void (0) : __assert_fail ("isLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2830, __extension__ __PRETTY_FUNCTION__)) | ||||
2830 | "Expected a logic operation on setcc results.")(static_cast <bool> (isLogicOp(N->getOpcode()) && "Expected a logic operation on setcc results.") ? void (0) : __assert_fail ("isLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2830, __extension__ __PRETTY_FUNCTION__)); | ||||
2831 | SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); | ||||
2832 | if (!LoweredLogical) | ||||
2833 | return nullptr; | ||||
2834 | |||||
2835 | SDLoc dl(N); | ||||
2836 | bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; | ||||
2837 | unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; | ||||
2838 | SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); | ||||
2839 | SDValue LHS = LoweredLogical.getOperand(0); | ||||
2840 | SDValue RHS = LoweredLogical.getOperand(1); | ||||
2841 | SDValue WideOp; | ||||
2842 | SDValue OpToConvToRecForm; | ||||
2843 | |||||
2844 | // Look through any 32-bit to 64-bit implicit extend nodes to find the | ||||
2845 | // opcode that is input to the XORI. | ||||
2846 | if (IsBitwiseNegate && | ||||
2847 | LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) | ||||
2848 | OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); | ||||
2849 | else if (IsBitwiseNegate) | ||||
2850 | // If the input to the XORI isn't an extension, that's what we're after. | ||||
2851 | OpToConvToRecForm = LoweredLogical.getOperand(0); | ||||
2852 | else | ||||
2853 | // If this is not an XORI, it is a reg-reg logical op and we can convert | ||||
2854 | // it to record-form. | ||||
2855 | OpToConvToRecForm = LoweredLogical; | ||||
2856 | |||||
2857 | // Get the record-form version of the node we're looking to use to get the | ||||
2858 | // CR result from. | ||||
2859 | uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); | ||||
2860 | int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); | ||||
2861 | |||||
2862 | // Convert the right node to record-form. This is either the logical we're | ||||
2863 | // looking at or it is the input node to the negation (if we're looking at | ||||
2864 | // a bitwise negation). | ||||
2865 | if (NewOpc != -1 && IsBitwiseNegate) { | ||||
2866 | // The input to the XORI has a record-form. Use it. | ||||
2867 | assert(LoweredLogical.getConstantOperandVal(1) == 1 &&(static_cast <bool> (LoweredLogical.getConstantOperandVal (1) == 1 && "Expected a PPC::XORI8 only for bitwise negation." ) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2868, __extension__ __PRETTY_FUNCTION__)) | ||||
2868 | "Expected a PPC::XORI8 only for bitwise negation.")(static_cast <bool> (LoweredLogical.getConstantOperandVal (1) == 1 && "Expected a PPC::XORI8 only for bitwise negation." ) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2868, __extension__ __PRETTY_FUNCTION__)); | ||||
2869 | // Emit the record-form instruction. | ||||
2870 | std::vector<SDValue> Ops; | ||||
2871 | for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) | ||||
2872 | Ops.push_back(OpToConvToRecForm.getOperand(i)); | ||||
2873 | |||||
2874 | WideOp = | ||||
2875 | SDValue(CurDAG->getMachineNode(NewOpc, dl, | ||||
2876 | OpToConvToRecForm.getValueType(), | ||||
2877 | MVT::Glue, Ops), 0); | ||||
2878 | } else { | ||||
2879 | assert((NewOpc != -1 || !IsBitwiseNegate) &&(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate) && "No record form available for AND8/OR8/XOR8?") ? void (0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2880, __extension__ __PRETTY_FUNCTION__)) | ||||
2880 | "No record form available for AND8/OR8/XOR8?")(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate) && "No record form available for AND8/OR8/XOR8?") ? void (0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2880, __extension__ __PRETTY_FUNCTION__)); | ||||
2881 | WideOp = | ||||
2882 | SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc, | ||||
2883 | dl, MVT::i64, MVT::Glue, LHS, RHS), | ||||
2884 | 0); | ||||
2885 | } | ||||
2886 | |||||
2887 | // Select this node to a single bit from CR0 set by the record-form node | ||||
2888 | // just created. For bitwise negation, use the EQ bit which is the equivalent | ||||
2889 | // of negating the result (i.e. it is a bit set when the result of the | ||||
2890 | // operation is zero). | ||||
2891 | SDValue SRIdxVal = | ||||
2892 | CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); | ||||
2893 | SDValue CRBit = | ||||
2894 | SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, | ||||
2895 | MVT::i1, CR0Reg, SRIdxVal, | ||||
2896 | WideOp.getValue(1)), 0); | ||||
2897 | return CRBit.getNode(); | ||||
2898 | } | ||||
2899 | |||||
2900 | // Lower a logical operation on i1 values into a GPR sequence if possible. | ||||
2901 | // The result can be kept in a GPR if requested. | ||||
2902 | // Three types of inputs can be handled: | ||||
2903 | // - SETCC | ||||
2904 | // - TRUNCATE | ||||
2905 | // - Logical operation (AND/OR/XOR) | ||||
2906 | // There is also a special case that is handled (namely a complement operation | ||||
2907 | // achieved with xor %a, -1). | ||||
2908 | SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { | ||||
2909 | assert(isLogicOp(LogicOp.getOpcode()) &&(static_cast <bool> (isLogicOp(LogicOp.getOpcode()) && "Can only handle logic operations here.") ? void (0) : __assert_fail ("isLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2910, __extension__ __PRETTY_FUNCTION__)) | ||||
2910 | "Can only handle logic operations here.")(static_cast <bool> (isLogicOp(LogicOp.getOpcode()) && "Can only handle logic operations here.") ? void (0) : __assert_fail ("isLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2910, __extension__ __PRETTY_FUNCTION__)); | ||||
2911 | assert(LogicOp.getValueType() == MVT::i1 &&(static_cast <bool> (LogicOp.getValueType() == MVT::i1 && "Can only handle logic operations on i1 values here.") ? void (0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2912, __extension__ __PRETTY_FUNCTION__)) | ||||
2912 | "Can only handle logic operations on i1 values here.")(static_cast <bool> (LogicOp.getValueType() == MVT::i1 && "Can only handle logic operations on i1 values here.") ? void (0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2912, __extension__ __PRETTY_FUNCTION__)); | ||||
2913 | SDLoc dl(LogicOp); | ||||
2914 | SDValue LHS, RHS; | ||||
2915 | |||||
2916 | // Special case: xor %a, -1 | ||||
2917 | bool IsBitwiseNegation = isBitwiseNot(LogicOp); | ||||
2918 | |||||
2919 | // Produces a GPR sequence for each operand of the binary logic operation. | ||||
2920 | // For SETCC, it produces the respective comparison, for TRUNCATE it truncates | ||||
2921 | // the value in a GPR and for logic operations, it will recursively produce | ||||
2922 | // a GPR sequence for the operation. | ||||
2923 | auto getLogicOperand = [&] (SDValue Operand) -> SDValue { | ||||
2924 | unsigned OperandOpcode = Operand.getOpcode(); | ||||
2925 | if (OperandOpcode == ISD::SETCC) | ||||
2926 | return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); | ||||
2927 | else if (OperandOpcode == ISD::TRUNCATE) { | ||||
2928 | SDValue InputOp = Operand.getOperand(0); | ||||
2929 | EVT InVT = InputOp.getValueType(); | ||||
2930 | return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : | ||||
2931 | PPC::RLDICL, dl, InVT, InputOp, | ||||
2932 | S->getI64Imm(0, dl), | ||||
2933 | S->getI64Imm(63, dl)), 0); | ||||
2934 | } else if (isLogicOp(OperandOpcode)) | ||||
2935 | return computeLogicOpInGPR(Operand); | ||||
2936 | return SDValue(); | ||||
2937 | }; | ||||
2938 | LHS = getLogicOperand(LogicOp.getOperand(0)); | ||||
2939 | RHS = getLogicOperand(LogicOp.getOperand(1)); | ||||
2940 | |||||
2941 | // If a GPR sequence can't be produced for the LHS we can't proceed. | ||||
2942 | // Not producing a GPR sequence for the RHS is only a problem if this isn't | ||||
2943 | // a bitwise negation operation. | ||||
2944 | if (!LHS || (!RHS && !IsBitwiseNegation)) | ||||
2945 | return SDValue(); | ||||
2946 | |||||
2947 | NumLogicOpsOnComparison++; | ||||
2948 | |||||
2949 | // We will use the inputs as 64-bit values. | ||||
2950 | if (LHS.getValueType() == MVT::i32) | ||||
2951 | LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); | ||||
2952 | if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) | ||||
2953 | RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); | ||||
2954 | |||||
2955 | unsigned NewOpc; | ||||
2956 | switch (LogicOp.getOpcode()) { | ||||
2957 | default: llvm_unreachable("Unknown logic operation.")::llvm::llvm_unreachable_internal("Unknown logic operation.", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2957); | ||||
2958 | case ISD::AND: NewOpc = PPC::AND8; break; | ||||
2959 | case ISD::OR: NewOpc = PPC::OR8; break; | ||||
2960 | case ISD::XOR: NewOpc = PPC::XOR8; break; | ||||
2961 | } | ||||
2962 | |||||
2963 | if (IsBitwiseNegation) { | ||||
2964 | RHS = S->getI64Imm(1, dl); | ||||
2965 | NewOpc = PPC::XORI8; | ||||
2966 | } | ||||
2967 | |||||
2968 | return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); | ||||
2969 | |||||
2970 | } | ||||
2971 | |||||
2972 | /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. | ||||
2973 | /// Otherwise just reinterpret it as a 64-bit value. | ||||
2974 | /// Useful when emitting comparison code for 32-bit values without using | ||||
2975 | /// the compare instruction (which only considers the lower 32-bits). | ||||
2976 | SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { | ||||
2977 | assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 && "Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail ("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2978, __extension__ __PRETTY_FUNCTION__)) | ||||
2978 | "Can only sign-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 && "Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail ("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2978, __extension__ __PRETTY_FUNCTION__)); | ||||
2979 | unsigned Opc = Input.getOpcode(); | ||||
2980 | |||||
2981 | // The value was sign extended and then truncated to 32-bits. No need to | ||||
2982 | // sign extend it again. | ||||
2983 | if (Opc == ISD::TRUNCATE && | ||||
2984 | (Input.getOperand(0).getOpcode() == ISD::AssertSext || | ||||
2985 | Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) | ||||
2986 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
2987 | |||||
2988 | LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); | ||||
2989 | // The input is a sign-extending load. All ppc sign-extending loads | ||||
2990 | // sign-extend to the full 64-bits. | ||||
2991 | if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) | ||||
2992 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
2993 | |||||
2994 | ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); | ||||
2995 | // We don't sign-extend constants. | ||||
2996 | if (InputConst) | ||||
2997 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
2998 | |||||
2999 | SDLoc dl(Input); | ||||
3000 | SignExtensionsAdded++; | ||||
3001 | return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, | ||||
3002 | MVT::i64, Input), 0); | ||||
3003 | } | ||||
3004 | |||||
3005 | /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. | ||||
3006 | /// Otherwise just reinterpret it as a 64-bit value. | ||||
3007 | /// Useful when emitting comparison code for 32-bit values without using | ||||
3008 | /// the compare instruction (which only considers the lower 32-bits). | ||||
3009 | SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { | ||||
3010 | assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 && "Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail ("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3011, __extension__ __PRETTY_FUNCTION__)) | ||||
3011 | "Can only zero-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 && "Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail ("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3011, __extension__ __PRETTY_FUNCTION__)); | ||||
3012 | unsigned Opc = Input.getOpcode(); | ||||
3013 | |||||
3014 | // The only condition under which we can omit the actual extend instruction: | ||||
3015 | // - The value is a positive constant | ||||
3016 | // - The value comes from a load that isn't a sign-extending load | ||||
3017 | // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. | ||||
3018 | bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && | ||||
3019 | (Input.getOperand(0).getOpcode() == ISD::AssertZext || | ||||
3020 | Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); | ||||
3021 | if (IsTruncateOfZExt) | ||||
3022 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
3023 | |||||
3024 | ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); | ||||
3025 | if (InputConst && InputConst->getSExtValue() >= 0) | ||||
3026 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
3027 | |||||
3028 | LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); | ||||
3029 | // The input is a load that doesn't sign-extend (it will be zero-extended). | ||||
3030 | if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) | ||||
3031 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
3032 | |||||
3033 | // None of the above, need to zero-extend. | ||||
3034 | SDLoc dl(Input); | ||||
3035 | ZeroExtensionsAdded++; | ||||
3036 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, | ||||
3037 | S->getI64Imm(0, dl), | ||||
3038 | S->getI64Imm(32, dl)), 0); | ||||
3039 | } | ||||
3040 | |||||
3041 | // Handle a 32-bit value in a 64-bit register and vice-versa. These are of | ||||
3042 | // course not actual zero/sign extensions that will generate machine code, | ||||
3043 | // they're just a way to reinterpret a 32 bit value in a register as a | ||||
3044 | // 64 bit value and vice-versa. | ||||
3045 | SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, | ||||
3046 | ExtOrTruncConversion Conv) { | ||||
3047 | SDLoc dl(NatWidthRes); | ||||
3048 | |||||
3049 | // For reinterpreting 32-bit values as 64 bit values, we generate | ||||
3050 | // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> | ||||
3051 | if (Conv == ExtOrTruncConversion::Ext) { | ||||
3052 | SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); | ||||
3053 | SDValue SubRegIdx = | ||||
3054 | CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); | ||||
3055 | return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, | ||||
3056 | ImDef, NatWidthRes, SubRegIdx), 0); | ||||
3057 | } | ||||
3058 | |||||
3059 | assert(Conv == ExtOrTruncConversion::Trunc &&(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc && "Unknown convertion between 32 and 64 bit values." ) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3060, __extension__ __PRETTY_FUNCTION__)) | ||||
3060 | "Unknown convertion between 32 and 64 bit values.")(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc && "Unknown convertion between 32 and 64 bit values." ) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3060, __extension__ __PRETTY_FUNCTION__)); | ||||
3061 | // For reinterpreting 64-bit values as 32-bit values, we just need to | ||||
3062 | // EXTRACT_SUBREG (i.e. extract the low word). | ||||
3063 | SDValue SubRegIdx = | ||||
3064 | CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); | ||||
3065 | return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, | ||||
3066 | NatWidthRes, SubRegIdx), 0); | ||||
3067 | } | ||||
3068 | |||||
3069 | // Produce a GPR sequence for compound comparisons (<=, >=) against zero. | ||||
3070 | // Handle both zero-extensions and sign-extensions. | ||||
3071 | SDValue | ||||
3072 | IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, | ||||
3073 | ZeroCompare CmpTy) { | ||||
3074 | EVT InVT = LHS.getValueType(); | ||||
3075 | bool Is32Bit = InVT == MVT::i32; | ||||
3076 | SDValue ToExtend; | ||||
3077 | |||||
3078 | // Produce the value that needs to be either zero or sign extended. | ||||
3079 | switch (CmpTy) { | ||||
3080 | case ZeroCompare::GEZExt: | ||||
3081 | case ZeroCompare::GESExt: | ||||
3082 | ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, | ||||
3083 | dl, InVT, LHS, LHS), 0); | ||||
3084 | break; | ||||
3085 | case ZeroCompare::LEZExt: | ||||
3086 | case ZeroCompare::LESExt: { | ||||
3087 | if (Is32Bit) { | ||||
3088 | // Upper 32 bits cannot be undefined for this sequence. | ||||
3089 | LHS = signExtendInputIfNeeded(LHS); | ||||
3090 | SDValue Neg = | ||||
3091 | SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); | ||||
3092 | ToExtend = | ||||
3093 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3094 | Neg, S->getI64Imm(1, dl), | ||||
3095 | S->getI64Imm(63, dl)), 0); | ||||
3096 | } else { | ||||
3097 | SDValue Addi = | ||||
3098 | SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, | ||||
3099 | S->getI64Imm(~0ULL, dl)), 0); | ||||
3100 | ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, | ||||
3101 | Addi, LHS), 0); | ||||
3102 | } | ||||
3103 | break; | ||||
3104 | } | ||||
3105 | } | ||||
3106 | |||||
3107 | // For 64-bit sequences, the extensions are the same for the GE/LE cases. | ||||
3108 | if (!Is32Bit && | ||||
3109 | (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) | ||||
3110 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3111 | ToExtend, S->getI64Imm(1, dl), | ||||
3112 | S->getI64Imm(63, dl)), 0); | ||||
3113 | if (!Is32Bit && | ||||
3114 | (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) | ||||
3115 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, | ||||
3116 | S->getI64Imm(63, dl)), 0); | ||||
3117 | |||||
3118 | assert(Is32Bit && "Should have handled the 32-bit sequences above.")(static_cast <bool> (Is32Bit && "Should have handled the 32-bit sequences above." ) ? void (0) : __assert_fail ("Is32Bit && \"Should have handled the 32-bit sequences above.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3118, __extension__ __PRETTY_FUNCTION__)); | ||||
3119 | // For 32-bit sequences, the extensions differ between GE/LE cases. | ||||
3120 | switch (CmpTy) { | ||||
3121 | case ZeroCompare::GEZExt: { | ||||
3122 | SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), | ||||
3123 | S->getI32Imm(31, dl) }; | ||||
3124 | return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, | ||||
3125 | ShiftOps), 0); | ||||
3126 | } | ||||
3127 | case ZeroCompare::GESExt: | ||||
3128 | return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, | ||||
3129 | S->getI32Imm(31, dl)), 0); | ||||
3130 | case ZeroCompare::LEZExt: | ||||
3131 | return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, | ||||
3132 | S->getI32Imm(1, dl)), 0); | ||||
3133 | case ZeroCompare::LESExt: | ||||
3134 | return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, | ||||
3135 | S->getI32Imm(-1, dl)), 0); | ||||
3136 | } | ||||
3137 | |||||
3138 | // The above case covers all the enumerators so it can't have a default clause | ||||
3139 | // to avoid compiler warnings. | ||||
3140 | llvm_unreachable("Unknown zero-comparison type.")::llvm::llvm_unreachable_internal("Unknown zero-comparison type." , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3140); | ||||
3141 | } | ||||
3142 | |||||
3143 | /// Produces a zero-extended result of comparing two 32-bit values according to | ||||
3144 | /// the passed condition code. | ||||
3145 | SDValue | ||||
3146 | IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, | ||||
3147 | ISD::CondCode CC, | ||||
3148 | int64_t RHSValue, SDLoc dl) { | ||||
3149 | if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || | ||||
3150 | CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) | ||||
3151 | return SDValue(); | ||||
3152 | bool IsRHSZero = RHSValue == 0; | ||||
3153 | bool IsRHSOne = RHSValue == 1; | ||||
3154 | bool IsRHSNegOne = RHSValue == -1LL; | ||||
3155 | switch (CC) { | ||||
3156 | default: return SDValue(); | ||||
3157 | case ISD::SETEQ: { | ||||
3158 | // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) | ||||
3159 | // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) | ||||
3160 | SDValue Xor = IsRHSZero ? LHS : | ||||
3161 | SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); | ||||
3162 | SDValue Clz = | ||||
3163 | SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); | ||||
3164 | SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), | ||||
3165 | S->getI32Imm(31, dl) }; | ||||
3166 | return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, | ||||
3167 | ShiftOps), 0); | ||||
3168 | } | ||||
3169 | case ISD::SETNE: { | ||||
3170 | // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) | ||||
3171 | // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) | ||||
3172 | SDValue Xor = IsRHSZero ? LHS : | ||||
3173 | SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); | ||||
3174 | SDValue Clz = | ||||
3175 | SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); | ||||
3176 | SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), | ||||
3177 | S->getI32Imm(31, dl) }; | ||||
3178 | SDValue Shift = | ||||
3179 | SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); | ||||
3180 | return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, | ||||
3181 | S->getI32Imm(1, dl)), 0); | ||||
3182 | } | ||||
3183 | case ISD::SETGE: { | ||||
3184 | // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) | ||||
3185 | // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) | ||||
3186 | if(IsRHSZero) | ||||
3187 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); | ||||
3188 | |||||
3189 | // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) | ||||
3190 | // by swapping inputs and falling through. | ||||
3191 | std::swap(LHS, RHS); | ||||
3192 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3193 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3194 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3195 | } | ||||
3196 | case ISD::SETLE: { | ||||
3197 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3198 | return SDValue(); | ||||
3199 | // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) | ||||
3200 | // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) | ||||
3201 | if(IsRHSZero) { | ||||
3202 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3203 | return SDValue(); | ||||
3204 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); | ||||
3205 | } | ||||
3206 | |||||
3207 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3208 | LHS = signExtendInputIfNeeded(LHS); | ||||
3209 | RHS = signExtendInputIfNeeded(RHS); | ||||
3210 | SDValue Sub = | ||||
3211 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); | ||||
3212 | SDValue Shift = | ||||
3213 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, | ||||
3214 | S->getI64Imm(1, dl), S->getI64Imm(63, dl)), | ||||
3215 | 0); | ||||
3216 | return | ||||
3217 | SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, | ||||
3218 | MVT::i64, Shift, S->getI32Imm(1, dl)), 0); | ||||
3219 | } | ||||
3220 | case ISD::SETGT: { | ||||
3221 | // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) | ||||
3222 | // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) | ||||
3223 | // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) | ||||
3224 | // Handle SETGT -1 (which is equivalent to SETGE 0). | ||||
3225 | if (IsRHSNegOne) | ||||
3226 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); | ||||
3227 | |||||
3228 | if (IsRHSZero) { | ||||
3229 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3230 | return SDValue(); | ||||
3231 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3232 | LHS = signExtendInputIfNeeded(LHS); | ||||
3233 | RHS = signExtendInputIfNeeded(RHS); | ||||
3234 | SDValue Neg = | ||||
3235 | SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); | ||||
3236 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3237 | Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); | ||||
3238 | } | ||||
3239 | // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as | ||||
3240 | // (%b < %a) by swapping inputs and falling through. | ||||
3241 | std::swap(LHS, RHS); | ||||
3242 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3243 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3244 | IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; | ||||
3245 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3246 | } | ||||
3247 | case ISD::SETLT: { | ||||
3248 | // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) | ||||
3249 | // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) | ||||
3250 | // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) | ||||
3251 | // Handle SETLT 1 (which is equivalent to SETLE 0). | ||||
3252 | if (IsRHSOne) { | ||||
3253 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3254 | return SDValue(); | ||||
3255 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); | ||||
3256 | } | ||||
3257 | |||||
3258 | if (IsRHSZero) { | ||||
3259 | SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), | ||||
3260 | S->getI32Imm(31, dl) }; | ||||
3261 | return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, | ||||
3262 | ShiftOps), 0); | ||||
3263 | } | ||||
3264 | |||||
3265 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3266 | return SDValue(); | ||||
3267 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3268 | LHS = signExtendInputIfNeeded(LHS); | ||||
3269 | RHS = signExtendInputIfNeeded(RHS); | ||||
3270 | SDValue SUBFNode = | ||||
3271 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); | ||||
3272 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3273 | SUBFNode, S->getI64Imm(1, dl), | ||||
3274 | S->getI64Imm(63, dl)), 0); | ||||
3275 | } | ||||
3276 | case ISD::SETUGE: | ||||
3277 | // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %a, %b), 63), 1) | ||||
3278 | // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %b, %a), 63), 1) | ||||
3279 | std::swap(LHS, RHS); | ||||
3280 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3281 | case ISD::SETULE: { | ||||
3282 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3283 | return SDValue(); | ||||
3284 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3285 | LHS = zeroExtendInputIfNeeded(LHS); | ||||
3286 | RHS = zeroExtendInputIfNeeded(RHS); | ||||
3287 | SDValue Subtract = | ||||
3288 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); | ||||
3289 | SDValue SrdiNode = | ||||
3290 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3291 | Subtract, S->getI64Imm(1, dl), | ||||
3292 | S->getI64Imm(63, dl)), 0); | ||||
3293 | return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, | ||||
3294 | S->getI32Imm(1, dl)), 0); | ||||
3295 | } | ||||
3296 | case ISD::SETUGT: | ||||
3297 | // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) | ||||
3298 | // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) | ||||
3299 | std::swap(LHS, RHS); | ||||
3300 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3301 | case ISD::SETULT: { | ||||
3302 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3303 | return SDValue(); | ||||
3304 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3305 | LHS = zeroExtendInputIfNeeded(LHS); | ||||
3306 | RHS = zeroExtendInputIfNeeded(RHS); | ||||
3307 | SDValue Subtract = | ||||
3308 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); | ||||
3309 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3310 | Subtract, S->getI64Imm(1, dl), | ||||
3311 | S->getI64Imm(63, dl)), 0); | ||||
3312 | } | ||||
3313 | } | ||||
3314 | } | ||||
3315 | |||||
3316 | /// Produces a sign-extended result (-1 for true, 0 for false) of comparing two | ||||
3317 | /// 32-bit values according to the passed condition code. An empty SDValue is returned when the CmpInGPR mode rules the sequence out or \p CC is not handled. | ||||
3318 | SDValue | ||||
3319 | IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, | ||||
3320 | ISD::CondCode CC, | ||||
3321 | int64_t RHSValue, SDLoc dl) { | ||||
3322 | if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || | ||||
3323 | CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) | ||||
3324 | return SDValue(); | ||||
3325 | bool IsRHSZero = RHSValue == 0; | ||||
3326 | bool IsRHSOne = RHSValue == 1; | ||||
3327 | bool IsRHSNegOne = RHSValue == -1LL; | ||||
3328 | |||||
3329 | switch (CC) { | ||||
3330 | default: return SDValue(); | ||||
3331 | case ISD::SETEQ: { | ||||
3332 | // (sext (setcc %a, %b, seteq)) -> | ||||
3333 | // (neg (lshr (ctlz (xor %a, %b)), 5)) | ||||
3334 | // (sext (setcc %a, 0, seteq)) -> | ||||
3335 | // (neg (lshr (ctlz %a), 5)) | ||||
3336 | SDValue CountInput = IsRHSZero ? LHS : | ||||
3337 | SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); | ||||
3338 | SDValue Cntlzw = | ||||
3339 | SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); | ||||
3340 | SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl), | ||||
3341 | S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; | ||||
3342 | SDValue Slwi = | ||||
3343 | SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); | ||||
3344 | return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); | ||||
3345 | } | ||||
3346 | case ISD::SETNE: { | ||||
3347 | // Bitwise xor the operands, count leading zeros, shift right by 5 bits and | ||||
3348 | // flip the bit, finally take 2's complement. | ||||
3349 | // (sext (setcc %a, %b, setne)) -> | ||||
3350 | // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) | ||||
3351 | // Same as above, but the first xor is not needed. | ||||
3352 | // (sext (setcc %a, 0, setne)) -> | ||||
3353 | // (neg (xor (lshr (ctlz %a), 5), 1)) | ||||
3354 | SDValue Xor = IsRHSZero ? LHS : | ||||
3355 | SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); | ||||
3356 | SDValue Clz = | ||||
3357 | SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); | ||||
3358 | SDValue ShiftOps[] = | ||||
3359 | { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; | ||||
3360 | SDValue Shift = | ||||
3361 | SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); | ||||
3362 | SDValue Xori = | ||||
3363 | SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, | ||||
3364 | S->getI32Imm(1, dl)), 0); | ||||
3365 | return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); | ||||
3366 | } | ||||
3367 | case ISD::SETGE: { | ||||
3368 | // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1) | ||||
3369 | // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31) | ||||
3370 | if (IsRHSZero) | ||||
3371 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); | ||||
3372 | |||||
3373 | // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) | ||||
3374 | // by swapping inputs and falling through. | ||||
3375 | std::swap(LHS, RHS); | ||||
3376 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3377 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3378 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3379 | } | ||||
3380 | case ISD::SETLE: { | ||||
3381 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3382 | return SDValue(); | ||||
3383 | // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1) | ||||
3384 | // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) | ||||
3385 | if (IsRHSZero) | ||||
3386 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); | ||||
3387 | |||||
3388 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3389 | LHS = signExtendInputIfNeeded(LHS); | ||||
3390 | RHS = signExtendInputIfNeeded(RHS); | ||||
3391 | SDValue SUBFNode = | ||||
3392 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, | ||||
3393 | LHS, RHS), 0); | ||||
3394 | SDValue Srdi = | ||||
3395 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3396 | SUBFNode, S->getI64Imm(1, dl), | ||||
3397 | S->getI64Imm(63, dl)), 0); | ||||
3398 | return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, | ||||
3399 | S->getI32Imm(-1, dl)), 0); | ||||
3400 | } | ||||
3401 | case ISD::SETGT: { | ||||
3402 | // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63) | ||||
3403 | // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31) | ||||
3404 | // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63) | ||||
3405 | if (IsRHSNegOne) | ||||
3406 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); | ||||
3407 | if (IsRHSZero) { | ||||
3408 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3409 | return SDValue(); | ||||
3410 | // The upper 32-bits of the register can't be undefined for this sequence. NOTE(review): only LHS feeds the NEG8 below; the extended RHS is not used in this branch. | ||||
3411 | LHS = signExtendInputIfNeeded(LHS); | ||||
3412 | RHS = signExtendInputIfNeeded(RHS); | ||||
3413 | SDValue Neg = | ||||
3414 | SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); | ||||
3415 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg, | ||||
3416 | S->getI64Imm(63, dl)), 0); | ||||
3417 | } | ||||
3418 | // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as | ||||
3419 | // (%b < %a) by swapping inputs and falling through. | ||||
3420 | std::swap(LHS, RHS); | ||||
3421 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3422 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3423 | IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; | ||||
3424 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3425 | } | ||||
3426 | case ISD::SETLT: { | ||||
3427 | // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63) | ||||
3428 | // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1) | ||||
3429 | // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31) | ||||
3430 | if (IsRHSOne) { | ||||
3431 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3432 | return SDValue(); | ||||
3433 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); | ||||
3434 | } | ||||
3435 | if (IsRHSZero) | ||||
3436 | return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS, | ||||
3437 | S->getI32Imm(31, dl)), 0); | ||||
3438 | |||||
3439 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3440 | return SDValue(); | ||||
3441 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3442 | LHS = signExtendInputIfNeeded(LHS); | ||||
3443 | RHS = signExtendInputIfNeeded(RHS); | ||||
3444 | SDValue SUBFNode = | ||||
3445 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); | ||||
3446 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, | ||||
3447 | SUBFNode, S->getI64Imm(63, dl)), 0); | ||||
3448 | } | ||||
3449 | case ISD::SETUGE: | ||||
3450 | // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) | ||||
3451 | // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) | ||||
3452 | std::swap(LHS, RHS); | ||||
3453 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3454 | case ISD::SETULE: { | ||||
3455 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3456 | return SDValue(); | ||||
3457 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3458 | LHS = zeroExtendInputIfNeeded(LHS); | ||||
3459 | RHS = zeroExtendInputIfNeeded(RHS); | ||||
3460 | SDValue Subtract = | ||||
3461 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); | ||||
3462 | SDValue Shift = | ||||
3463 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, | ||||
3464 | S->getI32Imm(1, dl), S->getI32Imm(63,dl)), | ||||
3465 | 0); | ||||
3466 | return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift, | ||||
3467 | S->getI32Imm(-1, dl)), 0); | ||||
3468 | } | ||||
3469 | case ISD::SETUGT: | ||||
3470 | // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) | ||||
3471 | // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63) | ||||
3472 | std::swap(LHS, RHS); | ||||
3473 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3474 | case ISD::SETULT: { | ||||
3475 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3476 | return SDValue(); | ||||
3477 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3478 | LHS = zeroExtendInputIfNeeded(LHS); | ||||
3479 | RHS = zeroExtendInputIfNeeded(RHS); | ||||
3480 | SDValue Subtract = | ||||
3481 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); | ||||
3482 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, | ||||
3483 | Subtract, S->getI64Imm(63, dl)), 0); | ||||
3484 | } | ||||
3485 | } | ||||
3486 | } | ||||
3487 | |||||
3488 | /// Produces a zero-extended result (1 for true, 0 for false) of comparing two | ||||
3489 | /// 64-bit values according to the passed condition code. An empty SDValue is returned when the CmpInGPR mode rules the sequence out or \p CC is not handled. | ||||
3490 | SDValue | ||||
3491 | IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, | ||||
3492 | ISD::CondCode CC, | ||||
3493 | int64_t RHSValue, SDLoc dl) { | ||||
3494 | if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || | ||||
3495 | CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext) | ||||
3496 | return SDValue(); | ||||
3497 | bool IsRHSZero = RHSValue == 0; | ||||
3498 | bool IsRHSOne = RHSValue == 1; | ||||
3499 | bool IsRHSNegOne = RHSValue == -1LL; | ||||
3500 | switch (CC) { | ||||
3501 | default: return SDValue(); | ||||
3502 | case ISD::SETEQ: { | ||||
3503 | // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) | ||||
3504 | // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) | ||||
3505 | SDValue Xor = IsRHSZero ? LHS : | ||||
3506 | SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); | ||||
3507 | SDValue Clz = | ||||
3508 | SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); | ||||
3509 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, | ||||
3510 | S->getI64Imm(58, dl), | ||||
3511 | S->getI64Imm(63, dl)), 0); | ||||
3512 | } | ||||
3513 | case ISD::SETNE: { | ||||
3514 | // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) | ||||
3515 | // (zext (setcc %a, %b, setne)) -> (sube addc.reg, (xor %a, %b), addc.CA) | ||||
3516 | // {addcz.reg, addcz.CA} = (addcarry %a, -1) | ||||
3517 | // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, %a, addcz.CA) | ||||
3518 | SDValue Xor = IsRHSZero ? LHS : | ||||
3519 | SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); | ||||
3520 | SDValue AC = | ||||
3521 | SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, | ||||
3522 | Xor, S->getI32Imm(~0U, dl)), 0); | ||||
3523 | return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, | ||||
3524 | Xor, AC.getValue(1)), 0); | ||||
3525 | } | ||||
3526 | case ISD::SETGE: { | ||||
3527 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3528 | // (zext (setcc %a, %b, setge)) -> | ||||
3529 | // (adde (lshr %b, 63), (ashr %a, 63), subc.CA) | ||||
3530 | // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63) | ||||
3531 | if (IsRHSZero) | ||||
3532 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); | ||||
3533 | std::swap(LHS, RHS); | ||||
3534 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3535 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3536 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3537 | } | ||||
3538 | case ISD::SETLE: { | ||||
3539 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3540 | // (zext (setcc %a, %b, setle)) -> | ||||
3541 | // (adde (lshr %a, 63), (ashr %b, 63), subc.CA) | ||||
3542 | // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63) | ||||
3543 | if (IsRHSZero) | ||||
3544 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); | ||||
3545 | SDValue ShiftL = | ||||
3546 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, | ||||
3547 | S->getI64Imm(1, dl), | ||||
3548 | S->getI64Imm(63, dl)), 0); | ||||
3549 | SDValue ShiftR = | ||||
3550 | SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, | ||||
3551 | S->getI64Imm(63, dl)), 0); | ||||
3552 | SDValue SubtractCarry = | ||||
3553 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3554 | LHS, RHS), 1); | ||||
3555 | return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, | ||||
3556 | ShiftR, ShiftL, SubtractCarry), 0); | ||||
3557 | } | ||||
3558 | case ISD::SETGT: { | ||||
3559 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3560 | // (zext (setcc %a, %b, setgt)) -> | ||||
3561 | // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) | ||||
3562 | // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63) | ||||
3563 | if (IsRHSNegOne) | ||||
3564 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); | ||||
3565 | if (IsRHSZero) { | ||||
3566 | SDValue Addi = | ||||
3567 | SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, | ||||
3568 | S->getI64Imm(~0ULL, dl)), 0); | ||||
3569 | SDValue Nor = | ||||
3570 | SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0); | ||||
3571 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, | ||||
3572 | S->getI64Imm(1, dl), | ||||
3573 | S->getI64Imm(63, dl)), 0); | ||||
3574 | } | ||||
3575 | std::swap(LHS, RHS); | ||||
3576 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3577 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3578 | IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; | ||||
3579 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3580 | } | ||||
3581 | case ISD::SETLT: { | ||||
3582 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3583 | // (zext (setcc %a, %b, setlt)) -> | ||||
3584 | // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) | ||||
3585 | // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) | ||||
3586 | if (IsRHSOne) | ||||
3587 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); | ||||
3588 | if (IsRHSZero) | ||||
3589 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, | ||||
3590 | S->getI64Imm(1, dl), | ||||
3591 | S->getI64Imm(63, dl)), 0); | ||||
3592 | SDValue SRADINode = | ||||
3593 | SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, | ||||
3594 | LHS, S->getI64Imm(63, dl)), 0); | ||||
3595 | SDValue SRDINode = | ||||
3596 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3597 | RHS, S->getI64Imm(1, dl), | ||||
3598 | S->getI64Imm(63, dl)), 0); | ||||
3599 | SDValue SUBFC8Carry = | ||||
3600 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3601 | RHS, LHS), 1); | ||||
3602 | SDValue ADDE8Node = | ||||
3603 | SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, | ||||
3604 | SRDINode, SRADINode, SUBFC8Carry), 0); | ||||
3605 | return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, | ||||
3606 | ADDE8Node, S->getI64Imm(1, dl)), 0); | ||||
3607 | } | ||||
3608 | case ISD::SETUGE: | ||||
3609 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3610 | // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) | ||||
3611 | std::swap(LHS, RHS); | ||||
3612 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3613 | case ISD::SETULE: { | ||||
3614 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3615 | // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) | ||||
3616 | SDValue SUBFC8Carry = | ||||
3617 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3618 | LHS, RHS), 1); | ||||
3619 | SDValue SUBFE8Node = | ||||
3620 | SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, | ||||
3621 | LHS, LHS, SUBFC8Carry), 0); | ||||
3622 | return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, | ||||
3623 | SUBFE8Node, S->getI64Imm(1, dl)), 0); | ||||
3624 | } | ||||
3625 | case ISD::SETUGT: | ||||
3626 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3627 | // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) | ||||
3628 | std::swap(LHS, RHS); | ||||
3629 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3630 | case ISD::SETULT: { | ||||
3631 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3632 | // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) | ||||
3633 | SDValue SubtractCarry = | ||||
3634 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3635 | RHS, LHS), 1); | ||||
3636 | SDValue ExtSub = | ||||
3637 | SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, | ||||
3638 | LHS, LHS, SubtractCarry), 0); | ||||
3639 | return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, | ||||
3640 | ExtSub), 0); | ||||
3641 | } | ||||
3642 | } | ||||
3643 | } | ||||
3644 | |||||
3645 | /// Produces a sign-extended result (-1 for true, 0 for false) of comparing two | ||||
3646 | /// 64-bit values according to the passed condition code. An empty SDValue is returned when the CmpInGPR mode rules the sequence out or \p CC is not handled. | ||||
3647 | SDValue | ||||
3648 | IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, | ||||
3649 | ISD::CondCode CC, | ||||
3650 | int64_t RHSValue, SDLoc dl) { | ||||
3651 | if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || | ||||
3652 | CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext) | ||||
3653 | return SDValue(); | ||||
3654 | bool IsRHSZero = RHSValue == 0; | ||||
3655 | bool IsRHSOne = RHSValue == 1; | ||||
3656 | bool IsRHSNegOne = RHSValue == -1LL; | ||||
3657 | switch (CC) { | ||||
3658 | default: return SDValue(); | ||||
3659 | case ISD::SETEQ: { | ||||
3660 | // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) | ||||
3661 | // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) | ||||
3662 | // {addcz.reg, addcz.CA} = (addcarry %a, -1) | ||||
3663 | // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) | ||||
3664 | SDValue AddInput = IsRHSZero ? LHS : | ||||
3665 | SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); | ||||
3666 | SDValue Addic = | ||||
3667 | SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, | ||||
3668 | AddInput, S->getI32Imm(~0U, dl)), 0); | ||||
3669 | return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, | ||||
3670 | Addic, Addic.getValue(1)), 0); | ||||
3671 | } | ||||
3672 | case ISD::SETNE: { | ||||
3673 | // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) | ||||
3674 | // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) | ||||
3675 | // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) | ||||
3676 | // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) | ||||
3677 | SDValue Xor = IsRHSZero ? LHS : | ||||
3678 | SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); | ||||
3679 | SDValue SC = | ||||
3680 | SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, | ||||
3681 | Xor, S->getI32Imm(0, dl)), 0); | ||||
3682 | return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, | ||||
3683 | SC, SC.getValue(1)), 0); | ||||
3684 | } | ||||
3685 | case ISD::SETGE: { | ||||
3686 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3687 | // (sext (setcc %a, %b, setge)) -> | ||||
3688 | // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA)) | ||||
3689 | // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63)) | ||||
3690 | if (IsRHSZero) | ||||
3691 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); | ||||
3692 | std::swap(LHS, RHS); | ||||
3693 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3694 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3695 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3696 | } | ||||
3697 | case ISD::SETLE: { | ||||
3698 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3699 | // (sext (setcc %a, %b, setle)) -> | ||||
3700 | // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA)) | ||||
3701 | // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63) | ||||
3702 | if (IsRHSZero) | ||||
3703 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); | ||||
3704 | SDValue ShiftR = | ||||
3705 | SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, | ||||
3706 | S->getI64Imm(63, dl)), 0); | ||||
3707 | SDValue ShiftL = | ||||
3708 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, | ||||
3709 | S->getI64Imm(1, dl), | ||||
3710 | S->getI64Imm(63, dl)), 0); | ||||
3711 | SDValue SubtractCarry = | ||||
3712 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3713 | LHS, RHS), 1); | ||||
3714 | SDValue Adde = | ||||
3715 | SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, | ||||
3716 | ShiftR, ShiftL, SubtractCarry), 0); | ||||
3717 | return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0); | ||||
3718 | } | ||||
3719 | case ISD::SETGT: { | ||||
3720 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3721 | // (sext (setcc %a, %b, setgt)) -> | ||||
3722 | // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) | ||||
3723 | // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63) | ||||
3724 | if (IsRHSNegOne) | ||||
3725 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); | ||||
3726 | if (IsRHSZero) { | ||||
3727 | SDValue Add = | ||||
3728 | SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, | ||||
3729 | S->getI64Imm(-1, dl)), 0); | ||||
3730 | SDValue Nor = | ||||
3731 | SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0); | ||||
3732 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor, | ||||
3733 | S->getI64Imm(63, dl)), 0); | ||||
3734 | } | ||||
3735 | std::swap(LHS, RHS); | ||||
3736 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3737 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3738 | IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; | ||||
3739 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3740 | } | ||||
3741 | case ISD::SETLT: { | ||||
3742 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3743 | // (sext (setcc %a, %b, setlt)) -> | ||||
3744 | // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) | ||||
3745 | // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63) | ||||
3746 | if (IsRHSOne) | ||||
3747 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); | ||||
3748 | if (IsRHSZero) { | ||||
3749 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, | ||||
3750 | S->getI64Imm(63, dl)), 0); | ||||
3751 | } | ||||
3752 | SDValue SRADINode = | ||||
3753 | SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, | ||||
3754 | LHS, S->getI64Imm(63, dl)), 0); | ||||
3755 | SDValue SRDINode = | ||||
3756 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3757 | RHS, S->getI64Imm(1, dl), | ||||
3758 | S->getI64Imm(63, dl)), 0); | ||||
3759 | SDValue SUBFC8Carry = | ||||
3760 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3761 | RHS, LHS), 1); | ||||
3762 | SDValue ADDE8Node = | ||||
3763 | SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, | ||||
3764 | SRDINode, SRADINode, SUBFC8Carry), 0); | ||||
3765 | SDValue XORI8Node = | ||||
3766 | SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, | ||||
3767 | ADDE8Node, S->getI64Imm(1, dl)), 0); | ||||
3768 | return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, | ||||
3769 | XORI8Node), 0); | ||||
3770 | } | ||||
3771 | case ISD::SETUGE: | ||||
3772 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3773 | // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA) | ||||
3774 | std::swap(LHS, RHS); | ||||
3775 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3776 | case ISD::SETULE: { | ||||
3777 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3778 | // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA) | ||||
3779 | SDValue SubtractCarry = | ||||
3780 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3781 | LHS, RHS), 1); | ||||
3782 | SDValue ExtSub = | ||||
3783 | SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, | ||||
3784 | LHS, SubtractCarry), 0); | ||||
3785 | return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, | ||||
3786 | ExtSub, ExtSub), 0); | ||||
3787 | } | ||||
3788 | case ISD::SETUGT: | ||||
3789 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3790 | // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) | ||||
3791 | std::swap(LHS, RHS); | ||||
3792 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3793 | case ISD::SETULT: { | ||||
3794 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3795 | // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) | ||||
3796 | SDValue SubCarry = | ||||
3797 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3798 | RHS, LHS), 1); | ||||
3799 | return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, | ||||
3800 | LHS, LHS, SubCarry), 0); | ||||
3801 | } | ||||
3802 | } | ||||
3803 | } | ||||
3804 | |||||
3805 | /// Do all uses of this SDValue need the result in a GPR? Returns true when \p Compare has a single use, or when every use is a sign/zero extend, a select, or a logic op; otherwise bumps OmittedForNonExtendUses and returns false. | ||||
3806 | /// This is meant to be used on values that have type i1 since | ||||
3807 | /// it is somewhat meaningless to ask if values of other types | ||||
3808 | /// should be kept in GPR's. \p Compare must be an ISD::SETCC node (asserted below); \p CurDAG is not referenced in the body. | ||||
3809 | static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { | ||||
3810 | assert(Compare.getOpcode() == ISD::SETCC &&(static_cast <bool> (Compare.getOpcode() == ISD::SETCC && "An ISD::SETCC node required here.") ? void (0) : __assert_fail ("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3811, __extension__ __PRETTY_FUNCTION__)) | ||||
3811 | "An ISD::SETCC node required here.")(static_cast <bool> (Compare.getOpcode() == ISD::SETCC && "An ISD::SETCC node required here.") ? void (0) : __assert_fail ("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3811, __extension__ __PRETTY_FUNCTION__)); | ||||
3812 | |||||
3813 | // For values that have a single use, the caller should obviously already have | ||||
3814 | // checked if that use is an extending use. We check the other uses here. | ||||
3815 | if (Compare.hasOneUse()) | ||||
3816 | return true; | ||||
3817 | // We want the value in a GPR if it is being extended, used for a select, or | ||||
3818 | // used in logical operations. Any other kind of use rejects the conversion. | ||||
3819 | for (auto CompareUse : Compare.getNode()->uses()) | ||||
3820 | if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && | ||||
3821 | CompareUse->getOpcode() != ISD::ZERO_EXTEND && | ||||
3822 | CompareUse->getOpcode() != ISD::SELECT && | ||||
3823 | !isLogicOp(CompareUse->getOpcode())) { | ||||
3824 | OmittedForNonExtendUses++; | ||||
3825 | return false; | ||||
3826 | } | ||||
3827 | return true; | ||||
3828 | } | ||||
3829 | |||||
3830 | /// Returns an equivalent of a SETCC node but with the result the same width as | ||||
3831 | /// the inputs. This can also be used for SELECT_CC if either the true or false | ||||
3832 | /// values is a power of two while the other is zero. | ||||
3833 | SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, | ||||
3834 | SetccInGPROpts ConvOpts) { | ||||
3835 | assert((Compare.getOpcode() == ISD::SETCC ||(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here." ) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3837, __extension__ __PRETTY_FUNCTION__)) | ||||
3836 | Compare.getOpcode() == ISD::SELECT_CC) &&(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here." ) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3837, __extension__ __PRETTY_FUNCTION__)) | ||||
3837 | "An ISD::SETCC node required here.")(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here." ) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3837, __extension__ __PRETTY_FUNCTION__)); | ||||
3838 | |||||
3839 | // Don't convert this comparison to a GPR sequence because there are uses | ||||
3840 | // of the i1 result (i.e. uses that require the result in the CR). | ||||
3841 | if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) | ||||
3842 | return SDValue(); | ||||
3843 | |||||
3844 | SDValue LHS = Compare.getOperand(0); | ||||
3845 | SDValue RHS = Compare.getOperand(1); | ||||
3846 | |||||
3847 | // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. | ||||
3848 | int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; | ||||
3849 | ISD::CondCode CC = | ||||
3850 | cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); | ||||
3851 | EVT InputVT = LHS.getValueType(); | ||||
3852 | if (InputVT != MVT::i32 && InputVT != MVT::i64) | ||||
3853 | return SDValue(); | ||||
3854 | |||||
3855 | if (ConvOpts == SetccInGPROpts::ZExtInvert || | ||||
3856 | ConvOpts == SetccInGPROpts::SExtInvert) | ||||
3857 | CC = ISD::getSetCCInverse(CC, InputVT); | ||||
3858 | |||||
3859 | bool Inputs32Bit = InputVT == MVT::i32; | ||||
3860 | |||||
3861 | SDLoc dl(Compare); | ||||
3862 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3863 | int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX(9223372036854775807L); | ||||
3864 | bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || | ||||
3865 | ConvOpts == SetccInGPROpts::SExtInvert; | ||||
3866 | |||||
3867 | if (IsSext && Inputs32Bit) | ||||
3868 | return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); | ||||
3869 | else if (Inputs32Bit) | ||||
3870 | return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); | ||||
3871 | else if (IsSext) | ||||
3872 | return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); | ||||
3873 | return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); | ||||
3874 | } | ||||
3875 | |||||
3876 | } // end anonymous namespace | ||||
3877 | |||||
3878 | bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { | ||||
3879 | if (N->getValueType(0) != MVT::i32 && | ||||
3880 | N->getValueType(0) != MVT::i64) | ||||
3881 | return false; | ||||
3882 | |||||
3883 | // This optimization will emit code that assumes 64-bit registers | ||||
3884 | // so we don't want to run it in 32-bit mode. Also don't run it | ||||
3885 | // on functions that are not to be optimized. | ||||
3886 | if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) | ||||
3887 | return false; | ||||
3888 | |||||
3889 | // For POWER10, it is more profitable to use the set boolean extension | ||||
3890 | // instructions rather than the integer compare elimination codegen. | ||||
3891 | // Users can override this via the command line option, `--ppc-gpr-icmps`. | ||||
3892 | if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1()) | ||||
3893 | return false; | ||||
3894 | |||||
3895 | switch (N->getOpcode()) { | ||||
3896 | default: break; | ||||
3897 | case ISD::ZERO_EXTEND: | ||||
3898 | case ISD::SIGN_EXTEND: | ||||
3899 | case ISD::AND: | ||||
3900 | case ISD::OR: | ||||
3901 | case ISD::XOR: { | ||||
3902 | IntegerCompareEliminator ICmpElim(CurDAG, this); | ||||
3903 | if (SDNode *New = ICmpElim.Select(N)) { | ||||
3904 | ReplaceNode(N, New); | ||||
3905 | return true; | ||||
3906 | } | ||||
3907 | } | ||||
3908 | } | ||||
3909 | return false; | ||||
3910 | } | ||||
3911 | |||||
3912 | bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { | ||||
3913 | if (N->getValueType(0) != MVT::i32 && | ||||
3914 | N->getValueType(0) != MVT::i64) | ||||
3915 | return false; | ||||
3916 | |||||
3917 | if (!UseBitPermRewriter) | ||||
3918 | return false; | ||||
3919 | |||||
3920 | switch (N->getOpcode()) { | ||||
3921 | default: break; | ||||
3922 | case ISD::ROTL: | ||||
3923 | case ISD::SHL: | ||||
3924 | case ISD::SRL: | ||||
3925 | case ISD::AND: | ||||
3926 | case ISD::OR: { | ||||
3927 | BitPermutationSelector BPS(CurDAG); | ||||
3928 | if (SDNode *New = BPS.Select(N)) { | ||||
3929 | ReplaceNode(N, New); | ||||
3930 | return true; | ||||
3931 | } | ||||
3932 | return false; | ||||
3933 | } | ||||
3934 | } | ||||
3935 | |||||
3936 | return false; | ||||
3937 | } | ||||
3938 | |||||
3939 | /// SelectCC - Select a comparison of the specified values with the specified | ||||
3940 | /// condition code, returning the CR# of the expression. | ||||
3941 | SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
3942 | const SDLoc &dl, SDValue Chain) { | ||||
3943 | // Always select the LHS. | ||||
3944 | unsigned Opc; | ||||
3945 | |||||
3946 | if (LHS.getValueType() == MVT::i32) { | ||||
3947 | unsigned Imm; | ||||
3948 | if (CC == ISD::SETEQ || CC == ISD::SETNE) { | ||||
3949 | if (isInt32Immediate(RHS, Imm)) { | ||||
3950 | // SETEQ/SETNE comparison with 16-bit immediate, fold it. | ||||
3951 | if (isUInt<16>(Imm)) | ||||
3952 | return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, | ||||
3953 | getI32Imm(Imm & 0xFFFF, dl)), | ||||
3954 | 0); | ||||
3955 | // If this is a 16-bit signed immediate, fold it. | ||||
3956 | if (isInt<16>((int)Imm)) | ||||
3957 | return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, | ||||
3958 | getI32Imm(Imm & 0xFFFF, dl)), | ||||
3959 | 0); | ||||
3960 | |||||
3961 | // For non-equality comparisons, the default code would materialize the | ||||
3962 | // constant, then compare against it, like this: | ||||
3963 | // lis r2, 4660 | ||||
3964 | // ori r2, r2, 22136 | ||||
3965 | // cmpw cr0, r3, r2 | ||||
3966 | // Since we are just comparing for equality, we can emit this instead: | ||||
3967 | // xoris r0,r3,0x1234 | ||||
3968 | // cmplwi cr0,r0,0x5678 | ||||
3969 | // beq cr0,L6 | ||||
3970 | SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, | ||||
3971 | getI32Imm(Imm >> 16, dl)), 0); | ||||
3972 | return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, | ||||
3973 | getI32Imm(Imm & 0xFFFF, dl)), 0); | ||||
3974 | } | ||||
3975 | Opc = PPC::CMPLW; | ||||
3976 | } else if (ISD::isUnsignedIntSetCC(CC)) { | ||||
3977 | if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) | ||||
3978 | return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, | ||||
3979 | getI32Imm(Imm & 0xFFFF, dl)), 0); | ||||
3980 | Opc = PPC::CMPLW; | ||||
3981 | } else { | ||||
3982 | int16_t SImm; | ||||
3983 | if (isIntS16Immediate(RHS, SImm)) | ||||
3984 | return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, | ||||
3985 | getI32Imm((int)SImm & 0xFFFF, | ||||
3986 | dl)), | ||||
3987 | 0); | ||||
3988 | Opc = PPC::CMPW; | ||||
3989 | } | ||||
3990 | } else if (LHS.getValueType() == MVT::i64) { | ||||
3991 | uint64_t Imm; | ||||
3992 | if (CC == ISD::SETEQ || CC == ISD::SETNE) { | ||||
3993 | if (isInt64Immediate(RHS.getNode(), Imm)) { | ||||
3994 | // SETEQ/SETNE comparison with 16-bit immediate, fold it. | ||||
3995 | if (isUInt<16>(Imm)) | ||||
3996 | return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, | ||||
3997 | getI32Imm(Imm & 0xFFFF, dl)), | ||||
3998 | 0); | ||||
3999 | // If this is a 16-bit signed immediate, fold it. | ||||
4000 | if (isInt<16>(Imm)) | ||||
4001 | return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, | ||||
4002 | getI32Imm(Imm & 0xFFFF, dl)), | ||||
4003 | 0); | ||||
4004 | |||||
4005 | // For non-equality comparisons, the default code would materialize the | ||||
4006 | // constant, then compare against it, like this: | ||||
4007 | // lis r2, 4660 | ||||
4008 | // ori r2, r2, 22136 | ||||
4009 | // cmpd cr0, r3, r2 | ||||
4010 | // Since we are just comparing for equality, we can emit this instead: | ||||
4011 | // xoris r0,r3,0x1234 | ||||
4012 | // cmpldi cr0,r0,0x5678 | ||||
4013 | // beq cr0,L6 | ||||
4014 | if (isUInt<32>(Imm)) { | ||||
4015 | SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, | ||||
4016 | getI64Imm(Imm >> 16, dl)), 0); | ||||
4017 | return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, | ||||
4018 | getI64Imm(Imm & 0xFFFF, dl)), | ||||
4019 | 0); | ||||
4020 | } | ||||
4021 | } | ||||
4022 | Opc = PPC::CMPLD; | ||||
4023 | } else if (ISD::isUnsignedIntSetCC(CC)) { | ||||
4024 | if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) | ||||
4025 | return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, | ||||
4026 | getI64Imm(Imm & 0xFFFF, dl)), 0); | ||||
4027 | Opc = PPC::CMPLD; | ||||
4028 | } else { | ||||
4029 | int16_t SImm; | ||||
4030 | if (isIntS16Immediate(RHS, SImm)) | ||||
4031 | return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, | ||||
4032 | getI64Imm(SImm & 0xFFFF, dl)), | ||||
4033 | 0); | ||||
4034 | Opc = PPC::CMPD; | ||||
4035 | } | ||||
4036 | } else if (LHS.getValueType() == MVT::f32) { | ||||
4037 | if (Subtarget->hasSPE()) { | ||||
4038 | switch (CC) { | ||||
4039 | default: | ||||
4040 | case ISD::SETEQ: | ||||
4041 | case ISD::SETNE: | ||||
4042 | Opc = PPC::EFSCMPEQ; | ||||
4043 | break; | ||||
4044 | case ISD::SETLT: | ||||
4045 | case ISD::SETGE: | ||||
4046 | case ISD::SETOLT: | ||||
4047 | case ISD::SETOGE: | ||||
4048 | case ISD::SETULT: | ||||
4049 | case ISD::SETUGE: | ||||
4050 | Opc = PPC::EFSCMPLT; | ||||
4051 | break; | ||||
4052 | case ISD::SETGT: | ||||
4053 | case ISD::SETLE: | ||||
4054 | case ISD::SETOGT: | ||||
4055 | case ISD::SETOLE: | ||||
4056 | case ISD::SETUGT: | ||||
4057 | case ISD::SETULE: | ||||
4058 | Opc = PPC::EFSCMPGT; | ||||
4059 | break; | ||||
4060 | } | ||||
4061 | } else | ||||
4062 | Opc = PPC::FCMPUS; | ||||
4063 | } else if (LHS.getValueType() == MVT::f64) { | ||||
4064 | if (Subtarget->hasSPE()) { | ||||
4065 | switch (CC) { | ||||
4066 | default: | ||||
4067 | case ISD::SETEQ: | ||||
4068 | case ISD::SETNE: | ||||
4069 | Opc = PPC::EFDCMPEQ; | ||||
4070 | break; | ||||
4071 | case ISD::SETLT: | ||||
4072 | case ISD::SETGE: | ||||
4073 | case ISD::SETOLT: | ||||
4074 | case ISD::SETOGE: | ||||
4075 | case ISD::SETULT: | ||||
4076 | case ISD::SETUGE: | ||||
4077 | Opc = PPC::EFDCMPLT; | ||||
4078 | break; | ||||
4079 | case ISD::SETGT: | ||||
4080 | case ISD::SETLE: | ||||
4081 | case ISD::SETOGT: | ||||
4082 | case ISD::SETOLE: | ||||
4083 | case ISD::SETUGT: | ||||
4084 | case ISD::SETULE: | ||||
4085 | Opc = PPC::EFDCMPGT; | ||||
4086 | break; | ||||
4087 | } | ||||
4088 | } else | ||||
4089 | Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; | ||||
4090 | } else { | ||||
4091 | assert(LHS.getValueType() == MVT::f128 && "Unknown vt!")(static_cast <bool> (LHS.getValueType() == MVT::f128 && "Unknown vt!") ? void (0) : __assert_fail ("LHS.getValueType() == MVT::f128 && \"Unknown vt!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4091, __extension__ __PRETTY_FUNCTION__)); | ||||
4092 | assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector")(static_cast <bool> (Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector") ? void (0) : __assert_fail ("Subtarget->hasP9Vector() && \"XSCMPUQP requires Power9 Vector\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4092, __extension__ __PRETTY_FUNCTION__)); | ||||
4093 | Opc = PPC::XSCMPUQP; | ||||
4094 | } | ||||
4095 | if (Chain) | ||||
4096 | return SDValue( | ||||
4097 | CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain), | ||||
4098 | 0); | ||||
4099 | else | ||||
4100 | return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); | ||||
4101 | } | ||||
4102 | |||||
4103 | static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, | ||||
4104 | const PPCSubtarget *Subtarget) { | ||||
4105 | // For SPE instructions, the result is in GT bit of the CR | ||||
4106 | bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint(); | ||||
4107 | |||||
4108 | switch (CC) { | ||||
4109 | case ISD::SETUEQ: | ||||
4110 | case ISD::SETONE: | ||||
4111 | case ISD::SETOLE: | ||||
4112 | case ISD::SETOGE: | ||||
4113 | llvm_unreachable("Should be lowered by legalize!")::llvm::llvm_unreachable_internal("Should be lowered by legalize!" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4113); | ||||
4114 | default: llvm_unreachable("Unknown condition!")::llvm::llvm_unreachable_internal("Unknown condition!", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4114); | ||||
4115 | case ISD::SETOEQ: | ||||
4116 | case ISD::SETEQ: | ||||
4117 | return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ; | ||||
4118 | case ISD::SETUNE: | ||||
4119 | case ISD::SETNE: | ||||
4120 | return UseSPE ? PPC::PRED_LE : PPC::PRED_NE; | ||||
4121 | case ISD::SETOLT: | ||||
4122 | case ISD::SETLT: | ||||
4123 | return UseSPE ? PPC::PRED_GT : PPC::PRED_LT; | ||||
4124 | case ISD::SETULE: | ||||
4125 | case ISD::SETLE: | ||||
4126 | return PPC::PRED_LE; | ||||
4127 | case ISD::SETOGT: | ||||
4128 | case ISD::SETGT: | ||||
4129 | return PPC::PRED_GT; | ||||
4130 | case ISD::SETUGE: | ||||
4131 | case ISD::SETGE: | ||||
4132 | return UseSPE ? PPC::PRED_LE : PPC::PRED_GE; | ||||
4133 | case ISD::SETO: return PPC::PRED_NU; | ||||
4134 | case ISD::SETUO: return PPC::PRED_UN; | ||||
4135 | // These two are invalid for floating point. Assume we have int. | ||||
4136 | case ISD::SETULT: return PPC::PRED_LT; | ||||
4137 | case ISD::SETUGT: return PPC::PRED_GT; | ||||
4138 | } | ||||
4139 | } | ||||
4140 | |||||
4141 | /// getCRIdxForSetCC - Return the index of the condition register field | ||||
4142 | /// associated with the SetCC condition, and whether or not the field is | ||||
4143 | /// treated as inverted. That is, lt = 0; ge = 0 inverted. | ||||
4144 | static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { | ||||
4145 | Invert = false; | ||||
4146 | switch (CC) { | ||||
4147 | default: llvm_unreachable("Unknown condition!")::llvm::llvm_unreachable_internal("Unknown condition!", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4147); | ||||
4148 | case ISD::SETOLT: | ||||
4149 | case ISD::SETLT: return 0; // Bit #0 = SETOLT | ||||
4150 | case ISD::SETOGT: | ||||
4151 | case ISD::SETGT: return 1; // Bit #1 = SETOGT | ||||
4152 | case ISD::SETOEQ: | ||||
4153 | case ISD::SETEQ: return 2; // Bit #2 = SETOEQ | ||||
4154 | case ISD::SETUO: return 3; // Bit #3 = SETUO | ||||
4155 | case ISD::SETUGE: | ||||
4156 | case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE | ||||
4157 | case ISD::SETULE: | ||||
4158 | case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE | ||||
4159 | case ISD::SETUNE: | ||||
4160 | case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE | ||||
4161 | case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO | ||||
4162 | case ISD::SETUEQ: | ||||
4163 | case ISD::SETOGE: | ||||
4164 | case ISD::SETOLE: | ||||
4165 | case ISD::SETONE: | ||||
4166 | llvm_unreachable("Invalid branch code: should be expanded by legalize")::llvm::llvm_unreachable_internal("Invalid branch code: should be expanded by legalize" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4166); | ||||
4167 | // These are invalid for floating point. Assume integer. | ||||
4168 | case ISD::SETULT: return 0; | ||||
4169 | case ISD::SETUGT: return 1; | ||||
4170 | } | ||||
4171 | } | ||||
4172 | |||||
4173 | // getVCmpInst: return the vector compare instruction for the specified | ||||
4174 | // vector type and condition code. Since this is for altivec specific code, | ||||
4175 | // only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128, | ||||
4176 | // and v4f32). | ||||
4177 | static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, | ||||
4178 | bool HasVSX, bool &Swap, bool &Negate) { | ||||
4179 | Swap = false; | ||||
4180 | Negate = false; | ||||
4181 | |||||
4182 | if (VecVT.isFloatingPoint()) { | ||||
4183 | /* Handle some cases by swapping input operands. */ | ||||
4184 | switch (CC) { | ||||
4185 | case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; | ||||
4186 | case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; | ||||
4187 | case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; | ||||
4188 | case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; | ||||
4189 | case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; | ||||
4190 | case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; | ||||
4191 | default: break; | ||||
4192 | } | ||||
4193 | /* Handle some cases by negating the result. */ | ||||
4194 | switch (CC) { | ||||
4195 | case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; | ||||
4196 | case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; | ||||
4197 | case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; | ||||
4198 | case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; | ||||
4199 | default: break; | ||||
4200 | } | ||||
4201 | /* We have instructions implementing the remaining cases. */ | ||||
4202 | switch (CC) { | ||||
4203 | case ISD::SETEQ: | ||||
4204 | case ISD::SETOEQ: | ||||
4205 | if (VecVT == MVT::v4f32) | ||||
4206 | return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; | ||||
4207 | else if (VecVT == MVT::v2f64) | ||||
4208 | return PPC::XVCMPEQDP; | ||||
4209 | break; | ||||
4210 | case ISD::SETGT: | ||||
4211 | case ISD::SETOGT: | ||||
4212 | if (VecVT == MVT::v4f32) | ||||
4213 | return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; | ||||
4214 | else if (VecVT == MVT::v2f64) | ||||
4215 | return PPC::XVCMPGTDP; | ||||
4216 | break; | ||||
4217 | case ISD::SETGE: | ||||
4218 | case ISD::SETOGE: | ||||
4219 | if (VecVT == MVT::v4f32) | ||||
4220 | return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; | ||||
4221 | else if (VecVT == MVT::v2f64) | ||||
4222 | return PPC::XVCMPGEDP; | ||||
4223 | break; | ||||
4224 | default: | ||||
4225 | break; | ||||
4226 | } | ||||
4227 | llvm_unreachable("Invalid floating-point vector compare condition")::llvm::llvm_unreachable_internal("Invalid floating-point vector compare condition" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4227); | ||||
4228 | } else { | ||||
4229 | /* Handle some cases by swapping input operands. */ | ||||
4230 | switch (CC) { | ||||
4231 | case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; | ||||
4232 | case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; | ||||
4233 | case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; | ||||
4234 | case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; | ||||
4235 | default: break; | ||||
4236 | } | ||||
4237 | /* Handle some cases by negating the result. */ | ||||
4238 | switch (CC) { | ||||
4239 | case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; | ||||
4240 | case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; | ||||
4241 | case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; | ||||
4242 | case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; | ||||
4243 | default: break; | ||||
4244 | } | ||||
4245 | /* We have instructions implementing the remaining cases. */ | ||||
4246 | switch (CC) { | ||||
4247 | case ISD::SETEQ: | ||||
4248 | case ISD::SETUEQ: | ||||
4249 | if (VecVT == MVT::v16i8) | ||||
4250 | return PPC::VCMPEQUB; | ||||
4251 | else if (VecVT == MVT::v8i16) | ||||
4252 | return PPC::VCMPEQUH; | ||||
4253 | else if (VecVT == MVT::v4i32) | ||||
4254 | return PPC::VCMPEQUW; | ||||
4255 | else if (VecVT == MVT::v2i64) | ||||
4256 | return PPC::VCMPEQUD; | ||||
4257 | else if (VecVT == MVT::v1i128) | ||||
4258 | return PPC::VCMPEQUQ; | ||||
4259 | break; | ||||
4260 | case ISD::SETGT: | ||||
4261 | if (VecVT == MVT::v16i8) | ||||
4262 | return PPC::VCMPGTSB; | ||||
4263 | else if (VecVT == MVT::v8i16) | ||||
4264 | return PPC::VCMPGTSH; | ||||
4265 | else if (VecVT == MVT::v4i32) | ||||
4266 | return PPC::VCMPGTSW; | ||||
4267 | else if (VecVT == MVT::v2i64) | ||||
4268 | return PPC::VCMPGTSD; | ||||
4269 | else if (VecVT == MVT::v1i128) | ||||
4270 | return PPC::VCMPGTSQ; | ||||
4271 | break; | ||||
4272 | case ISD::SETUGT: | ||||
4273 | if (VecVT == MVT::v16i8) | ||||
4274 | return PPC::VCMPGTUB; | ||||
4275 | else if (VecVT == MVT::v8i16) | ||||
4276 | return PPC::VCMPGTUH; | ||||
4277 | else if (VecVT == MVT::v4i32) | ||||
4278 | return PPC::VCMPGTUW; | ||||
4279 | else if (VecVT == MVT::v2i64) | ||||
4280 | return PPC::VCMPGTUD; | ||||
4281 | else if (VecVT == MVT::v1i128) | ||||
4282 | return PPC::VCMPGTUQ; | ||||
4283 | break; | ||||
4284 | default: | ||||
4285 | break; | ||||
4286 | } | ||||
4287 | llvm_unreachable("Invalid integer vector compare condition")::llvm::llvm_unreachable_internal("Invalid integer vector compare condition" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4287); | ||||
4288 | } | ||||
4289 | } | ||||
4290 | |||||
4291 | bool PPCDAGToDAGISel::trySETCC(SDNode *N) { | ||||
4292 | SDLoc dl(N); | ||||
4293 | unsigned Imm; | ||||
4294 | bool IsStrict = N->isStrictFPOpcode(); | ||||
4295 | ISD::CondCode CC = | ||||
4296 | cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); | ||||
4297 | EVT PtrVT = | ||||
4298 | CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); | ||||
4299 | bool isPPC64 = (PtrVT == MVT::i64); | ||||
4300 | SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); | ||||
4301 | |||||
4302 | SDValue LHS = N->getOperand(IsStrict ? 1 : 0); | ||||
4303 | SDValue RHS = N->getOperand(IsStrict ? 2 : 1); | ||||
4304 | |||||
4305 | if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) { | ||||
4306 | // We can codegen setcc op, imm very efficiently compared to a brcond. | ||||
4307 | // Check for those cases here. | ||||
4308 | // setcc op, 0 | ||||
4309 | if (Imm == 0) { | ||||
4310 | SDValue Op = LHS; | ||||
4311 | switch (CC) { | ||||
4312 | default: break; | ||||
4313 | case ISD::SETEQ: { | ||||
4314 | Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); | ||||
4315 | SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), | ||||
4316 | getI32Imm(31, dl) }; | ||||
4317 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4318 | return true; | ||||
4319 | } | ||||
4320 | case ISD::SETNE: { | ||||
4321 | if (isPPC64) break; | ||||
4322 | SDValue AD = | ||||
4323 | SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, | ||||
4324 | Op, getI32Imm(~0U, dl)), 0); | ||||
4325 | CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); | ||||
4326 | return true; | ||||
4327 | } | ||||
4328 | case ISD::SETLT: { | ||||
4329 | SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), | ||||
4330 | getI32Imm(31, dl) }; | ||||
4331 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4332 | return true; | ||||
4333 | } | ||||
4334 | case ISD::SETGT: { | ||||
4335 | SDValue T = | ||||
4336 | SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); | ||||
4337 | T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); | ||||
4338 | SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), | ||||
4339 | getI32Imm(31, dl) }; | ||||
4340 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4341 | return true; | ||||
4342 | } | ||||
4343 | } | ||||
4344 | } else if (Imm == ~0U) { // setcc op, -1 | ||||
4345 | SDValue Op = LHS; | ||||
4346 | switch (CC) { | ||||
4347 | default: break; | ||||
4348 | case ISD::SETEQ: | ||||
4349 | if (isPPC64) break; | ||||
4350 | Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, | ||||
4351 | Op, getI32Imm(1, dl)), 0); | ||||
4352 | CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, | ||||
4353 | SDValue(CurDAG->getMachineNode(PPC::LI, dl, | ||||
4354 | MVT::i32, | ||||
4355 | getI32Imm(0, dl)), | ||||
4356 | 0), Op.getValue(1)); | ||||
4357 | return true; | ||||
4358 | case ISD::SETNE: { | ||||
4359 | if (isPPC64) break; | ||||
4360 | Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); | ||||
4361 | SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, | ||||
4362 | Op, getI32Imm(~0U, dl)); | ||||
4363 | CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, | ||||
4364 | SDValue(AD, 1)); | ||||
4365 | return true; | ||||
4366 | } | ||||
4367 | case ISD::SETLT: { | ||||
4368 | SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, | ||||
4369 | getI32Imm(1, dl)), 0); | ||||
4370 | SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, | ||||
4371 | Op), 0); | ||||
4372 | SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), | ||||
4373 | getI32Imm(31, dl) }; | ||||
4374 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4375 | return true; | ||||
4376 | } | ||||
4377 | case ISD::SETGT: { | ||||
4378 | SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), | ||||
4379 | getI32Imm(31, dl) }; | ||||
4380 | Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); | ||||
4381 | CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); | ||||
4382 | return true; | ||||
4383 | } | ||||
4384 | } | ||||
4385 | } | ||||
4386 | } | ||||
4387 | |||||
4388 | // Altivec Vector compare instructions do not set any CR register by default and | ||||
4389 | // vector compare operations return the same type as the operands. | ||||
4390 | if (!IsStrict && LHS.getValueType().isVector()) { | ||||
4391 | if (Subtarget->hasSPE()) | ||||
4392 | return false; | ||||
4393 | |||||
4394 | EVT VecVT = LHS.getValueType(); | ||||
4395 | bool Swap, Negate; | ||||
4396 | unsigned int VCmpInst = | ||||
4397 | getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate); | ||||
4398 | if (Swap) | ||||
4399 | std::swap(LHS, RHS); | ||||
4400 | |||||
4401 | EVT ResVT = VecVT.changeVectorElementTypeToInteger(); | ||||
4402 | if (Negate) { | ||||
4403 | SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); | ||||
4404 | CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, | ||||
4405 | ResVT, VCmp, VCmp); | ||||
4406 | return true; | ||||
4407 | } | ||||
4408 | |||||
4409 | CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); | ||||
4410 | return true; | ||||
4411 | } | ||||
4412 | |||||
4413 | if (Subtarget->useCRBits()) | ||||
4414 | return false; | ||||
4415 | |||||
4416 | bool Inv; | ||||
4417 | unsigned Idx = getCRIdxForSetCC(CC, Inv); | ||||
4418 | SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain); | ||||
4419 | if (IsStrict) | ||||
4420 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1)); | ||||
4421 | SDValue IntCR; | ||||
4422 | |||||
4423 | // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that | ||||
4424 | // The correct compare instruction is already set by SelectCC() | ||||
4425 | if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { | ||||
4426 | Idx = 1; | ||||
4427 | } | ||||
4428 | |||||
4429 | // Force the ccreg into CR7. | ||||
4430 | SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); | ||||
4431 | |||||
4432 | SDValue InFlag(nullptr, 0); // Null incoming flag value. | ||||
4433 | CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, | ||||
4434 | InFlag).getValue(1); | ||||
4435 | |||||
4436 | IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, | ||||
4437 | CCReg), 0); | ||||
4438 | |||||
4439 | SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), | ||||
4440 | getI32Imm(31, dl), getI32Imm(31, dl) }; | ||||
4441 | if (!Inv) { | ||||
4442 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4443 | return true; | ||||
4444 | } | ||||
4445 | |||||
4446 | // Get the specified bit. | ||||
4447 | SDValue Tmp = | ||||
4448 | SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); | ||||
4449 | CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); | ||||
4450 | return true; | ||||
4451 | } | ||||
4452 | |||||
4453 | /// Does this node represent a load/store node whose address can be represented | ||||
4454 | /// with a register plus an immediate that's a multiple of \p Val: | ||||
4455 | bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { | ||||
4456 | LoadSDNode *LDN = dyn_cast<LoadSDNode>(N); | ||||
4457 | StoreSDNode *STN = dyn_cast<StoreSDNode>(N); | ||||
4458 | SDValue AddrOp; | ||||
4459 | if (LDN) | ||||
4460 | AddrOp = LDN->getOperand(1); | ||||
4461 | else if (STN) | ||||
4462 | AddrOp = STN->getOperand(2); | ||||
4463 | |||||
4464 | // If the address points a frame object or a frame object with an offset, | ||||
4465 | // we need to check the object alignment. | ||||
4466 | short Imm = 0; | ||||
4467 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>( | ||||
4468 | AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : | ||||
4469 | AddrOp)) { | ||||
4470 | // If op0 is a frame index that is under aligned, we can't do it either, | ||||
4471 | // because it is translated to r31 or r1 + slot + offset. We won't know the | ||||
4472 | // slot number until the stack frame is finalized. | ||||
4473 | const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); | ||||
4474 | unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value(); | ||||
4475 | if ((SlotAlign % Val) != 0) | ||||
4476 | return false; | ||||
4477 | |||||
4478 | // If we have an offset, we need further check on the offset. | ||||
4479 | if (AddrOp.getOpcode() != ISD::ADD) | ||||
4480 | return true; | ||||
4481 | } | ||||
4482 | |||||
4483 | if (AddrOp.getOpcode() == ISD::ADD) | ||||
4484 | return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); | ||||
4485 | |||||
4486 | // If the address comes from the outside, the offset will be zero. | ||||
4487 | return AddrOp.getOpcode() == ISD::CopyFromReg; | ||||
4488 | } | ||||
4489 | |||||
4490 | void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { | ||||
4491 | // Transfer memoperands. | ||||
4492 | MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); | ||||
4493 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); | ||||
4494 | } | ||||
4495 | |||||
4496 | static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, | ||||
4497 | bool &NeedSwapOps, bool &IsUnCmp) { | ||||
4498 | |||||
4499 | assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.")(static_cast <bool> (N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.") ? void (0) : __assert_fail ("N->getOpcode() == ISD::SELECT_CC && \"Expecting a SELECT_CC here.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4499, __extension__ __PRETTY_FUNCTION__)); | ||||
4500 | |||||
4501 | SDValue LHS = N->getOperand(0); | ||||
4502 | SDValue RHS = N->getOperand(1); | ||||
4503 | SDValue TrueRes = N->getOperand(2); | ||||
4504 | SDValue FalseRes = N->getOperand(3); | ||||
4505 | ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); | ||||
4506 | if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 && | ||||
4507 | N->getSimpleValueType(0) != MVT::i32)) | ||||
4508 | return false; | ||||
4509 | |||||
4510 | // We are looking for any of: | ||||
4511 | // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) | ||||
4512 | // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) | ||||
4513 | // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) | ||||
4514 | // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) | ||||
4515 | int64_t TrueResVal = TrueConst->getSExtValue(); | ||||
4516 | if ((TrueResVal < -1 || TrueResVal > 1) || | ||||
4517 | (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || | ||||
4518 | (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || | ||||
4519 | (TrueResVal == 0 && | ||||
4520 | (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) | ||||
4521 | return false; | ||||
4522 | |||||
4523 | SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC | ||||
4524 | ? FalseRes | ||||
4525 | : FalseRes.getOperand(0); | ||||
4526 | bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC; | ||||
4527 | if (SetOrSelCC.getOpcode() != ISD::SETCC && | ||||
4528 | SetOrSelCC.getOpcode() != ISD::SELECT_CC) | ||||
4529 | return false; | ||||
4530 | |||||
4531 | // Without this setb optimization, the outer SELECT_CC will be manually | ||||
4532 | // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass | ||||
4533 | // transforms pseudo instruction to isel instruction. When there are more than | ||||
4534 | // one use for result like zext/sext, with current optimization we only see | ||||
4535 | // isel is replaced by setb but can't see any significant gain. Since | ||||
4536 | // setb has longer latency than original isel, we should avoid this. Another | ||||
4537 | // point is that setb requires comparison always kept, it can break the | ||||
4538 | // opportunity to get the comparison away if we have in future. | ||||
4539 | if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) | ||||
4540 | return false; | ||||
4541 | |||||
4542 | SDValue InnerLHS = SetOrSelCC.getOperand(0); | ||||
4543 | SDValue InnerRHS = SetOrSelCC.getOperand(1); | ||||
4544 | ISD::CondCode InnerCC = | ||||
4545 | cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); | ||||
4546 | // If the inner comparison is a select_cc, make sure the true/false values are | ||||
4547 | // 1/-1 and canonicalize it if needed. | ||||
4548 | if (InnerIsSel) { | ||||
4549 | ConstantSDNode *SelCCTrueConst = | ||||
4550 | dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); | ||||
4551 | ConstantSDNode *SelCCFalseConst = | ||||
4552 | dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); | ||||
4553 | if (!SelCCTrueConst || !SelCCFalseConst) | ||||
4554 | return false; | ||||
4555 | int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); | ||||
4556 | int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); | ||||
4557 | // The values must be -1/1 (requiring a swap) or 1/-1. | ||||
4558 | if (SelCCTVal == -1 && SelCCFVal == 1) { | ||||
4559 | std::swap(InnerLHS, InnerRHS); | ||||
4560 | } else if (SelCCTVal != 1 || SelCCFVal != -1) | ||||
4561 | return false; | ||||
4562 | } | ||||
4563 | |||||
4564 | // Canonicalize unsigned case | ||||
4565 | if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { | ||||
4566 | IsUnCmp = true; | ||||
4567 | InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT; | ||||
4568 | } | ||||
4569 | |||||
4570 | bool InnerSwapped = false; | ||||
4571 | if (LHS == InnerRHS && RHS == InnerLHS) | ||||
4572 | InnerSwapped = true; | ||||
4573 | else if (LHS != InnerLHS || RHS != InnerRHS) | ||||
4574 | return false; | ||||
4575 | |||||
4576 | switch (CC) { | ||||
4577 | // (select_cc lhs, rhs, 0, \ | ||||
4578 | // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) | ||||
4579 | case ISD::SETEQ: | ||||
4580 | if (!InnerIsSel) | ||||
4581 | return false; | ||||
4582 | if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) | ||||
4583 | return false; | ||||
4584 | NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped; | ||||
4585 | break; | ||||
4586 | |||||
4587 | // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) | ||||
4588 | // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) | ||||
4589 | // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) | ||||
4590 | // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) | ||||
4591 | // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) | ||||
4592 | // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) | ||||
4593 | case ISD::SETULT: | ||||
4594 | if (!IsUnCmp && InnerCC != ISD::SETNE) | ||||
4595 | return false; | ||||
4596 | IsUnCmp = true; | ||||
4597 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
4598 | case ISD::SETLT: | ||||
4599 | if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || | ||||
4600 | (InnerCC == ISD::SETLT && InnerSwapped)) | ||||
4601 | NeedSwapOps = (TrueResVal == 1); | ||||
4602 | else | ||||
4603 | return false; | ||||
4604 | break; | ||||
4605 | |||||
4606 | // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) | ||||
4607 | // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) | ||||
4608 | // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) | ||||
4609 | // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) | ||||
4610 | // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) | ||||
4611 | // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) | ||||
4612 | case ISD::SETUGT: | ||||
4613 | if (!IsUnCmp && InnerCC != ISD::SETNE) | ||||
4614 | return false; | ||||
4615 | IsUnCmp = true; | ||||
4616 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
4617 | case ISD::SETGT: | ||||
4618 | if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || | ||||
4619 | (InnerCC == ISD::SETGT && InnerSwapped)) | ||||
4620 | NeedSwapOps = (TrueResVal == -1); | ||||
4621 | else | ||||
4622 | return false; | ||||
4623 | break; | ||||
4624 | |||||
4625 | default: | ||||
4626 | return false; | ||||
4627 | } | ||||
4628 | |||||
4629 | LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Found a node that can be lowered to a SETB: " ; } } while (false); | ||||
4630 | LLVM_DEBUG(N->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(); } } while (false); | ||||
4631 | |||||
4632 | return true; | ||||
4633 | } | ||||
4634 | |||||
4635 | // Return true if it's a software square-root/divide operand. | ||||
4636 | static bool isSWTestOp(SDValue N) { | ||||
4637 | if (N.getOpcode() == PPCISD::FTSQRT) | ||||
4638 | return true; | ||||
4639 | if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0))) | ||||
4640 | return false; | ||||
4641 | switch (N.getConstantOperandVal(0)) { | ||||
4642 | case Intrinsic::ppc_vsx_xvtdivdp: | ||||
4643 | case Intrinsic::ppc_vsx_xvtdivsp: | ||||
4644 | case Intrinsic::ppc_vsx_xvtsqrtdp: | ||||
4645 | case Intrinsic::ppc_vsx_xvtsqrtsp: | ||||
4646 | return true; | ||||
4647 | } | ||||
4648 | return false; | ||||
4649 | } | ||||
4650 | |||||
4651 | bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) { | ||||
4652 | assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.")(static_cast <bool> (N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.") ? void (0) : __assert_fail ("N->getOpcode() == ISD::BR_CC && \"ISD::BR_CC is expected.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4652, __extension__ __PRETTY_FUNCTION__)); | ||||
4653 | // We are looking for following patterns, where `truncate to i1` actually has | ||||
4654 | // the same semantic with `and 1`. | ||||
4655 | // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp) | ||||
4656 | // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp) | ||||
4657 | // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp) | ||||
4658 | // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp) | ||||
4659 | // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp) | ||||
4660 | // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp) | ||||
4661 | // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp) | ||||
4662 | // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp) | ||||
4663 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); | ||||
4664 | if (CC != ISD::SETEQ && CC != ISD::SETNE) | ||||
4665 | return false; | ||||
4666 | |||||
4667 | SDValue CmpRHS = N->getOperand(3); | ||||
4668 | if (!isa<ConstantSDNode>(CmpRHS) || | ||||
4669 | cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0) | ||||
4670 | return false; | ||||
4671 | |||||
4672 | SDValue CmpLHS = N->getOperand(2); | ||||
4673 | if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0))) | ||||
4674 | return false; | ||||
4675 | |||||
4676 | unsigned PCC = 0; | ||||
4677 | bool IsCCNE = CC == ISD::SETNE; | ||||
4678 | if (CmpLHS.getOpcode() == ISD::AND && | ||||
4679 | isa<ConstantSDNode>(CmpLHS.getOperand(1))) | ||||
4680 | switch (CmpLHS.getConstantOperandVal(1)) { | ||||
4681 | case 1: | ||||
4682 | PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; | ||||
4683 | break; | ||||
4684 | case 2: | ||||
4685 | PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE; | ||||
4686 | break; | ||||
4687 | case 4: | ||||
4688 | PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE; | ||||
4689 | break; | ||||
4690 | case 8: | ||||
4691 | PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE; | ||||
4692 | break; | ||||
4693 | default: | ||||
4694 | return false; | ||||
4695 | } | ||||
4696 | else if (CmpLHS.getOpcode() == ISD::TRUNCATE && | ||||
4697 | CmpLHS.getValueType() == MVT::i1) | ||||
4698 | PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; | ||||
4699 | |||||
4700 | if (PCC) { | ||||
4701 | SDLoc dl(N); | ||||
4702 | SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4), | ||||
4703 | N->getOperand(0)}; | ||||
4704 | CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); | ||||
4705 | return true; | ||||
4706 | } | ||||
4707 | return false; | ||||
4708 | } | ||||
4709 | |||||
4710 | bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { | ||||
4711 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4711, __extension__ __PRETTY_FUNCTION__)); | ||||
4712 | unsigned Imm; | ||||
4713 | if (!isInt32Immediate(N->getOperand(1), Imm)) | ||||
4714 | return false; | ||||
4715 | |||||
4716 | SDLoc dl(N); | ||||
4717 | SDValue Val = N->getOperand(0); | ||||
4718 | unsigned SH, MB, ME; | ||||
4719 | // If this is an and of a value rotated between 0 and 31 bits and then and'd | ||||
4720 | // with a mask, emit rlwinm | ||||
4721 | if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) { | ||||
4722 | Val = Val.getOperand(0); | ||||
4723 | SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl), | ||||
4724 | getI32Imm(ME, dl)}; | ||||
4725 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4726 | return true; | ||||
4727 | } | ||||
4728 | |||||
4729 | // If this is just a masked value where the input is not handled, and | ||||
4730 | // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm | ||||
4731 | if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) { | ||||
4732 | SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl), | ||||
4733 | getI32Imm(ME, dl)}; | ||||
4734 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4735 | return true; | ||||
4736 | } | ||||
4737 | |||||
4738 | // AND X, 0 -> 0, not "rlwinm 32". | ||||
4739 | if (Imm == 0) { | ||||
4740 | ReplaceUses(SDValue(N, 0), N->getOperand(1)); | ||||
4741 | return true; | ||||
4742 | } | ||||
4743 | |||||
4744 | return false; | ||||
4745 | } | ||||
4746 | |||||
4747 | bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { | ||||
4748 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4748, __extension__ __PRETTY_FUNCTION__)); | ||||
4749 | uint64_t Imm64; | ||||
4750 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) | ||||
4751 | return false; | ||||
4752 | |||||
4753 | unsigned MB, ME; | ||||
4754 | if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) { | ||||
4755 | // MB ME | ||||
4756 | // +----------------------+ | ||||
4757 | // |xxxxxxxxxxx00011111000| | ||||
4758 | // +----------------------+ | ||||
4759 | // 0 32 64 | ||||
4760 | // We can only do it if the MB is larger than 32 and MB <= ME | ||||
4761 | // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even | ||||
4762 | // we didn't rotate it. | ||||
4763 | SDLoc dl(N); | ||||
4764 | SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl), | ||||
4765 | getI64Imm(ME - 32, dl)}; | ||||
4766 | CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); | ||||
4767 | return true; | ||||
4768 | } | ||||
4769 | |||||
4770 | return false; | ||||
4771 | } | ||||
4772 | |||||
4773 | bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { | ||||
4774 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4774, __extension__ __PRETTY_FUNCTION__)); | ||||
4775 | uint64_t Imm64; | ||||
4776 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) | ||||
4777 | return false; | ||||
4778 | |||||
4779 | // Do nothing if it is 16-bit imm as the pattern in the .td file handle | ||||
4780 | // it well with "andi.". | ||||
4781 | if (isUInt<16>(Imm64)) | ||||
4782 | return false; | ||||
4783 | |||||
4784 | SDLoc Loc(N); | ||||
4785 | SDValue Val = N->getOperand(0); | ||||
4786 | |||||
4787 | // Optimized with two rldicl's as follows: | ||||
4788 | // Add missing bits on left to the mask and check that the mask is a | ||||
4789 | // wrapped run of ones, i.e. | ||||
4790 | // Change pattern |0001111100000011111111| | ||||
4791 | // to |1111111100000011111111|. | ||||
4792 | unsigned NumOfLeadingZeros = countLeadingZeros(Imm64); | ||||
4793 | if (NumOfLeadingZeros != 0) | ||||
4794 | Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros); | ||||
4795 | |||||
4796 | unsigned MB, ME; | ||||
4797 | if (!isRunOfOnes64(Imm64, MB, ME)) | ||||
4798 | return false; | ||||
4799 | |||||
4800 | // ME MB MB-ME+63 | ||||
4801 | // +----------------------+ +----------------------+ | ||||
4802 | // |1111111100000011111111| -> |0000001111111111111111| | ||||
4803 | // +----------------------+ +----------------------+ | ||||
4804 | // 0 63 0 63 | ||||
4805 | // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between. | ||||
4806 | unsigned OnesOnLeft = ME + 1; | ||||
4807 | unsigned ZerosInBetween = (MB - ME + 63) & 63; | ||||
4808 | // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear | ||||
4809 | // on the left the bits that are already zeros in the mask. | ||||
4810 | Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val, | ||||
4811 | getI64Imm(OnesOnLeft, Loc), | ||||
4812 | getI64Imm(ZerosInBetween, Loc)), | ||||
4813 | 0); | ||||
4814 | // MB-ME+63 ME MB | ||||
4815 | // +----------------------+ +----------------------+ | ||||
4816 | // |0000001111111111111111| -> |0001111100000011111111| | ||||
4817 | // +----------------------+ +----------------------+ | ||||
4818 | // 0 63 0 63 | ||||
4819 | // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the | ||||
4820 | // left the number of ones we previously added. | ||||
4821 | SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc), | ||||
4822 | getI64Imm(NumOfLeadingZeros, Loc)}; | ||||
4823 | CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); | ||||
4824 | return true; | ||||
4825 | } | ||||
4826 | |||||
4827 | bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { | ||||
4828 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4828, __extension__ __PRETTY_FUNCTION__)); | ||||
4829 | unsigned Imm; | ||||
4830 | if (!isInt32Immediate(N->getOperand(1), Imm)) | ||||
4831 | return false; | ||||
4832 | |||||
4833 | SDValue Val = N->getOperand(0); | ||||
4834 | unsigned Imm2; | ||||
4835 | // ISD::OR doesn't get all the bitfield insertion fun. | ||||
4836 | // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a | ||||
4837 | // bitfield insert. | ||||
4838 | if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2)) | ||||
4839 | return false; | ||||
4840 | |||||
4841 | // The idea here is to check whether this is equivalent to: | ||||
4842 | // (c1 & m) | (x & ~m) | ||||
4843 | // where m is a run-of-ones mask. The logic here is that, for each bit in | ||||
4844 | // c1 and c2: | ||||
4845 | // - if both are 1, then the output will be 1. | ||||
4846 | // - if both are 0, then the output will be 0. | ||||
4847 | // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will | ||||
4848 | // come from x. | ||||
4849 | // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will | ||||
4850 | // be 0. | ||||
4851 | // If that last condition is never the case, then we can form m from the | ||||
4852 | // bits that are the same between c1 and c2. | ||||
4853 | unsigned MB, ME; | ||||
4854 | if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) { | ||||
4855 | SDLoc dl(N); | ||||
4856 | SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl), | ||||
4857 | getI32Imm(MB, dl), getI32Imm(ME, dl)}; | ||||
4858 | ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); | ||||
4859 | return true; | ||||
4860 | } | ||||
4861 | |||||
4862 | return false; | ||||
4863 | } | ||||
4864 | |||||
4865 | bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) { | ||||
4866 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4866, __extension__ __PRETTY_FUNCTION__)); | ||||
4867 | uint64_t Imm64; | ||||
4868 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64)) | ||||
4869 | return false; | ||||
4870 | |||||
4871 | // If this is a 64-bit zero-extension mask, emit rldicl. | ||||
4872 | unsigned MB = 64 - countTrailingOnes(Imm64); | ||||
4873 | unsigned SH = 0; | ||||
4874 | unsigned Imm; | ||||
4875 | SDValue Val = N->getOperand(0); | ||||
4876 | SDLoc dl(N); | ||||
4877 | |||||
4878 | if (Val.getOpcode() == ISD::ANY_EXTEND) { | ||||
4879 | auto Op0 = Val.getOperand(0); | ||||
4880 | if (Op0.getOpcode() == ISD::SRL && | ||||
4881 | isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { | ||||
4882 | |||||
4883 | auto ResultType = Val.getNode()->getValueType(0); | ||||
4884 | auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType); | ||||
4885 | SDValue IDVal(ImDef, 0); | ||||
4886 | |||||
4887 | Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType, | ||||
4888 | IDVal, Op0.getOperand(0), | ||||
4889 | getI32Imm(1, dl)), | ||||
4890 | 0); | ||||
4891 | SH = 64 - Imm; | ||||
4892 | } | ||||
4893 | } | ||||
4894 | |||||
4895 | // If the operand is a logical right shift, we can fold it into this | ||||
4896 | // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) | ||||
4897 | // for n <= mb. The right shift is really a left rotate followed by a | ||||
4898 | // mask, and this mask is a more-restrictive sub-mask of the mask implied | ||||
4899 | // by the shift. | ||||
4900 | if (Val.getOpcode() == ISD::SRL && | ||||
4901 | isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { | ||||
4902 | assert(Imm < 64 && "Illegal shift amount")(static_cast <bool> (Imm < 64 && "Illegal shift amount" ) ? void (0) : __assert_fail ("Imm < 64 && \"Illegal shift amount\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4902, __extension__ __PRETTY_FUNCTION__)); | ||||
4903 | Val = Val.getOperand(0); | ||||
4904 | SH = 64 - Imm; | ||||
4905 | } | ||||
4906 | |||||
4907 | SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)}; | ||||
4908 | CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); | ||||
4909 | return true; | ||||
4910 | } | ||||
4911 | |||||
4912 | bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) { | ||||
4913 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4913, __extension__ __PRETTY_FUNCTION__)); | ||||
4914 | uint64_t Imm64; | ||||
4915 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || | ||||
4916 | !isMask_64(~Imm64)) | ||||
4917 | return false; | ||||
4918 | |||||
4919 | // If this is a negated 64-bit zero-extension mask, | ||||
4920 | // i.e. the immediate is a sequence of ones from most significant side | ||||
4921 | // and all zero for reminder, we should use rldicr. | ||||
4922 | unsigned MB = 63 - countTrailingOnes(~Imm64); | ||||
4923 | unsigned SH = 0; | ||||
4924 | SDLoc dl(N); | ||||
4925 | SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)}; | ||||
4926 | CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); | ||||
4927 | return true; | ||||
4928 | } | ||||
4929 | |||||
4930 | bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) { | ||||
4931 | assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::OR && "ISD::OR SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"ISD::OR SDNode expected\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4931, __extension__ __PRETTY_FUNCTION__)); | ||||
4932 | uint64_t Imm64; | ||||
4933 | unsigned MB, ME; | ||||
4934 | SDValue N0 = N->getOperand(0); | ||||
4935 | |||||
4936 | // We won't get fewer instructions if the imm is 32-bit integer. | ||||
4937 | // rldimi requires the imm to have consecutive ones with both sides zero. | ||||
4938 | // Also, make sure the first Op has only one use, otherwise this may increase | ||||
4939 | // register pressure since rldimi is destructive. | ||||
4940 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || | ||||
4941 | isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse()) | ||||
4942 | return false; | ||||
4943 | |||||
4944 | unsigned SH = 63 - ME; | ||||
4945 | SDLoc Dl(N); | ||||
4946 | // Use select64Imm for making LI instr instead of directly putting Imm64 | ||||
4947 | SDValue Ops[] = { | ||||
4948 | N->getOperand(0), | ||||
4949 | SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0), | ||||
4950 | getI32Imm(SH, Dl), getI32Imm(MB, Dl)}; | ||||
4951 | CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops); | ||||
4952 | return true; | ||||
4953 | } | ||||
4954 | |||||
4955 | // Select - Convert the specified operand from a target-independent to a | ||||
4956 | // target-specific node if it hasn't already been changed. | ||||
4957 | void PPCDAGToDAGISel::Select(SDNode *N) { | ||||
4958 | SDLoc dl(N); | ||||
4959 | if (N->isMachineOpcode()) { | ||||
4960 | N->setNodeId(-1); | ||||
4961 | return; // Already selected. | ||||
4962 | } | ||||
4963 | |||||
4964 | // In case any misguided DAG-level optimizations form an ADD with a | ||||
4965 | // TargetConstant operand, crash here instead of miscompiling (by selecting | ||||
4966 | // an r+r add instead of some kind of r+i add). | ||||
4967 | if (N->getOpcode() == ISD::ADD && | ||||
4968 | N->getOperand(1).getOpcode() == ISD::TargetConstant) | ||||
4969 | llvm_unreachable("Invalid ADD with TargetConstant operand")::llvm::llvm_unreachable_internal("Invalid ADD with TargetConstant operand" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4969); | ||||
4970 | |||||
4971 | // Try matching complex bit permutations before doing anything else. | ||||
4972 | if (tryBitPermutation(N)) | ||||
4973 | return; | ||||
4974 | |||||
4975 | // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). | ||||
4976 | if (tryIntCompareInGPR(N)) | ||||
4977 | return; | ||||
4978 | |||||
4979 | switch (N->getOpcode()) { | ||||
4980 | default: break; | ||||
4981 | |||||
4982 | case ISD::Constant: | ||||
4983 | if (N->getValueType(0) == MVT::i64) { | ||||
4984 | ReplaceNode(N, selectI64Imm(CurDAG, N)); | ||||
4985 | return; | ||||
4986 | } | ||||
4987 | break; | ||||
4988 | |||||
4989 | case ISD::INTRINSIC_WO_CHAIN: { | ||||
4990 | if (!Subtarget->isISA3_1()) | ||||
4991 | break; | ||||
4992 | unsigned Opcode = 0; | ||||
4993 | switch (N->getConstantOperandVal(0)) { | ||||
4994 | default: | ||||
4995 | break; | ||||
4996 | case Intrinsic::ppc_altivec_vstribr_p: | ||||
4997 | Opcode = PPC::VSTRIBR_rec; | ||||
4998 | break; | ||||
4999 | case Intrinsic::ppc_altivec_vstribl_p: | ||||
5000 | Opcode = PPC::VSTRIBL_rec; | ||||
5001 | break; | ||||
5002 | case Intrinsic::ppc_altivec_vstrihr_p: | ||||
5003 | Opcode = PPC::VSTRIHR_rec; | ||||
5004 | break; | ||||
5005 | case Intrinsic::ppc_altivec_vstrihl_p: | ||||
5006 | Opcode = PPC::VSTRIHL_rec; | ||||
5007 | break; | ||||
5008 | } | ||||
5009 | if (!Opcode) | ||||
5010 | break; | ||||
5011 | |||||
5012 | // Generate the appropriate vector string isolate intrinsic to match. | ||||
5013 | EVT VTs[] = {MVT::v16i8, MVT::Glue}; | ||||
5014 | SDValue VecStrOp = | ||||
5015 | SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0); | ||||
5016 | // Vector string isolate instructions update the EQ bit of CR6. | ||||
5017 | // Generate a SETBC instruction to extract the bit and place it in a GPR. | ||||
5018 | SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); | ||||
5019 | SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); | ||||
5020 | SDValue CRBit = SDValue( | ||||
5021 | CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, | ||||
5022 | CR6Reg, SubRegIdx, VecStrOp.getValue(1)), | ||||
5023 | 0); | ||||
5024 | CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit); | ||||
5025 | return; | ||||
5026 | } | ||||
5027 | |||||
5028 | case ISD::SETCC: | ||||
5029 | case ISD::STRICT_FSETCC: | ||||
5030 | case ISD::STRICT_FSETCCS: | ||||
5031 | if (trySETCC(N)) | ||||
5032 | return; | ||||
5033 | break; | ||||
5034 | // These nodes will be transformed into GETtlsADDR32 node, which | ||||
5035 | // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT | ||||
5036 | case PPCISD::ADDI_TLSLD_L_ADDR: | ||||
5037 | case PPCISD::ADDI_TLSGD_L_ADDR: { | ||||
5038 | const Module *Mod = MF->getFunction().getParent(); | ||||
5039 | if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || | ||||
5040 | !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() || | ||||
5041 | Mod->getPICLevel() == PICLevel::SmallPIC) | ||||
5042 | break; | ||||
5043 | // Attach global base pointer on GETtlsADDR32 node in order to | ||||
5044 | // generate secure plt code for TLS symbols. | ||||
5045 | getGlobalBaseReg(); | ||||
5046 | } break; | ||||
5047 | case PPCISD::CALL: { | ||||
5048 | if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || | ||||
5049 | !TM.isPositionIndependent() || !Subtarget->isSecurePlt() || | ||||
5050 | !Subtarget->isTargetELF()) | ||||
5051 | break; | ||||
5052 | |||||
5053 | SDValue Op = N->getOperand(1); | ||||
5054 | |||||
5055 | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { | ||||
5056 | if (GA->getTargetFlags() == PPCII::MO_PLT) | ||||
5057 | getGlobalBaseReg(); | ||||
5058 | } | ||||
5059 | else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { | ||||
5060 | if (ES->getTargetFlags() == PPCII::MO_PLT) | ||||
5061 | getGlobalBaseReg(); | ||||
5062 | } | ||||
5063 | } | ||||
5064 | break; | ||||
5065 | |||||
5066 | case PPCISD::GlobalBaseReg: | ||||
5067 | ReplaceNode(N, getGlobalBaseReg()); | ||||
5068 | return; | ||||
5069 | |||||
5070 | case ISD::FrameIndex: | ||||
5071 | selectFrameIndex(N, N); | ||||
5072 | return; | ||||
5073 | |||||
5074 | case PPCISD::MFOCRF: { | ||||
5075 | SDValue InFlag = N->getOperand(1); | ||||
5076 | ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, | ||||
5077 | N->getOperand(0), InFlag)); | ||||
5078 | return; | ||||
5079 | } | ||||
5080 | |||||
5081 | case PPCISD::READ_TIME_BASE: | ||||
5082 | ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, | ||||
5083 | MVT::Other, N->getOperand(0))); | ||||
5084 | return; | ||||
5085 | |||||
5086 | case PPCISD::SRA_ADDZE: { | ||||
5087 | SDValue N0 = N->getOperand(0); | ||||
5088 | SDValue ShiftAmt = | ||||
5089 | CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))-> | ||||
5090 | getConstantIntValue(), dl, | ||||
5091 | N->getValueType(0)); | ||||
5092 | if (N->getValueType(0) == MVT::i64) { | ||||
5093 | SDNode *Op = | ||||
5094 | CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, | ||||
5095 | N0, ShiftAmt); | ||||
5096 | CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), | ||||
5097 | SDValue(Op, 1)); | ||||
5098 | return; | ||||
5099 | } else { | ||||
5100 | assert(N->getValueType(0) == MVT::i32 &&(static_cast <bool> (N->getValueType(0) == MVT::i32 && "Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail ("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5101, __extension__ __PRETTY_FUNCTION__)) | ||||
5101 | "Expecting i64 or i32 in PPCISD::SRA_ADDZE")(static_cast <bool> (N->getValueType(0) == MVT::i32 && "Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail ("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5101, __extension__ __PRETTY_FUNCTION__)); | ||||
5102 | SDNode *Op = | ||||
5103 | CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, | ||||
5104 | N0, ShiftAmt); | ||||
5105 | CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), | ||||
5106 | SDValue(Op, 1)); | ||||
5107 | return; | ||||
5108 | } | ||||
5109 | } | ||||
5110 | |||||
5111 | case ISD::STORE: { | ||||
5112 | // Change TLS initial-exec D-form stores to X-form stores. | ||||
5113 | StoreSDNode *ST = cast<StoreSDNode>(N); | ||||
5114 | if (EnableTLSOpt && Subtarget->isELFv2ABI() && | ||||
5115 | ST->getAddressingMode() != ISD::PRE_INC) | ||||
5116 | if (tryTLSXFormStore(ST)) | ||||
5117 | return; | ||||
5118 | break; | ||||
5119 | } | ||||
5120 | case ISD::LOAD: { | ||||
5121 | // Handle preincrement loads. | ||||
5122 | LoadSDNode *LD = cast<LoadSDNode>(N); | ||||
5123 | EVT LoadedVT = LD->getMemoryVT(); | ||||
5124 | |||||
5125 | // Normal loads are handled by code generated from the .td file. | ||||
5126 | if (LD->getAddressingMode() != ISD::PRE_INC) { | ||||
5127 | // Change TLS initial-exec D-form loads to X-form loads. | ||||
5128 | if (EnableTLSOpt && Subtarget->isELFv2ABI()) | ||||
5129 | if (tryTLSXFormLoad(LD)) | ||||
5130 | return; | ||||
5131 | break; | ||||
5132 | } | ||||
5133 | |||||
5134 | SDValue Offset = LD->getOffset(); | ||||
5135 | if (Offset.getOpcode() == ISD::TargetConstant || | ||||
5136 | Offset.getOpcode() == ISD::TargetGlobalAddress) { | ||||
5137 | |||||
5138 | unsigned Opcode; | ||||
5139 | bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; | ||||
5140 | if (LD->getValueType(0) != MVT::i64) { | ||||
5141 | // Handle PPC32 integer and normal FP loads. | ||||
5142 | assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5142, __extension__ __PRETTY_FUNCTION__)); | ||||
5143 | switch (LoadedVT.getSimpleVT().SimpleTy) { | ||||
5144 | default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5144); | ||||
5145 | case MVT::f64: Opcode = PPC::LFDU; break; | ||||
5146 | case MVT::f32: Opcode = PPC::LFSU; break; | ||||
5147 | case MVT::i32: Opcode = PPC::LWZU; break; | ||||
5148 | case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break; | ||||
5149 | case MVT::i1: | ||||
5150 | case MVT::i8: Opcode = PPC::LBZU; break; | ||||
5151 | } | ||||
5152 | } else { | ||||
5153 | assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64 && "Unknown load result type!") ? void (0) : __assert_fail ("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5153, __extension__ __PRETTY_FUNCTION__)); | ||||
5154 | assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5154, __extension__ __PRETTY_FUNCTION__)); | ||||
5155 | switch (LoadedVT.getSimpleVT().SimpleTy) { | ||||
5156 | default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5156); | ||||
5157 | case MVT::i64: Opcode = PPC::LDU; break; | ||||
5158 | case MVT::i32: Opcode = PPC::LWZU8; break; | ||||
5159 | case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break; | ||||
5160 | case MVT::i1: | ||||
5161 | case MVT::i8: Opcode = PPC::LBZU8; break; | ||||
5162 | } | ||||
5163 | } | ||||
5164 | |||||
5165 | SDValue Chain = LD->getChain(); | ||||
5166 | SDValue Base = LD->getBasePtr(); | ||||
5167 | SDValue Ops[] = { Offset, Base, Chain }; | ||||
5168 | SDNode *MN = CurDAG->getMachineNode( | ||||
5169 | Opcode, dl, LD->getValueType(0), | ||||
5170 | PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); | ||||
5171 | transferMemOperands(N, MN); | ||||
5172 | ReplaceNode(N, MN); | ||||
5173 | return; | ||||
5174 | } else { | ||||
5175 | unsigned Opcode; | ||||
5176 | bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; | ||||
5177 | if (LD->getValueType(0) != MVT::i64) { | ||||
5178 | // Handle PPC32 integer and normal FP loads. | ||||
5179 | assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5179, __extension__ __PRETTY_FUNCTION__)); | ||||
5180 | switch (LoadedVT.getSimpleVT().SimpleTy) { | ||||
5181 | default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5181); | ||||
5182 | case MVT::f64: Opcode = PPC::LFDUX; break; | ||||
5183 | case MVT::f32: Opcode = PPC::LFSUX; break; | ||||
5184 | case MVT::i32: Opcode = PPC::LWZUX; break; | ||||
5185 | case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; | ||||
5186 | case MVT::i1: | ||||
5187 | case MVT::i8: Opcode = PPC::LBZUX; break; | ||||
5188 | } | ||||
5189 | } else { | ||||
5190 | assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64 && "Unknown load result type!") ? void (0) : __assert_fail ("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5190, __extension__ __PRETTY_FUNCTION__)); | ||||
5191 | assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5192, __extension__ __PRETTY_FUNCTION__)) | ||||
5192 | "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5192, __extension__ __PRETTY_FUNCTION__)); | ||||
5193 | switch (LoadedVT.getSimpleVT().SimpleTy) { | ||||
5194 | default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5194); | ||||
5195 | case MVT::i64: Opcode = PPC::LDUX; break; | ||||
5196 | case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; | ||||
5197 | case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break; | ||||
5198 | case MVT::i1: | ||||
5199 | case MVT::i8: Opcode = PPC::LBZUX8; break; | ||||
5200 | } | ||||
5201 | } | ||||
5202 | |||||
5203 | SDValue Chain = LD->getChain(); | ||||
5204 | SDValue Base = LD->getBasePtr(); | ||||
5205 | SDValue Ops[] = { Base, Offset, Chain }; | ||||
5206 | SDNode *MN = CurDAG->getMachineNode( | ||||
5207 | Opcode, dl, LD->getValueType(0), | ||||
5208 | PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); | ||||
5209 | transferMemOperands(N, MN); | ||||
5210 | ReplaceNode(N, MN); | ||||
5211 | return; | ||||
5212 | } | ||||
5213 | } | ||||
5214 | |||||
5215 | case ISD::AND: | ||||
5216 | // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr | ||||
5217 | if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || | ||||
5218 | tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) | ||||
5219 | return; | ||||
5220 | |||||
5221 | // Other cases are autogenerated. | ||||
5222 | break; | ||||
5223 | case ISD::OR: { | ||||
5224 | if (N->getValueType(0) == MVT::i32) | ||||
5225 | if (tryBitfieldInsert(N)) | ||||
5226 | return; | ||||
5227 | |||||
5228 | int16_t Imm; | ||||
5229 | if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && | ||||
5230 | isIntS16Immediate(N->getOperand(1), Imm)) { | ||||
5231 | KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0)); | ||||
5232 | |||||
5233 | // If this is equivalent to an add, then we can fold it with the | ||||
5234 | // FrameIndex calculation. | ||||
5235 | if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { | ||||
5236 | selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); | ||||
5237 | return; | ||||
5238 | } | ||||
5239 | } | ||||
5240 | |||||
5241 | // If this is 'or' against an imm with consecutive ones and both sides zero, | ||||
5242 | // try to emit rldimi | ||||
5243 | if (tryAsSingleRLDIMI(N)) | ||||
5244 | return; | ||||
5245 | |||||
5246 | // OR with a 32-bit immediate can be handled by ori + oris | ||||
5247 | // without creating an immediate in a GPR. | ||||
5248 | uint64_t Imm64 = 0; | ||||
5249 | bool IsPPC64 = Subtarget->isPPC64(); | ||||
5250 | if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && | ||||
5251 | (Imm64 & ~0xFFFFFFFFuLL) == 0) { | ||||
5252 | // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later. | ||||
5253 | uint64_t ImmHi = Imm64 >> 16; | ||||
5254 | uint64_t ImmLo = Imm64 & 0xFFFF; | ||||
5255 | if (ImmHi != 0 && ImmLo != 0) { | ||||
5256 | SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, | ||||
5257 | N->getOperand(0), | ||||
5258 | getI16Imm(ImmLo, dl)); | ||||
5259 | SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; | ||||
5260 | CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1); | ||||
5261 | return; | ||||
5262 | } | ||||
5263 | } | ||||
5264 | |||||
5265 | // Other cases are autogenerated. | ||||
5266 | break; | ||||
5267 | } | ||||
5268 | case ISD::XOR: { | ||||
5269 | // XOR with a 32-bit immediate can be handled by xori + xoris | ||||
5270 | // without creating an immediate in a GPR. | ||||
5271 | uint64_t Imm64 = 0; | ||||
5272 | bool IsPPC64 = Subtarget->isPPC64(); | ||||
5273 | if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && | ||||
5274 | (Imm64 & ~0xFFFFFFFFuLL) == 0) { | ||||
5275 | // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later. | ||||
5276 | uint64_t ImmHi = Imm64 >> 16; | ||||
5277 | uint64_t ImmLo = Imm64 & 0xFFFF; | ||||
5278 | if (ImmHi != 0 && ImmLo != 0) { | ||||
5279 | SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, | ||||
5280 | N->getOperand(0), | ||||
5281 | getI16Imm(ImmLo, dl)); | ||||
5282 | SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; | ||||
5283 | CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1); | ||||
5284 | return; | ||||
5285 | } | ||||
5286 | } | ||||
5287 | |||||
5288 | break; | ||||
5289 | } | ||||
5290 | case ISD::ADD: { | ||||
5291 | int16_t Imm; | ||||
5292 | if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && | ||||
5293 | isIntS16Immediate(N->getOperand(1), Imm)) { | ||||
5294 | selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); | ||||
5295 | return; | ||||
5296 | } | ||||
5297 | |||||
5298 | break; | ||||
5299 | } | ||||
5300 | case ISD::SHL: { | ||||
5301 | unsigned Imm, SH, MB, ME; | ||||
5302 | if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && | ||||
5303 | isRotateAndMask(N, Imm, true, SH, MB, ME)) { | ||||
5304 | SDValue Ops[] = { N->getOperand(0).getOperand(0), | ||||
5305 | getI32Imm(SH, dl), getI32Imm(MB, dl), | ||||
5306 | getI32Imm(ME, dl) }; | ||||
5307 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
5308 | return; | ||||
5309 | } | ||||
5310 | |||||
5311 | // Other cases are autogenerated. | ||||
5312 | break; | ||||
5313 | } | ||||
5314 | case ISD::SRL: { | ||||
5315 | unsigned Imm, SH, MB, ME; | ||||
5316 | if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && | ||||
5317 | isRotateAndMask(N, Imm, true, SH, MB, ME)) { | ||||
5318 | SDValue Ops[] = { N->getOperand(0).getOperand(0), | ||||
5319 | getI32Imm(SH, dl), getI32Imm(MB, dl), | ||||
5320 | getI32Imm(ME, dl) }; | ||||
5321 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
5322 | return; | ||||
5323 | } | ||||
5324 | |||||
5325 | // Other cases are autogenerated. | ||||
5326 | break; | ||||
5327 | } | ||||
5328 | case ISD::MUL: { | ||||
5329 | SDValue Op1 = N->getOperand(1); | ||||
5330 | if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64) | ||||
5331 | break; | ||||
5332 | |||||
5333 | // If the multiplier fits int16, we can handle it with mulli. | ||||
5334 | int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue(); | ||||
5335 | unsigned Shift = countTrailingZeros<uint64_t>(Imm); | ||||
5336 | if (isInt<16>(Imm) || !Shift) | ||||
5337 | break; | ||||
5338 | |||||
5339 | // If the shifted value fits int16, we can do this transformation: | ||||
5340 | // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL | ||||
5341 | // because DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). | ||||
5342 | uint64_t ImmSh = Imm >> Shift; | ||||
5343 | if (isInt<16>(ImmSh)) { | ||||
5344 | uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); | ||||
5345 | SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); | ||||
5346 | SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, | ||||
5347 | N->getOperand(0), SDImm); | ||||
5348 | CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0), | ||||
5349 | getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl)); | ||||
5350 | return; | ||||
5351 | } | ||||
5352 | break; | ||||
5353 | } | ||||
5354 | // FIXME: Remove this once the ANDI glue bug is fixed: | ||||
5355 | case PPCISD::ANDI_rec_1_EQ_BIT: | ||||
5356 | case PPCISD::ANDI_rec_1_GT_BIT: { | ||||
5357 | if (!ANDIGlueBug) | ||||
5358 | break; | ||||
5359 | |||||
5360 | EVT InVT = N->getOperand(0).getValueType(); | ||||
5361 | assert((InVT == MVT::i64 || InVT == MVT::i32) &&(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT:: i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ? void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5362, __extension__ __PRETTY_FUNCTION__)) | ||||
5362 | "Invalid input type for ANDI_rec_1_EQ_BIT")(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT:: i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ? void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5362, __extension__ __PRETTY_FUNCTION__)); | ||||
5363 | |||||
5364 | unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec; | ||||
5365 | SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, | ||||
5366 | N->getOperand(0), | ||||
5367 | CurDAG->getTargetConstant(1, dl, InVT)), | ||||
5368 | 0); | ||||
5369 | SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); | ||||
5370 | SDValue SRIdxVal = CurDAG->getTargetConstant( | ||||
5371 | N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt, | ||||
5372 | dl, MVT::i32); | ||||
5373 | |||||
5374 | CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, | ||||
5375 | SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); | ||||
5376 | return; | ||||
5377 | } | ||||
5378 | case ISD::SELECT_CC: { | ||||
5379 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); | ||||
5380 | EVT PtrVT = | ||||
5381 | CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); | ||||
5382 | bool isPPC64 = (PtrVT == MVT::i64); | ||||
5383 | |||||
5384 | // If this is a select of i1 operands, we'll pattern match it. | ||||
5385 | if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) | ||||
5386 | break; | ||||
5387 | |||||
5388 | if (Subtarget->isISA3_0() && Subtarget->isPPC64()) { | ||||
5389 | bool NeedSwapOps = false; | ||||
5390 | bool IsUnCmp = false; | ||||
5391 | if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { | ||||
5392 | SDValue LHS = N->getOperand(0); | ||||
5393 | SDValue RHS = N->getOperand(1); | ||||
5394 | if (NeedSwapOps) | ||||
5395 | std::swap(LHS, RHS); | ||||
5396 | |||||
5397 | // Use SelectCC to generate the comparison that sets the CR bits. For | ||||
5398 | // equality comparisons with one literal operand, SelectCC may avoid | ||||
5399 | // materializing the whole literal and use xoris to check it first, in | ||||
5400 | // which case the resulting comparison cannot exactly represent the | ||||
5401 | // GT/LT relationship. To avoid this, we specify SETGT/SETUGT here | ||||
5402 | // instead of SETEQ. | ||||
5403 | SDValue GenCC = | ||||
5404 | SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); | ||||
5405 | CurDAG->SelectNodeTo( | ||||
5406 | N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, | ||||
5407 | N->getValueType(0), GenCC); | ||||
5408 | NumP9Setb++; | ||||
5409 | return; | ||||
5410 | } | ||||
5411 | } | ||||
5412 | |||||
5413 | // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc | ||||
5414 | if (!isPPC64) | ||||
5415 | if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) | ||||
5416 | if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) | ||||
5417 | if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) | ||||
5418 | if (N1C->isNullValue() && N3C->isNullValue() && | ||||
5419 | N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && | ||||
5420 | // FIXME: Implement this optzn for PPC64. | ||||
5421 | N->getValueType(0) == MVT::i32) { | ||||
5422 | SDNode *Tmp = | ||||
5423 | CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, | ||||
5424 | N->getOperand(0), getI32Imm(~0U, dl)); | ||||
5425 | CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), | ||||
5426 | N->getOperand(0), SDValue(Tmp, 1)); | ||||
5427 | return; | ||||
5428 | } | ||||
5429 | |||||
5430 | SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); | ||||
5431 | |||||
5432 | if (N->getValueType(0) == MVT::i1) { | ||||
5433 | // An i1 select is: (c & t) | (!c & f). | ||||
5434 | bool Inv; | ||||
5435 | unsigned Idx = getCRIdxForSetCC(CC, Inv); | ||||
5436 | |||||
5437 | unsigned SRI; | ||||
5438 | switch (Idx) { | ||||
5439 | default: llvm_unreachable("Invalid CC index")::llvm::llvm_unreachable_internal("Invalid CC index", "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5439); | ||||
5440 | case 0: SRI = PPC::sub_lt; break; | ||||
5441 | case 1: SRI = PPC::sub_gt; break; | ||||
5442 | case 2: SRI = PPC::sub_eq; break; | ||||
5443 | case 3: SRI = PPC::sub_un; break; | ||||
5444 | } | ||||
5445 | |||||
5446 | SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); | ||||
5447 | |||||
5448 | SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, | ||||
5449 | CCBit, CCBit), 0); | ||||
5450 | SDValue C = Inv ? NotCCBit : CCBit, | ||||
5451 | NotC = Inv ? CCBit : NotCCBit; | ||||
5452 | |||||
5453 | SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, | ||||
5454 | C, N->getOperand(2)), 0); | ||||
5455 | SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, | ||||
5456 | NotC, N->getOperand(3)), 0); | ||||
5457 | |||||
5458 | CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); | ||||
5459 | return; | ||||
5460 | } | ||||
5461 | |||||
5462 | unsigned BROpc = | ||||
5463 | getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget); | ||||
5464 | |||||
5465 | unsigned SelectCCOp; | ||||
5466 | if (N->getValueType(0) == MVT::i32) | ||||
5467 | SelectCCOp = PPC::SELECT_CC_I4; | ||||
5468 | else if (N->getValueType(0) == MVT::i64) | ||||
5469 | SelectCCOp = PPC::SELECT_CC_I8; | ||||
5470 | else if (N->getValueType(0) == MVT::f32) { | ||||
5471 | if (Subtarget->hasP8Vector()) | ||||
5472 | SelectCCOp = PPC::SELECT_CC_VSSRC; | ||||
5473 | else if (Subtarget->hasSPE()) | ||||
5474 | SelectCCOp = PPC::SELECT_CC_SPE4; | ||||
5475 | else | ||||
5476 | SelectCCOp = PPC::SELECT_CC_F4; | ||||
5477 | } else if (N->getValueType(0) == MVT::f64) { | ||||
5478 | if (Subtarget->hasVSX()) | ||||
5479 | SelectCCOp = PPC::SELECT_CC_VSFRC; | ||||
5480 | else if (Subtarget->hasSPE()) | ||||
5481 | SelectCCOp = PPC::SELECT_CC_SPE; | ||||
5482 | else | ||||
5483 | SelectCCOp = PPC::SELECT_CC_F8; | ||||
5484 | } else if (N->getValueType(0) == MVT::f128) | ||||
5485 | SelectCCOp = PPC::SELECT_CC_F16; | ||||
5486 | else if (Subtarget->hasSPE()) | ||||
5487 | SelectCCOp = PPC::SELECT_CC_SPE; | ||||
5488 | else if (N->getValueType(0) == MVT::v2f64 || | ||||
5489 | N->getValueType(0) == MVT::v2i64) | ||||
5490 | SelectCCOp = PPC::SELECT_CC_VSRC; | ||||
5491 | else | ||||
5492 | SelectCCOp = PPC::SELECT_CC_VRRC; | ||||
5493 | |||||
5494 | SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), | ||||
5495 | getI32Imm(BROpc, dl) }; | ||||
5496 | CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); | ||||
5497 | return; | ||||
5498 | } | ||||
5499 | case ISD::VECTOR_SHUFFLE: | ||||
5500 | if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || | ||||
5501 | N->getValueType(0) == MVT::v2i64)) { | ||||
5502 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); | ||||
5503 | |||||
5504 | SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), | ||||
5505 | Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); | ||||
5506 | unsigned DM[2]; | ||||
5507 | |||||
5508 | for (int i = 0; i < 2; ++i) | ||||
5509 | if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) | ||||
5510 | DM[i] = 0; | ||||
5511 | else | ||||
5512 | DM[i] = 1; | ||||
5513 | |||||
5514 | if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && | ||||
5515 | Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && | ||||
5516 | isa<LoadSDNode>(Op1.getOperand(0))) { | ||||
5517 | LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0)); | ||||
5518 | SDValue Base, Offset; | ||||
5519 | |||||
5520 | if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && | ||||
5521 | (LD->getMemoryVT() == MVT::f64 || | ||||
5522 | LD->getMemoryVT() == MVT::i64) && | ||||
5523 | SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { | ||||
5524 | SDValue Chain = LD->getChain(); | ||||
5525 | SDValue Ops[] = { Base, Offset, Chain }; | ||||
5526 | MachineMemOperand *MemOp = LD->getMemOperand(); | ||||
5527 | SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, | ||||
5528 | N->getValueType(0), Ops); | ||||
5529 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp}); | ||||
5530 | return; | ||||
5531 | } | ||||
5532 | } | ||||
5533 | |||||
5534 | // For little endian, we must swap the input operands and adjust | ||||
5535 | // the mask elements (reverse and invert them). | ||||
5536 | if (Subtarget->isLittleEndian()) { | ||||
5537 | std::swap(Op1, Op2); | ||||
5538 | unsigned tmp = DM[0]; | ||||
5539 | DM[0] = 1 - DM[1]; | ||||
5540 | DM[1] = 1 - tmp; | ||||
5541 | } | ||||
5542 | |||||
5543 | SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, | ||||
5544 | MVT::i32); | ||||
5545 | SDValue Ops[] = { Op1, Op2, DMV }; | ||||
5546 | CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); | ||||
5547 | return; | ||||
5548 | } | ||||
5549 | |||||
5550 | break; | ||||
5551 | case PPCISD::BDNZ: | ||||
5552 | case PPCISD::BDZ: { | ||||
5553 | bool IsPPC64 = Subtarget->isPPC64(); | ||||
5554 | SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; | ||||
5555 | CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ | ||||
5556 | ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) | ||||
5557 | : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), | ||||
5558 | MVT::Other, Ops); | ||||
5559 | return; | ||||
5560 | } | ||||
5561 | case PPCISD::COND_BRANCH: { | ||||
5562 | // Op #0 is the Chain. | ||||
5563 | // Op #1 is the PPC::PRED_* number. | ||||
5564 | // Op #2 is the CR# | ||||
5565 | // Op #3 is the Dest MBB | ||||
5566 | // Op #4 is the Flag. | ||||
5567 | // Prevent PPC::PRED_* from being selected into LI. | ||||
5568 | unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); | ||||
5569 | if (EnableBranchHint) | ||||
5570 | PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3)); | ||||
5571 | |||||
5572 | SDValue Pred = getI32Imm(PCC, dl); | ||||
5573 | SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), | ||||
5574 | N->getOperand(0), N->getOperand(4) }; | ||||
5575 | CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); | ||||
5576 | return; | ||||
5577 | } | ||||
5578 | case ISD::BR_CC: { | ||||
5579 | if (tryFoldSWTestBRCC(N)) | ||||
5580 | return; | ||||
5581 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); | ||||
5582 | unsigned PCC = | ||||
5583 | getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget); | ||||
5584 | |||||
5585 | if (N->getOperand(2).getValueType() == MVT::i1) { | ||||
5586 | unsigned Opc; | ||||
5587 | bool Swap; | ||||
5588 | switch (PCC) { | ||||
5589 | default: llvm_unreachable("Unexpected Boolean-operand predicate")::llvm::llvm_unreachable_internal("Unexpected Boolean-operand predicate" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5589); | ||||
5590 | case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; | ||||
5591 | case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; | ||||
5592 | case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; | ||||
5593 | case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; | ||||
5594 | case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; | ||||
5595 | case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; | ||||
5596 | } | ||||
5597 | |||||
5598 | // A signed comparison of i1 values produces the opposite result to an | ||||
5599 | // unsigned one if the condition code includes less-than or greater-than. | ||||
5600 | // This is because 1 is the most negative signed i1 number and the most | ||||
5601 | // positive unsigned i1 number. The CR-logical operations used for such | ||||
5602 | // comparisons are non-commutative so for signed comparisons vs. unsigned | ||||
5603 | // ones, the input operands just need to be swapped. | ||||
5604 | if (ISD::isSignedIntSetCC(CC)) | ||||
5605 | Swap = !Swap; | ||||
5606 | |||||
5607 | SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, | ||||
5608 | N->getOperand(Swap ? 3 : 2), | ||||
5609 | N->getOperand(Swap ? 2 : 3)), 0); | ||||
5610 | CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), | ||||
5611 | N->getOperand(0)); | ||||
5612 | return; | ||||
5613 | } | ||||
5614 | |||||
5615 | if (EnableBranchHint) | ||||
5616 | PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4)); | ||||
5617 | |||||
5618 | SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); | ||||
5619 | SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, | ||||
5620 | N->getOperand(4), N->getOperand(0) }; | ||||
5621 | CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); | ||||
5622 | return; | ||||
5623 | } | ||||
5624 | case ISD::BRIND: { | ||||
5625 | // FIXME: Should custom lower this. | ||||
5626 | SDValue Chain = N->getOperand(0); | ||||
5627 | SDValue Target = N->getOperand(1); | ||||
5628 | unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; | ||||
5629 | unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; | ||||
5630 | Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, | ||||
5631 | Chain), 0); | ||||
5632 | CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); | ||||
5633 | return; | ||||
5634 | } | ||||
5635 | case PPCISD::TOC_ENTRY: { | ||||
5636 | const bool isPPC64 = Subtarget->isPPC64(); | ||||
5637 | const bool isELFABI = Subtarget->isSVR4ABI(); | ||||
5638 | const bool isAIXABI = Subtarget->isAIXABI(); | ||||
5639 | |||||
5640 | // PowerPC only supports the small, medium and large code models. | ||||
5641 | const CodeModel::Model CModel = TM.getCodeModel(); | ||||
5642 | assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&(static_cast <bool> (!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models." ) ? void (0) : __assert_fail ("!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && \"PowerPC doesn't support tiny or kernel code models.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5643, __extension__ __PRETTY_FUNCTION__)) | ||||
5643 | "PowerPC doesn't support tiny or kernel code models.")(static_cast <bool> (!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models." ) ? void (0) : __assert_fail ("!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && \"PowerPC doesn't support tiny or kernel code models.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5643, __extension__ __PRETTY_FUNCTION__)); | ||||
5644 | |||||
5645 | if (isAIXABI && CModel == CodeModel::Medium) | ||||
5646 | report_fatal_error("Medium code model is not supported on AIX."); | ||||
5647 | |||||
5648 | // For 64-bit small code model, we allow SelectCodeCommon to handle this, | ||||
5649 | // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. | ||||
5650 | if (isPPC64 && CModel == CodeModel::Small) | ||||
5651 | break; | ||||
5652 | |||||
5653 | // Handle 32-bit small code model. | ||||
5654 | if (!isPPC64) { | ||||
5655 | // Transforms the PPCISD::TOC_ENTRY node into the passed-in Opcode, either | ||||
5656 | // PPC::ADDItoc or PPC::LWZtoc. | ||||
5657 | auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry) { | ||||
5658 | SDValue GA = TocEntry->getOperand(0); | ||||
5659 | SDValue TocBase = TocEntry->getOperand(1); | ||||
5660 | SDNode *MN = CurDAG->getMachineNode(OpCode, dl, MVT::i32, GA, TocBase); | ||||
5661 | transferMemOperands(TocEntry, MN); | ||||
5662 | ReplaceNode(TocEntry, MN); | ||||
5663 | }; | ||||
5664 | |||||
5665 | if (isELFABI) { | ||||
5666 | assert(TM.isPositionIndependent() &&(static_cast <bool> (TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5668, __extension__ __PRETTY_FUNCTION__)) | ||||
5667 | "32-bit ELF can only have TOC entries in position independent"(static_cast <bool> (TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5668, __extension__ __PRETTY_FUNCTION__)) | ||||
5668 | " code.")(static_cast <bool> (TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5668, __extension__ __PRETTY_FUNCTION__)); | ||||
5669 | // 32-bit ELF always uses a small code model toc access. | ||||
5670 | replaceWith(PPC::LWZtoc, N); | ||||
5671 | return; | ||||
5672 | } | ||||
5673 | |||||
5674 | if (isAIXABI && CModel == CodeModel::Small) { | ||||
5675 | if (hasTocDataAttr(N->getOperand(0), | ||||
5676 | CurDAG->getDataLayout().getPointerSize())) | ||||
5677 | replaceWith(PPC::ADDItoc, N); | ||||
5678 | else | ||||
5679 | replaceWith(PPC::LWZtoc, N); | ||||
5680 | |||||
5681 | return; | ||||
5682 | } | ||||
5683 | } | ||||
5684 | |||||
5685 | assert(CModel != CodeModel::Small && "All small code models handled.")(static_cast <bool> (CModel != CodeModel::Small && "All small code models handled.") ? void (0) : __assert_fail ("CModel != CodeModel::Small && \"All small code models handled.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5685, __extension__ __PRETTY_FUNCTION__)); | ||||
5686 | |||||
5687 | assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"(static_cast <bool> ((isPPC64 || (isAIXABI && ! isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following." ) ? void (0) : __assert_fail ("(isPPC64 || (isAIXABI && !isPPC64)) && \"We are dealing with 64-bit\" \" ELF/AIX or 32-bit AIX in the following.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5688, __extension__ __PRETTY_FUNCTION__)) | ||||
5688 | " ELF/AIX or 32-bit AIX in the following.")(static_cast <bool> ((isPPC64 || (isAIXABI && ! isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following." ) ? void (0) : __assert_fail ("(isPPC64 || (isAIXABI && !isPPC64)) && \"We are dealing with 64-bit\" \" ELF/AIX or 32-bit AIX in the following.\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5688, __extension__ __PRETTY_FUNCTION__)); | ||||
5689 | |||||
5690 | // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode | ||||
5691 | // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We | ||||
5692 | // generate two instructions as described below. The first source operand | ||||
5693 | // is a symbol reference. If it must be toc-referenced according to | ||||
5694 | // Subtarget, we generate: | ||||
5695 | // [32-bit AIX] | ||||
5696 | // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) | ||||
5697 | // [64-bit ELF/AIX] | ||||
5698 | // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) | ||||
5699 | // Otherwise we generate: | ||||
5700 | // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) | ||||
5701 | SDValue GA = N->getOperand(0); | ||||
5702 | SDValue TOCbase = N->getOperand(1); | ||||
5703 | |||||
5704 | EVT VT = isPPC64 ? MVT::i64 : MVT::i32; | ||||
5705 | SDNode *Tmp = CurDAG->getMachineNode( | ||||
5706 | isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); | ||||
5707 | |||||
5708 | if (PPCLowering->isAccessedAsGotIndirect(GA)) { | ||||
5709 | // If it is accessed as got-indirect, we need an extra LWZ/LD to load | ||||
5710 | // the address. | ||||
5711 | SDNode *MN = CurDAG->getMachineNode( | ||||
5712 | isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); | ||||
5713 | |||||
5714 | transferMemOperands(N, MN); | ||||
5715 | ReplaceNode(N, MN); | ||||
5716 | return; | ||||
5717 | } | ||||
5718 | |||||
5719 | // Build the address relative to the TOC-pointer. | ||||
5720 | ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, | ||||
5721 | SDValue(Tmp, 0), GA)); | ||||
5722 | return; | ||||
5723 | } | ||||
5724 | case PPCISD::PPC32_PICGOT: | ||||
5725 | // Generate a PIC-safe GOT reference. | ||||
5726 | assert(Subtarget->is32BitELFABI() &&(static_cast <bool> (Subtarget->is32BitELFABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4") ? void (0) : __assert_fail ("Subtarget->is32BitELFABI() && \"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5727, __extension__ __PRETTY_FUNCTION__)) | ||||
5727 | "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4")(static_cast <bool> (Subtarget->is32BitELFABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4") ? void (0) : __assert_fail ("Subtarget->is32BitELFABI() && \"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5727, __extension__ __PRETTY_FUNCTION__)); | ||||
5728 | CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, | ||||
5729 | PPCLowering->getPointerTy(CurDAG->getDataLayout()), | ||||
5730 | MVT::i32); | ||||
5731 | return; | ||||
5732 | |||||
5733 | case PPCISD::VADD_SPLAT: { | ||||
5734 | // This expands into one of three sequences, depending on whether | ||||
5735 | // the first operand is odd or even, positive or negative. | ||||
5736 | assert(isa<ConstantSDNode>(N->getOperand(0)) &&(static_cast <bool> (isa<ConstantSDNode>(N->getOperand (0)) && isa<ConstantSDNode>(N->getOperand(1) ) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5738, __extension__ __PRETTY_FUNCTION__)) | ||||
5737 | isa<ConstantSDNode>(N->getOperand(1)) &&(static_cast <bool> (isa<ConstantSDNode>(N->getOperand (0)) && isa<ConstantSDNode>(N->getOperand(1) ) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5738, __extension__ __PRETTY_FUNCTION__)) | ||||
5738 | "Invalid operand on VADD_SPLAT!")(static_cast <bool> (isa<ConstantSDNode>(N->getOperand (0)) && isa<ConstantSDNode>(N->getOperand(1) ) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5738, __extension__ __PRETTY_FUNCTION__)); | ||||
5739 | |||||
5740 | int Elt = N->getConstantOperandVal(0); | ||||
5741 | int EltSize = N->getConstantOperandVal(1); | ||||
5742 | unsigned Opc1, Opc2, Opc3; | ||||
5743 | EVT VT; | ||||
5744 | |||||
5745 | if (EltSize == 1) { | ||||
5746 | Opc1 = PPC::VSPLTISB; | ||||
5747 | Opc2 = PPC::VADDUBM; | ||||
5748 | Opc3 = PPC::VSUBUBM; | ||||
5749 | VT = MVT::v16i8; | ||||
5750 | } else if (EltSize == 2) { | ||||
5751 | Opc1 = PPC::VSPLTISH; | ||||
5752 | Opc2 = PPC::VADDUHM; | ||||
5753 | Opc3 = PPC::VSUBUHM; | ||||
5754 | VT = MVT::v8i16; | ||||
5755 | } else { | ||||
5756 | assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!")(static_cast <bool> (EltSize == 4 && "Invalid element size on VADD_SPLAT!" ) ? void (0) : __assert_fail ("EltSize == 4 && \"Invalid element size on VADD_SPLAT!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5756, __extension__ __PRETTY_FUNCTION__)); | ||||
5757 | Opc1 = PPC::VSPLTISW; | ||||
5758 | Opc2 = PPC::VADDUWM; | ||||
5759 | Opc3 = PPC::VSUBUWM; | ||||
5760 | VT = MVT::v4i32; | ||||
5761 | } | ||||
5762 | |||||
5763 | if ((Elt & 1) == 0) { | ||||
5764 | // Elt is even, in the range [-32,-18] + [16,30]. | ||||
5765 | // | ||||
5766 | // Convert: VADD_SPLAT elt, size | ||||
5767 | // Into: tmp = VSPLTIS[BHW] elt | ||||
5768 | // VADDU[BHW]M tmp, tmp | ||||
5769 | // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 | ||||
5770 | SDValue EltVal = getI32Imm(Elt >> 1, dl); | ||||
5771 | SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5772 | SDValue TmpVal = SDValue(Tmp, 0); | ||||
5773 | ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); | ||||
5774 | return; | ||||
5775 | } else if (Elt > 0) { | ||||
5776 | // Elt is odd and positive, in the range [17,31]. | ||||
5777 | // | ||||
5778 | // Convert: VADD_SPLAT elt, size | ||||
5779 | // Into: tmp1 = VSPLTIS[BHW] elt-16 | ||||
5780 | // tmp2 = VSPLTIS[BHW] -16 | ||||
5781 | // VSUBU[BHW]M tmp1, tmp2 | ||||
5782 | SDValue EltVal = getI32Imm(Elt - 16, dl); | ||||
5783 | SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5784 | EltVal = getI32Imm(-16, dl); | ||||
5785 | SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5786 | ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), | ||||
5787 | SDValue(Tmp2, 0))); | ||||
5788 | return; | ||||
5789 | } else { | ||||
5790 | // Elt is odd and negative, in the range [-31,-17]. | ||||
5791 | // | ||||
5792 | // Convert: VADD_SPLAT elt, size | ||||
5793 | // Into: tmp1 = VSPLTIS[BHW] elt+16 | ||||
5794 | // tmp2 = VSPLTIS[BHW] -16 | ||||
5795 | // VADDU[BHW]M tmp1, tmp2 | ||||
5796 | SDValue EltVal = getI32Imm(Elt + 16, dl); | ||||
5797 | SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5798 | EltVal = getI32Imm(-16, dl); | ||||
5799 | SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5800 | ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), | ||||
5801 | SDValue(Tmp2, 0))); | ||||
5802 | return; | ||||
5803 | } | ||||
5804 | } | ||||
5805 | } | ||||
5806 | |||||
5807 | SelectCode(N); | ||||
5808 | } | ||||
5809 | |||||
5810 | // If the target supports the cmpb instruction, do the idiom recognition here. | ||||
5811 | // We don't do this as a DAG combine because we don't want to do it as nodes | ||||
5812 | // are being combined (because we might miss part of the eventual idiom). We | ||||
5813 | // don't want to do it during instruction selection because we want to reuse | ||||
5814 | // the logic for lowering the masking operations already part of the | ||||
5815 | // instruction selector. | ||||
5816 | SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { | ||||
5817 | SDLoc dl(N); | ||||
5818 | |||||
5819 | assert(N->getOpcode() == ISD::OR &&(static_cast <bool> (N->getOpcode() == ISD::OR && "Only OR nodes are supported for CMPB") ? void (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Only OR nodes are supported for CMPB\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5820, __extension__ __PRETTY_FUNCTION__)) | ||||
5820 | "Only OR nodes are supported for CMPB")(static_cast <bool> (N->getOpcode() == ISD::OR && "Only OR nodes are supported for CMPB") ? void (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Only OR nodes are supported for CMPB\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5820, __extension__ __PRETTY_FUNCTION__)); | ||||
5821 | |||||
5822 | SDValue Res; | ||||
5823 | if (!Subtarget->hasCMPB()) | ||||
5824 | return Res; | ||||
5825 | |||||
5826 | if (N->getValueType(0) != MVT::i32 && | ||||
5827 | N->getValueType(0) != MVT::i64) | ||||
5828 | return Res; | ||||
5829 | |||||
5830 | EVT VT = N->getValueType(0); | ||||
5831 | |||||
5832 | SDValue RHS, LHS; | ||||
5833 | bool BytesFound[8] = {false, false, false, false, false, false, false, false}; | ||||
5834 | uint64_t Mask = 0, Alt = 0; | ||||
5835 | |||||
5836 | auto IsByteSelectCC = [this](SDValue O, unsigned &b, | ||||
5837 | uint64_t &Mask, uint64_t &Alt, | ||||
5838 | SDValue &LHS, SDValue &RHS) { | ||||
5839 | if (O.getOpcode() != ISD::SELECT_CC) | ||||
5840 | return false; | ||||
5841 | ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get(); | ||||
5842 | |||||
5843 | if (!isa<ConstantSDNode>(O.getOperand(2)) || | ||||
5844 | !isa<ConstantSDNode>(O.getOperand(3))) | ||||
5845 | return false; | ||||
5846 | |||||
5847 | uint64_t PM = O.getConstantOperandVal(2); | ||||
5848 | uint64_t PAlt = O.getConstantOperandVal(3); | ||||
5849 | for (b = 0; b < 8; ++b) { | ||||
5850 | uint64_t Mask = UINT64_C(0xFF)0xFFUL << (8*b); | ||||
5851 | if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) | ||||
5852 | break; | ||||
5853 | } | ||||
5854 | |||||
5855 | if (b == 8) | ||||
5856 | return false; | ||||
5857 | Mask |= PM; | ||||
5858 | Alt |= PAlt; | ||||
5859 | |||||
5860 | if (!isa<ConstantSDNode>(O.getOperand(1)) || | ||||
5861 | O.getConstantOperandVal(1) != 0) { | ||||
5862 | SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); | ||||
5863 | if (Op0.getOpcode() == ISD::TRUNCATE) | ||||
5864 | Op0 = Op0.getOperand(0); | ||||
5865 | if (Op1.getOpcode() == ISD::TRUNCATE) | ||||
5866 | Op1 = Op1.getOperand(0); | ||||
5867 | |||||
5868 | if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && | ||||
5869 | Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && | ||||
5870 | isa<ConstantSDNode>(Op0.getOperand(1))) { | ||||
5871 | |||||
5872 | unsigned Bits = Op0.getValueSizeInBits(); | ||||
5873 | if (b != Bits/8-1) | ||||
5874 | return false; | ||||
5875 | if (Op0.getConstantOperandVal(1) != Bits-8) | ||||
5876 | return false; | ||||
5877 | |||||
5878 | LHS = Op0.getOperand(0); | ||||
5879 | RHS = Op1.getOperand(0); | ||||
5880 | return true; | ||||
5881 | } | ||||
5882 | |||||
5883 | // When we have small integers (i16 to be specific), the form present | ||||
5884 | // post-legalization uses SETULT in the SELECT_CC for the | ||||
5885 | // higher-order byte, depending on the fact that the | ||||
5886 | // even-higher-order bytes are known to all be zero, for example: | ||||
5887 | // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult | ||||
5888 | // (so when the second byte is the same, because all higher-order | ||||
5889 | // bits from bytes 3 and 4 are known to be zero, the result of the | ||||
5890 | // xor can be at most 255) | ||||
5891 | if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && | ||||
5892 | isa<ConstantSDNode>(O.getOperand(1))) { | ||||
5893 | |||||
5894 | uint64_t ULim = O.getConstantOperandVal(1); | ||||
5895 | if (ULim != (UINT64_C(1)1UL << b*8)) | ||||
5896 | return false; | ||||
5897 | |||||
5898 | // Now we need to make sure that the upper bytes are known to be | ||||
5899 | // zero. | ||||
5900 | unsigned Bits = Op0.getValueSizeInBits(); | ||||
5901 | if (!CurDAG->MaskedValueIsZero( | ||||
5902 | Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) | ||||
5903 | return false; | ||||
5904 | |||||
5905 | LHS = Op0.getOperand(0); | ||||
5906 | RHS = Op0.getOperand(1); | ||||
5907 | return true; | ||||
5908 | } | ||||
5909 | |||||
5910 | return false; | ||||
5911 | } | ||||
5912 | |||||
5913 | if (CC != ISD::SETEQ) | ||||
5914 | return false; | ||||
5915 | |||||
5916 | SDValue Op = O.getOperand(0); | ||||
5917 | if (Op.getOpcode() == ISD::AND) { | ||||
5918 | if (!isa<ConstantSDNode>(Op.getOperand(1))) | ||||
5919 | return false; | ||||
5920 | if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF)0xFFUL << (8*b))) | ||||
5921 | return false; | ||||
5922 | |||||
5923 | SDValue XOR = Op.getOperand(0); | ||||
5924 | if (XOR.getOpcode() == ISD::TRUNCATE) | ||||
5925 | XOR = XOR.getOperand(0); | ||||
5926 | if (XOR.getOpcode() != ISD::XOR) | ||||
5927 | return false; | ||||
5928 | |||||
5929 | LHS = XOR.getOperand(0); | ||||
5930 | RHS = XOR.getOperand(1); | ||||
5931 | return true; | ||||
5932 | } else if (Op.getOpcode() == ISD::SRL) { | ||||
5933 | if (!isa<ConstantSDNode>(Op.getOperand(1))) | ||||
5934 | return false; | ||||
5935 | unsigned Bits = Op.getValueSizeInBits(); | ||||
5936 | if (b != Bits/8-1) | ||||
5937 | return false; | ||||
5938 | if (Op.getConstantOperandVal(1) != Bits-8) | ||||
5939 | return false; | ||||
5940 | |||||
5941 | SDValue XOR = Op.getOperand(0); | ||||
5942 | if (XOR.getOpcode() == ISD::TRUNCATE) | ||||
5943 | XOR = XOR.getOperand(0); | ||||
5944 | if (XOR.getOpcode() != ISD::XOR) | ||||
5945 | return false; | ||||
5946 | |||||
5947 | LHS = XOR.getOperand(0); | ||||
5948 | RHS = XOR.getOperand(1); | ||||
5949 | return true; | ||||
5950 | } | ||||
5951 | |||||
5952 | return false; | ||||
5953 | }; | ||||
5954 | |||||
5955 | SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); | ||||
5956 | while (!Queue.empty()) { | ||||
5957 | SDValue V = Queue.pop_back_val(); | ||||
5958 | |||||
5959 | for (const SDValue &O : V.getNode()->ops()) { | ||||
5960 | unsigned b = 0; | ||||
5961 | uint64_t M = 0, A = 0; | ||||
5962 | SDValue OLHS, ORHS; | ||||
5963 | if (O.getOpcode() == ISD::OR) { | ||||
5964 | Queue.push_back(O); | ||||
5965 | } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { | ||||
5966 | if (!LHS) { | ||||
5967 | LHS = OLHS; | ||||
5968 | RHS = ORHS; | ||||
5969 | BytesFound[b] = true; | ||||
5970 | Mask |= M; | ||||
5971 | Alt |= A; | ||||
5972 | } else if ((LHS == ORHS && RHS == OLHS) || | ||||
5973 | (RHS == ORHS && LHS == OLHS)) { | ||||
5974 | BytesFound[b] = true; | ||||
5975 | Mask |= M; | ||||
5976 | Alt |= A; | ||||
5977 | } else { | ||||
5978 | return Res; | ||||
5979 | } | ||||
5980 | } else { | ||||
5981 | return Res; | ||||
5982 | } | ||||
5983 | } | ||||
5984 | } | ||||
5985 | |||||
5986 | unsigned LastB = 0, BCnt = 0; | ||||
5987 | for (unsigned i = 0; i < 8; ++i) | ||||
5988 | if (BytesFound[LastB]) { | ||||
5989 | ++BCnt; | ||||
5990 | LastB = i; | ||||
5991 | } | ||||
5992 | |||||
5993 | if (!LastB || BCnt < 2) | ||||
5994 | return Res; | ||||
5995 | |||||
5996 | // Because we'll be zero-extending the output anyway if don't have a specific | ||||
5997 | // value for each input byte (via the Mask), we can 'anyext' the inputs. | ||||
5998 | if (LHS.getValueType() != VT) { | ||||
5999 | LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); | ||||
6000 | RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); | ||||
6001 | } | ||||
6002 | |||||
6003 | Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); | ||||
6004 | |||||
6005 | bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1)-1L; | ||||
6006 | if (NonTrivialMask && !Alt) { | ||||
6007 | // Res = Mask & CMPB | ||||
6008 | Res = CurDAG->getNode(ISD::AND, dl, VT, Res, | ||||
6009 | CurDAG->getConstant(Mask, dl, VT)); | ||||
6010 | } else if (Alt) { | ||||
6011 | // Res = (CMPB & Mask) | (~CMPB & Alt) | ||||
6012 | // Which, as suggested here: | ||||
6013 | // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge | ||||
6014 | // can be written as: | ||||
6015 | // Res = Alt ^ ((Alt ^ Mask) & CMPB) | ||||
6016 | // useful because the (Alt ^ Mask) can be pre-computed. | ||||
6017 | Res = CurDAG->getNode(ISD::AND, dl, VT, Res, | ||||
6018 | CurDAG->getConstant(Mask ^ Alt, dl, VT)); | ||||
6019 | Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, | ||||
6020 | CurDAG->getConstant(Alt, dl, VT)); | ||||
6021 | } | ||||
6022 | |||||
6023 | return Res; | ||||
6024 | } | ||||
6025 | |||||
6026 | // When CR bit registers are enabled, an extension of an i1 variable to a i32 | ||||
6027 | // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus | ||||
6028 | // involves constant materialization of a 0 or a 1 or both. If the result of | ||||
6029 | // the extension is then operated upon by some operator that can be constant | ||||
6030 | // folded with a constant 0 or 1, and that constant can be materialized using | ||||
6031 | // only one instruction (like a zero or one), then we should fold in those | ||||
6032 | // operations with the select. | ||||
6033 | void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { | ||||
6034 | if (!Subtarget->useCRBits()) | ||||
6035 | return; | ||||
6036 | |||||
6037 | if (N->getOpcode() != ISD::ZERO_EXTEND && | ||||
6038 | N->getOpcode() != ISD::SIGN_EXTEND && | ||||
6039 | N->getOpcode() != ISD::ANY_EXTEND) | ||||
6040 | return; | ||||
6041 | |||||
6042 | if (N->getOperand(0).getValueType() != MVT::i1) | ||||
6043 | return; | ||||
6044 | |||||
6045 | if (!N->hasOneUse()) | ||||
6046 | return; | ||||
6047 | |||||
6048 | SDLoc dl(N); | ||||
6049 | EVT VT = N->getValueType(0); | ||||
6050 | SDValue Cond = N->getOperand(0); | ||||
6051 | SDValue ConstTrue = | ||||
6052 | CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); | ||||
6053 | SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); | ||||
6054 | |||||
6055 | do { | ||||
6056 | SDNode *User = *N->use_begin(); | ||||
6057 | if (User->getNumOperands() != 2) | ||||
6058 | break; | ||||
6059 | |||||
6060 | auto TryFold = [this, N, User, dl](SDValue Val) { | ||||
6061 | SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); | ||||
6062 | SDValue O0 = UserO0.getNode() == N ? Val : UserO0; | ||||
6063 | SDValue O1 = UserO1.getNode() == N ? Val : UserO1; | ||||
6064 | |||||
6065 | return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, | ||||
6066 | User->getValueType(0), {O0, O1}); | ||||
6067 | }; | ||||
6068 | |||||
6069 | // FIXME: When the semantics of the interaction between select and undef | ||||
6070 | // are clearly defined, it may turn out to be unnecessary to break here. | ||||
6071 | SDValue TrueRes = TryFold(ConstTrue); | ||||
6072 | if (!TrueRes || TrueRes.isUndef()) | ||||
6073 | break; | ||||
6074 | SDValue FalseRes = TryFold(ConstFalse); | ||||
6075 | if (!FalseRes || FalseRes.isUndef()) | ||||
6076 | break; | ||||
6077 | |||||
6078 | // For us to materialize these using one instruction, we must be able to | ||||
6079 | // represent them as signed 16-bit integers. | ||||
6080 | uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(), | ||||
6081 | False = cast<ConstantSDNode>(FalseRes)->getZExtValue(); | ||||
6082 | if (!isInt<16>(True) || !isInt<16>(False)) | ||||
6083 | break; | ||||
6084 | |||||
6085 | // We can replace User with a new SELECT node, and try again to see if we | ||||
6086 | // can fold the select with its user. | ||||
6087 | Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); | ||||
6088 | N = User; | ||||
6089 | ConstTrue = TrueRes; | ||||
6090 | ConstFalse = FalseRes; | ||||
6091 | } while (N->hasOneUse()); | ||||
6092 | } | ||||
6093 | |||||
6094 | void PPCDAGToDAGISel::PreprocessISelDAG() { | ||||
6095 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||
6096 | |||||
6097 | bool MadeChange = false; | ||||
6098 | while (Position != CurDAG->allnodes_begin()) { | ||||
6099 | SDNode *N = &*--Position; | ||||
6100 | if (N->use_empty()) | ||||
6101 | continue; | ||||
6102 | |||||
6103 | SDValue Res; | ||||
6104 | switch (N->getOpcode()) { | ||||
6105 | default: break; | ||||
6106 | case ISD::OR: | ||||
6107 | Res = combineToCMPB(N); | ||||
6108 | break; | ||||
6109 | } | ||||
6110 | |||||
6111 | if (!Res) | ||||
6112 | foldBoolExts(Res, N); | ||||
6113 | |||||
6114 | if (Res) { | ||||
6115 | LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "PPC DAG preprocessing replacing:\nOld: " ; } } while (false); | ||||
6116 | LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(CurDAG); } } while (false); | ||||
6117 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6118 | LLVM_DEBUG(Res.getNode()->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { Res.getNode()->dump(CurDAG); } } while ( false); | ||||
6119 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6120 | |||||
6121 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||
6122 | MadeChange = true; | ||||
6123 | } | ||||
6124 | } | ||||
6125 | |||||
6126 | if (MadeChange) | ||||
6127 | CurDAG->RemoveDeadNodes(); | ||||
6128 | } | ||||
6129 | |||||
6130 | /// PostprocessISelDAG - Perform some late peephole optimizations | ||||
6131 | /// on the DAG representation. | ||||
6132 | void PPCDAGToDAGISel::PostprocessISelDAG() { | ||||
6133 | // Skip peepholes at -O0. | ||||
6134 | if (TM.getOptLevel() == CodeGenOpt::None) | ||||
6135 | return; | ||||
6136 | |||||
6137 | PeepholePPC64(); | ||||
6138 | PeepholeCROps(); | ||||
6139 | PeepholePPC64ZExt(); | ||||
6140 | } | ||||
6141 | |||||
6142 | // Check if all users of this node will become isel where the second operand | ||||
6143 | // is the constant zero. If this is so, and if we can negate the condition, | ||||
6144 | // then we can flip the true and false operands. This will allow the zero to | ||||
6145 | // be folded with the isel so that we don't need to materialize a register | ||||
6146 | // containing zero. | ||||
6147 | bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { | ||||
6148 | for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); | ||||
6149 | UI != UE; ++UI) { | ||||
6150 | SDNode *User = *UI; | ||||
6151 | if (!User->isMachineOpcode()) | ||||
6152 | return false; | ||||
6153 | if (User->getMachineOpcode() != PPC::SELECT_I4 && | ||||
6154 | User->getMachineOpcode() != PPC::SELECT_I8) | ||||
6155 | return false; | ||||
6156 | |||||
6157 | SDNode *Op1 = User->getOperand(1).getNode(); | ||||
6158 | SDNode *Op2 = User->getOperand(2).getNode(); | ||||
6159 | // If we have a degenerate select with two equal operands, swapping will | ||||
6160 | // not do anything, and we may run into an infinite loop. | ||||
6161 | if (Op1 == Op2) | ||||
6162 | return false; | ||||
6163 | |||||
6164 | if (!Op2->isMachineOpcode()) | ||||
6165 | return false; | ||||
6166 | |||||
6167 | if (Op2->getMachineOpcode() != PPC::LI && | ||||
6168 | Op2->getMachineOpcode() != PPC::LI8) | ||||
6169 | return false; | ||||
6170 | |||||
6171 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0)); | ||||
6172 | if (!C) | ||||
6173 | return false; | ||||
6174 | |||||
6175 | if (!C->isNullValue()) | ||||
6176 | return false; | ||||
6177 | } | ||||
6178 | |||||
6179 | return true; | ||||
6180 | } | ||||
6181 | |||||
6182 | void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { | ||||
6183 | SmallVector<SDNode *, 4> ToReplace; | ||||
6184 | for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); | ||||
6185 | UI != UE; ++UI) { | ||||
6186 | SDNode *User = *UI; | ||||
6187 | assert((User->getMachineOpcode() == PPC::SELECT_I4 ||(static_cast <bool> ((User->getMachineOpcode() == PPC ::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users") ? void (0) : __assert_fail ("(User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && \"Must have all select users\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 6189, __extension__ __PRETTY_FUNCTION__)) | ||||
6188 | User->getMachineOpcode() == PPC::SELECT_I8) &&(static_cast <bool> ((User->getMachineOpcode() == PPC ::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users") ? void (0) : __assert_fail ("(User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && \"Must have all select users\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 6189, __extension__ __PRETTY_FUNCTION__)) | ||||
6189 | "Must have all select users")(static_cast <bool> ((User->getMachineOpcode() == PPC ::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users") ? void (0) : __assert_fail ("(User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && \"Must have all select users\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 6189, __extension__ __PRETTY_FUNCTION__)); | ||||
6190 | ToReplace.push_back(User); | ||||
6191 | } | ||||
6192 | |||||
6193 | for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(), | ||||
6194 | UE = ToReplace.end(); UI != UE; ++UI) { | ||||
6195 | SDNode *User = *UI; | ||||
6196 | SDNode *ResNode = | ||||
6197 | CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), | ||||
6198 | User->getValueType(0), User->getOperand(0), | ||||
6199 | User->getOperand(2), | ||||
6200 | User->getOperand(1)); | ||||
6201 | |||||
6202 | LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "CR Peephole replacing:\nOld: " ; } } while (false); | ||||
6203 | LLVM_DEBUG(User->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { User->dump(CurDAG); } } while (false); | ||||
6204 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6205 | LLVM_DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { ResNode->dump(CurDAG); } } while (false ); | ||||
6206 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6207 | |||||
6208 | ReplaceUses(User, ResNode); | ||||
6209 | } | ||||
6210 | } | ||||
6211 | |||||
6212 | void PPCDAGToDAGISel::PeepholeCROps() { | ||||
6213 | bool IsModified; | ||||
6214 | do { | ||||
6215 | IsModified = false; | ||||
6216 | for (SDNode &Node : CurDAG->allnodes()) { | ||||
6217 | MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); | ||||
6218 | if (!MachineNode || MachineNode->use_empty()) | ||||
6219 | continue; | ||||
6220 | SDNode *ResNode = MachineNode; | ||||
6221 | |||||
6222 | bool Op1Set = false, Op1Unset = false, | ||||
6223 | Op1Not = false, | ||||
6224 | Op2Set = false, Op2Unset = false, | ||||
6225 | Op2Not = false; | ||||
6226 | |||||
6227 | unsigned Opcode = MachineNode->getMachineOpcode(); | ||||
6228 | switch (Opcode) { | ||||
6229 | default: break; | ||||
6230 | case PPC::CRAND: | ||||
6231 | case PPC::CRNAND: | ||||
6232 | case PPC::CROR: | ||||
6233 | case PPC::CRXOR: | ||||
6234 | case PPC::CRNOR: | ||||
6235 | case PPC::CREQV: | ||||
6236 | case PPC::CRANDC: | ||||
6237 | case PPC::CRORC: { | ||||
6238 | SDValue Op = MachineNode->getOperand(1); | ||||
6239 | if (Op.isMachineOpcode()) { | ||||
6240 | if (Op.getMachineOpcode() == PPC::CRSET) | ||||
6241 | Op2Set = true; | ||||
6242 | else if (Op.getMachineOpcode() == PPC::CRUNSET) | ||||
6243 | Op2Unset = true; | ||||
6244 | else if (Op.getMachineOpcode() == PPC::CRNOR && | ||||
6245 | Op.getOperand(0) == Op.getOperand(1)) | ||||
6246 | Op2Not = true; | ||||
6247 | } | ||||
6248 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
6249 | } | ||||
6250 | case PPC::BC: | ||||
6251 | case PPC::BCn: | ||||
6252 | case PPC::SELECT_I4: | ||||
6253 | case PPC::SELECT_I8: | ||||
6254 | case PPC::SELECT_F4: | ||||
6255 | case PPC::SELECT_F8: | ||||
6256 | case PPC::SELECT_SPE: | ||||
6257 | case PPC::SELECT_SPE4: | ||||
6258 | case PPC::SELECT_VRRC: | ||||
6259 | case PPC::SELECT_VSFRC: | ||||
6260 | case PPC::SELECT_VSSRC: | ||||
6261 | case PPC::SELECT_VSRC: { | ||||
6262 | SDValue Op = MachineNode->getOperand(0); | ||||
6263 | if (Op.isMachineOpcode()) { | ||||
6264 | if (Op.getMachineOpcode() == PPC::CRSET) | ||||
6265 | Op1Set = true; | ||||
6266 | else if (Op.getMachineOpcode() == PPC::CRUNSET) | ||||
6267 | Op1Unset = true; | ||||
6268 | else if (Op.getMachineOpcode() == PPC::CRNOR && | ||||
6269 | Op.getOperand(0) == Op.getOperand(1)) | ||||
6270 | Op1Not = true; | ||||
6271 | } | ||||
6272 | } | ||||
6273 | break; | ||||
6274 | } | ||||
6275 | |||||
6276 | bool SelectSwap = false; | ||||
6277 | switch (Opcode) { | ||||
6278 | default: break; | ||||
6279 | case PPC::CRAND: | ||||
6280 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6281 | // x & x = x | ||||
6282 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6283 | else if (Op1Set) | ||||
6284 | // 1 & y = y | ||||
6285 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6286 | else if (Op2Set) | ||||
6287 | // x & 1 = x | ||||
6288 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6289 | else if (Op1Unset || Op2Unset) | ||||
6290 | // x & 0 = 0 & y = 0 | ||||
6291 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6292 | MVT::i1); | ||||
6293 | else if (Op1Not) | ||||
6294 | // ~x & y = andc(y, x) | ||||
6295 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6296 | MVT::i1, MachineNode->getOperand(1), | ||||
6297 | MachineNode->getOperand(0). | ||||
6298 | getOperand(0)); | ||||
6299 | else if (Op2Not) | ||||
6300 | // x & ~y = andc(x, y) | ||||
6301 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6302 | MVT::i1, MachineNode->getOperand(0), | ||||
6303 | MachineNode->getOperand(1). | ||||
6304 | getOperand(0)); | ||||
6305 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6306 | ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), | ||||
6307 | MVT::i1, MachineNode->getOperand(0), | ||||
6308 | MachineNode->getOperand(1)); | ||||
6309 | SelectSwap = true; | ||||
6310 | } | ||||
6311 | break; | ||||
6312 | case PPC::CRNAND: | ||||
6313 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6314 | // nand(x, x) -> nor(x, x) | ||||
6315 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6316 | MVT::i1, MachineNode->getOperand(0), | ||||
6317 | MachineNode->getOperand(0)); | ||||
6318 | else if (Op1Set) | ||||
6319 | // nand(1, y) -> nor(y, y) | ||||
6320 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6321 | MVT::i1, MachineNode->getOperand(1), | ||||
6322 | MachineNode->getOperand(1)); | ||||
6323 | else if (Op2Set) | ||||
6324 | // nand(x, 1) -> nor(x, x) | ||||
6325 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6326 | MVT::i1, MachineNode->getOperand(0), | ||||
6327 | MachineNode->getOperand(0)); | ||||
6328 | else if (Op1Unset || Op2Unset) | ||||
6329 | // nand(x, 0) = nand(0, y) = 1 | ||||
6330 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6331 | MVT::i1); | ||||
6332 | else if (Op1Not) | ||||
6333 | // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) | ||||
6334 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6335 | MVT::i1, MachineNode->getOperand(0). | ||||
6336 | getOperand(0), | ||||
6337 | MachineNode->getOperand(1)); | ||||
6338 | else if (Op2Not) | ||||
6339 | // nand(x, ~y) = ~x | y = orc(y, x) | ||||
6340 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6341 | MVT::i1, MachineNode->getOperand(1). | ||||
6342 | getOperand(0), | ||||
6343 | MachineNode->getOperand(0)); | ||||
6344 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6345 | ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), | ||||
6346 | MVT::i1, MachineNode->getOperand(0), | ||||
6347 | MachineNode->getOperand(1)); | ||||
6348 | SelectSwap = true; | ||||
6349 | } | ||||
6350 | break; | ||||
6351 | case PPC::CROR: | ||||
6352 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6353 | // x | x = x | ||||
6354 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6355 | else if (Op1Set || Op2Set) | ||||
6356 | // x | 1 = 1 | y = 1 | ||||
6357 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6358 | MVT::i1); | ||||
6359 | else if (Op1Unset) | ||||
6360 | // 0 | y = y | ||||
6361 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6362 | else if (Op2Unset) | ||||
6363 | // x | 0 = x | ||||
6364 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6365 | else if (Op1Not) | ||||
6366 | // ~x | y = orc(y, x) | ||||
6367 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6368 | MVT::i1, MachineNode->getOperand(1), | ||||
6369 | MachineNode->getOperand(0). | ||||
6370 | getOperand(0)); | ||||
6371 | else if (Op2Not) | ||||
6372 | // x | ~y = orc(x, y) | ||||
6373 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6374 | MVT::i1, MachineNode->getOperand(0), | ||||
6375 | MachineNode->getOperand(1). | ||||
6376 | getOperand(0)); | ||||
6377 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6378 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6379 | MVT::i1, MachineNode->getOperand(0), | ||||
6380 | MachineNode->getOperand(1)); | ||||
6381 | SelectSwap = true; | ||||
6382 | } | ||||
6383 | break; | ||||
6384 | case PPC::CRXOR: | ||||
6385 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6386 | // xor(x, x) = 0 | ||||
6387 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6388 | MVT::i1); | ||||
6389 | else if (Op1Set) | ||||
6390 | // xor(1, y) -> nor(y, y) | ||||
6391 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6392 | MVT::i1, MachineNode->getOperand(1), | ||||
6393 | MachineNode->getOperand(1)); | ||||
6394 | else if (Op2Set) | ||||
6395 | // xor(x, 1) -> nor(x, x) | ||||
6396 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6397 | MVT::i1, MachineNode->getOperand(0), | ||||
6398 | MachineNode->getOperand(0)); | ||||
6399 | else if (Op1Unset) | ||||
6400 | // xor(0, y) = y | ||||
6401 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6402 | else if (Op2Unset) | ||||
6403 | // xor(x, 0) = x | ||||
6404 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6405 | else if (Op1Not) | ||||
6406 | // xor(~x, y) = eqv(x, y) | ||||
6407 | ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), | ||||
6408 | MVT::i1, MachineNode->getOperand(0). | ||||
6409 | getOperand(0), | ||||
6410 | MachineNode->getOperand(1)); | ||||
6411 | else if (Op2Not) | ||||
6412 | // xor(x, ~y) = eqv(x, y) | ||||
6413 | ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), | ||||
6414 | MVT::i1, MachineNode->getOperand(0), | ||||
6415 | MachineNode->getOperand(1). | ||||
6416 | getOperand(0)); | ||||
6417 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6418 | ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), | ||||
6419 | MVT::i1, MachineNode->getOperand(0), | ||||
6420 | MachineNode->getOperand(1)); | ||||
6421 | SelectSwap = true; | ||||
6422 | } | ||||
6423 | break; | ||||
6424 | case PPC::CRNOR: | ||||
6425 | if (Op1Set || Op2Set) | ||||
6426 | // nor(1, y) -> 0 | ||||
6427 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6428 | MVT::i1); | ||||
6429 | else if (Op1Unset) | ||||
6430 | // nor(0, y) = ~y -> nor(y, y) | ||||
6431 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6432 | MVT::i1, MachineNode->getOperand(1), | ||||
6433 | MachineNode->getOperand(1)); | ||||
6434 | else if (Op2Unset) | ||||
6435 | // nor(x, 0) = ~x | ||||
6436 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6437 | MVT::i1, MachineNode->getOperand(0), | ||||
6438 | MachineNode->getOperand(0)); | ||||
6439 | else if (Op1Not) | ||||
6440 | // nor(~x, y) = andc(x, y) | ||||
6441 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6442 | MVT::i1, MachineNode->getOperand(0). | ||||
6443 | getOperand(0), | ||||
6444 | MachineNode->getOperand(1)); | ||||
6445 | else if (Op2Not) | ||||
6446 | // nor(x, ~y) = andc(y, x) | ||||
6447 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6448 | MVT::i1, MachineNode->getOperand(1). | ||||
6449 | getOperand(0), | ||||
6450 | MachineNode->getOperand(0)); | ||||
6451 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6452 | ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), | ||||
6453 | MVT::i1, MachineNode->getOperand(0), | ||||
6454 | MachineNode->getOperand(1)); | ||||
6455 | SelectSwap = true; | ||||
6456 | } | ||||
6457 | break; | ||||
6458 | case PPC::CREQV: | ||||
6459 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6460 | // eqv(x, x) = 1 | ||||
6461 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6462 | MVT::i1); | ||||
6463 | else if (Op1Set) | ||||
6464 | // eqv(1, y) = y | ||||
6465 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6466 | else if (Op2Set) | ||||
6467 | // eqv(x, 1) = x | ||||
6468 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6469 | else if (Op1Unset) | ||||
6470 | // eqv(0, y) = ~y -> nor(y, y) | ||||
6471 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6472 | MVT::i1, MachineNode->getOperand(1), | ||||
6473 | MachineNode->getOperand(1)); | ||||
6474 | else if (Op2Unset) | ||||
6475 | // eqv(x, 0) = ~x | ||||
6476 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6477 | MVT::i1, MachineNode->getOperand(0), | ||||
6478 | MachineNode->getOperand(0)); | ||||
6479 | else if (Op1Not) | ||||
6480 | // eqv(~x, y) = xor(x, y) | ||||
6481 | ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), | ||||
6482 | MVT::i1, MachineNode->getOperand(0). | ||||
6483 | getOperand(0), | ||||
6484 | MachineNode->getOperand(1)); | ||||
6485 | else if (Op2Not) | ||||
6486 | // eqv(x, ~y) = xor(x, y) | ||||
6487 | ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), | ||||
6488 | MVT::i1, MachineNode->getOperand(0), | ||||
6489 | MachineNode->getOperand(1). | ||||
6490 | getOperand(0)); | ||||
6491 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6492 | ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), | ||||
6493 | MVT::i1, MachineNode->getOperand(0), | ||||
6494 | MachineNode->getOperand(1)); | ||||
6495 | SelectSwap = true; | ||||
6496 | } | ||||
6497 | break; | ||||
6498 | case PPC::CRANDC: | ||||
6499 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6500 | // andc(x, x) = 0 | ||||
6501 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6502 | MVT::i1); | ||||
6503 | else if (Op1Set) | ||||
6504 | // andc(1, y) = ~y | ||||
6505 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6506 | MVT::i1, MachineNode->getOperand(1), | ||||
6507 | MachineNode->getOperand(1)); | ||||
6508 | else if (Op1Unset || Op2Set) | ||||
6509 | // andc(0, y) = andc(x, 1) = 0 | ||||
6510 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6511 | MVT::i1); | ||||
6512 | else if (Op2Unset) | ||||
6513 | // andc(x, 0) = x | ||||
6514 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6515 | else if (Op1Not) | ||||
6516 | // andc(~x, y) = ~(x | y) = nor(x, y) | ||||
6517 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6518 | MVT::i1, MachineNode->getOperand(0). | ||||
6519 | getOperand(0), | ||||
6520 | MachineNode->getOperand(1)); | ||||
6521 | else if (Op2Not) | ||||
6522 | // andc(x, ~y) = x & y | ||||
6523 | ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), | ||||
6524 | MVT::i1, MachineNode->getOperand(0), | ||||
6525 | MachineNode->getOperand(1). | ||||
6526 | getOperand(0)); | ||||
6527 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6528 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6529 | MVT::i1, MachineNode->getOperand(1), | ||||
6530 | MachineNode->getOperand(0)); | ||||
6531 | SelectSwap = true; | ||||
6532 | } | ||||
6533 | break; | ||||
6534 | case PPC::CRORC: | ||||
6535 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6536 | // orc(x, x) = 1 | ||||
6537 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6538 | MVT::i1); | ||||
6539 | else if (Op1Set || Op2Unset) | ||||
6540 | // orc(1, y) = orc(x, 0) = 1 | ||||
6541 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6542 | MVT::i1); | ||||
6543 | else if (Op2Set) | ||||
6544 | // orc(x, 1) = x | ||||
6545 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6546 | else if (Op1Unset) | ||||
6547 | // orc(0, y) = ~y | ||||
6548 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6549 | MVT::i1, MachineNode->getOperand(1), | ||||
6550 | MachineNode->getOperand(1)); | ||||
6551 | else if (Op1Not) | ||||
6552 | // orc(~x, y) = ~(x & y) = nand(x, y) | ||||
6553 | ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), | ||||
6554 | MVT::i1, MachineNode->getOperand(0). | ||||
6555 | getOperand(0), | ||||
6556 | MachineNode->getOperand(1)); | ||||
6557 | else if (Op2Not) | ||||
6558 | // orc(x, ~y) = x | y | ||||
6559 | ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), | ||||
6560 | MVT::i1, MachineNode->getOperand(0), | ||||
6561 | MachineNode->getOperand(1). | ||||
6562 | getOperand(0)); | ||||
6563 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6564 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6565 | MVT::i1, MachineNode->getOperand(1), | ||||
6566 | MachineNode->getOperand(0)); | ||||
6567 | SelectSwap = true; | ||||
6568 | } | ||||
6569 | break; | ||||
6570 | case PPC::SELECT_I4: | ||||
6571 | case PPC::SELECT_I8: | ||||
6572 | case PPC::SELECT_F4: | ||||
6573 | case PPC::SELECT_F8: | ||||
6574 | case PPC::SELECT_SPE: | ||||
6575 | case PPC::SELECT_SPE4: | ||||
6576 | case PPC::SELECT_VRRC: | ||||
6577 | case PPC::SELECT_VSFRC: | ||||
6578 | case PPC::SELECT_VSSRC: | ||||
6579 | case PPC::SELECT_VSRC: | ||||
6580 | if (Op1Set) | ||||
6581 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6582 | else if (Op1Unset) | ||||
6583 | ResNode = MachineNode->getOperand(2).getNode(); | ||||
6584 | else if (Op1Not) | ||||
6585 | ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), | ||||
6586 | SDLoc(MachineNode), | ||||
6587 | MachineNode->getValueType(0), | ||||
6588 | MachineNode->getOperand(0). | ||||
6589 | getOperand(0), | ||||
6590 | MachineNode->getOperand(2), | ||||
6591 | MachineNode->getOperand(1)); | ||||
6592 | break; | ||||
6593 | case PPC::BC: | ||||
6594 | case PPC::BCn: | ||||
6595 | if (Op1Not) | ||||
6596 | ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : | ||||
6597 | PPC::BC, | ||||
6598 | SDLoc(MachineNode), | ||||
6599 | MVT::Other, | ||||
6600 | MachineNode->getOperand(0). | ||||
6601 | getOperand(0), | ||||
6602 | MachineNode->getOperand(1), | ||||
6603 | MachineNode->getOperand(2)); | ||||
6604 | // FIXME: Handle Op1Set, Op1Unset here too. | ||||
6605 | break; | ||||
6606 | } | ||||
6607 | |||||
6608 | // If we're inverting this node because it is used only by selects that | ||||
6609 | // we'd like to swap, then swap the selects before the node replacement. | ||||
6610 | if (SelectSwap) | ||||
6611 | SwapAllSelectUsers(MachineNode); | ||||
6612 | |||||
6613 | if (ResNode != MachineNode) { | ||||
6614 | LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "CR Peephole replacing:\nOld: " ; } } while (false); | ||||
6615 | LLVM_DEBUG(MachineNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { MachineNode->dump(CurDAG); } } while (false ); | ||||
6616 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6617 | LLVM_DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { ResNode->dump(CurDAG); } } while (false ); | ||||
6618 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6619 | |||||
6620 | ReplaceUses(MachineNode, ResNode); | ||||
6621 | IsModified = true; | ||||
6622 | } | ||||
6623 | } | ||||
6624 | if (IsModified) | ||||
6625 | CurDAG->RemoveDeadNodes(); | ||||
6626 | } while (IsModified); | ||||
6627 | } | ||||
6628 | |||||
6629 | // Gather the set of 32-bit operations that are known to have their | ||||
6630 | // higher-order 32 bits zero, where ToPromote contains all such operations. | ||||
6631 | static bool PeepholePPC64ZExtGather(SDValue Op32, | ||||
6632 | SmallPtrSetImpl<SDNode *> &ToPromote) { | ||||
6633 | if (!Op32.isMachineOpcode()) | ||||
6634 | return false; | ||||
6635 | |||||
6636 | // First, check for the "frontier" instructions (those that will clear the | ||||
6637 | // higher-order 32 bits. | ||||
6638 | |||||
6639 | // For RLWINM and RLWNM, we need to make sure that the mask does not wrap | ||||
6640 | // around. If it does not, then these instructions will clear the | ||||
6641 | // higher-order bits. | ||||
6642 | if ((Op32.getMachineOpcode() == PPC::RLWINM || | ||||
6643 | Op32.getMachineOpcode() == PPC::RLWNM) && | ||||
6644 | Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) { | ||||
6645 | ToPromote.insert(Op32.getNode()); | ||||
6646 | return true; | ||||
6647 | } | ||||
6648 | |||||
6649 | // SLW and SRW always clear the higher-order bits. | ||||
6650 | if (Op32.getMachineOpcode() == PPC::SLW || | ||||
6651 | Op32.getMachineOpcode() == PPC::SRW) { | ||||
6652 | ToPromote.insert(Op32.getNode()); | ||||
6653 | return true; | ||||
6654 | } | ||||
6655 | |||||
6656 | // For LI and LIS, we need the immediate to be positive (so that it is not | ||||
6657 | // sign extended). | ||||
6658 | if (Op32.getMachineOpcode() == PPC::LI || | ||||
6659 | Op32.getMachineOpcode() == PPC::LIS) { | ||||
6660 | if (!isUInt<15>(Op32.getConstantOperandVal(0))) | ||||
6661 | return false; | ||||
6662 | |||||
6663 | ToPromote.insert(Op32.getNode()); | ||||
6664 | return true; | ||||
6665 | } | ||||
6666 | |||||
6667 | // LHBRX and LWBRX always clear the higher-order bits. | ||||
6668 | if (Op32.getMachineOpcode() == PPC::LHBRX || | ||||
6669 | Op32.getMachineOpcode() == PPC::LWBRX) { | ||||
6670 | ToPromote.insert(Op32.getNode()); | ||||
6671 | return true; | ||||
6672 | } | ||||
6673 | |||||
6674 | // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. | ||||
6675 | if (Op32.getMachineOpcode() == PPC::CNTLZW || | ||||
6676 | Op32.getMachineOpcode() == PPC::CNTTZW) { | ||||
6677 | ToPromote.insert(Op32.getNode()); | ||||
6678 | return true; | ||||
6679 | } | ||||
6680 | |||||
6681 | // Next, check for those instructions we can look through. | ||||
6682 | |||||
6683 | // Assuming the mask does not wrap around, then the higher-order bits are | ||||
6684 | // taken directly from the first operand. | ||||
6685 | if (Op32.getMachineOpcode() == PPC::RLWIMI && | ||||
6686 | Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) { | ||||
6687 | SmallPtrSet<SDNode *, 16> ToPromote1; | ||||
6688 | if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) | ||||
6689 | return false; | ||||
6690 | |||||
6691 | ToPromote.insert(Op32.getNode()); | ||||
6692 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6693 | return true; | ||||
6694 | } | ||||
6695 | |||||
6696 | // For OR, the higher-order bits are zero if that is true for both operands. | ||||
6697 | // For SELECT_I4, the same is true (but the relevant operand numbers are | ||||
6698 | // shifted by 1). | ||||
6699 | if (Op32.getMachineOpcode() == PPC::OR || | ||||
6700 | Op32.getMachineOpcode() == PPC::SELECT_I4) { | ||||
6701 | unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0; | ||||
6702 | SmallPtrSet<SDNode *, 16> ToPromote1; | ||||
6703 | if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1)) | ||||
6704 | return false; | ||||
6705 | if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1)) | ||||
6706 | return false; | ||||
6707 | |||||
6708 | ToPromote.insert(Op32.getNode()); | ||||
6709 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6710 | return true; | ||||
6711 | } | ||||
6712 | |||||
6713 | // For ORI and ORIS, we need the higher-order bits of the first operand to be | ||||
6714 | // zero, and also for the constant to be positive (so that it is not sign | ||||
6715 | // extended). | ||||
6716 | if (Op32.getMachineOpcode() == PPC::ORI || | ||||
6717 | Op32.getMachineOpcode() == PPC::ORIS) { | ||||
6718 | SmallPtrSet<SDNode *, 16> ToPromote1; | ||||
6719 | if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) | ||||
6720 | return false; | ||||
6721 | if (!isUInt<15>(Op32.getConstantOperandVal(1))) | ||||
6722 | return false; | ||||
6723 | |||||
6724 | ToPromote.insert(Op32.getNode()); | ||||
6725 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6726 | return true; | ||||
6727 | } | ||||
6728 | |||||
6729 | // The higher-order bits of AND are zero if that is true for at least one of | ||||
6730 | // the operands. | ||||
6731 | if (Op32.getMachineOpcode() == PPC::AND) { | ||||
6732 | SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2; | ||||
6733 | bool Op0OK = | ||||
6734 | PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); | ||||
6735 | bool Op1OK = | ||||
6736 | PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2); | ||||
6737 | if (!Op0OK && !Op1OK) | ||||
6738 | return false; | ||||
6739 | |||||
6740 | ToPromote.insert(Op32.getNode()); | ||||
6741 | |||||
6742 | if (Op0OK) | ||||
6743 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6744 | |||||
6745 | if (Op1OK) | ||||
6746 | ToPromote.insert(ToPromote2.begin(), ToPromote2.end()); | ||||
6747 | |||||
6748 | return true; | ||||
6749 | } | ||||
6750 | |||||
6751 | // For ANDI and ANDIS, the higher-order bits are zero if either that is true | ||||
6752 | // of the first operand, or if the second operand is positive (so that it is | ||||
6753 | // not sign extended). | ||||
6754 | if (Op32.getMachineOpcode() == PPC::ANDI_rec || | ||||
6755 | Op32.getMachineOpcode() == PPC::ANDIS_rec) { | ||||
6756 | SmallPtrSet<SDNode *, 16> ToPromote1; | ||||
6757 | bool Op0OK = | ||||
6758 | PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); | ||||
6759 | bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1)); | ||||
6760 | if (!Op0OK && !Op1OK) | ||||
6761 | return false; | ||||
6762 | |||||
6763 | ToPromote.insert(Op32.getNode()); | ||||
6764 | |||||
6765 | if (Op0OK) | ||||
6766 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6767 | |||||
6768 | return true; | ||||
6769 | } | ||||
6770 | |||||
6771 | return false; | ||||
6772 | } | ||||
6773 | |||||
6774 | void PPCDAGToDAGISel::PeepholePPC64ZExt() { | ||||
6775 | if (!Subtarget->isPPC64()) | ||||
6776 | return; | ||||
6777 | |||||
6778 | // When we zero-extend from i32 to i64, we use a pattern like this: | ||||
6779 | // def : Pat<(i64 (zext i32:$in)), | ||||
6780 | // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), | ||||
6781 | // 0, 32)>; | ||||
6782 | // There are several 32-bit shift/rotate instructions, however, that will | ||||
6783 | // clear the higher-order bits of their output, rendering the RLDICL | ||||
6784 | // unnecessary. When that happens, we remove it here, and redefine the | ||||
6785 | // relevant 32-bit operation to be a 64-bit operation. | ||||
6786 | |||||
6787 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||
6788 | |||||
6789 | bool MadeChange = false; | ||||
6790 | while (Position != CurDAG->allnodes_begin()) { | ||||
6791 | SDNode *N = &*--Position; | ||||
6792 | // Skip dead nodes and any non-machine opcodes. | ||||
6793 | if (N->use_empty() || !N->isMachineOpcode()) | ||||
6794 | continue; | ||||
6795 | |||||
6796 | if (N->getMachineOpcode() != PPC::RLDICL) | ||||
6797 | continue; | ||||
6798 | |||||
6799 | if (N->getConstantOperandVal(1) != 0 || | ||||
6800 | N->getConstantOperandVal(2) != 32) | ||||
6801 | continue; | ||||
6802 | |||||
6803 | SDValue ISR = N->getOperand(0); | ||||
6804 | if (!ISR.isMachineOpcode() || | ||||
6805 | ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG) | ||||
6806 | continue; | ||||
6807 | |||||
6808 | if (!ISR.hasOneUse()) | ||||
6809 | continue; | ||||
6810 | |||||
6811 | if (ISR.getConstantOperandVal(2) != PPC::sub_32) | ||||
6812 | continue; | ||||
6813 | |||||
6814 | SDValue IDef = ISR.getOperand(0); | ||||
6815 | if (!IDef.isMachineOpcode() || | ||||
6816 | IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF) | ||||
6817 | continue; | ||||
6818 | |||||
6819 | // We now know that we're looking at a canonical i32 -> i64 zext. See if we | ||||
6820 | // can get rid of it. | ||||
6821 | |||||
6822 | SDValue Op32 = ISR->getOperand(1); | ||||
6823 | if (!Op32.isMachineOpcode()) | ||||
6824 | continue; | ||||
6825 | |||||
6826 | // There are some 32-bit instructions that always clear the high-order 32 | ||||
6827 | // bits, there are also some instructions (like AND) that we can look | ||||
6828 | // through. | ||||
6829 | SmallPtrSet<SDNode *, 16> ToPromote; | ||||
6830 | if (!PeepholePPC64ZExtGather(Op32, ToPromote)) | ||||
6831 | continue; | ||||
6832 | |||||
6833 | // If the ToPromote set contains nodes that have uses outside of the set | ||||
6834 | // (except for the original INSERT_SUBREG), then abort the transformation. | ||||
6835 | bool OutsideUse = false; | ||||
6836 | for (SDNode *PN : ToPromote) { | ||||
6837 | for (SDNode *UN : PN->uses()) { | ||||
6838 | if (!ToPromote.count(UN) && UN != ISR.getNode()) { | ||||
6839 | OutsideUse = true; | ||||
6840 | break; | ||||
6841 | } | ||||
6842 | } | ||||
6843 | |||||
6844 | if (OutsideUse) | ||||
6845 | break; | ||||
6846 | } | ||||
6847 | if (OutsideUse) | ||||
6848 | continue; | ||||
6849 | |||||
6850 | MadeChange = true; | ||||
6851 | |||||
6852 | // We now know that this zero extension can be removed by promoting to | ||||
6853 | // nodes in ToPromote to 64-bit operations, where for operations in the | ||||
6854 | // frontier of the set, we need to insert INSERT_SUBREGs for their | ||||
6855 | // operands. | ||||
6856 | for (SDNode *PN : ToPromote) { | ||||
6857 | unsigned NewOpcode; | ||||
6858 | switch (PN->getMachineOpcode()) { | ||||
6859 | default: | ||||
6860 | llvm_unreachable("Don't know the 64-bit variant of this instruction")::llvm::llvm_unreachable_internal("Don't know the 64-bit variant of this instruction" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 6860); | ||||
6861 | case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break; | ||||
6862 | case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break; | ||||
6863 | case PPC::SLW: NewOpcode = PPC::SLW8; break; | ||||
6864 | case PPC::SRW: NewOpcode = PPC::SRW8; break; | ||||
6865 | case PPC::LI: NewOpcode = PPC::LI8; break; | ||||
6866 | case PPC::LIS: NewOpcode = PPC::LIS8; break; | ||||
6867 | case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; | ||||
6868 | case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; | ||||
6869 | case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; | ||||
6870 | case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; | ||||
6871 | case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; | ||||
6872 | case PPC::OR: NewOpcode = PPC::OR8; break; | ||||
6873 | case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; | ||||
6874 | case PPC::ORI: NewOpcode = PPC::ORI8; break; | ||||
6875 | case PPC::ORIS: NewOpcode = PPC::ORIS8; break; | ||||
6876 | case PPC::AND: NewOpcode = PPC::AND8; break; | ||||
6877 | case PPC::ANDI_rec: | ||||
6878 | NewOpcode = PPC::ANDI8_rec; | ||||
6879 | break; | ||||
6880 | case PPC::ANDIS_rec: | ||||
6881 | NewOpcode = PPC::ANDIS8_rec; | ||||
6882 | break; | ||||
6883 | } | ||||
6884 | |||||
6885 | // Note: During the replacement process, the nodes will be in an | ||||
6886 | // inconsistent state (some instructions will have operands with values | ||||
6887 | // of the wrong type). Once done, however, everything should be right | ||||
6888 | // again. | ||||
6889 | |||||
6890 | SmallVector<SDValue, 4> Ops; | ||||
6891 | for (const SDValue &V : PN->ops()) { | ||||
6892 | if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 && | ||||
6893 | !isa<ConstantSDNode>(V)) { | ||||
6894 | SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) }; | ||||
6895 | SDNode *ReplOp = | ||||
6896 | CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V), | ||||
6897 | ISR.getNode()->getVTList(), ReplOpOps); | ||||
6898 | Ops.push_back(SDValue(ReplOp, 0)); | ||||
6899 | } else { | ||||
6900 | Ops.push_back(V); | ||||
6901 | } | ||||
6902 | } | ||||
6903 | |||||
6904 | // Because all to-be-promoted nodes only have users that are other | ||||
6905 | // promoted nodes (or the original INSERT_SUBREG), we can safely replace | ||||
6906 | // the i32 result value type with i64. | ||||
6907 | |||||
6908 | SmallVector<EVT, 2> NewVTs; | ||||
6909 | SDVTList VTs = PN->getVTList(); | ||||
6910 | for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i) | ||||
6911 | if (VTs.VTs[i] == MVT::i32) | ||||
6912 | NewVTs.push_back(MVT::i64); | ||||
6913 | else | ||||
6914 | NewVTs.push_back(VTs.VTs[i]); | ||||
6915 | |||||
6916 | LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "PPC64 ZExt Peephole morphing:\nOld: " ; } } while (false); | ||||
6917 | LLVM_DEBUG(PN->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { PN->dump(CurDAG); } } while (false); | ||||
6918 | |||||
6919 | CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops); | ||||
6920 | |||||
6921 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6922 | LLVM_DEBUG(PN->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { PN->dump(CurDAG); } } while (false); | ||||
6923 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6924 | } | ||||
6925 | |||||
6926 | // Now we replace the original zero extend and its associated INSERT_SUBREG | ||||
6927 | // with the value feeding the INSERT_SUBREG (which has now been promoted to | ||||
6928 | // return an i64). | ||||
6929 | |||||
6930 | LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "PPC64 ZExt Peephole replacing:\nOld: " ; } } while (false); | ||||
6931 | LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(CurDAG); } } while (false); | ||||
6932 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6933 | LLVM_DEBUG(Op32.getNode()->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { Op32.getNode()->dump(CurDAG); } } while (false); | ||||
6934 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6935 | |||||
6936 | ReplaceUses(N, Op32.getNode()); | ||||
6937 | } | ||||
6938 | |||||
6939 | if (MadeChange) | ||||
6940 | CurDAG->RemoveDeadNodes(); | ||||
6941 | } | ||||
6942 | |||||
6943 | static bool isVSXSwap(SDValue N) { | ||||
6944 | if (!N->isMachineOpcode()) | ||||
6945 | return false; | ||||
6946 | unsigned Opc = N->getMachineOpcode(); | ||||
6947 | |||||
6948 | // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate | ||||
6949 | // operand is 2. | ||||
6950 | if (Opc == PPC::XXPERMDIs) { | ||||
6951 | return isa<ConstantSDNode>(N->getOperand(1)) && | ||||
6952 | N->getConstantOperandVal(1) == 2; | ||||
6953 | } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) { | ||||
6954 | return N->getOperand(0) == N->getOperand(1) && | ||||
6955 | isa<ConstantSDNode>(N->getOperand(2)) && | ||||
6956 | N->getConstantOperandVal(2) == 2; | ||||
6957 | } | ||||
6958 | |||||
6959 | return false; | ||||
6960 | } | ||||
6961 | |||||
6962 | // TODO: Make this complete and replace with a table-gen bit. | ||||
6963 | static bool isLaneInsensitive(SDValue N) { | ||||
6964 | if (!N->isMachineOpcode()) | ||||
6965 | return false; | ||||
6966 | unsigned Opc = N->getMachineOpcode(); | ||||
6967 | |||||
6968 | switch (Opc) { | ||||
6969 | default: | ||||
6970 | return false; | ||||
6971 | case PPC::VAVGSB: | ||||
6972 | case PPC::VAVGUB: | ||||
6973 | case PPC::VAVGSH: | ||||
6974 | case PPC::VAVGUH: | ||||
6975 | case PPC::VAVGSW: | ||||
6976 | case PPC::VAVGUW: | ||||
6977 | case PPC::VMAXFP: | ||||
6978 | case PPC::VMAXSB: | ||||
6979 | case PPC::VMAXUB: | ||||
6980 | case PPC::VMAXSH: | ||||
6981 | case PPC::VMAXUH: | ||||
6982 | case PPC::VMAXSW: | ||||
6983 | case PPC::VMAXUW: | ||||
6984 | case PPC::VMINFP: | ||||
6985 | case PPC::VMINSB: | ||||
6986 | case PPC::VMINUB: | ||||
6987 | case PPC::VMINSH: | ||||
6988 | case PPC::VMINUH: | ||||
6989 | case PPC::VMINSW: | ||||
6990 | case PPC::VMINUW: | ||||
6991 | case PPC::VADDFP: | ||||
6992 | case PPC::VADDUBM: | ||||
6993 | case PPC::VADDUHM: | ||||
6994 | case PPC::VADDUWM: | ||||
6995 | case PPC::VSUBFP: | ||||
6996 | case PPC::VSUBUBM: | ||||
6997 | case PPC::VSUBUHM: | ||||
6998 | case PPC::VSUBUWM: | ||||
6999 | case PPC::VAND: | ||||
7000 | case PPC::VANDC: | ||||
7001 | case PPC::VOR: | ||||
7002 | case PPC::VORC: | ||||
7003 | case PPC::VXOR: | ||||
7004 | case PPC::VNOR: | ||||
7005 | case PPC::VMULUWM: | ||||
7006 | return true; | ||||
7007 | } | ||||
7008 | } | ||||
7009 | |||||
7010 | // Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is | ||||
7011 | // lane-insensitive. | ||||
7012 | static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) { | ||||
7013 | // Our desired xxswap might be source of COPY_TO_REGCLASS. | ||||
7014 | // TODO: Can we put this a common method for DAG? | ||||
7015 | auto SkipRCCopy = [](SDValue V) { | ||||
7016 | while (V->isMachineOpcode() && | ||||
7017 | V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) { | ||||
7018 | // All values in the chain should have single use. | ||||
7019 | if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode())) | ||||
7020 | return SDValue(); | ||||
7021 | V = V->getOperand(0); | ||||
7022 | } | ||||
7023 | return V.hasOneUse() ? V : SDValue(); | ||||
7024 | }; | ||||
7025 | |||||
7026 | SDValue VecOp = SkipRCCopy(N->getOperand(0)); | ||||
7027 | if (!VecOp || !isLaneInsensitive(VecOp)) | ||||
7028 | return; | ||||
7029 | |||||
7030 | SDValue LHS = SkipRCCopy(VecOp.getOperand(0)), | ||||
7031 | RHS = SkipRCCopy(VecOp.getOperand(1)); | ||||
7032 | if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS)) | ||||
7033 | return; | ||||
7034 | |||||
7035 | // These swaps may still have chain-uses here, count on dead code elimination | ||||
7036 | // in following passes to remove them. | ||||
7037 | DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0)); | ||||
7038 | DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0)); | ||||
7039 | DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0)); | ||||
7040 | } | ||||
7041 | |||||
7042 | void PPCDAGToDAGISel::PeepholePPC64() { | ||||
7043 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||
7044 | |||||
7045 | while (Position != CurDAG->allnodes_begin()) { | ||||
7046 | SDNode *N = &*--Position; | ||||
7047 | // Skip dead nodes and any non-machine opcodes. | ||||
7048 | if (N->use_empty() || !N->isMachineOpcode()) | ||||
7049 | continue; | ||||
7050 | |||||
7051 | if (isVSXSwap(SDValue(N, 0))) | ||||
7052 | reduceVSXSwap(N, CurDAG); | ||||
7053 | |||||
7054 | unsigned FirstOp; | ||||
7055 | unsigned StorageOpcode = N->getMachineOpcode(); | ||||
7056 | bool RequiresMod4Offset = false; | ||||
7057 | |||||
7058 | switch (StorageOpcode) { | ||||
7059 | default: continue; | ||||
7060 | |||||
7061 | case PPC::LWA: | ||||
7062 | case PPC::LD: | ||||
7063 | case PPC::DFLOADf64: | ||||
7064 | case PPC::DFLOADf32: | ||||
7065 | RequiresMod4Offset = true; | ||||
7066 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
7067 | case PPC::LBZ: | ||||
7068 | case PPC::LBZ8: | ||||
7069 | case PPC::LFD: | ||||
7070 | case PPC::LFS: | ||||
7071 | case PPC::LHA: | ||||
7072 | case PPC::LHA8: | ||||
7073 | case PPC::LHZ: | ||||
7074 | case PPC::LHZ8: | ||||
7075 | case PPC::LWZ: | ||||
7076 | case PPC::LWZ8: | ||||
7077 | FirstOp = 0; | ||||
7078 | break; | ||||
7079 | |||||
7080 | case PPC::STD: | ||||
7081 | case PPC::DFSTOREf64: | ||||
7082 | case PPC::DFSTOREf32: | ||||
7083 | RequiresMod4Offset = true; | ||||
7084 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
7085 | case PPC::STB: | ||||
7086 | case PPC::STB8: | ||||
7087 | case PPC::STFD: | ||||
7088 | case PPC::STFS: | ||||
7089 | case PPC::STH: | ||||
7090 | case PPC::STH8: | ||||
7091 | case PPC::STW: | ||||
7092 | case PPC::STW8: | ||||
7093 | FirstOp = 1; | ||||
7094 | break; | ||||
7095 | } | ||||
7096 | |||||
7097 | // If this is a load or store with a zero offset, or within the alignment, | ||||
7098 | // we may be able to fold an add-immediate into the memory operation. | ||||
7099 | // The check against alignment is below, as it can't occur until we check | ||||
7100 | // the arguments to N | ||||
7101 | if (!isa<ConstantSDNode>(N->getOperand(FirstOp))) | ||||
7102 | continue; | ||||
7103 | |||||
7104 | SDValue Base = N->getOperand(FirstOp + 1); | ||||
7105 | if (!Base.isMachineOpcode()) | ||||
7106 | continue; | ||||
7107 | |||||
7108 | unsigned Flags = 0; | ||||
7109 | bool ReplaceFlags = true; | ||||
7110 | |||||
7111 | // When the feeding operation is an add-immediate of some sort, | ||||
7112 | // determine whether we need to add relocation information to the | ||||
7113 | // target flags on the immediate operand when we fold it into the | ||||
7114 | // load instruction. | ||||
7115 | // | ||||
7116 | // For something like ADDItocL, the relocation information is | ||||
7117 | // inferred from the opcode; when we process it in the AsmPrinter, | ||||
7118 | // we add the necessary relocation there. A load, though, can receive | ||||
7119 | // relocation from various flavors of ADDIxxx, so we need to carry | ||||
7120 | // the relocation information in the target flags. | ||||
7121 | switch (Base.getMachineOpcode()) { | ||||
7122 | default: continue; | ||||
7123 | |||||
7124 | case PPC::ADDI8: | ||||
7125 | case PPC::ADDI: | ||||
7126 | // In some cases (such as TLS) the relocation information | ||||
7127 | // is already in place on the operand, so copying the operand | ||||
7128 | // is sufficient. | ||||
7129 | ReplaceFlags = false; | ||||
7130 | // For these cases, the immediate may not be divisible by 4, in | ||||
7131 | // which case the fold is illegal for DS-form instructions. (The | ||||
7132 | // other cases provide aligned addresses and are always safe.) | ||||
7133 | if (RequiresMod4Offset && | ||||
7134 | (!isa<ConstantSDNode>(Base.getOperand(1)) || | ||||
7135 | Base.getConstantOperandVal(1) % 4 != 0)) | ||||
7136 | continue; | ||||
7137 | break; | ||||
7138 | case PPC::ADDIdtprelL: | ||||
7139 | Flags = PPCII::MO_DTPREL_LO; | ||||
7140 | break; | ||||
7141 | case PPC::ADDItlsldL: | ||||
7142 | Flags = PPCII::MO_TLSLD_LO; | ||||
7143 | break; | ||||
7144 | case PPC::ADDItocL: | ||||
7145 | Flags = PPCII::MO_TOC_LO; | ||||
7146 | break; | ||||
7147 | } | ||||
7148 | |||||
7149 | SDValue ImmOpnd = Base.getOperand(1); | ||||
7150 | |||||
7151 | // On PPC64, the TOC base pointer is guaranteed by the ABI only to have | ||||
7152 | // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, | ||||
7153 | // we might have needed different @ha relocation values for the offset | ||||
7154 | // pointers). | ||||
7155 | int MaxDisplacement = 7; | ||||
7156 | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { | ||||
7157 | const GlobalValue *GV = GA->getGlobal(); | ||||
7158 | Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); | ||||
7159 | MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement); | ||||
7160 | } | ||||
7161 | |||||
7162 | bool UpdateHBase = false; | ||||
7163 | SDValue HBase = Base.getOperand(0); | ||||
7164 | |||||
7165 | int Offset = N->getConstantOperandVal(FirstOp); | ||||
7166 | if (ReplaceFlags) { | ||||
7167 | if (Offset < 0 || Offset > MaxDisplacement) { | ||||
7168 | // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only | ||||
7169 | // one use, then we can do this for any offset, we just need to also | ||||
7170 | // update the offset (i.e. the symbol addend) on the addis also. | ||||
7171 | if (Base.getMachineOpcode() != PPC::ADDItocL) | ||||
7172 | continue; | ||||
7173 | |||||
7174 | if (!HBase.isMachineOpcode() || | ||||
7175 | HBase.getMachineOpcode() != PPC::ADDIStocHA8) | ||||
7176 | continue; | ||||
7177 | |||||
7178 | if (!Base.hasOneUse() || !HBase.hasOneUse()) | ||||
7179 | continue; | ||||
7180 | |||||
7181 | SDValue HImmOpnd = HBase.getOperand(1); | ||||
7182 | if (HImmOpnd != ImmOpnd) | ||||
7183 | continue; | ||||
7184 | |||||
7185 | UpdateHBase = true; | ||||
7186 | } | ||||
7187 | } else { | ||||
7188 | // If we're directly folding the addend from an addi instruction, then: | ||||
7189 | // 1. In general, the offset on the memory access must be zero. | ||||
7190 | // 2. If the addend is a constant, then it can be combined with a | ||||
7191 | // non-zero offset, but only if the result meets the encoding | ||||
7192 | // requirements. | ||||
7193 | if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) { | ||||
7194 | Offset += C->getSExtValue(); | ||||
7195 | |||||
7196 | if (RequiresMod4Offset && (Offset % 4) != 0) | ||||
7197 | continue; | ||||
7198 | |||||
7199 | if (!isInt<16>(Offset)) | ||||
7200 | continue; | ||||
7201 | |||||
7202 | ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), | ||||
7203 | ImmOpnd.getValueType()); | ||||
7204 | } else if (Offset != 0) { | ||||
7205 | continue; | ||||
7206 | } | ||||
7207 | } | ||||
7208 | |||||
7209 | // We found an opportunity. Reverse the operands from the add | ||||
7210 | // immediate and substitute them into the load or store. If | ||||
7211 | // needed, update the target flags for the immediate operand to | ||||
7212 | // reflect the necessary relocation information. | ||||
7213 | LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Folding add-immediate into mem-op:\nBase: " ; } } while (false); | ||||
7214 | LLVM_DEBUG(Base->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { Base->dump(CurDAG); } } while (false); | ||||
7215 | LLVM_DEBUG(dbgs() << "\nN: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nN: "; } } while (false); | ||||
7216 | LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(CurDAG); } } while (false); | ||||
7217 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
7218 | |||||
7219 | // If the relocation information isn't already present on the | ||||
7220 | // immediate operand, add it now. | ||||
7221 | if (ReplaceFlags) { | ||||
7222 | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { | ||||
7223 | SDLoc dl(GA); | ||||
7224 | const GlobalValue *GV = GA->getGlobal(); | ||||
7225 | Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); | ||||
7226 | // We can't perform this optimization for data whose alignment | ||||
7227 | // is insufficient for the instruction encoding. | ||||
7228 | if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) { | ||||
7229 | LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Rejected this candidate for alignment.\n\n" ; } } while (false); | ||||
7230 | continue; | ||||
7231 | } | ||||
7232 | ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); | ||||
7233 | } else if (ConstantPoolSDNode *CP = | ||||
7234 | dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { | ||||
7235 | const Constant *C = CP->getConstVal(); | ||||
7236 | ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), | ||||
7237 | Offset, Flags); | ||||
7238 | } | ||||
7239 | } | ||||
7240 | |||||
7241 | if (FirstOp == 1) // Store | ||||
7242 | (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, | ||||
7243 | Base.getOperand(0), N->getOperand(3)); | ||||
7244 | else // Load | ||||
7245 | (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), | ||||
7246 | N->getOperand(2)); | ||||
7247 | |||||
7248 | if (UpdateHBase) | ||||
7249 | (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), | ||||
7250 | ImmOpnd); | ||||
7251 | |||||
7252 | // The add-immediate may now be dead, in which case remove it. | ||||
7253 | if (Base.getNode()->use_empty()) | ||||
7254 | CurDAG->RemoveDeadNode(Base.getNode()); | ||||
7255 | } | ||||
7256 | } | ||||
7257 | |||||
7258 | /// createPPCISelDag - This pass converts a legalized DAG into a | ||||
7259 | /// PowerPC-specific DAG, ready for instruction scheduling. | ||||
7260 | /// | ||||
7261 | FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, | ||||
7262 | CodeGenOpt::Level OptLevel) { | ||||
7263 | return new PPCDAGToDAGISel(TM, OptLevel); | ||||
7264 | } |
1 | //===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(), |
10 | // and dyn_cast_or_null<X>() templates. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_SUPPORT_CASTING_H |
15 | #define LLVM_SUPPORT_CASTING_H |
16 | |
17 | #include "llvm/Support/Compiler.h" |
18 | #include "llvm/Support/type_traits.h" |
19 | #include <cassert> |
20 | #include <memory> |
21 | #include <type_traits> |
22 | |
23 | namespace llvm { |
24 | |
25 | //===----------------------------------------------------------------------===// |
26 | // isa<x> Support Templates |
27 | //===----------------------------------------------------------------------===// |
28 | |
/// Customisation point for the casting templates.  Smart-pointer-like types
/// specialise it to say which underlying type they are automatically
/// dereferenced to, so that they do not take part in template selection.  The
/// primary template below is the identity: nothing is simplified.
template <typename From> struct simplify_type {
  /// The type \p From stands for; here simply \p From itself.
  using SimpleType = From;

  /// Give back the very object that was passed in.
  static SimpleType &getSimplifiedValue(From &Wrapped) { return Wrapped; }
};
39 | |
40 | template<typename From> struct simplify_type<const From> { |
41 | using NonConstSimpleType = typename simplify_type<From>::SimpleType; |
42 | using SimpleType = |
43 | typename add_const_past_pointer<NonConstSimpleType>::type; |
44 | using RetType = |
45 | typename add_lvalue_reference_if_not_pointer<SimpleType>::type; |
46 | |
47 | static RetType getSimplifiedValue(const From& Val) { |
48 | return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val)); |
49 | } |
50 | }; |
51 | |
/// Core of isa<X>.  \p To and \p From are class names; the check is delegated
/// to \c To::classof().  Specialise this template to customise isa<> for a
/// type without rewriting the surrounding machinery.
template <typename To, typename From, typename Enabler = void>
struct isa_impl {
  static bool doit(const From &Obj) { return To::classof(&Obj); }
};

/// Upcasts are always allowed: when \p To is a base of \p From (or the same
/// class) the answer is true and no dynamic check is performed.
template <typename To, typename From>
struct isa_impl<To, From, std::enable_if_t<std::is_base_of<To, From>::value>> {
  static bool doit(const From &) { return true; }
};
67 | |
68 | template <typename To, typename From> struct isa_impl_cl { |
69 | static inline bool doit(const From &Val) { |
70 | return isa_impl<To, From>::doit(Val); |
71 | } |
72 | }; |
73 | |
74 | template <typename To, typename From> struct isa_impl_cl<To, const From> { |
75 | static inline bool doit(const From &Val) { |
76 | return isa_impl<To, From>::doit(Val); |
77 | } |
78 | }; |
79 | |
80 | template <typename To, typename From> |
81 | struct isa_impl_cl<To, const std::unique_ptr<From>> { |
82 | static inline bool doit(const std::unique_ptr<From> &Val) { |
83 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 83, __extension__ __PRETTY_FUNCTION__)); |
84 | return isa_impl_cl<To, From>::doit(*Val); |
85 | } |
86 | }; |
87 | |
88 | template <typename To, typename From> struct isa_impl_cl<To, From*> { |
89 | static inline bool doit(const From *Val) { |
90 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 90, __extension__ __PRETTY_FUNCTION__)); |
91 | return isa_impl<To, From>::doit(*Val); |
92 | } |
93 | }; |
94 | |
95 | template <typename To, typename From> struct isa_impl_cl<To, From*const> { |
96 | static inline bool doit(const From *Val) { |
97 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 97, __extension__ __PRETTY_FUNCTION__)); |
98 | return isa_impl<To, From>::doit(*Val); |
99 | } |
100 | }; |
101 | |
102 | template <typename To, typename From> struct isa_impl_cl<To, const From*> { |
103 | static inline bool doit(const From *Val) { |
104 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 104, __extension__ __PRETTY_FUNCTION__)); |
105 | return isa_impl<To, From>::doit(*Val); |
106 | } |
107 | }; |
108 | |
109 | template <typename To, typename From> struct isa_impl_cl<To, const From*const> { |
110 | static inline bool doit(const From *Val) { |
111 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 111, __extension__ __PRETTY_FUNCTION__)); |
112 | return isa_impl<To, From>::doit(*Val); |
113 | } |
114 | }; |
115 | |
116 | template<typename To, typename From, typename SimpleFrom> |
117 | struct isa_impl_wrap { |
118 | // When From != SimplifiedType, we can simplify the type some more by using |
119 | // the simplify_type template. |
120 | static bool doit(const From &Val) { |
121 | return isa_impl_wrap<To, SimpleFrom, |
122 | typename simplify_type<SimpleFrom>::SimpleType>::doit( |
123 | simplify_type<const From>::getSimplifiedValue(Val)); |
124 | } |
125 | }; |
126 | |
127 | template<typename To, typename FromTy> |
128 | struct isa_impl_wrap<To, FromTy, FromTy> { |
129 | // When From == SimpleType, we are as simple as we are going to get. |
130 | static bool doit(const FromTy &Val) { |
131 | return isa_impl_cl<To,FromTy>::doit(Val); |
132 | } |
133 | }; |
134 | |
135 | // isa<X> - Return true if the parameter to the template is an instance of one |
136 | // of the template type arguments. Used like this: |
137 | // |
138 | // if (isa<Type>(myVal)) { ... } |
139 | // if (isa<Type0, Type1, Type2>(myVal)) { ... } |
140 | // |
141 | template <class X, class Y> LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa(const Y &Val) { |
142 | return isa_impl_wrap<X, const Y, |
143 | typename simplify_type<const Y>::SimpleType>::doit(Val); |
144 | } |
145 | |
146 | template <typename First, typename Second, typename... Rest, typename Y> |
147 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa(const Y &Val) { |
148 | return isa<First>(Val) || isa<Second, Rest...>(Val); |
149 | } |
150 | |
151 | // isa_and_nonnull<X> - Functionally identical to isa, except that a null value |
152 | // is accepted. |
153 | // |
154 | template <typename... X, class Y> |
155 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa_and_nonnull(const Y &Val) { |
156 | if (!Val) |
157 | return false; |
158 | return isa<X...>(Val); |
159 | } |
160 | |
161 | //===----------------------------------------------------------------------===// |
162 | // cast<x> Support Templates |
163 | //===----------------------------------------------------------------------===// |
164 | |
165 | template<class To, class From> struct cast_retty; |
166 | |
/// Maps a requested destination type \p To and a (simplified) source type
/// \p From to the type that cast<To>() returns for such a source.
///
/// Object source: a reference to \p To.
template <class To, class From> struct cast_retty_impl {
  using ret_type = To &;
};

/// Const object source: a reference to const \p To.
template <class To, class From> struct cast_retty_impl<To, const From> {
  using ret_type = const To &;
};

/// Pointer source: a pointer to \p To.
template <class To, class From> struct cast_retty_impl<To, From *> {
  using ret_type = To *;
};

/// Pointer-to-const source: a pointer to const \p To.
template <class To, class From> struct cast_retty_impl<To, const From *> {
  using ret_type = const To *;
};

/// Const pointer-to-const source: again a pointer to const \p To.
template <class To, class From>
struct cast_retty_impl<To, const From *const> {
  using ret_type = const To *;
};

/// std::unique_ptr source: a unique_ptr to whatever the raw-pointer case for
/// \p From would point at.
template <class To, class From>
struct cast_retty_impl<To, std::unique_ptr<From>> {
  using ret_type = std::unique_ptr<
      std::remove_pointer_t<typename cast_retty_impl<To, From *>::ret_type>>;
};
197 | |
198 | template<class To, class From, class SimpleFrom> |
199 | struct cast_retty_wrap { |
200 | // When the simplified type and the from type are not the same, use the type |
201 | // simplifier to reduce the type, then reuse cast_retty_impl to get the |
202 | // resultant type. |
203 | using ret_type = typename cast_retty<To, SimpleFrom>::ret_type; |
204 | }; |
205 | |
206 | template<class To, class FromTy> |
207 | struct cast_retty_wrap<To, FromTy, FromTy> { |
208 | // When the simplified type is equal to the from type, use it directly. |
209 | using ret_type = typename cast_retty_impl<To,FromTy>::ret_type; |
210 | }; |
211 | |
212 | template<class To, class From> |
213 | struct cast_retty { |
214 | using ret_type = typename cast_retty_wrap< |
215 | To, From, typename simplify_type<From>::SimpleType>::ret_type; |
216 | }; |
217 | |
218 | // Ensure the non-simple values are converted using the simplify_type template |
219 | // that may be specialized by smart pointers... |
220 | // |
221 | template<class To, class From, class SimpleFrom> struct cast_convert_val { |
222 | // This is not a simple type, use the template to simplify it... |
223 | static typename cast_retty<To, From>::ret_type doit(From &Val) { |
224 | return cast_convert_val<To, SimpleFrom, |
225 | typename simplify_type<SimpleFrom>::SimpleType>::doit( |
226 | simplify_type<From>::getSimplifiedValue(Val)); |
227 | } |
228 | }; |
229 | |
230 | template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> { |
231 | // This _is_ a simple type, just cast it. |
232 | static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) { |
233 | typename cast_retty<To, FromTy>::ret_type Res2 |
234 | = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val); |
235 | return Res2; |
236 | } |
237 | }; |
238 | |
239 | template <class X> struct is_simple_type { |
240 | static const bool value = |
241 | std::is_same<X, typename simplify_type<X>::SimpleType>::value; |
242 | }; |
243 | |
244 | // cast<X> - Return the argument parameter cast to the specified type. This |
245 | // casting operator asserts that the type is correct, so it does not return null |
246 | // on failure. It does not allow a null argument (use cast_or_null for that). |
247 | // It is typically used like this: |
248 | // |
249 | // cast<Instruction>(myVal)->getParent() |
250 | // |
251 | template <class X, class Y> |
252 | inline std::enable_if_t<!is_simple_type<Y>::value, |
253 | typename cast_retty<X, const Y>::ret_type> |
254 | cast(const Y &Val) { |
255 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 255, __extension__ __PRETTY_FUNCTION__)); |
256 | return cast_convert_val< |
257 | X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val); |
258 | } |
259 | |
260 | template <class X, class Y> |
261 | inline typename cast_retty<X, Y>::ret_type cast(Y &Val) { |
262 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 262, __extension__ __PRETTY_FUNCTION__)); |
263 | return cast_convert_val<X, Y, |
264 | typename simplify_type<Y>::SimpleType>::doit(Val); |
265 | } |
266 | |
267 | template <class X, class Y> |
268 | inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) { |
269 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 269, __extension__ __PRETTY_FUNCTION__)); |
270 | return cast_convert_val<X, Y*, |
271 | typename simplify_type<Y*>::SimpleType>::doit(Val); |
272 | } |
273 | |
274 | template <class X, class Y> |
275 | inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type |
276 | cast(std::unique_ptr<Y> &&Val) { |
277 | assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!") ? void (0 ) : __assert_fail ("isa<X>(Val.get()) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 277, __extension__ __PRETTY_FUNCTION__)); |
278 | using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type; |
279 | return ret_type( |
280 | cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit( |
281 | Val.release())); |
282 | } |
283 | |
284 | // cast_or_null<X> - Functionally identical to cast, except that a null value is |
285 | // accepted. |
286 | // |
287 | template <class X, class Y> |
288 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
289 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
290 | cast_or_null(const Y &Val) { |
291 | if (!Val) |
292 | return nullptr; |
293 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 293, __extension__ __PRETTY_FUNCTION__)); |
294 | return cast<X>(Val); |
295 | } |
296 | |
297 | template <class X, class Y> |
298 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t<!is_simple_type<Y>::value, |
299 | typename cast_retty<X, Y>::ret_type> |
300 | cast_or_null(Y &Val) { |
301 | if (!Val) |
302 | return nullptr; |
303 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 303, __extension__ __PRETTY_FUNCTION__)); |
304 | return cast<X>(Val); |
305 | } |
306 | |
307 | template <class X, class Y> |
308 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type |
309 | cast_or_null(Y *Val) { |
310 | if (!Val) return nullptr; |
311 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-13~++20210711100610+f0393deb3367/llvm/include/llvm/Support/Casting.h" , 311, __extension__ __PRETTY_FUNCTION__)); |
312 | return cast<X>(Val); |
313 | } |
314 | |
315 | template <class X, class Y> |
316 | inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type |
317 | cast_or_null(std::unique_ptr<Y> &&Val) { |
318 | if (!Val) |
319 | return nullptr; |
320 | return cast<X>(std::move(Val)); |
321 | } |
322 | |
323 | // dyn_cast<X> - Return the argument parameter cast to the specified type. This |
324 | // casting operator returns null if the argument is of the wrong type, so it can |
325 | // be used to test for a type as well as cast if successful. This should be |
326 | // used in the context of an if statement like this: |
327 | // |
328 | // if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... } |
329 | // |
330 | |
331 | template <class X, class Y> |
332 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
333 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
334 | dyn_cast(const Y &Val) { |
335 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
336 | } |
337 | |
338 | template <class X, class Y> |
339 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) { |
340 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
341 | } |
342 | |
343 | template <class X, class Y> |
344 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) { |
345 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
346 | } |
347 | |
348 | // dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null |
349 | // value is accepted. |
350 | // |
351 | template <class X, class Y> |
352 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
353 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
354 | dyn_cast_or_null(const Y &Val) { |
355 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
356 | } |
357 | |
358 | template <class X, class Y> |
359 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t<!is_simple_type<Y>::value, |
360 | typename cast_retty<X, Y>::ret_type> |
361 | dyn_cast_or_null(Y &Val) { |
362 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
363 | } |
364 | |
365 | template <class X, class Y> |
366 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type |
367 | dyn_cast_or_null(Y *Val) { |
368 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
369 | } |
370 | |
371 | // unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>, |
372 | // taking ownership of the input pointer iff isa<X>(Val) is true. If the |
373 | // cast is successful, Val refers to nullptr on exit and the casted value |
374 | // is returned. If the cast is unsuccessful, the function returns nullptr |
375 | // and Val is unchanged. |
376 | template <class X, class Y> |
377 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast(std::unique_ptr<Y> &Val) |
378 | -> decltype(cast<X>(Val)) { |
379 | if (!isa<X>(Val)) |
380 | return nullptr; |
381 | return cast<X>(std::move(Val)); |
382 | } |
383 | |
384 | template <class X, class Y> |
385 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val) { |
386 | return unique_dyn_cast<X, Y>(Val); |
387 | } |
388 | |
389 | // unique_dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast, |
390 | // except that a null value is accepted. |
391 | template <class X, class Y> |
392 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &Val) |
393 | -> decltype(cast<X>(Val)) { |
394 | if (!Val) |
395 | return nullptr; |
396 | return unique_dyn_cast<X, Y>(Val); |
397 | } |
398 | |
399 | template <class X, class Y> |
400 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val) { |
401 | return unique_dyn_cast_or_null<X, Y>(Val); |
402 | } |
403 | |
404 | } // end namespace llvm |
405 | |
406 | #endif // LLVM_SUPPORT_CASTING_H |