File: | llvm/include/llvm/CodeGen/SelectionDAGNodes.h |
Warning: | line 1110, column 10 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This file defines a pattern matching instruction selector for PowerPC, | ||||
10 | // converting from a legalized dag to a PPC dag. | ||||
11 | // | ||||
12 | //===----------------------------------------------------------------------===// | ||||
13 | |||||
14 | #include "MCTargetDesc/PPCMCTargetDesc.h" | ||||
15 | #include "MCTargetDesc/PPCPredicates.h" | ||||
16 | #include "PPC.h" | ||||
17 | #include "PPCISelLowering.h" | ||||
18 | #include "PPCMachineFunctionInfo.h" | ||||
19 | #include "PPCSubtarget.h" | ||||
20 | #include "PPCTargetMachine.h" | ||||
21 | #include "llvm/ADT/APInt.h" | ||||
22 | #include "llvm/ADT/DenseMap.h" | ||||
23 | #include "llvm/ADT/STLExtras.h" | ||||
24 | #include "llvm/ADT/SmallPtrSet.h" | ||||
25 | #include "llvm/ADT/SmallVector.h" | ||||
26 | #include "llvm/ADT/Statistic.h" | ||||
27 | #include "llvm/Analysis/BranchProbabilityInfo.h" | ||||
28 | #include "llvm/CodeGen/FunctionLoweringInfo.h" | ||||
29 | #include "llvm/CodeGen/ISDOpcodes.h" | ||||
30 | #include "llvm/CodeGen/MachineBasicBlock.h" | ||||
31 | #include "llvm/CodeGen/MachineFunction.h" | ||||
32 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||||
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||
34 | #include "llvm/CodeGen/SelectionDAG.h" | ||||
35 | #include "llvm/CodeGen/SelectionDAGISel.h" | ||||
36 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||
37 | #include "llvm/CodeGen/TargetInstrInfo.h" | ||||
38 | #include "llvm/CodeGen/TargetRegisterInfo.h" | ||||
39 | #include "llvm/CodeGen/ValueTypes.h" | ||||
40 | #include "llvm/IR/BasicBlock.h" | ||||
41 | #include "llvm/IR/DebugLoc.h" | ||||
42 | #include "llvm/IR/Function.h" | ||||
43 | #include "llvm/IR/GlobalValue.h" | ||||
44 | #include "llvm/IR/InlineAsm.h" | ||||
45 | #include "llvm/IR/InstrTypes.h" | ||||
46 | #include "llvm/IR/IntrinsicsPowerPC.h" | ||||
47 | #include "llvm/IR/Module.h" | ||||
48 | #include "llvm/Support/Casting.h" | ||||
49 | #include "llvm/Support/CodeGen.h" | ||||
50 | #include "llvm/Support/CommandLine.h" | ||||
51 | #include "llvm/Support/Compiler.h" | ||||
52 | #include "llvm/Support/Debug.h" | ||||
53 | #include "llvm/Support/ErrorHandling.h" | ||||
54 | #include "llvm/Support/KnownBits.h" | ||||
55 | #include "llvm/Support/MachineValueType.h" | ||||
56 | #include "llvm/Support/MathExtras.h" | ||||
57 | #include "llvm/Support/raw_ostream.h" | ||||
58 | #include <algorithm> | ||||
59 | #include <cassert> | ||||
60 | #include <cstdint> | ||||
61 | #include <iterator> | ||||
62 | #include <limits> | ||||
63 | #include <memory> | ||||
64 | #include <new> | ||||
65 | #include <tuple> | ||||
66 | #include <utility> | ||||
67 | |||||
68 | using namespace llvm; | ||||
69 | |||||
70 | #define DEBUG_TYPE"ppc-codegen" "ppc-codegen" | ||||
71 | |||||
72 | STATISTIC(NumSextSetcc,static llvm::Statistic NumSextSetcc = {"ppc-codegen", "NumSextSetcc" , "Number of (sext(setcc)) nodes expanded into GPR sequence." } | ||||
73 | "Number of (sext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumSextSetcc = {"ppc-codegen", "NumSextSetcc" , "Number of (sext(setcc)) nodes expanded into GPR sequence." }; | ||||
74 | STATISTIC(NumZextSetcc,static llvm::Statistic NumZextSetcc = {"ppc-codegen", "NumZextSetcc" , "Number of (zext(setcc)) nodes expanded into GPR sequence." } | ||||
75 | "Number of (zext(setcc)) nodes expanded into GPR sequence.")static llvm::Statistic NumZextSetcc = {"ppc-codegen", "NumZextSetcc" , "Number of (zext(setcc)) nodes expanded into GPR sequence." }; | ||||
76 | STATISTIC(SignExtensionsAdded,static llvm::Statistic SignExtensionsAdded = {"ppc-codegen", "SignExtensionsAdded" , "Number of sign extensions for compare inputs added."} | ||||
77 | "Number of sign extensions for compare inputs added.")static llvm::Statistic SignExtensionsAdded = {"ppc-codegen", "SignExtensionsAdded" , "Number of sign extensions for compare inputs added."}; | ||||
78 | STATISTIC(ZeroExtensionsAdded,static llvm::Statistic ZeroExtensionsAdded = {"ppc-codegen", "ZeroExtensionsAdded" , "Number of zero extensions for compare inputs added."} | ||||
79 | "Number of zero extensions for compare inputs added.")static llvm::Statistic ZeroExtensionsAdded = {"ppc-codegen", "ZeroExtensionsAdded" , "Number of zero extensions for compare inputs added."}; | ||||
80 | STATISTIC(NumLogicOpsOnComparison,static llvm::Statistic NumLogicOpsOnComparison = {"ppc-codegen" , "NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR." } | ||||
81 | "Number of logical ops on i1 values calculated in GPR.")static llvm::Statistic NumLogicOpsOnComparison = {"ppc-codegen" , "NumLogicOpsOnComparison", "Number of logical ops on i1 values calculated in GPR." }; | ||||
82 | STATISTIC(OmittedForNonExtendUses,static llvm::Statistic OmittedForNonExtendUses = {"ppc-codegen" , "OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses." } | ||||
83 | "Number of compares not eliminated as they have non-extending uses.")static llvm::Statistic OmittedForNonExtendUses = {"ppc-codegen" , "OmittedForNonExtendUses", "Number of compares not eliminated as they have non-extending uses." }; | ||||
84 | STATISTIC(NumP9Setb,static llvm::Statistic NumP9Setb = {"ppc-codegen", "NumP9Setb" , "Number of compares lowered to setb."} | ||||
85 | "Number of compares lowered to setb.")static llvm::Statistic NumP9Setb = {"ppc-codegen", "NumP9Setb" , "Number of compares lowered to setb."}; | ||||
86 | |||||
87 | // FIXME: Remove this once the bug has been fixed! | ||||
88 | cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", | ||||
89 | cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); | ||||
90 | |||||
91 | static cl::opt<bool> | ||||
92 | UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), | ||||
93 | cl::desc("use aggressive ppc isel for bit permutations"), | ||||
94 | cl::Hidden); | ||||
95 | static cl::opt<bool> BPermRewriterNoMasking( | ||||
96 | "ppc-bit-perm-rewriter-stress-rotates", | ||||
97 | cl::desc("stress rotate selection in aggressive ppc isel for " | ||||
98 | "bit permutations"), | ||||
99 | cl::Hidden); | ||||
100 | |||||
101 | static cl::opt<bool> EnableBranchHint( | ||||
102 | "ppc-use-branch-hint", cl::init(true), | ||||
103 | cl::desc("Enable static hinting of branches on ppc"), | ||||
104 | cl::Hidden); | ||||
105 | |||||
106 | static cl::opt<bool> EnableTLSOpt( | ||||
107 | "ppc-tls-opt", cl::init(true), | ||||
108 | cl::desc("Enable tls optimization peephole"), | ||||
109 | cl::Hidden); | ||||
110 | |||||
/// Values accepted by the "ppc-gpr-icmps" option: which integer comparisons
/// get GPR-only code emitted for them. The enumerator order is kept as-is
/// because the underlying integer values are passed to the option parser.
enum ICmpInGPRType {
  ICGPR_All,      ///< All possible int comparisons in GPRs.
  ICGPR_None,     ///< Do not modify integer comparisons.
  ICGPR_I32,      ///< Only i32 comparisons in GPRs.
  ICGPR_I64,      ///< Only i64 comparisons in GPRs.
  ICGPR_NonExtIn, ///< Only comparisons where inputs don't need [sz]ext.
  ICGPR_Zext,     ///< Only comparisons with zext result.
  ICGPR_Sext,     ///< Only comparisons with sext result.
  ICGPR_ZextI32,  ///< Only i32 comparisons with zext result.
  ICGPR_SextI32,  ///< Only i32 comparisons with sext result.
  ICGPR_ZextI64,  ///< Only i64 comparisons with zext result.
  ICGPR_SextI64   ///< Only i64 comparisons with sext result.
};
114 | |||||
115 | static cl::opt<ICmpInGPRType> CmpInGPR( | ||||
116 | "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), | ||||
117 | cl::desc("Specify the types of comparisons to emit GPR-only code for."), | ||||
118 | cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons.")llvm::cl::OptionEnumValue { "none", int(ICGPR_None), "Do not modify integer comparisons." }, | ||||
119 | clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs.")llvm::cl::OptionEnumValue { "all", int(ICGPR_All), "All possible int comparisons in GPRs." }, | ||||
120 | clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs.")llvm::cl::OptionEnumValue { "i32", int(ICGPR_I32), "Only i32 comparisons in GPRs." }, | ||||
121 | clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs.")llvm::cl::OptionEnumValue { "i64", int(ICGPR_I64), "Only i64 comparisons in GPRs." }, | ||||
122 | clEnumValN(ICGPR_NonExtIn, "nonextin",llvm::cl::OptionEnumValue { "nonextin", int(ICGPR_NonExtIn), "Only comparisons where inputs don't need [sz]ext." } | ||||
123 | "Only comparisons where inputs don't need [sz]ext.")llvm::cl::OptionEnumValue { "nonextin", int(ICGPR_NonExtIn), "Only comparisons where inputs don't need [sz]ext." }, | ||||
124 | clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result.")llvm::cl::OptionEnumValue { "zext", int(ICGPR_Zext), "Only comparisons with zext result." }, | ||||
125 | clEnumValN(ICGPR_ZextI32, "zexti32",llvm::cl::OptionEnumValue { "zexti32", int(ICGPR_ZextI32), "Only i32 comparisons with zext result." } | ||||
126 | "Only i32 comparisons with zext result.")llvm::cl::OptionEnumValue { "zexti32", int(ICGPR_ZextI32), "Only i32 comparisons with zext result." }, | ||||
127 | clEnumValN(ICGPR_ZextI64, "zexti64",llvm::cl::OptionEnumValue { "zexti64", int(ICGPR_ZextI64), "Only i64 comparisons with zext result." } | ||||
128 | "Only i64 comparisons with zext result.")llvm::cl::OptionEnumValue { "zexti64", int(ICGPR_ZextI64), "Only i64 comparisons with zext result." }, | ||||
129 | clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result.")llvm::cl::OptionEnumValue { "sext", int(ICGPR_Sext), "Only comparisons with sext result." }, | ||||
130 | clEnumValN(ICGPR_SextI32, "sexti32",llvm::cl::OptionEnumValue { "sexti32", int(ICGPR_SextI32), "Only i32 comparisons with sext result." } | ||||
131 | "Only i32 comparisons with sext result.")llvm::cl::OptionEnumValue { "sexti32", int(ICGPR_SextI32), "Only i32 comparisons with sext result." }, | ||||
132 | clEnumValN(ICGPR_SextI64, "sexti64",llvm::cl::OptionEnumValue { "sexti64", int(ICGPR_SextI64), "Only i64 comparisons with sext result." } | ||||
133 | "Only i64 comparisons with sext result.")llvm::cl::OptionEnumValue { "sexti64", int(ICGPR_SextI64), "Only i64 comparisons with sext result." })); | ||||
134 | namespace { | ||||
135 | |||||
136 | //===--------------------------------------------------------------------===// | ||||
137 | /// PPCDAGToDAGISel - PPC specific code to select PPC machine | ||||
138 | /// instructions for SelectionDAG operations. | ||||
139 | /// | ||||
140 | class PPCDAGToDAGISel : public SelectionDAGISel { | ||||
141 | const PPCTargetMachine &TM; | ||||
142 | const PPCSubtarget *Subtarget = nullptr; | ||||
143 | const PPCTargetLowering *PPCLowering = nullptr; | ||||
144 | unsigned GlobalBaseReg = 0; | ||||
145 | |||||
146 | public: | ||||
147 | explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) | ||||
148 | : SelectionDAGISel(tm, OptLevel), TM(tm) {} | ||||
149 | |||||
150 | bool runOnMachineFunction(MachineFunction &MF) override { | ||||
151 | // Make sure we re-emit a set of the global base reg if necessary | ||||
152 | GlobalBaseReg = 0; | ||||
153 | Subtarget = &MF.getSubtarget<PPCSubtarget>(); | ||||
154 | PPCLowering = Subtarget->getTargetLowering(); | ||||
155 | if (Subtarget->hasROPProtect()) { | ||||
156 | // Create a place on the stack for the ROP Protection Hash. | ||||
157 | // The ROP Protection Hash will always be 8 bytes and aligned to 8 | ||||
158 | // bytes. | ||||
159 | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
160 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); | ||||
161 | const int Result = MFI.CreateStackObject(8, Align(8), false); | ||||
162 | FI->setROPProtectionHashSaveIndex(Result); | ||||
163 | } | ||||
164 | SelectionDAGISel::runOnMachineFunction(MF); | ||||
165 | |||||
166 | return true; | ||||
167 | } | ||||
168 | |||||
169 | void PreprocessISelDAG() override; | ||||
170 | void PostprocessISelDAG() override; | ||||
171 | |||||
172 | /// getI16Imm - Return a target constant with the specified value, of type | ||||
173 | /// i16. | ||||
174 | inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) { | ||||
175 | return CurDAG->getTargetConstant(Imm, dl, MVT::i16); | ||||
176 | } | ||||
177 | |||||
178 | /// getI32Imm - Return a target constant with the specified value, of type | ||||
179 | /// i32. | ||||
180 | inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { | ||||
181 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
182 | } | ||||
183 | |||||
184 | /// getI64Imm - Return a target constant with the specified value, of type | ||||
185 | /// i64. | ||||
186 | inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { | ||||
187 | return CurDAG->getTargetConstant(Imm, dl, MVT::i64); | ||||
188 | } | ||||
189 | |||||
190 | /// getSmallIPtrImm - Return a target constant of pointer type. | ||||
191 | inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { | ||||
192 | return CurDAG->getTargetConstant( | ||||
193 | Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); | ||||
194 | } | ||||
195 | |||||
196 | /// isRotateAndMask - Returns true if Mask and Shift can be folded into a | ||||
197 | /// rotate and mask opcode and mask operation. | ||||
198 | static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, | ||||
199 | unsigned &SH, unsigned &MB, unsigned &ME); | ||||
200 | |||||
201 | /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC | ||||
202 | /// base register. Return the virtual register that holds this value. | ||||
203 | SDNode *getGlobalBaseReg(); | ||||
204 | |||||
205 | void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); | ||||
206 | |||||
207 | // Select - Convert the specified operand from a target-independent to a | ||||
208 | // target-specific node if it hasn't already been changed. | ||||
209 | void Select(SDNode *N) override; | ||||
210 | |||||
211 | bool tryBitfieldInsert(SDNode *N); | ||||
212 | bool tryBitPermutation(SDNode *N); | ||||
213 | bool tryIntCompareInGPR(SDNode *N); | ||||
214 | |||||
215 | // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into | ||||
216 | // an X-Form load instruction with the offset being a relocation coming from | ||||
217 | // the PPCISD::ADD_TLS. | ||||
218 | bool tryTLSXFormLoad(LoadSDNode *N); | ||||
219 | // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into | ||||
220 | // an X-Form store instruction with the offset being a relocation coming from | ||||
221 | // the PPCISD::ADD_TLS. | ||||
222 | bool tryTLSXFormStore(StoreSDNode *N); | ||||
223 | /// SelectCC - Select a comparison of the specified values with the | ||||
224 | /// specified condition code, returning the CR# of the expression. | ||||
225 | SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
226 | const SDLoc &dl, SDValue Chain = SDValue()); | ||||
227 | |||||
228 | /// SelectAddrImmOffs - Return true if the operand is valid for a preinc | ||||
229 | /// immediate field. Note that the operand at this point is already the | ||||
230 | /// result of a prior SelectAddressRegImm call. | ||||
231 | bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { | ||||
232 | if (N.getOpcode() == ISD::TargetConstant || | ||||
233 | N.getOpcode() == ISD::TargetGlobalAddress) { | ||||
234 | Out = N; | ||||
235 | return true; | ||||
236 | } | ||||
237 | |||||
238 | return false; | ||||
239 | } | ||||
240 | |||||
241 | /// SelectDSForm - Returns true if address N can be represented by the | ||||
242 | /// addressing mode of DSForm instructions (a base register, plus a signed | ||||
243 | /// 16-bit displacement that is a multiple of 4. | ||||
244 | bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { | ||||
245 | return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, | ||||
246 | Align(4)) == PPC::AM_DSForm; | ||||
247 | } | ||||
248 | |||||
249 | /// SelectDQForm - Returns true if address N can be represented by the | ||||
250 | /// addressing mode of DQForm instructions (a base register, plus a signed | ||||
251 | /// 16-bit displacement that is a multiple of 16. | ||||
252 | bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { | ||||
253 | return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, | ||||
254 | Align(16)) == PPC::AM_DQForm; | ||||
255 | } | ||||
256 | |||||
257 | /// SelectDForm - Returns true if address N can be represented by | ||||
258 | /// the addressing mode of DForm instructions (a base register, plus a | ||||
259 | /// signed 16-bit immediate. | ||||
260 | bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { | ||||
261 | return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, | ||||
262 | None) == PPC::AM_DForm; | ||||
263 | } | ||||
264 | |||||
265 | /// SelectXForm - Returns true if address N can be represented by the | ||||
266 | /// addressing mode of XForm instructions (an indexed [r+r] operation). | ||||
267 | bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { | ||||
268 | return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, | ||||
269 | None) == PPC::AM_XForm; | ||||
270 | } | ||||
271 | |||||
272 | /// SelectForceXForm - Given the specified address, force it to be | ||||
273 | /// represented as an indexed [r+r] operation (an XForm instruction). | ||||
274 | bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp, | ||||
275 | SDValue &Base) { | ||||
276 | return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) == | ||||
277 | PPC::AM_XForm; | ||||
278 | } | ||||
279 | |||||
280 | /// SelectAddrIdx - Given the specified address, check to see if it can be | ||||
281 | /// represented as an indexed [r+r] operation. | ||||
282 | /// This is for xform instructions whose associated displacement form is D. | ||||
283 | /// The last parameter \p 0 means associated D form has no requirment for 16 | ||||
284 | /// bit signed displacement. | ||||
285 | /// Returns false if it can be represented by [r+imm], which are preferred. | ||||
286 | bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { | ||||
287 | return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None); | ||||
288 | } | ||||
289 | |||||
290 | /// SelectAddrIdx4 - Given the specified address, check to see if it can be | ||||
291 | /// represented as an indexed [r+r] operation. | ||||
292 | /// This is for xform instructions whose associated displacement form is DS. | ||||
293 | /// The last parameter \p 4 means associated DS form 16 bit signed | ||||
294 | /// displacement must be a multiple of 4. | ||||
295 | /// Returns false if it can be represented by [r+imm], which are preferred. | ||||
296 | bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) { | ||||
297 | return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, | ||||
298 | Align(4)); | ||||
299 | } | ||||
300 | |||||
301 | /// SelectAddrIdx16 - Given the specified address, check to see if it can be | ||||
302 | /// represented as an indexed [r+r] operation. | ||||
303 | /// This is for xform instructions whose associated displacement form is DQ. | ||||
304 | /// The last parameter \p 16 means associated DQ form 16 bit signed | ||||
305 | /// displacement must be a multiple of 16. | ||||
306 | /// Returns false if it can be represented by [r+imm], which are preferred. | ||||
307 | bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) { | ||||
308 | return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, | ||||
309 | Align(16)); | ||||
310 | } | ||||
311 | |||||
312 | /// SelectAddrIdxOnly - Given the specified address, force it to be | ||||
313 | /// represented as an indexed [r+r] operation. | ||||
314 | bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { | ||||
315 | return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); | ||||
316 | } | ||||
317 | |||||
318 | /// SelectAddrImm - Returns true if the address N can be represented by | ||||
319 | /// a base register plus a signed 16-bit displacement [r+imm]. | ||||
320 | /// The last parameter \p 0 means D form has no requirment for 16 bit signed | ||||
321 | /// displacement. | ||||
322 | bool SelectAddrImm(SDValue N, SDValue &Disp, | ||||
323 | SDValue &Base) { | ||||
324 | return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None); | ||||
325 | } | ||||
326 | |||||
327 | /// SelectAddrImmX4 - Returns true if the address N can be represented by | ||||
328 | /// a base register plus a signed 16-bit displacement that is a multiple of | ||||
329 | /// 4 (last parameter). Suitable for use by STD and friends. | ||||
330 | bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { | ||||
331 | return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4)); | ||||
332 | } | ||||
333 | |||||
334 | /// SelectAddrImmX16 - Returns true if the address N can be represented by | ||||
335 | /// a base register plus a signed 16-bit displacement that is a multiple of | ||||
336 | /// 16(last parameter). Suitable for use by STXV and friends. | ||||
337 | bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { | ||||
338 | return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, | ||||
339 | Align(16)); | ||||
340 | } | ||||
341 | |||||
342 | /// SelectAddrImmX34 - Returns true if the address N can be represented by | ||||
343 | /// a base register plus a signed 34-bit displacement. Suitable for use by | ||||
344 | /// PSTXVP and friends. | ||||
345 | bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) { | ||||
346 | return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG); | ||||
347 | } | ||||
348 | |||||
349 | // Select an address into a single register. | ||||
350 | bool SelectAddr(SDValue N, SDValue &Base) { | ||||
351 | Base = N; | ||||
352 | return true; | ||||
353 | } | ||||
354 | |||||
355 | bool SelectAddrPCRel(SDValue N, SDValue &Base) { | ||||
356 | return PPCLowering->SelectAddressPCRel(N, Base); | ||||
357 | } | ||||
358 | |||||
359 | /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for | ||||
360 | /// inline asm expressions. It is always correct to compute the value into | ||||
361 | /// a register. The case of adding a (possibly relocatable) constant to a | ||||
362 | /// register can be improved, but it is wrong to substitute Reg+Reg for | ||||
363 | /// Reg in an asm, because the load or store opcode would have to change. | ||||
364 | bool SelectInlineAsmMemoryOperand(const SDValue &Op, | ||||
365 | unsigned ConstraintID, | ||||
366 | std::vector<SDValue> &OutOps) override { | ||||
367 | switch(ConstraintID) { | ||||
368 | default: | ||||
369 | errs() << "ConstraintID: " << ConstraintID << "\n"; | ||||
370 | llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 370); | ||||
371 | case InlineAsm::Constraint_es: | ||||
372 | case InlineAsm::Constraint_m: | ||||
373 | case InlineAsm::Constraint_o: | ||||
374 | case InlineAsm::Constraint_Q: | ||||
375 | case InlineAsm::Constraint_Z: | ||||
376 | case InlineAsm::Constraint_Zy: | ||||
377 | // We need to make sure that this one operand does not end up in r0 | ||||
378 | // (because we might end up lowering this as 0(%op)). | ||||
379 | const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); | ||||
380 | const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); | ||||
381 | SDLoc dl(Op); | ||||
382 | SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); | ||||
383 | SDValue NewOp = | ||||
384 | SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, | ||||
385 | dl, Op.getValueType(), | ||||
386 | Op, RC), 0); | ||||
387 | |||||
388 | OutOps.push_back(NewOp); | ||||
389 | return false; | ||||
390 | } | ||||
391 | return true; | ||||
392 | } | ||||
393 | |||||
394 | StringRef getPassName() const override { | ||||
395 | return "PowerPC DAG->DAG Pattern Instruction Selection"; | ||||
396 | } | ||||
397 | |||||
398 | // Include the pieces autogenerated from the target description. | ||||
399 | #include "PPCGenDAGISel.inc" | ||||
400 | |||||
401 | private: | ||||
402 | bool trySETCC(SDNode *N); | ||||
403 | bool tryFoldSWTestBRCC(SDNode *N); | ||||
404 | bool tryAsSingleRLDICL(SDNode *N); | ||||
405 | bool tryAsSingleRLDICR(SDNode *N); | ||||
406 | bool tryAsSingleRLWINM(SDNode *N); | ||||
407 | bool tryAsSingleRLWINM8(SDNode *N); | ||||
408 | bool tryAsSingleRLWIMI(SDNode *N); | ||||
409 | bool tryAsPairOfRLDICL(SDNode *N); | ||||
410 | bool tryAsSingleRLDIMI(SDNode *N); | ||||
411 | |||||
412 | void PeepholePPC64(); | ||||
413 | void PeepholePPC64ZExt(); | ||||
414 | void PeepholeCROps(); | ||||
415 | |||||
416 | SDValue combineToCMPB(SDNode *N); | ||||
417 | void foldBoolExts(SDValue &Res, SDNode *&N); | ||||
418 | |||||
419 | bool AllUsersSelectZero(SDNode *N); | ||||
420 | void SwapAllSelectUsers(SDNode *N); | ||||
421 | |||||
422 | bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; | ||||
423 | void transferMemOperands(SDNode *N, SDNode *Result); | ||||
424 | }; | ||||
425 | |||||
426 | } // end anonymous namespace | ||||
427 | |||||
428 | /// getGlobalBaseReg - Output the instructions required to put the | ||||
429 | /// base address to use for accessing globals into a register. | ||||
430 | /// | ||||
431 | SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { | ||||
432 | if (!GlobalBaseReg) { | ||||
433 | const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); | ||||
434 | // Insert the set of GlobalBaseReg into the first MBB of the function | ||||
435 | MachineBasicBlock &FirstMBB = MF->front(); | ||||
436 | MachineBasicBlock::iterator MBBI = FirstMBB.begin(); | ||||
437 | const Module *M = MF->getFunction().getParent(); | ||||
438 | DebugLoc dl; | ||||
439 | |||||
440 | if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { | ||||
441 | if (Subtarget->isTargetELF()) { | ||||
442 | GlobalBaseReg = PPC::R30; | ||||
443 | if (!Subtarget->isSecurePlt() && | ||||
444 | M->getPICLevel() == PICLevel::SmallPIC) { | ||||
445 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); | ||||
446 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); | ||||
447 | MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); | ||||
448 | } else { | ||||
449 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); | ||||
450 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); | ||||
451 | Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); | ||||
452 | BuildMI(FirstMBB, MBBI, dl, | ||||
453 | TII.get(PPC::UpdateGBR), GlobalBaseReg) | ||||
454 | .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); | ||||
455 | MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); | ||||
456 | } | ||||
457 | } else { | ||||
458 | GlobalBaseReg = | ||||
459 | RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); | ||||
460 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); | ||||
461 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); | ||||
462 | } | ||||
463 | } else { | ||||
464 | // We must ensure that this sequence is dominated by the prologue. | ||||
465 | // FIXME: This is a bit of a big hammer since we don't get the benefits | ||||
466 | // of shrink-wrapping whenever we emit this instruction. Considering | ||||
467 | // this is used in any function where we emit a jump table, this may be | ||||
468 | // a significant limitation. We should consider inserting this in the | ||||
469 | // block where it is used and then commoning this sequence up if it | ||||
470 | // appears in multiple places. | ||||
471 | // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of | ||||
472 | // MovePCtoLR8. | ||||
473 | MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true); | ||||
474 | GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); | ||||
475 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); | ||||
476 | BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); | ||||
477 | } | ||||
478 | } | ||||
479 | return CurDAG->getRegister(GlobalBaseReg, | ||||
480 | PPCLowering->getPointerTy(CurDAG->getDataLayout())) | ||||
481 | .getNode(); | ||||
482 | } | ||||
483 | |||||
484 | // Check if a SDValue has the toc-data attribute. | ||||
485 | static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) { | ||||
486 | GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val); | ||||
487 | if (!GA) | ||||
488 | return false; | ||||
489 | |||||
490 | const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal()); | ||||
491 | if (!GV) | ||||
492 | return false; | ||||
493 | |||||
494 | if (!GV->hasAttribute("toc-data")) | ||||
495 | return false; | ||||
496 | |||||
497 | // TODO: These asserts should be updated as more support for the toc data | ||||
498 | // transformation is added (64 bit, struct support, etc.). | ||||
499 | |||||
500 | assert(PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "(static_cast <bool> (PointerSize == 4 && "Only 32 Bit Codegen is currently supported by " "the toc data transformation.") ? void (0) : __assert_fail ( "PointerSize == 4 && \"Only 32 Bit Codegen is currently supported by \" \"the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 501, __extension__ __PRETTY_FUNCTION__)) | ||||
501 | "the toc data transformation.")(static_cast <bool> (PointerSize == 4 && "Only 32 Bit Codegen is currently supported by " "the toc data transformation.") ? void (0) : __assert_fail ( "PointerSize == 4 && \"Only 32 Bit Codegen is currently supported by \" \"the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 501, __extension__ __PRETTY_FUNCTION__)); | ||||
502 | |||||
503 | assert(PointerSize >= GV->getAlign().valueOrOne().value() &&(static_cast <bool> (PointerSize >= GV->getAlign( ).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes " "not supported by the toc data transformation.") ? void (0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 505, __extension__ __PRETTY_FUNCTION__)) | ||||
504 | "GlobalVariables with an alignment requirement stricter then 4-bytes "(static_cast <bool> (PointerSize >= GV->getAlign( ).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes " "not supported by the toc data transformation.") ? void (0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 505, __extension__ __PRETTY_FUNCTION__)) | ||||
505 | "not supported by the toc data transformation.")(static_cast <bool> (PointerSize >= GV->getAlign( ).valueOrOne().value() && "GlobalVariables with an alignment requirement stricter then 4-bytes " "not supported by the toc data transformation.") ? void (0) : __assert_fail ("PointerSize >= GV->getAlign().valueOrOne().value() && \"GlobalVariables with an alignment requirement stricter then 4-bytes \" \"not supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 505, __extension__ __PRETTY_FUNCTION__)); | ||||
506 | |||||
507 | Type *GVType = GV->getValueType(); | ||||
508 | |||||
509 | assert(GVType->isSized() && "A GlobalVariable's size must be known to be "(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 510, __extension__ __PRETTY_FUNCTION__)) | ||||
510 | "supported by the toc data transformation.")(static_cast <bool> (GVType->isSized() && "A GlobalVariable's size must be known to be " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->isSized() && \"A GlobalVariable's size must be known to be \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 510, __extension__ __PRETTY_FUNCTION__)); | ||||
511 | |||||
512 | if (GVType->isVectorTy()) | ||||
513 | report_fatal_error("A GlobalVariable of Vector type is not currently " | ||||
514 | "supported by the toc data transformation."); | ||||
515 | |||||
516 | if (GVType->isArrayTy()) | ||||
517 | report_fatal_error("A GlobalVariable of Array type is not currently " | ||||
518 | "supported by the toc data transformation."); | ||||
519 | |||||
520 | if (GVType->isStructTy()) | ||||
521 | report_fatal_error("A GlobalVariable of Struct type is not currently " | ||||
522 | "supported by the toc data transformation."); | ||||
523 | |||||
524 | assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&(static_cast <bool> (GVType->getPrimitiveSizeInBits( ) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 526, __extension__ __PRETTY_FUNCTION__)) | ||||
525 | "A GlobalVariable with size larger than 32 bits is not currently "(static_cast <bool> (GVType->getPrimitiveSizeInBits( ) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 526, __extension__ __PRETTY_FUNCTION__)) | ||||
526 | "supported by the toc data transformation.")(static_cast <bool> (GVType->getPrimitiveSizeInBits( ) <= PointerSize * 8 && "A GlobalVariable with size larger than 32 bits is not currently " "supported by the toc data transformation.") ? void (0) : __assert_fail ("GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && \"A GlobalVariable with size larger than 32 bits is not currently \" \"supported by the toc data transformation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 526, __extension__ __PRETTY_FUNCTION__)); | ||||
527 | |||||
528 | if (GV->hasLocalLinkage() || GV->hasPrivateLinkage()) | ||||
529 | report_fatal_error("A GlobalVariable with private or local linkage is not " | ||||
530 | "currently supported by the toc data transformation."); | ||||
531 | |||||
532 | assert(!GV->hasCommonLinkage() &&(static_cast <bool> (!GV->hasCommonLinkage() && "Tentative definitions cannot have the mapping class XMC_TD." ) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 533, __extension__ __PRETTY_FUNCTION__)) | ||||
533 | "Tentative definitions cannot have the mapping class XMC_TD.")(static_cast <bool> (!GV->hasCommonLinkage() && "Tentative definitions cannot have the mapping class XMC_TD." ) ? void (0) : __assert_fail ("!GV->hasCommonLinkage() && \"Tentative definitions cannot have the mapping class XMC_TD.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 533, __extension__ __PRETTY_FUNCTION__)); | ||||
534 | |||||
535 | return true; | ||||
536 | } | ||||
537 | |||||
538 | /// isInt32Immediate - This method tests to see if the node is a 32-bit constant | ||||
539 | /// operand. If so Imm will receive the 32-bit value. | ||||
540 | static bool isInt32Immediate(SDNode *N, unsigned &Imm) { | ||||
541 | if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { | ||||
542 | Imm = cast<ConstantSDNode>(N)->getZExtValue(); | ||||
543 | return true; | ||||
544 | } | ||||
545 | return false; | ||||
546 | } | ||||
547 | |||||
548 | /// isInt64Immediate - This method tests to see if the node is a 64-bit constant | ||||
549 | /// operand. If so Imm will receive the 64-bit value. | ||||
550 | static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { | ||||
551 | if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { | ||||
552 | Imm = cast<ConstantSDNode>(N)->getZExtValue(); | ||||
553 | return true; | ||||
554 | } | ||||
555 | return false; | ||||
556 | } | ||||
557 | |||||
558 | // isInt32Immediate - This method tests to see if the value is a 32-bit | ||||
559 | // constant operand. If so Imm will receive the 32-bit value. | ||||
560 | static bool isInt32Immediate(SDValue N, unsigned &Imm) { | ||||
561 | return isInt32Immediate(N.getNode(), Imm); | ||||
562 | } | ||||
563 | |||||
564 | /// isInt64Immediate - This method tests to see if the value is a 64-bit | ||||
565 | /// constant operand. If so Imm will receive the 64-bit value. | ||||
566 | static bool isInt64Immediate(SDValue N, uint64_t &Imm) { | ||||
567 | return isInt64Immediate(N.getNode(), Imm); | ||||
568 | } | ||||
569 | |||||
570 | static unsigned getBranchHint(unsigned PCC, | ||||
571 | const FunctionLoweringInfo &FuncInfo, | ||||
572 | const SDValue &DestMBB) { | ||||
573 | assert(isa<BasicBlockSDNode>(DestMBB))(static_cast <bool> (isa<BasicBlockSDNode>(DestMBB )) ? void (0) : __assert_fail ("isa<BasicBlockSDNode>(DestMBB)" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 573, __extension__ __PRETTY_FUNCTION__)); | ||||
574 | |||||
575 | if (!FuncInfo.BPI) return PPC::BR_NO_HINT; | ||||
576 | |||||
577 | const BasicBlock *BB = FuncInfo.MBB->getBasicBlock(); | ||||
578 | const Instruction *BBTerm = BB->getTerminator(); | ||||
579 | |||||
580 | if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; | ||||
581 | |||||
582 | const BasicBlock *TBB = BBTerm->getSuccessor(0); | ||||
583 | const BasicBlock *FBB = BBTerm->getSuccessor(1); | ||||
584 | |||||
585 | auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB); | ||||
586 | auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB); | ||||
587 | |||||
588 | // We only want to handle cases which are easy to predict at static time, e.g. | ||||
589 | // C++ throw statement, that is very likely not taken, or calling a function | ||||
590 | // that never returns, e.g. stdlib exit(). So we set Threshold to filter | ||||
591 | // unwanted cases. | ||||
592 | // | ||||
593 | // Below is LLVM branch weight table, we only want to handle case 1, 2 | ||||
594 | // | ||||
595 | // Case Taken:Nontaken Example | ||||
596 | // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), | ||||
597 | // 2. Invoke-terminating 1:1048575 | ||||
598 | // 3. Coldblock 4:64 __builtin_expect | ||||
599 | // 4. Loop Branch 124:4 For loop | ||||
600 | // 5. PH/ZH/FPH 20:12 | ||||
601 | const uint32_t Threshold = 10000; | ||||
602 | |||||
603 | if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) | ||||
604 | return PPC::BR_NO_HINT; | ||||
605 | |||||
606 | LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName () << "'\n" << " -> " << TBB->getName () << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"; } } while (false) | ||||
607 | << "::" << BB->getName() << "'\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName () << "'\n" << " -> " << TBB->getName () << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"; } } while (false) | ||||
608 | << " -> " << TBB->getName() << ": " << TProb << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName () << "'\n" << " -> " << TBB->getName () << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"; } } while (false) | ||||
609 | << " -> " << FBB->getName() << ": " << FProb << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName () << "'\n" << " -> " << TBB->getName () << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"; } } while (false); | ||||
610 | |||||
611 | const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB); | ||||
612 | |||||
613 | // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities, | ||||
614 | // because we want 'TProb' to stand for 'branch probability' to Dest BasicBlock | ||||
615 | if (BBDN->getBasicBlock()->getBasicBlock() != TBB) | ||||
616 | std::swap(TProb, FProb); | ||||
617 | |||||
618 | return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; | ||||
619 | } | ||||
620 | |||||
621 | // isOpcWithIntImmediate - This method tests to see if the node is a specific | ||||
622 | // opcode and that it has an immediate integer right operand. | ||||
623 | // If so Imm will receive the 32 bit value. | ||||
624 | static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { | ||||
625 | return N->getOpcode() == Opc | ||||
626 | && isInt32Immediate(N->getOperand(1).getNode(), Imm); | ||||
627 | } | ||||
628 | |||||
629 | void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { | ||||
630 | SDLoc dl(SN); | ||||
631 | int FI = cast<FrameIndexSDNode>(N)->getIndex(); | ||||
632 | SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); | ||||
633 | unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; | ||||
634 | if (SN->hasOneUse()) | ||||
635 | CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, | ||||
636 | getSmallIPtrImm(Offset, dl)); | ||||
637 | else | ||||
638 | ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, | ||||
639 | getSmallIPtrImm(Offset, dl))); | ||||
640 | } | ||||
641 | |||||
642 | bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, | ||||
643 | bool isShiftMask, unsigned &SH, | ||||
644 | unsigned &MB, unsigned &ME) { | ||||
645 | // Don't even go down this path for i64, since different logic will be | ||||
646 | // necessary for rldicl/rldicr/rldimi. | ||||
647 | if (N->getValueType(0) != MVT::i32) | ||||
648 | return false; | ||||
649 | |||||
650 | unsigned Shift = 32; | ||||
651 | unsigned Indeterminant = ~0; // bit mask marking indeterminant results | ||||
652 | unsigned Opcode = N->getOpcode(); | ||||
653 | if (N->getNumOperands() != 2 || | ||||
654 | !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) | ||||
655 | return false; | ||||
656 | |||||
657 | if (Opcode == ISD::SHL) { | ||||
658 | // apply shift left to mask if it comes first | ||||
659 | if (isShiftMask) Mask = Mask << Shift; | ||||
660 | // determine which bits are made indeterminant by shift | ||||
661 | Indeterminant = ~(0xFFFFFFFFu << Shift); | ||||
662 | } else if (Opcode == ISD::SRL) { | ||||
663 | // apply shift right to mask if it comes first | ||||
664 | if (isShiftMask) Mask = Mask >> Shift; | ||||
665 | // determine which bits are made indeterminant by shift | ||||
666 | Indeterminant = ~(0xFFFFFFFFu >> Shift); | ||||
667 | // adjust for the left rotate | ||||
668 | Shift = 32 - Shift; | ||||
669 | } else if (Opcode == ISD::ROTL) { | ||||
670 | Indeterminant = 0; | ||||
671 | } else { | ||||
672 | return false; | ||||
673 | } | ||||
674 | |||||
675 | // if the mask doesn't intersect any Indeterminant bits | ||||
676 | if (Mask && !(Mask & Indeterminant)) { | ||||
677 | SH = Shift & 31; | ||||
678 | // make sure the mask is still a mask (wrap arounds may not be) | ||||
679 | return isRunOfOnes(Mask, MB, ME); | ||||
680 | } | ||||
681 | return false; | ||||
682 | } | ||||
683 | |||||
684 | bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { | ||||
685 | SDValue Base = ST->getBasePtr(); | ||||
686 | if (Base.getOpcode() != PPCISD::ADD_TLS) | ||||
687 | return false; | ||||
688 | SDValue Offset = ST->getOffset(); | ||||
689 | if (!Offset.isUndef()) | ||||
690 | return false; | ||||
691 | if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) | ||||
692 | return false; | ||||
693 | |||||
694 | SDLoc dl(ST); | ||||
695 | EVT MemVT = ST->getMemoryVT(); | ||||
696 | EVT RegVT = ST->getValue().getValueType(); | ||||
697 | |||||
698 | unsigned Opcode; | ||||
699 | switch (MemVT.getSimpleVT().SimpleTy) { | ||||
700 | default: | ||||
701 | return false; | ||||
702 | case MVT::i8: { | ||||
703 | Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS; | ||||
704 | break; | ||||
705 | } | ||||
706 | case MVT::i16: { | ||||
707 | Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS; | ||||
708 | break; | ||||
709 | } | ||||
710 | case MVT::i32: { | ||||
711 | Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS; | ||||
712 | break; | ||||
713 | } | ||||
714 | case MVT::i64: { | ||||
715 | Opcode = PPC::STDXTLS; | ||||
716 | break; | ||||
717 | } | ||||
718 | } | ||||
719 | SDValue Chain = ST->getChain(); | ||||
720 | SDVTList VTs = ST->getVTList(); | ||||
721 | SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1), | ||||
722 | Chain}; | ||||
723 | SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); | ||||
724 | transferMemOperands(ST, MN); | ||||
725 | ReplaceNode(ST, MN); | ||||
726 | return true; | ||||
727 | } | ||||
728 | |||||
729 | bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { | ||||
730 | SDValue Base = LD->getBasePtr(); | ||||
731 | if (Base.getOpcode() != PPCISD::ADD_TLS) | ||||
732 | return false; | ||||
733 | SDValue Offset = LD->getOffset(); | ||||
734 | if (!Offset.isUndef()) | ||||
735 | return false; | ||||
736 | if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR) | ||||
737 | return false; | ||||
738 | |||||
739 | SDLoc dl(LD); | ||||
740 | EVT MemVT = LD->getMemoryVT(); | ||||
741 | EVT RegVT = LD->getValueType(0); | ||||
742 | unsigned Opcode; | ||||
743 | switch (MemVT.getSimpleVT().SimpleTy) { | ||||
744 | default: | ||||
745 | return false; | ||||
746 | case MVT::i8: { | ||||
747 | Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS; | ||||
748 | break; | ||||
749 | } | ||||
750 | case MVT::i16: { | ||||
751 | Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS; | ||||
752 | break; | ||||
753 | } | ||||
754 | case MVT::i32: { | ||||
755 | Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS; | ||||
756 | break; | ||||
757 | } | ||||
758 | case MVT::i64: { | ||||
759 | Opcode = PPC::LDXTLS; | ||||
760 | break; | ||||
761 | } | ||||
762 | } | ||||
763 | SDValue Chain = LD->getChain(); | ||||
764 | SDVTList VTs = LD->getVTList(); | ||||
765 | SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain}; | ||||
766 | SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); | ||||
767 | transferMemOperands(LD, MN); | ||||
768 | ReplaceNode(LD, MN); | ||||
769 | return true; | ||||
770 | } | ||||
771 | |||||
772 | /// Turn an or of two masked values into the rotate left word immediate then | ||||
773 | /// mask insert (rlwimi) instruction. | ||||
774 | bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { | ||||
775 | SDValue Op0 = N->getOperand(0); | ||||
776 | SDValue Op1 = N->getOperand(1); | ||||
777 | SDLoc dl(N); | ||||
778 | |||||
779 | KnownBits LKnown = CurDAG->computeKnownBits(Op0); | ||||
780 | KnownBits RKnown = CurDAG->computeKnownBits(Op1); | ||||
781 | |||||
782 | unsigned TargetMask = LKnown.Zero.getZExtValue(); | ||||
783 | unsigned InsertMask = RKnown.Zero.getZExtValue(); | ||||
784 | |||||
785 | if ((TargetMask | InsertMask) == 0xFFFFFFFF) { | ||||
786 | unsigned Op0Opc = Op0.getOpcode(); | ||||
787 | unsigned Op1Opc = Op1.getOpcode(); | ||||
788 | unsigned Value, SH = 0; | ||||
789 | TargetMask = ~TargetMask; | ||||
790 | InsertMask = ~InsertMask; | ||||
791 | |||||
792 | // If the LHS has a foldable shift and the RHS does not, then swap it to the | ||||
793 | // RHS so that we can fold the shift into the insert. | ||||
794 | if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { | ||||
795 | if (Op0.getOperand(0).getOpcode() == ISD::SHL || | ||||
796 | Op0.getOperand(0).getOpcode() == ISD::SRL) { | ||||
797 | if (Op1.getOperand(0).getOpcode() != ISD::SHL && | ||||
798 | Op1.getOperand(0).getOpcode() != ISD::SRL) { | ||||
799 | std::swap(Op0, Op1); | ||||
800 | std::swap(Op0Opc, Op1Opc); | ||||
801 | std::swap(TargetMask, InsertMask); | ||||
802 | } | ||||
803 | } | ||||
804 | } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { | ||||
805 | if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && | ||||
806 | Op1.getOperand(0).getOpcode() != ISD::SRL) { | ||||
807 | std::swap(Op0, Op1); | ||||
808 | std::swap(Op0Opc, Op1Opc); | ||||
809 | std::swap(TargetMask, InsertMask); | ||||
810 | } | ||||
811 | } | ||||
812 | |||||
813 | unsigned MB, ME; | ||||
814 | if (isRunOfOnes(InsertMask, MB, ME)) { | ||||
815 | if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && | ||||
816 | isInt32Immediate(Op1.getOperand(1), Value)) { | ||||
817 | Op1 = Op1.getOperand(0); | ||||
818 | SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; | ||||
819 | } | ||||
820 | if (Op1Opc == ISD::AND) { | ||||
821 | // The AND mask might not be a constant, and we need to make sure that | ||||
822 | // if we're going to fold the masking with the insert, all bits not | ||||
823 | // known to be zero in the mask are known to be one. | ||||
824 | KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1)); | ||||
825 | bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); | ||||
826 | |||||
827 | unsigned SHOpc = Op1.getOperand(0).getOpcode(); | ||||
828 | if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && | ||||
829 | isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { | ||||
830 | // Note that Value must be in range here (less than 32) because | ||||
831 | // otherwise there would not be any bits set in InsertMask. | ||||
832 | Op1 = Op1.getOperand(0).getOperand(0); | ||||
833 | SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; | ||||
834 | } | ||||
835 | } | ||||
836 | |||||
837 | SH &= 31; | ||||
838 | SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), | ||||
839 | getI32Imm(ME, dl) }; | ||||
840 | ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); | ||||
841 | return true; | ||||
842 | } | ||||
843 | } | ||||
844 | return false; | ||||
845 | } | ||||
846 | |||||
847 | static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { | ||||
848 | unsigned MaxTruncation = 0; | ||||
849 | // Cannot use range-based for loop here as we need the actual use (i.e. we | ||||
850 | // need the operand number corresponding to the use). A range-based for | ||||
851 | // will unbox the use and provide an SDNode*. | ||||
852 | for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); | ||||
853 | Use != UseEnd; ++Use) { | ||||
854 | unsigned Opc = | ||||
855 | Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode(); | ||||
856 | switch (Opc) { | ||||
857 | default: return 0; | ||||
858 | case ISD::TRUNCATE: | ||||
859 | if (Use->isMachineOpcode()) | ||||
860 | return 0; | ||||
861 | MaxTruncation = | ||||
862 | std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits()); | ||||
863 | continue; | ||||
864 | case ISD::STORE: { | ||||
865 | if (Use->isMachineOpcode()) | ||||
866 | return 0; | ||||
867 | StoreSDNode *STN = cast<StoreSDNode>(*Use); | ||||
868 | unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); | ||||
869 | if (MemVTSize == 64 || Use.getOperandNo() != 0) | ||||
870 | return 0; | ||||
871 | MaxTruncation = std::max(MaxTruncation, MemVTSize); | ||||
872 | continue; | ||||
873 | } | ||||
874 | case PPC::STW8: | ||||
875 | case PPC::STWX8: | ||||
876 | case PPC::STWU8: | ||||
877 | case PPC::STWUX8: | ||||
878 | if (Use.getOperandNo() != 0) | ||||
879 | return 0; | ||||
880 | MaxTruncation = std::max(MaxTruncation, 32u); | ||||
881 | continue; | ||||
882 | case PPC::STH8: | ||||
883 | case PPC::STHX8: | ||||
884 | case PPC::STHU8: | ||||
885 | case PPC::STHUX8: | ||||
886 | if (Use.getOperandNo() != 0) | ||||
887 | return 0; | ||||
888 | MaxTruncation = std::max(MaxTruncation, 16u); | ||||
889 | continue; | ||||
890 | case PPC::STB8: | ||||
891 | case PPC::STBX8: | ||||
892 | case PPC::STBU8: | ||||
893 | case PPC::STBUX8: | ||||
894 | if (Use.getOperandNo() != 0) | ||||
895 | return 0; | ||||
896 | MaxTruncation = std::max(MaxTruncation, 8u); | ||||
897 | continue; | ||||
898 | } | ||||
899 | } | ||||
900 | return MaxTruncation; | ||||
901 | } | ||||
902 | |||||
903 | // For any 32 < Num < 64, check if the Imm contains at least Num consecutive | ||||
904 | // zeros and return the number of bits by the left of these consecutive zeros. | ||||
905 | static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) { | ||||
906 | unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm)); | ||||
907 | unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm)); | ||||
908 | if ((HiTZ + LoLZ) >= Num) | ||||
909 | return (32 + HiTZ); | ||||
910 | return 0; | ||||
911 | } | ||||
912 | |||||
913 | // Direct materialization of 64-bit constants by enumerated patterns. | ||||
914 | static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, | ||||
915 | uint64_t Imm, unsigned &InstCnt) { | ||||
916 | unsigned TZ = countTrailingZeros<uint64_t>(Imm); | ||||
917 | unsigned LZ = countLeadingZeros<uint64_t>(Imm); | ||||
918 | unsigned TO = countTrailingOnes<uint64_t>(Imm); | ||||
919 | unsigned LO = countLeadingOnes<uint64_t>(Imm); | ||||
920 | unsigned Hi32 = Hi_32(Imm); | ||||
921 | unsigned Lo32 = Lo_32(Imm); | ||||
922 | SDNode *Result = nullptr; | ||||
923 | unsigned Shift = 0; | ||||
924 | |||||
925 | auto getI32Imm = [CurDAG, dl](unsigned Imm) { | ||||
926 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
927 | }; | ||||
928 | |||||
929 | // Following patterns use 1 instruction to materialize the Imm. | ||||
930 | InstCnt = 1; | ||||
931 | // 1-1) Patterns : {zeros}{15-bit value} | ||||
932 | // {ones}{15-bit value} | ||||
933 | if (isInt<16>(Imm)) { | ||||
934 | SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64); | ||||
935 | return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); | ||||
936 | } | ||||
937 | // 1-2) Patterns : {zeros}{15-bit value}{16 zeros} | ||||
938 | // {ones}{15-bit value}{16 zeros} | ||||
939 | if (TZ > 15 && (LZ > 32 || LO > 32)) | ||||
940 | return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, | ||||
941 | getI32Imm((Imm >> 16) & 0xffff)); | ||||
942 | |||||
943 | // Following patterns use 2 instructions to materialize the Imm. | ||||
944 | InstCnt = 2; | ||||
945 | assert(LZ < 64 && "Unexpected leading zeros here.")(static_cast <bool> (LZ < 64 && "Unexpected leading zeros here." ) ? void (0) : __assert_fail ("LZ < 64 && \"Unexpected leading zeros here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 945, __extension__ __PRETTY_FUNCTION__)); | ||||
946 | // Count of ones following the leading zeros. | ||||
947 | unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ); | ||||
948 | // 2-1) Patterns : {zeros}{31-bit value} | ||||
949 | // {ones}{31-bit value} | ||||
950 | if (isInt<32>(Imm)) { | ||||
951 | uint64_t ImmHi16 = (Imm >> 16) & 0xffff; | ||||
952 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; | ||||
953 | Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); | ||||
954 | return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
955 | getI32Imm(Imm & 0xffff)); | ||||
956 | } | ||||
957 | // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros} | ||||
958 | // {zeros}{15-bit value}{zeros} | ||||
959 | // {zeros}{ones}{15-bit value} | ||||
960 | // {ones}{15-bit value}{zeros} | ||||
961 | // We can take advantage of LI's sign-extension semantics to generate leading | ||||
962 | // ones, and then use RLDIC to mask off the ones in both sides after rotation. | ||||
963 | if ((LZ + FO + TZ) > 48) { | ||||
964 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
965 | getI32Imm((Imm >> TZ) & 0xffff)); | ||||
966 | return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), | ||||
967 | getI32Imm(TZ), getI32Imm(LZ)); | ||||
968 | } | ||||
969 | // 2-3) Pattern : {zeros}{15-bit value}{ones} | ||||
970 | // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value, | ||||
971 | // therefore we can take advantage of LI's sign-extension semantics, and then | ||||
972 | // mask them off after rotation. | ||||
973 | // | ||||
974 | // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+ | ||||
975 | // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| | ||||
976 | // +------------------------+ +------------------------+ | ||||
977 | // 63 0 63 0 | ||||
978 | // Imm (Imm >> (48 - LZ) & 0xffff) | ||||
979 | // +----sext-----|--16-bit--+ +clear-|-----------------+ | ||||
980 | // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| | ||||
981 | // +------------------------+ +------------------------+ | ||||
982 | // 63 0 63 0 | ||||
983 | // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ | ||||
984 | if ((LZ + TO) > 48) { | ||||
985 | // Since the immediates with (LZ > 32) have been handled by previous | ||||
986 | // patterns, here we have (LZ <= 32) to make sure we will not shift right | ||||
987 | // the Imm by a negative value. | ||||
988 | assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value." ) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 988, __extension__ __PRETTY_FUNCTION__)); | ||||
989 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
990 | getI32Imm((Imm >> (48 - LZ) & 0xffff))); | ||||
991 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
992 | getI32Imm(48 - LZ), getI32Imm(LZ)); | ||||
993 | } | ||||
994 | // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones} | ||||
995 | // {ones}{15-bit value}{ones} | ||||
996 | // We can take advantage of LI's sign-extension semantics to generate leading | ||||
997 | // ones, and then use RLDICL to mask off the ones in left sides (if required) | ||||
998 | // after rotation. | ||||
999 | // | ||||
1000 | // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+ | ||||
1001 | // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb| | ||||
1002 | // +------------------------+ +------------------------+ | ||||
1003 | // 63 0 63 0 | ||||
1004 | // Imm (Imm >> TO) & 0xffff | ||||
1005 | // +----sext-----|--16-bit--+ +LZ|---------------------+ | ||||
1006 | // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111| | ||||
1007 | // +------------------------+ +------------------------+ | ||||
1008 | // 63 0 63 0 | ||||
1009 | // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ | ||||
1010 | if ((LZ + FO + TO) > 48) { | ||||
1011 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
1012 | getI32Imm((Imm >> TO) & 0xffff)); | ||||
1013 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1014 | getI32Imm(TO), getI32Imm(LZ)); | ||||
1015 | } | ||||
1016 | // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value} | ||||
1017 | // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit | ||||
1018 | // value, we can use LI for Lo16 without generating leading ones then add the | ||||
1019 | // Hi16(in Lo32). | ||||
1020 | if (LZ == 32 && ((Lo32 & 0x8000) == 0)) { | ||||
1021 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
1022 | getI32Imm(Lo32 & 0xffff)); | ||||
1023 | return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), | ||||
1024 | getI32Imm(Lo32 >> 16)); | ||||
1025 | } | ||||
1026 | // 2-6) Patterns : {******}{49 zeros}{******} | ||||
1027 | // {******}{49 ones}{******} | ||||
1028 | // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15 | ||||
1029 | // bits remain on both sides. Rotate right the Imm to construct an int<16> | ||||
1030 | // value, use LI for int<16> value and then use RLDICL without mask to rotate | ||||
1031 | // it back. | ||||
1032 | // | ||||
1033 | // 1) findContiguousZerosAtLeast(Imm, 49) | ||||
1034 | // +------|--zeros-|------+ +---ones--||---15 bit--+ | ||||
1035 | // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb| | ||||
1036 | // +----------------------+ +----------------------+ | ||||
1037 | // 63 0 63 0 | ||||
1038 | // | ||||
1039 | // 2) findContiguousZerosAtLeast(~Imm, 49) | ||||
1040 | // +------|--ones--|------+ +---ones--||---15 bit--+ | ||||
1041 | // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| | ||||
1042 | // +----------------------+ +----------------------+ | ||||
1043 | // 63 0 63 0 | ||||
1044 | if ((Shift = findContiguousZerosAtLeast(Imm, 49)) || | ||||
1045 | (Shift = findContiguousZerosAtLeast(~Imm, 49))) { | ||||
1046 | uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); | ||||
1047 | Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, | ||||
1048 | getI32Imm(RotImm & 0xffff)); | ||||
1049 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1050 | getI32Imm(Shift), getI32Imm(0)); | ||||
1051 | } | ||||
1052 | |||||
1053 | // Following patterns use 3 instructions to materialize the Imm. | ||||
1054 | InstCnt = 3; | ||||
1055 | // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros} | ||||
1056 | // {zeros}{31-bit value}{zeros} | ||||
1057 | // {zeros}{ones}{31-bit value} | ||||
1058 | // {ones}{31-bit value}{zeros} | ||||
1059 | // We can take advantage of LIS's sign-extension semantics to generate leading | ||||
1060 | // ones, add the remaining bits with ORI, and then use RLDIC to mask off the | ||||
1061 | // ones in both sides after rotation. | ||||
1062 | if ((LZ + FO + TZ) > 32) { | ||||
1063 | uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff; | ||||
1064 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; | ||||
1065 | Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); | ||||
1066 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1067 | getI32Imm((Imm >> TZ) & 0xffff)); | ||||
1068 | return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), | ||||
1069 | getI32Imm(TZ), getI32Imm(LZ)); | ||||
1070 | } | ||||
1071 | // 3-2) Pattern : {zeros}{31-bit value}{ones} | ||||
1072 | // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits value, | ||||
1073 | // therefore we can take advantage of LIS's sign-extension semantics, add | ||||
1074 | // the remaining bits with ORI, and then mask them off after rotation. | ||||
1075 | // This is similar to Pattern 2-3, please refer to the diagram there. | ||||
1076 | if ((LZ + TO) > 32) { | ||||
1077 | // Since the immediates with (LZ > 32) have been handled by previous | ||||
1078 | // patterns, here we have (LZ <= 32) to make sure we will not shift right | ||||
1079 | // the Imm by a negative value. | ||||
1080 | assert(LZ <= 32 && "Unexpected shift value.")(static_cast <bool> (LZ <= 32 && "Unexpected shift value." ) ? void (0) : __assert_fail ("LZ <= 32 && \"Unexpected shift value.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1080, __extension__ __PRETTY_FUNCTION__)); | ||||
1081 | Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, | ||||
1082 | getI32Imm((Imm >> (48 - LZ)) & 0xffff)); | ||||
1083 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1084 | getI32Imm((Imm >> (32 - LZ)) & 0xffff)); | ||||
1085 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1086 | getI32Imm(32 - LZ), getI32Imm(LZ)); | ||||
1087 | } | ||||
1088 | // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones} | ||||
1089 | // {ones}{31-bit value}{ones} | ||||
1090 | // We can take advantage of LIS's sign-extension semantics to generate leading | ||||
1091 | // ones, add the remaining bits with ORI, and then use RLDICL to mask off the | ||||
1092 | // ones in left sides (if required) after rotation. | ||||
1093 | // This is similar to Pattern 2-4, please refer to the diagram there. | ||||
1094 | if ((LZ + FO + TO) > 32) { | ||||
1095 | Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, | ||||
1096 | getI32Imm((Imm >> (TO + 16)) & 0xffff)); | ||||
1097 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1098 | getI32Imm((Imm >> TO) & 0xffff)); | ||||
1099 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1100 | getI32Imm(TO), getI32Imm(LZ)); | ||||
1101 | } | ||||
1102 | // 3-4) Patterns : High word == Low word | ||||
1103 | if (Hi32 == Lo32) { | ||||
1104 | // Handle the first 32 bits. | ||||
1105 | uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff; | ||||
1106 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; | ||||
1107 | Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); | ||||
1108 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1109 | getI32Imm(Lo32 & 0xffff)); | ||||
1110 | // Use rldimi to insert the Low word into High word. | ||||
1111 | SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), | ||||
1112 | getI32Imm(0)}; | ||||
1113 | return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); | ||||
1114 | } | ||||
1115 | // 3-5) Patterns : {******}{33 zeros}{******} | ||||
1116 | // {******}{33 ones}{******} | ||||
1117 | // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31 | ||||
1118 | // bits remain on both sides. Rotate right the Imm to construct an int<32> | ||||
1119 | // value, use LIS + ORI for int<32> value and then use RLDICL without mask to | ||||
1120 | // rotate it back. | ||||
1121 | // This is similar to Pattern 2-6, please refer to the diagram there. | ||||
1122 | if ((Shift = findContiguousZerosAtLeast(Imm, 33)) || | ||||
1123 | (Shift = findContiguousZerosAtLeast(~Imm, 33))) { | ||||
1124 | uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); | ||||
1125 | uint64_t ImmHi16 = (RotImm >> 16) & 0xffff; | ||||
1126 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; | ||||
1127 | Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16)); | ||||
1128 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1129 | getI32Imm(RotImm & 0xffff)); | ||||
1130 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1131 | getI32Imm(Shift), getI32Imm(0)); | ||||
1132 | } | ||||
1133 | |||||
1134 | InstCnt = 0; | ||||
1135 | return nullptr; | ||||
1136 | } | ||||
1137 | |||||
1138 | // Try to select instructions to generate a 64 bit immediate using prefix as | ||||
1139 | // well as non prefix instructions. The function will return the SDNode | ||||
1140 | // to materialize that constant or it will return nullptr if it does not | ||||
1141 | // find one. The variable InstCnt is set to the number of instructions that | ||||
1142 | // were selected. | ||||
1143 | static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, | ||||
1144 | uint64_t Imm, unsigned &InstCnt) { | ||||
1145 | unsigned TZ = countTrailingZeros<uint64_t>(Imm); | ||||
1146 | unsigned LZ = countLeadingZeros<uint64_t>(Imm); | ||||
1147 | unsigned TO = countTrailingOnes<uint64_t>(Imm); | ||||
1148 | unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ)); | ||||
1149 | unsigned Hi32 = Hi_32(Imm); | ||||
1150 | unsigned Lo32 = Lo_32(Imm); | ||||
1151 | |||||
1152 | auto getI32Imm = [CurDAG, dl](unsigned Imm) { | ||||
1153 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
1154 | }; | ||||
1155 | |||||
1156 | auto getI64Imm = [CurDAG, dl](uint64_t Imm) { | ||||
1157 | return CurDAG->getTargetConstant(Imm, dl, MVT::i64); | ||||
1158 | }; | ||||
1159 | |||||
1160 | // Following patterns use 1 instruction to materialize Imm. | ||||
1161 | InstCnt = 1; | ||||
1162 | |||||
1163 | // The pli instruction can materialize up to 34 bits directly. | ||||
1164 | // If a constant fits within 34-bits, emit the pli instruction here directly. | ||||
1165 | if (isInt<34>(Imm)) | ||||
1166 | return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, | ||||
1167 | CurDAG->getTargetConstant(Imm, dl, MVT::i64)); | ||||
1168 | |||||
1169 | // Require at least two instructions. | ||||
1170 | InstCnt = 2; | ||||
1171 | SDNode *Result = nullptr; | ||||
1172 | // Patterns : {zeros}{ones}{33-bit value}{zeros} | ||||
1173 | // {zeros}{33-bit value}{zeros} | ||||
1174 | // {zeros}{ones}{33-bit value} | ||||
1175 | // {ones}{33-bit value}{zeros} | ||||
1176 | // We can take advantage of PLI's sign-extension semantics to generate leading | ||||
1177 | // ones, and then use RLDIC to mask off the ones on both sides after rotation. | ||||
1178 | if ((LZ + FO + TZ) > 30) { | ||||
1179 | APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff); | ||||
1180 | APInt Extended = SignedInt34.sext(64); | ||||
1181 | Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, | ||||
1182 | getI64Imm(*Extended.getRawData())); | ||||
1183 | return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), | ||||
1184 | getI32Imm(TZ), getI32Imm(LZ)); | ||||
1185 | } | ||||
1186 | // Pattern : {zeros}{33-bit value}{ones} | ||||
1187 | // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value, | ||||
1188 | // therefore we can take advantage of PLI's sign-extension semantics, and then | ||||
1189 | // mask them off after rotation. | ||||
1190 | // | ||||
1191 | // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+ | ||||
1192 | // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| | ||||
1193 | // +------------------------+ +------------------------+ | ||||
1194 | // 63 0 63 0 | ||||
1195 | // | ||||
1196 | // +----sext-----|--34-bit--+ +clear-|-----------------+ | ||||
1197 | // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| | ||||
1198 | // +------------------------+ +------------------------+ | ||||
1199 | // 63 0 63 0 | ||||
1200 | if ((LZ + TO) > 30) { | ||||
1201 | APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff); | ||||
1202 | APInt Extended = SignedInt34.sext(64); | ||||
1203 | Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, | ||||
1204 | getI64Imm(*Extended.getRawData())); | ||||
1205 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1206 | getI32Imm(30 - LZ), getI32Imm(LZ)); | ||||
1207 | } | ||||
1208 | // Patterns : {zeros}{ones}{33-bit value}{ones} | ||||
1209 | // {ones}{33-bit value}{ones} | ||||
1210 | // Similar to LI we can take advantage of PLI's sign-extension semantics to | ||||
1211 | // generate leading ones, and then use RLDICL to mask off the ones in left | ||||
1212 | // sides (if required) after rotation. | ||||
1213 | if ((LZ + FO + TO) > 30) { | ||||
1214 | APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff); | ||||
1215 | APInt Extended = SignedInt34.sext(64); | ||||
1216 | Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, | ||||
1217 | getI64Imm(*Extended.getRawData())); | ||||
1218 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), | ||||
1219 | getI32Imm(TO), getI32Imm(LZ)); | ||||
1220 | } | ||||
1221 | // Patterns : {******}{31 zeros}{******} | ||||
1222 | // : {******}{31 ones}{******} | ||||
1223 | // If Imm contains 31 consecutive zeros/ones then the remaining bit count | ||||
1224 | // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI | ||||
1225 | // for the int<33> value and then use RLDICL without a mask to rotate it back. | ||||
1226 | // | ||||
1227 | // +------|--ones--|------+ +---ones--||---33 bit--+ | ||||
1228 | // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| | ||||
1229 | // +----------------------+ +----------------------+ | ||||
1230 | // 63 0 63 0 | ||||
1231 | for (unsigned Shift = 0; Shift < 63; ++Shift) { | ||||
1232 | uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue(); | ||||
1233 | if (isInt<34>(RotImm)) { | ||||
1234 | Result = | ||||
1235 | CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm)); | ||||
1236 | return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
1237 | SDValue(Result, 0), getI32Imm(Shift), | ||||
1238 | getI32Imm(0)); | ||||
1239 | } | ||||
1240 | } | ||||
1241 | |||||
1242 | // Patterns : High word == Low word | ||||
1243 | // This is basically a splat of a 32 bit immediate. | ||||
1244 | if (Hi32 == Lo32) { | ||||
1245 | Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); | ||||
1246 | SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), | ||||
1247 | getI32Imm(0)}; | ||||
1248 | return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); | ||||
1249 | } | ||||
1250 | |||||
1251 | InstCnt = 3; | ||||
1252 | // Catch-all | ||||
1253 | // This pattern can form any 64 bit immediate in 3 instructions. | ||||
1254 | SDNode *ResultHi = | ||||
1255 | CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); | ||||
1256 | SDNode *ResultLo = | ||||
1257 | CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32)); | ||||
1258 | SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32), | ||||
1259 | getI32Imm(0)}; | ||||
1260 | return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); | ||||
1261 | } | ||||
1262 | |||||
1263 | static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, | ||||
1264 | unsigned *InstCnt = nullptr) { | ||||
1265 | unsigned InstCntDirect = 0; | ||||
1266 | // No more than 3 instructions is used if we can select the i64 immediate | ||||
1267 | // directly. | ||||
1268 | SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect); | ||||
1269 | |||||
1270 | const PPCSubtarget &Subtarget = | ||||
1271 | CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>(); | ||||
1272 | |||||
1273 | // If we have prefixed instructions and there is a chance we can | ||||
1274 | // materialize the constant with fewer prefixed instructions than | ||||
1275 | // non-prefixed, try that. | ||||
1276 | if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) { | ||||
1277 | unsigned InstCntDirectP = 0; | ||||
1278 | SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP); | ||||
1279 | // Use the prefix case in either of two cases: | ||||
1280 | // 1) We have no result from the non-prefix case to use. | ||||
1281 | // 2) The non-prefix case uses more instructions than the prefix case. | ||||
1282 | // If the prefix and non-prefix cases use the same number of instructions | ||||
1283 | // we will prefer the non-prefix case. | ||||
1284 | if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) { | ||||
1285 | if (InstCnt) | ||||
1286 | *InstCnt = InstCntDirectP; | ||||
1287 | return ResultP; | ||||
1288 | } | ||||
1289 | } | ||||
1290 | |||||
1291 | if (Result) { | ||||
1292 | if (InstCnt) | ||||
1293 | *InstCnt = InstCntDirect; | ||||
1294 | return Result; | ||||
1295 | } | ||||
1296 | auto getI32Imm = [CurDAG, dl](unsigned Imm) { | ||||
1297 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
1298 | }; | ||||
1299 | // Handle the upper 32 bit value. | ||||
1300 | Result = | ||||
1301 | selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect); | ||||
1302 | // Add in the last bits as required. | ||||
1303 | if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) { | ||||
1304 | Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, | ||||
1305 | SDValue(Result, 0), getI32Imm(Hi16)); | ||||
1306 | ++InstCntDirect; | ||||
1307 | } | ||||
1308 | if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) { | ||||
1309 | Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), | ||||
1310 | getI32Imm(Lo16)); | ||||
1311 | ++InstCntDirect; | ||||
1312 | } | ||||
1313 | if (InstCnt) | ||||
1314 | *InstCnt = InstCntDirect; | ||||
1315 | return Result; | ||||
1316 | } | ||||
1317 | |||||
1318 | // Select a 64-bit constant. | ||||
1319 | static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { | ||||
1320 | SDLoc dl(N); | ||||
1321 | |||||
1322 | // Get 64 bit value. | ||||
1323 | int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); | ||||
1324 | if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { | ||||
1325 | uint64_t SextImm = SignExtend64(Imm, MinSize); | ||||
1326 | SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); | ||||
1327 | if (isInt<16>(SextImm)) | ||||
1328 | return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); | ||||
1329 | } | ||||
1330 | return selectI64Imm(CurDAG, dl, Imm); | ||||
1331 | } | ||||
1332 | |||||
1333 | namespace { | ||||
1334 | |||||
1335 | class BitPermutationSelector { | ||||
1336 | struct ValueBit { | ||||
1337 | SDValue V; | ||||
1338 | |||||
1339 | // The bit number in the value, using a convention where bit 0 is the | ||||
1340 | // lowest-order bit. | ||||
1341 | unsigned Idx; | ||||
1342 | |||||
1343 | // ConstZero means a bit we need to mask off. | ||||
1344 | // Variable is a bit comes from an input variable. | ||||
1345 | // VariableKnownToBeZero is also a bit comes from an input variable, | ||||
1346 | // but it is known to be already zero. So we do not need to mask them. | ||||
1347 | enum Kind { | ||||
1348 | ConstZero, | ||||
1349 | Variable, | ||||
1350 | VariableKnownToBeZero | ||||
1351 | } K; | ||||
1352 | |||||
1353 | ValueBit(SDValue V, unsigned I, Kind K = Variable) | ||||
1354 | : V(V), Idx(I), K(K) {} | ||||
1355 | ValueBit(Kind K = Variable) | ||||
1356 | : V(SDValue(nullptr, 0)), Idx(UINT32_MAX(4294967295U)), K(K) {} | ||||
1357 | |||||
1358 | bool isZero() const { | ||||
1359 | return K == ConstZero || K == VariableKnownToBeZero; | ||||
1360 | } | ||||
1361 | |||||
1362 | bool hasValue() const { | ||||
1363 | return K == Variable || K == VariableKnownToBeZero; | ||||
1364 | } | ||||
1365 | |||||
1366 | SDValue getValue() const { | ||||
1367 | assert(hasValue() && "Cannot get the value of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value of a constant bit" ) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value of a constant bit\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1367, __extension__ __PRETTY_FUNCTION__)); | ||||
1368 | return V; | ||||
1369 | } | ||||
1370 | |||||
1371 | unsigned getValueBitIndex() const { | ||||
1372 | assert(hasValue() && "Cannot get the value bit index of a constant bit")(static_cast <bool> (hasValue() && "Cannot get the value bit index of a constant bit" ) ? void (0) : __assert_fail ("hasValue() && \"Cannot get the value bit index of a constant bit\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1372, __extension__ __PRETTY_FUNCTION__)); | ||||
1373 | return Idx; | ||||
1374 | } | ||||
1375 | }; | ||||
1376 | |||||
1377 | // A bit group has the same underlying value and the same rotate factor. | ||||
1378 | struct BitGroup { | ||||
1379 | SDValue V; | ||||
1380 | unsigned RLAmt; | ||||
1381 | unsigned StartIdx, EndIdx; | ||||
1382 | |||||
1383 | // This rotation amount assumes that the lower 32 bits of the quantity are | ||||
1384 | // replicated in the high 32 bits by the rotation operator (which is done | ||||
1385 | // by rlwinm and friends in 64-bit mode). | ||||
1386 | bool Repl32; | ||||
1387 | // Did converting to Repl32 == true change the rotation factor? If it did, | ||||
1388 | // it decreased it by 32. | ||||
1389 | bool Repl32CR; | ||||
1390 | // Was this group coalesced after setting Repl32 to true? | ||||
1391 | bool Repl32Coalesced; | ||||
1392 | |||||
1393 | BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) | ||||
1394 | : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), | ||||
1395 | Repl32Coalesced(false) { | ||||
1396 | LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << Rdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << " [" << S << ", " << E << "]\n"; } } while (false) | ||||
1397 | << " [" << S << ", " << E << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << " [" << S << ", " << E << "]\n"; } } while (false); | ||||
1398 | } | ||||
1399 | }; | ||||
1400 | |||||
1401 | // Information on each (Value, RLAmt) pair (like the number of groups | ||||
1402 | // associated with each) used to choose the lowering method. | ||||
1403 | struct ValueRotInfo { | ||||
1404 | SDValue V; | ||||
1405 | unsigned RLAmt = std::numeric_limits<unsigned>::max(); | ||||
1406 | unsigned NumGroups = 0; | ||||
1407 | unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max(); | ||||
1408 | bool Repl32 = false; | ||||
1409 | |||||
1410 | ValueRotInfo() = default; | ||||
1411 | |||||
1412 | // For sorting (in reverse order) by NumGroups, and then by | ||||
1413 | // FirstGroupStartIdx. | ||||
1414 | bool operator < (const ValueRotInfo &Other) const { | ||||
1415 | // We need to sort so that the non-Repl32 come first because, when we're | ||||
1416 | // doing masking, the Repl32 bit groups might be subsumed into the 64-bit | ||||
1417 | // masking operation. | ||||
1418 | if (Repl32 < Other.Repl32) | ||||
1419 | return true; | ||||
1420 | else if (Repl32 > Other.Repl32) | ||||
1421 | return false; | ||||
1422 | else if (NumGroups > Other.NumGroups) | ||||
1423 | return true; | ||||
1424 | else if (NumGroups < Other.NumGroups) | ||||
1425 | return false; | ||||
1426 | else if (RLAmt == 0 && Other.RLAmt != 0) | ||||
1427 | return true; | ||||
1428 | else if (RLAmt != 0 && Other.RLAmt == 0) | ||||
1429 | return false; | ||||
1430 | else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) | ||||
1431 | return true; | ||||
1432 | return false; | ||||
1433 | } | ||||
1434 | }; | ||||
1435 | |||||
1436 | using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>; | ||||
1437 | using ValueBitsMemoizer = | ||||
1438 | DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>; | ||||
1439 | ValueBitsMemoizer Memoizer; | ||||
1440 | |||||
1441 | // Return a pair of bool and a SmallVector pointer to a memoization entry. | ||||
1442 | // The bool is true if something interesting was deduced, otherwise if we're | ||||
1443 | // providing only a generic representation of V (or something else likewise | ||||
1444 | // uninteresting for instruction selection) through the SmallVector. | ||||
// Describes V as NumBits per-bit ValueBit entries (index 0 is the
// lowest-order bit) and caches the description in Memoizer, keyed by V.
// Recognized opcodes (constant rotates/shifts, AND with a constant mask,
// disjoint OR, i32->i64 ZERO_EXTEND, i64->i32 TRUNCATE, AssertZext and
// zero-extending loads) are looked through by recursing on their operands.
// Anything else, and any recognized opcode whose preconditions fail, is
// described generically as bits 0..NumBits-1 of V itself with
// Interesting == false.
// The returned pointer refers to the vector stored in the memoization entry.
1445 | std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V, | ||||
1446 | unsigned NumBits) { | ||||
1447 | auto &ValueEntry = Memoizer[V]; | ||||
1448 | if (ValueEntry) | ||||
1449 | return std::make_pair(ValueEntry->first, &ValueEntry->second); | ||||
1450 | ValueEntry.reset(new ValueBitsMemoizedValue()); | ||||
// NOTE(review): Interesting and Bits refer to the separately allocated payload
// rather than to the map slot; presumably this keeps them valid when the
// recursive calls below add entries to Memoizer -- confirm.
1451 | bool &Interesting = ValueEntry->first; | ||||
1452 | SmallVector<ValueBit, 64> &Bits = ValueEntry->second; | ||||
1453 | Bits.resize(NumBits); | ||||
1454 | |||||
1455 | switch (V.getOpcode()) { | ||||
1456 | default: break; | ||||
1457 | case ISD::ROTL: | ||||
1458 | if (isa<ConstantSDNode>(V.getOperand(1))) { | ||||
1459 | unsigned RotAmt = V.getConstantOperandVal(1); | ||||
1460 | |||||
1461 | const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; | ||||
1462 | |||||
// Result bit i is operand bit (i - RotAmt), wrapping around at NumBits.
1463 | for (unsigned i = 0; i < NumBits; ++i) | ||||
1464 | Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt]; | ||||
1465 | |||||
1466 | return std::make_pair(Interesting = true, &Bits); | ||||
1467 | } | ||||
1468 | break; | ||||
1469 | case ISD::SHL: | ||||
1470 | case PPCISD::SHL: | ||||
1471 | if (isa<ConstantSDNode>(V.getOperand(1))) { | ||||
1472 | unsigned ShiftAmt = V.getConstantOperandVal(1); | ||||
1473 | |||||
1474 | const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; | ||||
1475 | |||||
// Operand bits move up by ShiftAmt; the vacated low bits are constant zero.
1476 | for (unsigned i = ShiftAmt; i < NumBits; ++i) | ||||
1477 | Bits[i] = LHSBits[i - ShiftAmt]; | ||||
1478 | |||||
1479 | for (unsigned i = 0; i < ShiftAmt; ++i) | ||||
1480 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1481 | |||||
1482 | return std::make_pair(Interesting = true, &Bits); | ||||
1483 | } | ||||
1484 | break; | ||||
1485 | case ISD::SRL: | ||||
1486 | case PPCISD::SRL: | ||||
1487 | if (isa<ConstantSDNode>(V.getOperand(1))) { | ||||
1488 | unsigned ShiftAmt = V.getConstantOperandVal(1); | ||||
1489 | |||||
1490 | const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; | ||||
1491 | |||||
// Operand bits move down by ShiftAmt; the vacated high bits are constant zero.
1492 | for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) | ||||
1493 | Bits[i] = LHSBits[i + ShiftAmt]; | ||||
1494 | |||||
1495 | for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) | ||||
1496 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1497 | |||||
1498 | return std::make_pair(Interesting = true, &Bits); | ||||
1499 | } | ||||
1500 | break; | ||||
1501 | case ISD::AND: | ||||
1502 | if (isa<ConstantSDNode>(V.getOperand(1))) { | ||||
1503 | uint64_t Mask = V.getConstantOperandVal(1); | ||||
1504 | |||||
1505 | const SmallVector<ValueBit, 64> *LHSBits; | ||||
1506 | // Mark this as interesting, only if the LHS was also interesting. This | ||||
1507 | // prevents the overall procedure from matching a single immediate 'and' | ||||
1508 | // (which is non-optimal because such an and might be folded with other | ||||
1509 | // things if we don't select it here). | ||||
1510 | std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); | ||||
1511 | |||||
1512 | for (unsigned i = 0; i < NumBits; ++i) | ||||
1513 | if (((Mask >> i) & 1) == 1) | ||||
1514 | Bits[i] = (*LHSBits)[i]; | ||||
1515 | else { | ||||
1516 | // AND instruction masks this bit. If the input is already zero, | ||||
1517 | // we have nothing to do here. Otherwise, make the bit ConstZero. | ||||
1518 | if ((*LHSBits)[i].isZero()) | ||||
1519 | Bits[i] = (*LHSBits)[i]; | ||||
1520 | else | ||||
1521 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1522 | } | ||||
1523 | |||||
1524 | return std::make_pair(Interesting, &Bits); | ||||
1525 | } | ||||
1526 | break; | ||||
1527 | case ISD::OR: { | ||||
// An OR is only looked through when, at every bit position, at least one of
// the two operands is known to be zero. If some position has two non-zero
// bits the loop stops early and control falls through to the generic
// description below, which overwrites whatever was written to Bits so far.
1528 | const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; | ||||
1529 | const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; | ||||
1530 | |||||
1531 | bool AllDisjoint = true; | ||||
1532 | SDValue LastVal = SDValue(); | ||||
1533 | unsigned LastIdx = 0; | ||||
1534 | for (unsigned i = 0; i < NumBits; ++i) { | ||||
1535 | if (LHSBits[i].isZero() && RHSBits[i].isZero()) { | ||||
1536 | // If both inputs are known to be zero and one is ConstZero and | ||||
1537 | // another is VariableKnownToBeZero, we can select whichever | ||||
1538 | // we like. To minimize the number of bit groups, we select | ||||
1539 | // VariableKnownToBeZero if this bit is the next bit of the same | ||||
1540 | // input variable from the previous bit. Otherwise, we select | ||||
1541 | // ConstZero. | ||||
1542 | if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && | ||||
1543 | LHSBits[i].getValueBitIndex() == LastIdx + 1) | ||||
1544 | Bits[i] = LHSBits[i]; | ||||
1545 | else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && | ||||
1546 | RHSBits[i].getValueBitIndex() == LastIdx + 1) | ||||
1547 | Bits[i] = RHSBits[i]; | ||||
1548 | else | ||||
1549 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1550 | } | ||||
1551 | else if (LHSBits[i].isZero()) | ||||
1552 | Bits[i] = RHSBits[i]; | ||||
1553 | else if (RHSBits[i].isZero()) | ||||
1554 | Bits[i] = LHSBits[i]; | ||||
1555 | else { | ||||
1556 | AllDisjoint = false; | ||||
1557 | break; | ||||
1558 | } | ||||
1559 | // We remember the value and bit index of this bit. | ||||
1560 | if (Bits[i].hasValue()) { | ||||
1561 | LastVal = Bits[i].getValue(); | ||||
1562 | LastIdx = Bits[i].getValueBitIndex(); | ||||
1563 | } | ||||
1564 | else { | ||||
1565 | if (LastVal) LastVal = SDValue(); | ||||
1566 | LastIdx = 0; | ||||
1567 | } | ||||
1568 | } | ||||
1569 | |||||
1570 | if (!AllDisjoint) | ||||
1571 | break; | ||||
1572 | |||||
1573 | return std::make_pair(Interesting = true, &Bits); | ||||
1574 | } | ||||
1575 | case ISD::ZERO_EXTEND: { | ||||
1576 | // We support only the case with zero extension from i32 to i64 so far. | ||||
1577 | if (V.getValueType() != MVT::i64 || | ||||
1578 | V.getOperand(0).getValueType() != MVT::i32) | ||||
1579 | break; | ||||
1580 | |||||
1581 | const SmallVector<ValueBit, 64> *LHSBits; | ||||
1582 | const unsigned NumOperandBits = 32; | ||||
1583 | std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), | ||||
1584 | NumOperandBits); | ||||
1585 | |||||
// The low 32 bits are the operand's bits; the extended bits are constant zero.
1586 | for (unsigned i = 0; i < NumOperandBits; ++i) | ||||
1587 | Bits[i] = (*LHSBits)[i]; | ||||
1588 | |||||
1589 | for (unsigned i = NumOperandBits; i < NumBits; ++i) | ||||
1590 | Bits[i] = ValueBit(ValueBit::ConstZero); | ||||
1591 | |||||
1592 | return std::make_pair(Interesting, &Bits); | ||||
1593 | } | ||||
1594 | case ISD::TRUNCATE: { | ||||
1595 | EVT FromType = V.getOperand(0).getValueType(); | ||||
1596 | EVT ToType = V.getValueType(); | ||||
1597 | // We support only the case with truncate from i64 to i32. | ||||
1598 | if (FromType != MVT::i64 || ToType != MVT::i32) | ||||
1599 | break; | ||||
1600 | const unsigned NumAllBits = FromType.getSizeInBits(); | ||||
1601 | SmallVector<ValueBit, 64> *InBits; | ||||
1602 | std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), | ||||
1603 | NumAllBits); | ||||
1604 | const unsigned NumValidBits = ToType.getSizeInBits(); | ||||
1605 | |||||
1606 | // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. | ||||
1607 | // So, we cannot include this truncate. | ||||
1608 | bool UseUpper32bit = false; | ||||
1609 | for (unsigned i = 0; i < NumValidBits; ++i) | ||||
1610 | if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { | ||||
1611 | UseUpper32bit = true; | ||||
1612 | break; | ||||
1613 | } | ||||
1614 | if (UseUpper32bit) | ||||
1615 | break; | ||||
1616 | |||||
1617 | for (unsigned i = 0; i < NumValidBits; ++i) | ||||
1618 | Bits[i] = (*InBits)[i]; | ||||
1619 | |||||
1620 | return std::make_pair(Interesting, &Bits); | ||||
1621 | } | ||||
1622 | case ISD::AssertZext: { | ||||
1623 | // For AssertZext, we look through the operand and | ||||
1624 | // mark the bits known to be zero. | ||||
1625 | const SmallVector<ValueBit, 64> *LHSBits; | ||||
1626 | std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), | ||||
1627 | NumBits); | ||||
1628 | |||||
1629 | EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT(); | ||||
1630 | const unsigned NumValidBits = FromType.getSizeInBits(); | ||||
1631 | for (unsigned i = 0; i < NumValidBits; ++i) | ||||
1632 | Bits[i] = (*LHSBits)[i]; | ||||
1633 | |||||
1634 | // These bits are known to be zero but the AssertZext may be from a value | ||||
1635 | // that already has some constant zero bits (i.e. from a masking and). | ||||
1636 | for (unsigned i = NumValidBits; i < NumBits; ++i) | ||||
1637 | Bits[i] = (*LHSBits)[i].hasValue() | ||||
1638 | ? ValueBit((*LHSBits)[i].getValue(), | ||||
1639 | (*LHSBits)[i].getValueBitIndex(), | ||||
1640 | ValueBit::VariableKnownToBeZero) | ||||
1641 | : ValueBit(ValueBit::ConstZero); | ||||
1642 | |||||
1643 | return std::make_pair(Interesting, &Bits); | ||||
1644 | } | ||||
1645 | case ISD::LOAD: | ||||
1646 | LoadSDNode *LD = cast<LoadSDNode>(V); | ||||
// Only result 0 of a zero-extending load is described specially: bits below
// the memory type's width are ordinary bits of V, the rest are bits of V that
// are known to be zero.
1647 | if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { | ||||
1648 | EVT VT = LD->getMemoryVT(); | ||||
1649 | const unsigned NumValidBits = VT.getSizeInBits(); | ||||
1650 | |||||
1651 | for (unsigned i = 0; i < NumValidBits; ++i) | ||||
1652 | Bits[i] = ValueBit(V, i); | ||||
1653 | |||||
1654 | // These bits are known to be zero. | ||||
1655 | for (unsigned i = NumValidBits; i < NumBits; ++i) | ||||
1656 | Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); | ||||
1657 | |||||
1658 | // Zero-extending load itself cannot be optimized. So, it is not | ||||
1659 | // interesting by itself though it gives useful information. | ||||
1660 | return std::make_pair(Interesting = false, &Bits); | ||||
1661 | } | ||||
1662 | break; | ||||
1663 | } | ||||
1664 | |||||
// Generic description: every bit is simply the corresponding bit of V.
1665 | for (unsigned i = 0; i < NumBits; ++i) | ||||
1666 | Bits[i] = ValueBit(V, i); | ||||
1667 | |||||
1668 | return std::make_pair(Interesting = false, &Bits); | ||||
1669 | } | ||||
1670 | |||||
1671 | // For each value (except the constant ones), compute the left-rotate amount | ||||
1672 | // to get it from its original to final position. | ||||
1673 | void computeRotationAmounts() { | ||||
1674 | NeedMask = false; | ||||
1675 | RLAmt.resize(Bits.size()); | ||||
1676 | for (unsigned i = 0; i < Bits.size(); ++i) | ||||
1677 | if (Bits[i].hasValue()) { | ||||
1678 | unsigned VBI = Bits[i].getValueBitIndex(); | ||||
1679 | if (i >= VBI) | ||||
1680 | RLAmt[i] = i - VBI; | ||||
1681 | else | ||||
1682 | RLAmt[i] = Bits.size() - (VBI - i); | ||||
1683 | } else if (Bits[i].isZero()) { | ||||
1684 | NeedMask = true; | ||||
1685 | RLAmt[i] = UINT32_MAX(4294967295U); | ||||
1686 | } else { | ||||
1687 | llvm_unreachable("Unknown value bit type")::llvm::llvm_unreachable_internal("Unknown value bit type", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1687); | ||||
1688 | } | ||||
1689 | } | ||||
1690 | |||||
1691 | // Rebuild BitGroups from Bits and RLAmt: collect groups of consecutive bits | ||||
1692 | // with the same underlying value and rotation factor. If we're doing late | ||||
1693 | // masking, we ignore zeros, otherwise they break up groups. | ||||
// A run is only recorded when its value has a node, so runs made purely of
// zero bits never become a BitGroup. After the scan, a first group starting at
// bit 0 and a last group ending at the top bit are fused into a single
// wrapping group when their value and rotation factor agree.
1694 | void collectBitGroups(bool LateMask) { | ||||
1695 | BitGroups.clear(); | ||||
1696 | |||||
// Seed the running group from bit 0.
1697 | unsigned LastRLAmt = RLAmt[0]; | ||||
1698 | SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); | ||||
1699 | unsigned LastGroupStartIdx = 0; | ||||
1700 | bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); | ||||
1701 | for (unsigned i = 1; i < Bits.size(); ++i) { | ||||
1702 | unsigned ThisRLAmt = RLAmt[i]; | ||||
1703 | SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); | ||||
// With late masking, a bit without a value takes on the value and rotation of
// the running group, so it does not terminate that group.
1704 | if (LateMask && !ThisValue) { | ||||
1705 | ThisValue = LastValue; | ||||
1706 | ThisRLAmt = LastRLAmt; | ||||
1707 | // If we're doing late masking, then the first bit group always starts | ||||
1708 | // at zero (even if the first bits were zero). | ||||
1709 | if (BitGroups.empty()) | ||||
1710 | LastGroupStartIdx = 0; | ||||
1711 | } | ||||
1712 | |||||
1713 | // If this bit is known to be zero and the current group is a bit group | ||||
1714 | // of zeros, we do not need to terminate the current bit group even if the | ||||
1715 | // Value or RLAmt does not match here. Instead, we terminate this group | ||||
1716 | // when the first non-zero bit appears later. | ||||
1717 | if (IsGroupOfZeros && Bits[i].isZero()) | ||||
1718 | continue; | ||||
1719 | |||||
1720 | // If this bit has the same underlying value and the same rotate factor as | ||||
1721 | // the last one, then they're part of the same group. | ||||
1722 | if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) | ||||
1723 | // We cannot continue the current group if this bit is not known to | ||||
1724 | // be zero in a bit group of zeros. | ||||
1725 | if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) | ||||
1726 | continue; | ||||
1727 | |||||
// The running group ends at bit i-1; record it (unless it has no value) and
// start a new running group at bit i.
1728 | if (LastValue.getNode()) | ||||
1729 | BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, | ||||
1730 | i-1)); | ||||
1731 | LastRLAmt = ThisRLAmt; | ||||
1732 | LastValue = ThisValue; | ||||
1733 | LastGroupStartIdx = i; | ||||
1734 | IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); | ||||
1735 | } | ||||
// Flush the group that was still open when the scan reached the last bit.
1736 | if (LastValue.getNode()) | ||||
1737 | BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, | ||||
1738 | Bits.size()-1)); | ||||
1739 | |||||
1740 | if (BitGroups.empty()) | ||||
1741 | return; | ||||
1742 | |||||
1743 | // We might be able to combine the first and last groups. | ||||
1744 | if (BitGroups.size() > 1) { | ||||
1745 | // If the first and last groups are the same, then remove the first group | ||||
1746 | // in favor of the last group, making the ending index of the last group | ||||
1747 | // equal to the ending index of the to-be-removed first group. | ||||
1748 | if (BitGroups[0].StartIdx == 0 && | ||||
1749 | BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && | ||||
1750 | BitGroups[0].V == BitGroups[BitGroups.size()-1].V && | ||||
1751 | BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { | ||||
1752 | LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining final bit group with initial one\n" ; } } while (false); | ||||
// The surviving group now wraps around: its StartIdx is greater than its EndIdx.
1753 | BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; | ||||
1754 | BitGroups.erase(BitGroups.begin()); | ||||
1755 | } | ||||
1756 | } | ||||
1757 | } | ||||
1758 | |||||
1759 | // Take all (SDValue, RLAmt) pairs and sort them by the number of groups | ||||
1760 | // associated with each. If the number of groups are same, we prefer a group | ||||
1761 | // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate | ||||
1762 | // instruction. If there is a degeneracy, pick the one that occurs | ||||
1763 | // first (in the final value). | ||||
1764 | void collectValueRotInfo() { | ||||
1765 | ValueRots.clear(); | ||||
1766 | |||||
1767 | for (auto &BG : BitGroups) { | ||||
1768 | unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0); | ||||
1769 | ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)]; | ||||
1770 | VRI.V = BG.V; | ||||
1771 | VRI.RLAmt = BG.RLAmt; | ||||
1772 | VRI.Repl32 = BG.Repl32; | ||||
1773 | VRI.NumGroups += 1; | ||||
1774 | VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx); | ||||
1775 | } | ||||
1776 | |||||
1777 | // Now that we've collected the various ValueRotInfo instances, we need to | ||||
1778 | // sort them. | ||||
1779 | ValueRotsVec.clear(); | ||||
1780 | for (auto &I : ValueRots) { | ||||
1781 | ValueRotsVec.push_back(I.second); | ||||
1782 | } | ||||
1783 | llvm::sort(ValueRotsVec); | ||||
1784 | } | ||||
1785 | |||||
1786 | // In 64-bit mode, rlwinm and friends have a rotation operator that | ||||
1787 | // replicates the low-order 32 bits into the high-order 32-bits. The mask | ||||
1788 | // indices of these instructions can only be in the lower 32 bits, so they | ||||
1789 | // can only represent some 64-bit bit groups. However, when they can be used, | ||||
1790 | // the 32-bit replication can be used to represent, as a single bit group, | ||||
1791 | // otherwise separate bit groups. We'll convert to replicated-32-bit bit | ||||
1792 | // groups when possible. BitGroups is updated in place (groups are marked | ||||
1793 | // Repl32 and adjacent ones merged); nothing is returned. | ||||
1794 | void assignRepl32BitGroups() { | ||||
1795 | // If we have bits like this: | ||||
1796 | // | ||||
1797 | // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 | ||||
1798 | // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24 | ||||
1799 | // Groups: | RLAmt = 8 | RLAmt = 40 | | ||||
1800 | // | ||||
1801 | // But, making use of a 32-bit operation that replicates the low-order 32 | ||||
1802 | // bits into the high-order 32 bits, this can be one bit group with a RLAmt | ||||
1803 | // of 8. | ||||
1804 | |||||
// IsAllLow32: true when every bit of the group that has a value is taken from
// bit index < 32 of that value. Bits without a value are skipped. A group with
// StartIdx > EndIdx wraps around, so it is scanned as [StartIdx, Bits.size())
// followed by [0, EndIdx].
1805 | auto IsAllLow32 = [this](BitGroup & BG) { | ||||
1806 | if (BG.StartIdx <= BG.EndIdx) { | ||||
1807 | for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { | ||||
1808 | if (!Bits[i].hasValue()) | ||||
1809 | continue; | ||||
1810 | if (Bits[i].getValueBitIndex() >= 32) | ||||
1811 | return false; | ||||
1812 | } | ||||
1813 | } else { | ||||
1814 | for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { | ||||
1815 | if (!Bits[i].hasValue()) | ||||
1816 | continue; | ||||
1817 | if (Bits[i].getValueBitIndex() >= 32) | ||||
1818 | return false; | ||||
1819 | } | ||||
1820 | for (unsigned i = 0; i <= BG.EndIdx; ++i) { | ||||
1821 | if (!Bits[i].hasValue()) | ||||
1822 | continue; | ||||
1823 | if (Bits[i].getValueBitIndex() >= 32) | ||||
1824 | return false; | ||||
1825 | } | ||||
1826 | } | ||||
1827 | |||||
1828 | return true; | ||||
1829 | }; | ||||
1830 | |||||
// First pass: decide, group by group, which groups become Repl32.
1831 | for (auto &BG : BitGroups) { | ||||
1832 | // If this bit group has RLAmt of 0 and will not be merged with | ||||
1833 | // another bit group, we don't benefit from Repl32. We don't mark | ||||
1834 | // such group to give more freedom for later instruction selection. | ||||
1835 | if (BG.RLAmt == 0) { | ||||
// PotentiallyMerged: some other group uses the same value with RLAmt 0 or 32.
1836 | auto PotentiallyMerged = [this](BitGroup & BG) { | ||||
1837 | for (auto &BG2 : BitGroups) | ||||
1838 | if (&BG != &BG2 && BG.V == BG2.V && | ||||
1839 | (BG2.RLAmt == 0 || BG2.RLAmt == 32)) | ||||
1840 | return true; | ||||
1841 | return false; | ||||
1842 | }; | ||||
1843 | if (!PotentiallyMerged(BG)) | ||||
1844 | continue; | ||||
1845 | } | ||||
// Only groups lying entirely in result bits [0, 32) and fed solely by the low
// 32 bits of their value are marked. A rotate amount of 32 or more is reduced
// by 32, and that reduction is remembered in Repl32CR.
1846 | if (BG.StartIdx < 32 && BG.EndIdx < 32) { | ||||
1847 | if (IsAllLow32(BG)) { | ||||
1848 | if (BG.RLAmt >= 32) { | ||||
1849 | BG.RLAmt -= 32; | ||||
1850 | BG.Repl32CR = true; | ||||
1851 | } | ||||
1852 | |||||
1853 | BG.Repl32 = true; | ||||
1854 | |||||
1855 | LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for " << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" << BG.StartIdx << ", " << BG .EndIdx << "]\n"; } } while (false) | ||||
1856 | << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for " << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" << BG.StartIdx << ", " << BG .EndIdx << "]\n"; } } while (false) | ||||
1857 | << BG.StartIdx << ", " << BG.EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t32-bit replicated bit group for " << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" << BG.StartIdx << ", " << BG .EndIdx << "]\n"; } } while (false); | ||||
1858 | } | ||||
1859 | } | ||||
1860 | } | ||||
1861 | |||||
1862 | // Now walk through the bit groups, consolidating where possible. | ||||
// I is advanced manually: erase() already yields the next element, so the
// merge paths skip the ++I at the bottom of the loop.
1863 | for (auto I = BitGroups.begin(); I != BitGroups.end();) { | ||||
1864 | // We might want to remove this bit group by merging it with the previous | ||||
1865 | // group (which might be the ending group). | ||||
1866 | auto IP = (I == BitGroups.begin()) ? | ||||
1867 | std::prev(BitGroups.end()) : std::prev(I); | ||||
// Merge I into IP when both are Repl32, share value and rotate amount, and I
// begins on the bit right after IP ends (wrapping from 63 back to 0).
1868 | if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && | ||||
1869 | I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { | ||||
1870 | |||||
1871 | LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false) | ||||
1872 | << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false) | ||||
1873 | << I->StartIdx << ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false) | ||||
1874 | << "] with group with range [" << IP->StartIdx << ", "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false) | ||||
1875 | << IP->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP ->StartIdx << ", " << IP->EndIdx << "]\n" ; } } while (false); | ||||
1876 | |||||
// IP is updated before the erase because erasing may invalidate it.
1877 | IP->EndIdx = I->EndIdx; | ||||
1878 | IP->Repl32CR = IP->Repl32CR || I->Repl32CR; | ||||
1879 | IP->Repl32Coalesced = true; | ||||
1880 | I = BitGroups.erase(I); | ||||
1881 | continue; | ||||
1882 | } else { | ||||
1883 | // There is a special case worth handling: If there is a single group | ||||
1884 | // covering the entire upper 32 bits, and it can be merged with both | ||||
1885 | // the next and previous groups (which might be the same group), then | ||||
1886 | // do so. If it is the same group (so there will be only one group in | ||||
1887 | // total), then we need to reverse the order of the range so that it | ||||
1888 | // covers the entire 64 bits. | ||||
1889 | if (I->StartIdx == 32 && I->EndIdx == 63) { | ||||
1890 | assert(std::next(I) == BitGroups.end() &&(static_cast <bool> (std::next(I) == BitGroups.end() && "bit group ends at index 63 but there is another?") ? void ( 0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1891, __extension__ __PRETTY_FUNCTION__)) | ||||
1891 | "bit group ends at index 63 but there is another?")(static_cast <bool> (std::next(I) == BitGroups.end() && "bit group ends at index 63 but there is another?") ? void ( 0) : __assert_fail ("std::next(I) == BitGroups.end() && \"bit group ends at index 63 but there is another?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1891, __extension__ __PRETTY_FUNCTION__)); | ||||
// I is the last group, so the group that follows it cyclically is the first one.
1892 | auto IN = BitGroups.begin(); | ||||
1893 | |||||
// I itself is not required to be Repl32 here; its rotate amount is compared
// modulo 32 and it must draw only on the low 32 bits of its value.
1894 | if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && | ||||
1895 | (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && | ||||
1896 | IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && | ||||
1897 | IsAllLow32(*I)) { | ||||
1898 | |||||
1899 | LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1900 | << " RLAmt = " << I->RLAmt << " [" << I->StartIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1901 | << ", " << I->EndIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1902 | << "] with 32-bit replicated groups with ranges ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1903 | << IP->StartIdx << ", " << IP->EndIdx << "] and ["do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false) | ||||
1904 | << IN->StartIdx << ", " << IN->EndIdx << "]\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I-> RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"; } } while (false); | ||||
1905 | |||||
1906 | if (IP == IN) { | ||||
1907 | // There is only one other group; change it to cover the whole | ||||
1908 | // range (backward, so that it can still be Repl32 but cover the | ||||
1909 | // whole 64-bit range). | ||||
1910 | IP->StartIdx = 31; | ||||
1911 | IP->EndIdx = 30; | ||||
1912 | IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; | ||||
1913 | IP->Repl32Coalesced = true; | ||||
1914 | I = BitGroups.erase(I); | ||||
1915 | } else { | ||||
1916 | // There are two separate groups, one before this group and one | ||||
1917 | // after us (at the beginning). We're going to remove this group, | ||||
1918 | // but also the group at the very beginning. | ||||
1919 | IP->EndIdx = IN->EndIdx; | ||||
1920 | IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; | ||||
1921 | IP->Repl32Coalesced = true; | ||||
1922 | I = BitGroups.erase(I); | ||||
1923 | BitGroups.erase(BitGroups.begin()); | ||||
1924 | } | ||||
1925 | |||||
1926 | // This must be the last group in the vector (and we might have | ||||
1927 | // just invalidated the iterator above), so break here. | ||||
1928 | break; | ||||
1929 | } | ||||
1930 | } | ||||
1931 | } | ||||
1932 | |||||
1933 | ++I; | ||||
1934 | } | ||||
1935 | } | ||||
1936 | |||||
1937 | SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { | ||||
1938 | return CurDAG->getTargetConstant(Imm, dl, MVT::i32); | ||||
1939 | } | ||||
1940 | |||||
1941 | uint64_t getZerosMask() { | ||||
1942 | uint64_t Mask = 0; | ||||
1943 | for (unsigned i = 0; i < Bits.size(); ++i) { | ||||
1944 | if (Bits[i].hasValue()) | ||||
1945 | continue; | ||||
1946 | Mask |= (UINT64_C(1)1UL << i); | ||||
1947 | } | ||||
1948 | |||||
1949 | return ~Mask; | ||||
1950 | } | ||||
1951 | |||||
1952 | // This method extends an input value to 64 bit if input is 32-bit integer. | ||||
1953 | // While selecting instructions in BitPermutationSelector in 64-bit mode, | ||||
1954 | // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. | ||||
1955 | // In such case, we extend it to 64 bit to be consistent with other values. | ||||
1956 | SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { | ||||
1957 | if (V.getValueSizeInBits() == 64) | ||||
1958 | return V; | ||||
1959 | |||||
1960 | assert(V.getValueSizeInBits() == 32)(static_cast <bool> (V.getValueSizeInBits() == 32) ? void (0) : __assert_fail ("V.getValueSizeInBits() == 32", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1960, __extension__ __PRETTY_FUNCTION__)); | ||||
1961 | SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); | ||||
1962 | SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, | ||||
1963 | MVT::i64), 0); | ||||
1964 | SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, | ||||
1965 | MVT::i64, ImDef, V, | ||||
1966 | SubRegIdx), 0); | ||||
1967 | return ExtVal; | ||||
1968 | } | ||||
1969 | |||||
1970 | SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { | ||||
1971 | if (V.getValueSizeInBits() == 32) | ||||
1972 | return V; | ||||
1973 | |||||
1974 | assert(V.getValueSizeInBits() == 64)(static_cast <bool> (V.getValueSizeInBits() == 64) ? void (0) : __assert_fail ("V.getValueSizeInBits() == 64", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 1974, __extension__ __PRETTY_FUNCTION__)); | ||||
1975 | SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); | ||||
1976 | SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, | ||||
1977 | MVT::i32, V, SubRegIdx), 0); | ||||
1978 | return SubVal; | ||||
1979 | } | ||||
1980 | |||||
1981 | // Depending on the number of groups for a particular value, it might be | ||||
1982 | // better to rotate, mask explicitly (using andi/andis), and then or the | ||||
1983 | // result. Select this part of the result first. | ||||
1984 | void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { | ||||
1985 | if (BPermRewriterNoMasking) | ||||
1986 | return; | ||||
1987 | |||||
1988 | for (ValueRotInfo &VRI : ValueRotsVec) { | ||||
1989 | unsigned Mask = 0; | ||||
1990 | for (unsigned i = 0; i < Bits.size(); ++i) { | ||||
1991 | if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V) | ||||
1992 | continue; | ||||
1993 | if (RLAmt[i] != VRI.RLAmt) | ||||
1994 | continue; | ||||
1995 | Mask |= (1u << i); | ||||
1996 | } | ||||
1997 | |||||
1998 | // Compute the masks for andi/andis that would be necessary. | ||||
1999 | unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16; | ||||
2000 | assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask for value bit groups") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2001, __extension__ __PRETTY_FUNCTION__)) | ||||
2001 | "No set bits in mask for value bit groups")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask for value bit groups") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask for value bit groups\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2001, __extension__ __PRETTY_FUNCTION__)); | ||||
2002 | bool NeedsRotate = VRI.RLAmt != 0; | ||||
2003 | |||||
2004 | // We're trying to minimize the number of instructions. If we have one | ||||
2005 | // group, using one of andi/andis can break even. If we have three | ||||
2006 | // groups, we can use both andi and andis and break even (to use both | ||||
2007 | // andi and andis we also need to or the results together). We need four | ||||
2008 | // groups if we also need to rotate. To use andi/andis we need to do more | ||||
2009 | // than break even because rotate-and-mask instructions tend to be easier | ||||
2010 | // to schedule. | ||||
2011 | |||||
2012 | // FIXME: We've biased here against using andi/andis, which is right for | ||||
2013 | // POWER cores, but not optimal everywhere. For example, on the A2, | ||||
2014 | // andi/andis have single-cycle latency whereas the rotate-and-mask | ||||
2015 | // instructions take two cycles, and it would be better to bias toward | ||||
2016 | // andi/andis in break-even cases. | ||||
2017 | |||||
2018 | unsigned NumAndInsts = (unsigned) NeedsRotate + | ||||
2019 | (unsigned) (ANDIMask != 0) + | ||||
2020 | (unsigned) (ANDISMask != 0) + | ||||
2021 | (unsigned) (ANDIMask != 0 && ANDISMask != 0) + | ||||
2022 | (unsigned) (bool) Res; | ||||
2023 | |||||
2024 | LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"; } } while (false) | ||||
2025 | << " RL: " << VRI.RLAmt << ":"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"; } } while (false) | ||||
2026 | << "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"; } } while (false) | ||||
2027 | << " using rotates: " << VRI.NumGroups << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"; } } while (false); | ||||
2028 | |||||
2029 | if (NumAndInsts >= VRI.NumGroups) | ||||
2030 | continue; | ||||
2031 | |||||
2032 | LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\t\t\tusing masking\n"; } } while (false); | ||||
2033 | |||||
2034 | if (InstCnt) *InstCnt += NumAndInsts; | ||||
2035 | |||||
2036 | SDValue VRot; | ||||
2037 | if (VRI.RLAmt) { | ||||
2038 | SDValue Ops[] = | ||||
2039 | { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), | ||||
2040 | getI32Imm(0, dl), getI32Imm(31, dl) }; | ||||
2041 | VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, | ||||
2042 | Ops), 0); | ||||
2043 | } else { | ||||
2044 | VRot = TruncateToInt32(VRI.V, dl); | ||||
2045 | } | ||||
2046 | |||||
2047 | SDValue ANDIVal, ANDISVal; | ||||
2048 | if (ANDIMask != 0) | ||||
2049 | ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, | ||||
2050 | VRot, getI32Imm(ANDIMask, dl)), | ||||
2051 | 0); | ||||
2052 | if (ANDISMask != 0) | ||||
2053 | ANDISVal = | ||||
2054 | SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot, | ||||
2055 | getI32Imm(ANDISMask, dl)), | ||||
2056 | 0); | ||||
2057 | |||||
2058 | SDValue TotalVal; | ||||
2059 | if (!ANDIVal) | ||||
2060 | TotalVal = ANDISVal; | ||||
2061 | else if (!ANDISVal) | ||||
2062 | TotalVal = ANDIVal; | ||||
2063 | else | ||||
2064 | TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, | ||||
2065 | ANDIVal, ANDISVal), 0); | ||||
2066 | |||||
2067 | if (!Res) | ||||
2068 | Res = TotalVal; | ||||
2069 | else | ||||
2070 | Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, | ||||
2071 | Res, TotalVal), 0); | ||||
2072 | |||||
2073 | // Now, remove all groups with this underlying value and rotation | ||||
2074 | // factor. | ||||
2075 | eraseMatchingBitGroups([VRI](const BitGroup &BG) { | ||||
2076 | return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; | ||||
2077 | }); | ||||
2078 | } | ||||
2079 | } | ||||
2080 | |||||
2081 | // Instruction selection for the 32-bit case. | ||||
2082 | SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) { | ||||
2083 | SDLoc dl(N); | ||||
2084 | SDValue Res; | ||||
2085 | |||||
2086 | if (InstCnt) *InstCnt = 0; | ||||
2087 | |||||
2088 | // Take care of cases that should use andi/andis first. | ||||
2089 | SelectAndParts32(dl, Res, InstCnt); | ||||
2090 | |||||
2091 | // If we've not yet selected a 'starting' instruction, and we have no zeros | ||||
2092 | // to fill in, select the (Value, RLAmt) with the highest priority (largest | ||||
2093 | // number of groups), and start with this rotated value. | ||||
2094 | if ((!NeedMask || LateMask) && !Res) { | ||||
2095 | ValueRotInfo &VRI = ValueRotsVec[0]; | ||||
2096 | if (VRI.RLAmt) { | ||||
2097 | if (InstCnt) *InstCnt += 1; | ||||
2098 | SDValue Ops[] = | ||||
2099 | { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), | ||||
2100 | getI32Imm(0, dl), getI32Imm(31, dl) }; | ||||
2101 | Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), | ||||
2102 | 0); | ||||
2103 | } else { | ||||
2104 | Res = TruncateToInt32(VRI.V, dl); | ||||
2105 | } | ||||
2106 | |||||
2107 | // Now, remove all groups with this underlying value and rotation factor. | ||||
2108 | eraseMatchingBitGroups([VRI](const BitGroup &BG) { | ||||
2109 | return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; | ||||
2110 | }); | ||||
2111 | } | ||||
2112 | |||||
2113 | if (InstCnt) *InstCnt += BitGroups.size(); | ||||
2114 | |||||
2115 | // Insert the other groups (one at a time). | ||||
2116 | for (auto &BG : BitGroups) { | ||||
2117 | if (!Res) { | ||||
2118 | SDValue Ops[] = | ||||
2119 | { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), | ||||
2120 | getI32Imm(Bits.size() - BG.EndIdx - 1, dl), | ||||
2121 | getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; | ||||
2122 | Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); | ||||
2123 | } else { | ||||
2124 | SDValue Ops[] = | ||||
2125 | { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), | ||||
2126 | getI32Imm(Bits.size() - BG.EndIdx - 1, dl), | ||||
2127 | getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; | ||||
2128 | Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); | ||||
2129 | } | ||||
2130 | } | ||||
2131 | |||||
2132 | if (LateMask) { | ||||
2133 | unsigned Mask = (unsigned) getZerosMask(); | ||||
2134 | |||||
2135 | unsigned ANDIMask = (Mask & UINT16_MAX(65535)), ANDISMask = Mask >> 16; | ||||
2136 | assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in zeros mask?") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in zeros mask?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2137, __extension__ __PRETTY_FUNCTION__)) | ||||
2137 | "No set bits in zeros mask?")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in zeros mask?") ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in zeros mask?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2137, __extension__ __PRETTY_FUNCTION__)); | ||||
2138 | |||||
2139 | if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + | ||||
2140 | (unsigned) (ANDISMask != 0) + | ||||
2141 | (unsigned) (ANDIMask != 0 && ANDISMask != 0); | ||||
2142 | |||||
2143 | SDValue ANDIVal, ANDISVal; | ||||
2144 | if (ANDIMask != 0) | ||||
2145 | ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, | ||||
2146 | Res, getI32Imm(ANDIMask, dl)), | ||||
2147 | 0); | ||||
2148 | if (ANDISMask != 0) | ||||
2149 | ANDISVal = | ||||
2150 | SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res, | ||||
2151 | getI32Imm(ANDISMask, dl)), | ||||
2152 | 0); | ||||
2153 | |||||
2154 | if (!ANDIVal) | ||||
2155 | Res = ANDISVal; | ||||
2156 | else if (!ANDISVal) | ||||
2157 | Res = ANDIVal; | ||||
2158 | else | ||||
2159 | Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, | ||||
2160 | ANDIVal, ANDISVal), 0); | ||||
2161 | } | ||||
2162 | |||||
2163 | return Res.getNode(); | ||||
2164 | } | ||||
2165 | |||||
// Estimate how many instructions (1 or 2) a 64-bit rotate-and-mask of the
// group [MaskStart, MaskEnd] with rotate amount RLAmt takes. IsIns tells
// whether this is an insert into an existing value; the mask-to-the-edge
// forms are only counted as single instructions when it is not.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // A replicated-32-bit group is always counted as one instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  const bool MaskReachesAnEdge =
      !IsIns && (InstMaskEnd == 63 || InstMaskStart == 0);
  const bool MaskEndTracksRotate = InstMaskEnd == 63 - RLAmt;

  return (MaskReachesAnEdge || MaskEndTracksRotate) ? 1u : 2u;
}
2183 | |||||
2184 | // For 64-bit values, not all combinations of rotates and masks are | ||||
2185 | // available. Produce one if it is available. | ||||
2186 | SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, | ||||
2187 | bool Repl32, unsigned MaskStart, unsigned MaskEnd, | ||||
2188 | unsigned *InstCnt = nullptr) { | ||||
2189 | // In the notation used by the instructions, 'start' and 'end' are reversed | ||||
2190 | // because bits are counted from high to low order. | ||||
2191 | unsigned InstMaskStart = 64 - MaskEnd - 1, | ||||
2192 | InstMaskEnd = 64 - MaskStart - 1; | ||||
2193 | |||||
2194 | if (InstCnt) *InstCnt += 1; | ||||
2195 | |||||
2196 | if (Repl32) { | ||||
2197 | // This rotation amount assumes that the lower 32 bits of the quantity | ||||
2198 | // are replicated in the high 32 bits by the rotation operator (which is | ||||
2199 | // done by rlwinm and friends). | ||||
2200 | assert(InstMaskStart >= 32 && "Mask cannot start out of range")(static_cast <bool> (InstMaskStart >= 32 && "Mask cannot start out of range" ) ? void (0) : __assert_fail ("InstMaskStart >= 32 && \"Mask cannot start out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2200, __extension__ __PRETTY_FUNCTION__)); | ||||
2201 | assert(InstMaskEnd >= 32 && "Mask cannot end out of range")(static_cast <bool> (InstMaskEnd >= 32 && "Mask cannot end out of range" ) ? void (0) : __assert_fail ("InstMaskEnd >= 32 && \"Mask cannot end out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2201, __extension__ __PRETTY_FUNCTION__)); | ||||
2202 | SDValue Ops[] = | ||||
2203 | { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2204 | getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; | ||||
2205 | return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, | ||||
2206 | Ops), 0); | ||||
2207 | } | ||||
2208 | |||||
2209 | if (InstMaskEnd == 63) { | ||||
2210 | SDValue Ops[] = | ||||
2211 | { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2212 | getI32Imm(InstMaskStart, dl) }; | ||||
2213 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); | ||||
2214 | } | ||||
2215 | |||||
2216 | if (InstMaskStart == 0) { | ||||
2217 | SDValue Ops[] = | ||||
2218 | { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2219 | getI32Imm(InstMaskEnd, dl) }; | ||||
2220 | return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); | ||||
2221 | } | ||||
2222 | |||||
2223 | if (InstMaskEnd == 63 - RLAmt) { | ||||
2224 | SDValue Ops[] = | ||||
2225 | { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2226 | getI32Imm(InstMaskStart, dl) }; | ||||
2227 | return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); | ||||
2228 | } | ||||
2229 | |||||
2230 | // We cannot do this with a single instruction, so we'll use two. The | ||||
2231 | // problem is that we're not free to choose both a rotation amount and mask | ||||
2232 | // start and end independently. We can choose an arbitrary mask start and | ||||
2233 | // end, but then the rotation amount is fixed. Rotation, however, can be | ||||
2234 | // inverted, and so by applying an "inverse" rotation first, we can get the | ||||
2235 | // desired result. | ||||
2236 | if (InstCnt) *InstCnt += 1; | ||||
2237 | |||||
2238 | // The rotation mask for the second instruction must be MaskStart. | ||||
2239 | unsigned RLAmt2 = MaskStart; | ||||
2240 | // The first instruction must rotate V so that the overall rotation amount | ||||
2241 | // is RLAmt. | ||||
2242 | unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; | ||||
2243 | if (RLAmt1) | ||||
2244 | V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); | ||||
2245 | return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd); | ||||
2246 | } | ||||
2247 | |||||
2248 | // For 64-bit values, not all combinations of rotates and masks are | ||||
2249 | // available. Produce a rotate-mask-and-insert if one is available. | ||||
2250 | SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, | ||||
2251 | unsigned RLAmt, bool Repl32, unsigned MaskStart, | ||||
2252 | unsigned MaskEnd, unsigned *InstCnt = nullptr) { | ||||
2253 | // In the notation used by the instructions, 'start' and 'end' are reversed | ||||
2254 | // because bits are counted from high to low order. | ||||
2255 | unsigned InstMaskStart = 64 - MaskEnd - 1, | ||||
2256 | InstMaskEnd = 64 - MaskStart - 1; | ||||
2257 | |||||
2258 | if (InstCnt) *InstCnt += 1; | ||||
2259 | |||||
2260 | if (Repl32) { | ||||
2261 | // This rotation amount assumes that the lower 32 bits of the quantity | ||||
2262 | // are replicated in the high 32 bits by the rotation operator (which is | ||||
2263 | // done by rlwinm and friends). | ||||
2264 | assert(InstMaskStart >= 32 && "Mask cannot start out of range")(static_cast <bool> (InstMaskStart >= 32 && "Mask cannot start out of range" ) ? void (0) : __assert_fail ("InstMaskStart >= 32 && \"Mask cannot start out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2264, __extension__ __PRETTY_FUNCTION__)); | ||||
2265 | assert(InstMaskEnd >= 32 && "Mask cannot end out of range")(static_cast <bool> (InstMaskEnd >= 32 && "Mask cannot end out of range" ) ? void (0) : __assert_fail ("InstMaskEnd >= 32 && \"Mask cannot end out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2265, __extension__ __PRETTY_FUNCTION__)); | ||||
2266 | SDValue Ops[] = | ||||
2267 | { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2268 | getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; | ||||
2269 | return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, | ||||
2270 | Ops), 0); | ||||
2271 | } | ||||
2272 | |||||
2273 | if (InstMaskEnd == 63 - RLAmt) { | ||||
2274 | SDValue Ops[] = | ||||
2275 | { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), | ||||
2276 | getI32Imm(InstMaskStart, dl) }; | ||||
2277 | return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); | ||||
2278 | } | ||||
2279 | |||||
2280 | // We cannot do this with a single instruction, so we'll use two. The | ||||
2281 | // problem is that we're not free to choose both a rotation amount and mask | ||||
2282 | // start and end independently. We can choose an arbitrary mask start and | ||||
2283 | // end, but then the rotation amount is fixed. Rotation, however, can be | ||||
2284 | // inverted, and so by applying an "inverse" rotation first, we can get the | ||||
2285 | // desired result. | ||||
2286 | if (InstCnt) *InstCnt += 1; | ||||
2287 | |||||
2288 | // The rotation mask for the second instruction must be MaskStart. | ||||
2289 | unsigned RLAmt2 = MaskStart; | ||||
2290 | // The first instruction must rotate V so that the overall rotation amount | ||||
2291 | // is RLAmt. | ||||
2292 | unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; | ||||
2293 | if (RLAmt1) | ||||
2294 | V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); | ||||
2295 | return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); | ||||
2296 | } | ||||
2297 | |||||
2298 | void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { | ||||
2299 | if (BPermRewriterNoMasking) | ||||
2300 | return; | ||||
2301 | |||||
2302 | // The idea here is the same as in the 32-bit version, but with additional | ||||
2303 | // complications from the fact that Repl32 might be true. Because we | ||||
2304 | // aggressively convert bit groups to Repl32 form (which, for small | ||||
2305 | // rotation factors, involves no other change), and then coalesce, it might | ||||
2306 | // be the case that a single 64-bit masking operation could handle both | ||||
2307 | // some Repl32 groups and some non-Repl32 groups. If converting to Repl32 | ||||
2308 | // form allowed coalescing, then we must use a 32-bit rotaton in order to | ||||
2309 | // completely capture the new combined bit group. | ||||
2310 | |||||
2311 | for (ValueRotInfo &VRI : ValueRotsVec) { | ||||
2312 | uint64_t Mask = 0; | ||||
2313 | |||||
2314 | // We need to add to the mask all bits from the associated bit groups. | ||||
2315 | // If Repl32 is false, we need to add bits from bit groups that have | ||||
2316 | // Repl32 true, but are trivially convertable to Repl32 false. Such a | ||||
2317 | // group is trivially convertable if it overlaps only with the lower 32 | ||||
2318 | // bits, and the group has not been coalesced. | ||||
2319 | auto MatchingBG = [VRI](const BitGroup &BG) { | ||||
2320 | if (VRI.V != BG.V) | ||||
2321 | return false; | ||||
2322 | |||||
2323 | unsigned EffRLAmt = BG.RLAmt; | ||||
2324 | if (!VRI.Repl32 && BG.Repl32) { | ||||
2325 | if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && | ||||
2326 | !BG.Repl32Coalesced) { | ||||
2327 | if (BG.Repl32CR) | ||||
2328 | EffRLAmt += 32; | ||||
2329 | } else { | ||||
2330 | return false; | ||||
2331 | } | ||||
2332 | } else if (VRI.Repl32 != BG.Repl32) { | ||||
2333 | return false; | ||||
2334 | } | ||||
2335 | |||||
2336 | return VRI.RLAmt == EffRLAmt; | ||||
2337 | }; | ||||
2338 | |||||
2339 | for (auto &BG : BitGroups) { | ||||
2340 | if (!MatchingBG(BG)) | ||||
2341 | continue; | ||||
2342 | |||||
2343 | if (BG.StartIdx <= BG.EndIdx) { | ||||
2344 | for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) | ||||
2345 | Mask |= (UINT64_C(1)1UL << i); | ||||
2346 | } else { | ||||
2347 | for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) | ||||
2348 | Mask |= (UINT64_C(1)1UL << i); | ||||
2349 | for (unsigned i = 0; i <= BG.EndIdx; ++i) | ||||
2350 | Mask |= (UINT64_C(1)1UL << i); | ||||
2351 | } | ||||
2352 | } | ||||
2353 | |||||
2354 | // We can use the 32-bit andi/andis technique if the mask does not | ||||
2355 | // require any higher-order bits. This can save an instruction compared | ||||
2356 | // to always using the general 64-bit technique. | ||||
2357 | bool Use32BitInsts = isUInt<32>(Mask); | ||||
2358 | // Compute the masks for andi/andis that would be necessary. | ||||
2359 | unsigned ANDIMask = (Mask & UINT16_MAX(65535)), | ||||
2360 | ANDISMask = (Mask >> 16) & UINT16_MAX(65535); | ||||
2361 | |||||
2362 | bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); | ||||
2363 | |||||
2364 | unsigned NumAndInsts = (unsigned) NeedsRotate + | ||||
2365 | (unsigned) (bool) Res; | ||||
2366 | unsigned NumOfSelectInsts = 0; | ||||
2367 | selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts); | ||||
2368 | assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.")(static_cast <bool> (NumOfSelectInsts > 0 && "Failed to select an i64 constant.") ? void (0) : __assert_fail ("NumOfSelectInsts > 0 && \"Failed to select an i64 constant.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2368, __extension__ __PRETTY_FUNCTION__)); | ||||
2369 | if (Use32BitInsts) | ||||
2370 | NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + | ||||
2371 | (unsigned) (ANDIMask != 0 && ANDISMask != 0); | ||||
2372 | else | ||||
2373 | NumAndInsts += NumOfSelectInsts + /* and */ 1; | ||||
2374 | |||||
2375 | unsigned NumRLInsts = 0; | ||||
2376 | bool FirstBG = true; | ||||
2377 | bool MoreBG = false; | ||||
2378 | for (auto &BG : BitGroups) { | ||||
2379 | if (!MatchingBG(BG)) { | ||||
2380 | MoreBG = true; | ||||
2381 | continue; | ||||
2382 | } | ||||
2383 | NumRLInsts += | ||||
2384 | SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, | ||||
2385 | !FirstBG); | ||||
2386 | FirstBG = false; | ||||
2387 | } | ||||
2388 | |||||
2389 | LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"; } } while (false) | ||||
2390 | << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"; } } while (false) | ||||
2391 | << "\n\t\t\tisel using masking: " << NumAndInstsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"; } } while (false) | ||||
2392 | << " using rotates: " << NumRLInsts << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"; } } while (false); | ||||
2393 | |||||
2394 | // When we'd use andi/andis, we bias toward using the rotates (andi only | ||||
2395 | // has a record form, and is cracked on POWER cores). However, when using | ||||
2396 | // general 64-bit constant formation, bias toward the constant form, | ||||
2397 | // because that exposes more opportunities for CSE. | ||||
2398 | if (NumAndInsts > NumRLInsts) | ||||
2399 | continue; | ||||
2400 | // When merging multiple bit groups, instruction or is used. | ||||
2401 | // But when rotate is used, rldimi can inert the rotated value into any | ||||
2402 | // register, so instruction or can be avoided. | ||||
2403 | if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) | ||||
2404 | continue; | ||||
2405 | |||||
2406 | LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\t\t\tusing masking\n"; } } while (false); | ||||
2407 | |||||
2408 | if (InstCnt) *InstCnt += NumAndInsts; | ||||
2409 | |||||
2410 | SDValue VRot; | ||||
2411 | // We actually need to generate a rotation if we have a non-zero rotation | ||||
2412 | // factor or, in the Repl32 case, if we care about any of the | ||||
2413 | // higher-order replicated bits. In the latter case, we generate a mask | ||||
2414 | // backward so that it actually includes the entire 64 bits. | ||||
2415 | if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) | ||||
2416 | VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, | ||||
2417 | VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63); | ||||
2418 | else | ||||
2419 | VRot = VRI.V; | ||||
2420 | |||||
2421 | SDValue TotalVal; | ||||
2422 | if (Use32BitInsts) { | ||||
2423 | assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value" ) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2424, __extension__ __PRETTY_FUNCTION__)) | ||||
2424 | "No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value" ) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2424, __extension__ __PRETTY_FUNCTION__)); | ||||
2425 | |||||
2426 | SDValue ANDIVal, ANDISVal; | ||||
2427 | if (ANDIMask != 0) | ||||
2428 | ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, | ||||
2429 | ExtendToInt64(VRot, dl), | ||||
2430 | getI32Imm(ANDIMask, dl)), | ||||
2431 | 0); | ||||
2432 | if (ANDISMask != 0) | ||||
2433 | ANDISVal = | ||||
2434 | SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, | ||||
2435 | ExtendToInt64(VRot, dl), | ||||
2436 | getI32Imm(ANDISMask, dl)), | ||||
2437 | 0); | ||||
2438 | |||||
2439 | if (!ANDIVal) | ||||
2440 | TotalVal = ANDISVal; | ||||
2441 | else if (!ANDISVal) | ||||
2442 | TotalVal = ANDIVal; | ||||
2443 | else | ||||
2444 | TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, | ||||
2445 | ExtendToInt64(ANDIVal, dl), ANDISVal), 0); | ||||
2446 | } else { | ||||
2447 | TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); | ||||
2448 | TotalVal = | ||||
2449 | SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, | ||||
2450 | ExtendToInt64(VRot, dl), TotalVal), | ||||
2451 | 0); | ||||
2452 | } | ||||
2453 | |||||
2454 | if (!Res) | ||||
2455 | Res = TotalVal; | ||||
2456 | else | ||||
2457 | Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, | ||||
2458 | ExtendToInt64(Res, dl), TotalVal), | ||||
2459 | 0); | ||||
2460 | |||||
2461 | // Now, remove all groups with this underlying value and rotation | ||||
2462 | // factor. | ||||
2463 | eraseMatchingBitGroups(MatchingBG); | ||||
2464 | } | ||||
2465 | } | ||||
2466 | |||||
2467 | // Instruction selection for the 64-bit case. | ||||
2468 | SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { | ||||
2469 | SDLoc dl(N); | ||||
2470 | SDValue Res; | ||||
2471 | |||||
2472 | if (InstCnt) *InstCnt = 0; | ||||
2473 | |||||
2474 | // Take care of cases that should use andi/andis first. | ||||
2475 | SelectAndParts64(dl, Res, InstCnt); | ||||
2476 | |||||
2477 | // If we've not yet selected a 'starting' instruction, and we have no zeros | ||||
2478 | // to fill in, select the (Value, RLAmt) with the highest priority (largest | ||||
2479 | // number of groups), and start with this rotated value. | ||||
2480 | if ((!NeedMask || LateMask) && !Res) { | ||||
2481 | // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 | ||||
2482 | // groups will come first, and so the VRI representing the largest number | ||||
2483 | // of groups might not be first (it might be the first Repl32 groups). | ||||
2484 | unsigned MaxGroupsIdx = 0; | ||||
2485 | if (!ValueRotsVec[0].Repl32) { | ||||
2486 | for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) | ||||
2487 | if (ValueRotsVec[i].Repl32) { | ||||
2488 | if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) | ||||
2489 | MaxGroupsIdx = i; | ||||
2490 | break; | ||||
2491 | } | ||||
2492 | } | ||||
2493 | |||||
2494 | ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; | ||||
2495 | bool NeedsRotate = false; | ||||
2496 | if (VRI.RLAmt) { | ||||
2497 | NeedsRotate = true; | ||||
2498 | } else if (VRI.Repl32) { | ||||
2499 | for (auto &BG : BitGroups) { | ||||
2500 | if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || | ||||
2501 | BG.Repl32 != VRI.Repl32) | ||||
2502 | continue; | ||||
2503 | |||||
2504 | // We don't need a rotate if the bit group is confined to the lower | ||||
2505 | // 32 bits. | ||||
2506 | if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) | ||||
2507 | continue; | ||||
2508 | |||||
2509 | NeedsRotate = true; | ||||
2510 | break; | ||||
2511 | } | ||||
2512 | } | ||||
2513 | |||||
2514 | if (NeedsRotate) | ||||
2515 | Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, | ||||
2516 | VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, | ||||
2517 | InstCnt); | ||||
2518 | else | ||||
2519 | Res = VRI.V; | ||||
2520 | |||||
2521 | // Now, remove all groups with this underlying value and rotation factor. | ||||
2522 | if (Res) | ||||
2523 | eraseMatchingBitGroups([VRI](const BitGroup &BG) { | ||||
2524 | return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && | ||||
2525 | BG.Repl32 == VRI.Repl32; | ||||
2526 | }); | ||||
2527 | } | ||||
2528 | |||||
2529 | // Because 64-bit rotates are more flexible than inserts, we might have a | ||||
2530 | // preference regarding which one we do first (to save one instruction). | ||||
2531 | if (!Res) | ||||
2532 | for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { | ||||
2533 | if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, | ||||
2534 | false) < | ||||
2535 | SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, | ||||
2536 | true)) { | ||||
2537 | if (I != BitGroups.begin()) { | ||||
2538 | BitGroup BG = *I; | ||||
2539 | BitGroups.erase(I); | ||||
2540 | BitGroups.insert(BitGroups.begin(), BG); | ||||
2541 | } | ||||
2542 | |||||
2543 | break; | ||||
2544 | } | ||||
2545 | } | ||||
2546 | |||||
2547 | // Insert the other groups (one at a time). | ||||
2548 | for (auto &BG : BitGroups) { | ||||
2549 | if (!Res) | ||||
2550 | Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, | ||||
2551 | BG.EndIdx, InstCnt); | ||||
2552 | else | ||||
2553 | Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, | ||||
2554 | BG.StartIdx, BG.EndIdx, InstCnt); | ||||
2555 | } | ||||
2556 | |||||
2557 | if (LateMask) { | ||||
2558 | uint64_t Mask = getZerosMask(); | ||||
2559 | |||||
2560 | // We can use the 32-bit andi/andis technique if the mask does not | ||||
2561 | // require any higher-order bits. This can save an instruction compared | ||||
2562 | // to always using the general 64-bit technique. | ||||
2563 | bool Use32BitInsts = isUInt<32>(Mask); | ||||
2564 | // Compute the masks for andi/andis that would be necessary. | ||||
2565 | unsigned ANDIMask = (Mask & UINT16_MAX(65535)), | ||||
2566 | ANDISMask = (Mask >> 16) & UINT16_MAX(65535); | ||||
2567 | |||||
2568 | if (Use32BitInsts) { | ||||
2569 | assert((ANDIMask != 0 || ANDISMask != 0) &&(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value" ) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2570, __extension__ __PRETTY_FUNCTION__)) | ||||
2570 | "No set bits in mask when using 32-bit ands for 64-bit value")(static_cast <bool> ((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value" ) ? void (0) : __assert_fail ("(ANDIMask != 0 || ANDISMask != 0) && \"No set bits in mask when using 32-bit ands for 64-bit value\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2570, __extension__ __PRETTY_FUNCTION__)); | ||||
2571 | |||||
2572 | if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + | ||||
2573 | (unsigned) (ANDISMask != 0) + | ||||
2574 | (unsigned) (ANDIMask != 0 && ANDISMask != 0); | ||||
2575 | |||||
2576 | SDValue ANDIVal, ANDISVal; | ||||
2577 | if (ANDIMask != 0) | ||||
2578 | ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, | ||||
2579 | ExtendToInt64(Res, dl), | ||||
2580 | getI32Imm(ANDIMask, dl)), | ||||
2581 | 0); | ||||
2582 | if (ANDISMask != 0) | ||||
2583 | ANDISVal = | ||||
2584 | SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, | ||||
2585 | ExtendToInt64(Res, dl), | ||||
2586 | getI32Imm(ANDISMask, dl)), | ||||
2587 | 0); | ||||
2588 | |||||
2589 | if (!ANDIVal) | ||||
2590 | Res = ANDISVal; | ||||
2591 | else if (!ANDISVal) | ||||
2592 | Res = ANDIVal; | ||||
2593 | else | ||||
2594 | Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, | ||||
2595 | ExtendToInt64(ANDIVal, dl), ANDISVal), 0); | ||||
2596 | } else { | ||||
2597 | unsigned NumOfSelectInsts = 0; | ||||
2598 | SDValue MaskVal = | ||||
2599 | SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0); | ||||
2600 | Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, | ||||
2601 | ExtendToInt64(Res, dl), MaskVal), | ||||
2602 | 0); | ||||
2603 | if (InstCnt) | ||||
2604 | *InstCnt += NumOfSelectInsts + /* and */ 1; | ||||
2605 | } | ||||
2606 | } | ||||
2607 | |||||
2608 | return Res.getNode(); | ||||
2609 | } | ||||
2610 | |||||
2611 | SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { | ||||
2612 | // Fill in BitGroups. | ||||
2613 | collectBitGroups(LateMask); | ||||
2614 | if (BitGroups.empty()) | ||||
2615 | return nullptr; | ||||
2616 | |||||
2617 | // For 64-bit values, figure out when we can use 32-bit instructions. | ||||
2618 | if (Bits.size() == 64) | ||||
2619 | assignRepl32BitGroups(); | ||||
2620 | |||||
2621 | // Fill in ValueRotsVec. | ||||
2622 | collectValueRotInfo(); | ||||
2623 | |||||
2624 | if (Bits.size() == 32) { | ||||
2625 | return Select32(N, LateMask, InstCnt); | ||||
2626 | } else { | ||||
2627 | assert(Bits.size() == 64 && "Not 64 bits here?")(static_cast <bool> (Bits.size() == 64 && "Not 64 bits here?" ) ? void (0) : __assert_fail ("Bits.size() == 64 && \"Not 64 bits here?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2627, __extension__ __PRETTY_FUNCTION__)); | ||||
2628 | return Select64(N, LateMask, InstCnt); | ||||
2629 | } | ||||
2630 | |||||
2631 | return nullptr; | ||||
2632 | } | ||||
2633 | |||||
2634 | void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { | ||||
2635 | erase_if(BitGroups, F); | ||||
2636 | } | ||||
2637 | |||||
2638 | SmallVector<ValueBit, 64> Bits; | ||||
2639 | |||||
2640 | bool NeedMask = false; | ||||
2641 | SmallVector<unsigned, 64> RLAmt; | ||||
2642 | |||||
2643 | SmallVector<BitGroup, 16> BitGroups; | ||||
2644 | |||||
2645 | DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots; | ||||
2646 | SmallVector<ValueRotInfo, 16> ValueRotsVec; | ||||
2647 | |||||
2648 | SelectionDAG *CurDAG = nullptr; | ||||
2649 | |||||
2650 | public: | ||||
2651 | BitPermutationSelector(SelectionDAG *DAG) | ||||
2652 | : CurDAG(DAG) {} | ||||
2653 | |||||
2654 | // Here we try to match complex bit permutations into a set of | ||||
2655 | // rotate-and-shift/shift/and/or instructions, using a set of heuristics | ||||
2656 | // known to produce optimal code for common cases (like i32 byte swapping). | ||||
2657 | SDNode *Select(SDNode *N) { | ||||
2658 | Memoizer.clear(); | ||||
2659 | auto Result = | ||||
2660 | getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); | ||||
2661 | if (!Result.first) | ||||
2662 | return nullptr; | ||||
2663 | Bits = std::move(*Result.second); | ||||
2664 | |||||
2665 | LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Considering bit-permutation-based instruction" " selection for: "; } } while (false) | ||||
2666 | " selection for: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Considering bit-permutation-based instruction" " selection for: "; } } while (false); | ||||
2667 | LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(CurDAG); } } while (false); | ||||
2668 | |||||
2669 | // Fill it RLAmt and set NeedMask. | ||||
2670 | computeRotationAmounts(); | ||||
2671 | |||||
2672 | if (!NeedMask) | ||||
2673 | return Select(N, false); | ||||
2674 | |||||
2675 | // We currently have two techniques for handling results with zeros: early | ||||
2676 | // masking (the default) and late masking. Late masking is sometimes more | ||||
2677 | // efficient, but because the structure of the bit groups is different, it | ||||
2678 | // is hard to tell without generating both and comparing the results. With | ||||
2679 | // late masking, we ignore zeros in the resulting value when inserting each | ||||
2680 | // set of bit groups, and then mask in the zeros at the end. With early | ||||
2681 | // masking, we only insert the non-zero parts of the result at every step. | ||||
2682 | |||||
2683 | unsigned InstCnt = 0, InstCntLateMask = 0; | ||||
2684 | LLVM_DEBUG(dbgs() << "\tEarly masking:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tEarly masking:\n"; } } while (false); | ||||
2685 | SDNode *RN = Select(N, false, &InstCnt); | ||||
2686 | LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"; } } while (false); | ||||
2687 | |||||
2688 | LLVM_DEBUG(dbgs() << "\tLate masking:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tLate masking:\n"; } } while (false); | ||||
2689 | SDNode *RNLM = Select(N, true, &InstCntLateMask); | ||||
2690 | LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMaskdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\tisel would use " << InstCntLateMask << " instructions\n"; } } while (false ) | ||||
2691 | << " instructions\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\t\tisel would use " << InstCntLateMask << " instructions\n"; } } while (false ); | ||||
2692 | |||||
2693 | if (InstCnt <= InstCntLateMask) { | ||||
2694 | LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tUsing early-masking for isel\n" ; } } while (false); | ||||
2695 | return RN; | ||||
2696 | } | ||||
2697 | |||||
2698 | LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\tUsing late-masking for isel\n" ; } } while (false); | ||||
2699 | return RNLM; | ||||
2700 | } | ||||
2701 | }; | ||||
2702 | |||||
2703 | class IntegerCompareEliminator { | ||||
2704 | SelectionDAG *CurDAG; | ||||
2705 | PPCDAGToDAGISel *S; | ||||
2706 | // Conversion type for interpreting results of a 32-bit instruction as | ||||
2707 | // a 64-bit value or vice versa. | ||||
2708 | enum ExtOrTruncConversion { Ext, Trunc }; | ||||
2709 | |||||
2710 | // Modifiers to guide how an ISD::SETCC node's result is to be computed | ||||
2711 | // in a GPR. | ||||
2712 | // ZExtOrig - use the original condition code, zero-extend value | ||||
2713 | // ZExtInvert - invert the condition code, zero-extend value | ||||
2714 | // SExtOrig - use the original condition code, sign-extend value | ||||
2715 | // SExtInvert - invert the condition code, sign-extend value | ||||
2716 | enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; | ||||
2717 | |||||
2718 | // Comparisons against zero to emit GPR code sequences for. Each of these | ||||
2719 | // sequences may need to be emitted for two or more equivalent patterns. | ||||
2720 | // For example (a >= 0) == (a > -1). The direction of the comparison (</>) | ||||
2721 | // matters as well as the extension type: sext (-1/0), zext (1/0). | ||||
2722 | // GEZExt - (zext (LHS >= 0)) | ||||
2723 | // GESExt - (sext (LHS >= 0)) | ||||
2724 | // LEZExt - (zext (LHS <= 0)) | ||||
2725 | // LESExt - (sext (LHS <= 0)) | ||||
2726 | enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; | ||||
2727 | |||||
2728 | SDNode *tryEXTEND(SDNode *N); | ||||
2729 | SDNode *tryLogicOpOfCompares(SDNode *N); | ||||
2730 | SDValue computeLogicOpInGPR(SDValue LogicOp); | ||||
2731 | SDValue signExtendInputIfNeeded(SDValue Input); | ||||
2732 | SDValue zeroExtendInputIfNeeded(SDValue Input); | ||||
2733 | SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); | ||||
2734 | SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, | ||||
2735 | ZeroCompare CmpTy); | ||||
2736 | SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
2737 | int64_t RHSValue, SDLoc dl); | ||||
2738 | SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
2739 | int64_t RHSValue, SDLoc dl); | ||||
2740 | SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
2741 | int64_t RHSValue, SDLoc dl); | ||||
2742 | SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
2743 | int64_t RHSValue, SDLoc dl); | ||||
2744 | SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); | ||||
2745 | |||||
2746 | public: | ||||
2747 | IntegerCompareEliminator(SelectionDAG *DAG, | ||||
2748 | PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { | ||||
2749 | assert(CurDAG->getTargetLoweringInfo()(static_cast <bool> (CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && "Only expecting to use this on 64 bit targets." ) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2751, __extension__ __PRETTY_FUNCTION__)) | ||||
2750 | .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&(static_cast <bool> (CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && "Only expecting to use this on 64 bit targets." ) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2751, __extension__ __PRETTY_FUNCTION__)) | ||||
2751 | "Only expecting to use this on 64 bit targets.")(static_cast <bool> (CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && "Only expecting to use this on 64 bit targets." ) ? void (0) : __assert_fail ("CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && \"Only expecting to use this on 64 bit targets.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2751, __extension__ __PRETTY_FUNCTION__)); | ||||
2752 | } | ||||
2753 | SDNode *Select(SDNode *N) { | ||||
2754 | if (CmpInGPR == ICGPR_None) | ||||
2755 | return nullptr; | ||||
2756 | switch (N->getOpcode()) { | ||||
2757 | default: break; | ||||
2758 | case ISD::ZERO_EXTEND: | ||||
2759 | if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || | ||||
2760 | CmpInGPR == ICGPR_SextI64) | ||||
2761 | return nullptr; | ||||
2762 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
2763 | case ISD::SIGN_EXTEND: | ||||
2764 | if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || | ||||
2765 | CmpInGPR == ICGPR_ZextI64) | ||||
2766 | return nullptr; | ||||
2767 | return tryEXTEND(N); | ||||
2768 | case ISD::AND: | ||||
2769 | case ISD::OR: | ||||
2770 | case ISD::XOR: | ||||
2771 | return tryLogicOpOfCompares(N); | ||||
2772 | } | ||||
2773 | return nullptr; | ||||
2774 | } | ||||
2775 | }; | ||||
2776 | |||||
2777 | static bool isLogicOp(unsigned Opc) { | ||||
2778 | return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; | ||||
2779 | } | ||||
2780 | // The obvious case for wanting to keep the value in a GPR. Namely, the | ||||
2781 | // result of the comparison is actually needed in a GPR. | ||||
2782 | SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { | ||||
2783 | assert((N->getOpcode() == ISD::ZERO_EXTEND ||(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2785, __extension__ __PRETTY_FUNCTION__)) | ||||
2784 | N->getOpcode() == ISD::SIGN_EXTEND) &&(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2785, __extension__ __PRETTY_FUNCTION__)) | ||||
2785 | "Expecting a zero/sign extend node!")(static_cast <bool> ((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && \"Expecting a zero/sign extend node!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2785, __extension__ __PRETTY_FUNCTION__)); | ||||
2786 | SDValue WideRes; | ||||
2787 | // If we are zero-extending the result of a logical operation on i1 | ||||
2788 | // values, we can keep the values in GPRs. | ||||
2789 | if (isLogicOp(N->getOperand(0).getOpcode()) && | ||||
2790 | N->getOperand(0).getValueType() == MVT::i1 && | ||||
2791 | N->getOpcode() == ISD::ZERO_EXTEND) | ||||
2792 | WideRes = computeLogicOpInGPR(N->getOperand(0)); | ||||
2793 | else if (N->getOperand(0).getOpcode() != ISD::SETCC) | ||||
2794 | return nullptr; | ||||
2795 | else | ||||
2796 | WideRes = | ||||
2797 | getSETCCInGPR(N->getOperand(0), | ||||
2798 | N->getOpcode() == ISD::SIGN_EXTEND ? | ||||
2799 | SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); | ||||
2800 | |||||
2801 | if (!WideRes) | ||||
2802 | return nullptr; | ||||
2803 | |||||
2804 | SDLoc dl(N); | ||||
2805 | bool Input32Bit = WideRes.getValueType() == MVT::i32; | ||||
2806 | bool Output32Bit = N->getValueType(0) == MVT::i32; | ||||
2807 | |||||
2808 | NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; | ||||
2809 | NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; | ||||
2810 | |||||
2811 | SDValue ConvOp = WideRes; | ||||
2812 | if (Input32Bit != Output32Bit) | ||||
2813 | ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : | ||||
2814 | ExtOrTruncConversion::Trunc); | ||||
2815 | return ConvOp.getNode(); | ||||
2816 | } | ||||
2817 | |||||
2818 | // Attempt to perform logical operations on the results of comparisons while | ||||
2819 | // keeping the values in GPRs. Without doing so, these would end up being | ||||
2820 | // lowered to CR-logical operations which suffer from significant latency and | ||||
2821 | // low ILP. | ||||
2822 | SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { | ||||
2823 | if (N->getValueType(0) != MVT::i1) | ||||
2824 | return nullptr; | ||||
2825 | assert(isLogicOp(N->getOpcode()) &&(static_cast <bool> (isLogicOp(N->getOpcode()) && "Expected a logic operation on setcc results.") ? void (0) : __assert_fail ("isLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2826, __extension__ __PRETTY_FUNCTION__)) | ||||
2826 | "Expected a logic operation on setcc results.")(static_cast <bool> (isLogicOp(N->getOpcode()) && "Expected a logic operation on setcc results.") ? void (0) : __assert_fail ("isLogicOp(N->getOpcode()) && \"Expected a logic operation on setcc results.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2826, __extension__ __PRETTY_FUNCTION__)); | ||||
2827 | SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); | ||||
2828 | if (!LoweredLogical) | ||||
2829 | return nullptr; | ||||
2830 | |||||
2831 | SDLoc dl(N); | ||||
2832 | bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; | ||||
2833 | unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; | ||||
2834 | SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); | ||||
2835 | SDValue LHS = LoweredLogical.getOperand(0); | ||||
2836 | SDValue RHS = LoweredLogical.getOperand(1); | ||||
2837 | SDValue WideOp; | ||||
2838 | SDValue OpToConvToRecForm; | ||||
2839 | |||||
2840 | // Look through any 32-bit to 64-bit implicit extend nodes to find the | ||||
2841 | // opcode that is input to the XORI. | ||||
2842 | if (IsBitwiseNegate && | ||||
2843 | LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) | ||||
2844 | OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); | ||||
2845 | else if (IsBitwiseNegate) | ||||
2846 | // If the input to the XORI isn't an extension, that's what we're after. | ||||
2847 | OpToConvToRecForm = LoweredLogical.getOperand(0); | ||||
2848 | else | ||||
2849 | // If this is not an XORI, it is a reg-reg logical op and we can convert | ||||
2850 | // it to record-form. | ||||
2851 | OpToConvToRecForm = LoweredLogical; | ||||
2852 | |||||
2853 | // Get the record-form version of the node we're looking to use to get the | ||||
2854 | // CR result from. | ||||
2855 | uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); | ||||
2856 | int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); | ||||
2857 | |||||
2858 | // Convert the right node to record-form. This is either the logical we're | ||||
2859 | // looking at or it is the input node to the negation (if we're looking at | ||||
2860 | // a bitwise negation). | ||||
2861 | if (NewOpc != -1 && IsBitwiseNegate) { | ||||
2862 | // The input to the XORI has a record-form. Use it. | ||||
2863 | assert(LoweredLogical.getConstantOperandVal(1) == 1 &&(static_cast <bool> (LoweredLogical.getConstantOperandVal (1) == 1 && "Expected a PPC::XORI8 only for bitwise negation." ) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2864, __extension__ __PRETTY_FUNCTION__)) | ||||
2864 | "Expected a PPC::XORI8 only for bitwise negation.")(static_cast <bool> (LoweredLogical.getConstantOperandVal (1) == 1 && "Expected a PPC::XORI8 only for bitwise negation." ) ? void (0) : __assert_fail ("LoweredLogical.getConstantOperandVal(1) == 1 && \"Expected a PPC::XORI8 only for bitwise negation.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2864, __extension__ __PRETTY_FUNCTION__)); | ||||
2865 | // Emit the record-form instruction. | ||||
2866 | std::vector<SDValue> Ops; | ||||
2867 | for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) | ||||
2868 | Ops.push_back(OpToConvToRecForm.getOperand(i)); | ||||
2869 | |||||
2870 | WideOp = | ||||
2871 | SDValue(CurDAG->getMachineNode(NewOpc, dl, | ||||
2872 | OpToConvToRecForm.getValueType(), | ||||
2873 | MVT::Glue, Ops), 0); | ||||
2874 | } else { | ||||
2875 | assert((NewOpc != -1 || !IsBitwiseNegate) &&(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate) && "No record form available for AND8/OR8/XOR8?") ? void (0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2876, __extension__ __PRETTY_FUNCTION__)) | ||||
2876 | "No record form available for AND8/OR8/XOR8?")(static_cast <bool> ((NewOpc != -1 || !IsBitwiseNegate) && "No record form available for AND8/OR8/XOR8?") ? void (0) : __assert_fail ("(NewOpc != -1 || !IsBitwiseNegate) && \"No record form available for AND8/OR8/XOR8?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2876, __extension__ __PRETTY_FUNCTION__)); | ||||
2877 | WideOp = | ||||
2878 | SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc, | ||||
2879 | dl, MVT::i64, MVT::Glue, LHS, RHS), | ||||
2880 | 0); | ||||
2881 | } | ||||
2882 | |||||
2883 | // Select this node to a single bit from CR0 set by the record-form node | ||||
2884 | // just created. For bitwise negation, use the EQ bit which is the equivalent | ||||
2885 | // of negating the result (i.e. it is a bit set when the result of the | ||||
2886 | // operation is zero). | ||||
2887 | SDValue SRIdxVal = | ||||
2888 | CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); | ||||
2889 | SDValue CRBit = | ||||
2890 | SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, | ||||
2891 | MVT::i1, CR0Reg, SRIdxVal, | ||||
2892 | WideOp.getValue(1)), 0); | ||||
2893 | return CRBit.getNode(); | ||||
2894 | } | ||||
2895 | |||||
2896 | // Lower a logical operation on i1 values into a GPR sequence if possible. | ||||
2897 | // The result can be kept in a GPR if requested. | ||||
2898 | // Three types of inputs can be handled: | ||||
2899 | // - SETCC | ||||
2900 | // - TRUNCATE | ||||
2901 | // - Logical operation (AND/OR/XOR) | ||||
2902 | // There is also a special case that is handled (namely a complement operation | ||||
2903 | // achieved with xor %a, -1). | ||||
2904 | SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { | ||||
2905 | assert(isLogicOp(LogicOp.getOpcode()) &&(static_cast <bool> (isLogicOp(LogicOp.getOpcode()) && "Can only handle logic operations here.") ? void (0) : __assert_fail ("isLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2906, __extension__ __PRETTY_FUNCTION__)) | ||||
2906 | "Can only handle logic operations here.")(static_cast <bool> (isLogicOp(LogicOp.getOpcode()) && "Can only handle logic operations here.") ? void (0) : __assert_fail ("isLogicOp(LogicOp.getOpcode()) && \"Can only handle logic operations here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2906, __extension__ __PRETTY_FUNCTION__)); | ||||
2907 | assert(LogicOp.getValueType() == MVT::i1 &&(static_cast <bool> (LogicOp.getValueType() == MVT::i1 && "Can only handle logic operations on i1 values here.") ? void (0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2908, __extension__ __PRETTY_FUNCTION__)) | ||||
2908 | "Can only handle logic operations on i1 values here.")(static_cast <bool> (LogicOp.getValueType() == MVT::i1 && "Can only handle logic operations on i1 values here.") ? void (0) : __assert_fail ("LogicOp.getValueType() == MVT::i1 && \"Can only handle logic operations on i1 values here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2908, __extension__ __PRETTY_FUNCTION__)); | ||||
2909 | SDLoc dl(LogicOp); | ||||
2910 | SDValue LHS, RHS; | ||||
2911 | |||||
2912 | // Special case: xor %a, -1 | ||||
2913 | bool IsBitwiseNegation = isBitwiseNot(LogicOp); | ||||
2914 | |||||
2915 | // Produces a GPR sequence for each operand of the binary logic operation. | ||||
2916 | // For SETCC, it produces the respective comparison, for TRUNCATE it truncates | ||||
2917 | // the value in a GPR and for logic operations, it will recursively produce | ||||
2918 | // a GPR sequence for the operation. | ||||
2919 | auto getLogicOperand = [&] (SDValue Operand) -> SDValue { | ||||
2920 | unsigned OperandOpcode = Operand.getOpcode(); | ||||
2921 | if (OperandOpcode == ISD::SETCC) | ||||
2922 | return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); | ||||
2923 | else if (OperandOpcode == ISD::TRUNCATE) { | ||||
2924 | SDValue InputOp = Operand.getOperand(0); | ||||
2925 | EVT InVT = InputOp.getValueType(); | ||||
2926 | return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : | ||||
2927 | PPC::RLDICL, dl, InVT, InputOp, | ||||
2928 | S->getI64Imm(0, dl), | ||||
2929 | S->getI64Imm(63, dl)), 0); | ||||
2930 | } else if (isLogicOp(OperandOpcode)) | ||||
2931 | return computeLogicOpInGPR(Operand); | ||||
2932 | return SDValue(); | ||||
2933 | }; | ||||
2934 | LHS = getLogicOperand(LogicOp.getOperand(0)); | ||||
2935 | RHS = getLogicOperand(LogicOp.getOperand(1)); | ||||
2936 | |||||
2937 | // If a GPR sequence can't be produced for the LHS we can't proceed. | ||||
2938 | // Not producing a GPR sequence for the RHS is only a problem if this isn't | ||||
2939 | // a bitwise negation operation. | ||||
2940 | if (!LHS || (!RHS && !IsBitwiseNegation)) | ||||
2941 | return SDValue(); | ||||
2942 | |||||
2943 | NumLogicOpsOnComparison++; | ||||
2944 | |||||
2945 | // We will use the inputs as 64-bit values. | ||||
2946 | if (LHS.getValueType() == MVT::i32) | ||||
2947 | LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); | ||||
2948 | if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) | ||||
2949 | RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); | ||||
2950 | |||||
2951 | unsigned NewOpc; | ||||
2952 | switch (LogicOp.getOpcode()) { | ||||
2953 | default: llvm_unreachable("Unknown logic operation.")::llvm::llvm_unreachable_internal("Unknown logic operation.", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2953); | ||||
2954 | case ISD::AND: NewOpc = PPC::AND8; break; | ||||
2955 | case ISD::OR: NewOpc = PPC::OR8; break; | ||||
2956 | case ISD::XOR: NewOpc = PPC::XOR8; break; | ||||
2957 | } | ||||
2958 | |||||
2959 | if (IsBitwiseNegation) { | ||||
2960 | RHS = S->getI64Imm(1, dl); | ||||
2961 | NewOpc = PPC::XORI8; | ||||
2962 | } | ||||
2963 | |||||
2964 | return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); | ||||
2965 | |||||
2966 | } | ||||
2967 | |||||
2968 | /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. | ||||
2969 | /// Otherwise just reinterpret it as a 64-bit value. | ||||
2970 | /// Useful when emitting comparison code for 32-bit values without using | ||||
2971 | /// the compare instruction (which only considers the lower 32-bits). | ||||
2972 | SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { | ||||
2973 | assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 && "Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail ("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2974, __extension__ __PRETTY_FUNCTION__)) | ||||
2974 | "Can only sign-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 && "Can only sign-extend 32-bit values here.") ? void (0) : __assert_fail ("Input.getValueType() == MVT::i32 && \"Can only sign-extend 32-bit values here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 2974, __extension__ __PRETTY_FUNCTION__)); | ||||
2975 | unsigned Opc = Input.getOpcode(); | ||||
2976 | |||||
2977 | // The value was sign extended and then truncated to 32-bits. No need to | ||||
2978 | // sign extend it again. | ||||
2979 | if (Opc == ISD::TRUNCATE && | ||||
2980 | (Input.getOperand(0).getOpcode() == ISD::AssertSext || | ||||
2981 | Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) | ||||
2982 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
2983 | |||||
2984 | LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); | ||||
2985 | // The input is a sign-extending load. All ppc sign-extending loads | ||||
2986 | // sign-extend to the full 64-bits. | ||||
2987 | if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) | ||||
2988 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
2989 | |||||
2990 | ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); | ||||
2991 | // We don't sign-extend constants. | ||||
2992 | if (InputConst) | ||||
2993 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
2994 | |||||
2995 | SDLoc dl(Input); | ||||
2996 | SignExtensionsAdded++; | ||||
2997 | return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, | ||||
2998 | MVT::i64, Input), 0); | ||||
2999 | } | ||||
3000 | |||||
3001 | /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. | ||||
3002 | /// Otherwise just reinterpret it as a 64-bit value. | ||||
3003 | /// Useful when emitting comparison code for 32-bit values without using | ||||
3004 | /// the compare instruction (which only considers the lower 32-bits). | ||||
3005 | SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { | ||||
3006 | assert(Input.getValueType() == MVT::i32 &&(static_cast <bool> (Input.getValueType() == MVT::i32 && "Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail ("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3007, __extension__ __PRETTY_FUNCTION__)) | ||||
3007 | "Can only zero-extend 32-bit values here.")(static_cast <bool> (Input.getValueType() == MVT::i32 && "Can only zero-extend 32-bit values here.") ? void (0) : __assert_fail ("Input.getValueType() == MVT::i32 && \"Can only zero-extend 32-bit values here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3007, __extension__ __PRETTY_FUNCTION__)); | ||||
3008 | unsigned Opc = Input.getOpcode(); | ||||
3009 | |||||
3010 | // The only condition under which we can omit the actual extend instruction: | ||||
3011 | // - The value is a positive constant | ||||
3012 | // - The value comes from a load that isn't a sign-extending load | ||||
3013 | // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. | ||||
3014 | bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && | ||||
3015 | (Input.getOperand(0).getOpcode() == ISD::AssertZext || | ||||
3016 | Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); | ||||
3017 | if (IsTruncateOfZExt) | ||||
3018 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
3019 | |||||
3020 | ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); | ||||
3021 | if (InputConst && InputConst->getSExtValue() >= 0) | ||||
3022 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
3023 | |||||
3024 | LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); | ||||
3025 | // The input is a load that doesn't sign-extend (it will be zero-extended). | ||||
3026 | if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) | ||||
3027 | return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); | ||||
3028 | |||||
3029 | // None of the above, need to zero-extend. | ||||
3030 | SDLoc dl(Input); | ||||
3031 | ZeroExtensionsAdded++; | ||||
3032 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, | ||||
3033 | S->getI64Imm(0, dl), | ||||
3034 | S->getI64Imm(32, dl)), 0); | ||||
3035 | } | ||||
3036 | |||||
3037 | // Handle a 32-bit value in a 64-bit register and vice-versa. These are of | ||||
3038 | // course not actual zero/sign extensions that will generate machine code, | ||||
3039 | // they're just a way to reinterpret a 32 bit value in a register as a | ||||
3040 | // 64 bit value and vice-versa. | ||||
3041 | SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, | ||||
3042 | ExtOrTruncConversion Conv) { | ||||
3043 | SDLoc dl(NatWidthRes); | ||||
3044 | |||||
3045 | // For reinterpreting 32-bit values as 64 bit values, we generate | ||||
3046 | // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> | ||||
3047 | if (Conv == ExtOrTruncConversion::Ext) { | ||||
3048 | SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); | ||||
3049 | SDValue SubRegIdx = | ||||
3050 | CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); | ||||
3051 | return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, | ||||
3052 | ImDef, NatWidthRes, SubRegIdx), 0); | ||||
3053 | } | ||||
3054 | |||||
3055 | assert(Conv == ExtOrTruncConversion::Trunc &&(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc && "Unknown convertion between 32 and 64 bit values." ) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3056, __extension__ __PRETTY_FUNCTION__)) | ||||
3056 | "Unknown convertion between 32 and 64 bit values.")(static_cast <bool> (Conv == ExtOrTruncConversion::Trunc && "Unknown convertion between 32 and 64 bit values." ) ? void (0) : __assert_fail ("Conv == ExtOrTruncConversion::Trunc && \"Unknown convertion between 32 and 64 bit values.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3056, __extension__ __PRETTY_FUNCTION__)); | ||||
3057 | // For reinterpreting 64-bit values as 32-bit values, we just need to | ||||
3058 | // EXTRACT_SUBREG (i.e. extract the low word). | ||||
3059 | SDValue SubRegIdx = | ||||
3060 | CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); | ||||
3061 | return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, | ||||
3062 | NatWidthRes, SubRegIdx), 0); | ||||
3063 | } | ||||
3064 | |||||
3065 | // Produce a GPR sequence for compound comparisons (<=, >=) against zero. | ||||
3066 | // Handle both zero-extensions and sign-extensions. | ||||
3067 | SDValue | ||||
3068 | IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, | ||||
3069 | ZeroCompare CmpTy) { | ||||
3070 | EVT InVT = LHS.getValueType(); | ||||
3071 | bool Is32Bit = InVT == MVT::i32; | ||||
3072 | SDValue ToExtend; | ||||
3073 | |||||
3074 | // Produce the value that needs to be either zero or sign extended. | ||||
3075 | switch (CmpTy) { | ||||
3076 | case ZeroCompare::GEZExt: | ||||
3077 | case ZeroCompare::GESExt: | ||||
3078 | ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, | ||||
3079 | dl, InVT, LHS, LHS), 0); | ||||
3080 | break; | ||||
3081 | case ZeroCompare::LEZExt: | ||||
3082 | case ZeroCompare::LESExt: { | ||||
3083 | if (Is32Bit) { | ||||
3084 | // Upper 32 bits cannot be undefined for this sequence. | ||||
3085 | LHS = signExtendInputIfNeeded(LHS); | ||||
3086 | SDValue Neg = | ||||
3087 | SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); | ||||
3088 | ToExtend = | ||||
3089 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3090 | Neg, S->getI64Imm(1, dl), | ||||
3091 | S->getI64Imm(63, dl)), 0); | ||||
3092 | } else { | ||||
3093 | SDValue Addi = | ||||
3094 | SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, | ||||
3095 | S->getI64Imm(~0ULL, dl)), 0); | ||||
3096 | ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, | ||||
3097 | Addi, LHS), 0); | ||||
3098 | } | ||||
3099 | break; | ||||
3100 | } | ||||
3101 | } | ||||
3102 | |||||
3103 | // For 64-bit sequences, the extensions are the same for the GE/LE cases. | ||||
3104 | if (!Is32Bit && | ||||
3105 | (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) | ||||
3106 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3107 | ToExtend, S->getI64Imm(1, dl), | ||||
3108 | S->getI64Imm(63, dl)), 0); | ||||
3109 | if (!Is32Bit && | ||||
3110 | (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) | ||||
3111 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, | ||||
3112 | S->getI64Imm(63, dl)), 0); | ||||
3113 | |||||
3114 | assert(Is32Bit && "Should have handled the 32-bit sequences above.")(static_cast <bool> (Is32Bit && "Should have handled the 32-bit sequences above." ) ? void (0) : __assert_fail ("Is32Bit && \"Should have handled the 32-bit sequences above.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3114, __extension__ __PRETTY_FUNCTION__)); | ||||
3115 | // For 32-bit sequences, the extensions differ between GE/LE cases. | ||||
3116 | switch (CmpTy) { | ||||
3117 | case ZeroCompare::GEZExt: { | ||||
3118 | SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), | ||||
3119 | S->getI32Imm(31, dl) }; | ||||
3120 | return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, | ||||
3121 | ShiftOps), 0); | ||||
3122 | } | ||||
3123 | case ZeroCompare::GESExt: | ||||
3124 | return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, | ||||
3125 | S->getI32Imm(31, dl)), 0); | ||||
3126 | case ZeroCompare::LEZExt: | ||||
3127 | return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, | ||||
3128 | S->getI32Imm(1, dl)), 0); | ||||
3129 | case ZeroCompare::LESExt: | ||||
3130 | return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, | ||||
3131 | S->getI32Imm(-1, dl)), 0); | ||||
3132 | } | ||||
3133 | |||||
3134 | // The above case covers all the enumerators so it can't have a default clause | ||||
3135 | // to avoid compiler warnings. | ||||
3136 | llvm_unreachable("Unknown zero-comparison type.")::llvm::llvm_unreachable_internal("Unknown zero-comparison type." , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3136); | ||||
3137 | } | ||||
3138 | |||||
3139 | /// Produces a zero-extended result of comparing two 32-bit values according to | ||||
3140 | /// the passed condition code. | ||||
3141 | SDValue | ||||
3142 | IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, | ||||
3143 | ISD::CondCode CC, | ||||
3144 | int64_t RHSValue, SDLoc dl) { | ||||
3145 | if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || | ||||
3146 | CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) | ||||
3147 | return SDValue(); | ||||
3148 | bool IsRHSZero = RHSValue == 0; | ||||
3149 | bool IsRHSOne = RHSValue == 1; | ||||
3150 | bool IsRHSNegOne = RHSValue == -1LL; | ||||
3151 | switch (CC) { | ||||
3152 | default: return SDValue(); | ||||
3153 | case ISD::SETEQ: { | ||||
3154 | // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) | ||||
3155 | // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) | ||||
3156 | SDValue Xor = IsRHSZero ? LHS : | ||||
3157 | SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); | ||||
3158 | SDValue Clz = | ||||
3159 | SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); | ||||
3160 | SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), | ||||
3161 | S->getI32Imm(31, dl) }; | ||||
3162 | return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, | ||||
3163 | ShiftOps), 0); | ||||
3164 | } | ||||
3165 | case ISD::SETNE: { | ||||
3166 | // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) | ||||
3167 | // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) | ||||
3168 | SDValue Xor = IsRHSZero ? LHS : | ||||
3169 | SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); | ||||
3170 | SDValue Clz = | ||||
3171 | SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); | ||||
3172 | SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), | ||||
3173 | S->getI32Imm(31, dl) }; | ||||
3174 | SDValue Shift = | ||||
3175 | SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); | ||||
3176 | return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, | ||||
3177 | S->getI32Imm(1, dl)), 0); | ||||
3178 | } | ||||
3179 | case ISD::SETGE: { | ||||
3180 | // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) | ||||
3181 | // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) | ||||
3182 | if(IsRHSZero) | ||||
3183 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); | ||||
3184 | |||||
3185 | // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) | ||||
3186 | // by swapping inputs and falling through. | ||||
3187 | std::swap(LHS, RHS); | ||||
3188 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3189 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3190 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3191 | } | ||||
3192 | case ISD::SETLE: { | ||||
3193 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3194 | return SDValue(); | ||||
3195 | // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) | ||||
3196 | // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) | ||||
3197 | if(IsRHSZero) { | ||||
3198 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3199 | return SDValue(); | ||||
3200 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); | ||||
3201 | } | ||||
3202 | |||||
3203 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3204 | LHS = signExtendInputIfNeeded(LHS); | ||||
3205 | RHS = signExtendInputIfNeeded(RHS); | ||||
3206 | SDValue Sub = | ||||
3207 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); | ||||
3208 | SDValue Shift = | ||||
3209 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, | ||||
3210 | S->getI64Imm(1, dl), S->getI64Imm(63, dl)), | ||||
3211 | 0); | ||||
3212 | return | ||||
3213 | SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, | ||||
3214 | MVT::i64, Shift, S->getI32Imm(1, dl)), 0); | ||||
3215 | } | ||||
3216 | case ISD::SETGT: { | ||||
3217 | // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) | ||||
3218 | // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) | ||||
3219 | // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) | ||||
3220 | // Handle SETLT -1 (which is equivalent to SETGE 0). | ||||
3221 | if (IsRHSNegOne) | ||||
3222 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); | ||||
3223 | |||||
3224 | if (IsRHSZero) { | ||||
3225 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3226 | return SDValue(); | ||||
3227 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3228 | LHS = signExtendInputIfNeeded(LHS); | ||||
3229 | RHS = signExtendInputIfNeeded(RHS); | ||||
3230 | SDValue Neg = | ||||
3231 | SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); | ||||
3232 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3233 | Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); | ||||
3234 | } | ||||
3235 | // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as | ||||
3236 | // (%b < %a) by swapping inputs and falling through. | ||||
3237 | std::swap(LHS, RHS); | ||||
3238 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3239 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3240 | IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; | ||||
3241 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3242 | } | ||||
3243 | case ISD::SETLT: { | ||||
3244 | // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) | ||||
3245 | // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) | ||||
3246 | // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) | ||||
3247 | // Handle SETLT 1 (which is equivalent to SETLE 0). | ||||
3248 | if (IsRHSOne) { | ||||
3249 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3250 | return SDValue(); | ||||
3251 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); | ||||
3252 | } | ||||
3253 | |||||
3254 | if (IsRHSZero) { | ||||
3255 | SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), | ||||
3256 | S->getI32Imm(31, dl) }; | ||||
3257 | return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, | ||||
3258 | ShiftOps), 0); | ||||
3259 | } | ||||
3260 | |||||
3261 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3262 | return SDValue(); | ||||
3263 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3264 | LHS = signExtendInputIfNeeded(LHS); | ||||
3265 | RHS = signExtendInputIfNeeded(RHS); | ||||
3266 | SDValue SUBFNode = | ||||
3267 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); | ||||
3268 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3269 | SUBFNode, S->getI64Imm(1, dl), | ||||
3270 | S->getI64Imm(63, dl)), 0); | ||||
3271 | } | ||||
3272 | case ISD::SETUGE: | ||||
3273 | // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) | ||||
3274 | // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) | ||||
3275 | std::swap(LHS, RHS); | ||||
3276 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3277 | case ISD::SETULE: { | ||||
3278 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3279 | return SDValue(); | ||||
3280 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3281 | LHS = zeroExtendInputIfNeeded(LHS); | ||||
3282 | RHS = zeroExtendInputIfNeeded(RHS); | ||||
3283 | SDValue Subtract = | ||||
3284 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); | ||||
3285 | SDValue SrdiNode = | ||||
3286 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3287 | Subtract, S->getI64Imm(1, dl), | ||||
3288 | S->getI64Imm(63, dl)), 0); | ||||
3289 | return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, | ||||
3290 | S->getI32Imm(1, dl)), 0); | ||||
3291 | } | ||||
3292 | case ISD::SETUGT: | ||||
3293 | // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) | ||||
3294 | // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) | ||||
3295 | std::swap(LHS, RHS); | ||||
3296 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3297 | case ISD::SETULT: { | ||||
3298 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3299 | return SDValue(); | ||||
3300 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3301 | LHS = zeroExtendInputIfNeeded(LHS); | ||||
3302 | RHS = zeroExtendInputIfNeeded(RHS); | ||||
3303 | SDValue Subtract = | ||||
3304 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); | ||||
3305 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3306 | Subtract, S->getI64Imm(1, dl), | ||||
3307 | S->getI64Imm(63, dl)), 0); | ||||
3308 | } | ||||
3309 | } | ||||
3310 | } | ||||
3311 | |||||
3312 | /// Produces a sign-extended result of comparing two 32-bit values according to | ||||
3313 | /// the passed condition code. | ||||
3314 | SDValue | ||||
3315 | IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, | ||||
3316 | ISD::CondCode CC, | ||||
3317 | int64_t RHSValue, SDLoc dl) { | ||||
3318 | if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || | ||||
3319 | CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) | ||||
3320 | return SDValue(); | ||||
3321 | bool IsRHSZero = RHSValue == 0; | ||||
3322 | bool IsRHSOne = RHSValue == 1; | ||||
3323 | bool IsRHSNegOne = RHSValue == -1LL; | ||||
3324 | |||||
3325 | switch (CC) { | ||||
3326 | default: return SDValue(); | ||||
3327 | case ISD::SETEQ: { | ||||
3328 | // (sext (setcc %a, %b, seteq)) -> | ||||
3329 | // (ashr (shl (ctlz (xor %a, %b)), 58), 63) | ||||
3330 | // (sext (setcc %a, 0, seteq)) -> | ||||
3331 | // (ashr (shl (ctlz %a), 58), 63) | ||||
3332 | SDValue CountInput = IsRHSZero ? LHS : | ||||
3333 | SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); | ||||
3334 | SDValue Cntlzw = | ||||
3335 | SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); | ||||
3336 | SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl), | ||||
3337 | S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; | ||||
3338 | SDValue Slwi = | ||||
3339 | SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); | ||||
3340 | return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); | ||||
3341 | } | ||||
3342 | case ISD::SETNE: { | ||||
3343 | // Bitwise xor the operands, count leading zeros, shift right by 5 bits and | ||||
3344 | // flip the bit, finally take 2's complement. | ||||
3345 | // (sext (setcc %a, %b, setne)) -> | ||||
3346 | // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) | ||||
3347 | // Same as above, but the first xor is not needed. | ||||
3348 | // (sext (setcc %a, 0, setne)) -> | ||||
3349 | // (neg (xor (lshr (ctlz %a), 5), 1)) | ||||
3350 | SDValue Xor = IsRHSZero ? LHS : | ||||
3351 | SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); | ||||
3352 | SDValue Clz = | ||||
3353 | SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); | ||||
3354 | SDValue ShiftOps[] = | ||||
3355 | { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; | ||||
3356 | SDValue Shift = | ||||
3357 | SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); | ||||
3358 | SDValue Xori = | ||||
3359 | SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, | ||||
3360 | S->getI32Imm(1, dl)), 0); | ||||
3361 | return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); | ||||
3362 | } | ||||
3363 | case ISD::SETGE: { | ||||
3364 | // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1) | ||||
3365 | // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31) | ||||
3366 | if (IsRHSZero) | ||||
3367 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); | ||||
3368 | |||||
3369 | // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) | ||||
3370 | // by swapping inputs and falling through. | ||||
3371 | std::swap(LHS, RHS); | ||||
3372 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3373 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3374 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3375 | } | ||||
3376 | case ISD::SETLE: { | ||||
3377 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3378 | return SDValue(); | ||||
3379 | // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1) | ||||
3380 | // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) | ||||
3381 | if (IsRHSZero) | ||||
3382 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); | ||||
3383 | |||||
3384 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3385 | LHS = signExtendInputIfNeeded(LHS); | ||||
3386 | RHS = signExtendInputIfNeeded(RHS); | ||||
3387 | SDValue SUBFNode = | ||||
3388 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, | ||||
3389 | LHS, RHS), 0); | ||||
3390 | SDValue Srdi = | ||||
3391 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3392 | SUBFNode, S->getI64Imm(1, dl), | ||||
3393 | S->getI64Imm(63, dl)), 0); | ||||
3394 | return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, | ||||
3395 | S->getI32Imm(-1, dl)), 0); | ||||
3396 | } | ||||
3397 | case ISD::SETGT: { | ||||
3398 | // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63) | ||||
3399 | // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31) | ||||
3400 | // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63) | ||||
3401 | if (IsRHSNegOne) | ||||
3402 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); | ||||
3403 | if (IsRHSZero) { | ||||
3404 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3405 | return SDValue(); | ||||
3406 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3407 | LHS = signExtendInputIfNeeded(LHS); | ||||
3408 | RHS = signExtendInputIfNeeded(RHS); | ||||
3409 | SDValue Neg = | ||||
3410 | SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); | ||||
3411 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg, | ||||
3412 | S->getI64Imm(63, dl)), 0); | ||||
3413 | } | ||||
3414 | // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as | ||||
3415 | // (%b < %a) by swapping inputs and falling through. | ||||
3416 | std::swap(LHS, RHS); | ||||
3417 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3418 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3419 | IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; | ||||
3420 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3421 | } | ||||
3422 | case ISD::SETLT: { | ||||
3423 | // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63) | ||||
3424 | // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1) | ||||
3425 | // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31) | ||||
3426 | if (IsRHSOne) { | ||||
3427 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3428 | return SDValue(); | ||||
3429 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); | ||||
3430 | } | ||||
3431 | if (IsRHSZero) | ||||
3432 | return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS, | ||||
3433 | S->getI32Imm(31, dl)), 0); | ||||
3434 | |||||
3435 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3436 | return SDValue(); | ||||
3437 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3438 | LHS = signExtendInputIfNeeded(LHS); | ||||
3439 | RHS = signExtendInputIfNeeded(RHS); | ||||
3440 | SDValue SUBFNode = | ||||
3441 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); | ||||
3442 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, | ||||
3443 | SUBFNode, S->getI64Imm(63, dl)), 0); | ||||
3444 | } | ||||
3445 | case ISD::SETUGE: | ||||
3446 | // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) | ||||
3447 | // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) | ||||
3448 | std::swap(LHS, RHS); | ||||
3449 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3450 | case ISD::SETULE: { | ||||
3451 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3452 | return SDValue(); | ||||
3453 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3454 | LHS = zeroExtendInputIfNeeded(LHS); | ||||
3455 | RHS = zeroExtendInputIfNeeded(RHS); | ||||
3456 | SDValue Subtract = | ||||
3457 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); | ||||
3458 | SDValue Shift = | ||||
3459 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, | ||||
3460 | S->getI32Imm(1, dl), S->getI32Imm(63,dl)), | ||||
3461 | 0); | ||||
3462 | return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift, | ||||
3463 | S->getI32Imm(-1, dl)), 0); | ||||
3464 | } | ||||
3465 | case ISD::SETUGT: | ||||
3466 | // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) | ||||
3467 | // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63) | ||||
3468 | std::swap(LHS, RHS); | ||||
3469 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3470 | case ISD::SETULT: { | ||||
3471 | if (CmpInGPR == ICGPR_NonExtIn) | ||||
3472 | return SDValue(); | ||||
3473 | // The upper 32-bits of the register can't be undefined for this sequence. | ||||
3474 | LHS = zeroExtendInputIfNeeded(LHS); | ||||
3475 | RHS = zeroExtendInputIfNeeded(RHS); | ||||
3476 | SDValue Subtract = | ||||
3477 | SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); | ||||
3478 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, | ||||
3479 | Subtract, S->getI64Imm(63, dl)), 0); | ||||
3480 | } | ||||
3481 | } | ||||
3482 | } | ||||
3483 | |||||
3484 | /// Produces a zero-extended result of comparing two 64-bit values according to | ||||
3485 | /// the passed condition code. | ||||
3486 | SDValue | ||||
3487 | IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, | ||||
3488 | ISD::CondCode CC, | ||||
3489 | int64_t RHSValue, SDLoc dl) { | ||||
3490 | if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || | ||||
3491 | CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext) | ||||
3492 | return SDValue(); | ||||
3493 | bool IsRHSZero = RHSValue == 0; | ||||
3494 | bool IsRHSOne = RHSValue == 1; | ||||
3495 | bool IsRHSNegOne = RHSValue == -1LL; | ||||
3496 | switch (CC) { | ||||
3497 | default: return SDValue(); | ||||
3498 | case ISD::SETEQ: { | ||||
3499 | // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) | ||||
3500 | // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) | ||||
3501 | SDValue Xor = IsRHSZero ? LHS : | ||||
3502 | SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); | ||||
3503 | SDValue Clz = | ||||
3504 | SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); | ||||
3505 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, | ||||
3506 | S->getI64Imm(58, dl), | ||||
3507 | S->getI64Imm(63, dl)), 0); | ||||
3508 | } | ||||
3509 | case ISD::SETNE: { | ||||
3510 | // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) | ||||
3511 | // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) | ||||
3512 | // {addcz.reg, addcz.CA} = (addcarry %a, -1) | ||||
3513 | // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) | ||||
3514 | SDValue Xor = IsRHSZero ? LHS : | ||||
3515 | SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); | ||||
3516 | SDValue AC = | ||||
3517 | SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, | ||||
3518 | Xor, S->getI32Imm(~0U, dl)), 0); | ||||
3519 | return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, | ||||
3520 | Xor, AC.getValue(1)), 0); | ||||
3521 | } | ||||
3522 | case ISD::SETGE: { | ||||
3523 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3524 | // (zext (setcc %a, %b, setge)) -> | ||||
3525 | // (adde (lshr %b, 63), (ashr %a, 63), subc.CA) | ||||
3526 | // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63) | ||||
3527 | if (IsRHSZero) | ||||
3528 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); | ||||
3529 | std::swap(LHS, RHS); | ||||
3530 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3531 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3532 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3533 | } | ||||
3534 | case ISD::SETLE: { | ||||
3535 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3536 | // (zext (setcc %a, %b, setge)) -> | ||||
3537 | // (adde (lshr %a, 63), (ashr %b, 63), subc.CA) | ||||
3538 | // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63) | ||||
3539 | if (IsRHSZero) | ||||
3540 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); | ||||
3541 | SDValue ShiftL = | ||||
3542 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, | ||||
3543 | S->getI64Imm(1, dl), | ||||
3544 | S->getI64Imm(63, dl)), 0); | ||||
3545 | SDValue ShiftR = | ||||
3546 | SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, | ||||
3547 | S->getI64Imm(63, dl)), 0); | ||||
3548 | SDValue SubtractCarry = | ||||
3549 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3550 | LHS, RHS), 1); | ||||
3551 | return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, | ||||
3552 | ShiftR, ShiftL, SubtractCarry), 0); | ||||
3553 | } | ||||
3554 | case ISD::SETGT: { | ||||
3555 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3556 | // (zext (setcc %a, %b, setgt)) -> | ||||
3557 | // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) | ||||
3558 | // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63) | ||||
3559 | if (IsRHSNegOne) | ||||
3560 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); | ||||
3561 | if (IsRHSZero) { | ||||
3562 | SDValue Addi = | ||||
3563 | SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, | ||||
3564 | S->getI64Imm(~0ULL, dl)), 0); | ||||
3565 | SDValue Nor = | ||||
3566 | SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0); | ||||
3567 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, | ||||
3568 | S->getI64Imm(1, dl), | ||||
3569 | S->getI64Imm(63, dl)), 0); | ||||
3570 | } | ||||
3571 | std::swap(LHS, RHS); | ||||
3572 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3573 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3574 | IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; | ||||
3575 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3576 | } | ||||
3577 | case ISD::SETLT: { | ||||
3578 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3579 | // (zext (setcc %a, %b, setlt)) -> | ||||
3580 | // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) | ||||
3581 | // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) | ||||
3582 | if (IsRHSOne) | ||||
3583 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); | ||||
3584 | if (IsRHSZero) | ||||
3585 | return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, | ||||
3586 | S->getI64Imm(1, dl), | ||||
3587 | S->getI64Imm(63, dl)), 0); | ||||
3588 | SDValue SRADINode = | ||||
3589 | SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, | ||||
3590 | LHS, S->getI64Imm(63, dl)), 0); | ||||
3591 | SDValue SRDINode = | ||||
3592 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3593 | RHS, S->getI64Imm(1, dl), | ||||
3594 | S->getI64Imm(63, dl)), 0); | ||||
3595 | SDValue SUBFC8Carry = | ||||
3596 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3597 | RHS, LHS), 1); | ||||
3598 | SDValue ADDE8Node = | ||||
3599 | SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, | ||||
3600 | SRDINode, SRADINode, SUBFC8Carry), 0); | ||||
3601 | return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, | ||||
3602 | ADDE8Node, S->getI64Imm(1, dl)), 0); | ||||
3603 | } | ||||
3604 | case ISD::SETUGE: | ||||
3605 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3606 | // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) | ||||
3607 | std::swap(LHS, RHS); | ||||
3608 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3609 | case ISD::SETULE: { | ||||
3610 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3611 | // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) | ||||
3612 | SDValue SUBFC8Carry = | ||||
3613 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3614 | LHS, RHS), 1); | ||||
3615 | SDValue SUBFE8Node = | ||||
3616 | SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, | ||||
3617 | LHS, LHS, SUBFC8Carry), 0); | ||||
3618 | return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, | ||||
3619 | SUBFE8Node, S->getI64Imm(1, dl)), 0); | ||||
3620 | } | ||||
3621 | case ISD::SETUGT: | ||||
3622 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3623 | // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) | ||||
3624 | std::swap(LHS, RHS); | ||||
3625 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3626 | case ISD::SETULT: { | ||||
3627 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3628 | // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) | ||||
3629 | SDValue SubtractCarry = | ||||
3630 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3631 | RHS, LHS), 1); | ||||
3632 | SDValue ExtSub = | ||||
3633 | SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, | ||||
3634 | LHS, LHS, SubtractCarry), 0); | ||||
3635 | return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, | ||||
3636 | ExtSub), 0); | ||||
3637 | } | ||||
3638 | } | ||||
3639 | } | ||||
3640 | |||||
3641 | /// Produces a sign-extended result of comparing two 64-bit values according to | ||||
3642 | /// the passed condition code. | ||||
3643 | SDValue | ||||
3644 | IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, | ||||
3645 | ISD::CondCode CC, | ||||
3646 | int64_t RHSValue, SDLoc dl) { | ||||
3647 | if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || | ||||
3648 | CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext) | ||||
3649 | return SDValue(); | ||||
3650 | bool IsRHSZero = RHSValue == 0; | ||||
3651 | bool IsRHSOne = RHSValue == 1; | ||||
3652 | bool IsRHSNegOne = RHSValue == -1LL; | ||||
3653 | switch (CC) { | ||||
3654 | default: return SDValue(); | ||||
3655 | case ISD::SETEQ: { | ||||
3656 | // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) | ||||
3657 | // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) | ||||
3658 | // {addcz.reg, addcz.CA} = (addcarry %a, -1) | ||||
3659 | // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) | ||||
3660 | SDValue AddInput = IsRHSZero ? LHS : | ||||
3661 | SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); | ||||
3662 | SDValue Addic = | ||||
3663 | SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, | ||||
3664 | AddInput, S->getI32Imm(~0U, dl)), 0); | ||||
3665 | return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, | ||||
3666 | Addic, Addic.getValue(1)), 0); | ||||
3667 | } | ||||
3668 | case ISD::SETNE: { | ||||
3669 | // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) | ||||
3670 | // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) | ||||
3671 | // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) | ||||
3672 | // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) | ||||
3673 | SDValue Xor = IsRHSZero ? LHS : | ||||
3674 | SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); | ||||
3675 | SDValue SC = | ||||
3676 | SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, | ||||
3677 | Xor, S->getI32Imm(0, dl)), 0); | ||||
3678 | return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, | ||||
3679 | SC, SC.getValue(1)), 0); | ||||
3680 | } | ||||
3681 | case ISD::SETGE: { | ||||
3682 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3683 | // (zext (setcc %a, %b, setge)) -> | ||||
3684 | // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA)) | ||||
3685 | // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63)) | ||||
3686 | if (IsRHSZero) | ||||
3687 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); | ||||
3688 | std::swap(LHS, RHS); | ||||
3689 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3690 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3691 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3692 | } | ||||
3693 | case ISD::SETLE: { | ||||
3694 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3695 | // (zext (setcc %a, %b, setge)) -> | ||||
3696 | // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA)) | ||||
3697 | // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63) | ||||
3698 | if (IsRHSZero) | ||||
3699 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); | ||||
3700 | SDValue ShiftR = | ||||
3701 | SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, | ||||
3702 | S->getI64Imm(63, dl)), 0); | ||||
3703 | SDValue ShiftL = | ||||
3704 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, | ||||
3705 | S->getI64Imm(1, dl), | ||||
3706 | S->getI64Imm(63, dl)), 0); | ||||
3707 | SDValue SubtractCarry = | ||||
3708 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3709 | LHS, RHS), 1); | ||||
3710 | SDValue Adde = | ||||
3711 | SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, | ||||
3712 | ShiftR, ShiftL, SubtractCarry), 0); | ||||
3713 | return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0); | ||||
3714 | } | ||||
3715 | case ISD::SETGT: { | ||||
3716 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3717 | // (zext (setcc %a, %b, setgt)) -> | ||||
3718 | // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) | ||||
3719 | // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63) | ||||
3720 | if (IsRHSNegOne) | ||||
3721 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); | ||||
3722 | if (IsRHSZero) { | ||||
3723 | SDValue Add = | ||||
3724 | SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, | ||||
3725 | S->getI64Imm(-1, dl)), 0); | ||||
3726 | SDValue Nor = | ||||
3727 | SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0); | ||||
3728 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor, | ||||
3729 | S->getI64Imm(63, dl)), 0); | ||||
3730 | } | ||||
3731 | std::swap(LHS, RHS); | ||||
3732 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3733 | IsRHSZero = RHSConst && RHSConst->isNullValue(); | ||||
3734 | IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; | ||||
3735 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3736 | } | ||||
3737 | case ISD::SETLT: { | ||||
3738 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3739 | // (zext (setcc %a, %b, setlt)) -> | ||||
3740 | // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) | ||||
3741 | // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63) | ||||
3742 | if (IsRHSOne) | ||||
3743 | return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); | ||||
3744 | if (IsRHSZero) { | ||||
3745 | return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, | ||||
3746 | S->getI64Imm(63, dl)), 0); | ||||
3747 | } | ||||
3748 | SDValue SRADINode = | ||||
3749 | SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, | ||||
3750 | LHS, S->getI64Imm(63, dl)), 0); | ||||
3751 | SDValue SRDINode = | ||||
3752 | SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, | ||||
3753 | RHS, S->getI64Imm(1, dl), | ||||
3754 | S->getI64Imm(63, dl)), 0); | ||||
3755 | SDValue SUBFC8Carry = | ||||
3756 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3757 | RHS, LHS), 1); | ||||
3758 | SDValue ADDE8Node = | ||||
3759 | SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, | ||||
3760 | SRDINode, SRADINode, SUBFC8Carry), 0); | ||||
3761 | SDValue XORI8Node = | ||||
3762 | SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, | ||||
3763 | ADDE8Node, S->getI64Imm(1, dl)), 0); | ||||
3764 | return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, | ||||
3765 | XORI8Node), 0); | ||||
3766 | } | ||||
3767 | case ISD::SETUGE: | ||||
3768 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3769 | // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA) | ||||
3770 | std::swap(LHS, RHS); | ||||
3771 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3772 | case ISD::SETULE: { | ||||
3773 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3774 | // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA) | ||||
3775 | SDValue SubtractCarry = | ||||
3776 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3777 | LHS, RHS), 1); | ||||
3778 | SDValue ExtSub = | ||||
3779 | SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, | ||||
3780 | LHS, SubtractCarry), 0); | ||||
3781 | return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, | ||||
3782 | ExtSub, ExtSub), 0); | ||||
3783 | } | ||||
3784 | case ISD::SETUGT: | ||||
3785 | // {subc.reg, subc.CA} = (subcarry %b, %a) | ||||
3786 | // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) | ||||
3787 | std::swap(LHS, RHS); | ||||
3788 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
3789 | case ISD::SETULT: { | ||||
3790 | // {subc.reg, subc.CA} = (subcarry %a, %b) | ||||
3791 | // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) | ||||
3792 | SDValue SubCarry = | ||||
3793 | SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, | ||||
3794 | RHS, LHS), 1); | ||||
3795 | return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, | ||||
3796 | LHS, LHS, SubCarry), 0); | ||||
3797 | } | ||||
3798 | } | ||||
3799 | } | ||||
3800 | |||||
3801 | /// Do all uses of this SDValue need the result in a GPR? | ||||
3802 | /// This is meant to be used on values that have type i1 since | ||||
3803 | /// it is somewhat meaningless to ask if values of other types | ||||
3804 | /// should be kept in GPR's. | ||||
3805 | static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { | ||||
3806 | assert(Compare.getOpcode() == ISD::SETCC &&(static_cast <bool> (Compare.getOpcode() == ISD::SETCC && "An ISD::SETCC node required here.") ? void (0) : __assert_fail ("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3807, __extension__ __PRETTY_FUNCTION__)) | ||||
3807 | "An ISD::SETCC node required here.")(static_cast <bool> (Compare.getOpcode() == ISD::SETCC && "An ISD::SETCC node required here.") ? void (0) : __assert_fail ("Compare.getOpcode() == ISD::SETCC && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3807, __extension__ __PRETTY_FUNCTION__)); | ||||
3808 | |||||
3809 | // For values that have a single use, the caller should obviously already have | ||||
3810 | // checked if that use is an extending use. We check the other uses here. | ||||
3811 | if (Compare.hasOneUse()) | ||||
3812 | return true; | ||||
3813 | // We want the value in a GPR if it is being extended, used for a select, or | ||||
3814 | // used in logical operations. | ||||
3815 | for (auto CompareUse : Compare.getNode()->uses()) | ||||
3816 | if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && | ||||
3817 | CompareUse->getOpcode() != ISD::ZERO_EXTEND && | ||||
3818 | CompareUse->getOpcode() != ISD::SELECT && | ||||
3819 | !isLogicOp(CompareUse->getOpcode())) { | ||||
3820 | OmittedForNonExtendUses++; | ||||
3821 | return false; | ||||
3822 | } | ||||
3823 | return true; | ||||
3824 | } | ||||
3825 | |||||
3826 | /// Returns an equivalent of a SETCC node but with the result the same width as | ||||
3827 | /// the inputs. This can also be used for SELECT_CC if either the true or false | ||||
3828 | /// values is a power of two while the other is zero. | ||||
3829 | SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, | ||||
3830 | SetccInGPROpts ConvOpts) { | ||||
3831 | assert((Compare.getOpcode() == ISD::SETCC ||(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here." ) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3833, __extension__ __PRETTY_FUNCTION__)) | ||||
3832 | Compare.getOpcode() == ISD::SELECT_CC) &&(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here." ) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3833, __extension__ __PRETTY_FUNCTION__)) | ||||
3833 | "An ISD::SETCC node required here.")(static_cast <bool> ((Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here." ) ? void (0) : __assert_fail ("(Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && \"An ISD::SETCC node required here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 3833, __extension__ __PRETTY_FUNCTION__)); | ||||
3834 | |||||
3835 | // Don't convert this comparison to a GPR sequence because there are uses | ||||
3836 | // of the i1 result (i.e. uses that require the result in the CR). | ||||
3837 | if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) | ||||
3838 | return SDValue(); | ||||
3839 | |||||
3840 | SDValue LHS = Compare.getOperand(0); | ||||
3841 | SDValue RHS = Compare.getOperand(1); | ||||
3842 | |||||
3843 | // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. | ||||
3844 | int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; | ||||
3845 | ISD::CondCode CC = | ||||
3846 | cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); | ||||
3847 | EVT InputVT = LHS.getValueType(); | ||||
3848 | if (InputVT != MVT::i32 && InputVT != MVT::i64) | ||||
3849 | return SDValue(); | ||||
3850 | |||||
3851 | if (ConvOpts == SetccInGPROpts::ZExtInvert || | ||||
3852 | ConvOpts == SetccInGPROpts::SExtInvert) | ||||
3853 | CC = ISD::getSetCCInverse(CC, InputVT); | ||||
3854 | |||||
3855 | bool Inputs32Bit = InputVT == MVT::i32; | ||||
3856 | |||||
3857 | SDLoc dl(Compare); | ||||
3858 | ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); | ||||
3859 | int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX(9223372036854775807L); | ||||
3860 | bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || | ||||
3861 | ConvOpts == SetccInGPROpts::SExtInvert; | ||||
3862 | |||||
3863 | if (IsSext && Inputs32Bit) | ||||
3864 | return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); | ||||
3865 | else if (Inputs32Bit) | ||||
3866 | return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); | ||||
3867 | else if (IsSext) | ||||
3868 | return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); | ||||
3869 | return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); | ||||
3870 | } | ||||
3871 | |||||
3872 | } // end anonymous namespace | ||||
3873 | |||||
3874 | bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { | ||||
3875 | if (N->getValueType(0) != MVT::i32 && | ||||
3876 | N->getValueType(0) != MVT::i64) | ||||
3877 | return false; | ||||
3878 | |||||
3879 | // This optimization will emit code that assumes 64-bit registers | ||||
3880 | // so we don't want to run it in 32-bit mode. Also don't run it | ||||
3881 | // on functions that are not to be optimized. | ||||
3882 | if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) | ||||
3883 | return false; | ||||
3884 | |||||
3885 | // For POWER10, it is more profitable to use the set boolean extension | ||||
3886 | // instructions rather than the integer compare elimination codegen. | ||||
3887 | // Users can override this via the command line option, `--ppc-gpr-icmps`. | ||||
3888 | if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1()) | ||||
3889 | return false; | ||||
3890 | |||||
3891 | switch (N->getOpcode()) { | ||||
3892 | default: break; | ||||
3893 | case ISD::ZERO_EXTEND: | ||||
3894 | case ISD::SIGN_EXTEND: | ||||
3895 | case ISD::AND: | ||||
3896 | case ISD::OR: | ||||
3897 | case ISD::XOR: { | ||||
3898 | IntegerCompareEliminator ICmpElim(CurDAG, this); | ||||
3899 | if (SDNode *New = ICmpElim.Select(N)) { | ||||
3900 | ReplaceNode(N, New); | ||||
3901 | return true; | ||||
3902 | } | ||||
3903 | } | ||||
3904 | } | ||||
3905 | return false; | ||||
3906 | } | ||||
3907 | |||||
3908 | bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { | ||||
3909 | if (N->getValueType(0) != MVT::i32 && | ||||
3910 | N->getValueType(0) != MVT::i64) | ||||
3911 | return false; | ||||
3912 | |||||
3913 | if (!UseBitPermRewriter) | ||||
3914 | return false; | ||||
3915 | |||||
3916 | switch (N->getOpcode()) { | ||||
3917 | default: break; | ||||
3918 | case ISD::ROTL: | ||||
3919 | case ISD::SHL: | ||||
3920 | case ISD::SRL: | ||||
3921 | case ISD::AND: | ||||
3922 | case ISD::OR: { | ||||
3923 | BitPermutationSelector BPS(CurDAG); | ||||
3924 | if (SDNode *New = BPS.Select(N)) { | ||||
3925 | ReplaceNode(N, New); | ||||
3926 | return true; | ||||
3927 | } | ||||
3928 | return false; | ||||
3929 | } | ||||
3930 | } | ||||
3931 | |||||
3932 | return false; | ||||
3933 | } | ||||
3934 | |||||
3935 | /// SelectCC - Select a comparison of the specified values with the specified | ||||
3936 | /// condition code, returning the CR# of the expression. | ||||
3937 | SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
3938 | const SDLoc &dl, SDValue Chain) { | ||||
3939 | // Always select the LHS. | ||||
3940 | unsigned Opc; | ||||
3941 | |||||
3942 | if (LHS.getValueType() == MVT::i32) { | ||||
3943 | unsigned Imm; | ||||
3944 | if (CC == ISD::SETEQ || CC == ISD::SETNE) { | ||||
3945 | if (isInt32Immediate(RHS, Imm)) { | ||||
3946 | // SETEQ/SETNE comparison with 16-bit immediate, fold it. | ||||
3947 | if (isUInt<16>(Imm)) | ||||
3948 | return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, | ||||
3949 | getI32Imm(Imm & 0xFFFF, dl)), | ||||
3950 | 0); | ||||
3951 | // If this is a 16-bit signed immediate, fold it. | ||||
3952 | if (isInt<16>((int)Imm)) | ||||
3953 | return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, | ||||
3954 | getI32Imm(Imm & 0xFFFF, dl)), | ||||
3955 | 0); | ||||
3956 | |||||
3957 | // For non-equality comparisons, the default code would materialize the | ||||
3958 | // constant, then compare against it, like this: | ||||
3959 | // lis r2, 4660 | ||||
3960 | // ori r2, r2, 22136 | ||||
3961 | // cmpw cr0, r3, r2 | ||||
3962 | // Since we are just comparing for equality, we can emit this instead: | ||||
3963 | // xoris r0,r3,0x1234 | ||||
3964 | // cmplwi cr0,r0,0x5678 | ||||
3965 | // beq cr0,L6 | ||||
3966 | SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, | ||||
3967 | getI32Imm(Imm >> 16, dl)), 0); | ||||
3968 | return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, | ||||
3969 | getI32Imm(Imm & 0xFFFF, dl)), 0); | ||||
3970 | } | ||||
3971 | Opc = PPC::CMPLW; | ||||
3972 | } else if (ISD::isUnsignedIntSetCC(CC)) { | ||||
3973 | if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) | ||||
3974 | return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, | ||||
3975 | getI32Imm(Imm & 0xFFFF, dl)), 0); | ||||
3976 | Opc = PPC::CMPLW; | ||||
3977 | } else { | ||||
3978 | int16_t SImm; | ||||
3979 | if (isIntS16Immediate(RHS, SImm)) | ||||
3980 | return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, | ||||
3981 | getI32Imm((int)SImm & 0xFFFF, | ||||
3982 | dl)), | ||||
3983 | 0); | ||||
3984 | Opc = PPC::CMPW; | ||||
3985 | } | ||||
3986 | } else if (LHS.getValueType() == MVT::i64) { | ||||
3987 | uint64_t Imm; | ||||
3988 | if (CC == ISD::SETEQ || CC == ISD::SETNE) { | ||||
3989 | if (isInt64Immediate(RHS.getNode(), Imm)) { | ||||
3990 | // SETEQ/SETNE comparison with 16-bit immediate, fold it. | ||||
3991 | if (isUInt<16>(Imm)) | ||||
3992 | return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, | ||||
3993 | getI32Imm(Imm & 0xFFFF, dl)), | ||||
3994 | 0); | ||||
3995 | // If this is a 16-bit signed immediate, fold it. | ||||
3996 | if (isInt<16>(Imm)) | ||||
3997 | return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, | ||||
3998 | getI32Imm(Imm & 0xFFFF, dl)), | ||||
3999 | 0); | ||||
4000 | |||||
4001 | // For non-equality comparisons, the default code would materialize the | ||||
4002 | // constant, then compare against it, like this: | ||||
4003 | // lis r2, 4660 | ||||
4004 | // ori r2, r2, 22136 | ||||
4005 | // cmpd cr0, r3, r2 | ||||
4006 | // Since we are just comparing for equality, we can emit this instead: | ||||
4007 | // xoris r0,r3,0x1234 | ||||
4008 | // cmpldi cr0,r0,0x5678 | ||||
4009 | // beq cr0,L6 | ||||
4010 | if (isUInt<32>(Imm)) { | ||||
4011 | SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, | ||||
4012 | getI64Imm(Imm >> 16, dl)), 0); | ||||
4013 | return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, | ||||
4014 | getI64Imm(Imm & 0xFFFF, dl)), | ||||
4015 | 0); | ||||
4016 | } | ||||
4017 | } | ||||
4018 | Opc = PPC::CMPLD; | ||||
4019 | } else if (ISD::isUnsignedIntSetCC(CC)) { | ||||
4020 | if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) | ||||
4021 | return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, | ||||
4022 | getI64Imm(Imm & 0xFFFF, dl)), 0); | ||||
4023 | Opc = PPC::CMPLD; | ||||
4024 | } else { | ||||
4025 | int16_t SImm; | ||||
4026 | if (isIntS16Immediate(RHS, SImm)) | ||||
4027 | return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, | ||||
4028 | getI64Imm(SImm & 0xFFFF, dl)), | ||||
4029 | 0); | ||||
4030 | Opc = PPC::CMPD; | ||||
4031 | } | ||||
4032 | } else if (LHS.getValueType() == MVT::f32) { | ||||
4033 | if (Subtarget->hasSPE()) { | ||||
4034 | switch (CC) { | ||||
4035 | default: | ||||
4036 | case ISD::SETEQ: | ||||
4037 | case ISD::SETNE: | ||||
4038 | Opc = PPC::EFSCMPEQ; | ||||
4039 | break; | ||||
4040 | case ISD::SETLT: | ||||
4041 | case ISD::SETGE: | ||||
4042 | case ISD::SETOLT: | ||||
4043 | case ISD::SETOGE: | ||||
4044 | case ISD::SETULT: | ||||
4045 | case ISD::SETUGE: | ||||
4046 | Opc = PPC::EFSCMPLT; | ||||
4047 | break; | ||||
4048 | case ISD::SETGT: | ||||
4049 | case ISD::SETLE: | ||||
4050 | case ISD::SETOGT: | ||||
4051 | case ISD::SETOLE: | ||||
4052 | case ISD::SETUGT: | ||||
4053 | case ISD::SETULE: | ||||
4054 | Opc = PPC::EFSCMPGT; | ||||
4055 | break; | ||||
4056 | } | ||||
4057 | } else | ||||
4058 | Opc = PPC::FCMPUS; | ||||
4059 | } else if (LHS.getValueType() == MVT::f64) { | ||||
4060 | if (Subtarget->hasSPE()) { | ||||
4061 | switch (CC) { | ||||
4062 | default: | ||||
4063 | case ISD::SETEQ: | ||||
4064 | case ISD::SETNE: | ||||
4065 | Opc = PPC::EFDCMPEQ; | ||||
4066 | break; | ||||
4067 | case ISD::SETLT: | ||||
4068 | case ISD::SETGE: | ||||
4069 | case ISD::SETOLT: | ||||
4070 | case ISD::SETOGE: | ||||
4071 | case ISD::SETULT: | ||||
4072 | case ISD::SETUGE: | ||||
4073 | Opc = PPC::EFDCMPLT; | ||||
4074 | break; | ||||
4075 | case ISD::SETGT: | ||||
4076 | case ISD::SETLE: | ||||
4077 | case ISD::SETOGT: | ||||
4078 | case ISD::SETOLE: | ||||
4079 | case ISD::SETUGT: | ||||
4080 | case ISD::SETULE: | ||||
4081 | Opc = PPC::EFDCMPGT; | ||||
4082 | break; | ||||
4083 | } | ||||
4084 | } else | ||||
4085 | Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; | ||||
4086 | } else { | ||||
4087 | assert(LHS.getValueType() == MVT::f128 && "Unknown vt!")(static_cast <bool> (LHS.getValueType() == MVT::f128 && "Unknown vt!") ? void (0) : __assert_fail ("LHS.getValueType() == MVT::f128 && \"Unknown vt!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4087, __extension__ __PRETTY_FUNCTION__)); | ||||
4088 | assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector")(static_cast <bool> (Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector") ? void (0) : __assert_fail ("Subtarget->hasP9Vector() && \"XSCMPUQP requires Power9 Vector\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4088, __extension__ __PRETTY_FUNCTION__)); | ||||
4089 | Opc = PPC::XSCMPUQP; | ||||
4090 | } | ||||
4091 | if (Chain) | ||||
4092 | return SDValue( | ||||
4093 | CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain), | ||||
4094 | 0); | ||||
4095 | else | ||||
4096 | return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); | ||||
4097 | } | ||||
4098 | |||||
4099 | static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, | ||||
4100 | const PPCSubtarget *Subtarget) { | ||||
4101 | // For SPE instructions, the result is in GT bit of the CR | ||||
4102 | bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint(); | ||||
4103 | |||||
4104 | switch (CC) { | ||||
4105 | case ISD::SETUEQ: | ||||
4106 | case ISD::SETONE: | ||||
4107 | case ISD::SETOLE: | ||||
4108 | case ISD::SETOGE: | ||||
4109 | llvm_unreachable("Should be lowered by legalize!")::llvm::llvm_unreachable_internal("Should be lowered by legalize!" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4109); | ||||
4110 | default: llvm_unreachable("Unknown condition!")::llvm::llvm_unreachable_internal("Unknown condition!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4110); | ||||
4111 | case ISD::SETOEQ: | ||||
4112 | case ISD::SETEQ: | ||||
4113 | return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ; | ||||
4114 | case ISD::SETUNE: | ||||
4115 | case ISD::SETNE: | ||||
4116 | return UseSPE ? PPC::PRED_LE : PPC::PRED_NE; | ||||
4117 | case ISD::SETOLT: | ||||
4118 | case ISD::SETLT: | ||||
4119 | return UseSPE ? PPC::PRED_GT : PPC::PRED_LT; | ||||
4120 | case ISD::SETULE: | ||||
4121 | case ISD::SETLE: | ||||
4122 | return PPC::PRED_LE; | ||||
4123 | case ISD::SETOGT: | ||||
4124 | case ISD::SETGT: | ||||
4125 | return PPC::PRED_GT; | ||||
4126 | case ISD::SETUGE: | ||||
4127 | case ISD::SETGE: | ||||
4128 | return UseSPE ? PPC::PRED_LE : PPC::PRED_GE; | ||||
4129 | case ISD::SETO: return PPC::PRED_NU; | ||||
4130 | case ISD::SETUO: return PPC::PRED_UN; | ||||
4131 | // These two are invalid for floating point. Assume we have int. | ||||
4132 | case ISD::SETULT: return PPC::PRED_LT; | ||||
4133 | case ISD::SETUGT: return PPC::PRED_GT; | ||||
4134 | } | ||||
4135 | } | ||||
4136 | |||||
4137 | /// getCRIdxForSetCC - Return the index of the condition register field | ||||
4138 | /// associated with the SetCC condition, and whether or not the field is | ||||
4139 | /// treated as inverted. That is, lt = 0; ge = 0 inverted. | ||||
4140 | static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { | ||||
4141 | Invert = false; | ||||
4142 | switch (CC) { | ||||
4143 | default: llvm_unreachable("Unknown condition!")::llvm::llvm_unreachable_internal("Unknown condition!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4143); | ||||
4144 | case ISD::SETOLT: | ||||
4145 | case ISD::SETLT: return 0; // Bit #0 = SETOLT | ||||
4146 | case ISD::SETOGT: | ||||
4147 | case ISD::SETGT: return 1; // Bit #1 = SETOGT | ||||
4148 | case ISD::SETOEQ: | ||||
4149 | case ISD::SETEQ: return 2; // Bit #2 = SETOEQ | ||||
4150 | case ISD::SETUO: return 3; // Bit #3 = SETUO | ||||
4151 | case ISD::SETUGE: | ||||
4152 | case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE | ||||
4153 | case ISD::SETULE: | ||||
4154 | case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE | ||||
4155 | case ISD::SETUNE: | ||||
4156 | case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE | ||||
4157 | case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO | ||||
4158 | case ISD::SETUEQ: | ||||
4159 | case ISD::SETOGE: | ||||
4160 | case ISD::SETOLE: | ||||
4161 | case ISD::SETONE: | ||||
4162 | llvm_unreachable("Invalid branch code: should be expanded by legalize")::llvm::llvm_unreachable_internal("Invalid branch code: should be expanded by legalize" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4162); | ||||
4163 | // These are invalid for floating point. Assume integer. | ||||
4164 | case ISD::SETULT: return 0; | ||||
4165 | case ISD::SETUGT: return 1; | ||||
4166 | } | ||||
4167 | } | ||||
4168 | |||||
4169 | // getVCmpInst: return the vector compare instruction for the specified | ||||
4170 | // vector type and condition code. Since this is for altivec specific code, | ||||
4171 | // only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128, | ||||
4172 | // and v4f32). | ||||
4173 | static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, | ||||
4174 | bool HasVSX, bool &Swap, bool &Negate) { | ||||
4175 | Swap = false; | ||||
4176 | Negate = false; | ||||
4177 | |||||
4178 | if (VecVT.isFloatingPoint()) { | ||||
4179 | /* Handle some cases by swapping input operands. */ | ||||
4180 | switch (CC) { | ||||
4181 | case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; | ||||
4182 | case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; | ||||
4183 | case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; | ||||
4184 | case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; | ||||
4185 | case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; | ||||
4186 | case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; | ||||
4187 | default: break; | ||||
4188 | } | ||||
4189 | /* Handle some cases by negating the result. */ | ||||
4190 | switch (CC) { | ||||
4191 | case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; | ||||
4192 | case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; | ||||
4193 | case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; | ||||
4194 | case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; | ||||
4195 | default: break; | ||||
4196 | } | ||||
4197 | /* We have instructions implementing the remaining cases. */ | ||||
4198 | switch (CC) { | ||||
4199 | case ISD::SETEQ: | ||||
4200 | case ISD::SETOEQ: | ||||
4201 | if (VecVT == MVT::v4f32) | ||||
4202 | return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; | ||||
4203 | else if (VecVT == MVT::v2f64) | ||||
4204 | return PPC::XVCMPEQDP; | ||||
4205 | break; | ||||
4206 | case ISD::SETGT: | ||||
4207 | case ISD::SETOGT: | ||||
4208 | if (VecVT == MVT::v4f32) | ||||
4209 | return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; | ||||
4210 | else if (VecVT == MVT::v2f64) | ||||
4211 | return PPC::XVCMPGTDP; | ||||
4212 | break; | ||||
4213 | case ISD::SETGE: | ||||
4214 | case ISD::SETOGE: | ||||
4215 | if (VecVT == MVT::v4f32) | ||||
4216 | return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; | ||||
4217 | else if (VecVT == MVT::v2f64) | ||||
4218 | return PPC::XVCMPGEDP; | ||||
4219 | break; | ||||
4220 | default: | ||||
4221 | break; | ||||
4222 | } | ||||
4223 | llvm_unreachable("Invalid floating-point vector compare condition")::llvm::llvm_unreachable_internal("Invalid floating-point vector compare condition" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4223); | ||||
4224 | } else { | ||||
4225 | /* Handle some cases by swapping input operands. */ | ||||
4226 | switch (CC) { | ||||
4227 | case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; | ||||
4228 | case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; | ||||
4229 | case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; | ||||
4230 | case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; | ||||
4231 | default: break; | ||||
4232 | } | ||||
4233 | /* Handle some cases by negating the result. */ | ||||
4234 | switch (CC) { | ||||
4235 | case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; | ||||
4236 | case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; | ||||
4237 | case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; | ||||
4238 | case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; | ||||
4239 | default: break; | ||||
4240 | } | ||||
4241 | /* We have instructions implementing the remaining cases. */ | ||||
4242 | switch (CC) { | ||||
4243 | case ISD::SETEQ: | ||||
4244 | case ISD::SETUEQ: | ||||
4245 | if (VecVT == MVT::v16i8) | ||||
4246 | return PPC::VCMPEQUB; | ||||
4247 | else if (VecVT == MVT::v8i16) | ||||
4248 | return PPC::VCMPEQUH; | ||||
4249 | else if (VecVT == MVT::v4i32) | ||||
4250 | return PPC::VCMPEQUW; | ||||
4251 | else if (VecVT == MVT::v2i64) | ||||
4252 | return PPC::VCMPEQUD; | ||||
4253 | else if (VecVT == MVT::v1i128) | ||||
4254 | return PPC::VCMPEQUQ; | ||||
4255 | break; | ||||
4256 | case ISD::SETGT: | ||||
4257 | if (VecVT == MVT::v16i8) | ||||
4258 | return PPC::VCMPGTSB; | ||||
4259 | else if (VecVT == MVT::v8i16) | ||||
4260 | return PPC::VCMPGTSH; | ||||
4261 | else if (VecVT == MVT::v4i32) | ||||
4262 | return PPC::VCMPGTSW; | ||||
4263 | else if (VecVT == MVT::v2i64) | ||||
4264 | return PPC::VCMPGTSD; | ||||
4265 | else if (VecVT == MVT::v1i128) | ||||
4266 | return PPC::VCMPGTSQ; | ||||
4267 | break; | ||||
4268 | case ISD::SETUGT: | ||||
4269 | if (VecVT == MVT::v16i8) | ||||
4270 | return PPC::VCMPGTUB; | ||||
4271 | else if (VecVT == MVT::v8i16) | ||||
4272 | return PPC::VCMPGTUH; | ||||
4273 | else if (VecVT == MVT::v4i32) | ||||
4274 | return PPC::VCMPGTUW; | ||||
4275 | else if (VecVT == MVT::v2i64) | ||||
4276 | return PPC::VCMPGTUD; | ||||
4277 | else if (VecVT == MVT::v1i128) | ||||
4278 | return PPC::VCMPGTUQ; | ||||
4279 | break; | ||||
4280 | default: | ||||
4281 | break; | ||||
4282 | } | ||||
4283 | llvm_unreachable("Invalid integer vector compare condition")::llvm::llvm_unreachable_internal("Invalid integer vector compare condition" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4283); | ||||
4284 | } | ||||
4285 | } | ||||
4286 | |||||
4287 | bool PPCDAGToDAGISel::trySETCC(SDNode *N) { | ||||
4288 | SDLoc dl(N); | ||||
4289 | unsigned Imm; | ||||
4290 | bool IsStrict = N->isStrictFPOpcode(); | ||||
4291 | ISD::CondCode CC = | ||||
4292 | cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); | ||||
4293 | EVT PtrVT = | ||||
4294 | CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); | ||||
4295 | bool isPPC64 = (PtrVT == MVT::i64); | ||||
4296 | SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); | ||||
4297 | |||||
4298 | SDValue LHS = N->getOperand(IsStrict ? 1 : 0); | ||||
4299 | SDValue RHS = N->getOperand(IsStrict ? 2 : 1); | ||||
4300 | |||||
4301 | if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) { | ||||
4302 | // We can codegen setcc op, imm very efficiently compared to a brcond. | ||||
4303 | // Check for those cases here. | ||||
4304 | // setcc op, 0 | ||||
4305 | if (Imm == 0) { | ||||
4306 | SDValue Op = LHS; | ||||
4307 | switch (CC) { | ||||
4308 | default: break; | ||||
4309 | case ISD::SETEQ: { | ||||
4310 | Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); | ||||
4311 | SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), | ||||
4312 | getI32Imm(31, dl) }; | ||||
4313 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4314 | return true; | ||||
4315 | } | ||||
4316 | case ISD::SETNE: { | ||||
4317 | if (isPPC64) break; | ||||
4318 | SDValue AD = | ||||
4319 | SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, | ||||
4320 | Op, getI32Imm(~0U, dl)), 0); | ||||
4321 | CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); | ||||
4322 | return true; | ||||
4323 | } | ||||
4324 | case ISD::SETLT: { | ||||
4325 | SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), | ||||
4326 | getI32Imm(31, dl) }; | ||||
4327 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4328 | return true; | ||||
4329 | } | ||||
4330 | case ISD::SETGT: { | ||||
4331 | SDValue T = | ||||
4332 | SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); | ||||
4333 | T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); | ||||
4334 | SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), | ||||
4335 | getI32Imm(31, dl) }; | ||||
4336 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4337 | return true; | ||||
4338 | } | ||||
4339 | } | ||||
4340 | } else if (Imm == ~0U) { // setcc op, -1 | ||||
4341 | SDValue Op = LHS; | ||||
4342 | switch (CC) { | ||||
4343 | default: break; | ||||
4344 | case ISD::SETEQ: | ||||
4345 | if (isPPC64) break; | ||||
4346 | Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, | ||||
4347 | Op, getI32Imm(1, dl)), 0); | ||||
4348 | CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, | ||||
4349 | SDValue(CurDAG->getMachineNode(PPC::LI, dl, | ||||
4350 | MVT::i32, | ||||
4351 | getI32Imm(0, dl)), | ||||
4352 | 0), Op.getValue(1)); | ||||
4353 | return true; | ||||
4354 | case ISD::SETNE: { | ||||
4355 | if (isPPC64) break; | ||||
4356 | Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); | ||||
4357 | SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, | ||||
4358 | Op, getI32Imm(~0U, dl)); | ||||
4359 | CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, | ||||
4360 | SDValue(AD, 1)); | ||||
4361 | return true; | ||||
4362 | } | ||||
4363 | case ISD::SETLT: { | ||||
4364 | SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, | ||||
4365 | getI32Imm(1, dl)), 0); | ||||
4366 | SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, | ||||
4367 | Op), 0); | ||||
4368 | SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), | ||||
4369 | getI32Imm(31, dl) }; | ||||
4370 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4371 | return true; | ||||
4372 | } | ||||
4373 | case ISD::SETGT: { | ||||
4374 | SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), | ||||
4375 | getI32Imm(31, dl) }; | ||||
4376 | Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); | ||||
4377 | CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); | ||||
4378 | return true; | ||||
4379 | } | ||||
4380 | } | ||||
4381 | } | ||||
4382 | } | ||||
4383 | |||||
4384 | // Altivec Vector compare instructions do not set any CR register by default and | ||||
4385 | // vector compare operations return the same type as the operands. | ||||
4386 | if (!IsStrict && LHS.getValueType().isVector()) { | ||||
4387 | if (Subtarget->hasSPE()) | ||||
4388 | return false; | ||||
4389 | |||||
4390 | EVT VecVT = LHS.getValueType(); | ||||
4391 | bool Swap, Negate; | ||||
4392 | unsigned int VCmpInst = | ||||
4393 | getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate); | ||||
4394 | if (Swap) | ||||
4395 | std::swap(LHS, RHS); | ||||
4396 | |||||
4397 | EVT ResVT = VecVT.changeVectorElementTypeToInteger(); | ||||
4398 | if (Negate) { | ||||
4399 | SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); | ||||
4400 | CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, | ||||
4401 | ResVT, VCmp, VCmp); | ||||
4402 | return true; | ||||
4403 | } | ||||
4404 | |||||
4405 | CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); | ||||
4406 | return true; | ||||
4407 | } | ||||
4408 | |||||
4409 | if (Subtarget->useCRBits()) | ||||
4410 | return false; | ||||
4411 | |||||
4412 | bool Inv; | ||||
4413 | unsigned Idx = getCRIdxForSetCC(CC, Inv); | ||||
4414 | SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain); | ||||
4415 | if (IsStrict) | ||||
4416 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1)); | ||||
4417 | SDValue IntCR; | ||||
4418 | |||||
4419 | // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that | ||||
4420 | // The correct compare instruction is already set by SelectCC() | ||||
4421 | if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { | ||||
4422 | Idx = 1; | ||||
4423 | } | ||||
4424 | |||||
4425 | // Force the ccreg into CR7. | ||||
4426 | SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); | ||||
4427 | |||||
4428 | SDValue InFlag(nullptr, 0); // Null incoming flag value. | ||||
4429 | CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, | ||||
4430 | InFlag).getValue(1); | ||||
4431 | |||||
4432 | IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, | ||||
4433 | CCReg), 0); | ||||
4434 | |||||
4435 | SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), | ||||
4436 | getI32Imm(31, dl), getI32Imm(31, dl) }; | ||||
4437 | if (!Inv) { | ||||
4438 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4439 | return true; | ||||
4440 | } | ||||
4441 | |||||
4442 | // Get the specified bit. | ||||
4443 | SDValue Tmp = | ||||
4444 | SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); | ||||
4445 | CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); | ||||
4446 | return true; | ||||
4447 | } | ||||
4448 | |||||
4449 | /// Does this node represent a load/store node whose address can be represented | ||||
4450 | /// with a register plus an immediate that's a multiple of \p Val: | ||||
4451 | bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { | ||||
4452 | LoadSDNode *LDN = dyn_cast<LoadSDNode>(N); | ||||
| |||||
4453 | StoreSDNode *STN = dyn_cast<StoreSDNode>(N); | ||||
4454 | SDValue AddrOp; | ||||
4455 | if (LDN
| ||||
4456 | AddrOp = LDN->getOperand(1); | ||||
4457 | else if (STN
| ||||
4458 | AddrOp = STN->getOperand(2); | ||||
4459 | |||||
4460 | // If the address points a frame object or a frame object with an offset, | ||||
4461 | // we need to check the object alignment. | ||||
4462 | short Imm = 0; | ||||
4463 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>( | ||||
4464 | AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : | ||||
4465 | AddrOp)) { | ||||
4466 | // If op0 is a frame index that is under aligned, we can't do it either, | ||||
4467 | // because it is translated to r31 or r1 + slot + offset. We won't know the | ||||
4468 | // slot number until the stack frame is finalized. | ||||
4469 | const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); | ||||
4470 | unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value(); | ||||
4471 | if ((SlotAlign % Val) != 0) | ||||
4472 | return false; | ||||
4473 | |||||
4474 | // If we have an offset, we need further check on the offset. | ||||
4475 | if (AddrOp.getOpcode() != ISD::ADD) | ||||
4476 | return true; | ||||
4477 | } | ||||
4478 | |||||
4479 | if (AddrOp.getOpcode() == ISD::ADD) | ||||
4480 | return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); | ||||
4481 | |||||
4482 | // If the address comes from the outside, the offset will be zero. | ||||
4483 | return AddrOp.getOpcode() == ISD::CopyFromReg; | ||||
4484 | } | ||||
4485 | |||||
4486 | void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { | ||||
4487 | // Transfer memoperands. | ||||
4488 | MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); | ||||
4489 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); | ||||
4490 | } | ||||
4491 | |||||
4492 | static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, | ||||
4493 | bool &NeedSwapOps, bool &IsUnCmp) { | ||||
4494 | |||||
4495 | assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.")(static_cast <bool> (N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.") ? void (0) : __assert_fail ("N->getOpcode() == ISD::SELECT_CC && \"Expecting a SELECT_CC here.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4495, __extension__ __PRETTY_FUNCTION__)); | ||||
4496 | |||||
4497 | SDValue LHS = N->getOperand(0); | ||||
4498 | SDValue RHS = N->getOperand(1); | ||||
4499 | SDValue TrueRes = N->getOperand(2); | ||||
4500 | SDValue FalseRes = N->getOperand(3); | ||||
4501 | ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); | ||||
4502 | if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 && | ||||
4503 | N->getSimpleValueType(0) != MVT::i32)) | ||||
4504 | return false; | ||||
4505 | |||||
4506 | // We are looking for any of: | ||||
4507 | // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) | ||||
4508 | // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) | ||||
4509 | // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) | ||||
4510 | // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) | ||||
4511 | int64_t TrueResVal = TrueConst->getSExtValue(); | ||||
4512 | if ((TrueResVal < -1 || TrueResVal > 1) || | ||||
4513 | (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || | ||||
4514 | (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || | ||||
4515 | (TrueResVal == 0 && | ||||
4516 | (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) | ||||
4517 | return false; | ||||
4518 | |||||
4519 | SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC | ||||
4520 | ? FalseRes | ||||
4521 | : FalseRes.getOperand(0); | ||||
4522 | bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC; | ||||
4523 | if (SetOrSelCC.getOpcode() != ISD::SETCC && | ||||
4524 | SetOrSelCC.getOpcode() != ISD::SELECT_CC) | ||||
4525 | return false; | ||||
4526 | |||||
4527 | // Without this setb optimization, the outer SELECT_CC will be manually | ||||
4528 | // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass | ||||
4529 | // transforms pseudo instruction to isel instruction. When there are more than | ||||
4530 | // one use for result like zext/sext, with current optimization we only see | ||||
4531 | // isel is replaced by setb but can't see any significant gain. Since | ||||
4532 | // setb has longer latency than original isel, we should avoid this. Another | ||||
4533 | // point is that setb requires comparison always kept, it can break the | ||||
4534 | // opportunity to get the comparison away if we have in future. | ||||
4535 | if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) | ||||
4536 | return false; | ||||
4537 | |||||
4538 | SDValue InnerLHS = SetOrSelCC.getOperand(0); | ||||
4539 | SDValue InnerRHS = SetOrSelCC.getOperand(1); | ||||
4540 | ISD::CondCode InnerCC = | ||||
4541 | cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); | ||||
4542 | // If the inner comparison is a select_cc, make sure the true/false values are | ||||
4543 | // 1/-1 and canonicalize it if needed. | ||||
4544 | if (InnerIsSel) { | ||||
4545 | ConstantSDNode *SelCCTrueConst = | ||||
4546 | dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); | ||||
4547 | ConstantSDNode *SelCCFalseConst = | ||||
4548 | dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); | ||||
4549 | if (!SelCCTrueConst || !SelCCFalseConst) | ||||
4550 | return false; | ||||
4551 | int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); | ||||
4552 | int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); | ||||
4553 | // The values must be -1/1 (requiring a swap) or 1/-1. | ||||
4554 | if (SelCCTVal == -1 && SelCCFVal == 1) { | ||||
4555 | std::swap(InnerLHS, InnerRHS); | ||||
4556 | } else if (SelCCTVal != 1 || SelCCFVal != -1) | ||||
4557 | return false; | ||||
4558 | } | ||||
4559 | |||||
4560 | // Canonicalize unsigned case | ||||
4561 | if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { | ||||
4562 | IsUnCmp = true; | ||||
4563 | InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT; | ||||
4564 | } | ||||
4565 | |||||
4566 | bool InnerSwapped = false; | ||||
4567 | if (LHS == InnerRHS && RHS == InnerLHS) | ||||
4568 | InnerSwapped = true; | ||||
4569 | else if (LHS != InnerLHS || RHS != InnerRHS) | ||||
4570 | return false; | ||||
4571 | |||||
4572 | switch (CC) { | ||||
4573 | // (select_cc lhs, rhs, 0, \ | ||||
4574 | // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) | ||||
4575 | case ISD::SETEQ: | ||||
4576 | if (!InnerIsSel) | ||||
4577 | return false; | ||||
4578 | if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) | ||||
4579 | return false; | ||||
4580 | NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped; | ||||
4581 | break; | ||||
4582 | |||||
4583 | // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) | ||||
4584 | // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) | ||||
4585 | // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) | ||||
4586 | // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) | ||||
4587 | // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) | ||||
4588 | // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) | ||||
4589 | case ISD::SETULT: | ||||
4590 | if (!IsUnCmp && InnerCC != ISD::SETNE) | ||||
4591 | return false; | ||||
4592 | IsUnCmp = true; | ||||
4593 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
4594 | case ISD::SETLT: | ||||
4595 | if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || | ||||
4596 | (InnerCC == ISD::SETLT && InnerSwapped)) | ||||
4597 | NeedSwapOps = (TrueResVal == 1); | ||||
4598 | else | ||||
4599 | return false; | ||||
4600 | break; | ||||
4601 | |||||
4602 | // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) | ||||
4603 | // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) | ||||
4604 | // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) | ||||
4605 | // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) | ||||
4606 | // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) | ||||
4607 | // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) | ||||
4608 | case ISD::SETUGT: | ||||
4609 | if (!IsUnCmp && InnerCC != ISD::SETNE) | ||||
4610 | return false; | ||||
4611 | IsUnCmp = true; | ||||
4612 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
4613 | case ISD::SETGT: | ||||
4614 | if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || | ||||
4615 | (InnerCC == ISD::SETGT && InnerSwapped)) | ||||
4616 | NeedSwapOps = (TrueResVal == -1); | ||||
4617 | else | ||||
4618 | return false; | ||||
4619 | break; | ||||
4620 | |||||
4621 | default: | ||||
4622 | return false; | ||||
4623 | } | ||||
4624 | |||||
4625 | LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Found a node that can be lowered to a SETB: " ; } } while (false); | ||||
4626 | LLVM_DEBUG(N->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(); } } while (false); | ||||
4627 | |||||
4628 | return true; | ||||
4629 | } | ||||
4630 | |||||
4631 | // Return true if it's a software square-root/divide operand. | ||||
4632 | static bool isSWTestOp(SDValue N) { | ||||
4633 | if (N.getOpcode() == PPCISD::FTSQRT) | ||||
4634 | return true; | ||||
4635 | if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0))) | ||||
4636 | return false; | ||||
4637 | switch (N.getConstantOperandVal(0)) { | ||||
4638 | case Intrinsic::ppc_vsx_xvtdivdp: | ||||
4639 | case Intrinsic::ppc_vsx_xvtdivsp: | ||||
4640 | case Intrinsic::ppc_vsx_xvtsqrtdp: | ||||
4641 | case Intrinsic::ppc_vsx_xvtsqrtsp: | ||||
4642 | return true; | ||||
4643 | } | ||||
4644 | return false; | ||||
4645 | } | ||||
4646 | |||||
4647 | bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) { | ||||
4648 | assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.")(static_cast <bool> (N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.") ? void (0) : __assert_fail ("N->getOpcode() == ISD::BR_CC && \"ISD::BR_CC is expected.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4648, __extension__ __PRETTY_FUNCTION__)); | ||||
4649 | // We are looking for following patterns, where `truncate to i1` actually has | ||||
4650 | // the same semantic with `and 1`. | ||||
4651 | // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp) | ||||
4652 | // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp) | ||||
4653 | // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp) | ||||
4654 | // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp) | ||||
4655 | // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp) | ||||
4656 | // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp) | ||||
4657 | // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp) | ||||
4658 | // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp) | ||||
4659 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); | ||||
4660 | if (CC != ISD::SETEQ && CC != ISD::SETNE) | ||||
4661 | return false; | ||||
4662 | |||||
4663 | SDValue CmpRHS = N->getOperand(3); | ||||
4664 | if (!isa<ConstantSDNode>(CmpRHS) || | ||||
4665 | cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0) | ||||
4666 | return false; | ||||
4667 | |||||
4668 | SDValue CmpLHS = N->getOperand(2); | ||||
4669 | if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0))) | ||||
4670 | return false; | ||||
4671 | |||||
4672 | unsigned PCC = 0; | ||||
4673 | bool IsCCNE = CC == ISD::SETNE; | ||||
4674 | if (CmpLHS.getOpcode() == ISD::AND && | ||||
4675 | isa<ConstantSDNode>(CmpLHS.getOperand(1))) | ||||
4676 | switch (CmpLHS.getConstantOperandVal(1)) { | ||||
4677 | case 1: | ||||
4678 | PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; | ||||
4679 | break; | ||||
4680 | case 2: | ||||
4681 | PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE; | ||||
4682 | break; | ||||
4683 | case 4: | ||||
4684 | PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE; | ||||
4685 | break; | ||||
4686 | case 8: | ||||
4687 | PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE; | ||||
4688 | break; | ||||
4689 | default: | ||||
4690 | return false; | ||||
4691 | } | ||||
4692 | else if (CmpLHS.getOpcode() == ISD::TRUNCATE && | ||||
4693 | CmpLHS.getValueType() == MVT::i1) | ||||
4694 | PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU; | ||||
4695 | |||||
4696 | if (PCC) { | ||||
4697 | SDLoc dl(N); | ||||
4698 | SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4), | ||||
4699 | N->getOperand(0)}; | ||||
4700 | CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); | ||||
4701 | return true; | ||||
4702 | } | ||||
4703 | return false; | ||||
4704 | } | ||||
4705 | |||||
4706 | bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { | ||||
4707 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4707, __extension__ __PRETTY_FUNCTION__)); | ||||
4708 | unsigned Imm; | ||||
4709 | if (!isInt32Immediate(N->getOperand(1), Imm)) | ||||
4710 | return false; | ||||
4711 | |||||
4712 | SDLoc dl(N); | ||||
4713 | SDValue Val = N->getOperand(0); | ||||
4714 | unsigned SH, MB, ME; | ||||
4715 | // If this is an and of a value rotated between 0 and 31 bits and then and'd | ||||
4716 | // with a mask, emit rlwinm | ||||
4717 | if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) { | ||||
4718 | Val = Val.getOperand(0); | ||||
4719 | SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl), | ||||
4720 | getI32Imm(ME, dl)}; | ||||
4721 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4722 | return true; | ||||
4723 | } | ||||
4724 | |||||
4725 | // If this is just a masked value where the input is not handled, and | ||||
4726 | // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm | ||||
4727 | if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) { | ||||
4728 | SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl), | ||||
4729 | getI32Imm(ME, dl)}; | ||||
4730 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
4731 | return true; | ||||
4732 | } | ||||
4733 | |||||
4734 | // AND X, 0 -> 0, not "rlwinm 32". | ||||
4735 | if (Imm == 0) { | ||||
4736 | ReplaceUses(SDValue(N, 0), N->getOperand(1)); | ||||
4737 | return true; | ||||
4738 | } | ||||
4739 | |||||
4740 | return false; | ||||
4741 | } | ||||
4742 | |||||
4743 | bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { | ||||
4744 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4744, __extension__ __PRETTY_FUNCTION__)); | ||||
4745 | uint64_t Imm64; | ||||
4746 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) | ||||
4747 | return false; | ||||
4748 | |||||
4749 | unsigned MB, ME; | ||||
4750 | if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) { | ||||
4751 | // MB ME | ||||
4752 | // +----------------------+ | ||||
4753 | // |xxxxxxxxxxx00011111000| | ||||
4754 | // +----------------------+ | ||||
4755 | // 0 32 64 | ||||
4756 | // We can only do it if the MB is larger than 32 and MB <= ME | ||||
4757 | // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even | ||||
4758 | // we didn't rotate it. | ||||
4759 | SDLoc dl(N); | ||||
4760 | SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl), | ||||
4761 | getI64Imm(ME - 32, dl)}; | ||||
4762 | CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); | ||||
4763 | return true; | ||||
4764 | } | ||||
4765 | |||||
4766 | return false; | ||||
4767 | } | ||||
4768 | |||||
4769 | bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { | ||||
4770 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4770, __extension__ __PRETTY_FUNCTION__)); | ||||
4771 | uint64_t Imm64; | ||||
4772 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) | ||||
4773 | return false; | ||||
4774 | |||||
4775 | // Do nothing if it is 16-bit imm as the pattern in the .td file handle | ||||
4776 | // it well with "andi.". | ||||
4777 | if (isUInt<16>(Imm64)) | ||||
4778 | return false; | ||||
4779 | |||||
4780 | SDLoc Loc(N); | ||||
4781 | SDValue Val = N->getOperand(0); | ||||
4782 | |||||
4783 | // Optimized with two rldicl's as follows: | ||||
4784 | // Add missing bits on left to the mask and check that the mask is a | ||||
4785 | // wrapped run of ones, i.e. | ||||
4786 | // Change pattern |0001111100000011111111| | ||||
4787 | // to |1111111100000011111111|. | ||||
4788 | unsigned NumOfLeadingZeros = countLeadingZeros(Imm64); | ||||
4789 | if (NumOfLeadingZeros != 0) | ||||
4790 | Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros); | ||||
4791 | |||||
4792 | unsigned MB, ME; | ||||
4793 | if (!isRunOfOnes64(Imm64, MB, ME)) | ||||
4794 | return false; | ||||
4795 | |||||
4796 | // ME MB MB-ME+63 | ||||
4797 | // +----------------------+ +----------------------+ | ||||
4798 | // |1111111100000011111111| -> |0000001111111111111111| | ||||
4799 | // +----------------------+ +----------------------+ | ||||
4800 | // 0 63 0 63 | ||||
4801 | // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between. | ||||
4802 | unsigned OnesOnLeft = ME + 1; | ||||
4803 | unsigned ZerosInBetween = (MB - ME + 63) & 63; | ||||
4804 | // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear | ||||
4805 | // on the left the bits that are already zeros in the mask. | ||||
4806 | Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val, | ||||
4807 | getI64Imm(OnesOnLeft, Loc), | ||||
4808 | getI64Imm(ZerosInBetween, Loc)), | ||||
4809 | 0); | ||||
4810 | // MB-ME+63 ME MB | ||||
4811 | // +----------------------+ +----------------------+ | ||||
4812 | // |0000001111111111111111| -> |0001111100000011111111| | ||||
4813 | // +----------------------+ +----------------------+ | ||||
4814 | // 0 63 0 63 | ||||
4815 | // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the | ||||
4816 | // left the number of ones we previously added. | ||||
4817 | SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc), | ||||
4818 | getI64Imm(NumOfLeadingZeros, Loc)}; | ||||
4819 | CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); | ||||
4820 | return true; | ||||
4821 | } | ||||
4822 | |||||
4823 | bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { | ||||
4824 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4824, __extension__ __PRETTY_FUNCTION__)); | ||||
4825 | unsigned Imm; | ||||
4826 | if (!isInt32Immediate(N->getOperand(1), Imm)) | ||||
4827 | return false; | ||||
4828 | |||||
4829 | SDValue Val = N->getOperand(0); | ||||
4830 | unsigned Imm2; | ||||
4831 | // ISD::OR doesn't get all the bitfield insertion fun. | ||||
4832 | // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a | ||||
4833 | // bitfield insert. | ||||
4834 | if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2)) | ||||
4835 | return false; | ||||
4836 | |||||
4837 | // The idea here is to check whether this is equivalent to: | ||||
4838 | // (c1 & m) | (x & ~m) | ||||
4839 | // where m is a run-of-ones mask. The logic here is that, for each bit in | ||||
4840 | // c1 and c2: | ||||
4841 | // - if both are 1, then the output will be 1. | ||||
4842 | // - if both are 0, then the output will be 0. | ||||
4843 | // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will | ||||
4844 | // come from x. | ||||
4845 | // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will | ||||
4846 | // be 0. | ||||
4847 | // If that last condition is never the case, then we can form m from the | ||||
4848 | // bits that are the same between c1 and c2. | ||||
4849 | unsigned MB, ME; | ||||
4850 | if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) { | ||||
4851 | SDLoc dl(N); | ||||
4852 | SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl), | ||||
4853 | getI32Imm(MB, dl), getI32Imm(ME, dl)}; | ||||
4854 | ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); | ||||
4855 | return true; | ||||
4856 | } | ||||
4857 | |||||
4858 | return false; | ||||
4859 | } | ||||
4860 | |||||
4861 | bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) { | ||||
4862 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4862, __extension__ __PRETTY_FUNCTION__)); | ||||
4863 | uint64_t Imm64; | ||||
4864 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64)) | ||||
4865 | return false; | ||||
4866 | |||||
4867 | // If this is a 64-bit zero-extension mask, emit rldicl. | ||||
4868 | unsigned MB = 64 - countTrailingOnes(Imm64); | ||||
4869 | unsigned SH = 0; | ||||
4870 | unsigned Imm; | ||||
4871 | SDValue Val = N->getOperand(0); | ||||
4872 | SDLoc dl(N); | ||||
4873 | |||||
4874 | if (Val.getOpcode() == ISD::ANY_EXTEND) { | ||||
4875 | auto Op0 = Val.getOperand(0); | ||||
4876 | if (Op0.getOpcode() == ISD::SRL && | ||||
4877 | isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { | ||||
4878 | |||||
4879 | auto ResultType = Val.getNode()->getValueType(0); | ||||
4880 | auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType); | ||||
4881 | SDValue IDVal(ImDef, 0); | ||||
4882 | |||||
4883 | Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType, | ||||
4884 | IDVal, Op0.getOperand(0), | ||||
4885 | getI32Imm(1, dl)), | ||||
4886 | 0); | ||||
4887 | SH = 64 - Imm; | ||||
4888 | } | ||||
4889 | } | ||||
4890 | |||||
4891 | // If the operand is a logical right shift, we can fold it into this | ||||
4892 | // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) | ||||
4893 | // for n <= mb. The right shift is really a left rotate followed by a | ||||
4894 | // mask, and this mask is a more-restrictive sub-mask of the mask implied | ||||
4895 | // by the shift. | ||||
4896 | if (Val.getOpcode() == ISD::SRL && | ||||
4897 | isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { | ||||
4898 | assert(Imm < 64 && "Illegal shift amount")(static_cast <bool> (Imm < 64 && "Illegal shift amount" ) ? void (0) : __assert_fail ("Imm < 64 && \"Illegal shift amount\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4898, __extension__ __PRETTY_FUNCTION__)); | ||||
4899 | Val = Val.getOperand(0); | ||||
4900 | SH = 64 - Imm; | ||||
4901 | } | ||||
4902 | |||||
4903 | SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)}; | ||||
4904 | CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); | ||||
4905 | return true; | ||||
4906 | } | ||||
4907 | |||||
4908 | bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) { | ||||
4909 | assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::AND && "ISD::AND SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"ISD::AND SDNode expected\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4909, __extension__ __PRETTY_FUNCTION__)); | ||||
4910 | uint64_t Imm64; | ||||
4911 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || | ||||
4912 | !isMask_64(~Imm64)) | ||||
4913 | return false; | ||||
4914 | |||||
4915 | // If this is a negated 64-bit zero-extension mask, | ||||
4916 | // i.e. the immediate is a sequence of ones from most significant side | ||||
4917 | // and all zero for reminder, we should use rldicr. | ||||
4918 | unsigned MB = 63 - countTrailingOnes(~Imm64); | ||||
4919 | unsigned SH = 0; | ||||
4920 | SDLoc dl(N); | ||||
4921 | SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)}; | ||||
4922 | CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); | ||||
4923 | return true; | ||||
4924 | } | ||||
4925 | |||||
4926 | bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) { | ||||
4927 | assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected")(static_cast <bool> (N->getOpcode() == ISD::OR && "ISD::OR SDNode expected") ? void (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"ISD::OR SDNode expected\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4927, __extension__ __PRETTY_FUNCTION__)); | ||||
4928 | uint64_t Imm64; | ||||
4929 | unsigned MB, ME; | ||||
4930 | SDValue N0 = N->getOperand(0); | ||||
4931 | |||||
4932 | // We won't get fewer instructions if the imm is 32-bit integer. | ||||
4933 | // rldimi requires the imm to have consecutive ones with both sides zero. | ||||
4934 | // Also, make sure the first Op has only one use, otherwise this may increase | ||||
4935 | // register pressure since rldimi is destructive. | ||||
4936 | if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || | ||||
4937 | isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse()) | ||||
4938 | return false; | ||||
4939 | |||||
4940 | unsigned SH = 63 - ME; | ||||
4941 | SDLoc Dl(N); | ||||
4942 | // Use select64Imm for making LI instr instead of directly putting Imm64 | ||||
4943 | SDValue Ops[] = { | ||||
4944 | N->getOperand(0), | ||||
4945 | SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0), | ||||
4946 | getI32Imm(SH, Dl), getI32Imm(MB, Dl)}; | ||||
4947 | CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops); | ||||
4948 | return true; | ||||
4949 | } | ||||
4950 | |||||
4951 | // Select - Convert the specified operand from a target-independent to a | ||||
4952 | // target-specific node if it hasn't already been changed. | ||||
4953 | void PPCDAGToDAGISel::Select(SDNode *N) { | ||||
4954 | SDLoc dl(N); | ||||
4955 | if (N->isMachineOpcode()) { | ||||
4956 | N->setNodeId(-1); | ||||
4957 | return; // Already selected. | ||||
4958 | } | ||||
4959 | |||||
4960 | // In case any misguided DAG-level optimizations form an ADD with a | ||||
4961 | // TargetConstant operand, crash here instead of miscompiling (by selecting | ||||
4962 | // an r+r add instead of some kind of r+i add). | ||||
4963 | if (N->getOpcode() == ISD::ADD && | ||||
4964 | N->getOperand(1).getOpcode() == ISD::TargetConstant) | ||||
4965 | llvm_unreachable("Invalid ADD with TargetConstant operand")::llvm::llvm_unreachable_internal("Invalid ADD with TargetConstant operand" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 4965); | ||||
4966 | |||||
4967 | // Try matching complex bit permutations before doing anything else. | ||||
4968 | if (tryBitPermutation(N)) | ||||
4969 | return; | ||||
4970 | |||||
4971 | // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). | ||||
4972 | if (tryIntCompareInGPR(N)) | ||||
4973 | return; | ||||
4974 | |||||
4975 | switch (N->getOpcode()) { | ||||
4976 | default: break; | ||||
4977 | |||||
4978 | case ISD::Constant: | ||||
4979 | if (N->getValueType(0) == MVT::i64) { | ||||
4980 | ReplaceNode(N, selectI64Imm(CurDAG, N)); | ||||
4981 | return; | ||||
4982 | } | ||||
4983 | break; | ||||
4984 | |||||
4985 | case ISD::INTRINSIC_WO_CHAIN: { | ||||
4986 | // We emit the PPC::FSELS instruction here because of type conflicts with | ||||
4987 | // the comparison operand. The FSELS instruction is defined to use an 8-byte | ||||
4988 | // comparison like the FSELD version. The fsels intrinsic takes a 4-byte | ||||
4989 | // value for the comparison. When selecting through a .td file, a type | ||||
4990 | // error is raised. Must check this first so we never break on the | ||||
4991 | // !Subtarget->isISA3_1() check. | ||||
4992 | if (N->getConstantOperandVal(0) == Intrinsic::ppc_fsels) { | ||||
4993 | SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)}; | ||||
4994 | CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops); | ||||
4995 | return; | ||||
4996 | } | ||||
4997 | |||||
4998 | if (!Subtarget->isISA3_1()) | ||||
4999 | break; | ||||
5000 | unsigned Opcode = 0; | ||||
5001 | switch (N->getConstantOperandVal(0)) { | ||||
5002 | default: | ||||
5003 | break; | ||||
5004 | case Intrinsic::ppc_altivec_vstribr_p: | ||||
5005 | Opcode = PPC::VSTRIBR_rec; | ||||
5006 | break; | ||||
5007 | case Intrinsic::ppc_altivec_vstribl_p: | ||||
5008 | Opcode = PPC::VSTRIBL_rec; | ||||
5009 | break; | ||||
5010 | case Intrinsic::ppc_altivec_vstrihr_p: | ||||
5011 | Opcode = PPC::VSTRIHR_rec; | ||||
5012 | break; | ||||
5013 | case Intrinsic::ppc_altivec_vstrihl_p: | ||||
5014 | Opcode = PPC::VSTRIHL_rec; | ||||
5015 | break; | ||||
5016 | } | ||||
5017 | if (!Opcode) | ||||
5018 | break; | ||||
5019 | |||||
5020 | // Generate the appropriate vector string isolate intrinsic to match. | ||||
5021 | EVT VTs[] = {MVT::v16i8, MVT::Glue}; | ||||
5022 | SDValue VecStrOp = | ||||
5023 | SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0); | ||||
5024 | // Vector string isolate instructions update the EQ bit of CR6. | ||||
5025 | // Generate a SETBC instruction to extract the bit and place it in a GPR. | ||||
5026 | SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); | ||||
5027 | SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); | ||||
5028 | SDValue CRBit = SDValue( | ||||
5029 | CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, | ||||
5030 | CR6Reg, SubRegIdx, VecStrOp.getValue(1)), | ||||
5031 | 0); | ||||
5032 | CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit); | ||||
5033 | return; | ||||
5034 | } | ||||
5035 | |||||
5036 | case ISD::SETCC: | ||||
5037 | case ISD::STRICT_FSETCC: | ||||
5038 | case ISD::STRICT_FSETCCS: | ||||
5039 | if (trySETCC(N)) | ||||
5040 | return; | ||||
5041 | break; | ||||
5042 | // These nodes will be transformed into GETtlsADDR32 node, which | ||||
5043 | // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT | ||||
5044 | case PPCISD::ADDI_TLSLD_L_ADDR: | ||||
5045 | case PPCISD::ADDI_TLSGD_L_ADDR: { | ||||
5046 | const Module *Mod = MF->getFunction().getParent(); | ||||
5047 | if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || | ||||
5048 | !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() || | ||||
5049 | Mod->getPICLevel() == PICLevel::SmallPIC) | ||||
5050 | break; | ||||
5051 | // Attach global base pointer on GETtlsADDR32 node in order to | ||||
5052 | // generate secure plt code for TLS symbols. | ||||
5053 | getGlobalBaseReg(); | ||||
5054 | } break; | ||||
5055 | case PPCISD::CALL: { | ||||
5056 | if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || | ||||
5057 | !TM.isPositionIndependent() || !Subtarget->isSecurePlt() || | ||||
5058 | !Subtarget->isTargetELF()) | ||||
5059 | break; | ||||
5060 | |||||
5061 | SDValue Op = N->getOperand(1); | ||||
5062 | |||||
5063 | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { | ||||
5064 | if (GA->getTargetFlags() == PPCII::MO_PLT) | ||||
5065 | getGlobalBaseReg(); | ||||
5066 | } | ||||
5067 | else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { | ||||
5068 | if (ES->getTargetFlags() == PPCII::MO_PLT) | ||||
5069 | getGlobalBaseReg(); | ||||
5070 | } | ||||
5071 | } | ||||
5072 | break; | ||||
5073 | |||||
5074 | case PPCISD::GlobalBaseReg: | ||||
5075 | ReplaceNode(N, getGlobalBaseReg()); | ||||
5076 | return; | ||||
5077 | |||||
5078 | case ISD::FrameIndex: | ||||
5079 | selectFrameIndex(N, N); | ||||
5080 | return; | ||||
5081 | |||||
5082 | case PPCISD::MFOCRF: { | ||||
5083 | SDValue InFlag = N->getOperand(1); | ||||
5084 | ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, | ||||
5085 | N->getOperand(0), InFlag)); | ||||
5086 | return; | ||||
5087 | } | ||||
5088 | |||||
5089 | case PPCISD::READ_TIME_BASE: | ||||
5090 | ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, | ||||
5091 | MVT::Other, N->getOperand(0))); | ||||
5092 | return; | ||||
5093 | |||||
5094 | case PPCISD::SRA_ADDZE: { | ||||
5095 | SDValue N0 = N->getOperand(0); | ||||
5096 | SDValue ShiftAmt = | ||||
5097 | CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))-> | ||||
5098 | getConstantIntValue(), dl, | ||||
5099 | N->getValueType(0)); | ||||
5100 | if (N->getValueType(0) == MVT::i64) { | ||||
5101 | SDNode *Op = | ||||
5102 | CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, | ||||
5103 | N0, ShiftAmt); | ||||
5104 | CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), | ||||
5105 | SDValue(Op, 1)); | ||||
5106 | return; | ||||
5107 | } else { | ||||
5108 | assert(N->getValueType(0) == MVT::i32 &&(static_cast <bool> (N->getValueType(0) == MVT::i32 && "Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail ("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5109, __extension__ __PRETTY_FUNCTION__)) | ||||
5109 | "Expecting i64 or i32 in PPCISD::SRA_ADDZE")(static_cast <bool> (N->getValueType(0) == MVT::i32 && "Expecting i64 or i32 in PPCISD::SRA_ADDZE") ? void (0) : __assert_fail ("N->getValueType(0) == MVT::i32 && \"Expecting i64 or i32 in PPCISD::SRA_ADDZE\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5109, __extension__ __PRETTY_FUNCTION__)); | ||||
5110 | SDNode *Op = | ||||
5111 | CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, | ||||
5112 | N0, ShiftAmt); | ||||
5113 | CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), | ||||
5114 | SDValue(Op, 1)); | ||||
5115 | return; | ||||
5116 | } | ||||
5117 | } | ||||
5118 | |||||
5119 | case ISD::STORE: { | ||||
5120 | // Change TLS initial-exec D-form stores to X-form stores. | ||||
5121 | StoreSDNode *ST = cast<StoreSDNode>(N); | ||||
5122 | if (EnableTLSOpt && Subtarget->isELFv2ABI() && | ||||
5123 | ST->getAddressingMode() != ISD::PRE_INC) | ||||
5124 | if (tryTLSXFormStore(ST)) | ||||
5125 | return; | ||||
5126 | break; | ||||
5127 | } | ||||
5128 | case ISD::LOAD: { | ||||
5129 | // Handle preincrement loads. | ||||
5130 | LoadSDNode *LD = cast<LoadSDNode>(N); | ||||
5131 | EVT LoadedVT = LD->getMemoryVT(); | ||||
5132 | |||||
5133 | // Normal loads are handled by code generated from the .td file. | ||||
5134 | if (LD->getAddressingMode() != ISD::PRE_INC) { | ||||
5135 | // Change TLS initial-exec D-form loads to X-form loads. | ||||
5136 | if (EnableTLSOpt && Subtarget->isELFv2ABI()) | ||||
5137 | if (tryTLSXFormLoad(LD)) | ||||
5138 | return; | ||||
5139 | break; | ||||
5140 | } | ||||
5141 | |||||
5142 | SDValue Offset = LD->getOffset(); | ||||
5143 | if (Offset.getOpcode() == ISD::TargetConstant || | ||||
5144 | Offset.getOpcode() == ISD::TargetGlobalAddress) { | ||||
5145 | |||||
5146 | unsigned Opcode; | ||||
5147 | bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; | ||||
5148 | if (LD->getValueType(0) != MVT::i64) { | ||||
5149 | // Handle PPC32 integer and normal FP loads. | ||||
5150 | assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5150, __extension__ __PRETTY_FUNCTION__)); | ||||
5151 | switch (LoadedVT.getSimpleVT().SimpleTy) { | ||||
5152 | default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5152); | ||||
5153 | case MVT::f64: Opcode = PPC::LFDU; break; | ||||
5154 | case MVT::f32: Opcode = PPC::LFSU; break; | ||||
5155 | case MVT::i32: Opcode = PPC::LWZU; break; | ||||
5156 | case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break; | ||||
5157 | case MVT::i1: | ||||
5158 | case MVT::i8: Opcode = PPC::LBZU; break; | ||||
5159 | } | ||||
5160 | } else { | ||||
5161 | assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64 && "Unknown load result type!") ? void (0) : __assert_fail ("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5161, __extension__ __PRETTY_FUNCTION__)); | ||||
5162 | assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5162, __extension__ __PRETTY_FUNCTION__)); | ||||
5163 | switch (LoadedVT.getSimpleVT().SimpleTy) { | ||||
5164 | default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5164); | ||||
5165 | case MVT::i64: Opcode = PPC::LDU; break; | ||||
5166 | case MVT::i32: Opcode = PPC::LWZU8; break; | ||||
5167 | case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break; | ||||
5168 | case MVT::i1: | ||||
5169 | case MVT::i8: Opcode = PPC::LBZU8; break; | ||||
5170 | } | ||||
5171 | } | ||||
5172 | |||||
5173 | SDValue Chain = LD->getChain(); | ||||
5174 | SDValue Base = LD->getBasePtr(); | ||||
5175 | SDValue Ops[] = { Offset, Base, Chain }; | ||||
5176 | SDNode *MN = CurDAG->getMachineNode( | ||||
5177 | Opcode, dl, LD->getValueType(0), | ||||
5178 | PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); | ||||
5179 | transferMemOperands(N, MN); | ||||
5180 | ReplaceNode(N, MN); | ||||
5181 | return; | ||||
5182 | } else { | ||||
5183 | unsigned Opcode; | ||||
5184 | bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; | ||||
5185 | if (LD->getValueType(0) != MVT::i64) { | ||||
5186 | // Handle PPC32 integer and normal FP loads. | ||||
5187 | assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5187, __extension__ __PRETTY_FUNCTION__)); | ||||
5188 | switch (LoadedVT.getSimpleVT().SimpleTy) { | ||||
5189 | default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5189); | ||||
5190 | case MVT::f64: Opcode = PPC::LFDUX; break; | ||||
5191 | case MVT::f32: Opcode = PPC::LFSUX; break; | ||||
5192 | case MVT::i32: Opcode = PPC::LWZUX; break; | ||||
5193 | case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; | ||||
5194 | case MVT::i1: | ||||
5195 | case MVT::i8: Opcode = PPC::LBZUX; break; | ||||
5196 | } | ||||
5197 | } else { | ||||
5198 | assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!")(static_cast <bool> (LD->getValueType(0) == MVT::i64 && "Unknown load result type!") ? void (0) : __assert_fail ("LD->getValueType(0) == MVT::i64 && \"Unknown load result type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5198, __extension__ __PRETTY_FUNCTION__)); | ||||
5199 | assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5200, __extension__ __PRETTY_FUNCTION__)) | ||||
5200 | "Invalid sext update load")(static_cast <bool> ((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && "Invalid sext update load") ? void (0) : __assert_fail ("(!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && \"Invalid sext update load\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5200, __extension__ __PRETTY_FUNCTION__)); | ||||
5201 | switch (LoadedVT.getSimpleVT().SimpleTy) { | ||||
5202 | default: llvm_unreachable("Invalid PPC load type!")::llvm::llvm_unreachable_internal("Invalid PPC load type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5202); | ||||
5203 | case MVT::i64: Opcode = PPC::LDUX; break; | ||||
5204 | case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; | ||||
5205 | case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break; | ||||
5206 | case MVT::i1: | ||||
5207 | case MVT::i8: Opcode = PPC::LBZUX8; break; | ||||
5208 | } | ||||
5209 | } | ||||
5210 | |||||
5211 | SDValue Chain = LD->getChain(); | ||||
5212 | SDValue Base = LD->getBasePtr(); | ||||
5213 | SDValue Ops[] = { Base, Offset, Chain }; | ||||
5214 | SDNode *MN = CurDAG->getMachineNode( | ||||
5215 | Opcode, dl, LD->getValueType(0), | ||||
5216 | PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); | ||||
5217 | transferMemOperands(N, MN); | ||||
5218 | ReplaceNode(N, MN); | ||||
5219 | return; | ||||
5220 | } | ||||
5221 | } | ||||
5222 | |||||
5223 | case ISD::AND: | ||||
5224 | // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr | ||||
5225 | if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || | ||||
5226 | tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) | ||||
5227 | return; | ||||
5228 | |||||
5229 | // Other cases are autogenerated. | ||||
5230 | break; | ||||
5231 | case ISD::OR: { | ||||
5232 | if (N->getValueType(0) == MVT::i32) | ||||
5233 | if (tryBitfieldInsert(N)) | ||||
5234 | return; | ||||
5235 | |||||
5236 | int16_t Imm; | ||||
5237 | if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && | ||||
5238 | isIntS16Immediate(N->getOperand(1), Imm)) { | ||||
5239 | KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0)); | ||||
5240 | |||||
5241 | // If this is equivalent to an add, then we can fold it with the | ||||
5242 | // FrameIndex calculation. | ||||
5243 | if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { | ||||
5244 | selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); | ||||
5245 | return; | ||||
5246 | } | ||||
5247 | } | ||||
5248 | |||||
5249 | // If this is 'or' against an imm with consecutive ones and both sides zero, | ||||
5250 | // try to emit rldimi | ||||
5251 | if (tryAsSingleRLDIMI(N)) | ||||
5252 | return; | ||||
5253 | |||||
5254 | // OR with a 32-bit immediate can be handled by ori + oris | ||||
5255 | // without creating an immediate in a GPR. | ||||
5256 | uint64_t Imm64 = 0; | ||||
5257 | bool IsPPC64 = Subtarget->isPPC64(); | ||||
5258 | if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && | ||||
5259 | (Imm64 & ~0xFFFFFFFFuLL) == 0) { | ||||
5260 | // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later. | ||||
5261 | uint64_t ImmHi = Imm64 >> 16; | ||||
5262 | uint64_t ImmLo = Imm64 & 0xFFFF; | ||||
5263 | if (ImmHi != 0 && ImmLo != 0) { | ||||
5264 | SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, | ||||
5265 | N->getOperand(0), | ||||
5266 | getI16Imm(ImmLo, dl)); | ||||
5267 | SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; | ||||
5268 | CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1); | ||||
5269 | return; | ||||
5270 | } | ||||
5271 | } | ||||
5272 | |||||
5273 | // Other cases are autogenerated. | ||||
5274 | break; | ||||
5275 | } | ||||
5276 | case ISD::XOR: { | ||||
5277 | // XOR with a 32-bit immediate can be handled by xori + xoris | ||||
5278 | // without creating an immediate in a GPR. | ||||
5279 | uint64_t Imm64 = 0; | ||||
5280 | bool IsPPC64 = Subtarget->isPPC64(); | ||||
5281 | if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && | ||||
5282 | (Imm64 & ~0xFFFFFFFFuLL) == 0) { | ||||
5283 | // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later. | ||||
5284 | uint64_t ImmHi = Imm64 >> 16; | ||||
5285 | uint64_t ImmLo = Imm64 & 0xFFFF; | ||||
5286 | if (ImmHi != 0 && ImmLo != 0) { | ||||
5287 | SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, | ||||
5288 | N->getOperand(0), | ||||
5289 | getI16Imm(ImmLo, dl)); | ||||
5290 | SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; | ||||
5291 | CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1); | ||||
5292 | return; | ||||
5293 | } | ||||
5294 | } | ||||
5295 | |||||
5296 | break; | ||||
5297 | } | ||||
5298 | case ISD::ADD: { | ||||
5299 | int16_t Imm; | ||||
5300 | if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && | ||||
5301 | isIntS16Immediate(N->getOperand(1), Imm)) { | ||||
5302 | selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); | ||||
5303 | return; | ||||
5304 | } | ||||
5305 | |||||
5306 | break; | ||||
5307 | } | ||||
5308 | case ISD::SHL: { | ||||
5309 | unsigned Imm, SH, MB, ME; | ||||
5310 | if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && | ||||
5311 | isRotateAndMask(N, Imm, true, SH, MB, ME)) { | ||||
5312 | SDValue Ops[] = { N->getOperand(0).getOperand(0), | ||||
5313 | getI32Imm(SH, dl), getI32Imm(MB, dl), | ||||
5314 | getI32Imm(ME, dl) }; | ||||
5315 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
5316 | return; | ||||
5317 | } | ||||
5318 | |||||
5319 | // Other cases are autogenerated. | ||||
5320 | break; | ||||
5321 | } | ||||
5322 | case ISD::SRL: { | ||||
5323 | unsigned Imm, SH, MB, ME; | ||||
5324 | if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && | ||||
5325 | isRotateAndMask(N, Imm, true, SH, MB, ME)) { | ||||
5326 | SDValue Ops[] = { N->getOperand(0).getOperand(0), | ||||
5327 | getI32Imm(SH, dl), getI32Imm(MB, dl), | ||||
5328 | getI32Imm(ME, dl) }; | ||||
5329 | CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); | ||||
5330 | return; | ||||
5331 | } | ||||
5332 | |||||
5333 | // Other cases are autogenerated. | ||||
5334 | break; | ||||
5335 | } | ||||
5336 | case ISD::MUL: { | ||||
5337 | SDValue Op1 = N->getOperand(1); | ||||
5338 | if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64) | ||||
5339 | break; | ||||
5340 | |||||
5341 | // If the multiplier fits int16, we can handle it with mulli. | ||||
5342 | int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue(); | ||||
5343 | unsigned Shift = countTrailingZeros<uint64_t>(Imm); | ||||
5344 | if (isInt<16>(Imm) || !Shift) | ||||
5345 | break; | ||||
5346 | |||||
5347 | // If the shifted value fits int16, we can do this transformation: | ||||
5348 | // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL because | ||||
5349 | // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). | ||||
5350 | uint64_t ImmSh = Imm >> Shift; | ||||
5351 | if (isInt<16>(ImmSh)) { | ||||
5352 | uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); | ||||
5353 | SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); | ||||
5354 | SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, | ||||
5355 | N->getOperand(0), SDImm); | ||||
5356 | CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0), | ||||
5357 | getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl)); | ||||
5358 | return; | ||||
5359 | } | ||||
5360 | break; | ||||
5361 | } | ||||
5362 | // FIXME: Remove this once the ANDI glue bug is fixed: | ||||
5363 | case PPCISD::ANDI_rec_1_EQ_BIT: | ||||
5364 | case PPCISD::ANDI_rec_1_GT_BIT: { | ||||
5365 | if (!ANDIGlueBug) | ||||
5366 | break; | ||||
5367 | |||||
5368 | EVT InVT = N->getOperand(0).getValueType(); | ||||
5369 | assert((InVT == MVT::i64 || InVT == MVT::i32) &&(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT:: i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ? void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5370, __extension__ __PRETTY_FUNCTION__)) | ||||
5370 | "Invalid input type for ANDI_rec_1_EQ_BIT")(static_cast <bool> ((InVT == MVT::i64 || InVT == MVT:: i32) && "Invalid input type for ANDI_rec_1_EQ_BIT") ? void (0) : __assert_fail ("(InVT == MVT::i64 || InVT == MVT::i32) && \"Invalid input type for ANDI_rec_1_EQ_BIT\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5370, __extension__ __PRETTY_FUNCTION__)); | ||||
5371 | |||||
5372 | unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec; | ||||
5373 | SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, | ||||
5374 | N->getOperand(0), | ||||
5375 | CurDAG->getTargetConstant(1, dl, InVT)), | ||||
5376 | 0); | ||||
5377 | SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); | ||||
5378 | SDValue SRIdxVal = CurDAG->getTargetConstant( | ||||
5379 | N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt, | ||||
5380 | dl, MVT::i32); | ||||
5381 | |||||
5382 | CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, | ||||
5383 | SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); | ||||
5384 | return; | ||||
5385 | } | ||||
5386 | case ISD::SELECT_CC: { | ||||
5387 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); | ||||
5388 | EVT PtrVT = | ||||
5389 | CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); | ||||
5390 | bool isPPC64 = (PtrVT == MVT::i64); | ||||
5391 | |||||
5392 | // If this is a select of i1 operands, we'll pattern match it. | ||||
5393 | if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) | ||||
5394 | break; | ||||
5395 | |||||
5396 | if (Subtarget->isISA3_0() && Subtarget->isPPC64()) { | ||||
5397 | bool NeedSwapOps = false; | ||||
5398 | bool IsUnCmp = false; | ||||
5399 | if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { | ||||
5400 | SDValue LHS = N->getOperand(0); | ||||
5401 | SDValue RHS = N->getOperand(1); | ||||
5402 | if (NeedSwapOps) | ||||
5403 | std::swap(LHS, RHS); | ||||
5404 | |||||
5405 | // Use SelectCC to generate the comparison that sets the CR bits. For | ||||
5406 | // equality comparisons with one literal operand, SelectCC may avoid | ||||
5407 | // materializing the whole literal and just use xoris to check it first, | ||||
5408 | // in which case the resulting comparison cannot exactly represent the | ||||
5409 | // GT/LT relationship. To avoid this, we specify SETGT/SETUGT here | ||||
5410 | // instead of SETEQ. | ||||
5411 | SDValue GenCC = | ||||
5412 | SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); | ||||
5413 | CurDAG->SelectNodeTo( | ||||
5414 | N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, | ||||
5415 | N->getValueType(0), GenCC); | ||||
5416 | NumP9Setb++; | ||||
5417 | return; | ||||
5418 | } | ||||
5419 | } | ||||
5420 | |||||
5421 | // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc | ||||
5422 | if (!isPPC64) | ||||
5423 | if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) | ||||
5424 | if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) | ||||
5425 | if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) | ||||
5426 | if (N1C->isNullValue() && N3C->isNullValue() && | ||||
5427 | N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && | ||||
5428 | // FIXME: Implement this optzn for PPC64. | ||||
5429 | N->getValueType(0) == MVT::i32) { | ||||
5430 | SDNode *Tmp = | ||||
5431 | CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, | ||||
5432 | N->getOperand(0), getI32Imm(~0U, dl)); | ||||
5433 | CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), | ||||
5434 | N->getOperand(0), SDValue(Tmp, 1)); | ||||
5435 | return; | ||||
5436 | } | ||||
5437 | |||||
5438 | SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); | ||||
5439 | |||||
5440 | if (N->getValueType(0) == MVT::i1) { | ||||
5441 | // An i1 select is: (c & t) | (!c & f). | ||||
5442 | bool Inv; | ||||
5443 | unsigned Idx = getCRIdxForSetCC(CC, Inv); | ||||
5444 | |||||
5445 | unsigned SRI; | ||||
5446 | switch (Idx) { | ||||
5447 | default: llvm_unreachable("Invalid CC index")::llvm::llvm_unreachable_internal("Invalid CC index", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5447); | ||||
5448 | case 0: SRI = PPC::sub_lt; break; | ||||
5449 | case 1: SRI = PPC::sub_gt; break; | ||||
5450 | case 2: SRI = PPC::sub_eq; break; | ||||
5451 | case 3: SRI = PPC::sub_un; break; | ||||
5452 | } | ||||
5453 | |||||
5454 | SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); | ||||
5455 | |||||
5456 | SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, | ||||
5457 | CCBit, CCBit), 0); | ||||
5458 | SDValue C = Inv ? NotCCBit : CCBit, | ||||
5459 | NotC = Inv ? CCBit : NotCCBit; | ||||
5460 | |||||
5461 | SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, | ||||
5462 | C, N->getOperand(2)), 0); | ||||
5463 | SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, | ||||
5464 | NotC, N->getOperand(3)), 0); | ||||
5465 | |||||
5466 | CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); | ||||
5467 | return; | ||||
5468 | } | ||||
5469 | |||||
5470 | unsigned BROpc = | ||||
5471 | getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget); | ||||
5472 | |||||
5473 | unsigned SelectCCOp; | ||||
5474 | if (N->getValueType(0) == MVT::i32) | ||||
5475 | SelectCCOp = PPC::SELECT_CC_I4; | ||||
5476 | else if (N->getValueType(0) == MVT::i64) | ||||
5477 | SelectCCOp = PPC::SELECT_CC_I8; | ||||
5478 | else if (N->getValueType(0) == MVT::f32) { | ||||
5479 | if (Subtarget->hasP8Vector()) | ||||
5480 | SelectCCOp = PPC::SELECT_CC_VSSRC; | ||||
5481 | else if (Subtarget->hasSPE()) | ||||
5482 | SelectCCOp = PPC::SELECT_CC_SPE4; | ||||
5483 | else | ||||
5484 | SelectCCOp = PPC::SELECT_CC_F4; | ||||
5485 | } else if (N->getValueType(0) == MVT::f64) { | ||||
5486 | if (Subtarget->hasVSX()) | ||||
5487 | SelectCCOp = PPC::SELECT_CC_VSFRC; | ||||
5488 | else if (Subtarget->hasSPE()) | ||||
5489 | SelectCCOp = PPC::SELECT_CC_SPE; | ||||
5490 | else | ||||
5491 | SelectCCOp = PPC::SELECT_CC_F8; | ||||
5492 | } else if (N->getValueType(0) == MVT::f128) | ||||
5493 | SelectCCOp = PPC::SELECT_CC_F16; | ||||
5494 | else if (Subtarget->hasSPE()) | ||||
5495 | SelectCCOp = PPC::SELECT_CC_SPE; | ||||
5496 | else if (N->getValueType(0) == MVT::v2f64 || | ||||
5497 | N->getValueType(0) == MVT::v2i64) | ||||
5498 | SelectCCOp = PPC::SELECT_CC_VSRC; | ||||
5499 | else | ||||
5500 | SelectCCOp = PPC::SELECT_CC_VRRC; | ||||
5501 | |||||
5502 | SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), | ||||
5503 | getI32Imm(BROpc, dl) }; | ||||
5504 | CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); | ||||
5505 | return; | ||||
5506 | } | ||||
5507 | case ISD::VECTOR_SHUFFLE: | ||||
5508 | if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || | ||||
5509 | N->getValueType(0) == MVT::v2i64)) { | ||||
5510 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); | ||||
5511 | |||||
5512 | SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), | ||||
5513 | Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); | ||||
5514 | unsigned DM[2]; | ||||
5515 | |||||
5516 | for (int i = 0; i < 2; ++i) | ||||
5517 | if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) | ||||
5518 | DM[i] = 0; | ||||
5519 | else | ||||
5520 | DM[i] = 1; | ||||
5521 | |||||
5522 | if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && | ||||
5523 | Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && | ||||
5524 | isa<LoadSDNode>(Op1.getOperand(0))) { | ||||
5525 | LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0)); | ||||
5526 | SDValue Base, Offset; | ||||
5527 | |||||
5528 | if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && | ||||
5529 | (LD->getMemoryVT() == MVT::f64 || | ||||
5530 | LD->getMemoryVT() == MVT::i64) && | ||||
5531 | SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { | ||||
5532 | SDValue Chain = LD->getChain(); | ||||
5533 | SDValue Ops[] = { Base, Offset, Chain }; | ||||
5534 | MachineMemOperand *MemOp = LD->getMemOperand(); | ||||
5535 | SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, | ||||
5536 | N->getValueType(0), Ops); | ||||
5537 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp}); | ||||
5538 | return; | ||||
5539 | } | ||||
5540 | } | ||||
5541 | |||||
5542 | // For little endian, we must swap the input operands and adjust | ||||
5543 | // the mask elements (reverse and invert them). | ||||
5544 | if (Subtarget->isLittleEndian()) { | ||||
5545 | std::swap(Op1, Op2); | ||||
5546 | unsigned tmp = DM[0]; | ||||
5547 | DM[0] = 1 - DM[1]; | ||||
5548 | DM[1] = 1 - tmp; | ||||
5549 | } | ||||
5550 | |||||
5551 | SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, | ||||
5552 | MVT::i32); | ||||
5553 | SDValue Ops[] = { Op1, Op2, DMV }; | ||||
5554 | CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); | ||||
5555 | return; | ||||
5556 | } | ||||
5557 | |||||
5558 | break; | ||||
5559 | case PPCISD::BDNZ: | ||||
5560 | case PPCISD::BDZ: { | ||||
5561 | bool IsPPC64 = Subtarget->isPPC64(); | ||||
5562 | SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; | ||||
5563 | CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ | ||||
5564 | ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) | ||||
5565 | : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), | ||||
5566 | MVT::Other, Ops); | ||||
5567 | return; | ||||
5568 | } | ||||
5569 | case PPCISD::COND_BRANCH: { | ||||
5570 | // Op #0 is the Chain. | ||||
5571 | // Op #1 is the PPC::PRED_* number. | ||||
5572 | // Op #2 is the CR# | ||||
5573 | // Op #3 is the Dest MBB | ||||
5574 | // Op #4 is the Flag. | ||||
5575 | // Prevent PPC::PRED_* from being selected into LI. | ||||
5576 | unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); | ||||
5577 | if (EnableBranchHint) | ||||
5578 | PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3)); | ||||
5579 | |||||
5580 | SDValue Pred = getI32Imm(PCC, dl); | ||||
5581 | SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), | ||||
5582 | N->getOperand(0), N->getOperand(4) }; | ||||
5583 | CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); | ||||
5584 | return; | ||||
5585 | } | ||||
5586 | case ISD::BR_CC: { | ||||
5587 | if (tryFoldSWTestBRCC(N)) | ||||
5588 | return; | ||||
5589 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); | ||||
5590 | unsigned PCC = | ||||
5591 | getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget); | ||||
5592 | |||||
5593 | if (N->getOperand(2).getValueType() == MVT::i1) { | ||||
5594 | unsigned Opc; | ||||
5595 | bool Swap; | ||||
5596 | switch (PCC) { | ||||
5597 | default: llvm_unreachable("Unexpected Boolean-operand predicate")::llvm::llvm_unreachable_internal("Unexpected Boolean-operand predicate" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5597); | ||||
5598 | case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; | ||||
5599 | case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; | ||||
5600 | case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; | ||||
5601 | case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; | ||||
5602 | case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; | ||||
5603 | case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; | ||||
5604 | } | ||||
5605 | |||||
5606 | // A signed comparison of i1 values produces the opposite result to an | ||||
5607 | // unsigned one if the condition code includes less-than or greater-than. | ||||
5608 | // This is because 1 is the most negative signed i1 number and the most | ||||
5609 | // positive unsigned i1 number. The CR-logical operations used for such | ||||
5610 | // comparisons are non-commutative so for signed comparisons vs. unsigned | ||||
5611 | // ones, the input operands just need to be swapped. | ||||
5612 | if (ISD::isSignedIntSetCC(CC)) | ||||
5613 | Swap = !Swap; | ||||
5614 | |||||
5615 | SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, | ||||
5616 | N->getOperand(Swap ? 3 : 2), | ||||
5617 | N->getOperand(Swap ? 2 : 3)), 0); | ||||
5618 | CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), | ||||
5619 | N->getOperand(0)); | ||||
5620 | return; | ||||
5621 | } | ||||
5622 | |||||
5623 | if (EnableBranchHint) | ||||
5624 | PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4)); | ||||
5625 | |||||
5626 | SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); | ||||
5627 | SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, | ||||
5628 | N->getOperand(4), N->getOperand(0) }; | ||||
5629 | CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); | ||||
5630 | return; | ||||
5631 | } | ||||
5632 | case ISD::BRIND: { | ||||
5633 | // FIXME: Should custom lower this. | ||||
5634 | SDValue Chain = N->getOperand(0); | ||||
5635 | SDValue Target = N->getOperand(1); | ||||
5636 | unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; | ||||
5637 | unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; | ||||
5638 | Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, | ||||
5639 | Chain), 0); | ||||
5640 | CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); | ||||
5641 | return; | ||||
5642 | } | ||||
5643 | case PPCISD::TOC_ENTRY: { | ||||
5644 | const bool isPPC64 = Subtarget->isPPC64(); | ||||
5645 | const bool isELFABI = Subtarget->isSVR4ABI(); | ||||
5646 | const bool isAIXABI = Subtarget->isAIXABI(); | ||||
5647 | |||||
5648 | // PowerPC only supports the small, medium and large code models. | ||||
5649 | const CodeModel::Model CModel = TM.getCodeModel(); | ||||
5650 | assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&(static_cast <bool> (!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models." ) ? void (0) : __assert_fail ("!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && \"PowerPC doesn't support tiny or kernel code models.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5651, __extension__ __PRETTY_FUNCTION__)) | ||||
5651 | "PowerPC doesn't support tiny or kernel code models.")(static_cast <bool> (!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && "PowerPC doesn't support tiny or kernel code models." ) ? void (0) : __assert_fail ("!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && \"PowerPC doesn't support tiny or kernel code models.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5651, __extension__ __PRETTY_FUNCTION__)); | ||||
5652 | |||||
5653 | if (isAIXABI && CModel == CodeModel::Medium) | ||||
5654 | report_fatal_error("Medium code model is not supported on AIX."); | ||||
5655 | |||||
5656 | // For 64-bit small code model, we allow SelectCodeCommon to handle this, | ||||
5657 | // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. | ||||
5658 | if (isPPC64 && CModel == CodeModel::Small) | ||||
5659 | break; | ||||
5660 | |||||
5661 | // Handle 32-bit small code model. | ||||
5662 | if (!isPPC64) { | ||||
5663 | // Transforms the PPCISD::TOC_ENTRY node into the passed-in OpCode, either | ||||
5664 | // PPC::ADDItoc or PPC::LWZtoc. | ||||
5665 | auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry) { | ||||
5666 | SDValue GA = TocEntry->getOperand(0); | ||||
5667 | SDValue TocBase = TocEntry->getOperand(1); | ||||
5668 | SDNode *MN = CurDAG->getMachineNode(OpCode, dl, MVT::i32, GA, TocBase); | ||||
5669 | transferMemOperands(TocEntry, MN); | ||||
5670 | ReplaceNode(TocEntry, MN); | ||||
5671 | }; | ||||
5672 | |||||
5673 | if (isELFABI) { | ||||
5674 | assert(TM.isPositionIndependent() &&(static_cast <bool> (TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5676, __extension__ __PRETTY_FUNCTION__)) | ||||
5675 | "32-bit ELF can only have TOC entries in position independent"(static_cast <bool> (TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5676, __extension__ __PRETTY_FUNCTION__)) | ||||
5676 | " code.")(static_cast <bool> (TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code.") ? void (0) : __assert_fail ("TM.isPositionIndependent() && \"32-bit ELF can only have TOC entries in position independent\" \" code.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5676, __extension__ __PRETTY_FUNCTION__)); | ||||
5677 | // 32-bit ELF always uses a small code model toc access. | ||||
5678 | replaceWith(PPC::LWZtoc, N); | ||||
5679 | return; | ||||
5680 | } | ||||
5681 | |||||
5682 | if (isAIXABI && CModel == CodeModel::Small) { | ||||
5683 | if (hasTocDataAttr(N->getOperand(0), | ||||
5684 | CurDAG->getDataLayout().getPointerSize())) | ||||
5685 | replaceWith(PPC::ADDItoc, N); | ||||
5686 | else | ||||
5687 | replaceWith(PPC::LWZtoc, N); | ||||
5688 | |||||
5689 | return; | ||||
5690 | } | ||||
5691 | } | ||||
5692 | |||||
5693 | assert(CModel != CodeModel::Small && "All small code models handled.")(static_cast <bool> (CModel != CodeModel::Small && "All small code models handled.") ? void (0) : __assert_fail ("CModel != CodeModel::Small && \"All small code models handled.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5693, __extension__ __PRETTY_FUNCTION__)); | ||||
5694 | |||||
5695 | assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"(static_cast <bool> ((isPPC64 || (isAIXABI && ! isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following." ) ? void (0) : __assert_fail ("(isPPC64 || (isAIXABI && !isPPC64)) && \"We are dealing with 64-bit\" \" ELF/AIX or 32-bit AIX in the following.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5696, __extension__ __PRETTY_FUNCTION__)) | ||||
5696 | " ELF/AIX or 32-bit AIX in the following.")(static_cast <bool> ((isPPC64 || (isAIXABI && ! isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following." ) ? void (0) : __assert_fail ("(isPPC64 || (isAIXABI && !isPPC64)) && \"We are dealing with 64-bit\" \" ELF/AIX or 32-bit AIX in the following.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5696, __extension__ __PRETTY_FUNCTION__)); | ||||
5697 | |||||
5698 | // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode | ||||
5699 | // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We | ||||
5700 | // generate two instructions as described below. The first source operand | ||||
5701 | // is a symbol reference. If it must be toc-referenced according to | ||||
5702 | // Subtarget, we generate: | ||||
5703 | // [32-bit AIX] | ||||
5704 | // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) | ||||
5705 | // [64-bit ELF/AIX] | ||||
5706 | // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) | ||||
5707 | // Otherwise we generate: | ||||
5708 | // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) | ||||
5709 | SDValue GA = N->getOperand(0); | ||||
5710 | SDValue TOCbase = N->getOperand(1); | ||||
5711 | |||||
5712 | EVT VT = isPPC64 ? MVT::i64 : MVT::i32; | ||||
5713 | SDNode *Tmp = CurDAG->getMachineNode( | ||||
5714 | isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); | ||||
5715 | |||||
5716 | if (PPCLowering->isAccessedAsGotIndirect(GA)) { | ||||
5717 | // If it is accessed as got-indirect, we need an extra LWZ/LD to load | ||||
5718 | // the address. | ||||
5719 | SDNode *MN = CurDAG->getMachineNode( | ||||
5720 | isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); | ||||
5721 | |||||
5722 | transferMemOperands(N, MN); | ||||
5723 | ReplaceNode(N, MN); | ||||
5724 | return; | ||||
5725 | } | ||||
5726 | |||||
5727 | // Build the address relative to the TOC-pointer. | ||||
5728 | ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, | ||||
5729 | SDValue(Tmp, 0), GA)); | ||||
5730 | return; | ||||
5731 | } | ||||
5732 | case PPCISD::PPC32_PICGOT: | ||||
5733 | // Generate a PIC-safe GOT reference. | ||||
5734 | assert(Subtarget->is32BitELFABI() &&(static_cast <bool> (Subtarget->is32BitELFABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4") ? void (0) : __assert_fail ("Subtarget->is32BitELFABI() && \"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5735, __extension__ __PRETTY_FUNCTION__)) | ||||
5735 | "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4")(static_cast <bool> (Subtarget->is32BitELFABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4") ? void (0) : __assert_fail ("Subtarget->is32BitELFABI() && \"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5735, __extension__ __PRETTY_FUNCTION__)); | ||||
5736 | CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, | ||||
5737 | PPCLowering->getPointerTy(CurDAG->getDataLayout()), | ||||
5738 | MVT::i32); | ||||
5739 | return; | ||||
5740 | |||||
5741 | case PPCISD::VADD_SPLAT: { | ||||
5742 | // This expands into one of three sequences, depending on whether | ||||
5743 | // the first operand is odd or even, positive or negative. | ||||
5744 | assert(isa<ConstantSDNode>(N->getOperand(0)) &&(static_cast <bool> (isa<ConstantSDNode>(N->getOperand (0)) && isa<ConstantSDNode>(N->getOperand(1) ) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5746, __extension__ __PRETTY_FUNCTION__)) | ||||
5745 | isa<ConstantSDNode>(N->getOperand(1)) &&(static_cast <bool> (isa<ConstantSDNode>(N->getOperand (0)) && isa<ConstantSDNode>(N->getOperand(1) ) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5746, __extension__ __PRETTY_FUNCTION__)) | ||||
5746 | "Invalid operand on VADD_SPLAT!")(static_cast <bool> (isa<ConstantSDNode>(N->getOperand (0)) && isa<ConstantSDNode>(N->getOperand(1) ) && "Invalid operand on VADD_SPLAT!") ? void (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(0)) && isa<ConstantSDNode>(N->getOperand(1)) && \"Invalid operand on VADD_SPLAT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5746, __extension__ __PRETTY_FUNCTION__)); | ||||
5747 | |||||
5748 | int Elt = N->getConstantOperandVal(0); | ||||
5749 | int EltSize = N->getConstantOperandVal(1); | ||||
5750 | unsigned Opc1, Opc2, Opc3; | ||||
5751 | EVT VT; | ||||
5752 | |||||
5753 | if (EltSize == 1) { | ||||
5754 | Opc1 = PPC::VSPLTISB; | ||||
5755 | Opc2 = PPC::VADDUBM; | ||||
5756 | Opc3 = PPC::VSUBUBM; | ||||
5757 | VT = MVT::v16i8; | ||||
5758 | } else if (EltSize == 2) { | ||||
5759 | Opc1 = PPC::VSPLTISH; | ||||
5760 | Opc2 = PPC::VADDUHM; | ||||
5761 | Opc3 = PPC::VSUBUHM; | ||||
5762 | VT = MVT::v8i16; | ||||
5763 | } else { | ||||
5764 | assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!")(static_cast <bool> (EltSize == 4 && "Invalid element size on VADD_SPLAT!" ) ? void (0) : __assert_fail ("EltSize == 4 && \"Invalid element size on VADD_SPLAT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5764, __extension__ __PRETTY_FUNCTION__)); | ||||
5765 | Opc1 = PPC::VSPLTISW; | ||||
5766 | Opc2 = PPC::VADDUWM; | ||||
5767 | Opc3 = PPC::VSUBUWM; | ||||
5768 | VT = MVT::v4i32; | ||||
5769 | } | ||||
5770 | |||||
5771 | if ((Elt & 1) == 0) { | ||||
5772 | // Elt is even, in the range [-32,-18] + [16,30]. | ||||
5773 | // | ||||
5774 | // Convert: VADD_SPLAT elt, size | ||||
5775 | // Into: tmp = VSPLTIS[BHW] elt | ||||
5776 | // VADDU[BHW]M tmp, tmp | ||||
5777 | // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 | ||||
5778 | SDValue EltVal = getI32Imm(Elt >> 1, dl); | ||||
5779 | SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5780 | SDValue TmpVal = SDValue(Tmp, 0); | ||||
5781 | ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); | ||||
5782 | return; | ||||
5783 | } else if (Elt > 0) { | ||||
5784 | // Elt is odd and positive, in the range [17,31]. | ||||
5785 | // | ||||
5786 | // Convert: VADD_SPLAT elt, size | ||||
5787 | // Into: tmp1 = VSPLTIS[BHW] elt-16 | ||||
5788 | // tmp2 = VSPLTIS[BHW] -16 | ||||
5789 | // VSUBU[BHW]M tmp1, tmp2 | ||||
5790 | SDValue EltVal = getI32Imm(Elt - 16, dl); | ||||
5791 | SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5792 | EltVal = getI32Imm(-16, dl); | ||||
5793 | SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5794 | ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), | ||||
5795 | SDValue(Tmp2, 0))); | ||||
5796 | return; | ||||
5797 | } else { | ||||
5798 | // Elt is odd and negative, in the range [-31,-17]. | ||||
5799 | // | ||||
5800 | // Convert: VADD_SPLAT elt, size | ||||
5801 | // Into: tmp1 = VSPLTIS[BHW] elt+16 | ||||
5802 | // tmp2 = VSPLTIS[BHW] -16 | ||||
5803 | // VADDU[BHW]M tmp1, tmp2 | ||||
5804 | SDValue EltVal = getI32Imm(Elt + 16, dl); | ||||
5805 | SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5806 | EltVal = getI32Imm(-16, dl); | ||||
5807 | SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); | ||||
5808 | ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), | ||||
5809 | SDValue(Tmp2, 0))); | ||||
5810 | return; | ||||
5811 | } | ||||
5812 | } | ||||
5813 | } | ||||
5814 | |||||
5815 | SelectCode(N); | ||||
5816 | } | ||||
5817 | |||||
5818 | // If the target supports the cmpb instruction, do the idiom recognition here. | ||||
5819 | // We don't do this as a DAG combine because we don't want to do it as nodes | ||||
5820 | // are being combined (because we might miss part of the eventual idiom). We | ||||
5821 | // don't want to do it during instruction selection because we want to reuse | ||||
5822 | // the logic for lowering the masking operations already part of the | ||||
5823 | // instruction selector. | ||||
5824 | SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { | ||||
5825 | SDLoc dl(N); | ||||
5826 | |||||
5827 | assert(N->getOpcode() == ISD::OR &&(static_cast <bool> (N->getOpcode() == ISD::OR && "Only OR nodes are supported for CMPB") ? void (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Only OR nodes are supported for CMPB\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5828, __extension__ __PRETTY_FUNCTION__)) | ||||
5828 | "Only OR nodes are supported for CMPB")(static_cast <bool> (N->getOpcode() == ISD::OR && "Only OR nodes are supported for CMPB") ? void (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Only OR nodes are supported for CMPB\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 5828, __extension__ __PRETTY_FUNCTION__)); | ||||
5829 | |||||
5830 | SDValue Res; | ||||
5831 | if (!Subtarget->hasCMPB()) | ||||
5832 | return Res; | ||||
5833 | |||||
5834 | if (N->getValueType(0) != MVT::i32 && | ||||
5835 | N->getValueType(0) != MVT::i64) | ||||
5836 | return Res; | ||||
5837 | |||||
5838 | EVT VT = N->getValueType(0); | ||||
5839 | |||||
5840 | SDValue RHS, LHS; | ||||
5841 | bool BytesFound[8] = {false, false, false, false, false, false, false, false}; | ||||
5842 | uint64_t Mask = 0, Alt = 0; | ||||
5843 | |||||
5844 | auto IsByteSelectCC = [this](SDValue O, unsigned &b, | ||||
5845 | uint64_t &Mask, uint64_t &Alt, | ||||
5846 | SDValue &LHS, SDValue &RHS) { | ||||
5847 | if (O.getOpcode() != ISD::SELECT_CC) | ||||
5848 | return false; | ||||
5849 | ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get(); | ||||
5850 | |||||
5851 | if (!isa<ConstantSDNode>(O.getOperand(2)) || | ||||
5852 | !isa<ConstantSDNode>(O.getOperand(3))) | ||||
5853 | return false; | ||||
5854 | |||||
5855 | uint64_t PM = O.getConstantOperandVal(2); | ||||
5856 | uint64_t PAlt = O.getConstantOperandVal(3); | ||||
5857 | for (b = 0; b < 8; ++b) { | ||||
5858 | uint64_t Mask = UINT64_C(0xFF)0xFFUL << (8*b); | ||||
5859 | if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) | ||||
5860 | break; | ||||
5861 | } | ||||
5862 | |||||
5863 | if (b == 8) | ||||
5864 | return false; | ||||
5865 | Mask |= PM; | ||||
5866 | Alt |= PAlt; | ||||
5867 | |||||
5868 | if (!isa<ConstantSDNode>(O.getOperand(1)) || | ||||
5869 | O.getConstantOperandVal(1) != 0) { | ||||
5870 | SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); | ||||
5871 | if (Op0.getOpcode() == ISD::TRUNCATE) | ||||
5872 | Op0 = Op0.getOperand(0); | ||||
5873 | if (Op1.getOpcode() == ISD::TRUNCATE) | ||||
5874 | Op1 = Op1.getOperand(0); | ||||
5875 | |||||
5876 | if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && | ||||
5877 | Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && | ||||
5878 | isa<ConstantSDNode>(Op0.getOperand(1))) { | ||||
5879 | |||||
5880 | unsigned Bits = Op0.getValueSizeInBits(); | ||||
5881 | if (b != Bits/8-1) | ||||
5882 | return false; | ||||
5883 | if (Op0.getConstantOperandVal(1) != Bits-8) | ||||
5884 | return false; | ||||
5885 | |||||
5886 | LHS = Op0.getOperand(0); | ||||
5887 | RHS = Op1.getOperand(0); | ||||
5888 | return true; | ||||
5889 | } | ||||
5890 | |||||
5891 | // When we have small integers (i16 to be specific), the form present | ||||
5892 | // post-legalization uses SETULT in the SELECT_CC for the | ||||
5893 | // higher-order byte, depending on the fact that the | ||||
5894 | // even-higher-order bytes are known to all be zero, for example: | ||||
5895 | // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult | ||||
5896 | // (so when the second byte is the same, because all higher-order | ||||
5897 | // bits from bytes 3 and 4 are known to be zero, the result of the | ||||
5898 | // xor can be at most 255) | ||||
5899 | if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && | ||||
5900 | isa<ConstantSDNode>(O.getOperand(1))) { | ||||
5901 | |||||
5902 | uint64_t ULim = O.getConstantOperandVal(1); | ||||
5903 | if (ULim != (UINT64_C(1)1UL << b*8)) | ||||
5904 | return false; | ||||
5905 | |||||
5906 | // Now we need to make sure that the upper bytes are known to be | ||||
5907 | // zero. | ||||
5908 | unsigned Bits = Op0.getValueSizeInBits(); | ||||
5909 | if (!CurDAG->MaskedValueIsZero( | ||||
5910 | Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) | ||||
5911 | return false; | ||||
5912 | |||||
5913 | LHS = Op0.getOperand(0); | ||||
5914 | RHS = Op0.getOperand(1); | ||||
5915 | return true; | ||||
5916 | } | ||||
5917 | |||||
5918 | return false; | ||||
5919 | } | ||||
5920 | |||||
5921 | if (CC != ISD::SETEQ) | ||||
5922 | return false; | ||||
5923 | |||||
5924 | SDValue Op = O.getOperand(0); | ||||
5925 | if (Op.getOpcode() == ISD::AND) { | ||||
5926 | if (!isa<ConstantSDNode>(Op.getOperand(1))) | ||||
5927 | return false; | ||||
5928 | if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF)0xFFUL << (8*b))) | ||||
5929 | return false; | ||||
5930 | |||||
5931 | SDValue XOR = Op.getOperand(0); | ||||
5932 | if (XOR.getOpcode() == ISD::TRUNCATE) | ||||
5933 | XOR = XOR.getOperand(0); | ||||
5934 | if (XOR.getOpcode() != ISD::XOR) | ||||
5935 | return false; | ||||
5936 | |||||
5937 | LHS = XOR.getOperand(0); | ||||
5938 | RHS = XOR.getOperand(1); | ||||
5939 | return true; | ||||
5940 | } else if (Op.getOpcode() == ISD::SRL) { | ||||
5941 | if (!isa<ConstantSDNode>(Op.getOperand(1))) | ||||
5942 | return false; | ||||
5943 | unsigned Bits = Op.getValueSizeInBits(); | ||||
5944 | if (b != Bits/8-1) | ||||
5945 | return false; | ||||
5946 | if (Op.getConstantOperandVal(1) != Bits-8) | ||||
5947 | return false; | ||||
5948 | |||||
5949 | SDValue XOR = Op.getOperand(0); | ||||
5950 | if (XOR.getOpcode() == ISD::TRUNCATE) | ||||
5951 | XOR = XOR.getOperand(0); | ||||
5952 | if (XOR.getOpcode() != ISD::XOR) | ||||
5953 | return false; | ||||
5954 | |||||
5955 | LHS = XOR.getOperand(0); | ||||
5956 | RHS = XOR.getOperand(1); | ||||
5957 | return true; | ||||
5958 | } | ||||
5959 | |||||
5960 | return false; | ||||
5961 | }; | ||||
5962 | |||||
5963 | SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); | ||||
5964 | while (!Queue.empty()) { | ||||
5965 | SDValue V = Queue.pop_back_val(); | ||||
5966 | |||||
5967 | for (const SDValue &O : V.getNode()->ops()) { | ||||
5968 | unsigned b = 0; | ||||
5969 | uint64_t M = 0, A = 0; | ||||
5970 | SDValue OLHS, ORHS; | ||||
5971 | if (O.getOpcode() == ISD::OR) { | ||||
5972 | Queue.push_back(O); | ||||
5973 | } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { | ||||
5974 | if (!LHS) { | ||||
5975 | LHS = OLHS; | ||||
5976 | RHS = ORHS; | ||||
5977 | BytesFound[b] = true; | ||||
5978 | Mask |= M; | ||||
5979 | Alt |= A; | ||||
5980 | } else if ((LHS == ORHS && RHS == OLHS) || | ||||
5981 | (RHS == ORHS && LHS == OLHS)) { | ||||
5982 | BytesFound[b] = true; | ||||
5983 | Mask |= M; | ||||
5984 | Alt |= A; | ||||
5985 | } else { | ||||
5986 | return Res; | ||||
5987 | } | ||||
5988 | } else { | ||||
5989 | return Res; | ||||
5990 | } | ||||
5991 | } | ||||
5992 | } | ||||
5993 | |||||
5994 | unsigned LastB = 0, BCnt = 0; | ||||
5995 | for (unsigned i = 0; i < 8; ++i) | ||||
5996 | if (BytesFound[LastB]) { | ||||
5997 | ++BCnt; | ||||
5998 | LastB = i; | ||||
5999 | } | ||||
6000 | |||||
6001 | if (!LastB || BCnt < 2) | ||||
6002 | return Res; | ||||
6003 | |||||
6004 | // Because we'll be zero-extending the output anyway if don't have a specific | ||||
6005 | // value for each input byte (via the Mask), we can 'anyext' the inputs. | ||||
6006 | if (LHS.getValueType() != VT) { | ||||
6007 | LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); | ||||
6008 | RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); | ||||
6009 | } | ||||
6010 | |||||
6011 | Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); | ||||
6012 | |||||
6013 | bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1)-1L; | ||||
6014 | if (NonTrivialMask && !Alt) { | ||||
6015 | // Res = Mask & CMPB | ||||
6016 | Res = CurDAG->getNode(ISD::AND, dl, VT, Res, | ||||
6017 | CurDAG->getConstant(Mask, dl, VT)); | ||||
6018 | } else if (Alt) { | ||||
6019 | // Res = (CMPB & Mask) | (~CMPB & Alt) | ||||
6020 | // Which, as suggested here: | ||||
6021 | // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge | ||||
6022 | // can be written as: | ||||
6023 | // Res = Alt ^ ((Alt ^ Mask) & CMPB) | ||||
6024 | // useful because the (Alt ^ Mask) can be pre-computed. | ||||
6025 | Res = CurDAG->getNode(ISD::AND, dl, VT, Res, | ||||
6026 | CurDAG->getConstant(Mask ^ Alt, dl, VT)); | ||||
6027 | Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, | ||||
6028 | CurDAG->getConstant(Alt, dl, VT)); | ||||
6029 | } | ||||
6030 | |||||
6031 | return Res; | ||||
6032 | } | ||||
6033 | |||||
6034 | // When CR bit registers are enabled, an extension of an i1 variable to a i32 | ||||
6035 | // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus | ||||
6036 | // involves constant materialization of a 0 or a 1 or both. If the result of | ||||
6037 | // the extension is then operated upon by some operator that can be constant | ||||
6038 | // folded with a constant 0 or 1, and that constant can be materialized using | ||||
6039 | // only one instruction (like a zero or one), then we should fold in those | ||||
6040 | // operations with the select. | ||||
6041 | void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { | ||||
6042 | if (!Subtarget->useCRBits()) | ||||
6043 | return; | ||||
6044 | |||||
6045 | if (N->getOpcode() != ISD::ZERO_EXTEND && | ||||
6046 | N->getOpcode() != ISD::SIGN_EXTEND && | ||||
6047 | N->getOpcode() != ISD::ANY_EXTEND) | ||||
6048 | return; | ||||
6049 | |||||
6050 | if (N->getOperand(0).getValueType() != MVT::i1) | ||||
6051 | return; | ||||
6052 | |||||
6053 | if (!N->hasOneUse()) | ||||
6054 | return; | ||||
6055 | |||||
6056 | SDLoc dl(N); | ||||
6057 | EVT VT = N->getValueType(0); | ||||
6058 | SDValue Cond = N->getOperand(0); | ||||
6059 | SDValue ConstTrue = | ||||
6060 | CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); | ||||
6061 | SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); | ||||
6062 | |||||
6063 | do { | ||||
6064 | SDNode *User = *N->use_begin(); | ||||
6065 | if (User->getNumOperands() != 2) | ||||
6066 | break; | ||||
6067 | |||||
6068 | auto TryFold = [this, N, User, dl](SDValue Val) { | ||||
6069 | SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); | ||||
6070 | SDValue O0 = UserO0.getNode() == N ? Val : UserO0; | ||||
6071 | SDValue O1 = UserO1.getNode() == N ? Val : UserO1; | ||||
6072 | |||||
6073 | return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, | ||||
6074 | User->getValueType(0), {O0, O1}); | ||||
6075 | }; | ||||
6076 | |||||
6077 | // FIXME: When the semantics of the interaction between select and undef | ||||
6078 | // are clearly defined, it may turn out to be unnecessary to break here. | ||||
6079 | SDValue TrueRes = TryFold(ConstTrue); | ||||
6080 | if (!TrueRes || TrueRes.isUndef()) | ||||
6081 | break; | ||||
6082 | SDValue FalseRes = TryFold(ConstFalse); | ||||
6083 | if (!FalseRes || FalseRes.isUndef()) | ||||
6084 | break; | ||||
6085 | |||||
6086 | // For us to materialize these using one instruction, we must be able to | ||||
6087 | // represent them as signed 16-bit integers. | ||||
6088 | uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(), | ||||
6089 | False = cast<ConstantSDNode>(FalseRes)->getZExtValue(); | ||||
6090 | if (!isInt<16>(True) || !isInt<16>(False)) | ||||
6091 | break; | ||||
6092 | |||||
6093 | // We can replace User with a new SELECT node, and try again to see if we | ||||
6094 | // can fold the select with its user. | ||||
6095 | Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); | ||||
6096 | N = User; | ||||
6097 | ConstTrue = TrueRes; | ||||
6098 | ConstFalse = FalseRes; | ||||
6099 | } while (N->hasOneUse()); | ||||
6100 | } | ||||
6101 | |||||
6102 | void PPCDAGToDAGISel::PreprocessISelDAG() { | ||||
6103 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||
6104 | |||||
6105 | bool MadeChange = false; | ||||
6106 | while (Position != CurDAG->allnodes_begin()) { | ||||
6107 | SDNode *N = &*--Position; | ||||
6108 | if (N->use_empty()) | ||||
6109 | continue; | ||||
6110 | |||||
6111 | SDValue Res; | ||||
6112 | switch (N->getOpcode()) { | ||||
6113 | default: break; | ||||
6114 | case ISD::OR: | ||||
6115 | Res = combineToCMPB(N); | ||||
6116 | break; | ||||
6117 | } | ||||
6118 | |||||
6119 | if (!Res) | ||||
6120 | foldBoolExts(Res, N); | ||||
6121 | |||||
6122 | if (Res) { | ||||
6123 | LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "PPC DAG preprocessing replacing:\nOld: " ; } } while (false); | ||||
6124 | LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(CurDAG); } } while (false); | ||||
6125 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6126 | LLVM_DEBUG(Res.getNode()->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { Res.getNode()->dump(CurDAG); } } while ( false); | ||||
6127 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6128 | |||||
6129 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||
6130 | MadeChange = true; | ||||
6131 | } | ||||
6132 | } | ||||
6133 | |||||
6134 | if (MadeChange) | ||||
6135 | CurDAG->RemoveDeadNodes(); | ||||
6136 | } | ||||
6137 | |||||
6138 | /// PostprocessISelDAG - Perform some late peephole optimizations | ||||
6139 | /// on the DAG representation. | ||||
6140 | void PPCDAGToDAGISel::PostprocessISelDAG() { | ||||
6141 | // Skip peepholes at -O0. | ||||
6142 | if (TM.getOptLevel() == CodeGenOpt::None) | ||||
6143 | return; | ||||
6144 | |||||
6145 | PeepholePPC64(); | ||||
6146 | PeepholeCROps(); | ||||
6147 | PeepholePPC64ZExt(); | ||||
6148 | } | ||||
6149 | |||||
6150 | // Check if all users of this node will become isel where the second operand | ||||
6151 | // is the constant zero. If this is so, and if we can negate the condition, | ||||
6152 | // then we can flip the true and false operands. This will allow the zero to | ||||
6153 | // be folded with the isel so that we don't need to materialize a register | ||||
6154 | // containing zero. | ||||
6155 | bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { | ||||
6156 | for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); | ||||
6157 | UI != UE; ++UI) { | ||||
6158 | SDNode *User = *UI; | ||||
6159 | if (!User->isMachineOpcode()) | ||||
6160 | return false; | ||||
6161 | if (User->getMachineOpcode() != PPC::SELECT_I4 && | ||||
6162 | User->getMachineOpcode() != PPC::SELECT_I8) | ||||
6163 | return false; | ||||
6164 | |||||
6165 | SDNode *Op1 = User->getOperand(1).getNode(); | ||||
6166 | SDNode *Op2 = User->getOperand(2).getNode(); | ||||
6167 | // If we have a degenerate select with two equal operands, swapping will | ||||
6168 | // not do anything, and we may run into an infinite loop. | ||||
6169 | if (Op1 == Op2) | ||||
6170 | return false; | ||||
6171 | |||||
6172 | if (!Op2->isMachineOpcode()) | ||||
6173 | return false; | ||||
6174 | |||||
6175 | if (Op2->getMachineOpcode() != PPC::LI && | ||||
6176 | Op2->getMachineOpcode() != PPC::LI8) | ||||
6177 | return false; | ||||
6178 | |||||
6179 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0)); | ||||
6180 | if (!C) | ||||
6181 | return false; | ||||
6182 | |||||
6183 | if (!C->isNullValue()) | ||||
6184 | return false; | ||||
6185 | } | ||||
6186 | |||||
6187 | return true; | ||||
6188 | } | ||||
6189 | |||||
6190 | void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { | ||||
6191 | SmallVector<SDNode *, 4> ToReplace; | ||||
6192 | for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); | ||||
6193 | UI != UE; ++UI) { | ||||
6194 | SDNode *User = *UI; | ||||
6195 | assert((User->getMachineOpcode() == PPC::SELECT_I4 ||(static_cast <bool> ((User->getMachineOpcode() == PPC ::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users") ? void (0) : __assert_fail ("(User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && \"Must have all select users\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 6197, __extension__ __PRETTY_FUNCTION__)) | ||||
6196 | User->getMachineOpcode() == PPC::SELECT_I8) &&(static_cast <bool> ((User->getMachineOpcode() == PPC ::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users") ? void (0) : __assert_fail ("(User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && \"Must have all select users\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 6197, __extension__ __PRETTY_FUNCTION__)) | ||||
6197 | "Must have all select users")(static_cast <bool> ((User->getMachineOpcode() == PPC ::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users") ? void (0) : __assert_fail ("(User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && \"Must have all select users\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 6197, __extension__ __PRETTY_FUNCTION__)); | ||||
6198 | ToReplace.push_back(User); | ||||
6199 | } | ||||
6200 | |||||
6201 | for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(), | ||||
6202 | UE = ToReplace.end(); UI != UE; ++UI) { | ||||
6203 | SDNode *User = *UI; | ||||
6204 | SDNode *ResNode = | ||||
6205 | CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), | ||||
6206 | User->getValueType(0), User->getOperand(0), | ||||
6207 | User->getOperand(2), | ||||
6208 | User->getOperand(1)); | ||||
6209 | |||||
6210 | LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "CR Peephole replacing:\nOld: " ; } } while (false); | ||||
6211 | LLVM_DEBUG(User->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { User->dump(CurDAG); } } while (false); | ||||
6212 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6213 | LLVM_DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { ResNode->dump(CurDAG); } } while (false ); | ||||
6214 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6215 | |||||
6216 | ReplaceUses(User, ResNode); | ||||
6217 | } | ||||
6218 | } | ||||
6219 | |||||
6220 | void PPCDAGToDAGISel::PeepholeCROps() { | ||||
6221 | bool IsModified; | ||||
6222 | do { | ||||
6223 | IsModified = false; | ||||
6224 | for (SDNode &Node : CurDAG->allnodes()) { | ||||
6225 | MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); | ||||
6226 | if (!MachineNode || MachineNode->use_empty()) | ||||
6227 | continue; | ||||
6228 | SDNode *ResNode = MachineNode; | ||||
6229 | |||||
6230 | bool Op1Set = false, Op1Unset = false, | ||||
6231 | Op1Not = false, | ||||
6232 | Op2Set = false, Op2Unset = false, | ||||
6233 | Op2Not = false; | ||||
6234 | |||||
6235 | unsigned Opcode = MachineNode->getMachineOpcode(); | ||||
6236 | switch (Opcode) { | ||||
6237 | default: break; | ||||
6238 | case PPC::CRAND: | ||||
6239 | case PPC::CRNAND: | ||||
6240 | case PPC::CROR: | ||||
6241 | case PPC::CRXOR: | ||||
6242 | case PPC::CRNOR: | ||||
6243 | case PPC::CREQV: | ||||
6244 | case PPC::CRANDC: | ||||
6245 | case PPC::CRORC: { | ||||
6246 | SDValue Op = MachineNode->getOperand(1); | ||||
6247 | if (Op.isMachineOpcode()) { | ||||
6248 | if (Op.getMachineOpcode() == PPC::CRSET) | ||||
6249 | Op2Set = true; | ||||
6250 | else if (Op.getMachineOpcode() == PPC::CRUNSET) | ||||
6251 | Op2Unset = true; | ||||
6252 | else if (Op.getMachineOpcode() == PPC::CRNOR && | ||||
6253 | Op.getOperand(0) == Op.getOperand(1)) | ||||
6254 | Op2Not = true; | ||||
6255 | } | ||||
6256 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
6257 | } | ||||
6258 | case PPC::BC: | ||||
6259 | case PPC::BCn: | ||||
6260 | case PPC::SELECT_I4: | ||||
6261 | case PPC::SELECT_I8: | ||||
6262 | case PPC::SELECT_F4: | ||||
6263 | case PPC::SELECT_F8: | ||||
6264 | case PPC::SELECT_SPE: | ||||
6265 | case PPC::SELECT_SPE4: | ||||
6266 | case PPC::SELECT_VRRC: | ||||
6267 | case PPC::SELECT_VSFRC: | ||||
6268 | case PPC::SELECT_VSSRC: | ||||
6269 | case PPC::SELECT_VSRC: { | ||||
6270 | SDValue Op = MachineNode->getOperand(0); | ||||
6271 | if (Op.isMachineOpcode()) { | ||||
6272 | if (Op.getMachineOpcode() == PPC::CRSET) | ||||
6273 | Op1Set = true; | ||||
6274 | else if (Op.getMachineOpcode() == PPC::CRUNSET) | ||||
6275 | Op1Unset = true; | ||||
6276 | else if (Op.getMachineOpcode() == PPC::CRNOR && | ||||
6277 | Op.getOperand(0) == Op.getOperand(1)) | ||||
6278 | Op1Not = true; | ||||
6279 | } | ||||
6280 | } | ||||
6281 | break; | ||||
6282 | } | ||||
6283 | |||||
6284 | bool SelectSwap = false; | ||||
6285 | switch (Opcode) { | ||||
6286 | default: break; | ||||
6287 | case PPC::CRAND: | ||||
6288 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6289 | // x & x = x | ||||
6290 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6291 | else if (Op1Set) | ||||
6292 | // 1 & y = y | ||||
6293 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6294 | else if (Op2Set) | ||||
6295 | // x & 1 = x | ||||
6296 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6297 | else if (Op1Unset || Op2Unset) | ||||
6298 | // x & 0 = 0 & y = 0 | ||||
6299 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6300 | MVT::i1); | ||||
6301 | else if (Op1Not) | ||||
6302 | // ~x & y = andc(y, x) | ||||
6303 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6304 | MVT::i1, MachineNode->getOperand(1), | ||||
6305 | MachineNode->getOperand(0). | ||||
6306 | getOperand(0)); | ||||
6307 | else if (Op2Not) | ||||
6308 | // x & ~y = andc(x, y) | ||||
6309 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6310 | MVT::i1, MachineNode->getOperand(0), | ||||
6311 | MachineNode->getOperand(1). | ||||
6312 | getOperand(0)); | ||||
6313 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6314 | ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), | ||||
6315 | MVT::i1, MachineNode->getOperand(0), | ||||
6316 | MachineNode->getOperand(1)); | ||||
6317 | SelectSwap = true; | ||||
6318 | } | ||||
6319 | break; | ||||
6320 | case PPC::CRNAND: | ||||
6321 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6322 | // nand(x, x) -> nor(x, x) | ||||
6323 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6324 | MVT::i1, MachineNode->getOperand(0), | ||||
6325 | MachineNode->getOperand(0)); | ||||
6326 | else if (Op1Set) | ||||
6327 | // nand(1, y) -> nor(y, y) | ||||
6328 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6329 | MVT::i1, MachineNode->getOperand(1), | ||||
6330 | MachineNode->getOperand(1)); | ||||
6331 | else if (Op2Set) | ||||
6332 | // nand(x, 1) -> nor(x, x) | ||||
6333 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6334 | MVT::i1, MachineNode->getOperand(0), | ||||
6335 | MachineNode->getOperand(0)); | ||||
6336 | else if (Op1Unset || Op2Unset) | ||||
6337 | // nand(x, 0) = nand(0, y) = 1 | ||||
6338 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6339 | MVT::i1); | ||||
6340 | else if (Op1Not) | ||||
6341 | // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) | ||||
6342 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6343 | MVT::i1, MachineNode->getOperand(0). | ||||
6344 | getOperand(0), | ||||
6345 | MachineNode->getOperand(1)); | ||||
6346 | else if (Op2Not) | ||||
6347 | // nand(x, ~y) = ~x | y = orc(y, x) | ||||
6348 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6349 | MVT::i1, MachineNode->getOperand(1). | ||||
6350 | getOperand(0), | ||||
6351 | MachineNode->getOperand(0)); | ||||
6352 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6353 | ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), | ||||
6354 | MVT::i1, MachineNode->getOperand(0), | ||||
6355 | MachineNode->getOperand(1)); | ||||
6356 | SelectSwap = true; | ||||
6357 | } | ||||
6358 | break; | ||||
6359 | case PPC::CROR: | ||||
6360 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6361 | // x | x = x | ||||
6362 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6363 | else if (Op1Set || Op2Set) | ||||
6364 | // x | 1 = 1 | y = 1 | ||||
6365 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6366 | MVT::i1); | ||||
6367 | else if (Op1Unset) | ||||
6368 | // 0 | y = y | ||||
6369 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6370 | else if (Op2Unset) | ||||
6371 | // x | 0 = x | ||||
6372 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6373 | else if (Op1Not) | ||||
6374 | // ~x | y = orc(y, x) | ||||
6375 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6376 | MVT::i1, MachineNode->getOperand(1), | ||||
6377 | MachineNode->getOperand(0). | ||||
6378 | getOperand(0)); | ||||
6379 | else if (Op2Not) | ||||
6380 | // x | ~y = orc(x, y) | ||||
6381 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6382 | MVT::i1, MachineNode->getOperand(0), | ||||
6383 | MachineNode->getOperand(1). | ||||
6384 | getOperand(0)); | ||||
6385 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6386 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6387 | MVT::i1, MachineNode->getOperand(0), | ||||
6388 | MachineNode->getOperand(1)); | ||||
6389 | SelectSwap = true; | ||||
6390 | } | ||||
6391 | break; | ||||
6392 | case PPC::CRXOR: | ||||
6393 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6394 | // xor(x, x) = 0 | ||||
6395 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6396 | MVT::i1); | ||||
6397 | else if (Op1Set) | ||||
6398 | // xor(1, y) -> nor(y, y) | ||||
6399 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6400 | MVT::i1, MachineNode->getOperand(1), | ||||
6401 | MachineNode->getOperand(1)); | ||||
6402 | else if (Op2Set) | ||||
6403 | // xor(x, 1) -> nor(x, x) | ||||
6404 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6405 | MVT::i1, MachineNode->getOperand(0), | ||||
6406 | MachineNode->getOperand(0)); | ||||
6407 | else if (Op1Unset) | ||||
6408 | // xor(0, y) = y | ||||
6409 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6410 | else if (Op2Unset) | ||||
6411 | // xor(x, 0) = x | ||||
6412 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6413 | else if (Op1Not) | ||||
6414 | // xor(~x, y) = eqv(x, y) | ||||
6415 | ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), | ||||
6416 | MVT::i1, MachineNode->getOperand(0). | ||||
6417 | getOperand(0), | ||||
6418 | MachineNode->getOperand(1)); | ||||
6419 | else if (Op2Not) | ||||
6420 | // xor(x, ~y) = eqv(x, y) | ||||
6421 | ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), | ||||
6422 | MVT::i1, MachineNode->getOperand(0), | ||||
6423 | MachineNode->getOperand(1). | ||||
6424 | getOperand(0)); | ||||
6425 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6426 | ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), | ||||
6427 | MVT::i1, MachineNode->getOperand(0), | ||||
6428 | MachineNode->getOperand(1)); | ||||
6429 | SelectSwap = true; | ||||
6430 | } | ||||
6431 | break; | ||||
6432 | case PPC::CRNOR: | ||||
6433 | if (Op1Set || Op2Set) | ||||
6434 | // nor(1, y) -> 0 | ||||
6435 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6436 | MVT::i1); | ||||
6437 | else if (Op1Unset) | ||||
6438 | // nor(0, y) = ~y -> nor(y, y) | ||||
6439 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6440 | MVT::i1, MachineNode->getOperand(1), | ||||
6441 | MachineNode->getOperand(1)); | ||||
6442 | else if (Op2Unset) | ||||
6443 | // nor(x, 0) = ~x | ||||
6444 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6445 | MVT::i1, MachineNode->getOperand(0), | ||||
6446 | MachineNode->getOperand(0)); | ||||
6447 | else if (Op1Not) | ||||
6448 | // nor(~x, y) = andc(x, y) | ||||
6449 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6450 | MVT::i1, MachineNode->getOperand(0). | ||||
6451 | getOperand(0), | ||||
6452 | MachineNode->getOperand(1)); | ||||
6453 | else if (Op2Not) | ||||
6454 | // nor(x, ~y) = andc(y, x) | ||||
6455 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6456 | MVT::i1, MachineNode->getOperand(1). | ||||
6457 | getOperand(0), | ||||
6458 | MachineNode->getOperand(0)); | ||||
6459 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6460 | ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), | ||||
6461 | MVT::i1, MachineNode->getOperand(0), | ||||
6462 | MachineNode->getOperand(1)); | ||||
6463 | SelectSwap = true; | ||||
6464 | } | ||||
6465 | break; | ||||
6466 | case PPC::CREQV: | ||||
6467 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6468 | // eqv(x, x) = 1 | ||||
6469 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6470 | MVT::i1); | ||||
6471 | else if (Op1Set) | ||||
6472 | // eqv(1, y) = y | ||||
6473 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6474 | else if (Op2Set) | ||||
6475 | // eqv(x, 1) = x | ||||
6476 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6477 | else if (Op1Unset) | ||||
6478 | // eqv(0, y) = ~y -> nor(y, y) | ||||
6479 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6480 | MVT::i1, MachineNode->getOperand(1), | ||||
6481 | MachineNode->getOperand(1)); | ||||
6482 | else if (Op2Unset) | ||||
6483 | // eqv(x, 0) = ~x | ||||
6484 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6485 | MVT::i1, MachineNode->getOperand(0), | ||||
6486 | MachineNode->getOperand(0)); | ||||
6487 | else if (Op1Not) | ||||
6488 | // eqv(~x, y) = xor(x, y) | ||||
6489 | ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), | ||||
6490 | MVT::i1, MachineNode->getOperand(0). | ||||
6491 | getOperand(0), | ||||
6492 | MachineNode->getOperand(1)); | ||||
6493 | else if (Op2Not) | ||||
6494 | // eqv(x, ~y) = xor(x, y) | ||||
6495 | ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), | ||||
6496 | MVT::i1, MachineNode->getOperand(0), | ||||
6497 | MachineNode->getOperand(1). | ||||
6498 | getOperand(0)); | ||||
6499 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6500 | ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), | ||||
6501 | MVT::i1, MachineNode->getOperand(0), | ||||
6502 | MachineNode->getOperand(1)); | ||||
6503 | SelectSwap = true; | ||||
6504 | } | ||||
6505 | break; | ||||
6506 | case PPC::CRANDC: | ||||
6507 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6508 | // andc(x, x) = 0 | ||||
6509 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6510 | MVT::i1); | ||||
6511 | else if (Op1Set) | ||||
6512 | // andc(1, y) = ~y | ||||
6513 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6514 | MVT::i1, MachineNode->getOperand(1), | ||||
6515 | MachineNode->getOperand(1)); | ||||
6516 | else if (Op1Unset || Op2Set) | ||||
6517 | // andc(0, y) = andc(x, 1) = 0 | ||||
6518 | ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), | ||||
6519 | MVT::i1); | ||||
6520 | else if (Op2Unset) | ||||
6521 | // andc(x, 0) = x | ||||
6522 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6523 | else if (Op1Not) | ||||
6524 | // andc(~x, y) = ~(x | y) = nor(x, y) | ||||
6525 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6526 | MVT::i1, MachineNode->getOperand(0). | ||||
6527 | getOperand(0), | ||||
6528 | MachineNode->getOperand(1)); | ||||
6529 | else if (Op2Not) | ||||
6530 | // andc(x, ~y) = x & y | ||||
6531 | ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), | ||||
6532 | MVT::i1, MachineNode->getOperand(0), | ||||
6533 | MachineNode->getOperand(1). | ||||
6534 | getOperand(0)); | ||||
6535 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6536 | ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), | ||||
6537 | MVT::i1, MachineNode->getOperand(1), | ||||
6538 | MachineNode->getOperand(0)); | ||||
6539 | SelectSwap = true; | ||||
6540 | } | ||||
6541 | break; | ||||
6542 | case PPC::CRORC: | ||||
6543 | if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) | ||||
6544 | // orc(x, x) = 1 | ||||
6545 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6546 | MVT::i1); | ||||
6547 | else if (Op1Set || Op2Unset) | ||||
6548 | // orc(1, y) = orc(x, 0) = 1 | ||||
6549 | ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), | ||||
6550 | MVT::i1); | ||||
6551 | else if (Op2Set) | ||||
6552 | // orc(x, 1) = x | ||||
6553 | ResNode = MachineNode->getOperand(0).getNode(); | ||||
6554 | else if (Op1Unset) | ||||
6555 | // orc(0, y) = ~y | ||||
6556 | ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), | ||||
6557 | MVT::i1, MachineNode->getOperand(1), | ||||
6558 | MachineNode->getOperand(1)); | ||||
6559 | else if (Op1Not) | ||||
6560 | // orc(~x, y) = ~(x & y) = nand(x, y) | ||||
6561 | ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), | ||||
6562 | MVT::i1, MachineNode->getOperand(0). | ||||
6563 | getOperand(0), | ||||
6564 | MachineNode->getOperand(1)); | ||||
6565 | else if (Op2Not) | ||||
6566 | // orc(x, ~y) = x | y | ||||
6567 | ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), | ||||
6568 | MVT::i1, MachineNode->getOperand(0), | ||||
6569 | MachineNode->getOperand(1). | ||||
6570 | getOperand(0)); | ||||
6571 | else if (AllUsersSelectZero(MachineNode)) { | ||||
6572 | ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), | ||||
6573 | MVT::i1, MachineNode->getOperand(1), | ||||
6574 | MachineNode->getOperand(0)); | ||||
6575 | SelectSwap = true; | ||||
6576 | } | ||||
6577 | break; | ||||
6578 | case PPC::SELECT_I4: | ||||
6579 | case PPC::SELECT_I8: | ||||
6580 | case PPC::SELECT_F4: | ||||
6581 | case PPC::SELECT_F8: | ||||
6582 | case PPC::SELECT_SPE: | ||||
6583 | case PPC::SELECT_SPE4: | ||||
6584 | case PPC::SELECT_VRRC: | ||||
6585 | case PPC::SELECT_VSFRC: | ||||
6586 | case PPC::SELECT_VSSRC: | ||||
6587 | case PPC::SELECT_VSRC: | ||||
6588 | if (Op1Set) | ||||
6589 | ResNode = MachineNode->getOperand(1).getNode(); | ||||
6590 | else if (Op1Unset) | ||||
6591 | ResNode = MachineNode->getOperand(2).getNode(); | ||||
6592 | else if (Op1Not) | ||||
6593 | ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), | ||||
6594 | SDLoc(MachineNode), | ||||
6595 | MachineNode->getValueType(0), | ||||
6596 | MachineNode->getOperand(0). | ||||
6597 | getOperand(0), | ||||
6598 | MachineNode->getOperand(2), | ||||
6599 | MachineNode->getOperand(1)); | ||||
6600 | break; | ||||
6601 | case PPC::BC: | ||||
6602 | case PPC::BCn: | ||||
6603 | if (Op1Not) | ||||
6604 | ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : | ||||
6605 | PPC::BC, | ||||
6606 | SDLoc(MachineNode), | ||||
6607 | MVT::Other, | ||||
6608 | MachineNode->getOperand(0). | ||||
6609 | getOperand(0), | ||||
6610 | MachineNode->getOperand(1), | ||||
6611 | MachineNode->getOperand(2)); | ||||
6612 | // FIXME: Handle Op1Set, Op1Unset here too. | ||||
6613 | break; | ||||
6614 | } | ||||
6615 | |||||
6616 | // If we're inverting this node because it is used only by selects that | ||||
6617 | // we'd like to swap, then swap the selects before the node replacement. | ||||
6618 | if (SelectSwap) | ||||
6619 | SwapAllSelectUsers(MachineNode); | ||||
6620 | |||||
6621 | if (ResNode != MachineNode) { | ||||
6622 | LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "CR Peephole replacing:\nOld: " ; } } while (false); | ||||
6623 | LLVM_DEBUG(MachineNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { MachineNode->dump(CurDAG); } } while (false ); | ||||
6624 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6625 | LLVM_DEBUG(ResNode->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { ResNode->dump(CurDAG); } } while (false ); | ||||
6626 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6627 | |||||
6628 | ReplaceUses(MachineNode, ResNode); | ||||
6629 | IsModified = true; | ||||
6630 | } | ||||
6631 | } | ||||
6632 | if (IsModified) | ||||
6633 | CurDAG->RemoveDeadNodes(); | ||||
6634 | } while (IsModified); | ||||
6635 | } | ||||
6636 | |||||
6637 | // Gather the set of 32-bit operations that are known to have their | ||||
6638 | // higher-order 32 bits zero, where ToPromote contains all such operations. | ||||
6639 | static bool PeepholePPC64ZExtGather(SDValue Op32, | ||||
6640 | SmallPtrSetImpl<SDNode *> &ToPromote) { | ||||
6641 | if (!Op32.isMachineOpcode()) | ||||
6642 | return false; | ||||
6643 | |||||
6644 | // First, check for the "frontier" instructions (those that will clear the | ||||
6645 | // higher-order 32 bits. | ||||
6646 | |||||
6647 | // For RLWINM and RLWNM, we need to make sure that the mask does not wrap | ||||
6648 | // around. If it does not, then these instructions will clear the | ||||
6649 | // higher-order bits. | ||||
6650 | if ((Op32.getMachineOpcode() == PPC::RLWINM || | ||||
6651 | Op32.getMachineOpcode() == PPC::RLWNM) && | ||||
6652 | Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) { | ||||
6653 | ToPromote.insert(Op32.getNode()); | ||||
6654 | return true; | ||||
6655 | } | ||||
6656 | |||||
6657 | // SLW and SRW always clear the higher-order bits. | ||||
6658 | if (Op32.getMachineOpcode() == PPC::SLW || | ||||
6659 | Op32.getMachineOpcode() == PPC::SRW) { | ||||
6660 | ToPromote.insert(Op32.getNode()); | ||||
6661 | return true; | ||||
6662 | } | ||||
6663 | |||||
6664 | // For LI and LIS, we need the immediate to be positive (so that it is not | ||||
6665 | // sign extended). | ||||
6666 | if (Op32.getMachineOpcode() == PPC::LI || | ||||
6667 | Op32.getMachineOpcode() == PPC::LIS) { | ||||
6668 | if (!isUInt<15>(Op32.getConstantOperandVal(0))) | ||||
6669 | return false; | ||||
6670 | |||||
6671 | ToPromote.insert(Op32.getNode()); | ||||
6672 | return true; | ||||
6673 | } | ||||
6674 | |||||
6675 | // LHBRX and LWBRX always clear the higher-order bits. | ||||
6676 | if (Op32.getMachineOpcode() == PPC::LHBRX || | ||||
6677 | Op32.getMachineOpcode() == PPC::LWBRX) { | ||||
6678 | ToPromote.insert(Op32.getNode()); | ||||
6679 | return true; | ||||
6680 | } | ||||
6681 | |||||
6682 | // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. | ||||
6683 | if (Op32.getMachineOpcode() == PPC::CNTLZW || | ||||
6684 | Op32.getMachineOpcode() == PPC::CNTTZW) { | ||||
6685 | ToPromote.insert(Op32.getNode()); | ||||
6686 | return true; | ||||
6687 | } | ||||
6688 | |||||
6689 | // Next, check for those instructions we can look through. | ||||
6690 | |||||
6691 | // Assuming the mask does not wrap around, then the higher-order bits are | ||||
6692 | // taken directly from the first operand. | ||||
6693 | if (Op32.getMachineOpcode() == PPC::RLWIMI && | ||||
6694 | Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) { | ||||
6695 | SmallPtrSet<SDNode *, 16> ToPromote1; | ||||
6696 | if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) | ||||
6697 | return false; | ||||
6698 | |||||
6699 | ToPromote.insert(Op32.getNode()); | ||||
6700 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6701 | return true; | ||||
6702 | } | ||||
6703 | |||||
6704 | // For OR, the higher-order bits are zero if that is true for both operands. | ||||
6705 | // For SELECT_I4, the same is true (but the relevant operand numbers are | ||||
6706 | // shifted by 1). | ||||
6707 | if (Op32.getMachineOpcode() == PPC::OR || | ||||
6708 | Op32.getMachineOpcode() == PPC::SELECT_I4) { | ||||
6709 | unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0; | ||||
6710 | SmallPtrSet<SDNode *, 16> ToPromote1; | ||||
6711 | if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1)) | ||||
6712 | return false; | ||||
6713 | if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1)) | ||||
6714 | return false; | ||||
6715 | |||||
6716 | ToPromote.insert(Op32.getNode()); | ||||
6717 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6718 | return true; | ||||
6719 | } | ||||
6720 | |||||
6721 | // For ORI and ORIS, we need the higher-order bits of the first operand to be | ||||
6722 | // zero, and also for the constant to be positive (so that it is not sign | ||||
6723 | // extended). | ||||
6724 | if (Op32.getMachineOpcode() == PPC::ORI || | ||||
6725 | Op32.getMachineOpcode() == PPC::ORIS) { | ||||
6726 | SmallPtrSet<SDNode *, 16> ToPromote1; | ||||
6727 | if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) | ||||
6728 | return false; | ||||
6729 | if (!isUInt<15>(Op32.getConstantOperandVal(1))) | ||||
6730 | return false; | ||||
6731 | |||||
6732 | ToPromote.insert(Op32.getNode()); | ||||
6733 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6734 | return true; | ||||
6735 | } | ||||
6736 | |||||
6737 | // The higher-order bits of AND are zero if that is true for at least one of | ||||
6738 | // the operands. | ||||
6739 | if (Op32.getMachineOpcode() == PPC::AND) { | ||||
6740 | SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2; | ||||
6741 | bool Op0OK = | ||||
6742 | PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); | ||||
6743 | bool Op1OK = | ||||
6744 | PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2); | ||||
6745 | if (!Op0OK && !Op1OK) | ||||
6746 | return false; | ||||
6747 | |||||
6748 | ToPromote.insert(Op32.getNode()); | ||||
6749 | |||||
6750 | if (Op0OK) | ||||
6751 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6752 | |||||
6753 | if (Op1OK) | ||||
6754 | ToPromote.insert(ToPromote2.begin(), ToPromote2.end()); | ||||
6755 | |||||
6756 | return true; | ||||
6757 | } | ||||
6758 | |||||
6759 | // For ANDI and ANDIS, the higher-order bits are zero if either that is true | ||||
6760 | // of the first operand, or if the second operand is positive (so that it is | ||||
6761 | // not sign extended). | ||||
6762 | if (Op32.getMachineOpcode() == PPC::ANDI_rec || | ||||
6763 | Op32.getMachineOpcode() == PPC::ANDIS_rec) { | ||||
6764 | SmallPtrSet<SDNode *, 16> ToPromote1; | ||||
6765 | bool Op0OK = | ||||
6766 | PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); | ||||
6767 | bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1)); | ||||
6768 | if (!Op0OK && !Op1OK) | ||||
6769 | return false; | ||||
6770 | |||||
6771 | ToPromote.insert(Op32.getNode()); | ||||
6772 | |||||
6773 | if (Op0OK) | ||||
6774 | ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); | ||||
6775 | |||||
6776 | return true; | ||||
6777 | } | ||||
6778 | |||||
6779 | return false; | ||||
6780 | } | ||||
6781 | |||||
6782 | void PPCDAGToDAGISel::PeepholePPC64ZExt() { | ||||
6783 | if (!Subtarget->isPPC64()) | ||||
6784 | return; | ||||
6785 | |||||
6786 | // When we zero-extend from i32 to i64, we use a pattern like this: | ||||
6787 | // def : Pat<(i64 (zext i32:$in)), | ||||
6788 | // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), | ||||
6789 | // 0, 32)>; | ||||
6790 | // There are several 32-bit shift/rotate instructions, however, that will | ||||
6791 | // clear the higher-order bits of their output, rendering the RLDICL | ||||
6792 | // unnecessary. When that happens, we remove it here, and redefine the | ||||
6793 | // relevant 32-bit operation to be a 64-bit operation. | ||||
6794 | |||||
6795 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||
6796 | |||||
6797 | bool MadeChange = false; | ||||
6798 | while (Position != CurDAG->allnodes_begin()) { | ||||
6799 | SDNode *N = &*--Position; | ||||
6800 | // Skip dead nodes and any non-machine opcodes. | ||||
6801 | if (N->use_empty() || !N->isMachineOpcode()) | ||||
6802 | continue; | ||||
6803 | |||||
6804 | if (N->getMachineOpcode() != PPC::RLDICL) | ||||
6805 | continue; | ||||
6806 | |||||
6807 | if (N->getConstantOperandVal(1) != 0 || | ||||
6808 | N->getConstantOperandVal(2) != 32) | ||||
6809 | continue; | ||||
6810 | |||||
6811 | SDValue ISR = N->getOperand(0); | ||||
6812 | if (!ISR.isMachineOpcode() || | ||||
6813 | ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG) | ||||
6814 | continue; | ||||
6815 | |||||
6816 | if (!ISR.hasOneUse()) | ||||
6817 | continue; | ||||
6818 | |||||
6819 | if (ISR.getConstantOperandVal(2) != PPC::sub_32) | ||||
6820 | continue; | ||||
6821 | |||||
6822 | SDValue IDef = ISR.getOperand(0); | ||||
6823 | if (!IDef.isMachineOpcode() || | ||||
6824 | IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF) | ||||
6825 | continue; | ||||
6826 | |||||
6827 | // We now know that we're looking at a canonical i32 -> i64 zext. See if we | ||||
6828 | // can get rid of it. | ||||
6829 | |||||
6830 | SDValue Op32 = ISR->getOperand(1); | ||||
6831 | if (!Op32.isMachineOpcode()) | ||||
6832 | continue; | ||||
6833 | |||||
6834 | // There are some 32-bit instructions that always clear the high-order 32 | ||||
6835 | // bits, there are also some instructions (like AND) that we can look | ||||
6836 | // through. | ||||
6837 | SmallPtrSet<SDNode *, 16> ToPromote; | ||||
6838 | if (!PeepholePPC64ZExtGather(Op32, ToPromote)) | ||||
6839 | continue; | ||||
6840 | |||||
6841 | // If the ToPromote set contains nodes that have uses outside of the set | ||||
6842 | // (except for the original INSERT_SUBREG), then abort the transformation. | ||||
6843 | bool OutsideUse = false; | ||||
6844 | for (SDNode *PN : ToPromote) { | ||||
6845 | for (SDNode *UN : PN->uses()) { | ||||
6846 | if (!ToPromote.count(UN) && UN != ISR.getNode()) { | ||||
6847 | OutsideUse = true; | ||||
6848 | break; | ||||
6849 | } | ||||
6850 | } | ||||
6851 | |||||
6852 | if (OutsideUse) | ||||
6853 | break; | ||||
6854 | } | ||||
6855 | if (OutsideUse) | ||||
6856 | continue; | ||||
6857 | |||||
6858 | MadeChange = true; | ||||
6859 | |||||
6860 | // We now know that this zero extension can be removed by promoting to | ||||
6861 | // nodes in ToPromote to 64-bit operations, where for operations in the | ||||
6862 | // frontier of the set, we need to insert INSERT_SUBREGs for their | ||||
6863 | // operands. | ||||
6864 | for (SDNode *PN : ToPromote) { | ||||
6865 | unsigned NewOpcode; | ||||
6866 | switch (PN->getMachineOpcode()) { | ||||
6867 | default: | ||||
6868 | llvm_unreachable("Don't know the 64-bit variant of this instruction")::llvm::llvm_unreachable_internal("Don't know the 64-bit variant of this instruction" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp" , 6868); | ||||
6869 | case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break; | ||||
6870 | case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break; | ||||
6871 | case PPC::SLW: NewOpcode = PPC::SLW8; break; | ||||
6872 | case PPC::SRW: NewOpcode = PPC::SRW8; break; | ||||
6873 | case PPC::LI: NewOpcode = PPC::LI8; break; | ||||
6874 | case PPC::LIS: NewOpcode = PPC::LIS8; break; | ||||
6875 | case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; | ||||
6876 | case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; | ||||
6877 | case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; | ||||
6878 | case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; | ||||
6879 | case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; | ||||
6880 | case PPC::OR: NewOpcode = PPC::OR8; break; | ||||
6881 | case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; | ||||
6882 | case PPC::ORI: NewOpcode = PPC::ORI8; break; | ||||
6883 | case PPC::ORIS: NewOpcode = PPC::ORIS8; break; | ||||
6884 | case PPC::AND: NewOpcode = PPC::AND8; break; | ||||
6885 | case PPC::ANDI_rec: | ||||
6886 | NewOpcode = PPC::ANDI8_rec; | ||||
6887 | break; | ||||
6888 | case PPC::ANDIS_rec: | ||||
6889 | NewOpcode = PPC::ANDIS8_rec; | ||||
6890 | break; | ||||
6891 | } | ||||
6892 | |||||
6893 | // Note: During the replacement process, the nodes will be in an | ||||
6894 | // inconsistent state (some instructions will have operands with values | ||||
6895 | // of the wrong type). Once done, however, everything should be right | ||||
6896 | // again. | ||||
6897 | |||||
6898 | SmallVector<SDValue, 4> Ops; | ||||
6899 | for (const SDValue &V : PN->ops()) { | ||||
6900 | if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 && | ||||
6901 | !isa<ConstantSDNode>(V)) { | ||||
6902 | SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) }; | ||||
6903 | SDNode *ReplOp = | ||||
6904 | CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V), | ||||
6905 | ISR.getNode()->getVTList(), ReplOpOps); | ||||
6906 | Ops.push_back(SDValue(ReplOp, 0)); | ||||
6907 | } else { | ||||
6908 | Ops.push_back(V); | ||||
6909 | } | ||||
6910 | } | ||||
6911 | |||||
6912 | // Because all to-be-promoted nodes only have users that are other | ||||
6913 | // promoted nodes (or the original INSERT_SUBREG), we can safely replace | ||||
6914 | // the i32 result value type with i64. | ||||
6915 | |||||
6916 | SmallVector<EVT, 2> NewVTs; | ||||
6917 | SDVTList VTs = PN->getVTList(); | ||||
6918 | for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i) | ||||
6919 | if (VTs.VTs[i] == MVT::i32) | ||||
6920 | NewVTs.push_back(MVT::i64); | ||||
6921 | else | ||||
6922 | NewVTs.push_back(VTs.VTs[i]); | ||||
6923 | |||||
6924 | LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "PPC64 ZExt Peephole morphing:\nOld: " ; } } while (false); | ||||
6925 | LLVM_DEBUG(PN->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { PN->dump(CurDAG); } } while (false); | ||||
6926 | |||||
6927 | CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops); | ||||
6928 | |||||
6929 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6930 | LLVM_DEBUG(PN->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { PN->dump(CurDAG); } } while (false); | ||||
6931 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6932 | } | ||||
6933 | |||||
6934 | // Now we replace the original zero extend and its associated INSERT_SUBREG | ||||
6935 | // with the value feeding the INSERT_SUBREG (which has now been promoted to | ||||
6936 | // return an i64). | ||||
6937 | |||||
6938 | LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "PPC64 ZExt Peephole replacing:\nOld: " ; } } while (false); | ||||
6939 | LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(CurDAG); } } while (false); | ||||
6940 | LLVM_DEBUG(dbgs() << "\nNew: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nNew: "; } } while (false ); | ||||
6941 | LLVM_DEBUG(Op32.getNode()->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { Op32.getNode()->dump(CurDAG); } } while (false); | ||||
6942 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
6943 | |||||
6944 | ReplaceUses(N, Op32.getNode()); | ||||
6945 | } | ||||
6946 | |||||
6947 | if (MadeChange) | ||||
6948 | CurDAG->RemoveDeadNodes(); | ||||
6949 | } | ||||
6950 | |||||
6951 | static bool isVSXSwap(SDValue N) { | ||||
6952 | if (!N->isMachineOpcode()) | ||||
6953 | return false; | ||||
6954 | unsigned Opc = N->getMachineOpcode(); | ||||
6955 | |||||
6956 | // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate | ||||
6957 | // operand is 2. | ||||
6958 | if (Opc == PPC::XXPERMDIs) { | ||||
6959 | return isa<ConstantSDNode>(N->getOperand(1)) && | ||||
6960 | N->getConstantOperandVal(1) == 2; | ||||
6961 | } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) { | ||||
6962 | return N->getOperand(0) == N->getOperand(1) && | ||||
6963 | isa<ConstantSDNode>(N->getOperand(2)) && | ||||
6964 | N->getConstantOperandVal(2) == 2; | ||||
6965 | } | ||||
6966 | |||||
6967 | return false; | ||||
6968 | } | ||||
6969 | |||||
6970 | // TODO: Make this complete and replace with a table-gen bit. | ||||
6971 | static bool isLaneInsensitive(SDValue N) { | ||||
6972 | if (!N->isMachineOpcode()) | ||||
6973 | return false; | ||||
6974 | unsigned Opc = N->getMachineOpcode(); | ||||
6975 | |||||
6976 | switch (Opc) { | ||||
6977 | default: | ||||
6978 | return false; | ||||
6979 | case PPC::VAVGSB: | ||||
6980 | case PPC::VAVGUB: | ||||
6981 | case PPC::VAVGSH: | ||||
6982 | case PPC::VAVGUH: | ||||
6983 | case PPC::VAVGSW: | ||||
6984 | case PPC::VAVGUW: | ||||
6985 | case PPC::VMAXFP: | ||||
6986 | case PPC::VMAXSB: | ||||
6987 | case PPC::VMAXUB: | ||||
6988 | case PPC::VMAXSH: | ||||
6989 | case PPC::VMAXUH: | ||||
6990 | case PPC::VMAXSW: | ||||
6991 | case PPC::VMAXUW: | ||||
6992 | case PPC::VMINFP: | ||||
6993 | case PPC::VMINSB: | ||||
6994 | case PPC::VMINUB: | ||||
6995 | case PPC::VMINSH: | ||||
6996 | case PPC::VMINUH: | ||||
6997 | case PPC::VMINSW: | ||||
6998 | case PPC::VMINUW: | ||||
6999 | case PPC::VADDFP: | ||||
7000 | case PPC::VADDUBM: | ||||
7001 | case PPC::VADDUHM: | ||||
7002 | case PPC::VADDUWM: | ||||
7003 | case PPC::VSUBFP: | ||||
7004 | case PPC::VSUBUBM: | ||||
7005 | case PPC::VSUBUHM: | ||||
7006 | case PPC::VSUBUWM: | ||||
7007 | case PPC::VAND: | ||||
7008 | case PPC::VANDC: | ||||
7009 | case PPC::VOR: | ||||
7010 | case PPC::VORC: | ||||
7011 | case PPC::VXOR: | ||||
7012 | case PPC::VNOR: | ||||
7013 | case PPC::VMULUWM: | ||||
7014 | return true; | ||||
7015 | } | ||||
7016 | } | ||||
7017 | |||||
7018 | // Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is | ||||
7019 | // lane-insensitive. | ||||
7020 | static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) { | ||||
7021 | // Our desired xxswap might be source of COPY_TO_REGCLASS. | ||||
7022 | // TODO: Can we put this a common method for DAG? | ||||
7023 | auto SkipRCCopy = [](SDValue V) { | ||||
7024 | while (V->isMachineOpcode() && | ||||
7025 | V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) { | ||||
7026 | // All values in the chain should have single use. | ||||
7027 | if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode())) | ||||
7028 | return SDValue(); | ||||
7029 | V = V->getOperand(0); | ||||
7030 | } | ||||
7031 | return V.hasOneUse() ? V : SDValue(); | ||||
7032 | }; | ||||
7033 | |||||
7034 | SDValue VecOp = SkipRCCopy(N->getOperand(0)); | ||||
7035 | if (!VecOp || !isLaneInsensitive(VecOp)) | ||||
7036 | return; | ||||
7037 | |||||
7038 | SDValue LHS = SkipRCCopy(VecOp.getOperand(0)), | ||||
7039 | RHS = SkipRCCopy(VecOp.getOperand(1)); | ||||
7040 | if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS)) | ||||
7041 | return; | ||||
7042 | |||||
7043 | // These swaps may still have chain-uses here, count on dead code elimination | ||||
7044 | // in following passes to remove them. | ||||
7045 | DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0)); | ||||
7046 | DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0)); | ||||
7047 | DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0)); | ||||
7048 | } | ||||
7049 | |||||
7050 | void PPCDAGToDAGISel::PeepholePPC64() { | ||||
7051 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||
7052 | |||||
7053 | while (Position != CurDAG->allnodes_begin()) { | ||||
7054 | SDNode *N = &*--Position; | ||||
7055 | // Skip dead nodes and any non-machine opcodes. | ||||
7056 | if (N->use_empty() || !N->isMachineOpcode()) | ||||
7057 | continue; | ||||
7058 | |||||
7059 | if (isVSXSwap(SDValue(N, 0))) | ||||
7060 | reduceVSXSwap(N, CurDAG); | ||||
7061 | |||||
7062 | unsigned FirstOp; | ||||
7063 | unsigned StorageOpcode = N->getMachineOpcode(); | ||||
7064 | bool RequiresMod4Offset = false; | ||||
7065 | |||||
7066 | switch (StorageOpcode) { | ||||
7067 | default: continue; | ||||
7068 | |||||
7069 | case PPC::LWA: | ||||
7070 | case PPC::LD: | ||||
7071 | case PPC::DFLOADf64: | ||||
7072 | case PPC::DFLOADf32: | ||||
7073 | RequiresMod4Offset = true; | ||||
7074 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
7075 | case PPC::LBZ: | ||||
7076 | case PPC::LBZ8: | ||||
7077 | case PPC::LFD: | ||||
7078 | case PPC::LFS: | ||||
7079 | case PPC::LHA: | ||||
7080 | case PPC::LHA8: | ||||
7081 | case PPC::LHZ: | ||||
7082 | case PPC::LHZ8: | ||||
7083 | case PPC::LWZ: | ||||
7084 | case PPC::LWZ8: | ||||
7085 | FirstOp = 0; | ||||
7086 | break; | ||||
7087 | |||||
7088 | case PPC::STD: | ||||
7089 | case PPC::DFSTOREf64: | ||||
7090 | case PPC::DFSTOREf32: | ||||
7091 | RequiresMod4Offset = true; | ||||
7092 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
7093 | case PPC::STB: | ||||
7094 | case PPC::STB8: | ||||
7095 | case PPC::STFD: | ||||
7096 | case PPC::STFS: | ||||
7097 | case PPC::STH: | ||||
7098 | case PPC::STH8: | ||||
7099 | case PPC::STW: | ||||
7100 | case PPC::STW8: | ||||
7101 | FirstOp = 1; | ||||
7102 | break; | ||||
7103 | } | ||||
7104 | |||||
7105 | // If this is a load or store with a zero offset, or within the alignment, | ||||
7106 | // we may be able to fold an add-immediate into the memory operation. | ||||
7107 | // The check against alignment is below, as it can't occur until we check | ||||
7108 | // the arguments to N | ||||
7109 | if (!isa<ConstantSDNode>(N->getOperand(FirstOp))) | ||||
7110 | continue; | ||||
7111 | |||||
7112 | SDValue Base = N->getOperand(FirstOp + 1); | ||||
7113 | if (!Base.isMachineOpcode()) | ||||
7114 | continue; | ||||
7115 | |||||
7116 | unsigned Flags = 0; | ||||
7117 | bool ReplaceFlags = true; | ||||
7118 | |||||
7119 | // When the feeding operation is an add-immediate of some sort, | ||||
7120 | // determine whether we need to add relocation information to the | ||||
7121 | // target flags on the immediate operand when we fold it into the | ||||
7122 | // load instruction. | ||||
7123 | // | ||||
7124 | // For something like ADDItocL, the relocation information is | ||||
7125 | // inferred from the opcode; when we process it in the AsmPrinter, | ||||
7126 | // we add the necessary relocation there. A load, though, can receive | ||||
7127 | // relocation from various flavors of ADDIxxx, so we need to carry | ||||
7128 | // the relocation information in the target flags. | ||||
7129 | switch (Base.getMachineOpcode()) { | ||||
7130 | default: continue; | ||||
7131 | |||||
7132 | case PPC::ADDI8: | ||||
7133 | case PPC::ADDI: | ||||
7134 | // In some cases (such as TLS) the relocation information | ||||
7135 | // is already in place on the operand, so copying the operand | ||||
7136 | // is sufficient. | ||||
7137 | ReplaceFlags = false; | ||||
7138 | // For these cases, the immediate may not be divisible by 4, in | ||||
7139 | // which case the fold is illegal for DS-form instructions. (The | ||||
7140 | // other cases provide aligned addresses and are always safe.) | ||||
7141 | if (RequiresMod4Offset && | ||||
7142 | (!isa<ConstantSDNode>(Base.getOperand(1)) || | ||||
7143 | Base.getConstantOperandVal(1) % 4 != 0)) | ||||
7144 | continue; | ||||
7145 | break; | ||||
7146 | case PPC::ADDIdtprelL: | ||||
7147 | Flags = PPCII::MO_DTPREL_LO; | ||||
7148 | break; | ||||
7149 | case PPC::ADDItlsldL: | ||||
7150 | Flags = PPCII::MO_TLSLD_LO; | ||||
7151 | break; | ||||
7152 | case PPC::ADDItocL: | ||||
7153 | Flags = PPCII::MO_TOC_LO; | ||||
7154 | break; | ||||
7155 | } | ||||
7156 | |||||
7157 | SDValue ImmOpnd = Base.getOperand(1); | ||||
7158 | |||||
7159 | // On PPC64, the TOC base pointer is guaranteed by the ABI only to have | ||||
7160 | // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, | ||||
7161 | // we might have needed different @ha relocation values for the offset | ||||
7162 | // pointers). | ||||
7163 | int MaxDisplacement = 7; | ||||
7164 | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { | ||||
7165 | const GlobalValue *GV = GA->getGlobal(); | ||||
7166 | Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); | ||||
7167 | MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement); | ||||
7168 | } | ||||
7169 | |||||
7170 | bool UpdateHBase = false; | ||||
7171 | SDValue HBase = Base.getOperand(0); | ||||
7172 | |||||
7173 | int Offset = N->getConstantOperandVal(FirstOp); | ||||
7174 | if (ReplaceFlags) { | ||||
7175 | if (Offset < 0 || Offset > MaxDisplacement) { | ||||
7176 | // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only | ||||
7177 | // one use, then we can do this for any offset, we just need to also | ||||
7178 | // update the offset (i.e. the symbol addend) on the addis also. | ||||
7179 | if (Base.getMachineOpcode() != PPC::ADDItocL) | ||||
7180 | continue; | ||||
7181 | |||||
7182 | if (!HBase.isMachineOpcode() || | ||||
7183 | HBase.getMachineOpcode() != PPC::ADDIStocHA8) | ||||
7184 | continue; | ||||
7185 | |||||
7186 | if (!Base.hasOneUse() || !HBase.hasOneUse()) | ||||
7187 | continue; | ||||
7188 | |||||
7189 | SDValue HImmOpnd = HBase.getOperand(1); | ||||
7190 | if (HImmOpnd != ImmOpnd) | ||||
7191 | continue; | ||||
7192 | |||||
7193 | UpdateHBase = true; | ||||
7194 | } | ||||
7195 | } else { | ||||
7196 | // If we're directly folding the addend from an addi instruction, then: | ||||
7197 | // 1. In general, the offset on the memory access must be zero. | ||||
7198 | // 2. If the addend is a constant, then it can be combined with a | ||||
7199 | // non-zero offset, but only if the result meets the encoding | ||||
7200 | // requirements. | ||||
7201 | if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) { | ||||
7202 | Offset += C->getSExtValue(); | ||||
7203 | |||||
7204 | if (RequiresMod4Offset && (Offset % 4) != 0) | ||||
7205 | continue; | ||||
7206 | |||||
7207 | if (!isInt<16>(Offset)) | ||||
7208 | continue; | ||||
7209 | |||||
7210 | ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), | ||||
7211 | ImmOpnd.getValueType()); | ||||
7212 | } else if (Offset != 0) { | ||||
7213 | continue; | ||||
7214 | } | ||||
7215 | } | ||||
7216 | |||||
7217 | // We found an opportunity. Reverse the operands from the add | ||||
7218 | // immediate and substitute them into the load or store. If | ||||
7219 | // needed, update the target flags for the immediate operand to | ||||
7220 | // reflect the necessary relocation information. | ||||
7221 | LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Folding add-immediate into mem-op:\nBase: " ; } } while (false); | ||||
7222 | LLVM_DEBUG(Base->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { Base->dump(CurDAG); } } while (false); | ||||
7223 | LLVM_DEBUG(dbgs() << "\nN: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\nN: "; } } while (false); | ||||
7224 | LLVM_DEBUG(N->dump(CurDAG))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { N->dump(CurDAG); } } while (false); | ||||
7225 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "\n"; } } while (false); | ||||
7226 | |||||
7227 | // If the relocation information isn't already present on the | ||||
7228 | // immediate operand, add it now. | ||||
7229 | if (ReplaceFlags) { | ||||
7230 | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { | ||||
7231 | SDLoc dl(GA); | ||||
7232 | const GlobalValue *GV = GA->getGlobal(); | ||||
7233 | Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); | ||||
7234 | // We can't perform this optimization for data whose alignment | ||||
7235 | // is insufficient for the instruction encoding. | ||||
7236 | if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) { | ||||
7237 | LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("ppc-codegen")) { dbgs() << "Rejected this candidate for alignment.\n\n" ; } } while (false); | ||||
7238 | continue; | ||||
7239 | } | ||||
7240 | ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); | ||||
7241 | } else if (ConstantPoolSDNode *CP = | ||||
7242 | dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { | ||||
7243 | const Constant *C = CP->getConstVal(); | ||||
7244 | ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), | ||||
7245 | Offset, Flags); | ||||
7246 | } | ||||
7247 | } | ||||
7248 | |||||
7249 | if (FirstOp == 1) // Store | ||||
7250 | (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, | ||||
7251 | Base.getOperand(0), N->getOperand(3)); | ||||
7252 | else // Load | ||||
7253 | (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), | ||||
7254 | N->getOperand(2)); | ||||
7255 | |||||
7256 | if (UpdateHBase) | ||||
7257 | (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), | ||||
7258 | ImmOpnd); | ||||
7259 | |||||
7260 | // The add-immediate may now be dead, in which case remove it. | ||||
7261 | if (Base.getNode()->use_empty()) | ||||
7262 | CurDAG->RemoveDeadNode(Base.getNode()); | ||||
7263 | } | ||||
7264 | } | ||||
7265 | |||||
7266 | /// createPPCISelDag - This pass converts a legalized DAG into a | ||||
7267 | /// PowerPC-specific DAG, ready for instruction scheduling. | ||||
7268 | /// | ||||
7269 | FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, | ||||
7270 | CodeGenOpt::Level OptLevel) { | ||||
7271 | return new PPCDAGToDAGISel(TM, OptLevel); | ||||
7272 | } |
1 | //===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file declares the SDNode class and derived classes, which are used to | |||
10 | // represent the nodes and operations present in a SelectionDAG. These nodes | |||
11 | // and operations are machine code level operations, with some similarities to | |||
12 | // the GCC RTL representation. | |||
13 | // | |||
14 | // Clients should include the SelectionDAG.h file instead of this file directly. | |||
15 | // | |||
16 | //===----------------------------------------------------------------------===// | |||
17 | ||||
18 | #ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
19 | #define LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
20 | ||||
21 | #include "llvm/ADT/APFloat.h" | |||
22 | #include "llvm/ADT/ArrayRef.h" | |||
23 | #include "llvm/ADT/BitVector.h" | |||
24 | #include "llvm/ADT/FoldingSet.h" | |||
25 | #include "llvm/ADT/GraphTraits.h" | |||
26 | #include "llvm/ADT/SmallPtrSet.h" | |||
27 | #include "llvm/ADT/SmallVector.h" | |||
28 | #include "llvm/ADT/ilist_node.h" | |||
29 | #include "llvm/ADT/iterator.h" | |||
30 | #include "llvm/ADT/iterator_range.h" | |||
31 | #include "llvm/CodeGen/ISDOpcodes.h" | |||
32 | #include "llvm/CodeGen/MachineMemOperand.h" | |||
33 | #include "llvm/CodeGen/Register.h" | |||
34 | #include "llvm/CodeGen/ValueTypes.h" | |||
35 | #include "llvm/IR/Constants.h" | |||
36 | #include "llvm/IR/DebugLoc.h" | |||
37 | #include "llvm/IR/Instruction.h" | |||
38 | #include "llvm/IR/Instructions.h" | |||
39 | #include "llvm/IR/Metadata.h" | |||
40 | #include "llvm/IR/Operator.h" | |||
41 | #include "llvm/Support/AlignOf.h" | |||
42 | #include "llvm/Support/AtomicOrdering.h" | |||
43 | #include "llvm/Support/Casting.h" | |||
44 | #include "llvm/Support/ErrorHandling.h" | |||
45 | #include "llvm/Support/MachineValueType.h" | |||
46 | #include "llvm/Support/TypeSize.h" | |||
47 | #include <algorithm> | |||
48 | #include <cassert> | |||
49 | #include <climits> | |||
50 | #include <cstddef> | |||
51 | #include <cstdint> | |||
52 | #include <cstring> | |||
53 | #include <iterator> | |||
54 | #include <string> | |||
55 | #include <tuple> | |||
56 | ||||
57 | namespace llvm { | |||
58 | ||||
59 | class APInt; | |||
60 | class Constant; | |||
61 | template <typename T> struct DenseMapInfo; | |||
62 | class GlobalValue; | |||
63 | class MachineBasicBlock; | |||
64 | class MachineConstantPoolValue; | |||
65 | class MCSymbol; | |||
66 | class raw_ostream; | |||
67 | class SDNode; | |||
68 | class SelectionDAG; | |||
69 | class Type; | |||
70 | class Value; | |||
71 | ||||
72 | void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr, | |||
73 | bool force = false); | |||
74 | ||||
75 | /// This represents a list of ValueType's that has been intern'd by | |||
76 | /// a SelectionDAG. Instances of this simple value class are returned by | |||
77 | /// SelectionDAG::getVTList(...). | |||
78 | /// | |||
79 | struct SDVTList { | |||
80 | const EVT *VTs; | |||
81 | unsigned int NumVTs; | |||
82 | }; | |||
83 | ||||
84 | namespace ISD { | |||
85 | ||||
86 | /// Node predicates | |||
87 | ||||
88 | /// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the | |||
89 | /// same constant or undefined, return true and return the constant value in | |||
90 | /// \p SplatValue. | |||
91 | bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); | |||
92 | ||||
93 | /// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where | |||
94 | /// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to | |||
95 | /// true, it only checks BUILD_VECTOR. | |||
96 | bool isConstantSplatVectorAllOnes(const SDNode *N, | |||
97 | bool BuildVectorOnly = false); | |||
98 | ||||
99 | /// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where | |||
100 | /// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it | |||
101 | /// only checks BUILD_VECTOR. | |||
102 | bool isConstantSplatVectorAllZeros(const SDNode *N, | |||
103 | bool BuildVectorOnly = false); | |||
104 | ||||
105 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
106 | /// elements are ~0 or undef. | |||
107 | bool isBuildVectorAllOnes(const SDNode *N); | |||
108 | ||||
109 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
110 | /// elements are 0 or undef. | |||
111 | bool isBuildVectorAllZeros(const SDNode *N); | |||
112 | ||||
113 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
114 | /// ConstantSDNode or undef. | |||
115 | bool isBuildVectorOfConstantSDNodes(const SDNode *N); | |||
116 | ||||
117 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
118 | /// ConstantFPSDNode or undef. | |||
119 | bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); | |||
120 | ||||
121 | /// Return true if the node has at least one operand and all operands of the | |||
122 | /// specified node are ISD::UNDEF. | |||
123 | bool allOperandsUndef(const SDNode *N); | |||
124 | ||||
125 | } // end namespace ISD | |||
126 | ||||
127 | //===----------------------------------------------------------------------===// | |||
128 | /// Unlike LLVM values, Selection DAG nodes may return multiple | |||
129 | /// values as the result of a computation. Many nodes return multiple values, | |||
130 | /// from loads (which define a token and a return value) to ADDC (which returns | |||
131 | /// a result and a carry value), to calls (which may return an arbitrary number | |||
132 | /// of values). | |||
133 | /// | |||
134 | /// As such, each use of a SelectionDAG computation must indicate the node that | |||
135 | /// computes it as well as which return value to use from that node. This pair | |||
136 | /// of information is represented with the SDValue value type. | |||
137 | /// | |||
138 | class SDValue { | |||
139 | friend struct DenseMapInfo<SDValue>; | |||
140 | ||||
141 | SDNode *Node = nullptr; // The node defining the value we are using. | |||
142 | unsigned ResNo = 0; // Which return value of the node we are using. | |||
143 | ||||
144 | public: | |||
145 | SDValue() = default; | |||
146 | SDValue(SDNode *node, unsigned resno); | |||
147 | ||||
148 | /// get the index which selects a specific result in the SDNode | |||
149 | unsigned getResNo() const { return ResNo; } | |||
150 | ||||
151 | /// get the SDNode which holds the desired result | |||
152 | SDNode *getNode() const { return Node; } | |||
153 | ||||
154 | /// set the SDNode | |||
155 | void setNode(SDNode *N) { Node = N; } | |||
156 | ||||
157 | inline SDNode *operator->() const { return Node; } | |||
158 | ||||
159 | bool operator==(const SDValue &O) const { | |||
160 | return Node == O.Node && ResNo == O.ResNo; | |||
161 | } | |||
162 | bool operator!=(const SDValue &O) const { | |||
163 | return !operator==(O); | |||
164 | } | |||
165 | bool operator<(const SDValue &O) const { | |||
166 | return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo); | |||
167 | } | |||
168 | explicit operator bool() const { | |||
169 | return Node != nullptr; | |||
170 | } | |||
171 | ||||
172 | SDValue getValue(unsigned R) const { | |||
173 | return SDValue(Node, R); | |||
174 | } | |||
175 | ||||
176 | /// Return true if this node is an operand of N. | |||
177 | bool isOperandOf(const SDNode *N) const; | |||
178 | ||||
179 | /// Return the ValueType of the referenced return value. | |||
180 | inline EVT getValueType() const; | |||
181 | ||||
182 | /// Return the simple ValueType of the referenced return value. | |||
183 | MVT getSimpleValueType() const { | |||
184 | return getValueType().getSimpleVT(); | |||
185 | } | |||
186 | ||||
187 | /// Returns the size of the value in bits. | |||
188 | /// | |||
189 | /// If the value type is a scalable vector type, the scalable property will | |||
190 | /// be set and the runtime size will be a positive integer multiple of the | |||
191 | /// base size. | |||
192 | TypeSize getValueSizeInBits() const { | |||
193 | return getValueType().getSizeInBits(); | |||
194 | } | |||
195 | ||||
196 | uint64_t getScalarValueSizeInBits() const { | |||
197 | return getValueType().getScalarType().getFixedSizeInBits(); | |||
198 | } | |||
199 | ||||
200 | // Forwarding methods - These forward to the corresponding methods in SDNode. | |||
201 | inline unsigned getOpcode() const; | |||
202 | inline unsigned getNumOperands() const; | |||
203 | inline const SDValue &getOperand(unsigned i) const; | |||
204 | inline uint64_t getConstantOperandVal(unsigned i) const; | |||
205 | inline const APInt &getConstantOperandAPInt(unsigned i) const; | |||
206 | inline bool isTargetMemoryOpcode() const; | |||
207 | inline bool isTargetOpcode() const; | |||
208 | inline bool isMachineOpcode() const; | |||
209 | inline bool isUndef() const; | |||
210 | inline unsigned getMachineOpcode() const; | |||
211 | inline const DebugLoc &getDebugLoc() const; | |||
212 | inline void dump() const; | |||
213 | inline void dump(const SelectionDAG *G) const; | |||
214 | inline void dumpr() const; | |||
215 | inline void dumpr(const SelectionDAG *G) const; | |||
216 | ||||
217 | /// Return true if this operand (which must be a chain) reaches the | |||
218 | /// specified operand without crossing any side-effecting instructions. | |||
219 | /// In practice, this looks through token factors and non-volatile loads. | |||
220 | /// In order to remain efficient, this only | |||
221 | /// looks a couple of nodes in, it does not do an exhaustive search. | |||
222 | bool reachesChainWithoutSideEffects(SDValue Dest, | |||
223 | unsigned Depth = 2) const; | |||
224 | ||||
225 | /// Return true if there are no nodes using value ResNo of Node. | |||
226 | inline bool use_empty() const; | |||
227 | ||||
228 | /// Return true if there is exactly one node using value ResNo of Node. | |||
229 | inline bool hasOneUse() const; | |||
230 | }; | |||
231 | ||||
232 | template<> struct DenseMapInfo<SDValue> { | |||
233 | static inline SDValue getEmptyKey() { | |||
234 | SDValue V; | |||
235 | V.ResNo = -1U; | |||
236 | return V; | |||
237 | } | |||
238 | ||||
239 | static inline SDValue getTombstoneKey() { | |||
240 | SDValue V; | |||
241 | V.ResNo = -2U; | |||
242 | return V; | |||
243 | } | |||
244 | ||||
245 | static unsigned getHashValue(const SDValue &Val) { | |||
246 | return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^ | |||
247 | (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo(); | |||
248 | } | |||
249 | ||||
250 | static bool isEqual(const SDValue &LHS, const SDValue &RHS) { | |||
251 | return LHS == RHS; | |||
252 | } | |||
253 | }; | |||
254 | ||||
255 | /// Allow casting operators to work directly on | |||
256 | /// SDValues as if they were SDNode*'s. | |||
257 | template<> struct simplify_type<SDValue> { | |||
258 | using SimpleType = SDNode *; | |||
259 | ||||
260 | static SimpleType getSimplifiedValue(SDValue &Val) { | |||
261 | return Val.getNode(); | |||
262 | } | |||
263 | }; | |||
264 | template<> struct simplify_type<const SDValue> { | |||
265 | using SimpleType = /*const*/ SDNode *; | |||
266 | ||||
267 | static SimpleType getSimplifiedValue(const SDValue &Val) { | |||
268 | return Val.getNode(); | |||
269 | } | |||
270 | }; | |||
271 | ||||
272 | /// Represents a use of a SDNode. This class holds an SDValue, | |||
273 | /// which records the SDNode being used and the result number, a | |||
274 | /// pointer to the SDNode using the value, and Next and Prev pointers, | |||
275 | /// which link together all the uses of an SDNode. | |||
276 | /// | |||
277 | class SDUse { | |||
278 | /// Val - The value being used. | |||
279 | SDValue Val; | |||
280 | /// User - The user of this value. | |||
281 | SDNode *User = nullptr; | |||
282 | /// Prev, Next - Pointers to the uses list of the SDNode referred by | |||
283 | /// this operand. | |||
284 | SDUse **Prev = nullptr; | |||
285 | SDUse *Next = nullptr; | |||
286 | ||||
287 | public: | |||
288 | SDUse() = default; | |||
289 | SDUse(const SDUse &U) = delete; | |||
290 | SDUse &operator=(const SDUse &) = delete; | |||
291 | ||||
292 | /// Normally SDUse will just implicitly convert to an SDValue that it holds. | |||
293 | operator const SDValue&() const { return Val; } | |||
294 | ||||
295 | /// If implicit conversion to SDValue doesn't work, the get() method returns | |||
296 | /// the SDValue. | |||
297 | const SDValue &get() const { return Val; } | |||
298 | ||||
299 | /// This returns the SDNode that contains this Use. | |||
300 | SDNode *getUser() { return User; } | |||
301 | ||||
302 | /// Get the next SDUse in the use list. | |||
303 | SDUse *getNext() const { return Next; } | |||
304 | ||||
305 | /// Convenience function for get().getNode(). | |||
306 | SDNode *getNode() const { return Val.getNode(); } | |||
307 | /// Convenience function for get().getResNo(). | |||
308 | unsigned getResNo() const { return Val.getResNo(); } | |||
309 | /// Convenience function for get().getValueType(). | |||
310 | EVT getValueType() const { return Val.getValueType(); } | |||
311 | ||||
312 | /// Convenience function for get().operator== | |||
313 | bool operator==(const SDValue &V) const { | |||
314 | return Val == V; | |||
315 | } | |||
316 | ||||
317 | /// Convenience function for get().operator!= | |||
318 | bool operator!=(const SDValue &V) const { | |||
319 | return Val != V; | |||
320 | } | |||
321 | ||||
322 | /// Convenience function for get().operator< | |||
323 | bool operator<(const SDValue &V) const { | |||
324 | return Val < V; | |||
325 | } | |||
326 | ||||
327 | private: | |||
328 | friend class SelectionDAG; | |||
329 | friend class SDNode; | |||
330 | // TODO: unfriend HandleSDNode once we fix its operand handling. | |||
331 | friend class HandleSDNode; | |||
332 | ||||
333 | void setUser(SDNode *p) { User = p; } | |||
334 | ||||
335 | /// Remove this use from its existing use list, assign it the | |||
336 | /// given value, and add it to the new value's node's use list. | |||
337 | inline void set(const SDValue &V); | |||
338 | /// Like set, but only supports initializing a newly-allocated | |||
339 | /// SDUse with a non-null value. | |||
340 | inline void setInitial(const SDValue &V); | |||
341 | /// Like set, but only sets the Node portion of the value, | |||
342 | /// leaving the ResNo portion unmodified. | |||
343 | inline void setNode(SDNode *N); | |||
344 | ||||
345 | void addToList(SDUse **List) { | |||
346 | Next = *List; | |||
347 | if (Next) Next->Prev = &Next; | |||
348 | Prev = List; | |||
349 | *List = this; | |||
350 | } | |||
351 | ||||
352 | void removeFromList() { | |||
353 | *Prev = Next; | |||
354 | if (Next) Next->Prev = Prev; | |||
355 | } | |||
356 | }; | |||
357 | ||||
358 | /// simplify_type specializations - Allow casting operators to work directly on | |||
359 | /// SDValues as if they were SDNode*'s. | |||
360 | template<> struct simplify_type<SDUse> { | |||
361 | using SimpleType = SDNode *; | |||
362 | ||||
363 | static SimpleType getSimplifiedValue(SDUse &Val) { | |||
364 | return Val.getNode(); | |||
365 | } | |||
366 | }; | |||
367 | ||||
368 | /// These are IR-level optimization flags that may be propagated to SDNodes. | |||
369 | /// TODO: This data structure should be shared by the IR optimizer and the | |||
370 | /// backend. | |||
371 | struct SDNodeFlags { | |||
372 | private: | |||
373 | bool NoUnsignedWrap : 1; | |||
374 | bool NoSignedWrap : 1; | |||
375 | bool Exact : 1; | |||
376 | bool NoNaNs : 1; | |||
377 | bool NoInfs : 1; | |||
378 | bool NoSignedZeros : 1; | |||
379 | bool AllowReciprocal : 1; | |||
380 | bool AllowContract : 1; | |||
381 | bool ApproximateFuncs : 1; | |||
382 | bool AllowReassociation : 1; | |||
383 | ||||
384 | // We assume instructions do not raise floating-point exceptions by default, | |||
385 | // and only those marked explicitly may do so. We could choose to represent | |||
386 | // this via a positive "FPExcept" flags like on the MI level, but having a | |||
387 | // negative "NoFPExcept" flag here (that defaults to true) makes the flag | |||
388 | // intersection logic more straightforward. | |||
389 | bool NoFPExcept : 1; | |||
390 | ||||
391 | public: | |||
392 | /// Default constructor turns off all optimization flags. | |||
393 | SDNodeFlags() | |||
394 | : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), | |||
395 | NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), | |||
396 | AllowContract(false), ApproximateFuncs(false), | |||
397 | AllowReassociation(false), NoFPExcept(false) {} | |||
398 | ||||
399 | /// Propagate the fast-math-flags from an IR FPMathOperator. | |||
400 | void copyFMF(const FPMathOperator &FPMO) { | |||
401 | setNoNaNs(FPMO.hasNoNaNs()); | |||
402 | setNoInfs(FPMO.hasNoInfs()); | |||
403 | setNoSignedZeros(FPMO.hasNoSignedZeros()); | |||
404 | setAllowReciprocal(FPMO.hasAllowReciprocal()); | |||
405 | setAllowContract(FPMO.hasAllowContract()); | |||
406 | setApproximateFuncs(FPMO.hasApproxFunc()); | |||
407 | setAllowReassociation(FPMO.hasAllowReassoc()); | |||
408 | } | |||
409 | ||||
410 | // These are mutators for each flag. | |||
411 | void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } | |||
412 | void setNoSignedWrap(bool b) { NoSignedWrap = b; } | |||
413 | void setExact(bool b) { Exact = b; } | |||
414 | void setNoNaNs(bool b) { NoNaNs = b; } | |||
415 | void setNoInfs(bool b) { NoInfs = b; } | |||
416 | void setNoSignedZeros(bool b) { NoSignedZeros = b; } | |||
417 | void setAllowReciprocal(bool b) { AllowReciprocal = b; } | |||
418 | void setAllowContract(bool b) { AllowContract = b; } | |||
419 | void setApproximateFuncs(bool b) { ApproximateFuncs = b; } | |||
420 | void setAllowReassociation(bool b) { AllowReassociation = b; } | |||
421 | void setNoFPExcept(bool b) { NoFPExcept = b; } | |||
422 | ||||
423 | // These are accessors for each flag. | |||
424 | bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } | |||
425 | bool hasNoSignedWrap() const { return NoSignedWrap; } | |||
426 | bool hasExact() const { return Exact; } | |||
427 | bool hasNoNaNs() const { return NoNaNs; } | |||
428 | bool hasNoInfs() const { return NoInfs; } | |||
429 | bool hasNoSignedZeros() const { return NoSignedZeros; } | |||
430 | bool hasAllowReciprocal() const { return AllowReciprocal; } | |||
431 | bool hasAllowContract() const { return AllowContract; } | |||
432 | bool hasApproximateFuncs() const { return ApproximateFuncs; } | |||
433 | bool hasAllowReassociation() const { return AllowReassociation; } | |||
434 | bool hasNoFPExcept() const { return NoFPExcept; } | |||
435 | ||||
436 | /// Clear any flags in this flag set that aren't also set in Flags. All | |||
437 | /// flags will be cleared if Flags are undefined. | |||
438 | void intersectWith(const SDNodeFlags Flags) { | |||
439 | NoUnsignedWrap &= Flags.NoUnsignedWrap; | |||
440 | NoSignedWrap &= Flags.NoSignedWrap; | |||
441 | Exact &= Flags.Exact; | |||
442 | NoNaNs &= Flags.NoNaNs; | |||
443 | NoInfs &= Flags.NoInfs; | |||
444 | NoSignedZeros &= Flags.NoSignedZeros; | |||
445 | AllowReciprocal &= Flags.AllowReciprocal; | |||
446 | AllowContract &= Flags.AllowContract; | |||
447 | ApproximateFuncs &= Flags.ApproximateFuncs; | |||
448 | AllowReassociation &= Flags.AllowReassociation; | |||
449 | NoFPExcept &= Flags.NoFPExcept; | |||
450 | } | |||
451 | }; | |||
452 | ||||
453 | /// Represents one node in the SelectionDAG. | |||
454 | /// | |||
455 | class SDNode : public FoldingSetNode, public ilist_node<SDNode> { | |||
456 | private: | |||
457 | /// The operation that this node performs. | |||
458 | int16_t NodeType; | |||
459 | ||||
460 | protected: | |||
461 | // We define a set of mini-helper classes to help us interpret the bits in our | |||
462 | // SubclassData. These are designed to fit within a uint16_t so they pack | |||
463 | // with NodeType. | |||
464 | ||||
// BEGIN_TWO_BYTE_PACK() / END_TWO_BYTE_PACK() bracket the bit-field helper
// classes so that they are laid out in two bytes on every supported compiler.
// On all targets other than the AIX case below they expand to nothing.
#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
// and give the `pack` pragma push semantics.
#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
#else
#define BEGIN_TWO_BYTE_PACK()
#define END_TWO_BYTE_PACK()
#endif
474 | ||||
475 | BEGIN_TWO_BYTE_PACK() | |||
476 | class SDNodeBitfields { | |||
477 | friend class SDNode; | |||
478 | friend class MemIntrinsicSDNode; | |||
479 | friend class MemSDNode; | |||
480 | friend class SelectionDAG; | |||
481 | ||||
482 | uint16_t HasDebugValue : 1; | |||
483 | uint16_t IsMemIntrinsic : 1; | |||
484 | uint16_t IsDivergent : 1; | |||
485 | }; | |||
486 | enum { NumSDNodeBits = 3 }; | |||
487 | ||||
488 | class ConstantSDNodeBitfields { | |||
489 | friend class ConstantSDNode; | |||
490 | ||||
491 | uint16_t : NumSDNodeBits; | |||
492 | ||||
493 | uint16_t IsOpaque : 1; | |||
494 | }; | |||
495 | ||||
496 | class MemSDNodeBitfields { | |||
497 | friend class MemSDNode; | |||
498 | friend class MemIntrinsicSDNode; | |||
499 | friend class AtomicSDNode; | |||
500 | ||||
501 | uint16_t : NumSDNodeBits; | |||
502 | ||||
503 | uint16_t IsVolatile : 1; | |||
504 | uint16_t IsNonTemporal : 1; | |||
505 | uint16_t IsDereferenceable : 1; | |||
506 | uint16_t IsInvariant : 1; | |||
507 | }; | |||
508 | enum { NumMemSDNodeBits = NumSDNodeBits + 4 }; | |||
509 | ||||
510 | class LSBaseSDNodeBitfields { | |||
511 | friend class LSBaseSDNode; | |||
512 | friend class MaskedLoadStoreSDNode; | |||
513 | friend class MaskedGatherScatterSDNode; | |||
514 | ||||
515 | uint16_t : NumMemSDNodeBits; | |||
516 | ||||
517 | // This storage is shared between disparate class hierarchies to hold an | |||
518 | // enumeration specific to the class hierarchy in use. | |||
519 | // LSBaseSDNode => enum ISD::MemIndexedMode | |||
520 | // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode | |||
521 | // MaskedGatherScatterSDNode => enum ISD::MemIndexType | |||
522 | uint16_t AddressingMode : 3; | |||
523 | }; | |||
524 | enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 }; | |||
525 | ||||
526 | class LoadSDNodeBitfields { | |||
527 | friend class LoadSDNode; | |||
528 | friend class MaskedLoadSDNode; | |||
529 | friend class MaskedGatherSDNode; | |||
530 | ||||
531 | uint16_t : NumLSBaseSDNodeBits; | |||
532 | ||||
533 | uint16_t ExtTy : 2; // enum ISD::LoadExtType | |||
534 | uint16_t IsExpanding : 1; | |||
535 | }; | |||
536 | ||||
537 | class StoreSDNodeBitfields { | |||
538 | friend class StoreSDNode; | |||
539 | friend class MaskedStoreSDNode; | |||
540 | friend class MaskedScatterSDNode; | |||
541 | ||||
542 | uint16_t : NumLSBaseSDNodeBits; | |||
543 | ||||
544 | uint16_t IsTruncating : 1; | |||
545 | uint16_t IsCompressing : 1; | |||
546 | }; | |||
547 | ||||
548 | union { | |||
549 | char RawSDNodeBits[sizeof(uint16_t)]; | |||
550 | SDNodeBitfields SDNodeBits; | |||
551 | ConstantSDNodeBitfields ConstantSDNodeBits; | |||
552 | MemSDNodeBitfields MemSDNodeBits; | |||
553 | LSBaseSDNodeBitfields LSBaseSDNodeBits; | |||
554 | LoadSDNodeBitfields LoadSDNodeBits; | |||
555 | StoreSDNodeBitfields StoreSDNodeBits; | |||
556 | }; | |||
557 | END_TWO_BYTE_PACK() | |||
558 | #undef BEGIN_TWO_BYTE_PACK | |||
559 | #undef END_TWO_BYTE_PACK | |||
560 | ||||
561 | // RawSDNodeBits must cover the entirety of the union. This means that all of | |||
562 | // the union's members must have size <= RawSDNodeBits. We write the RHS as | |||
563 | // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter. | |||
564 | static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide"); | |||
565 | static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide"); | |||
566 | static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide"); | |||
567 | static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide"); | |||
568 | static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide"); | |||
569 | static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide"); | |||
570 | ||||
571 | private: | |||
572 | friend class SelectionDAG; | |||
573 | // TODO: unfriend HandleSDNode once we fix its operand handling. | |||
574 | friend class HandleSDNode; | |||
575 | ||||
576 | /// Unique id per SDNode in the DAG. | |||
577 | int NodeId = -1; | |||
578 | ||||
579 | /// The values that are used by this operation. | |||
580 | SDUse *OperandList = nullptr; | |||
581 | ||||
582 | /// The types of the values this node defines. SDNodes may | |||
583 | /// define multiple values simultaneously. | |||
584 | const EVT *ValueList; | |||
585 | ||||
586 | /// List of uses for this SDNode. | |||
587 | SDUse *UseList = nullptr; | |||
588 | ||||
589 | /// The number of entries in the Operand/Value list. | |||
590 | unsigned short NumOperands = 0; | |||
591 | unsigned short NumValues; | |||
592 | ||||
593 | // The ordering of the SDNodes. It roughly corresponds to the ordering of the | |||
594 | // original LLVM instructions. | |||
595 | // This is used for turning off scheduling, because we'll forgo | |||
596 | // the normal scheduling algorithms and output the instructions according to | |||
597 | // this ordering. | |||
598 | unsigned IROrder; | |||
599 | ||||
600 | /// Source line information. | |||
601 | DebugLoc debugLoc; | |||
602 | ||||
603 | /// Return a pointer to the specified value type. | |||
604 | static const EVT *getValueTypeList(EVT VT); | |||
605 | ||||
606 | SDNodeFlags Flags; | |||
607 | ||||
608 | public: | |||
609 | /// Unique and persistent id per SDNode in the DAG. | |||
610 | /// Used for debug printing. | |||
611 | uint16_t PersistentId; | |||
612 | ||||
613 | //===--------------------------------------------------------------------===// | |||
614 | // Accessors | |||
615 | // | |||
616 | ||||
617 | /// Return the SelectionDAG opcode value for this node. For | |||
618 | /// pre-isel nodes (those for which isMachineOpcode returns false), these | |||
619 | /// are the opcode values in the ISD and <target>ISD namespaces. For | |||
620 | /// post-isel opcodes, see getMachineOpcode. | |||
621 | unsigned getOpcode() const { return (unsigned short)NodeType; } | |||
622 | ||||
623 | /// Test if this node has a target-specific opcode (in the | |||
624 | /// \<target\>ISD namespace). | |||
625 | bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; } | |||
626 | ||||
627 | /// Test if this node has a target-specific opcode that may raise | |||
628 | /// FP exceptions (in the \<target\>ISD namespace and greater than | |||
629 | /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory | |||
630 | /// opcode are currently automatically considered to possibly raise | |||
631 | /// FP exceptions as well. | |||
632 | bool isTargetStrictFPOpcode() const { | |||
633 | return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE; | |||
634 | } | |||
635 | ||||
636 | /// Test if this node has a target-specific | |||
637 | /// memory-referencing opcode (in the \<target\>ISD namespace and | |||
638 | /// greater than FIRST_TARGET_MEMORY_OPCODE). | |||
639 | bool isTargetMemoryOpcode() const { | |||
640 | return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE; | |||
641 | } | |||
642 | ||||
643 | /// Return true if this node's opcode is ISD::UNDEF. | |||
644 | bool isUndef() const { return NodeType == ISD::UNDEF; } | |||
645 | ||||
646 | /// Test if this node is a memory intrinsic (with valid pointer information). | |||
647 | /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for | |||
648 | /// non-memory intrinsics (with chains) that are not really instances of | |||
649 | /// MemSDNode. For such nodes, we need some extra state to determine the | |||
650 | /// proper classof relationship. | |||
651 | bool isMemIntrinsic() const { | |||
652 | return (NodeType == ISD::INTRINSIC_W_CHAIN || | |||
653 | NodeType == ISD::INTRINSIC_VOID) && | |||
654 | SDNodeBits.IsMemIntrinsic; | |||
655 | } | |||
656 | ||||
657 | /// Test if this node is a strict floating point pseudo-op. | |||
658 | bool isStrictFPOpcode() { | |||
659 | switch (NodeType) { | |||
660 | default: | |||
661 | return false; | |||
662 | case ISD::STRICT_FP16_TO_FP: | |||
663 | case ISD::STRICT_FP_TO_FP16: | |||
664 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ | |||
665 | case ISD::STRICT_##DAGN: | |||
666 | #include "llvm/IR/ConstrainedOps.def" | |||
667 | return true; | |||
668 | } | |||
669 | } | |||
670 | ||||
671 | /// Test if this node has a post-isel opcode, directly | |||
672 | /// corresponding to a MachineInstr opcode. | |||
673 | bool isMachineOpcode() const { return NodeType < 0; } | |||
674 | ||||
675 | /// This may only be called if isMachineOpcode returns | |||
676 | /// true. It returns the MachineInstr opcode value that the node's opcode | |||
677 | /// corresponds to. | |||
678 | unsigned getMachineOpcode() const { | |||
679 | assert(isMachineOpcode() && "Not a MachineInstr opcode!")(static_cast <bool> (isMachineOpcode() && "Not a MachineInstr opcode!" ) ? void (0) : __assert_fail ("isMachineOpcode() && \"Not a MachineInstr opcode!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 679, __extension__ __PRETTY_FUNCTION__)); | |||
680 | return ~NodeType; | |||
681 | } | |||
682 | ||||
683 | bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; } | |||
684 | void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; } | |||
685 | ||||
686 | bool isDivergent() const { return SDNodeBits.IsDivergent; } | |||
687 | ||||
688 | /// Return true if there are no uses of this node. | |||
689 | bool use_empty() const { return UseList == nullptr; } | |||
690 | ||||
691 | /// Return true if there is exactly one use of this node. | |||
692 | bool hasOneUse() const { return hasSingleElement(uses()); } | |||
693 | ||||
694 | /// Return the number of uses of this node. This method takes | |||
695 | /// time proportional to the number of uses. | |||
696 | size_t use_size() const { return std::distance(use_begin(), use_end()); } | |||
697 | ||||
698 | /// Return the unique node id. | |||
699 | int getNodeId() const { return NodeId; } | |||
700 | ||||
701 | /// Set unique node id. | |||
702 | void setNodeId(int Id) { NodeId = Id; } | |||
703 | ||||
704 | /// Return the node ordering. | |||
705 | unsigned getIROrder() const { return IROrder; } | |||
706 | ||||
707 | /// Set the node ordering. | |||
708 | void setIROrder(unsigned Order) { IROrder = Order; } | |||
709 | ||||
710 | /// Return the source location info. | |||
711 | const DebugLoc &getDebugLoc() const { return debugLoc; } | |||
712 | ||||
713 | /// Set source location info. Try to avoid this, putting | |||
714 | /// it in the constructor is preferable. | |||
715 | void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); } | |||
716 | ||||
717 | /// This class provides iterator support for SDUse | |||
718 | /// operands that use a specific SDNode. | |||
719 | class use_iterator { | |||
720 | friend class SDNode; | |||
721 | ||||
722 | SDUse *Op = nullptr; | |||
723 | ||||
724 | explicit use_iterator(SDUse *op) : Op(op) {} | |||
725 | ||||
726 | public: | |||
727 | using iterator_category = std::forward_iterator_tag; | |||
728 | using value_type = SDUse; | |||
729 | using difference_type = std::ptrdiff_t; | |||
730 | using pointer = value_type *; | |||
731 | using reference = value_type &; | |||
732 | ||||
733 | use_iterator() = default; | |||
734 | use_iterator(const use_iterator &I) : Op(I.Op) {} | |||
735 | ||||
736 | bool operator==(const use_iterator &x) const { | |||
737 | return Op == x.Op; | |||
738 | } | |||
739 | bool operator!=(const use_iterator &x) const { | |||
740 | return !operator==(x); | |||
741 | } | |||
742 | ||||
743 | /// Return true if this iterator is at the end of uses list. | |||
744 | bool atEnd() const { return Op == nullptr; } | |||
745 | ||||
746 | // Iterator traversal: forward iteration only. | |||
747 | use_iterator &operator++() { // Preincrement | |||
748 | assert(Op && "Cannot increment end iterator!")(static_cast <bool> (Op && "Cannot increment end iterator!" ) ? void (0) : __assert_fail ("Op && \"Cannot increment end iterator!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 748, __extension__ __PRETTY_FUNCTION__)); | |||
749 | Op = Op->getNext(); | |||
750 | return *this; | |||
751 | } | |||
752 | ||||
753 | use_iterator operator++(int) { // Postincrement | |||
754 | use_iterator tmp = *this; ++*this; return tmp; | |||
755 | } | |||
756 | ||||
757 | /// Retrieve a pointer to the current user node. | |||
758 | SDNode *operator*() const { | |||
759 | assert(Op && "Cannot dereference end iterator!")(static_cast <bool> (Op && "Cannot dereference end iterator!" ) ? void (0) : __assert_fail ("Op && \"Cannot dereference end iterator!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 759, __extension__ __PRETTY_FUNCTION__)); | |||
760 | return Op->getUser(); | |||
761 | } | |||
762 | ||||
763 | SDNode *operator->() const { return operator*(); } | |||
764 | ||||
765 | SDUse &getUse() const { return *Op; } | |||
766 | ||||
767 | /// Retrieve the operand # of this use in its user. | |||
768 | unsigned getOperandNo() const { | |||
769 | assert(Op && "Cannot dereference end iterator!")(static_cast <bool> (Op && "Cannot dereference end iterator!" ) ? void (0) : __assert_fail ("Op && \"Cannot dereference end iterator!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 769, __extension__ __PRETTY_FUNCTION__)); | |||
770 | return (unsigned)(Op - Op->getUser()->OperandList); | |||
771 | } | |||
772 | }; | |||
773 | ||||
774 | /// Provide iteration support to walk over all uses of an SDNode. | |||
775 | use_iterator use_begin() const { | |||
776 | return use_iterator(UseList); | |||
777 | } | |||
778 | ||||
779 | static use_iterator use_end() { return use_iterator(nullptr); } | |||
780 | ||||
781 | inline iterator_range<use_iterator> uses() { | |||
782 | return make_range(use_begin(), use_end()); | |||
783 | } | |||
784 | inline iterator_range<use_iterator> uses() const { | |||
785 | return make_range(use_begin(), use_end()); | |||
786 | } | |||
787 | ||||
788 | /// Return true if there are exactly NUSES uses of the indicated value. | |||
789 | /// This method ignores uses of other values defined by this operation. | |||
790 | bool hasNUsesOfValue(unsigned NUses, unsigned Value) const; | |||
791 | ||||
792 | /// Return true if there are any use of the indicated value. | |||
793 | /// This method ignores uses of other values defined by this operation. | |||
794 | bool hasAnyUseOfValue(unsigned Value) const; | |||
795 | ||||
796 | /// Return true if this node is the only use of N. | |||
797 | bool isOnlyUserOf(const SDNode *N) const; | |||
798 | ||||
799 | /// Return true if this node is an operand of N. | |||
800 | bool isOperandOf(const SDNode *N) const; | |||
801 | ||||
802 | /// Return true if this node is a predecessor of N. | |||
803 | /// NOTE: Implemented on top of hasPredecessor and every bit as | |||
804 | /// expensive. Use carefully. | |||
805 | bool isPredecessorOf(const SDNode *N) const { | |||
806 | return N->hasPredecessor(this); | |||
807 | } | |||
808 | ||||
809 | /// Return true if N is a predecessor of this node. | |||
810 | /// N is either an operand of this node, or can be reached by recursively | |||
811 | /// traversing up the operands. | |||
812 | /// NOTE: This is an expensive method. Use it carefully. | |||
813 | bool hasPredecessor(const SDNode *N) const; | |||
814 | ||||
815 | /// Returns true if N is a predecessor of any node in Worklist. This | |||
816 | /// helper keeps Visited and Worklist sets externally to allow unions | |||
817 | /// searches to be performed in parallel, caching of results across | |||
818 | /// queries and incremental addition to Worklist. Stops early if N is | |||
819 | /// found but will resume. Remember to clear Visited and Worklists | |||
820 | /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before | |||
821 | /// giving up. The TopologicalPrune flag signals that positive NodeIds are | |||
822 | /// topologically ordered (Operands have strictly smaller node id) and search | |||
823 | /// can be pruned leveraging this. | |||
824 | static bool hasPredecessorHelper(const SDNode *N, | |||
825 | SmallPtrSetImpl<const SDNode *> &Visited, | |||
826 | SmallVectorImpl<const SDNode *> &Worklist, | |||
827 | unsigned int MaxSteps = 0, | |||
828 | bool TopologicalPrune = false) { | |||
829 | SmallVector<const SDNode *, 8> DeferredNodes; | |||
830 | if (Visited.count(N)) | |||
831 | return true; | |||
832 | ||||
833 | // Node Id's are assigned in three places: As a topological | |||
834 | // ordering (> 0), during legalization (results in values set to | |||
835 | // 0), new nodes (set to -1). If N has a topolgical id then we | |||
836 | // know that all nodes with ids smaller than it cannot be | |||
837 | // successors and we need not check them. Filter out all node | |||
838 | // that can't be matches. We add them to the worklist before exit | |||
839 | // in case of multiple calls. Note that during selection the topological id | |||
840 | // may be violated if a node's predecessor is selected before it. We mark | |||
841 | // this at selection negating the id of unselected successors and | |||
842 | // restricting topological pruning to positive ids. | |||
843 | ||||
844 | int NId = N->getNodeId(); | |||
845 | // If we Invalidated the Id, reconstruct original NId. | |||
846 | if (NId < -1) | |||
847 | NId = -(NId + 1); | |||
848 | ||||
849 | bool Found = false; | |||
850 | while (!Worklist.empty()) { | |||
851 | const SDNode *M = Worklist.pop_back_val(); | |||
852 | int MId = M->getNodeId(); | |||
853 | if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) && | |||
854 | (MId > 0) && (MId < NId)) { | |||
855 | DeferredNodes.push_back(M); | |||
856 | continue; | |||
857 | } | |||
858 | for (const SDValue &OpV : M->op_values()) { | |||
859 | SDNode *Op = OpV.getNode(); | |||
860 | if (Visited.insert(Op).second) | |||
861 | Worklist.push_back(Op); | |||
862 | if (Op == N) | |||
863 | Found = true; | |||
864 | } | |||
865 | if (Found) | |||
866 | break; | |||
867 | if (MaxSteps != 0 && Visited.size() >= MaxSteps) | |||
868 | break; | |||
869 | } | |||
870 | // Push deferred nodes back on worklist. | |||
871 | Worklist.append(DeferredNodes.begin(), DeferredNodes.end()); | |||
872 | // If we bailed early, conservatively return found. | |||
873 | if (MaxSteps != 0 && Visited.size() >= MaxSteps) | |||
874 | return true; | |||
875 | return Found; | |||
876 | } | |||
877 | ||||
878 | /// Return true if all the users of N are contained in Nodes. | |||
879 | /// NOTE: Requires at least one match, but doesn't require them all. | |||
880 | static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N); | |||
881 | ||||
882 | /// Return the number of values used by this operation. | |||
883 | unsigned getNumOperands() const { return NumOperands; } | |||
884 | ||||
885 | /// Return the maximum number of operands that a SDNode can hold. | |||
886 | static constexpr size_t getMaxNumOperands() { | |||
887 | return std::numeric_limits<decltype(SDNode::NumOperands)>::max(); | |||
888 | } | |||
889 | ||||
890 | /// Helper method returns the integer value of a ConstantSDNode operand. | |||
891 | inline uint64_t getConstantOperandVal(unsigned Num) const; | |||
892 | ||||
893 | /// Helper method returns the APInt of a ConstantSDNode operand. | |||
894 | inline const APInt &getConstantOperandAPInt(unsigned Num) const; | |||
895 | ||||
896 | const SDValue &getOperand(unsigned Num) const { | |||
897 | assert(Num < NumOperands && "Invalid child # of SDNode!")(static_cast <bool> (Num < NumOperands && "Invalid child # of SDNode!" ) ? void (0) : __assert_fail ("Num < NumOperands && \"Invalid child # of SDNode!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 897, __extension__ __PRETTY_FUNCTION__)); | |||
898 | return OperandList[Num]; | |||
899 | } | |||
900 | ||||
901 | using op_iterator = SDUse *; | |||
902 | ||||
903 | op_iterator op_begin() const { return OperandList; } | |||
904 | op_iterator op_end() const { return OperandList+NumOperands; } | |||
905 | ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); } | |||
906 | ||||
907 | /// Iterator for directly iterating over the operand SDValue's. | |||
908 | struct value_op_iterator | |||
909 | : iterator_adaptor_base<value_op_iterator, op_iterator, | |||
910 | std::random_access_iterator_tag, SDValue, | |||
911 | ptrdiff_t, value_op_iterator *, | |||
912 | value_op_iterator *> { | |||
913 | explicit value_op_iterator(SDUse *U = nullptr) | |||
914 | : iterator_adaptor_base(U) {} | |||
915 | ||||
916 | const SDValue &operator*() const { return I->get(); } | |||
917 | }; | |||
918 | ||||
919 | iterator_range<value_op_iterator> op_values() const { | |||
920 | return make_range(value_op_iterator(op_begin()), | |||
921 | value_op_iterator(op_end())); | |||
922 | } | |||
923 | ||||
924 | SDVTList getVTList() const { | |||
925 | SDVTList X = { ValueList, NumValues }; | |||
926 | return X; | |||
927 | } | |||
928 | ||||
929 | /// If this node has a glue operand, return the node | |||
930 | /// to which the glue operand points. Otherwise return NULL. | |||
931 | SDNode *getGluedNode() const { | |||
932 | if (getNumOperands() != 0 && | |||
933 | getOperand(getNumOperands()-1).getValueType() == MVT::Glue) | |||
934 | return getOperand(getNumOperands()-1).getNode(); | |||
935 | return nullptr; | |||
936 | } | |||
937 | ||||
938 | /// If this node has a glue value with a user, return | |||
939 | /// the user (there is at most one). Otherwise return NULL. | |||
940 | SDNode *getGluedUser() const { | |||
941 | for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI) | |||
942 | if (UI.getUse().get().getValueType() == MVT::Glue) | |||
943 | return *UI; | |||
944 | return nullptr; | |||
945 | } | |||
946 | ||||
947 | SDNodeFlags getFlags() const { return Flags; } | |||
948 | void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; } | |||
949 | ||||
950 | /// Clear any flags in this node that aren't also set in Flags. | |||
951 | /// If Flags is not in a defined state then this has no effect. | |||
952 | void intersectFlagsWith(const SDNodeFlags Flags); | |||
953 | ||||
954 | /// Return the number of values defined/returned by this operator. | |||
955 | unsigned getNumValues() const { return NumValues; } | |||
956 | ||||
957 | /// Return the type of a specified result. | |||
958 | EVT getValueType(unsigned ResNo) const { | |||
959 | assert(ResNo < NumValues && "Illegal result number!")(static_cast <bool> (ResNo < NumValues && "Illegal result number!" ) ? void (0) : __assert_fail ("ResNo < NumValues && \"Illegal result number!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 959, __extension__ __PRETTY_FUNCTION__)); | |||
960 | return ValueList[ResNo]; | |||
961 | } | |||
962 | ||||
963 | /// Return the type of a specified result as a simple type. | |||
964 | MVT getSimpleValueType(unsigned ResNo) const { | |||
965 | return getValueType(ResNo).getSimpleVT(); | |||
966 | } | |||
967 | ||||
968 | /// Returns MVT::getSizeInBits(getValueType(ResNo)). | |||
969 | /// | |||
970 | /// If the value type is a scalable vector type, the scalable property will | |||
971 | /// be set and the runtime size will be a positive integer multiple of the | |||
972 | /// base size. | |||
973 | TypeSize getValueSizeInBits(unsigned ResNo) const { | |||
974 | return getValueType(ResNo).getSizeInBits(); | |||
975 | } | |||
976 | ||||
977 | using value_iterator = const EVT *; | |||
978 | ||||
979 | value_iterator value_begin() const { return ValueList; } | |||
980 | value_iterator value_end() const { return ValueList+NumValues; } | |||
981 | iterator_range<value_iterator> values() const { | |||
982 | return llvm::make_range(value_begin(), value_end()); | |||
983 | } | |||
984 | ||||
985 | /// Return the opcode of this operation for printing. | |||
986 | std::string getOperationName(const SelectionDAG *G = nullptr) const; | |||
987 | static const char* getIndexedModeName(ISD::MemIndexedMode AM); | |||
988 | void print_types(raw_ostream &OS, const SelectionDAG *G) const; | |||
989 | void print_details(raw_ostream &OS, const SelectionDAG *G) const; | |||
990 | void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const; | |||
991 | void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const; | |||
992 | ||||
993 | /// Print a SelectionDAG node and all children down to | |||
994 | /// the leaves. The given SelectionDAG allows target-specific nodes | |||
995 | /// to be printed in human-readable form. Unlike printr, this will | |||
996 | /// print the whole DAG, including children that appear multiple | |||
997 | /// times. | |||
998 | /// | |||
999 | void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const; | |||
1000 | ||||
1001 | /// Print a SelectionDAG node and children up to | |||
1002 | /// depth "depth." The given SelectionDAG allows target-specific | |||
1003 | /// nodes to be printed in human-readable form. Unlike printr, this | |||
1004 | /// will print children that appear multiple times wherever they are | |||
1005 | /// used. | |||
1006 | /// | |||
1007 | void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr, | |||
1008 | unsigned depth = 100) const; | |||
1009 | ||||
1010 | /// Dump this node, for debugging. | |||
1011 | void dump() const; | |||
1012 | ||||
1013 | /// Dump (recursively) this node and its use-def subgraph. | |||
1014 | void dumpr() const; | |||
1015 | ||||
1016 | /// Dump this node, for debugging. | |||
1017 | /// The given SelectionDAG allows target-specific nodes to be printed | |||
1018 | /// in human-readable form. | |||
1019 | void dump(const SelectionDAG *G) const; | |||
1020 | ||||
1021 | /// Dump (recursively) this node and its use-def subgraph. | |||
1022 | /// The given SelectionDAG allows target-specific nodes to be printed | |||
1023 | /// in human-readable form. | |||
1024 | void dumpr(const SelectionDAG *G) const; | |||
1025 | ||||
1026 | /// printrFull to dbgs(). The given SelectionDAG allows | |||
1027 | /// target-specific nodes to be printed in human-readable form. | |||
1028 | /// Unlike dumpr, this will print the whole DAG, including children | |||
1029 | /// that appear multiple times. | |||
1030 | void dumprFull(const SelectionDAG *G = nullptr) const; | |||
1031 | ||||
1032 | /// printrWithDepth to dbgs(). The given | |||
1033 | /// SelectionDAG allows target-specific nodes to be printed in | |||
1034 | /// human-readable form. Unlike dumpr, this will print children | |||
1035 | /// that appear multiple times wherever they are used. | |||
1036 | /// | |||
1037 | void dumprWithDepth(const SelectionDAG *G = nullptr, | |||
1038 | unsigned depth = 100) const; | |||
1039 | ||||
1040 | /// Gather unique data for the node. | |||
1041 | void Profile(FoldingSetNodeID &ID) const; | |||
1042 | ||||
1043 | /// This method should only be used by the SDUse class. | |||
1044 | void addUse(SDUse &U) { U.addToList(&UseList); } | |||
1045 | ||||
1046 | protected: | |||
1047 | static SDVTList getSDVTList(EVT VT) { | |||
1048 | SDVTList Ret = { getValueTypeList(VT), 1 }; | |||
1049 | return Ret; | |||
1050 | } | |||
1051 | ||||
1052 | /// Create an SDNode. | |||
1053 | /// | |||
1054 | /// SDNodes are created without any operands, and never own the operand | |||
1055 | /// storage. To add operands, see SelectionDAG::createOperands. | |||
1056 | SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs) | |||
1057 | : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs), | |||
1058 | IROrder(Order), debugLoc(std::move(dl)) { | |||
1059 | memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits)); | |||
1060 | assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor")(static_cast <bool> (debugLoc.hasTrivialDestructor() && "Expected trivial destructor") ? void (0) : __assert_fail ("debugLoc.hasTrivialDestructor() && \"Expected trivial destructor\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1060, __extension__ __PRETTY_FUNCTION__)); | |||
1061 | assert(NumValues == VTs.NumVTs &&(static_cast <bool> (NumValues == VTs.NumVTs && "NumValues wasn't wide enough for its operands!") ? void (0) : __assert_fail ("NumValues == VTs.NumVTs && \"NumValues wasn't wide enough for its operands!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1062, __extension__ __PRETTY_FUNCTION__)) | |||
1062 | "NumValues wasn't wide enough for its operands!")(static_cast <bool> (NumValues == VTs.NumVTs && "NumValues wasn't wide enough for its operands!") ? void (0) : __assert_fail ("NumValues == VTs.NumVTs && \"NumValues wasn't wide enough for its operands!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1062, __extension__ __PRETTY_FUNCTION__)); | |||
1063 | } | |||
1064 | ||||
1065 | /// Release the operands and set this node to have zero operands. | |||
1066 | void DropOperands(); | |||
1067 | }; | |||
1068 | ||||
1069 | /// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed | |||
1070 | /// into SDNode creation functions. | |||
1071 | /// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted | |||
1072 | /// from the original Instruction, and IROrder is the ordinal position of | |||
1073 | /// the instruction. | |||
1074 | /// When an SDNode is created after the DAG is being built, both DebugLoc and | |||
1075 | /// the IROrder are propagated from the original SDNode. | |||
1076 | /// So SDLoc class provides two constructors besides the default one, one to | |||
1077 | /// be used by the DAGBuilder, the other to be used by others. | |||
1078 | class SDLoc { | |||
1079 | private: | |||
1080 | DebugLoc DL; | |||
1081 | int IROrder = 0; | |||
1082 | ||||
1083 | public: | |||
1084 | SDLoc() = default; | |||
1085 | SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {} | |||
1086 | SDLoc(const SDValue V) : SDLoc(V.getNode()) {} | |||
1087 | SDLoc(const Instruction *I, int Order) : IROrder(Order) { | |||
1088 | assert(Order >= 0 && "bad IROrder")(static_cast <bool> (Order >= 0 && "bad IROrder" ) ? void (0) : __assert_fail ("Order >= 0 && \"bad IROrder\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1088, __extension__ __PRETTY_FUNCTION__)); | |||
1089 | if (I) | |||
1090 | DL = I->getDebugLoc(); | |||
1091 | } | |||
1092 | ||||
1093 | unsigned getIROrder() const { return IROrder; } | |||
1094 | const DebugLoc &getDebugLoc() const { return DL; } | |||
1095 | }; | |||
1096 | ||||
1097 | // Define inline functions from the SDValue class. | |||
1098 | ||||
1099 | inline SDValue::SDValue(SDNode *node, unsigned resno) | |||
1100 | : Node(node), ResNo(resno) { | |||
1101 | // Explicitly check for !ResNo to avoid use-after-free, because there are | |||
1102 | // callers that use SDValue(N, 0) with a deleted N to indicate successful | |||
1103 | // combines. | |||
1104 | assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&(static_cast <bool> ((!Node || !ResNo || ResNo < Node ->getNumValues()) && "Invalid result number for the given node!" ) ? void (0) : __assert_fail ("(!Node || !ResNo || ResNo < Node->getNumValues()) && \"Invalid result number for the given node!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1105, __extension__ __PRETTY_FUNCTION__)) | |||
1105 | "Invalid result number for the given node!")(static_cast <bool> ((!Node || !ResNo || ResNo < Node ->getNumValues()) && "Invalid result number for the given node!" ) ? void (0) : __assert_fail ("(!Node || !ResNo || ResNo < Node->getNumValues()) && \"Invalid result number for the given node!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1105, __extension__ __PRETTY_FUNCTION__)); | |||
1106 | assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.")(static_cast <bool> (ResNo < -2U && "Cannot use result numbers reserved for DenseMaps." ) ? void (0) : __assert_fail ("ResNo < -2U && \"Cannot use result numbers reserved for DenseMaps.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1106, __extension__ __PRETTY_FUNCTION__)); | |||
1107 | } | |||
1108 | ||||
1109 | inline unsigned SDValue::getOpcode() const { | |||
1110 | return Node->getOpcode(); | |||
| ||||
1111 | } | |||
1112 | ||||
1113 | inline EVT SDValue::getValueType() const { | |||
1114 | return Node->getValueType(ResNo); | |||
1115 | } | |||
1116 | ||||
1117 | inline unsigned SDValue::getNumOperands() const { | |||
1118 | return Node->getNumOperands(); | |||
1119 | } | |||
1120 | ||||
1121 | inline const SDValue &SDValue::getOperand(unsigned i) const { | |||
1122 | return Node->getOperand(i); | |||
1123 | } | |||
1124 | ||||
1125 | inline uint64_t SDValue::getConstantOperandVal(unsigned i) const { | |||
1126 | return Node->getConstantOperandVal(i); | |||
1127 | } | |||
1128 | ||||
1129 | inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const { | |||
1130 | return Node->getConstantOperandAPInt(i); | |||
1131 | } | |||
1132 | ||||
1133 | inline bool SDValue::isTargetOpcode() const { | |||
1134 | return Node->isTargetOpcode(); | |||
1135 | } | |||
1136 | ||||
1137 | inline bool SDValue::isTargetMemoryOpcode() const { | |||
1138 | return Node->isTargetMemoryOpcode(); | |||
1139 | } | |||
1140 | ||||
1141 | inline bool SDValue::isMachineOpcode() const { | |||
1142 | return Node->isMachineOpcode(); | |||
1143 | } | |||
1144 | ||||
1145 | inline unsigned SDValue::getMachineOpcode() const { | |||
1146 | return Node->getMachineOpcode(); | |||
1147 | } | |||
1148 | ||||
1149 | inline bool SDValue::isUndef() const { | |||
1150 | return Node->isUndef(); | |||
1151 | } | |||
1152 | ||||
1153 | inline bool SDValue::use_empty() const { | |||
1154 | return !Node->hasAnyUseOfValue(ResNo); | |||
1155 | } | |||
1156 | ||||
1157 | inline bool SDValue::hasOneUse() const { | |||
1158 | return Node->hasNUsesOfValue(1, ResNo); | |||
1159 | } | |||
1160 | ||||
1161 | inline const DebugLoc &SDValue::getDebugLoc() const { | |||
1162 | return Node->getDebugLoc(); | |||
1163 | } | |||
1164 | ||||
1165 | inline void SDValue::dump() const { | |||
1166 | return Node->dump(); | |||
1167 | } | |||
1168 | ||||
1169 | inline void SDValue::dump(const SelectionDAG *G) const { | |||
1170 | return Node->dump(G); | |||
1171 | } | |||
1172 | ||||
1173 | inline void SDValue::dumpr() const { | |||
1174 | return Node->dumpr(); | |||
1175 | } | |||
1176 | ||||
1177 | inline void SDValue::dumpr(const SelectionDAG *G) const { | |||
1178 | return Node->dumpr(G); | |||
1179 | } | |||
1180 | ||||
1181 | // Define inline functions from the SDUse class. | |||
1182 | ||||
1183 | inline void SDUse::set(const SDValue &V) { | |||
1184 | if (Val.getNode()) removeFromList(); | |||
1185 | Val = V; | |||
1186 | if (V.getNode()) V.getNode()->addUse(*this); | |||
1187 | } | |||
1188 | ||||
1189 | inline void SDUse::setInitial(const SDValue &V) { | |||
1190 | Val = V; | |||
1191 | V.getNode()->addUse(*this); | |||
1192 | } | |||
1193 | ||||
1194 | inline void SDUse::setNode(SDNode *N) { | |||
1195 | if (Val.getNode()) removeFromList(); | |||
1196 | Val.setNode(N); | |||
1197 | if (N) N->addUse(*this); | |||
1198 | } | |||
1199 | ||||
1200 | /// This class is used to form a handle around another node that | |||
1201 | /// is persistent and is updated across invocations of replaceAllUsesWith on its | |||
1202 | /// operand. This node should be directly created by end-users and not added to | |||
1203 | /// the AllNodes list. | |||
1204 | class HandleSDNode : public SDNode { | |||
1205 | SDUse Op; | |||
1206 | ||||
1207 | public: | |||
1208 | explicit HandleSDNode(SDValue X) | |||
1209 | : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) { | |||
1210 | // HandleSDNodes are never inserted into the DAG, so they won't be | |||
1211 | // auto-numbered. Use ID 65535 as a sentinel. | |||
1212 | PersistentId = 0xffff; | |||
1213 | ||||
1214 | // Manually set up the operand list. This node type is special in that it's | |||
1215 | // always stack allocated and SelectionDAG does not manage its operands. | |||
1216 | // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not | |||
1217 | // be so special. | |||
1218 | Op.setUser(this); | |||
1219 | Op.setInitial(X); | |||
1220 | NumOperands = 1; | |||
1221 | OperandList = &Op; | |||
1222 | } | |||
1223 | ~HandleSDNode(); | |||
1224 | ||||
1225 | const SDValue &getValue() const { return Op; } | |||
1226 | }; | |||
1227 | ||||
1228 | class AddrSpaceCastSDNode : public SDNode { | |||
1229 | private: | |||
1230 | unsigned SrcAddrSpace; | |||
1231 | unsigned DestAddrSpace; | |||
1232 | ||||
1233 | public: | |||
1234 | AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT, | |||
1235 | unsigned SrcAS, unsigned DestAS); | |||
1236 | ||||
1237 | unsigned getSrcAddressSpace() const { return SrcAddrSpace; } | |||
1238 | unsigned getDestAddressSpace() const { return DestAddrSpace; } | |||
1239 | ||||
1240 | static bool classof(const SDNode *N) { | |||
1241 | return N->getOpcode() == ISD::ADDRSPACECAST; | |||
1242 | } | |||
1243 | }; | |||
1244 | ||||
1245 | /// This is an abstract virtual class for memory operations. | |||
1246 | class MemSDNode : public SDNode { | |||
1247 | private: | |||
1248 | // VT of in-memory value. | |||
1249 | EVT MemoryVT; | |||
1250 | ||||
1251 | protected: | |||
1252 | /// Memory reference information. | |||
1253 | MachineMemOperand *MMO; | |||
1254 | ||||
1255 | public: | |||
1256 | MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
1257 | EVT memvt, MachineMemOperand *MMO); | |||
1258 | ||||
1259 | bool readMem() const { return MMO->isLoad(); } | |||
1260 | bool writeMem() const { return MMO->isStore(); } | |||
1261 | ||||
1262 | /// Returns alignment and volatility of the memory access | |||
1263 | Align getOriginalAlign() const { return MMO->getBaseAlign(); } | |||
1264 | Align getAlign() const { return MMO->getAlign(); } | |||
1265 | // FIXME: Remove once transition to getAlign is over. | |||
1266 | unsigned getAlignment() const { return MMO->getAlign().value(); } | |||
1267 | ||||
1268 | /// Return the SubclassData value, without HasDebugValue. This contains an | |||
1269 | /// encoding of the volatile flag, as well as bits used by subclasses. This | |||
1270 | /// function should only be used to compute a FoldingSetNodeID value. | |||
1271 | /// The HasDebugValue bit is masked out because CSE map needs to match | |||
1272 | /// nodes with debug info with nodes without debug info. Same is about | |||
1273 | /// isDivergent bit. | |||
1274 | unsigned getRawSubclassData() const { | |||
1275 | uint16_t Data; | |||
1276 | union { | |||
1277 | char RawSDNodeBits[sizeof(uint16_t)]; | |||
1278 | SDNodeBitfields SDNodeBits; | |||
1279 | }; | |||
1280 | memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits)); | |||
1281 | SDNodeBits.HasDebugValue = 0; | |||
1282 | SDNodeBits.IsDivergent = false; | |||
1283 | memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits)); | |||
1284 | return Data; | |||
1285 | } | |||
1286 | ||||
1287 | bool isVolatile() const { return MemSDNodeBits.IsVolatile; } | |||
1288 | bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; } | |||
1289 | bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; } | |||
1290 | bool isInvariant() const { return MemSDNodeBits.IsInvariant; } | |||
1291 | ||||
1292 | // Returns the offset from the location of the access. | |||
1293 | int64_t getSrcValueOffset() const { return MMO->getOffset(); } | |||
1294 | ||||
1295 | /// Returns the AA info that describes the dereference. | |||
1296 | AAMDNodes getAAInfo() const { return MMO->getAAInfo(); } | |||
1297 | ||||
1298 | /// Returns the Ranges that describes the dereference. | |||
1299 | const MDNode *getRanges() const { return MMO->getRanges(); } | |||
1300 | ||||
1301 | /// Returns the synchronization scope ID for this memory operation. | |||
1302 | SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); } | |||
1303 | ||||
1304 | /// Return the atomic ordering requirements for this memory operation. For | |||
1305 | /// cmpxchg atomic operations, return the atomic ordering requirements when | |||
1306 | /// store occurs. | |||
1307 | AtomicOrdering getSuccessOrdering() const { | |||
1308 | return MMO->getSuccessOrdering(); | |||
1309 | } | |||
1310 | ||||
1311 | /// Return a single atomic ordering that is at least as strong as both the | |||
1312 | /// success and failure orderings for an atomic operation. (For operations | |||
1313 | /// other than cmpxchg, this is equivalent to getSuccessOrdering().) | |||
1314 | AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); } | |||
1315 | ||||
1316 | /// Return true if the memory operation ordering is Unordered or higher. | |||
1317 | bool isAtomic() const { return MMO->isAtomic(); } | |||
1318 | ||||
1319 | /// Returns true if the memory operation doesn't imply any ordering | |||
1320 | /// constraints on surrounding memory operations beyond the normal memory | |||
1321 | /// aliasing rules. | |||
1322 | bool isUnordered() const { return MMO->isUnordered(); } | |||
1323 | ||||
1324 | /// Returns true if the memory operation is neither atomic or volatile. | |||
1325 | bool isSimple() const { return !isAtomic() && !isVolatile(); } | |||
1326 | ||||
1327 | /// Return the type of the in-memory value. | |||
1328 | EVT getMemoryVT() const { return MemoryVT; } | |||
1329 | ||||
1330 | /// Return a MachineMemOperand object describing the memory | |||
1331 | /// reference performed by operation. | |||
1332 | MachineMemOperand *getMemOperand() const { return MMO; } | |||
1333 | ||||
1334 | const MachinePointerInfo &getPointerInfo() const { | |||
1335 | return MMO->getPointerInfo(); | |||
1336 | } | |||
1337 | ||||
1338 | /// Return the address space for the associated pointer | |||
1339 | unsigned getAddressSpace() const { | |||
1340 | return getPointerInfo().getAddrSpace(); | |||
1341 | } | |||
1342 | ||||
1343 | /// Update this MemSDNode's MachineMemOperand information | |||
1344 | /// to reflect the alignment of NewMMO, if it has a greater alignment. | |||
1345 | /// This must only be used when the new alignment applies to all users of | |||
1346 | /// this MachineMemOperand. | |||
1347 | void refineAlignment(const MachineMemOperand *NewMMO) { | |||
1348 | MMO->refineAlignment(NewMMO); | |||
1349 | } | |||
1350 | ||||
1351 | const SDValue &getChain() const { return getOperand(0); } | |||
1352 | ||||
1353 | const SDValue &getBasePtr() const { | |||
1354 | switch (getOpcode()) { | |||
1355 | case ISD::STORE: | |||
1356 | case ISD::MSTORE: | |||
1357 | return getOperand(2); | |||
1358 | case ISD::MGATHER: | |||
1359 | case ISD::MSCATTER: | |||
1360 | return getOperand(3); | |||
1361 | default: | |||
1362 | return getOperand(1); | |||
1363 | } | |||
1364 | } | |||
1365 | ||||
1366 | // Methods to support isa and dyn_cast | |||
1367 | static bool classof(const SDNode *N) { | |||
1368 | // For some targets, we lower some target intrinsics to a MemIntrinsicNode | |||
1369 | // with either an intrinsic or a target opcode. | |||
1370 | switch (N->getOpcode()) { | |||
1371 | case ISD::LOAD: | |||
1372 | case ISD::STORE: | |||
1373 | case ISD::PREFETCH: | |||
1374 | case ISD::ATOMIC_CMP_SWAP: | |||
1375 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: | |||
1376 | case ISD::ATOMIC_SWAP: | |||
1377 | case ISD::ATOMIC_LOAD_ADD: | |||
1378 | case ISD::ATOMIC_LOAD_SUB: | |||
1379 | case ISD::ATOMIC_LOAD_AND: | |||
1380 | case ISD::ATOMIC_LOAD_CLR: | |||
1381 | case ISD::ATOMIC_LOAD_OR: | |||
1382 | case ISD::ATOMIC_LOAD_XOR: | |||
1383 | case ISD::ATOMIC_LOAD_NAND: | |||
1384 | case ISD::ATOMIC_LOAD_MIN: | |||
1385 | case ISD::ATOMIC_LOAD_MAX: | |||
1386 | case ISD::ATOMIC_LOAD_UMIN: | |||
1387 | case ISD::ATOMIC_LOAD_UMAX: | |||
1388 | case ISD::ATOMIC_LOAD_FADD: | |||
1389 | case ISD::ATOMIC_LOAD_FSUB: | |||
1390 | case ISD::ATOMIC_LOAD: | |||
1391 | case ISD::ATOMIC_STORE: | |||
1392 | case ISD::MLOAD: | |||
1393 | case ISD::MSTORE: | |||
1394 | case ISD::MGATHER: | |||
1395 | case ISD::MSCATTER: | |||
1396 | return true; | |||
1397 | default: | |||
1398 | return N->isMemIntrinsic() || N->isTargetMemoryOpcode(); | |||
1399 | } | |||
1400 | } | |||
1401 | }; | |||
1402 | ||||
1403 | /// This is an SDNode representing atomic operations. | |||
1404 | class AtomicSDNode : public MemSDNode { | |||
1405 | public: | |||
1406 | AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL, | |||
1407 | EVT MemVT, MachineMemOperand *MMO) | |||
1408 | : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { | |||
1409 | assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||(static_cast <bool> (((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) || MMO->isAtomic()) && "then why are we using an AtomicSDNode?" ) ? void (0) : __assert_fail ("((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) || MMO->isAtomic()) && \"then why are we using an AtomicSDNode?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1410, __extension__ __PRETTY_FUNCTION__)) | |||
1410 | MMO->isAtomic()) && "then why are we using an AtomicSDNode?")(static_cast <bool> (((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) || MMO->isAtomic()) && "then why are we using an AtomicSDNode?" ) ? void (0) : __assert_fail ("((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) || MMO->isAtomic()) && \"then why are we using an AtomicSDNode?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1410, __extension__ __PRETTY_FUNCTION__)); | |||
1411 | } | |||
1412 | ||||
1413 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
1414 | const SDValue &getVal() const { return getOperand(2); } | |||
1415 | ||||
1416 | /// Returns true if this SDNode represents cmpxchg atomic operation, false | |||
1417 | /// otherwise. | |||
1418 | bool isCompareAndSwap() const { | |||
1419 | unsigned Op = getOpcode(); | |||
1420 | return Op == ISD::ATOMIC_CMP_SWAP || | |||
1421 | Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS; | |||
1422 | } | |||
1423 | ||||
1424 | /// For cmpxchg atomic operations, return the atomic ordering requirements | |||
1425 | /// when store does not occur. | |||
1426 | AtomicOrdering getFailureOrdering() const { | |||
1427 | assert(isCompareAndSwap() && "Must be cmpxchg operation")(static_cast <bool> (isCompareAndSwap() && "Must be cmpxchg operation" ) ? void (0) : __assert_fail ("isCompareAndSwap() && \"Must be cmpxchg operation\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1427, __extension__ __PRETTY_FUNCTION__)); | |||
1428 | return MMO->getFailureOrdering(); | |||
1429 | } | |||
1430 | ||||
1431 | // Methods to support isa and dyn_cast | |||
1432 | static bool classof(const SDNode *N) { | |||
1433 | return N->getOpcode() == ISD::ATOMIC_CMP_SWAP || | |||
1434 | N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS || | |||
1435 | N->getOpcode() == ISD::ATOMIC_SWAP || | |||
1436 | N->getOpcode() == ISD::ATOMIC_LOAD_ADD || | |||
1437 | N->getOpcode() == ISD::ATOMIC_LOAD_SUB || | |||
1438 | N->getOpcode() == ISD::ATOMIC_LOAD_AND || | |||
1439 | N->getOpcode() == ISD::ATOMIC_LOAD_CLR || | |||
1440 | N->getOpcode() == ISD::ATOMIC_LOAD_OR || | |||
1441 | N->getOpcode() == ISD::ATOMIC_LOAD_XOR || | |||
1442 | N->getOpcode() == ISD::ATOMIC_LOAD_NAND || | |||
1443 | N->getOpcode() == ISD::ATOMIC_LOAD_MIN || | |||
1444 | N->getOpcode() == ISD::ATOMIC_LOAD_MAX || | |||
1445 | N->getOpcode() == ISD::ATOMIC_LOAD_UMIN || | |||
1446 | N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || | |||
1447 | N->getOpcode() == ISD::ATOMIC_LOAD_FADD || | |||
1448 | N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || | |||
1449 | N->getOpcode() == ISD::ATOMIC_LOAD || | |||
1450 | N->getOpcode() == ISD::ATOMIC_STORE; | |||
1451 | } | |||
1452 | }; | |||
1453 | ||||
1454 | /// This SDNode is used for target intrinsics that touch | |||
1455 | /// memory and need an associated MachineMemOperand. Its opcode may be | |||
1456 | /// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode | |||
1457 | /// with a value not less than FIRST_TARGET_MEMORY_OPCODE. | |||
1458 | class MemIntrinsicSDNode : public MemSDNode { | |||
1459 | public: | |||
1460 | MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, | |||
1461 | SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO) | |||
1462 | : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) { | |||
1463 | SDNodeBits.IsMemIntrinsic = true; | |||
1464 | } | |||
1465 | ||||
1466 | // Methods to support isa and dyn_cast | |||
1467 | static bool classof(const SDNode *N) { | |||
1468 | // We lower some target intrinsics to their target opcode | |||
1469 | // early a node with a target opcode can be of this class | |||
1470 | return N->isMemIntrinsic() || | |||
1471 | N->getOpcode() == ISD::PREFETCH || | |||
1472 | N->isTargetMemoryOpcode(); | |||
1473 | } | |||
1474 | }; | |||
1475 | ||||
1476 | /// This SDNode is used to implement the code generator | |||
1477 | /// support for the llvm IR shufflevector instruction. It combines elements | |||
1478 | /// from two input vectors into a new input vector, with the selection and | |||
1479 | /// ordering of elements determined by an array of integers, referred to as | |||
1480 | /// the shuffle mask. For input vectors of width N, mask indices of 0..N-1 | |||
1481 | /// refer to elements from the LHS input, and indices from N to 2N-1 the RHS. | |||
1482 | /// An index of -1 is treated as undef, such that the code generator may put | |||
1483 | /// any value in the corresponding element of the result. | |||
1484 | class ShuffleVectorSDNode : public SDNode { | |||
1485 | // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and | |||
1486 | // is freed when the SelectionDAG object is destroyed. | |||
1487 | const int *Mask; | |||
1488 | ||||
1489 | protected: | |||
1490 | friend class SelectionDAG; | |||
1491 | ||||
1492 | ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M) | |||
1493 | : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {} | |||
1494 | ||||
1495 | public: | |||
1496 | ArrayRef<int> getMask() const { | |||
1497 | EVT VT = getValueType(0); | |||
1498 | return makeArrayRef(Mask, VT.getVectorNumElements()); | |||
1499 | } | |||
1500 | ||||
1501 | int getMaskElt(unsigned Idx) const { | |||
1502 | assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!")(static_cast <bool> (Idx < getValueType(0).getVectorNumElements () && "Idx out of range!") ? void (0) : __assert_fail ("Idx < getValueType(0).getVectorNumElements() && \"Idx out of range!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1502, __extension__ __PRETTY_FUNCTION__)); | |||
1503 | return Mask[Idx]; | |||
1504 | } | |||
1505 | ||||
1506 | bool isSplat() const { return isSplatMask(Mask, getValueType(0)); } | |||
1507 | ||||
1508 | int getSplatIndex() const { | |||
1509 | assert(isSplat() && "Cannot get splat index for non-splat!")(static_cast <bool> (isSplat() && "Cannot get splat index for non-splat!" ) ? void (0) : __assert_fail ("isSplat() && \"Cannot get splat index for non-splat!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1509, __extension__ __PRETTY_FUNCTION__)); | |||
1510 | EVT VT = getValueType(0); | |||
1511 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) | |||
1512 | if (Mask[i] >= 0) | |||
1513 | return Mask[i]; | |||
1514 | ||||
1515 | // We can choose any index value here and be correct because all elements | |||
1516 | // are undefined. Return 0 for better potential for callers to simplify. | |||
1517 | return 0; | |||
1518 | } | |||
1519 | ||||
1520 | static bool isSplatMask(const int *Mask, EVT VT); | |||
1521 | ||||
1522 | /// Change values in a shuffle permute mask assuming | |||
1523 | /// the two vector operands have swapped position. | |||
1524 | static void commuteMask(MutableArrayRef<int> Mask) { | |||
1525 | unsigned NumElems = Mask.size(); | |||
1526 | for (unsigned i = 0; i != NumElems; ++i) { | |||
1527 | int idx = Mask[i]; | |||
1528 | if (idx < 0) | |||
1529 | continue; | |||
1530 | else if (idx < (int)NumElems) | |||
1531 | Mask[i] = idx + NumElems; | |||
1532 | else | |||
1533 | Mask[i] = idx - NumElems; | |||
1534 | } | |||
1535 | } | |||
1536 | ||||
1537 | static bool classof(const SDNode *N) { | |||
1538 | return N->getOpcode() == ISD::VECTOR_SHUFFLE; | |||
1539 | } | |||
1540 | }; | |||
1541 | ||||
1542 | class ConstantSDNode : public SDNode { | |||
1543 | friend class SelectionDAG; | |||
1544 | ||||
1545 | const ConstantInt *Value; | |||
1546 | ||||
1547 | ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT) | |||
1548 | : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(), | |||
1549 | getSDVTList(VT)), | |||
1550 | Value(val) { | |||
1551 | ConstantSDNodeBits.IsOpaque = isOpaque; | |||
1552 | } | |||
1553 | ||||
1554 | public: | |||
1555 | const ConstantInt *getConstantIntValue() const { return Value; } | |||
1556 | const APInt &getAPIntValue() const { return Value->getValue(); } | |||
1557 | uint64_t getZExtValue() const { return Value->getZExtValue(); } | |||
1558 | int64_t getSExtValue() const { return Value->getSExtValue(); } | |||
1559 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) { | |||
1560 | return Value->getLimitedValue(Limit); | |||
1561 | } | |||
1562 | MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); } | |||
1563 | Align getAlignValue() const { return Value->getAlignValue(); } | |||
1564 | ||||
1565 | bool isOne() const { return Value->isOne(); } | |||
1566 | bool isNullValue() const { return Value->isZero(); } | |||
1567 | bool isAllOnesValue() const { return Value->isMinusOne(); } | |||
1568 | bool isMaxSignedValue() const { return Value->isMaxValue(true); } | |||
1569 | bool isMinSignedValue() const { return Value->isMinValue(true); } | |||
1570 | ||||
1571 | bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; } | |||
1572 | ||||
1573 | static bool classof(const SDNode *N) { | |||
1574 | return N->getOpcode() == ISD::Constant || | |||
1575 | N->getOpcode() == ISD::TargetConstant; | |||
1576 | } | |||
1577 | }; | |||
1578 | ||||
1579 | uint64_t SDNode::getConstantOperandVal(unsigned Num) const { | |||
1580 | return cast<ConstantSDNode>(getOperand(Num))->getZExtValue(); | |||
1581 | } | |||
1582 | ||||
1583 | const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const { | |||
1584 | return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue(); | |||
1585 | } | |||
1586 | ||||
1587 | class ConstantFPSDNode : public SDNode { | |||
1588 | friend class SelectionDAG; | |||
1589 | ||||
1590 | const ConstantFP *Value; | |||
1591 | ||||
1592 | ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT) | |||
1593 | : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, | |||
1594 | DebugLoc(), getSDVTList(VT)), | |||
1595 | Value(val) {} | |||
1596 | ||||
1597 | public: | |||
1598 | const APFloat& getValueAPF() const { return Value->getValueAPF(); } | |||
1599 | const ConstantFP *getConstantFPValue() const { return Value; } | |||
1600 | ||||
1601 | /// Return true if the value is positive or negative zero. | |||
1602 | bool isZero() const { return Value->isZero(); } | |||
1603 | ||||
1604 | /// Return true if the value is a NaN. | |||
1605 | bool isNaN() const { return Value->isNaN(); } | |||
1606 | ||||
1607 | /// Return true if the value is an infinity | |||
1608 | bool isInfinity() const { return Value->isInfinity(); } | |||
1609 | ||||
1610 | /// Return true if the value is negative. | |||
1611 | bool isNegative() const { return Value->isNegative(); } | |||
1612 | ||||
1613 | /// We don't rely on operator== working on double values, as | |||
1614 | /// it returns true for things that are clearly not equal, like -0.0 and 0.0. | |||
1615 | /// As such, this method can be used to do an exact bit-for-bit comparison of | |||
1616 | /// two floating point values. | |||
1617 | ||||
1618 | /// We leave the version with the double argument here because it's just so | |||
1619 | /// convenient to write "2.0" and the like. Without this function we'd | |||
1620 | /// have to duplicate its logic everywhere it's called. | |||
1621 | bool isExactlyValue(double V) const { | |||
1622 | return Value->getValueAPF().isExactlyValue(V); | |||
1623 | } | |||
1624 | bool isExactlyValue(const APFloat& V) const; | |||
1625 | ||||
1626 | static bool isValueValidForType(EVT VT, const APFloat& Val); | |||
1627 | ||||
1628 | static bool classof(const SDNode *N) { | |||
1629 | return N->getOpcode() == ISD::ConstantFP || | |||
1630 | N->getOpcode() == ISD::TargetConstantFP; | |||
1631 | } | |||
1632 | }; | |||
1633 | ||||
1634 | /// Returns true if \p V is a constant integer zero. | |||
1635 | bool isNullConstant(SDValue V); | |||
1636 | ||||
1637 | /// Returns true if \p V is an FP constant with a value of positive zero. | |||
1638 | bool isNullFPConstant(SDValue V); | |||
1639 | ||||
1640 | /// Returns true if \p V is an integer constant with all bits set. | |||
1641 | bool isAllOnesConstant(SDValue V); | |||
1642 | ||||
1643 | /// Returns true if \p V is a constant integer one. | |||
1644 | bool isOneConstant(SDValue V); | |||
1645 | ||||
1646 | /// Return the non-bitcasted source operand of \p V if it exists. | |||
1647 | /// If \p V is not a bitcasted value, it is returned as-is. | |||
1648 | SDValue peekThroughBitcasts(SDValue V); | |||
1649 | ||||
1650 | /// Return the non-bitcasted and one-use source operand of \p V if it exists. | |||
1651 | /// If \p V is not a bitcasted one-use value, it is returned as-is. | |||
1652 | SDValue peekThroughOneUseBitcasts(SDValue V); | |||
1653 | ||||
1654 | /// Return the non-extracted vector source operand of \p V if it exists. | |||
1655 | /// If \p V is not an extracted subvector, it is returned as-is. | |||
1656 | SDValue peekThroughExtractSubvectors(SDValue V); | |||
1657 | ||||
1658 | /// Returns true if \p V is a bitwise not operation. Assumes that an all ones | |||
1659 | /// constant is canonicalized to be operand 1. | |||
1660 | bool isBitwiseNot(SDValue V, bool AllowUndefs = false); | |||
1661 | ||||
1662 | /// Returns the SDNode if it is a constant splat BuildVector or constant int. | |||
1663 | ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false, | |||
1664 | bool AllowTruncation = false); | |||
1665 | ||||
1666 | /// Returns the SDNode if it is a demanded constant splat BuildVector or | |||
1667 | /// constant int. | |||
1668 | ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts, | |||
1669 | bool AllowUndefs = false, | |||
1670 | bool AllowTruncation = false); | |||
1671 | ||||
1672 | /// Returns the SDNode if it is a constant splat BuildVector or constant float. | |||
1673 | ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false); | |||
1674 | ||||
1675 | /// Returns the SDNode if it is a demanded constant splat BuildVector or | |||
1676 | /// constant float. | |||
1677 | ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts, | |||
1678 | bool AllowUndefs = false); | |||
1679 | ||||
1680 | /// Return true if the value is a constant 0 integer or a splatted vector of | |||
1681 | /// a constant 0 integer (with no undefs by default). | |||
1682 | /// Build vector implicit truncation is not an issue for null values. | |||
1683 | bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false); | |||
1684 | ||||
1685 | /// Return true if the value is a constant 1 integer or a splatted vector of a | |||
1686 | /// constant 1 integer (with no undefs). | |||
1687 | /// Does not permit build vector implicit truncation. | |||
1688 | bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false); | |||
1689 | ||||
1690 | /// Return true if the value is a constant -1 integer or a splatted vector of a | |||
1691 | /// constant -1 integer (with no undefs). | |||
1692 | /// Does not permit build vector implicit truncation. | |||
1693 | bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false); | |||
1694 | ||||
1695 | /// Return true if \p V is either a integer or FP constant. | |||
1696 | inline bool isIntOrFPConstant(SDValue V) { | |||
1697 | return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V); | |||
1698 | } | |||
1699 | ||||
1700 | class GlobalAddressSDNode : public SDNode { | |||
1701 | friend class SelectionDAG; | |||
1702 | ||||
1703 | const GlobalValue *TheGlobal; | |||
1704 | int64_t Offset; | |||
1705 | unsigned TargetFlags; | |||
1706 | ||||
1707 | GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, | |||
1708 | const GlobalValue *GA, EVT VT, int64_t o, | |||
1709 | unsigned TF); | |||
1710 | ||||
1711 | public: | |||
1712 | const GlobalValue *getGlobal() const { return TheGlobal; } | |||
1713 | int64_t getOffset() const { return Offset; } | |||
1714 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1715 | // Return the address space this GlobalAddress belongs to. | |||
1716 | unsigned getAddressSpace() const; | |||
1717 | ||||
1718 | static bool classof(const SDNode *N) { | |||
1719 | return N->getOpcode() == ISD::GlobalAddress || | |||
1720 | N->getOpcode() == ISD::TargetGlobalAddress || | |||
1721 | N->getOpcode() == ISD::GlobalTLSAddress || | |||
1722 | N->getOpcode() == ISD::TargetGlobalTLSAddress; | |||
1723 | } | |||
1724 | }; | |||
1725 | ||||
1726 | class FrameIndexSDNode : public SDNode { | |||
1727 | friend class SelectionDAG; | |||
1728 | ||||
1729 | int FI; | |||
1730 | ||||
1731 | FrameIndexSDNode(int fi, EVT VT, bool isTarg) | |||
1732 | : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex, | |||
1733 | 0, DebugLoc(), getSDVTList(VT)), FI(fi) { | |||
1734 | } | |||
1735 | ||||
1736 | public: | |||
1737 | int getIndex() const { return FI; } | |||
1738 | ||||
1739 | static bool classof(const SDNode *N) { | |||
1740 | return N->getOpcode() == ISD::FrameIndex || | |||
1741 | N->getOpcode() == ISD::TargetFrameIndex; | |||
1742 | } | |||
1743 | }; | |||
1744 | ||||
1745 | /// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate | |||
1746 | /// the offet and size that are started/ended in the underlying FrameIndex. | |||
1747 | class LifetimeSDNode : public SDNode { | |||
1748 | friend class SelectionDAG; | |||
1749 | int64_t Size; | |||
1750 | int64_t Offset; // -1 if offset is unknown. | |||
1751 | ||||
1752 | LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, | |||
1753 | SDVTList VTs, int64_t Size, int64_t Offset) | |||
1754 | : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {} | |||
1755 | public: | |||
1756 | int64_t getFrameIndex() const { | |||
1757 | return cast<FrameIndexSDNode>(getOperand(1))->getIndex(); | |||
1758 | } | |||
1759 | ||||
1760 | bool hasOffset() const { return Offset >= 0; } | |||
1761 | int64_t getOffset() const { | |||
1762 | assert(hasOffset() && "offset is unknown")(static_cast <bool> (hasOffset() && "offset is unknown" ) ? void (0) : __assert_fail ("hasOffset() && \"offset is unknown\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1762, __extension__ __PRETTY_FUNCTION__)); | |||
1763 | return Offset; | |||
1764 | } | |||
1765 | int64_t getSize() const { | |||
1766 | assert(hasOffset() && "offset is unknown")(static_cast <bool> (hasOffset() && "offset is unknown" ) ? void (0) : __assert_fail ("hasOffset() && \"offset is unknown\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1766, __extension__ __PRETTY_FUNCTION__)); | |||
1767 | return Size; | |||
1768 | } | |||
1769 | ||||
1770 | // Methods to support isa and dyn_cast | |||
1771 | static bool classof(const SDNode *N) { | |||
1772 | return N->getOpcode() == ISD::LIFETIME_START || | |||
1773 | N->getOpcode() == ISD::LIFETIME_END; | |||
1774 | } | |||
1775 | }; | |||
1776 | ||||
1777 | /// This SDNode is used for PSEUDO_PROBE values, which are the function guid and | |||
1778 | /// the index of the basic block being probed. A pseudo probe serves as a place | |||
1779 | /// holder and will be removed at the end of compilation. It does not have any | |||
1780 | /// operand because we do not want the instruction selection to deal with any. | |||
1781 | class PseudoProbeSDNode : public SDNode { | |||
1782 | friend class SelectionDAG; | |||
1783 | uint64_t Guid; | |||
1784 | uint64_t Index; | |||
1785 | uint32_t Attributes; | |||
1786 | ||||
1787 | PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl, | |||
1788 | SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr) | |||
1789 | : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index), | |||
1790 | Attributes(Attr) {} | |||
1791 | ||||
1792 | public: | |||
1793 | uint64_t getGuid() const { return Guid; } | |||
1794 | uint64_t getIndex() const { return Index; } | |||
1795 | uint32_t getAttributes() const { return Attributes; } | |||
1796 | ||||
1797 | // Methods to support isa and dyn_cast | |||
1798 | static bool classof(const SDNode *N) { | |||
1799 | return N->getOpcode() == ISD::PSEUDO_PROBE; | |||
1800 | } | |||
1801 | }; | |||
1802 | ||||
1803 | class JumpTableSDNode : public SDNode { | |||
1804 | friend class SelectionDAG; | |||
1805 | ||||
1806 | int JTI; | |||
1807 | unsigned TargetFlags; | |||
1808 | ||||
1809 | JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF) | |||
1810 | : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, | |||
1811 | 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { | |||
1812 | } | |||
1813 | ||||
1814 | public: | |||
1815 | int getIndex() const { return JTI; } | |||
1816 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1817 | ||||
1818 | static bool classof(const SDNode *N) { | |||
1819 | return N->getOpcode() == ISD::JumpTable || | |||
1820 | N->getOpcode() == ISD::TargetJumpTable; | |||
1821 | } | |||
1822 | }; | |||
1823 | ||||
1824 | class ConstantPoolSDNode : public SDNode { | |||
1825 | friend class SelectionDAG; | |||
1826 | ||||
1827 | union { | |||
1828 | const Constant *ConstVal; | |||
1829 | MachineConstantPoolValue *MachineCPVal; | |||
1830 | } Val; | |||
1831 | int Offset; // It's a MachineConstantPoolValue if top bit is set. | |||
1832 | Align Alignment; // Minimum alignment requirement of CP. | |||
1833 | unsigned TargetFlags; | |||
1834 | ||||
1835 | ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o, | |||
1836 | Align Alignment, unsigned TF) | |||
1837 | : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, | |||
1838 | DebugLoc(), getSDVTList(VT)), | |||
1839 | Offset(o), Alignment(Alignment), TargetFlags(TF) { | |||
1840 | assert(Offset >= 0 && "Offset is too large")(static_cast <bool> (Offset >= 0 && "Offset is too large" ) ? void (0) : __assert_fail ("Offset >= 0 && \"Offset is too large\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1840, __extension__ __PRETTY_FUNCTION__)); | |||
1841 | Val.ConstVal = c; | |||
1842 | } | |||
1843 | ||||
1844 | ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o, | |||
1845 | Align Alignment, unsigned TF) | |||
1846 | : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, | |||
1847 | DebugLoc(), getSDVTList(VT)), | |||
1848 | Offset(o), Alignment(Alignment), TargetFlags(TF) { | |||
1849 | assert(Offset >= 0 && "Offset is too large")(static_cast <bool> (Offset >= 0 && "Offset is too large" ) ? void (0) : __assert_fail ("Offset >= 0 && \"Offset is too large\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1849, __extension__ __PRETTY_FUNCTION__)); | |||
1850 | Val.MachineCPVal = v; | |||
1851 | Offset |= 1 << (sizeof(unsigned)*CHAR_BIT8-1); | |||
1852 | } | |||
1853 | ||||
1854 | public: | |||
1855 | bool isMachineConstantPoolEntry() const { | |||
1856 | return Offset < 0; | |||
1857 | } | |||
1858 | ||||
1859 | const Constant *getConstVal() const { | |||
1860 | assert(!isMachineConstantPoolEntry() && "Wrong constantpool type")(static_cast <bool> (!isMachineConstantPoolEntry() && "Wrong constantpool type") ? void (0) : __assert_fail ("!isMachineConstantPoolEntry() && \"Wrong constantpool type\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1860, __extension__ __PRETTY_FUNCTION__)); | |||
1861 | return Val.ConstVal; | |||
1862 | } | |||
1863 | ||||
1864 | MachineConstantPoolValue *getMachineCPVal() const { | |||
1865 | assert(isMachineConstantPoolEntry() && "Wrong constantpool type")(static_cast <bool> (isMachineConstantPoolEntry() && "Wrong constantpool type") ? void (0) : __assert_fail ("isMachineConstantPoolEntry() && \"Wrong constantpool type\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1865, __extension__ __PRETTY_FUNCTION__)); | |||
1866 | return Val.MachineCPVal; | |||
1867 | } | |||
1868 | ||||
1869 | int getOffset() const { | |||
1870 | return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT8-1)); | |||
1871 | } | |||
1872 | ||||
1873 | // Return the alignment of this constant pool object, which is either 0 (for | |||
1874 | // default alignment) or the desired value. | |||
1875 | Align getAlign() const { return Alignment; } | |||
1876 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1877 | ||||
1878 | Type *getType() const; | |||
1879 | ||||
1880 | static bool classof(const SDNode *N) { | |||
1881 | return N->getOpcode() == ISD::ConstantPool || | |||
1882 | N->getOpcode() == ISD::TargetConstantPool; | |||
1883 | } | |||
1884 | }; | |||
1885 | ||||
1886 | /// Completely target-dependent object reference. | |||
1887 | class TargetIndexSDNode : public SDNode { | |||
1888 | friend class SelectionDAG; | |||
1889 | ||||
1890 | unsigned TargetFlags; | |||
1891 | int Index; | |||
1892 | int64_t Offset; | |||
1893 | ||||
1894 | public: | |||
1895 | TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF) | |||
1896 | : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), | |||
1897 | TargetFlags(TF), Index(Idx), Offset(Ofs) {} | |||
1898 | ||||
1899 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1900 | int getIndex() const { return Index; } | |||
1901 | int64_t getOffset() const { return Offset; } | |||
1902 | ||||
1903 | static bool classof(const SDNode *N) { | |||
1904 | return N->getOpcode() == ISD::TargetIndex; | |||
1905 | } | |||
1906 | }; | |||
1907 | ||||
1908 | class BasicBlockSDNode : public SDNode { | |||
1909 | friend class SelectionDAG; | |||
1910 | ||||
1911 | MachineBasicBlock *MBB; | |||
1912 | ||||
1913 | /// Debug info is meaningful and potentially useful here, but we create | |||
1914 | /// blocks out of order when they're jumped to, which makes it a bit | |||
1915 | /// harder. Let's see if we need it first. | |||
1916 | explicit BasicBlockSDNode(MachineBasicBlock *mbb) | |||
1917 | : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb) | |||
1918 | {} | |||
1919 | ||||
1920 | public: | |||
1921 | MachineBasicBlock *getBasicBlock() const { return MBB; } | |||
1922 | ||||
1923 | static bool classof(const SDNode *N) { | |||
1924 | return N->getOpcode() == ISD::BasicBlock; | |||
1925 | } | |||
1926 | }; | |||
1927 | ||||
1928 | /// A "pseudo-class" with methods for operating on BUILD_VECTORs. | |||
1929 | class BuildVectorSDNode : public SDNode { | |||
1930 | public: | |||
1931 | // These are constructed as SDNodes and then cast to BuildVectorSDNodes. | |||
1932 | explicit BuildVectorSDNode() = delete; | |||
1933 | ||||
1934 | /// Check if this is a constant splat, and if so, find the | |||
1935 | /// smallest element size that splats the vector. If MinSplatBits is | |||
1936 | /// nonzero, the element size must be at least that large. Note that the | |||
1937 | /// splat element may be the entire vector (i.e., a one element vector). | |||
1938 | /// Returns the splat element value in SplatValue. Any undefined bits in | |||
1939 | /// that value are zero, and the corresponding bits in the SplatUndef mask | |||
1940 | /// are set. The SplatBitSize value is set to the splat element size in | |||
1941 | /// bits. HasAnyUndefs is set to true if any bits in the vector are | |||
1942 | /// undefined. isBigEndian describes the endianness of the target. | |||
1943 | bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, | |||
1944 | unsigned &SplatBitSize, bool &HasAnyUndefs, | |||
1945 | unsigned MinSplatBits = 0, | |||
1946 | bool isBigEndian = false) const; | |||
1947 | ||||
1948 | /// Returns the demanded splatted value or a null value if this is not a | |||
1949 | /// splat. | |||
1950 | /// | |||
1951 | /// The DemandedElts mask indicates the elements that must be in the splat. | |||
1952 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
1953 | /// the vector width and set the bits where elements are undef. | |||
1954 | SDValue getSplatValue(const APInt &DemandedElts, | |||
1955 | BitVector *UndefElements = nullptr) const; | |||
1956 | ||||
1957 | /// Returns the splatted value or a null value if this is not a splat. | |||
1958 | /// | |||
1959 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
1960 | /// the vector width and set the bits where elements are undef. | |||
1961 | SDValue getSplatValue(BitVector *UndefElements = nullptr) const; | |||
1962 | ||||
1963 | /// Find the shortest repeating sequence of values in the build vector. | |||
1964 | /// | |||
1965 | /// e.g. { u, X, u, X, u, u, X, u } -> { X } | |||
1966 | /// { X, Y, u, Y, u, u, X, u } -> { X, Y } | |||
1967 | /// | |||
1968 | /// Currently this must be a power-of-2 build vector. | |||
1969 | /// The DemandedElts mask indicates the elements that must be present, | |||
1970 | /// undemanded elements in Sequence may be null (SDValue()). If passed a | |||
1971 | /// non-null UndefElements bitvector, it will resize it to match the original | |||
1972 | /// vector width and set the bits where elements are undef. If result is | |||
1973 | /// false, Sequence will be empty. | |||
1974 | bool getRepeatedSequence(const APInt &DemandedElts, | |||
1975 | SmallVectorImpl<SDValue> &Sequence, | |||
1976 | BitVector *UndefElements = nullptr) const; | |||
1977 | ||||
1978 | /// Find the shortest repeating sequence of values in the build vector. | |||
1979 | /// | |||
1980 | /// e.g. { u, X, u, X, u, u, X, u } -> { X } | |||
1981 | /// { X, Y, u, Y, u, u, X, u } -> { X, Y } | |||
1982 | /// | |||
1983 | /// Currently this must be a power-of-2 build vector. | |||
1984 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
1985 | /// the original vector width and set the bits where elements are undef. | |||
1986 | /// If result is false, Sequence will be empty. | |||
1987 | bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence, | |||
1988 | BitVector *UndefElements = nullptr) const; | |||
1989 | ||||
1990 | /// Returns the demanded splatted constant or null if this is not a constant | |||
1991 | /// splat. | |||
1992 | /// | |||
1993 | /// The DemandedElts mask indicates the elements that must be in the splat. | |||
1994 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
1995 | /// the vector width and set the bits where elements are undef. | |||
1996 | ConstantSDNode * | |||
1997 | getConstantSplatNode(const APInt &DemandedElts, | |||
1998 | BitVector *UndefElements = nullptr) const; | |||
1999 | ||||
2000 | /// Returns the splatted constant or null if this is not a constant | |||
2001 | /// splat. | |||
2002 | /// | |||
2003 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
2004 | /// the vector width and set the bits where elements are undef. | |||
2005 | ConstantSDNode * | |||
2006 | getConstantSplatNode(BitVector *UndefElements = nullptr) const; | |||
2007 | ||||
2008 | /// Returns the demanded splatted constant FP or null if this is not a | |||
2009 | /// constant FP splat. | |||
2010 | /// | |||
2011 | /// The DemandedElts mask indicates the elements that must be in the splat. | |||
2012 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
2013 | /// the vector width and set the bits where elements are undef. | |||
2014 | ConstantFPSDNode * | |||
2015 | getConstantFPSplatNode(const APInt &DemandedElts, | |||
2016 | BitVector *UndefElements = nullptr) const; | |||
2017 | ||||
2018 | /// Returns the splatted constant FP or null if this is not a constant | |||
2019 | /// FP splat. | |||
2020 | /// | |||
2021 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
2022 | /// the vector width and set the bits where elements are undef. | |||
2023 | ConstantFPSDNode * | |||
2024 | getConstantFPSplatNode(BitVector *UndefElements = nullptr) const; | |||
2025 | ||||
2026 | /// If this is a constant FP splat and the splatted constant FP is an | |||
2027 | /// exact power or 2, return the log base 2 integer value. Otherwise, | |||
2028 | /// return -1. | |||
2029 | /// | |||
2030 | /// The BitWidth specifies the necessary bit precision. | |||
2031 | int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, | |||
2032 | uint32_t BitWidth) const; | |||
2033 | ||||
2034 | bool isConstant() const; | |||
2035 | ||||
2036 | static bool classof(const SDNode *N) { | |||
2037 | return N->getOpcode() == ISD::BUILD_VECTOR; | |||
2038 | } | |||
2039 | }; | |||
2040 | ||||
2041 | /// An SDNode that holds an arbitrary LLVM IR Value. This is | |||
2042 | /// used when the SelectionDAG needs to make a simple reference to something | |||
2043 | /// in the LLVM IR representation. | |||
2044 | /// | |||
2045 | class SrcValueSDNode : public SDNode { | |||
2046 | friend class SelectionDAG; | |||
2047 | ||||
2048 | const Value *V; | |||
2049 | ||||
2050 | /// Create a SrcValue for a general value. | |||
2051 | explicit SrcValueSDNode(const Value *v) | |||
2052 | : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {} | |||
2053 | ||||
2054 | public: | |||
2055 | /// Return the contained Value. | |||
2056 | const Value *getValue() const { return V; } | |||
2057 | ||||
2058 | static bool classof(const SDNode *N) { | |||
2059 | return N->getOpcode() == ISD::SRCVALUE; | |||
2060 | } | |||
2061 | }; | |||
2062 | ||||
2063 | class MDNodeSDNode : public SDNode { | |||
2064 | friend class SelectionDAG; | |||
2065 | ||||
2066 | const MDNode *MD; | |||
2067 | ||||
2068 | explicit MDNodeSDNode(const MDNode *md) | |||
2069 | : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md) | |||
2070 | {} | |||
2071 | ||||
2072 | public: | |||
2073 | const MDNode *getMD() const { return MD; } | |||
2074 | ||||
2075 | static bool classof(const SDNode *N) { | |||
2076 | return N->getOpcode() == ISD::MDNODE_SDNODE; | |||
2077 | } | |||
2078 | }; | |||
2079 | ||||
2080 | class RegisterSDNode : public SDNode { | |||
2081 | friend class SelectionDAG; | |||
2082 | ||||
2083 | Register Reg; | |||
2084 | ||||
2085 | RegisterSDNode(Register reg, EVT VT) | |||
2086 | : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {} | |||
2087 | ||||
2088 | public: | |||
2089 | Register getReg() const { return Reg; } | |||
2090 | ||||
2091 | static bool classof(const SDNode *N) { | |||
2092 | return N->getOpcode() == ISD::Register; | |||
2093 | } | |||
2094 | }; | |||
2095 | ||||
2096 | class RegisterMaskSDNode : public SDNode { | |||
2097 | friend class SelectionDAG; | |||
2098 | ||||
2099 | // The memory for RegMask is not owned by the node. | |||
2100 | const uint32_t *RegMask; | |||
2101 | ||||
2102 | RegisterMaskSDNode(const uint32_t *mask) | |||
2103 | : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)), | |||
2104 | RegMask(mask) {} | |||
2105 | ||||
2106 | public: | |||
2107 | const uint32_t *getRegMask() const { return RegMask; } | |||
2108 | ||||
2109 | static bool classof(const SDNode *N) { | |||
2110 | return N->getOpcode() == ISD::RegisterMask; | |||
2111 | } | |||
2112 | }; | |||
2113 | ||||
2114 | class BlockAddressSDNode : public SDNode { | |||
2115 | friend class SelectionDAG; | |||
2116 | ||||
2117 | const BlockAddress *BA; | |||
2118 | int64_t Offset; | |||
2119 | unsigned TargetFlags; | |||
2120 | ||||
2121 | BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, | |||
2122 | int64_t o, unsigned Flags) | |||
2123 | : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), | |||
2124 | BA(ba), Offset(o), TargetFlags(Flags) {} | |||
2125 | ||||
2126 | public: | |||
2127 | const BlockAddress *getBlockAddress() const { return BA; } | |||
2128 | int64_t getOffset() const { return Offset; } | |||
2129 | unsigned getTargetFlags() const { return TargetFlags; } | |||
2130 | ||||
2131 | static bool classof(const SDNode *N) { | |||
2132 | return N->getOpcode() == ISD::BlockAddress || | |||
2133 | N->getOpcode() == ISD::TargetBlockAddress; | |||
2134 | } | |||
2135 | }; | |||
2136 | ||||
2137 | class LabelSDNode : public SDNode { | |||
2138 | friend class SelectionDAG; | |||
2139 | ||||
2140 | MCSymbol *Label; | |||
2141 | ||||
2142 | LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L) | |||
2143 | : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) { | |||
2144 | assert(LabelSDNode::classof(this) && "not a label opcode")(static_cast <bool> (LabelSDNode::classof(this) && "not a label opcode") ? void (0) : __assert_fail ("LabelSDNode::classof(this) && \"not a label opcode\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2144, __extension__ __PRETTY_FUNCTION__)); | |||
2145 | } | |||
2146 | ||||
2147 | public: | |||
2148 | MCSymbol *getLabel() const { return Label; } | |||
2149 | ||||
2150 | static bool classof(const SDNode *N) { | |||
2151 | return N->getOpcode() == ISD::EH_LABEL || | |||
2152 | N->getOpcode() == ISD::ANNOTATION_LABEL; | |||
2153 | } | |||
2154 | }; | |||
2155 | ||||
2156 | class ExternalSymbolSDNode : public SDNode { | |||
2157 | friend class SelectionDAG; | |||
2158 | ||||
2159 | const char *Symbol; | |||
2160 | unsigned TargetFlags; | |||
2161 | ||||
2162 | ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT) | |||
2163 | : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0, | |||
2164 | DebugLoc(), getSDVTList(VT)), | |||
2165 | Symbol(Sym), TargetFlags(TF) {} | |||
2166 | ||||
2167 | public: | |||
2168 | const char *getSymbol() const { return Symbol; } | |||
2169 | unsigned getTargetFlags() const { return TargetFlags; } | |||
2170 | ||||
2171 | static bool classof(const SDNode *N) { | |||
2172 | return N->getOpcode() == ISD::ExternalSymbol || | |||
2173 | N->getOpcode() == ISD::TargetExternalSymbol; | |||
2174 | } | |||
2175 | }; | |||
2176 | ||||
2177 | class MCSymbolSDNode : public SDNode { | |||
2178 | friend class SelectionDAG; | |||
2179 | ||||
2180 | MCSymbol *Symbol; | |||
2181 | ||||
2182 | MCSymbolSDNode(MCSymbol *Symbol, EVT VT) | |||
2183 | : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {} | |||
2184 | ||||
2185 | public: | |||
2186 | MCSymbol *getMCSymbol() const { return Symbol; } | |||
2187 | ||||
2188 | static bool classof(const SDNode *N) { | |||
2189 | return N->getOpcode() == ISD::MCSymbol; | |||
2190 | } | |||
2191 | }; | |||
2192 | ||||
2193 | class CondCodeSDNode : public SDNode { | |||
2194 | friend class SelectionDAG; | |||
2195 | ||||
2196 | ISD::CondCode Condition; | |||
2197 | ||||
2198 | explicit CondCodeSDNode(ISD::CondCode Cond) | |||
2199 | : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)), | |||
2200 | Condition(Cond) {} | |||
2201 | ||||
2202 | public: | |||
2203 | ISD::CondCode get() const { return Condition; } | |||
2204 | ||||
2205 | static bool classof(const SDNode *N) { | |||
2206 | return N->getOpcode() == ISD::CONDCODE; | |||
2207 | } | |||
2208 | }; | |||
2209 | ||||
2210 | /// This class is used to represent EVT's, which are used | |||
2211 | /// to parameterize some operations. | |||
2212 | class VTSDNode : public SDNode { | |||
2213 | friend class SelectionDAG; | |||
2214 | ||||
2215 | EVT ValueType; | |||
2216 | ||||
2217 | explicit VTSDNode(EVT VT) | |||
2218 | : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)), | |||
2219 | ValueType(VT) {} | |||
2220 | ||||
2221 | public: | |||
2222 | EVT getVT() const { return ValueType; } | |||
2223 | ||||
2224 | static bool classof(const SDNode *N) { | |||
2225 | return N->getOpcode() == ISD::VALUETYPE; | |||
2226 | } | |||
2227 | }; | |||
2228 | ||||
2229 | /// Base class for LoadSDNode and StoreSDNode | |||
2230 | class LSBaseSDNode : public MemSDNode { | |||
2231 | public: | |||
2232 | LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, | |||
2233 | SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT, | |||
2234 | MachineMemOperand *MMO) | |||
2235 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2236 | LSBaseSDNodeBits.AddressingMode = AM; | |||
2237 | assert(getAddressingMode() == AM && "Value truncated")(static_cast <bool> (getAddressingMode() == AM && "Value truncated") ? void (0) : __assert_fail ("getAddressingMode() == AM && \"Value truncated\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2237, __extension__ __PRETTY_FUNCTION__)); | |||
2238 | } | |||
2239 | ||||
2240 | const SDValue &getOffset() const { | |||
2241 | return getOperand(getOpcode() == ISD::LOAD ? 2 : 3); | |||
2242 | } | |||
2243 | ||||
2244 | /// Return the addressing mode for this load or store: | |||
2245 | /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. | |||
2246 | ISD::MemIndexedMode getAddressingMode() const { | |||
2247 | return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); | |||
2248 | } | |||
2249 | ||||
2250 | /// Return true if this is a pre/post inc/dec load/store. | |||
2251 | bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } | |||
2252 | ||||
2253 | /// Return true if this is NOT a pre/post inc/dec load/store. | |||
2254 | bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } | |||
2255 | ||||
2256 | static bool classof(const SDNode *N) { | |||
2257 | return N->getOpcode() == ISD::LOAD || | |||
2258 | N->getOpcode() == ISD::STORE; | |||
2259 | } | |||
2260 | }; | |||
2261 | ||||
2262 | /// This class is used to represent ISD::LOAD nodes. | |||
2263 | class LoadSDNode : public LSBaseSDNode { | |||
2264 | friend class SelectionDAG; | |||
2265 | ||||
2266 | LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2267 | ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT, | |||
2268 | MachineMemOperand *MMO) | |||
2269 | : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) { | |||
2270 | LoadSDNodeBits.ExtTy = ETy; | |||
2271 | assert(readMem() && "Load MachineMemOperand is not a load!")(static_cast <bool> (readMem() && "Load MachineMemOperand is not a load!" ) ? void (0) : __assert_fail ("readMem() && \"Load MachineMemOperand is not a load!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2271, __extension__ __PRETTY_FUNCTION__)); | |||
2272 | assert(!writeMem() && "Load MachineMemOperand is a store!")(static_cast <bool> (!writeMem() && "Load MachineMemOperand is a store!" ) ? void (0) : __assert_fail ("!writeMem() && \"Load MachineMemOperand is a store!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2272, __extension__ __PRETTY_FUNCTION__)); | |||
2273 | } | |||
2274 | ||||
2275 | public: | |||
2276 | /// Return whether this is a plain node, | |||
2277 | /// or one of the varieties of value-extending loads. | |||
2278 | ISD::LoadExtType getExtensionType() const { | |||
2279 | return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy); | |||
2280 | } | |||
2281 | ||||
2282 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
2283 | const SDValue &getOffset() const { return getOperand(2); } | |||
2284 | ||||
2285 | static bool classof(const SDNode *N) { | |||
2286 | return N->getOpcode() == ISD::LOAD; | |||
2287 | } | |||
2288 | }; | |||
2289 | ||||
2290 | /// This class is used to represent ISD::STORE nodes. | |||
2291 | class StoreSDNode : public LSBaseSDNode { | |||
2292 | friend class SelectionDAG; | |||
2293 | ||||
2294 | StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2295 | ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT, | |||
2296 | MachineMemOperand *MMO) | |||
2297 | : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) { | |||
2298 | StoreSDNodeBits.IsTruncating = isTrunc; | |||
2299 | assert(!readMem() && "Store MachineMemOperand is a load!")(static_cast <bool> (!readMem() && "Store MachineMemOperand is a load!" ) ? void (0) : __assert_fail ("!readMem() && \"Store MachineMemOperand is a load!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2299, __extension__ __PRETTY_FUNCTION__)); | |||
2300 | assert(writeMem() && "Store MachineMemOperand is not a store!")(static_cast <bool> (writeMem() && "Store MachineMemOperand is not a store!" ) ? void (0) : __assert_fail ("writeMem() && \"Store MachineMemOperand is not a store!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2300, __extension__ __PRETTY_FUNCTION__)); | |||
2301 | } | |||
2302 | ||||
2303 | public: | |||
2304 | /// Return true if the op does a truncation before store. | |||
2305 | /// For integers this is the same as doing a TRUNCATE and storing the result. | |||
2306 | /// For floats, it is the same as doing an FP_ROUND and storing the result. | |||
2307 | bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } | |||
2308 | void setTruncatingStore(bool Truncating) { | |||
2309 | StoreSDNodeBits.IsTruncating = Truncating; | |||
2310 | } | |||
2311 | ||||
2312 | const SDValue &getValue() const { return getOperand(1); } | |||
2313 | const SDValue &getBasePtr() const { return getOperand(2); } | |||
2314 | const SDValue &getOffset() const { return getOperand(3); } | |||
2315 | ||||
2316 | static bool classof(const SDNode *N) { | |||
2317 | return N->getOpcode() == ISD::STORE; | |||
2318 | } | |||
2319 | }; | |||
2320 | ||||
2321 | /// This base class is used to represent MLOAD and MSTORE nodes | |||
2322 | class MaskedLoadStoreSDNode : public MemSDNode { | |||
2323 | public: | |||
2324 | friend class SelectionDAG; | |||
2325 | ||||
2326 | MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, | |||
2327 | const DebugLoc &dl, SDVTList VTs, | |||
2328 | ISD::MemIndexedMode AM, EVT MemVT, | |||
2329 | MachineMemOperand *MMO) | |||
2330 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2331 | LSBaseSDNodeBits.AddressingMode = AM; | |||
2332 | assert(getAddressingMode() == AM && "Value truncated")(static_cast <bool> (getAddressingMode() == AM && "Value truncated") ? void (0) : __assert_fail ("getAddressingMode() == AM && \"Value truncated\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2332, __extension__ __PRETTY_FUNCTION__)); | |||
2333 | } | |||
2334 | ||||
2335 | // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru) | |||
2336 | // MaskedStoreSDNode (Chain, data, ptr, offset, mask) | |||
2337 | // Mask is a vector of i1 elements | |||
2338 | const SDValue &getOffset() const { | |||
2339 | return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3); | |||
2340 | } | |||
2341 | const SDValue &getMask() const { | |||
2342 | return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4); | |||
2343 | } | |||
2344 | ||||
2345 | /// Return the addressing mode for this load or store: | |||
2346 | /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. | |||
2347 | ISD::MemIndexedMode getAddressingMode() const { | |||
2348 | return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); | |||
2349 | } | |||
2350 | ||||
2351 | /// Return true if this is a pre/post inc/dec load/store. | |||
2352 | bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } | |||
2353 | ||||
2354 | /// Return true if this is NOT a pre/post inc/dec load/store. | |||
2355 | bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } | |||
2356 | ||||
2357 | static bool classof(const SDNode *N) { | |||
2358 | return N->getOpcode() == ISD::MLOAD || | |||
2359 | N->getOpcode() == ISD::MSTORE; | |||
2360 | } | |||
2361 | }; | |||
2362 | ||||
2363 | /// This class is used to represent an MLOAD node | |||
2364 | class MaskedLoadSDNode : public MaskedLoadStoreSDNode { | |||
2365 | public: | |||
2366 | friend class SelectionDAG; | |||
2367 | ||||
2368 | MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2369 | ISD::MemIndexedMode AM, ISD::LoadExtType ETy, | |||
2370 | bool IsExpanding, EVT MemVT, MachineMemOperand *MMO) | |||
2371 | : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) { | |||
2372 | LoadSDNodeBits.ExtTy = ETy; | |||
2373 | LoadSDNodeBits.IsExpanding = IsExpanding; | |||
2374 | } | |||
2375 | ||||
2376 | ISD::LoadExtType getExtensionType() const { | |||
2377 | return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy); | |||
2378 | } | |||
2379 | ||||
2380 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
2381 | const SDValue &getOffset() const { return getOperand(2); } | |||
2382 | const SDValue &getMask() const { return getOperand(3); } | |||
2383 | const SDValue &getPassThru() const { return getOperand(4); } | |||
2384 | ||||
2385 | static bool classof(const SDNode *N) { | |||
2386 | return N->getOpcode() == ISD::MLOAD; | |||
2387 | } | |||
2388 | ||||
2389 | bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; } | |||
2390 | }; | |||
2391 | ||||
2392 | /// This class is used to represent an MSTORE node | |||
2393 | class MaskedStoreSDNode : public MaskedLoadStoreSDNode { | |||
2394 | public: | |||
2395 | friend class SelectionDAG; | |||
2396 | ||||
2397 | MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2398 | ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing, | |||
2399 | EVT MemVT, MachineMemOperand *MMO) | |||
2400 | : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) { | |||
2401 | StoreSDNodeBits.IsTruncating = isTrunc; | |||
2402 | StoreSDNodeBits.IsCompressing = isCompressing; | |||
2403 | } | |||
2404 | ||||
2405 | /// Return true if the op does a truncation before store. | |||
2406 | /// For integers this is the same as doing a TRUNCATE and storing the result. | |||
2407 | /// For floats, it is the same as doing an FP_ROUND and storing the result. | |||
2408 | bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } | |||
2409 | ||||
2410 | /// Returns true if the op does a compression to the vector before storing. | |||
2411 | /// The node contiguously stores the active elements (integers or floats) | |||
2412 | /// in src (those with their respective bit set in writemask k) to unaligned | |||
2413 | /// memory at base_addr. | |||
2414 | bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; } | |||
2415 | ||||
2416 | const SDValue &getValue() const { return getOperand(1); } | |||
2417 | const SDValue &getBasePtr() const { return getOperand(2); } | |||
2418 | const SDValue &getOffset() const { return getOperand(3); } | |||
2419 | const SDValue &getMask() const { return getOperand(4); } | |||
2420 | ||||
2421 | static bool classof(const SDNode *N) { | |||
2422 | return N->getOpcode() == ISD::MSTORE; | |||
2423 | } | |||
2424 | }; | |||
2425 | ||||
2426 | /// This is a base class used to represent | |||
2427 | /// MGATHER and MSCATTER nodes | |||
2428 | /// | |||
2429 | class MaskedGatherScatterSDNode : public MemSDNode { | |||
2430 | public: | |||
2431 | friend class SelectionDAG; | |||
2432 | ||||
2433 | MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, | |||
2434 | const DebugLoc &dl, SDVTList VTs, EVT MemVT, | |||
2435 | MachineMemOperand *MMO, ISD::MemIndexType IndexType) | |||
2436 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2437 | LSBaseSDNodeBits.AddressingMode = IndexType; | |||
2438 | assert(getIndexType() == IndexType && "Value truncated")(static_cast <bool> (getIndexType() == IndexType && "Value truncated") ? void (0) : __assert_fail ("getIndexType() == IndexType && \"Value truncated\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2438, __extension__ __PRETTY_FUNCTION__)); | |||
2439 | } | |||
2440 | ||||
2441 | /// How is Index applied to BasePtr when computing addresses. | |||
2442 | ISD::MemIndexType getIndexType() const { | |||
2443 | return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode); | |||
2444 | } | |||
2445 | void setIndexType(ISD::MemIndexType IndexType) { | |||
2446 | LSBaseSDNodeBits.AddressingMode = IndexType; | |||
2447 | } | |||
2448 | bool isIndexScaled() const { | |||
2449 | return (getIndexType() == ISD::SIGNED_SCALED) || | |||
2450 | (getIndexType() == ISD::UNSIGNED_SCALED); | |||
2451 | } | |||
2452 | bool isIndexSigned() const { | |||
2453 | return (getIndexType() == ISD::SIGNED_SCALED) || | |||
2454 | (getIndexType() == ISD::SIGNED_UNSCALED); | |||
2455 | } | |||
2456 | ||||
2457 | // In the both nodes address is Op1, mask is Op2: | |||
2458 | // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) | |||
2459 | // MaskedScatterSDNode (Chain, value, mask, base, index, scale) | |||
2460 | // Mask is a vector of i1 elements | |||
2461 | const SDValue &getBasePtr() const { return getOperand(3); } | |||
2462 | const SDValue &getIndex() const { return getOperand(4); } | |||
2463 | const SDValue &getMask() const { return getOperand(2); } | |||
2464 | const SDValue &getScale() const { return getOperand(5); } | |||
2465 | ||||
2466 | static bool classof(const SDNode *N) { | |||
2467 | return N->getOpcode() == ISD::MGATHER || | |||
2468 | N->getOpcode() == ISD::MSCATTER; | |||
2469 | } | |||
2470 | }; | |||
2471 | ||||
2472 | /// This class is used to represent an MGATHER node | |||
2473 | /// | |||
2474 | class MaskedGatherSDNode : public MaskedGatherScatterSDNode { | |||
2475 | public: | |||
2476 | friend class SelectionDAG; | |||
2477 | ||||
2478 | MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2479 | EVT MemVT, MachineMemOperand *MMO, | |||
2480 | ISD::MemIndexType IndexType, ISD::LoadExtType ETy) | |||
2481 | : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO, | |||
2482 | IndexType) { | |||
2483 | LoadSDNodeBits.ExtTy = ETy; | |||
2484 | } | |||
2485 | ||||
2486 | const SDValue &getPassThru() const { return getOperand(1); } | |||
2487 | ||||
2488 | ISD::LoadExtType getExtensionType() const { | |||
2489 | return ISD::LoadExtType(LoadSDNodeBits.ExtTy); | |||
2490 | } | |||
2491 | ||||
2492 | static bool classof(const SDNode *N) { | |||
2493 | return N->getOpcode() == ISD::MGATHER; | |||
2494 | } | |||
2495 | }; | |||
2496 | ||||
2497 | /// This class is used to represent an MSCATTER node | |||
2498 | /// | |||
2499 | class MaskedScatterSDNode : public MaskedGatherScatterSDNode { | |||
2500 | public: | |||
2501 | friend class SelectionDAG; | |||
2502 | ||||
2503 | MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2504 | EVT MemVT, MachineMemOperand *MMO, | |||
2505 | ISD::MemIndexType IndexType, bool IsTrunc) | |||
2506 | : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO, | |||
2507 | IndexType) { | |||
2508 | StoreSDNodeBits.IsTruncating = IsTrunc; | |||
2509 | } | |||
2510 | ||||
2511 | /// Return true if the op does a truncation before store. | |||
2512 | /// For integers this is the same as doing a TRUNCATE and storing the result. | |||
2513 | /// For floats, it is the same as doing an FP_ROUND and storing the result. | |||
2514 | bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } | |||
2515 | ||||
2516 | const SDValue &getValue() const { return getOperand(1); } | |||
2517 | ||||
2518 | static bool classof(const SDNode *N) { | |||
2519 | return N->getOpcode() == ISD::MSCATTER; | |||
2520 | } | |||
2521 | }; | |||
2522 | ||||
2523 | /// An SDNode that represents everything that will be needed | |||
2524 | /// to construct a MachineInstr. These nodes are created during the | |||
2525 | /// instruction selection proper phase. | |||
2526 | /// | |||
2527 | /// Note that the only supported way to set the `memoperands` is by calling the | |||
2528 | /// `SelectionDAG::setNodeMemRefs` function as the memory management happens | |||
2529 | /// inside the DAG rather than in the node. | |||
2530 | class MachineSDNode : public SDNode { | |||
2531 | private: | |||
2532 | friend class SelectionDAG; | |||
2533 | ||||
2534 | MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs) | |||
2535 | : SDNode(Opc, Order, DL, VTs) {} | |||
2536 | ||||
2537 | // We use a pointer union between a single `MachineMemOperand` pointer and | |||
2538 | // a pointer to an array of `MachineMemOperand` pointers. This is null when | |||
2539 | // the number of these is zero, the single pointer variant used when the | |||
2540 | // number is one, and the array is used for larger numbers. | |||
2541 | // | |||
2542 | // The array is allocated via the `SelectionDAG`'s allocator and so will | |||
2543 | // always live until the DAG is cleaned up and doesn't require ownership here. | |||
2544 | // | |||
2545 | // We can't use something simpler like `TinyPtrVector` here because `SDNode` | |||
2546 | // subclasses aren't managed in a conforming C++ manner. See the comments on | |||
2547 | // `SelectionDAG::MorphNodeTo` which details what all goes on, but the | |||
2548 | // constraint here is that these don't manage memory with their constructor or | |||
2549 | // destructor and can be initialized to a good state even if they start off | |||
2550 | // uninitialized. | |||
2551 | PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {}; | |||
2552 | ||||
2553 | // Note that this could be folded into the above `MemRefs` member if doing so | |||
2554 | // is advantageous at some point. We don't need to store this in most cases. | |||
2555 | // However, at the moment this doesn't appear to make the allocation any | |||
2556 | // smaller and makes the code somewhat simpler to read. | |||
2557 | int NumMemRefs = 0; | |||
2558 | ||||
2559 | public: | |||
2560 | using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator; | |||
2561 | ||||
2562 | ArrayRef<MachineMemOperand *> memoperands() const { | |||
2563 | // Special case the common cases. | |||
2564 | if (NumMemRefs == 0) | |||
2565 | return {}; | |||
2566 | if (NumMemRefs == 1) | |||
2567 | return makeArrayRef(MemRefs.getAddrOfPtr1(), 1); | |||
2568 | ||||
2569 | // Otherwise we have an actual array. | |||
2570 | return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs); | |||
2571 | } | |||
2572 | mmo_iterator memoperands_begin() const { return memoperands().begin(); } | |||
2573 | mmo_iterator memoperands_end() const { return memoperands().end(); } | |||
2574 | bool memoperands_empty() const { return memoperands().empty(); } | |||
2575 | ||||
2576 | /// Clear out the memory reference descriptor list. | |||
2577 | void clearMemRefs() { | |||
2578 | MemRefs = nullptr; | |||
2579 | NumMemRefs = 0; | |||
2580 | } | |||
2581 | ||||
2582 | static bool classof(const SDNode *N) { | |||
2583 | return N->isMachineOpcode(); | |||
2584 | } | |||
2585 | }; | |||
2586 | ||||
2587 | /// An SDNode that records if a register contains a value that is guaranteed to | |||
2588 | /// be aligned accordingly. | |||
2589 | class AssertAlignSDNode : public SDNode { | |||
2590 | Align Alignment; | |||
2591 | ||||
2592 | public: | |||
2593 | AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A) | |||
2594 | : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {} | |||
2595 | ||||
2596 | Align getAlign() const { return Alignment; } | |||
2597 | ||||
2598 | static bool classof(const SDNode *N) { | |||
2599 | return N->getOpcode() == ISD::AssertAlign; | |||
2600 | } | |||
2601 | }; | |||
2602 | ||||
2603 | class SDNodeIterator { | |||
2604 | const SDNode *Node; | |||
2605 | unsigned Operand; | |||
2606 | ||||
2607 | SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {} | |||
2608 | ||||
2609 | public: | |||
2610 | using iterator_category = std::forward_iterator_tag; | |||
2611 | using value_type = SDNode; | |||
2612 | using difference_type = std::ptrdiff_t; | |||
2613 | using pointer = value_type *; | |||
2614 | using reference = value_type &; | |||
2615 | ||||
2616 | bool operator==(const SDNodeIterator& x) const { | |||
2617 | return Operand == x.Operand; | |||
2618 | } | |||
2619 | bool operator!=(const SDNodeIterator& x) const { return !operator==(x); } | |||
2620 | ||||
2621 | pointer operator*() const { | |||
2622 | return Node->getOperand(Operand).getNode(); | |||
2623 | } | |||
2624 | pointer operator->() const { return operator*(); } | |||
2625 | ||||
2626 | SDNodeIterator& operator++() { // Preincrement | |||
2627 | ++Operand; | |||
2628 | return *this; | |||
2629 | } | |||
2630 | SDNodeIterator operator++(int) { // Postincrement | |||
2631 | SDNodeIterator tmp = *this; ++*this; return tmp; | |||
2632 | } | |||
2633 | size_t operator-(SDNodeIterator Other) const { | |||
2634 | assert(Node == Other.Node &&(static_cast <bool> (Node == Other.Node && "Cannot compare iterators of two different nodes!" ) ? void (0) : __assert_fail ("Node == Other.Node && \"Cannot compare iterators of two different nodes!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2635, __extension__ __PRETTY_FUNCTION__)) | |||
2635 | "Cannot compare iterators of two different nodes!")(static_cast <bool> (Node == Other.Node && "Cannot compare iterators of two different nodes!" ) ? void (0) : __assert_fail ("Node == Other.Node && \"Cannot compare iterators of two different nodes!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2635, __extension__ __PRETTY_FUNCTION__)); | |||
2636 | return Operand - Other.Operand; | |||
2637 | } | |||
2638 | ||||
2639 | static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); } | |||
2640 | static SDNodeIterator end (const SDNode *N) { | |||
2641 | return SDNodeIterator(N, N->getNumOperands()); | |||
2642 | } | |||
2643 | ||||
2644 | unsigned getOperand() const { return Operand; } | |||
2645 | const SDNode *getNode() const { return Node; } | |||
2646 | }; | |||
2647 | ||||
2648 | template <> struct GraphTraits<SDNode*> { | |||
2649 | using NodeRef = SDNode *; | |||
2650 | using ChildIteratorType = SDNodeIterator; | |||
2651 | ||||
2652 | static NodeRef getEntryNode(SDNode *N) { return N; } | |||
2653 | ||||
2654 | static ChildIteratorType child_begin(NodeRef N) { | |||
2655 | return SDNodeIterator::begin(N); | |||
2656 | } | |||
2657 | ||||
2658 | static ChildIteratorType child_end(NodeRef N) { | |||
2659 | return SDNodeIterator::end(N); | |||
2660 | } | |||
2661 | }; | |||
2662 | ||||
2663 | /// Stand-in for the largest SDNode subclass, intended for use in sizeof(). | |||
2664 | /// | |||
2665 | /// This needs to be a union because the largest node differs on 32 bit systems | |||
2666 | /// with 4 and 8 byte pointer alignment, respectively. | |||
2667 | using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode, | |||
2668 | BlockAddressSDNode, | |||
2669 | GlobalAddressSDNode, | |||
2670 | PseudoProbeSDNode>; | |||
2671 | ||||
2672 | /// The SDNode class with the greatest alignment requirement. | |||
2673 | using MostAlignedSDNode = GlobalAddressSDNode; | |||
2674 | ||||
2675 | namespace ISD { | |||
2676 | ||||
2677 | /// Returns true if the specified node is a non-extending and unindexed load. | |||
2678 | inline bool isNormalLoad(const SDNode *N) { | |||
2679 | const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N); | |||
2680 | return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD && | |||
2681 | Ld->getAddressingMode() == ISD::UNINDEXED; | |||
2682 | } | |||
2683 | ||||
2684 | /// Returns true if the specified node is a non-extending load. | |||
2685 | inline bool isNON_EXTLoad(const SDNode *N) { | |||
2686 | return isa<LoadSDNode>(N) && | |||
2687 | cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; | |||
2688 | } | |||
2689 | ||||
2690 | /// Returns true if the specified node is a EXTLOAD. | |||
2691 | inline bool isEXTLoad(const SDNode *N) { | |||
2692 | return isa<LoadSDNode>(N) && | |||
2693 | cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD; | |||
2694 | } | |||
2695 | ||||
2696 | /// Returns true if the specified node is a SEXTLOAD. | |||
2697 | inline bool isSEXTLoad(const SDNode *N) { | |||
2698 | return isa<LoadSDNode>(N) && | |||
2699 | cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; | |||
2700 | } | |||
2701 | ||||
2702 | /// Returns true if the specified node is a ZEXTLOAD. | |||
2703 | inline bool isZEXTLoad(const SDNode *N) { | |||
2704 | return isa<LoadSDNode>(N) && | |||
2705 | cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; | |||
2706 | } | |||
2707 | ||||
2708 | /// Returns true if the specified node is an unindexed load. | |||
2709 | inline bool isUNINDEXEDLoad(const SDNode *N) { | |||
2710 | return isa<LoadSDNode>(N) && | |||
2711 | cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
2712 | } | |||
2713 | ||||
2714 | /// Returns true if the specified node is a non-truncating | |||
2715 | /// and unindexed store. | |||
2716 | inline bool isNormalStore(const SDNode *N) { | |||
2717 | const StoreSDNode *St = dyn_cast<StoreSDNode>(N); | |||
2718 | return St && !St->isTruncatingStore() && | |||
2719 | St->getAddressingMode() == ISD::UNINDEXED; | |||
2720 | } | |||
2721 | ||||
2722 | /// Returns true if the specified node is an unindexed store. | |||
2723 | inline bool isUNINDEXEDStore(const SDNode *N) { | |||
2724 | return isa<StoreSDNode>(N) && | |||
2725 | cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
2726 | } | |||
2727 | ||||
2728 | /// Attempt to match a unary predicate against a scalar/splat constant or | |||
2729 | /// every element of a constant BUILD_VECTOR. | |||
2730 | /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match. | |||
2731 | bool matchUnaryPredicate(SDValue Op, | |||
2732 | std::function<bool(ConstantSDNode *)> Match, | |||
2733 | bool AllowUndefs = false); | |||
2734 | ||||
2735 | /// Attempt to match a binary predicate against a pair of scalar/splat | |||
2736 | /// constants or every element of a pair of constant BUILD_VECTORs. | |||
2737 | /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match. | |||
2738 | /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match. | |||
2739 | bool matchBinaryPredicate( | |||
2740 | SDValue LHS, SDValue RHS, | |||
2741 | std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, | |||
2742 | bool AllowUndefs = false, bool AllowTypeMismatch = false); | |||
2743 | ||||
2744 | /// Returns true if the specified value is the overflow result from one | |||
2745 | /// of the overflow intrinsic nodes. | |||
2746 | inline bool isOverflowIntrOpRes(SDValue Op) { | |||
2747 | unsigned Opc = Op.getOpcode(); | |||
2748 | return (Op.getResNo() == 1 && | |||
2749 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | |||
2750 | Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)); | |||
2751 | } | |||
2752 | ||||
2753 | } // end namespace ISD | |||
2754 | ||||
2755 | } // end namespace llvm | |||
2756 | ||||
2757 | #endif // LLVM_CODEGEN_SELECTIONDAGNODES_H |