Static-analyzer report header (scan-build). Reported issue:
  File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
  Warning: line 1114, column 10 — "Called C++ object pointer is null"
The annotated listing below is llvm/lib/Target/X86/X86ISelDAGToDAG.cpp.
1 | //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// | ||||||
2 | // | ||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
6 | // | ||||||
7 | //===----------------------------------------------------------------------===// | ||||||
8 | // | ||||||
9 | // This file defines a DAG pattern matching instruction selector for X86, | ||||||
10 | // converting from a legalized dag to a X86 dag. | ||||||
11 | // | ||||||
12 | //===----------------------------------------------------------------------===// | ||||||
13 | |||||||
14 | #include "X86.h" | ||||||
15 | #include "X86MachineFunctionInfo.h" | ||||||
16 | #include "X86RegisterInfo.h" | ||||||
17 | #include "X86Subtarget.h" | ||||||
18 | #include "X86TargetMachine.h" | ||||||
19 | #include "llvm/ADT/Statistic.h" | ||||||
20 | #include "llvm/CodeGen/MachineModuleInfo.h" | ||||||
21 | #include "llvm/CodeGen/SelectionDAGISel.h" | ||||||
22 | #include "llvm/Config/llvm-config.h" | ||||||
23 | #include "llvm/IR/ConstantRange.h" | ||||||
24 | #include "llvm/IR/Function.h" | ||||||
25 | #include "llvm/IR/Instructions.h" | ||||||
26 | #include "llvm/IR/Intrinsics.h" | ||||||
27 | #include "llvm/IR/IntrinsicsX86.h" | ||||||
28 | #include "llvm/IR/Type.h" | ||||||
29 | #include "llvm/Support/Debug.h" | ||||||
30 | #include "llvm/Support/ErrorHandling.h" | ||||||
31 | #include "llvm/Support/KnownBits.h" | ||||||
32 | #include "llvm/Support/MathExtras.h" | ||||||
33 | #include <cstdint> | ||||||
34 | |||||||
35 | using namespace llvm; | ||||||
36 | |||||||
37 | #define DEBUG_TYPE"x86-isel" "x86-isel" | ||||||
38 | |||||||
39 | STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor")static llvm::Statistic NumLoadMoved = {"x86-isel", "NumLoadMoved" , "Number of loads moved below TokenFactor"}; | ||||||
40 | |||||||
41 | static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true), | ||||||
42 | cl::desc("Enable setting constant bits to reduce size of mask immediates"), | ||||||
43 | cl::Hidden); | ||||||
44 | |||||||
45 | static cl::opt<bool> EnablePromoteAnyextLoad( | ||||||
46 | "x86-promote-anyext-load", cl::init(true), | ||||||
47 | cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden); | ||||||
48 | |||||||
49 | extern cl::opt<bool> IndirectBranchTracking; | ||||||
50 | |||||||
51 | //===----------------------------------------------------------------------===// | ||||||
52 | // Pattern Matcher Implementation | ||||||
53 | //===----------------------------------------------------------------------===// | ||||||
54 | |||||||
55 | namespace { | ||||||
56 | /// This corresponds to X86AddressMode, but uses SDValue's instead of register | ||||||
57 | /// numbers for the leaves of the matched tree. | ||||||
58 | struct X86ISelAddressMode { | ||||||
59 | enum { | ||||||
60 | RegBase, | ||||||
61 | FrameIndexBase | ||||||
62 | } BaseType; | ||||||
63 | |||||||
64 | // This is really a union, discriminated by BaseType! | ||||||
65 | SDValue Base_Reg; | ||||||
66 | int Base_FrameIndex; | ||||||
67 | |||||||
68 | unsigned Scale; | ||||||
69 | SDValue IndexReg; | ||||||
70 | int32_t Disp; | ||||||
71 | SDValue Segment; | ||||||
72 | const GlobalValue *GV; | ||||||
73 | const Constant *CP; | ||||||
74 | const BlockAddress *BlockAddr; | ||||||
75 | const char *ES; | ||||||
76 | MCSymbol *MCSym; | ||||||
77 | int JT; | ||||||
78 | Align Alignment; // CP alignment. | ||||||
79 | unsigned char SymbolFlags; // X86II::MO_* | ||||||
80 | bool NegateIndex = false; | ||||||
81 | |||||||
82 | X86ISelAddressMode() | ||||||
83 | : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), | ||||||
84 | Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), | ||||||
85 | MCSym(nullptr), JT(-1), SymbolFlags(X86II::MO_NO_FLAG) {} | ||||||
86 | |||||||
87 | bool hasSymbolicDisplacement() const { | ||||||
88 | return GV != nullptr || CP != nullptr || ES != nullptr || | ||||||
89 | MCSym != nullptr || JT != -1 || BlockAddr != nullptr; | ||||||
90 | } | ||||||
91 | |||||||
92 | bool hasBaseOrIndexReg() const { | ||||||
93 | return BaseType == FrameIndexBase || | ||||||
94 | IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; | ||||||
95 | } | ||||||
96 | |||||||
97 | /// Return true if this addressing mode is already RIP-relative. | ||||||
98 | bool isRIPRelative() const { | ||||||
99 | if (BaseType != RegBase) return false; | ||||||
100 | if (RegisterSDNode *RegNode = | ||||||
101 | dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode())) | ||||||
102 | return RegNode->getReg() == X86::RIP; | ||||||
103 | return false; | ||||||
104 | } | ||||||
105 | |||||||
106 | void setBaseReg(SDValue Reg) { | ||||||
107 | BaseType = RegBase; | ||||||
108 | Base_Reg = Reg; | ||||||
109 | } | ||||||
110 | |||||||
111 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||
112 | void dump(SelectionDAG *DAG = nullptr) { | ||||||
113 | dbgs() << "X86ISelAddressMode " << this << '\n'; | ||||||
114 | dbgs() << "Base_Reg "; | ||||||
115 | if (Base_Reg.getNode()) | ||||||
116 | Base_Reg.getNode()->dump(DAG); | ||||||
117 | else | ||||||
118 | dbgs() << "nul\n"; | ||||||
119 | if (BaseType == FrameIndexBase) | ||||||
120 | dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'; | ||||||
121 | dbgs() << " Scale " << Scale << '\n' | ||||||
122 | << "IndexReg "; | ||||||
123 | if (NegateIndex) | ||||||
124 | dbgs() << "negate "; | ||||||
125 | if (IndexReg.getNode()) | ||||||
126 | IndexReg.getNode()->dump(DAG); | ||||||
127 | else | ||||||
128 | dbgs() << "nul\n"; | ||||||
129 | dbgs() << " Disp " << Disp << '\n' | ||||||
130 | << "GV "; | ||||||
131 | if (GV) | ||||||
132 | GV->dump(); | ||||||
133 | else | ||||||
134 | dbgs() << "nul"; | ||||||
135 | dbgs() << " CP "; | ||||||
136 | if (CP) | ||||||
137 | CP->dump(); | ||||||
138 | else | ||||||
139 | dbgs() << "nul"; | ||||||
140 | dbgs() << '\n' | ||||||
141 | << "ES "; | ||||||
142 | if (ES) | ||||||
143 | dbgs() << ES; | ||||||
144 | else | ||||||
145 | dbgs() << "nul"; | ||||||
146 | dbgs() << " MCSym "; | ||||||
147 | if (MCSym) | ||||||
148 | dbgs() << MCSym; | ||||||
149 | else | ||||||
150 | dbgs() << "nul"; | ||||||
151 | dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n'; | ||||||
152 | } | ||||||
153 | #endif | ||||||
154 | }; | ||||||
155 | } | ||||||
156 | |||||||
157 | namespace { | ||||||
158 | //===--------------------------------------------------------------------===// | ||||||
159 | /// ISel - X86-specific code to select X86 machine instructions for | ||||||
160 | /// SelectionDAG operations. | ||||||
161 | /// | ||||||
162 | class X86DAGToDAGISel final : public SelectionDAGISel { | ||||||
163 | /// Keep a pointer to the X86Subtarget around so that we can | ||||||
164 | /// make the right decision when generating code for different targets. | ||||||
165 | const X86Subtarget *Subtarget; | ||||||
166 | |||||||
167 | /// If true, selector should try to optimize for minimum code size. | ||||||
168 | bool OptForMinSize; | ||||||
169 | |||||||
170 | /// Disable direct TLS access through segment registers. | ||||||
171 | bool IndirectTlsSegRefs; | ||||||
172 | |||||||
173 | public: | ||||||
174 | explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) | ||||||
175 | : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), | ||||||
176 | OptForMinSize(false), IndirectTlsSegRefs(false) {} | ||||||
177 | |||||||
178 | StringRef getPassName() const override { | ||||||
179 | return "X86 DAG->DAG Instruction Selection"; | ||||||
180 | } | ||||||
181 | |||||||
182 | bool runOnMachineFunction(MachineFunction &MF) override { | ||||||
183 | // Reset the subtarget each time through. | ||||||
184 | Subtarget = &MF.getSubtarget<X86Subtarget>(); | ||||||
185 | IndirectTlsSegRefs = MF.getFunction().hasFnAttribute( | ||||||
186 | "indirect-tls-seg-refs"); | ||||||
187 | |||||||
188 | // OptFor[Min]Size are used in pattern predicates that isel is matching. | ||||||
189 | OptForMinSize = MF.getFunction().hasMinSize(); | ||||||
190 | assert((!OptForMinSize || MF.getFunction().hasOptSize()) &&(static_cast <bool> ((!OptForMinSize || MF.getFunction( ).hasOptSize()) && "OptForMinSize implies OptForSize" ) ? void (0) : __assert_fail ("(!OptForMinSize || MF.getFunction().hasOptSize()) && \"OptForMinSize implies OptForSize\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 191, __extension__ __PRETTY_FUNCTION__)) | ||||||
191 | "OptForMinSize implies OptForSize")(static_cast <bool> ((!OptForMinSize || MF.getFunction( ).hasOptSize()) && "OptForMinSize implies OptForSize" ) ? void (0) : __assert_fail ("(!OptForMinSize || MF.getFunction().hasOptSize()) && \"OptForMinSize implies OptForSize\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 191, __extension__ __PRETTY_FUNCTION__)); | ||||||
192 | |||||||
193 | SelectionDAGISel::runOnMachineFunction(MF); | ||||||
194 | return true; | ||||||
195 | } | ||||||
196 | |||||||
197 | void emitFunctionEntryCode() override; | ||||||
198 | |||||||
199 | bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; | ||||||
200 | |||||||
201 | void PreprocessISelDAG() override; | ||||||
202 | void PostprocessISelDAG() override; | ||||||
203 | |||||||
204 | // Include the pieces autogenerated from the target description. | ||||||
205 | #include "X86GenDAGISel.inc" | ||||||
206 | |||||||
207 | private: | ||||||
208 | void Select(SDNode *N) override; | ||||||
209 | |||||||
210 | bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); | ||||||
211 | bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, | ||||||
212 | bool AllowSegmentRegForX32 = false); | ||||||
213 | bool matchWrapper(SDValue N, X86ISelAddressMode &AM); | ||||||
214 | bool matchAddress(SDValue N, X86ISelAddressMode &AM); | ||||||
215 | bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM); | ||||||
216 | bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth); | ||||||
217 | bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, | ||||||
218 | unsigned Depth); | ||||||
219 | bool matchAddressBase(SDValue N, X86ISelAddressMode &AM); | ||||||
220 | bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base, | ||||||
221 | SDValue &Scale, SDValue &Index, SDValue &Disp, | ||||||
222 | SDValue &Segment); | ||||||
223 | bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp, | ||||||
224 | SDValue ScaleOp, SDValue &Base, SDValue &Scale, | ||||||
225 | SDValue &Index, SDValue &Disp, SDValue &Segment); | ||||||
226 | bool selectMOV64Imm32(SDValue N, SDValue &Imm); | ||||||
227 | bool selectLEAAddr(SDValue N, SDValue &Base, | ||||||
228 | SDValue &Scale, SDValue &Index, SDValue &Disp, | ||||||
229 | SDValue &Segment); | ||||||
230 | bool selectLEA64_32Addr(SDValue N, SDValue &Base, | ||||||
231 | SDValue &Scale, SDValue &Index, SDValue &Disp, | ||||||
232 | SDValue &Segment); | ||||||
233 | bool selectTLSADDRAddr(SDValue N, SDValue &Base, | ||||||
234 | SDValue &Scale, SDValue &Index, SDValue &Disp, | ||||||
235 | SDValue &Segment); | ||||||
236 | bool selectRelocImm(SDValue N, SDValue &Op); | ||||||
237 | |||||||
238 | bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, | ||||||
239 | SDValue &Base, SDValue &Scale, | ||||||
240 | SDValue &Index, SDValue &Disp, | ||||||
241 | SDValue &Segment); | ||||||
242 | |||||||
243 | // Convenience method where P is also root. | ||||||
244 | bool tryFoldLoad(SDNode *P, SDValue N, | ||||||
245 | SDValue &Base, SDValue &Scale, | ||||||
246 | SDValue &Index, SDValue &Disp, | ||||||
247 | SDValue &Segment) { | ||||||
248 | return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment); | ||||||
249 | } | ||||||
250 | |||||||
251 | bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, | ||||||
252 | SDValue &Base, SDValue &Scale, | ||||||
253 | SDValue &Index, SDValue &Disp, | ||||||
254 | SDValue &Segment); | ||||||
255 | |||||||
256 | bool isProfitableToFormMaskedOp(SDNode *N) const; | ||||||
257 | |||||||
258 | /// Implement addressing mode selection for inline asm expressions. | ||||||
259 | bool SelectInlineAsmMemoryOperand(const SDValue &Op, | ||||||
260 | unsigned ConstraintID, | ||||||
261 | std::vector<SDValue> &OutOps) override; | ||||||
262 | |||||||
263 | void emitSpecialCodeForMain(); | ||||||
264 | |||||||
265 | inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL, | ||||||
266 | MVT VT, SDValue &Base, SDValue &Scale, | ||||||
267 | SDValue &Index, SDValue &Disp, | ||||||
268 | SDValue &Segment) { | ||||||
269 | if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) | ||||||
270 | Base = CurDAG->getTargetFrameIndex( | ||||||
271 | AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout())); | ||||||
272 | else if (AM.Base_Reg.getNode()) | ||||||
273 | Base = AM.Base_Reg; | ||||||
274 | else | ||||||
275 | Base = CurDAG->getRegister(0, VT); | ||||||
276 | |||||||
277 | Scale = getI8Imm(AM.Scale, DL); | ||||||
278 | |||||||
279 | // Negate the index if needed. | ||||||
280 | if (AM.NegateIndex) { | ||||||
281 | unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r; | ||||||
282 | SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32, | ||||||
283 | AM.IndexReg), 0); | ||||||
284 | AM.IndexReg = Neg; | ||||||
285 | } | ||||||
286 | |||||||
287 | if (AM.IndexReg.getNode()) | ||||||
288 | Index = AM.IndexReg; | ||||||
289 | else | ||||||
290 | Index = CurDAG->getRegister(0, VT); | ||||||
291 | |||||||
292 | // These are 32-bit even in 64-bit mode since RIP-relative offset | ||||||
293 | // is 32-bit. | ||||||
294 | if (AM.GV) | ||||||
295 | Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), | ||||||
296 | MVT::i32, AM.Disp, | ||||||
297 | AM.SymbolFlags); | ||||||
298 | else if (AM.CP) | ||||||
299 | Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment, | ||||||
300 | AM.Disp, AM.SymbolFlags); | ||||||
301 | else if (AM.ES) { | ||||||
302 | assert(!AM.Disp && "Non-zero displacement is ignored with ES.")(static_cast <bool> (!AM.Disp && "Non-zero displacement is ignored with ES." ) ? void (0) : __assert_fail ("!AM.Disp && \"Non-zero displacement is ignored with ES.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 302, __extension__ __PRETTY_FUNCTION__)); | ||||||
303 | Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); | ||||||
304 | } else if (AM.MCSym) { | ||||||
305 | assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.")(static_cast <bool> (!AM.Disp && "Non-zero displacement is ignored with MCSym." ) ? void (0) : __assert_fail ("!AM.Disp && \"Non-zero displacement is ignored with MCSym.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 305, __extension__ __PRETTY_FUNCTION__)); | ||||||
306 | assert(AM.SymbolFlags == 0 && "oo")(static_cast <bool> (AM.SymbolFlags == 0 && "oo" ) ? void (0) : __assert_fail ("AM.SymbolFlags == 0 && \"oo\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 306, __extension__ __PRETTY_FUNCTION__)); | ||||||
307 | Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32); | ||||||
308 | } else if (AM.JT != -1) { | ||||||
309 | assert(!AM.Disp && "Non-zero displacement is ignored with JT.")(static_cast <bool> (!AM.Disp && "Non-zero displacement is ignored with JT." ) ? void (0) : __assert_fail ("!AM.Disp && \"Non-zero displacement is ignored with JT.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 309, __extension__ __PRETTY_FUNCTION__)); | ||||||
310 | Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); | ||||||
311 | } else if (AM.BlockAddr) | ||||||
312 | Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, | ||||||
313 | AM.SymbolFlags); | ||||||
314 | else | ||||||
315 | Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32); | ||||||
316 | |||||||
317 | if (AM.Segment.getNode()) | ||||||
318 | Segment = AM.Segment; | ||||||
319 | else | ||||||
320 | Segment = CurDAG->getRegister(0, MVT::i16); | ||||||
321 | } | ||||||
322 | |||||||
323 | // Utility function to determine whether we should avoid selecting | ||||||
324 | // immediate forms of instructions for better code size or not. | ||||||
325 | // At a high level, we'd like to avoid such instructions when | ||||||
326 | // we have similar constants used within the same basic block | ||||||
327 | // that can be kept in a register. | ||||||
328 | // | ||||||
329 | bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { | ||||||
330 | uint32_t UseCount = 0; | ||||||
331 | |||||||
332 | // Do not want to hoist if we're not optimizing for size. | ||||||
333 | // TODO: We'd like to remove this restriction. | ||||||
334 | // See the comment in X86InstrInfo.td for more info. | ||||||
335 | if (!CurDAG->shouldOptForSize()) | ||||||
336 | return false; | ||||||
337 | |||||||
338 | // Walk all the users of the immediate. | ||||||
339 | for (SDNode::use_iterator UI = N->use_begin(), | ||||||
340 | UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) { | ||||||
341 | |||||||
342 | SDNode *User = *UI; | ||||||
343 | |||||||
344 | // This user is already selected. Count it as a legitimate use and | ||||||
345 | // move on. | ||||||
346 | if (User->isMachineOpcode()) { | ||||||
347 | UseCount++; | ||||||
348 | continue; | ||||||
349 | } | ||||||
350 | |||||||
351 | // We want to count stores of immediates as real uses. | ||||||
352 | if (User->getOpcode() == ISD::STORE && | ||||||
353 | User->getOperand(1).getNode() == N) { | ||||||
354 | UseCount++; | ||||||
355 | continue; | ||||||
356 | } | ||||||
357 | |||||||
358 | // We don't currently match users that have > 2 operands (except | ||||||
359 | // for stores, which are handled above) | ||||||
360 | // Those instruction won't match in ISEL, for now, and would | ||||||
361 | // be counted incorrectly. | ||||||
362 | // This may change in the future as we add additional instruction | ||||||
363 | // types. | ||||||
364 | if (User->getNumOperands() != 2) | ||||||
365 | continue; | ||||||
366 | |||||||
367 | // If this is a sign-extended 8-bit integer immediate used in an ALU | ||||||
368 | // instruction, there is probably an opcode encoding to save space. | ||||||
369 | auto *C = dyn_cast<ConstantSDNode>(N); | ||||||
370 | if (C && isInt<8>(C->getSExtValue())) | ||||||
371 | continue; | ||||||
372 | |||||||
373 | // Immediates that are used for offsets as part of stack | ||||||
374 | // manipulation should be left alone. These are typically | ||||||
375 | // used to indicate SP offsets for argument passing and | ||||||
376 | // will get pulled into stores/pushes (implicitly). | ||||||
377 | if (User->getOpcode() == X86ISD::ADD || | ||||||
378 | User->getOpcode() == ISD::ADD || | ||||||
379 | User->getOpcode() == X86ISD::SUB || | ||||||
380 | User->getOpcode() == ISD::SUB) { | ||||||
381 | |||||||
382 | // Find the other operand of the add/sub. | ||||||
383 | SDValue OtherOp = User->getOperand(0); | ||||||
384 | if (OtherOp.getNode() == N) | ||||||
385 | OtherOp = User->getOperand(1); | ||||||
386 | |||||||
387 | // Don't count if the other operand is SP. | ||||||
388 | RegisterSDNode *RegNode; | ||||||
389 | if (OtherOp->getOpcode() == ISD::CopyFromReg && | ||||||
390 | (RegNode = dyn_cast_or_null<RegisterSDNode>( | ||||||
391 | OtherOp->getOperand(1).getNode()))) | ||||||
392 | if ((RegNode->getReg() == X86::ESP) || | ||||||
393 | (RegNode->getReg() == X86::RSP)) | ||||||
394 | continue; | ||||||
395 | } | ||||||
396 | |||||||
397 | // ... otherwise, count this and move on. | ||||||
398 | UseCount++; | ||||||
399 | } | ||||||
400 | |||||||
401 | // If we have more than 1 use, then recommend for hoisting. | ||||||
402 | return (UseCount > 1); | ||||||
403 | } | ||||||
404 | |||||||
405 | /// Return a target constant with the specified value of type i8. | ||||||
406 | inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) { | ||||||
407 | return CurDAG->getTargetConstant(Imm, DL, MVT::i8); | ||||||
408 | } | ||||||
409 | |||||||
410 | /// Return a target constant with the specified value, of type i32. | ||||||
411 | inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) { | ||||||
412 | return CurDAG->getTargetConstant(Imm, DL, MVT::i32); | ||||||
413 | } | ||||||
414 | |||||||
415 | /// Return a target constant with the specified value, of type i64. | ||||||
416 | inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) { | ||||||
417 | return CurDAG->getTargetConstant(Imm, DL, MVT::i64); | ||||||
418 | } | ||||||
419 | |||||||
420 | SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth, | ||||||
421 | const SDLoc &DL) { | ||||||
422 | assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width")(static_cast <bool> ((VecWidth == 128 || VecWidth == 256 ) && "Unexpected vector width") ? void (0) : __assert_fail ("(VecWidth == 128 || VecWidth == 256) && \"Unexpected vector width\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 422, __extension__ __PRETTY_FUNCTION__)); | ||||||
423 | uint64_t Index = N->getConstantOperandVal(1); | ||||||
424 | MVT VecVT = N->getOperand(0).getSimpleValueType(); | ||||||
425 | return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); | ||||||
426 | } | ||||||
427 | |||||||
428 | SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth, | ||||||
429 | const SDLoc &DL) { | ||||||
430 | assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width")(static_cast <bool> ((VecWidth == 128 || VecWidth == 256 ) && "Unexpected vector width") ? void (0) : __assert_fail ("(VecWidth == 128 || VecWidth == 256) && \"Unexpected vector width\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 430, __extension__ __PRETTY_FUNCTION__)); | ||||||
431 | uint64_t Index = N->getConstantOperandVal(2); | ||||||
432 | MVT VecVT = N->getSimpleValueType(0); | ||||||
433 | return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); | ||||||
434 | } | ||||||
435 | |||||||
436 | // Helper to detect unneeded and instructions on shift amounts. Called | ||||||
437 | // from PatFrags in tablegen. | ||||||
438 | bool isUnneededShiftMask(SDNode *N, unsigned Width) const { | ||||||
439 | assert(N->getOpcode() == ISD::AND && "Unexpected opcode")(static_cast <bool> (N->getOpcode() == ISD::AND && "Unexpected opcode") ? void (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"Unexpected opcode\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 439, __extension__ __PRETTY_FUNCTION__)); | ||||||
440 | const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); | ||||||
441 | |||||||
442 | if (Val.countTrailingOnes() >= Width) | ||||||
443 | return true; | ||||||
444 | |||||||
445 | APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero; | ||||||
446 | return Mask.countTrailingOnes() >= Width; | ||||||
447 | } | ||||||
448 | |||||||
449 | /// Return an SDNode that returns the value of the global base register. | ||||||
450 | /// Output instructions required to initialize the global base register, | ||||||
451 | /// if necessary. | ||||||
452 | SDNode *getGlobalBaseReg(); | ||||||
453 | |||||||
454 | /// Return a reference to the TargetMachine, casted to the target-specific | ||||||
455 | /// type. | ||||||
456 | const X86TargetMachine &getTargetMachine() const { | ||||||
457 | return static_cast<const X86TargetMachine &>(TM); | ||||||
458 | } | ||||||
459 | |||||||
460 | /// Return a reference to the TargetInstrInfo, casted to the target-specific | ||||||
461 | /// type. | ||||||
462 | const X86InstrInfo *getInstrInfo() const { | ||||||
463 | return Subtarget->getInstrInfo(); | ||||||
464 | } | ||||||
465 | |||||||
466 | /// Address-mode matching performs shift-of-and to and-of-shift | ||||||
467 | /// reassociation in order to expose more scaled addressing | ||||||
468 | /// opportunities. | ||||||
469 | bool ComplexPatternFuncMutatesDAG() const override { | ||||||
470 | return true; | ||||||
471 | } | ||||||
472 | |||||||
473 | bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const; | ||||||
474 | |||||||
475 | // Indicates we should prefer to use a non-temporal load for this load. | ||||||
476 | bool useNonTemporalLoad(LoadSDNode *N) const { | ||||||
477 | if (!N->isNonTemporal()) | ||||||
478 | return false; | ||||||
479 | |||||||
480 | unsigned StoreSize = N->getMemoryVT().getStoreSize(); | ||||||
481 | |||||||
482 | if (N->getAlignment() < StoreSize) | ||||||
483 | return false; | ||||||
484 | |||||||
485 | switch (StoreSize) { | ||||||
486 | default: llvm_unreachable("Unsupported store size")::llvm::llvm_unreachable_internal("Unsupported store size", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 486); | ||||||
487 | case 4: | ||||||
488 | case 8: | ||||||
489 | return false; | ||||||
490 | case 16: | ||||||
491 | return Subtarget->hasSSE41(); | ||||||
492 | case 32: | ||||||
493 | return Subtarget->hasAVX2(); | ||||||
494 | case 64: | ||||||
495 | return Subtarget->hasAVX512(); | ||||||
496 | } | ||||||
497 | } | ||||||
498 | |||||||
499 | bool foldLoadStoreIntoMemOperand(SDNode *Node); | ||||||
500 | MachineSDNode *matchBEXTRFromAndImm(SDNode *Node); | ||||||
501 | bool matchBitExtract(SDNode *Node); | ||||||
502 | bool shrinkAndImmediate(SDNode *N); | ||||||
503 | bool isMaskZeroExtended(SDNode *N) const; | ||||||
504 | bool tryShiftAmountMod(SDNode *N); | ||||||
505 | bool tryShrinkShlLogicImm(SDNode *N); | ||||||
506 | bool tryVPTERNLOG(SDNode *N); | ||||||
507 | bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentBC, | ||||||
508 | SDValue A, SDValue B, SDValue C, uint8_t Imm); | ||||||
509 | bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); | ||||||
510 | bool tryMatchBitSelect(SDNode *N); | ||||||
511 | |||||||
512 | MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, | ||||||
513 | const SDLoc &dl, MVT VT, SDNode *Node); | ||||||
514 | MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, | ||||||
515 | const SDLoc &dl, MVT VT, SDNode *Node, | ||||||
516 | SDValue &InFlag); | ||||||
517 | |||||||
518 | bool tryOptimizeRem8Extend(SDNode *N); | ||||||
519 | |||||||
520 | bool onlyUsesZeroFlag(SDValue Flags) const; | ||||||
521 | bool hasNoSignFlagUses(SDValue Flags) const; | ||||||
522 | bool hasNoCarryFlagUses(SDValue Flags) const; | ||||||
523 | }; | ||||||
524 | } | ||||||
525 | |||||||
526 | |||||||
527 | // Returns true if this masked compare can be implemented legally with this | ||||||
528 | // type. | ||||||
529 | static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { | ||||||
530 | unsigned Opcode = N->getOpcode(); | ||||||
531 | if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM || | ||||||
532 | Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC || | ||||||
533 | Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) { | ||||||
534 | // We can get 256-bit 8 element types here without VLX being enabled. When | ||||||
535 | // this happens we will use 512-bit operations and the mask will not be | ||||||
536 | // zero extended. | ||||||
537 | EVT OpVT = N->getOperand(0).getValueType(); | ||||||
538 | // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the | ||||||
539 | // second operand. | ||||||
540 | if (Opcode == X86ISD::STRICT_CMPM) | ||||||
541 | OpVT = N->getOperand(1).getValueType(); | ||||||
542 | if (OpVT.is256BitVector() || OpVT.is128BitVector()) | ||||||
543 | return Subtarget->hasVLX(); | ||||||
544 | |||||||
545 | return true; | ||||||
546 | } | ||||||
547 | // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check. | ||||||
548 | if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM || | ||||||
549 | Opcode == X86ISD::FSETCCM_SAE) | ||||||
550 | return true; | ||||||
551 | |||||||
552 | return false; | ||||||
553 | } | ||||||
554 | |||||||
555 | // Returns true if we can assume the writer of the mask has zero extended it | ||||||
556 | // for us. | ||||||
557 | bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const { | ||||||
558 | // If this is an AND, check if we have a compare on either side. As long as | ||||||
559 | // one side guarantees the mask is zero extended, the AND will preserve those | ||||||
560 | // zeros. | ||||||
561 | if (N->getOpcode() == ISD::AND) | ||||||
562 | return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) || | ||||||
563 | isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget); | ||||||
564 | |||||||
565 | return isLegalMaskCompare(N, Subtarget); | ||||||
566 | } | ||||||
567 | |||||||
568 | bool | ||||||
569 | X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { | ||||||
570 | if (OptLevel == CodeGenOpt::None) return false; | ||||||
571 | |||||||
572 | if (!N.hasOneUse()) | ||||||
573 | return false; | ||||||
574 | |||||||
575 | if (N.getOpcode() != ISD::LOAD) | ||||||
576 | return true; | ||||||
577 | |||||||
578 | // Don't fold non-temporal loads if we have an instruction for them. | ||||||
579 | if (useNonTemporalLoad(cast<LoadSDNode>(N))) | ||||||
580 | return false; | ||||||
581 | |||||||
582 | // If N is a load, do additional profitability checks. | ||||||
583 | if (U == Root) { | ||||||
584 | switch (U->getOpcode()) { | ||||||
585 | default: break; | ||||||
586 | case X86ISD::ADD: | ||||||
587 | case X86ISD::ADC: | ||||||
588 | case X86ISD::SUB: | ||||||
589 | case X86ISD::SBB: | ||||||
590 | case X86ISD::AND: | ||||||
591 | case X86ISD::XOR: | ||||||
592 | case X86ISD::OR: | ||||||
593 | case ISD::ADD: | ||||||
594 | case ISD::ADDCARRY: | ||||||
595 | case ISD::AND: | ||||||
596 | case ISD::OR: | ||||||
597 | case ISD::XOR: { | ||||||
598 | SDValue Op1 = U->getOperand(1); | ||||||
599 | |||||||
600 | // If the other operand is a 8-bit immediate we should fold the immediate | ||||||
601 | // instead. This reduces code size. | ||||||
602 | // e.g. | ||||||
603 | // movl 4(%esp), %eax | ||||||
604 | // addl $4, %eax | ||||||
605 | // vs. | ||||||
606 | // movl $4, %eax | ||||||
607 | // addl 4(%esp), %eax | ||||||
608 | // The former is 2 bytes shorter. In case where the increment is 1, then | ||||||
609 | // the saving can be 4 bytes (by using incl %eax). | ||||||
610 | if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) { | ||||||
611 | if (Imm->getAPIntValue().isSignedIntN(8)) | ||||||
612 | return false; | ||||||
613 | |||||||
614 | // If this is a 64-bit AND with an immediate that fits in 32-bits, | ||||||
615 | // prefer using the smaller and over folding the load. This is needed to | ||||||
616 | // make sure immediates created by shrinkAndImmediate are always folded. | ||||||
617 | // Ideally we would narrow the load during DAG combine and get the | ||||||
618 | // best of both worlds. | ||||||
619 | if (U->getOpcode() == ISD::AND && | ||||||
620 | Imm->getAPIntValue().getBitWidth() == 64 && | ||||||
621 | Imm->getAPIntValue().isIntN(32)) | ||||||
622 | return false; | ||||||
623 | |||||||
624 | // If this really a zext_inreg that can be represented with a movzx | ||||||
625 | // instruction, prefer that. | ||||||
626 | // TODO: We could shrink the load and fold if it is non-volatile. | ||||||
627 | if (U->getOpcode() == ISD::AND && | ||||||
628 | (Imm->getAPIntValue() == UINT8_MAX(255) || | ||||||
629 | Imm->getAPIntValue() == UINT16_MAX(65535) || | ||||||
630 | Imm->getAPIntValue() == UINT32_MAX(4294967295U))) | ||||||
631 | return false; | ||||||
632 | |||||||
633 | // ADD/SUB with can negate the immediate and use the opposite operation | ||||||
634 | // to fit 128 into a sign extended 8 bit immediate. | ||||||
635 | if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) && | ||||||
636 | (-Imm->getAPIntValue()).isSignedIntN(8)) | ||||||
637 | return false; | ||||||
638 | |||||||
639 | if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) && | ||||||
640 | (-Imm->getAPIntValue()).isSignedIntN(8) && | ||||||
641 | hasNoCarryFlagUses(SDValue(U, 1))) | ||||||
642 | return false; | ||||||
643 | } | ||||||
644 | |||||||
645 | // If the other operand is a TLS address, we should fold it instead. | ||||||
646 | // This produces | ||||||
647 | // movl %gs:0, %eax | ||||||
648 | // leal i@NTPOFF(%eax), %eax | ||||||
649 | // instead of | ||||||
650 | // movl $i@NTPOFF, %eax | ||||||
651 | // addl %gs:0, %eax | ||||||
652 | // if the block also has an access to a second TLS address this will save | ||||||
653 | // a load. | ||||||
654 | // FIXME: This is probably also true for non-TLS addresses. | ||||||
655 | if (Op1.getOpcode() == X86ISD::Wrapper) { | ||||||
656 | SDValue Val = Op1.getOperand(0); | ||||||
657 | if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) | ||||||
658 | return false; | ||||||
659 | } | ||||||
660 | |||||||
661 | // Don't fold load if this matches the BTS/BTR/BTC patterns. | ||||||
662 | // BTS: (or X, (shl 1, n)) | ||||||
663 | // BTR: (and X, (rotl -2, n)) | ||||||
664 | // BTC: (xor X, (shl 1, n)) | ||||||
665 | if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) { | ||||||
666 | if (U->getOperand(0).getOpcode() == ISD::SHL && | ||||||
667 | isOneConstant(U->getOperand(0).getOperand(0))) | ||||||
668 | return false; | ||||||
669 | |||||||
670 | if (U->getOperand(1).getOpcode() == ISD::SHL && | ||||||
671 | isOneConstant(U->getOperand(1).getOperand(0))) | ||||||
672 | return false; | ||||||
673 | } | ||||||
674 | if (U->getOpcode() == ISD::AND) { | ||||||
675 | SDValue U0 = U->getOperand(0); | ||||||
676 | SDValue U1 = U->getOperand(1); | ||||||
677 | if (U0.getOpcode() == ISD::ROTL) { | ||||||
678 | auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0)); | ||||||
679 | if (C && C->getSExtValue() == -2) | ||||||
680 | return false; | ||||||
681 | } | ||||||
682 | |||||||
683 | if (U1.getOpcode() == ISD::ROTL) { | ||||||
684 | auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0)); | ||||||
685 | if (C && C->getSExtValue() == -2) | ||||||
686 | return false; | ||||||
687 | } | ||||||
688 | } | ||||||
689 | |||||||
690 | break; | ||||||
691 | } | ||||||
692 | case ISD::SHL: | ||||||
693 | case ISD::SRA: | ||||||
694 | case ISD::SRL: | ||||||
695 | // Don't fold a load into a shift by immediate. The BMI2 instructions | ||||||
696 | // support folding a load, but not an immediate. The legacy instructions | ||||||
697 | // support folding an immediate, but can't fold a load. Folding an | ||||||
698 | // immediate is preferable to folding a load. | ||||||
699 | if (isa<ConstantSDNode>(U->getOperand(1))) | ||||||
700 | return false; | ||||||
701 | |||||||
702 | break; | ||||||
703 | } | ||||||
704 | } | ||||||
705 | |||||||
706 | // Prevent folding a load if this can implemented with an insert_subreg or | ||||||
707 | // a move that implicitly zeroes. | ||||||
708 | if (Root->getOpcode() == ISD::INSERT_SUBVECTOR && | ||||||
709 | isNullConstant(Root->getOperand(2)) && | ||||||
710 | (Root->getOperand(0).isUndef() || | ||||||
711 | ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode()))) | ||||||
712 | return false; | ||||||
713 | |||||||
714 | return true; | ||||||
715 | } | ||||||
716 | |||||||
717 | // Indicates it is profitable to form an AVX512 masked operation. Returning | ||||||
718 | // false will favor a masked register-register masked move or vblendm and the | ||||||
719 | // operation will be selected separately. | ||||||
720 | bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const { | ||||||
721 | assert((static_cast <bool> ((N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && "Unexpected opcode!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && \"Unexpected opcode!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 723, __extension__ __PRETTY_FUNCTION__)) | ||||||
722 | (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) &&(static_cast <bool> ((N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && "Unexpected opcode!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && \"Unexpected opcode!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 723, __extension__ __PRETTY_FUNCTION__)) | ||||||
723 | "Unexpected opcode!")(static_cast <bool> ((N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && "Unexpected opcode!" ) ? void (0) : __assert_fail ("(N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && \"Unexpected opcode!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 723, __extension__ __PRETTY_FUNCTION__)); | ||||||
724 | |||||||
725 | // If the operation has additional users, the operation will be duplicated. | ||||||
726 | // Check the use count to prevent that. | ||||||
727 | // FIXME: Are there cheap opcodes we might want to duplicate? | ||||||
728 | return N->getOperand(1).hasOneUse(); | ||||||
729 | } | ||||||
730 | |||||||
/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
///
/// On entry, Load's chain feeds (directly or via a TokenFactor) into
/// OrigChain, which in turn feeds Call. The three UpdateNodeOperands calls
/// below rewire the nodes in place so that Load instead sits between the
/// call's incoming chain and the call itself, allowing the load to be
/// folded into the call instruction.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    // OrigChain's chain input is the load itself: splice the load out by
    // substituting the load's own incoming chain.
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    // The load reaches OrigChain through a TokenFactor: rebuild the
    // TokenFactor with the load replaced by the load's incoming chain.
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  // Re-attach OrigChain's non-chain operands (operand 0 is the chain, which
  // we just replaced above).
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  // Move the load below: its new incoming chain is the call's old chain
  // operand; base pointer and offset operands are kept as-is.
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  // Finally, make the call's chain operand be the load's output chain
  // (value #1 of the load), keeping the call's remaining operands.
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}
762 | |||||||
/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
///
/// \param Callee     the call target operand; must be a simple, unindexed,
///                   non-extending load for the transform to apply.
/// \param Chain      in: the call's chain operand; out: updated in place to
///                   the CALLSEQ_START (or last chain node walked).
/// \param HasCallSeq whether a CALLSEQ_START is expected between the call
///                   and the load (false for tail calls).
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      // Volatile/atomic loads must not be moved.
      !LD->isSimple() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    // Bail if any intermediate chain node has other users; moving the load
    // past it could then be observed elsewhere.
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  // A chain node with no operands (e.g. the entry token) cannot lead back to
  // the load.
  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  // Accept if the chain's input is the load directly...
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  // ...or if the load feeds a TokenFactor that feeds the chain, and the
  // load's output chain has no other users.
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}
804 | |||||||
/// Return true if the 64-bit immediate encodes an ENDBR64 instruction
/// (0xF3 0x0F 0x1E 0xFA), possibly with legal prefix bytes between the
/// 0xF3 and the 0x0F1EFA tail.
/// e.g.: 0xF3660F1EFA, 0xF3670F1EFA
static bool isEndbrImm64(uint64_t Imm) {
  // The low 24 bits must be the opcode tail 0x0F1EFA.
  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
    return false;

  // Scan the remaining bytes (the 24 low bits already matched). Optional
  // prefix bytes may appear before the mandatory 0xF3.
  for (int Shift = 24; Shift < 64; Shift += 8) {
    const uint8_t Byte = static_cast<uint8_t>(Imm >> Shift);
    if (Byte == 0xF3)
      return true;
    switch (Byte) {
    case 0x26: case 0x2e: case 0x36: case 0x3e: case 0x64:
    case 0x65: case 0x66: case 0x67: case 0xf0: case 0xf2:
      break; // Allowed optional prefix byte; keep scanning upward.
    default:
      return false;
    }
  }

  return false;
}
825 | |||||||
826 | void X86DAGToDAGISel::PreprocessISelDAG() { | ||||||
827 | bool MadeChange = false; | ||||||
828 | for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), | ||||||
829 | E = CurDAG->allnodes_end(); I != E; ) { | ||||||
830 | SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. | ||||||
831 | |||||||
832 | // This is for CET enhancement. | ||||||
833 | // | ||||||
834 | // ENDBR32 and ENDBR64 have specific opcodes: | ||||||
835 | // ENDBR32: F3 0F 1E FB | ||||||
836 | // ENDBR64: F3 0F 1E FA | ||||||
837 | // And we want that attackers won’t find unintended ENDBR32/64 | ||||||
838 | // opcode matches in the binary | ||||||
839 | // Here’s an example: | ||||||
840 | // If the compiler had to generate asm for the following code: | ||||||
841 | // a = 0xF30F1EFA | ||||||
842 | // it could, for example, generate: | ||||||
843 | // mov 0xF30F1EFA, dword ptr[a] | ||||||
844 | // In such a case, the binary would include a gadget that starts | ||||||
845 | // with a fake ENDBR64 opcode. Therefore, we split such generation | ||||||
846 | // into multiple operations, let it not shows in the binary | ||||||
847 | if (N->getOpcode() == ISD::Constant) { | ||||||
848 | MVT VT = N->getSimpleValueType(0); | ||||||
849 | int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue(); | ||||||
850 | int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB; | ||||||
851 | if (Imm == EndbrImm || isEndbrImm64(Imm)) { | ||||||
852 | // Check that the cf-protection-branch is enabled. | ||||||
853 | Metadata *CFProtectionBranch = | ||||||
854 | MF->getMMI().getModule()->getModuleFlag("cf-protection-branch"); | ||||||
855 | if (CFProtectionBranch || IndirectBranchTracking) { | ||||||
856 | SDLoc dl(N); | ||||||
857 | SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true); | ||||||
858 | Complement = CurDAG->getNOT(dl, Complement, VT); | ||||||
859 | --I; | ||||||
860 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement); | ||||||
861 | ++I; | ||||||
862 | MadeChange = true; | ||||||
863 | continue; | ||||||
864 | } | ||||||
865 | } | ||||||
866 | } | ||||||
867 | |||||||
868 | // If this is a target specific AND node with no flag usages, turn it back | ||||||
869 | // into ISD::AND to enable test instruction matching. | ||||||
870 | if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) { | ||||||
871 | SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0), | ||||||
872 | N->getOperand(0), N->getOperand(1)); | ||||||
873 | --I; | ||||||
874 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||||
875 | ++I; | ||||||
876 | MadeChange = true; | ||||||
877 | continue; | ||||||
878 | } | ||||||
879 | |||||||
880 | /// Convert vector increment or decrement to sub/add with an all-ones | ||||||
881 | /// constant: | ||||||
882 | /// add X, <1, 1...> --> sub X, <-1, -1...> | ||||||
883 | /// sub X, <1, 1...> --> add X, <-1, -1...> | ||||||
884 | /// The all-ones vector constant can be materialized using a pcmpeq | ||||||
885 | /// instruction that is commonly recognized as an idiom (has no register | ||||||
886 | /// dependency), so that's better/smaller than loading a splat 1 constant. | ||||||
887 | if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && | ||||||
888 | N->getSimpleValueType(0).isVector()) { | ||||||
889 | |||||||
890 | APInt SplatVal; | ||||||
891 | if (X86::isConstantSplat(N->getOperand(1), SplatVal) && | ||||||
892 | SplatVal.isOneValue()) { | ||||||
893 | SDLoc DL(N); | ||||||
894 | |||||||
895 | MVT VT = N->getSimpleValueType(0); | ||||||
896 | unsigned NumElts = VT.getSizeInBits() / 32; | ||||||
897 | SDValue AllOnes = | ||||||
898 | CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts)); | ||||||
899 | AllOnes = CurDAG->getBitcast(VT, AllOnes); | ||||||
900 | |||||||
901 | unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; | ||||||
902 | SDValue Res = | ||||||
903 | CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes); | ||||||
904 | --I; | ||||||
905 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); | ||||||
906 | ++I; | ||||||
907 | MadeChange = true; | ||||||
908 | continue; | ||||||
909 | } | ||||||
910 | } | ||||||
911 | |||||||
912 | switch (N->getOpcode()) { | ||||||
913 | case X86ISD::VBROADCAST: { | ||||||
914 | MVT VT = N->getSimpleValueType(0); | ||||||
915 | // Emulate v32i16/v64i8 broadcast without BWI. | ||||||
916 | if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) { | ||||||
917 | MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8; | ||||||
918 | SDLoc dl(N); | ||||||
919 | SDValue NarrowBCast = | ||||||
920 | CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0)); | ||||||
921 | SDValue Res = | ||||||
922 | CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT), | ||||||
923 | NarrowBCast, CurDAG->getIntPtrConstant(0, dl)); | ||||||
924 | unsigned Index = VT == MVT::v32i16 ? 16 : 32; | ||||||
925 | Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast, | ||||||
926 | CurDAG->getIntPtrConstant(Index, dl)); | ||||||
927 | |||||||
928 | --I; | ||||||
929 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); | ||||||
930 | ++I; | ||||||
931 | MadeChange = true; | ||||||
932 | continue; | ||||||
933 | } | ||||||
934 | |||||||
935 | break; | ||||||
936 | } | ||||||
937 | case X86ISD::VBROADCAST_LOAD: { | ||||||
938 | MVT VT = N->getSimpleValueType(0); | ||||||
939 | // Emulate v32i16/v64i8 broadcast without BWI. | ||||||
940 | if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) { | ||||||
941 | MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8; | ||||||
942 | auto *MemNode = cast<MemSDNode>(N); | ||||||
943 | SDLoc dl(N); | ||||||
944 | SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other); | ||||||
945 | SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()}; | ||||||
946 | SDValue NarrowBCast = CurDAG->getMemIntrinsicNode( | ||||||
947 | X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(), | ||||||
948 | MemNode->getMemOperand()); | ||||||
949 | SDValue Res = | ||||||
950 | CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT), | ||||||
951 | NarrowBCast, CurDAG->getIntPtrConstant(0, dl)); | ||||||
952 | unsigned Index = VT == MVT::v32i16 ? 16 : 32; | ||||||
953 | Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast, | ||||||
954 | CurDAG->getIntPtrConstant(Index, dl)); | ||||||
955 | |||||||
956 | --I; | ||||||
957 | SDValue To[] = {Res, NarrowBCast.getValue(1)}; | ||||||
958 | CurDAG->ReplaceAllUsesWith(N, To); | ||||||
959 | ++I; | ||||||
960 | MadeChange = true; | ||||||
961 | continue; | ||||||
962 | } | ||||||
963 | |||||||
964 | break; | ||||||
965 | } | ||||||
966 | case ISD::VSELECT: { | ||||||
967 | // Replace VSELECT with non-mask conditions with with BLENDV. | ||||||
968 | if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1) | ||||||
969 | break; | ||||||
970 | |||||||
971 | assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!")(static_cast <bool> (Subtarget->hasSSE41() && "Expected SSE4.1 support!") ? void (0) : __assert_fail ("Subtarget->hasSSE41() && \"Expected SSE4.1 support!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 971, __extension__ __PRETTY_FUNCTION__)); | ||||||
972 | SDValue Blendv = | ||||||
973 | CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), | ||||||
974 | N->getOperand(0), N->getOperand(1), N->getOperand(2)); | ||||||
975 | --I; | ||||||
976 | CurDAG->ReplaceAllUsesWith(N, Blendv.getNode()); | ||||||
977 | ++I; | ||||||
978 | MadeChange = true; | ||||||
979 | continue; | ||||||
980 | } | ||||||
981 | case ISD::FP_ROUND: | ||||||
982 | case ISD::STRICT_FP_ROUND: | ||||||
983 | case ISD::FP_TO_SINT: | ||||||
984 | case ISD::FP_TO_UINT: | ||||||
985 | case ISD::STRICT_FP_TO_SINT: | ||||||
986 | case ISD::STRICT_FP_TO_UINT: { | ||||||
987 | // Replace vector fp_to_s/uint with their X86 specific equivalent so we | ||||||
988 | // don't need 2 sets of patterns. | ||||||
989 | if (!N->getSimpleValueType(0).isVector()) | ||||||
990 | break; | ||||||
991 | |||||||
992 | unsigned NewOpc; | ||||||
993 | switch (N->getOpcode()) { | ||||||
994 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 994); | ||||||
995 | case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break; | ||||||
996 | case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break; | ||||||
997 | case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break; | ||||||
998 | case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break; | ||||||
999 | case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break; | ||||||
1000 | case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break; | ||||||
1001 | } | ||||||
1002 | SDValue Res; | ||||||
1003 | if (N->isStrictFPOpcode()) | ||||||
1004 | Res = | ||||||
1005 | CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other}, | ||||||
1006 | {N->getOperand(0), N->getOperand(1)}); | ||||||
1007 | else | ||||||
1008 | Res = | ||||||
1009 | CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), | ||||||
1010 | N->getOperand(0)); | ||||||
1011 | --I; | ||||||
1012 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); | ||||||
1013 | ++I; | ||||||
1014 | MadeChange = true; | ||||||
1015 | continue; | ||||||
1016 | } | ||||||
1017 | case ISD::SHL: | ||||||
1018 | case ISD::SRA: | ||||||
1019 | case ISD::SRL: { | ||||||
1020 | // Replace vector shifts with their X86 specific equivalent so we don't | ||||||
1021 | // need 2 sets of patterns. | ||||||
1022 | if (!N->getValueType(0).isVector()) | ||||||
1023 | break; | ||||||
1024 | |||||||
1025 | unsigned NewOpc; | ||||||
1026 | switch (N->getOpcode()) { | ||||||
1027 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1027); | ||||||
1028 | case ISD::SHL: NewOpc = X86ISD::VSHLV; break; | ||||||
1029 | case ISD::SRA: NewOpc = X86ISD::VSRAV; break; | ||||||
1030 | case ISD::SRL: NewOpc = X86ISD::VSRLV; break; | ||||||
1031 | } | ||||||
1032 | SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), | ||||||
1033 | N->getOperand(0), N->getOperand(1)); | ||||||
1034 | --I; | ||||||
1035 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||||
1036 | ++I; | ||||||
1037 | MadeChange = true; | ||||||
1038 | continue; | ||||||
1039 | } | ||||||
1040 | case ISD::ANY_EXTEND: | ||||||
1041 | case ISD::ANY_EXTEND_VECTOR_INREG: { | ||||||
1042 | // Replace vector any extend with the zero extend equivalents so we don't | ||||||
1043 | // need 2 sets of patterns. Ignore vXi1 extensions. | ||||||
1044 | if (!N->getValueType(0).isVector()) | ||||||
1045 | break; | ||||||
1046 | |||||||
1047 | unsigned NewOpc; | ||||||
1048 | if (N->getOperand(0).getScalarValueSizeInBits() == 1) { | ||||||
1049 | assert(N->getOpcode() == ISD::ANY_EXTEND &&(static_cast <bool> (N->getOpcode() == ISD::ANY_EXTEND && "Unexpected opcode for mask vector!") ? void (0) : __assert_fail ("N->getOpcode() == ISD::ANY_EXTEND && \"Unexpected opcode for mask vector!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1050, __extension__ __PRETTY_FUNCTION__)) | ||||||
1050 | "Unexpected opcode for mask vector!")(static_cast <bool> (N->getOpcode() == ISD::ANY_EXTEND && "Unexpected opcode for mask vector!") ? void (0) : __assert_fail ("N->getOpcode() == ISD::ANY_EXTEND && \"Unexpected opcode for mask vector!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1050, __extension__ __PRETTY_FUNCTION__)); | ||||||
1051 | NewOpc = ISD::SIGN_EXTEND; | ||||||
1052 | } else { | ||||||
1053 | NewOpc = N->getOpcode() == ISD::ANY_EXTEND | ||||||
1054 | ? ISD::ZERO_EXTEND | ||||||
1055 | : ISD::ZERO_EXTEND_VECTOR_INREG; | ||||||
1056 | } | ||||||
1057 | |||||||
1058 | SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), | ||||||
1059 | N->getOperand(0)); | ||||||
1060 | --I; | ||||||
1061 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||||
1062 | ++I; | ||||||
1063 | MadeChange = true; | ||||||
1064 | continue; | ||||||
1065 | } | ||||||
1066 | case ISD::FCEIL: | ||||||
1067 | case ISD::STRICT_FCEIL: | ||||||
1068 | case ISD::FFLOOR: | ||||||
1069 | case ISD::STRICT_FFLOOR: | ||||||
1070 | case ISD::FTRUNC: | ||||||
1071 | case ISD::STRICT_FTRUNC: | ||||||
1072 | case ISD::FROUNDEVEN: | ||||||
1073 | case ISD::STRICT_FROUNDEVEN: | ||||||
1074 | case ISD::FNEARBYINT: | ||||||
1075 | case ISD::STRICT_FNEARBYINT: | ||||||
1076 | case ISD::FRINT: | ||||||
1077 | case ISD::STRICT_FRINT: { | ||||||
1078 | // Replace fp rounding with their X86 specific equivalent so we don't | ||||||
1079 | // need 2 sets of patterns. | ||||||
1080 | unsigned Imm; | ||||||
1081 | switch (N->getOpcode()) { | ||||||
1082 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1082); | ||||||
1083 | case ISD::STRICT_FCEIL: | ||||||
1084 | case ISD::FCEIL: Imm = 0xA; break; | ||||||
1085 | case ISD::STRICT_FFLOOR: | ||||||
1086 | case ISD::FFLOOR: Imm = 0x9; break; | ||||||
1087 | case ISD::STRICT_FTRUNC: | ||||||
1088 | case ISD::FTRUNC: Imm = 0xB; break; | ||||||
1089 | case ISD::STRICT_FROUNDEVEN: | ||||||
1090 | case ISD::FROUNDEVEN: Imm = 0x8; break; | ||||||
1091 | case ISD::STRICT_FNEARBYINT: | ||||||
1092 | case ISD::FNEARBYINT: Imm = 0xC; break; | ||||||
1093 | case ISD::STRICT_FRINT: | ||||||
1094 | case ISD::FRINT: Imm = 0x4; break; | ||||||
1095 | } | ||||||
1096 | SDLoc dl(N); | ||||||
1097 | bool IsStrict = N->isStrictFPOpcode(); | ||||||
1098 | SDValue Res; | ||||||
1099 | if (IsStrict) | ||||||
1100 | Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, | ||||||
1101 | {N->getValueType(0), MVT::Other}, | ||||||
1102 | {N->getOperand(0), N->getOperand(1), | ||||||
1103 | CurDAG->getTargetConstant(Imm, dl, MVT::i32)}); | ||||||
1104 | else | ||||||
1105 | Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0), | ||||||
1106 | N->getOperand(0), | ||||||
1107 | CurDAG->getTargetConstant(Imm, dl, MVT::i32)); | ||||||
1108 | --I; | ||||||
1109 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); | ||||||
1110 | ++I; | ||||||
1111 | MadeChange = true; | ||||||
1112 | continue; | ||||||
1113 | } | ||||||
1114 | case X86ISD::FANDN: | ||||||
1115 | case X86ISD::FAND: | ||||||
1116 | case X86ISD::FOR: | ||||||
1117 | case X86ISD::FXOR: { | ||||||
1118 | // Widen scalar fp logic ops to vector to reduce isel patterns. | ||||||
1119 | // FIXME: Can we do this during lowering/combine. | ||||||
1120 | MVT VT = N->getSimpleValueType(0); | ||||||
1121 | if (VT.isVector() || VT == MVT::f128) | ||||||
1122 | break; | ||||||
1123 | |||||||
1124 | MVT VecVT = VT == MVT::f64 ? MVT::v2f64 | ||||||
1125 | : VT == MVT::f32 ? MVT::v4f32 | ||||||
1126 | : MVT::v8f16; | ||||||
1127 | |||||||
1128 | SDLoc dl(N); | ||||||
1129 | SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, | ||||||
1130 | N->getOperand(0)); | ||||||
1131 | SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, | ||||||
1132 | N->getOperand(1)); | ||||||
1133 | |||||||
1134 | SDValue Res; | ||||||
1135 | if (Subtarget->hasSSE2()) { | ||||||
1136 | EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger(); | ||||||
1137 | Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0); | ||||||
1138 | Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1); | ||||||
1139 | unsigned Opc; | ||||||
1140 | switch (N->getOpcode()) { | ||||||
1141 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1141); | ||||||
1142 | case X86ISD::FANDN: Opc = X86ISD::ANDNP; break; | ||||||
1143 | case X86ISD::FAND: Opc = ISD::AND; break; | ||||||
1144 | case X86ISD::FOR: Opc = ISD::OR; break; | ||||||
1145 | case X86ISD::FXOR: Opc = ISD::XOR; break; | ||||||
1146 | } | ||||||
1147 | Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1); | ||||||
1148 | Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res); | ||||||
1149 | } else { | ||||||
1150 | Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1); | ||||||
1151 | } | ||||||
1152 | Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, | ||||||
1153 | CurDAG->getIntPtrConstant(0, dl)); | ||||||
1154 | --I; | ||||||
1155 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||||
1156 | ++I; | ||||||
1157 | MadeChange = true; | ||||||
1158 | continue; | ||||||
1159 | } | ||||||
1160 | } | ||||||
1161 | |||||||
1162 | if (OptLevel != CodeGenOpt::None && | ||||||
1163 | // Only do this when the target can fold the load into the call or | ||||||
1164 | // jmp. | ||||||
1165 | !Subtarget->useIndirectThunkCalls() && | ||||||
1166 | ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || | ||||||
1167 | (N->getOpcode() == X86ISD::TC_RETURN && | ||||||
1168 | (Subtarget->is64Bit() || | ||||||
1169 | !getTargetMachine().isPositionIndependent())))) { | ||||||
1170 | /// Also try moving call address load from outside callseq_start to just | ||||||
1171 | /// before the call to allow it to be folded. | ||||||
1172 | /// | ||||||
1173 | /// [Load chain] | ||||||
1174 | /// ^ | ||||||
1175 | /// | | ||||||
1176 | /// [Load] | ||||||
1177 | /// ^ ^ | ||||||
1178 | /// | | | ||||||
1179 | /// / \-- | ||||||
1180 | /// / | | ||||||
1181 | ///[CALLSEQ_START] | | ||||||
1182 | /// ^ | | ||||||
1183 | /// | | | ||||||
1184 | /// [LOAD/C2Reg] | | ||||||
1185 | /// | | | ||||||
1186 | /// \ / | ||||||
1187 | /// \ / | ||||||
1188 | /// [CALL] | ||||||
1189 | bool HasCallSeq = N->getOpcode() == X86ISD::CALL; | ||||||
1190 | SDValue Chain = N->getOperand(0); | ||||||
1191 | SDValue Load = N->getOperand(1); | ||||||
1192 | if (!isCalleeLoad(Load, Chain, HasCallSeq)) | ||||||
1193 | continue; | ||||||
1194 | moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); | ||||||
1195 | ++NumLoadMoved; | ||||||
1196 | MadeChange = true; | ||||||
1197 | continue; | ||||||
1198 | } | ||||||
1199 | |||||||
1200 | // Lower fpround and fpextend nodes that target the FP stack to be store and | ||||||
1201 | // load to the stack. This is a gross hack. We would like to simply mark | ||||||
1202 | // these as being illegal, but when we do that, legalize produces these when | ||||||
1203 | // it expands calls, then expands these in the same legalize pass. We would | ||||||
1204 | // like dag combine to be able to hack on these between the call expansion | ||||||
1205 | // and the node legalization. As such this pass basically does "really | ||||||
1206 | // late" legalization of these inline with the X86 isel pass. | ||||||
1207 | // FIXME: This should only happen when not compiled with -O0. | ||||||
1208 | switch (N->getOpcode()) { | ||||||
1209 | default: continue; | ||||||
1210 | case ISD::FP_ROUND: | ||||||
1211 | case ISD::FP_EXTEND: | ||||||
1212 | { | ||||||
1213 | MVT SrcVT = N->getOperand(0).getSimpleValueType(); | ||||||
1214 | MVT DstVT = N->getSimpleValueType(0); | ||||||
1215 | |||||||
1216 | // If any of the sources are vectors, no fp stack involved. | ||||||
1217 | if (SrcVT.isVector() || DstVT.isVector()) | ||||||
1218 | continue; | ||||||
1219 | |||||||
1220 | // If the source and destination are SSE registers, then this is a legal | ||||||
1221 | // conversion that should not be lowered. | ||||||
1222 | const X86TargetLowering *X86Lowering = | ||||||
1223 | static_cast<const X86TargetLowering *>(TLI); | ||||||
1224 | bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); | ||||||
1225 | bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); | ||||||
1226 | if (SrcIsSSE && DstIsSSE) | ||||||
1227 | continue; | ||||||
1228 | |||||||
1229 | if (!SrcIsSSE && !DstIsSSE) { | ||||||
1230 | // If this is an FPStack extension, it is a noop. | ||||||
1231 | if (N->getOpcode() == ISD::FP_EXTEND) | ||||||
1232 | continue; | ||||||
1233 | // If this is a value-preserving FPStack truncation, it is a noop. | ||||||
1234 | if (N->getConstantOperandVal(1)) | ||||||
1235 | continue; | ||||||
1236 | } | ||||||
1237 | |||||||
1238 | // Here we could have an FP stack truncation or an FPStack <-> SSE convert. | ||||||
1239 | // FPStack has extload and truncstore. SSE can fold direct loads into other | ||||||
1240 | // operations. Based on this, decide what we want to do. | ||||||
1241 | MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT; | ||||||
1242 | SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); | ||||||
1243 | int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex(); | ||||||
1244 | MachinePointerInfo MPI = | ||||||
1245 | MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); | ||||||
1246 | SDLoc dl(N); | ||||||
1247 | |||||||
1248 | // FIXME: optimize the case where the src/dest is a load or store? | ||||||
1249 | |||||||
1250 | SDValue Store = CurDAG->getTruncStore( | ||||||
1251 | CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT); | ||||||
1252 | SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, | ||||||
1253 | MemTmp, MPI, MemVT); | ||||||
1254 | |||||||
1255 | // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the | ||||||
1256 | // extload we created. This will cause general havok on the dag because | ||||||
1257 | // anything below the conversion could be folded into other existing nodes. | ||||||
1258 | // To avoid invalidating 'I', back it up to the convert node. | ||||||
1259 | --I; | ||||||
1260 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); | ||||||
1261 | break; | ||||||
1262 | } | ||||||
1263 | |||||||
1264 | //The sequence of events for lowering STRICT_FP versions of these nodes requires | ||||||
1265 | //dealing with the chain differently, as there is already a preexisting chain. | ||||||
1266 | case ISD::STRICT_FP_ROUND: | ||||||
1267 | case ISD::STRICT_FP_EXTEND: | ||||||
1268 | { | ||||||
1269 | MVT SrcVT = N->getOperand(1).getSimpleValueType(); | ||||||
1270 | MVT DstVT = N->getSimpleValueType(0); | ||||||
1271 | |||||||
1272 | // If any of the sources are vectors, no fp stack involved. | ||||||
1273 | if (SrcVT.isVector() || DstVT.isVector()) | ||||||
1274 | continue; | ||||||
1275 | |||||||
1276 | // If the source and destination are SSE registers, then this is a legal | ||||||
1277 | // conversion that should not be lowered. | ||||||
1278 | const X86TargetLowering *X86Lowering = | ||||||
1279 | static_cast<const X86TargetLowering *>(TLI); | ||||||
1280 | bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); | ||||||
1281 | bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); | ||||||
1282 | if (SrcIsSSE && DstIsSSE) | ||||||
1283 | continue; | ||||||
1284 | |||||||
1285 | if (!SrcIsSSE && !DstIsSSE) { | ||||||
1286 | // If this is an FPStack extension, it is a noop. | ||||||
1287 | if (N->getOpcode() == ISD::STRICT_FP_EXTEND) | ||||||
1288 | continue; | ||||||
1289 | // If this is a value-preserving FPStack truncation, it is a noop. | ||||||
1290 | if (N->getConstantOperandVal(2)) | ||||||
1291 | continue; | ||||||
1292 | } | ||||||
1293 | |||||||
1294 | // Here we could have an FP stack truncation or an FPStack <-> SSE convert. | ||||||
1295 | // FPStack has extload and truncstore. SSE can fold direct loads into other | ||||||
1296 | // operations. Based on this, decide what we want to do. | ||||||
1297 | MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT; | ||||||
1298 | SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); | ||||||
1299 | int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex(); | ||||||
1300 | MachinePointerInfo MPI = | ||||||
1301 | MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); | ||||||
1302 | SDLoc dl(N); | ||||||
1303 | |||||||
1304 | // FIXME: optimize the case where the src/dest is a load or store? | ||||||
1305 | |||||||
1306 | //Since the operation is StrictFP, use the preexisting chain. | ||||||
1307 | SDValue Store, Result; | ||||||
1308 | if (!SrcIsSSE) { | ||||||
1309 | SDVTList VTs = CurDAG->getVTList(MVT::Other); | ||||||
1310 | SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp}; | ||||||
1311 | Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT, | ||||||
1312 | MPI, /*Align*/ None, | ||||||
1313 | MachineMemOperand::MOStore); | ||||||
1314 | if (N->getFlags().hasNoFPExcept()) { | ||||||
1315 | SDNodeFlags Flags = Store->getFlags(); | ||||||
1316 | Flags.setNoFPExcept(true); | ||||||
1317 | Store->setFlags(Flags); | ||||||
1318 | } | ||||||
1319 | } else { | ||||||
1320 | assert(SrcVT == MemVT && "Unexpected VT!")(static_cast <bool> (SrcVT == MemVT && "Unexpected VT!" ) ? void (0) : __assert_fail ("SrcVT == MemVT && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1320, __extension__ __PRETTY_FUNCTION__)); | ||||||
1321 | Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp, | ||||||
1322 | MPI); | ||||||
1323 | } | ||||||
1324 | |||||||
1325 | if (!DstIsSSE) { | ||||||
1326 | SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other); | ||||||
1327 | SDValue Ops[] = {Store, MemTmp}; | ||||||
1328 | Result = CurDAG->getMemIntrinsicNode( | ||||||
1329 | X86ISD::FLD, dl, VTs, Ops, MemVT, MPI, | ||||||
1330 | /*Align*/ None, MachineMemOperand::MOLoad); | ||||||
1331 | if (N->getFlags().hasNoFPExcept()) { | ||||||
1332 | SDNodeFlags Flags = Result->getFlags(); | ||||||
1333 | Flags.setNoFPExcept(true); | ||||||
1334 | Result->setFlags(Flags); | ||||||
1335 | } | ||||||
1336 | } else { | ||||||
1337 | assert(DstVT == MemVT && "Unexpected VT!")(static_cast <bool> (DstVT == MemVT && "Unexpected VT!" ) ? void (0) : __assert_fail ("DstVT == MemVT && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1337, __extension__ __PRETTY_FUNCTION__)); | ||||||
1338 | Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI); | ||||||
1339 | } | ||||||
1340 | |||||||
1341 | // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the | ||||||
1342 | // extload we created. This will cause general havok on the dag because | ||||||
1343 | // anything below the conversion could be folded into other existing nodes. | ||||||
1344 | // To avoid invalidating 'I', back it up to the convert node. | ||||||
1345 | --I; | ||||||
1346 | CurDAG->ReplaceAllUsesWith(N, Result.getNode()); | ||||||
1347 | break; | ||||||
1348 | } | ||||||
1349 | } | ||||||
1350 | |||||||
1351 | |||||||
1352 | // Now that we did that, the node is dead. Increment the iterator to the | ||||||
1353 | // next node to process, then delete N. | ||||||
1354 | ++I; | ||||||
1355 | MadeChange = true; | ||||||
1356 | } | ||||||
1357 | |||||||
1358 | // Remove any dead nodes that may have been left behind. | ||||||
1359 | if (MadeChange) | ||||||
1360 | CurDAG->RemoveDeadNodes(); | ||||||
1361 | } | ||||||
1362 | |||||||
// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
// Pattern matched: (MOVZX32rr8/MOVSX32rr8/MOVSX64rr8
//                     (EXTRACT_SUBREG (MOV[SZ]X32rr8_NOREX ...), sub_8bit)).
// The inner extend already produced the extended value, so the outer one is
// redundant (except for the 32->64 step in the MOVSX64rr8 case).
// Returns true if N was replaced.
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
  unsigned Opc = N->getMachineOpcode();
  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
      Opc != X86::MOVSX64rr8)
    return false;

  SDValue N0 = N->getOperand(0);

  // We need to be extracting the low byte (sub_8bit) of an extend.
  if (!N0.isMachineOpcode() ||
      N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
      N0.getConstantOperandVal(1) != X86::sub_8bit)
    return false;

  // We're looking for either a movsx or movzx to match the original opcode.
  // Note: MOVSX64rr8 also matches MOVSX32rr8_NOREX here; the 32->64 step is
  // added below.
  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
                                                : X86::MOVSX32rr8_NOREX;
  SDValue N00 = N0.getOperand(0);
  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
    return false;

  if (Opc == X86::MOVSX64rr8) {
    // If we had a sign extend from 8 to 64 bits. We still need to go from 32
    // to 64.
    MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
                                                   MVT::i64, N00);
    ReplaceUses(N, Extend);
  } else {
    // Ok we can drop this extend and just use the original extend.
    ReplaceUses(N, N00.getNode());
  }

  return true;
}
1398 | |||||||
// Late peepholes run after instruction selection, while the DAG still exists
// but all nodes are machine opcodes:
//  1. Remove redundant movzx/movsx after an 8-bit divrem.
//  2. Fold TEST(x, x) of an AND result into a TEST of the AND's operands.
//  3. Turn KAND+KORTEST into KTEST when only ZF is consumed.
//  4. Drop vector register moves that only existed to zero upper bits.
void X86DAGToDAGISel::PostprocessISelDAG() {
  // Skip peepholes at -O0.
  if (TM.getOptLevel() == CodeGenOpt::None)
    return;

  // Iterate the node list in reverse so replacements appear before the
  // position cursor and are not revisited.
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (tryOptimizeRem8Extend(N)) {
      MadeChange = true;
      continue;
    }

    // Look for a TESTrr+ANDrr pattern where both operands of the test are
    // the same. Rewrite to remove the AND.
    unsigned Opc = N->getMachineOpcode();
    if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
         Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
        N->getOperand(0) == N->getOperand(1) &&
        N->isOnlyUserOf(N->getOperand(0).getNode()) &&
        N->getOperand(0).isMachineOpcode()) {
      SDValue And = N->getOperand(0);
      unsigned N0Opc = And.getMachineOpcode();
      // Register form: TEST (AND a, b), (AND a, b) -> TEST a, b.
      if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
          N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
        MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
                                                     MVT::i32,
                                                     And.getOperand(0),
                                                     And.getOperand(1));
        ReplaceUses(N, Test);
        MadeChange = true;
        continue;
      }
      // Memory form: fold the AND's load into a TESTmr.
      if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
          N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
        unsigned NewOpc;
        switch (N0Opc) {
        case X86::AND8rm:  NewOpc = X86::TEST8mr;  break;
        case X86::AND16rm: NewOpc = X86::TEST16mr; break;
        case X86::AND32rm: NewOpc = X86::TEST32mr; break;
        case X86::AND64rm: NewOpc = X86::TEST64mr; break;
        }

        // Need to swap the memory and register operand.
        // ANDrm operands: reg(0), mem(1..5), chain(6); TESTmr wants mem first.
        SDValue Ops[] = { And.getOperand(1),
                          And.getOperand(2),
                          And.getOperand(3),
                          And.getOperand(4),
                          And.getOperand(5),
                          And.getOperand(0),
                          And.getOperand(6)  /* Chain */ };
        MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
                                                     MVT::i32, MVT::Other, Ops);
        // Carry over the load's memory operands so alias info survives.
        CurDAG->setNodeMemRefs(
            Test, cast<MachineSDNode>(And.getNode())->memoperands());
        ReplaceUses(N, Test);
        MadeChange = true;
        continue;
      }
    }

    // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
    // used. We're doing this late so we can prefer to fold the AND into masked
    // comparisons. Doing that can be better for the live range of the mask
    // register.
    if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
         Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
        N->getOperand(0) == N->getOperand(1) &&
        N->isOnlyUserOf(N->getOperand(0).getNode()) &&
        N->getOperand(0).isMachineOpcode() &&
        onlyUsesZeroFlag(SDValue(N, 0))) {
      SDValue And = N->getOperand(0);
      unsigned N0Opc = And.getMachineOpcode();
      // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
      // KAND instructions and KTEST use the same ISA feature.
      if (N0Opc == X86::KANDBrr ||
          (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
          N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
        unsigned NewOpc;
        switch (Opc) {
        default: llvm_unreachable("Unexpected opcode!");
        case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
        case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
        case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
        case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
        }
        MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
                                                      MVT::i32,
                                                      And.getOperand(0),
                                                      And.getOperand(1));
        ReplaceUses(N, KTest);
        MadeChange = true;
        continue;
      }
    }

    // Attempt to remove vectors moves that were inserted to zero upper bits.
    if (Opc != TargetOpcode::SUBREG_TO_REG)
      continue;

    unsigned SubRegIdx = N->getConstantOperandVal(2);
    if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
      continue;

    SDValue Move = N->getOperand(1);
    if (!Move.isMachineOpcode())
      continue;

    // Make sure its one of the move opcodes we recognize.
    switch (Move.getMachineOpcode()) {
    default:
      continue;
    case X86::VMOVAPDrr:       case X86::VMOVUPDrr:
    case X86::VMOVAPSrr:       case X86::VMOVUPSrr:
    case X86::VMOVDQArr:       case X86::VMOVDQUrr:
    case X86::VMOVAPDYrr:      case X86::VMOVUPDYrr:
    case X86::VMOVAPSYrr:      case X86::VMOVUPSYrr:
    case X86::VMOVDQAYrr:      case X86::VMOVDQUYrr:
    case X86::VMOVAPDZ128rr:   case X86::VMOVUPDZ128rr:
    case X86::VMOVAPSZ128rr:   case X86::VMOVUPSZ128rr:
    case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
    case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
    case X86::VMOVAPDZ256rr:   case X86::VMOVUPDZ256rr:
    case X86::VMOVAPSZ256rr:   case X86::VMOVUPSZ256rr:
    case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
    case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
      break;
    }

    SDValue In = Move.getOperand(0);
    if (!In.isMachineOpcode() ||
        In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
      continue;

    // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
    // the SHA instructions which use a legacy encoding.
    uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
    if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
        (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
        (TSFlags & X86II::EncodingMask) != X86II::XOP)
      continue;

    // Producing instruction is another vector instruction. We can drop the
    // move.
    CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
    MadeChange = true;
  }

  // Remove any dead nodes that may have been left behind.
  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
1556 | |||||||
1557 | |||||||
1558 | /// Emit any code that needs to be executed only in the main function. | ||||||
1559 | void X86DAGToDAGISel::emitSpecialCodeForMain() { | ||||||
1560 | if (Subtarget->isTargetCygMing()) { | ||||||
1561 | TargetLowering::ArgListTy Args; | ||||||
1562 | auto &DL = CurDAG->getDataLayout(); | ||||||
1563 | |||||||
1564 | TargetLowering::CallLoweringInfo CLI(*CurDAG); | ||||||
1565 | CLI.setChain(CurDAG->getRoot()) | ||||||
1566 | .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()), | ||||||
1567 | CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)), | ||||||
1568 | std::move(Args)); | ||||||
1569 | const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); | ||||||
1570 | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); | ||||||
1571 | CurDAG->setRoot(Result.second); | ||||||
1572 | } | ||||||
1573 | } | ||||||
1574 | |||||||
1575 | void X86DAGToDAGISel::emitFunctionEntryCode() { | ||||||
1576 | // If this is main, emit special code for main. | ||||||
1577 | const Function &F = MF->getFunction(); | ||||||
1578 | if (F.hasExternalLinkage() && F.getName() == "main") | ||||||
1579 | emitSpecialCodeForMain(); | ||||||
1580 | } | ||||||
1581 | |||||||
/// Return true if \p Val is a displacement that can safely be combined with a
/// frame index. On 64-bit platforms a frame index may itself contribute a
/// displacement; if the frame-index displacement fits in 31 signed bits
/// (only slightly stronger than the fundamental 32-bit assumption), then an
/// explicit displacement that also fits in 31 signed bits cannot overflow the
/// 32-bit displacement field when the two are summed.
static bool isDispSafeForFrameIndex(int64_t Val) {
  // Equivalent to isInt<31>(Val): Val must lie in [-2^30, 2^30 - 1].
  const int64_t Limit = int64_t(1) << 30;
  return Val >= -Limit && Val < Limit;
}
1591 | |||||||
/// Try to add \p Offset to the displacement already accumulated in \p AM.
/// Returns true on failure (AM.Disp is left untouched), false on success
/// (AM.Disp now holds the combined value).
bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // We may have already matched a displacement and the caller just added the
  // symbolic displacement. So we still need to do the checks even if Offset
  // is zero.

  int64_t Val = AM.Disp + Offset;

  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (Val != 0 && (AM.ES || AM.MCSym))
    return true;

  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    // The combined offset must be representable under the current code model
    // (symbolic displacements further restrict the allowed range).
    if (Val != 0 &&
        !X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  // All checks passed; commit the combined displacement.
  AM.Disp = Val;
  return false;

}
1620 | |||||||
/// Try to absorb a load of address 0 from the GS/FS address space into \p AM
/// as a plain segment-register reference. Returns false on success (AM.Segment
/// is set), true if the pattern does not apply. \p AllowSegmentRegForX32
/// permits the fold in x32 (64-bit ILP32) mode, which is normally unsafe (see
/// below) unless the caller knows no base/index register will be added.
bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
                                         bool AllowSegmentRegForX32) {
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is generally valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
  // with 32-bit registers, as we get in ILP32 mode, those registers are first
  // zero-extended to 64 bits and then added it to the base address, which gives
  // unwanted results when the register holds a negative value.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) {
    // Only address 0, only when no segment is already set, and only on OSes
    // known to follow the GNU TLS convention.
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        !IndirectTlsSegRefs &&
        (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
         Subtarget->isTargetFuchsia())) {
      if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
        return true;
      switch (N->getPointerInfo().getAddrSpace()) {
      case X86AS::GS:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case X86AS::FS:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      // Address space X86AS::SS is not handled here, because it is not used to
      // address TLS areas.
      }
    }
  }

  // No fold performed.
  return true;
}
1656 | |||||||
/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
/// mode. These wrap things that will resolve down into a symbol reference.
/// If no match is possible, this returns true, otherwise it returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  bool IsRIPRelTLS = false;
  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
  if (IsRIPRel) {
    SDValue Val = N.getOperand(0);
    if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
      IsRIPRelTLS = true;
  }

  // We can't use an addressing mode in the 64-bit large code model.
  // Global TLS addressing is an exception. In the medium code model,
  // we use can use a mode when RIP wrappers are present.
  // That signifies access to globals that are known to be "near",
  // such as the GOT itself.
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit() &&
      ((M == CodeModel::Large && !IsRIPRelTLS) ||
       (M == CodeModel::Medium && !IsRIPRel)))
    return true;

  // Base and index reg must be 0 in order to use %rip as base.
  if (IsRIPRel && AM.hasBaseOrIndexReg())
    return true;

  // Make a local copy in case we can't do this fold.
  X86ISelAddressMode Backup = AM;

  // Record the wrapped symbol in AM; each symbol kind has its own field.
  int64_t Offset = 0;
  SDValue N0 = N.getOperand(0);
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
    AM.GV = G->getGlobal();
    AM.SymbolFlags = G->getTargetFlags();
    Offset = G->getOffset();
  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
    AM.CP = CP->getConstVal();
    AM.Alignment = CP->getAlign();
    AM.SymbolFlags = CP->getTargetFlags();
    Offset = CP->getOffset();
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
    AM.ES = S->getSymbol();
    AM.SymbolFlags = S->getTargetFlags();
  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
    AM.MCSym = S->getMCSymbol();
  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
    AM.JT = J->getIndex();
    AM.SymbolFlags = J->getTargetFlags();
  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
    AM.BlockAddr = BA->getBlockAddress();
    AM.SymbolFlags = BA->getTargetFlags();
    Offset = BA->getOffset();
  } else
    llvm_unreachable("Unhandled symbol reference node.");

  // The symbol's own offset must still fit; otherwise roll back everything.
  if (foldOffsetIntoAddress(Offset, AM)) {
    AM = Backup;
    return true;
  }

  if (IsRIPRel)
    AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));

  // Commit the changes now that we know this fold is safe.
  return false;
}
1729 | |||||||
/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
/// After the recursive match, three encoding-quality post-processing steps
/// run (second-chance segment-load fold for x32, scale-2 to base+index
/// conversion, and RIP-relative promotion of bare symbols).
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Make a second attempt to fold a load, if we now know
  // that there will not be any other register. This is only performed for
  // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
  // any foldable load the first time.
  if (Subtarget->isTarget64BitILP32() &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
    SDValue Save_Base_Reg = AM.Base_Reg;
    if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
      AM.Base_Reg = SDValue();
      // If the segment-register fold fails, restore the saved base register.
      if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
        AM.Base_Reg = Save_Base_Reg;
    }
  }

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  switch (TM.getCodeModel()) {
  default: break;
  case CodeModel::Small:
  case CodeModel::Kernel:
    // Only when the address is exactly a symbolic displacement with no
    // base/index register and no special symbol flags.
    if (Subtarget->is64Bit() &&
        AM.Scale == 1 &&
        AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr &&
        AM.SymbolFlags == X86II::MO_NO_FLAG &&
        AM.hasSymbolicDisplacement())
      AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
    break;
  }

  return false;
}
1780 | |||||||
/// Try to fold an ADD node \p N into \p AM: first both operands in each
/// order, then (if AM still has both base and index free) one operand as base
/// and the other as index. Returns true if nothing could be folded; \p N may
/// be updated to the surviving (possibly CSE'd) node in either case.
bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  X86ISelAddressMode Backup = AM;
  // Fold LHS then RHS. The RHS is re-fetched through the handle because the
  // first recursive call may have replaced/CSE'd N.
  // NOTE(review): the static analyzer reports a possible null dereference in
  // SDValue::getOperand here (Handle.getValue().getNode() assumed non-null);
  // presumably the handle always tracks a live node — confirm.
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commutating the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
                               Depth + 1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}
1815 | |||||||
// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  // Reposition only when N has no ID yet (-1) or currently sorts after Pos
  // in the (uninvalidated) topological ordering.
  if (N->getNodeId() == -1 ||
      (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
       SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
    DAG.RepositionNode(Pos->getIterator(), N.getNode());
    // Mark Node as invalid for pruning as after this it may be a successor to a
    // selected node but otherwise be in the same position of Pos.
    // Conservatively mark it with the same -abs(Id) to assure node id
    // invariant is preserved.
    N->setNodeId(Pos->getNodeId());
    SelectionDAGISel::InvalidateNodeId(N.getNode());
  }
}
1834 | |||||||
// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed. (Note: follows the matchAddress convention — false == success.)
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  // Only a single-use logical right shift by a constant can match.
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  // Recover C1 and require a scale of 2, 4, or 8 (ScaleLog in 1..3) with a
  // mask that is exactly 0xff shifted into place.
  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  // Build ((X >> 8) & 0xff) << ScaleLog; the AND becomes the index register
  // and the shift amount becomes the addressing-mode scale.
  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, Eight);
  insertDAGNode(DAG, N, Srl);
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, And);
  insertDAGNode(DAG, N, ShlCount);
  insertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  DAG.RemoveDeadNode(N.getNode());
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}
1879 | |||||||
1880 | // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this | ||||||
1881 | // allows us to fold the shift into this addressing mode. Returns false if the | ||||||
1882 | // transform succeeded. | ||||||
1883 | static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, | ||||||
1884 | X86ISelAddressMode &AM) { | ||||||
1885 | SDValue Shift = N.getOperand(0); | ||||||
1886 | |||||||
1887 | // Use a signed mask so that shifting right will insert sign bits. These | ||||||
1888 | // bits will be removed when we shift the result left so it doesn't matter | ||||||
1889 | // what we use. This might allow a smaller immediate encoding. | ||||||
1890 | int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue(); | ||||||
1891 | |||||||
1892 | // If we have an any_extend feeding the AND, look through it to see if there | ||||||
1893 | // is a shift behind it. But only if the AND doesn't use the extended bits. | ||||||
1894 | // FIXME: Generalize this to other ANY_EXTEND than i32 to i64? | ||||||
1895 | bool FoundAnyExtend = false; | ||||||
1896 | if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && | ||||||
1897 | Shift.getOperand(0).getSimpleValueType() == MVT::i32 && | ||||||
1898 | isUInt<32>(Mask)) { | ||||||
1899 | FoundAnyExtend = true; | ||||||
1900 | Shift = Shift.getOperand(0); | ||||||
1901 | } | ||||||
1902 | |||||||
1903 | if (Shift.getOpcode() != ISD::SHL || | ||||||
1904 | !isa<ConstantSDNode>(Shift.getOperand(1))) | ||||||
1905 | return true; | ||||||
1906 | |||||||
1907 | SDValue X = Shift.getOperand(0); | ||||||
1908 | |||||||
1909 | // Not likely to be profitable if either the AND or SHIFT node has more | ||||||
1910 | // than one use (unless all uses are for address computation). Besides, | ||||||
1911 | // isel mechanism requires their node ids to be reused. | ||||||
1912 | if (!N.hasOneUse() || !Shift.hasOneUse()) | ||||||
1913 | return true; | ||||||
1914 | |||||||
1915 | // Verify that the shift amount is something we can fold. | ||||||
1916 | unsigned ShiftAmt = Shift.getConstantOperandVal(1); | ||||||
1917 | if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) | ||||||
1918 | return true; | ||||||
1919 | |||||||
1920 | MVT VT = N.getSimpleValueType(); | ||||||
1921 | SDLoc DL(N); | ||||||
1922 | if (FoundAnyExtend) { | ||||||
1923 | SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); | ||||||
1924 | insertDAGNode(DAG, N, NewX); | ||||||
1925 | X = NewX; | ||||||
1926 | } | ||||||
1927 | |||||||
1928 | SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT); | ||||||
1929 | SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask); | ||||||
1930 | SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1)); | ||||||
1931 | |||||||
1932 | // Insert the new nodes into the topological ordering. We must do this in | ||||||
1933 | // a valid topological ordering as nothing is going to go back and re-sort | ||||||
1934 | // these nodes. We continually insert before 'N' in sequence as this is | ||||||
1935 | // essentially a pre-flattened and pre-sorted sequence of nodes. There is no | ||||||
1936 | // hierarchy left to express. | ||||||
1937 | insertDAGNode(DAG, N, NewMask); | ||||||
1938 | insertDAGNode(DAG, N, NewAnd); | ||||||
1939 | insertDAGNode(DAG, N, NewShift); | ||||||
1940 | DAG.ReplaceAllUsesWith(N, NewShift); | ||||||
1941 | DAG.RemoveDeadNode(N.getNode()); | ||||||
1942 | |||||||
1943 | AM.Scale = 1 << ShiftAmt; | ||||||
1944 | AM.IndexReg = NewAnd; | ||||||
1945 | return false; | ||||||
1946 | } | ||||||
1947 | |||||||
1948 | // Implement some heroics to detect shifts of masked values where the mask can | ||||||
1949 | // be replaced by extending the shift and undoing that in the addressing mode | ||||||
1950 | // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and | ||||||
1951 | // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in | ||||||
1952 | // the addressing mode. This results in code such as: | ||||||
1953 | // | ||||||
1954 | // int f(short *y, int *lookup_table) { | ||||||
1955 | // ... | ||||||
1956 | // return *y + lookup_table[*y >> 11]; | ||||||
1957 | // } | ||||||
1958 | // | ||||||
1959 | // Turning into: | ||||||
1960 | // movzwl (%rdi), %eax | ||||||
1961 | // movl %eax, %ecx | ||||||
1962 | // shrl $11, %ecx | ||||||
1963 | // addl (%rsi,%rcx,4), %eax | ||||||
1964 | // | ||||||
1965 | // Instead of: | ||||||
1966 | // movzwl (%rdi), %eax | ||||||
1967 | // movl %eax, %ecx | ||||||
1968 | // shrl $9, %ecx | ||||||
1969 | // andl $124, %rcx | ||||||
1970 | // addl (%rsi,%rcx), %eax | ||||||
1971 | // | ||||||
1972 | // Note that this function assumes the mask is provided as a mask *after* the | ||||||
1973 | // value is shifted. The input chain may or may not match that, but computing | ||||||
1974 | // such a mask is trivial. | ||||||
1975 | static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, | ||||||
1976 | uint64_t Mask, | ||||||
1977 | SDValue Shift, SDValue X, | ||||||
1978 | X86ISelAddressMode &AM) { | ||||||
1979 | if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() || | ||||||
1980 | !isa<ConstantSDNode>(Shift.getOperand(1))) | ||||||
1981 | return true; | ||||||
1982 | |||||||
1983 | unsigned ShiftAmt = Shift.getConstantOperandVal(1); | ||||||
1984 | unsigned MaskLZ = countLeadingZeros(Mask); | ||||||
1985 | unsigned MaskTZ = countTrailingZeros(Mask); | ||||||
1986 | |||||||
1987 | // The amount of shift we're trying to fit into the addressing mode is taken | ||||||
1988 | // from the trailing zeros of the mask. | ||||||
1989 | unsigned AMShiftAmt = MaskTZ; | ||||||
1990 | |||||||
1991 | // There is nothing we can do here unless the mask is removing some bits. | ||||||
1992 | // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. | ||||||
1993 | if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; | ||||||
1994 | |||||||
1995 | // We also need to ensure that mask is a continuous run of bits. | ||||||
1996 | if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true; | ||||||
1997 | |||||||
1998 | // Scale the leading zero count down based on the actual size of the value. | ||||||
1999 | // Also scale it down based on the size of the shift. | ||||||
2000 | unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; | ||||||
2001 | if (MaskLZ < ScaleDown) | ||||||
2002 | return true; | ||||||
2003 | MaskLZ -= ScaleDown; | ||||||
2004 | |||||||
2005 | // The final check is to ensure that any masked out high bits of X are | ||||||
2006 | // already known to be zero. Otherwise, the mask has a semantic impact | ||||||
2007 | // other than masking out a couple of low bits. Unfortunately, because of | ||||||
2008 | // the mask, zero extensions will be removed from operands in some cases. | ||||||
2009 | // This code works extra hard to look through extensions because we can | ||||||
2010 | // replace them with zero extensions cheaply if necessary. | ||||||
2011 | bool ReplacingAnyExtend = false; | ||||||
2012 | if (X.getOpcode() == ISD::ANY_EXTEND) { | ||||||
2013 | unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() - | ||||||
2014 | X.getOperand(0).getSimpleValueType().getSizeInBits(); | ||||||
2015 | // Assume that we'll replace the any-extend with a zero-extend, and | ||||||
2016 | // narrow the search to the extended value. | ||||||
2017 | X = X.getOperand(0); | ||||||
2018 | MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits; | ||||||
2019 | ReplacingAnyExtend = true; | ||||||
2020 | } | ||||||
2021 | APInt MaskedHighBits = | ||||||
2022 | APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); | ||||||
2023 | KnownBits Known = DAG.computeKnownBits(X); | ||||||
2024 | if (MaskedHighBits != Known.Zero) return true; | ||||||
2025 | |||||||
2026 | // We've identified a pattern that can be transformed into a single shift | ||||||
2027 | // and an addressing mode. Make it so. | ||||||
2028 | MVT VT = N.getSimpleValueType(); | ||||||
2029 | if (ReplacingAnyExtend) { | ||||||
2030 | assert(X.getValueType() != VT)(static_cast <bool> (X.getValueType() != VT) ? void (0) : __assert_fail ("X.getValueType() != VT", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2030, __extension__ __PRETTY_FUNCTION__)); | ||||||
2031 | // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. | ||||||
2032 | SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); | ||||||
2033 | insertDAGNode(DAG, N, NewX); | ||||||
2034 | X = NewX; | ||||||
2035 | } | ||||||
2036 | SDLoc DL(N); | ||||||
2037 | SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); | ||||||
2038 | SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); | ||||||
2039 | SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); | ||||||
2040 | SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt); | ||||||
2041 | |||||||
2042 | // Insert the new nodes into the topological ordering. We must do this in | ||||||
2043 | // a valid topological ordering as nothing is going to go back and re-sort | ||||||
2044 | // these nodes. We continually insert before 'N' in sequence as this is | ||||||
2045 | // essentially a pre-flattened and pre-sorted sequence of nodes. There is no | ||||||
2046 | // hierarchy left to express. | ||||||
2047 | insertDAGNode(DAG, N, NewSRLAmt); | ||||||
2048 | insertDAGNode(DAG, N, NewSRL); | ||||||
2049 | insertDAGNode(DAG, N, NewSHLAmt); | ||||||
2050 | insertDAGNode(DAG, N, NewSHL); | ||||||
2051 | DAG.ReplaceAllUsesWith(N, NewSHL); | ||||||
2052 | DAG.RemoveDeadNode(N.getNode()); | ||||||
2053 | |||||||
2054 | AM.Scale = 1 << AMShiftAmt; | ||||||
2055 | AM.IndexReg = NewSRL; | ||||||
2056 | return false; | ||||||
2057 | } | ||||||
2058 | |||||||
2059 | // Transform "(X >> SHIFT) & (MASK << C1)" to | ||||||
2060 | // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be | ||||||
2061 | // matched to a BEXTR later. Returns false if the simplification is performed. | ||||||
2062 | static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, | ||||||
2063 | uint64_t Mask, | ||||||
2064 | SDValue Shift, SDValue X, | ||||||
2065 | X86ISelAddressMode &AM, | ||||||
2066 | const X86Subtarget &Subtarget) { | ||||||
2067 | if (Shift.getOpcode() != ISD::SRL || | ||||||
2068 | !isa<ConstantSDNode>(Shift.getOperand(1)) || | ||||||
2069 | !Shift.hasOneUse() || !N.hasOneUse()) | ||||||
2070 | return true; | ||||||
2071 | |||||||
2072 | // Only do this if BEXTR will be matched by matchBEXTRFromAndImm. | ||||||
2073 | if (!Subtarget.hasTBM() && | ||||||
2074 | !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR())) | ||||||
2075 | return true; | ||||||
2076 | |||||||
2077 | // We need to ensure that mask is a continuous run of bits. | ||||||
2078 | if (!isShiftedMask_64(Mask)) return true; | ||||||
2079 | |||||||
2080 | unsigned ShiftAmt = Shift.getConstantOperandVal(1); | ||||||
2081 | |||||||
2082 | // The amount of shift we're trying to fit into the addressing mode is taken | ||||||
2083 | // from the trailing zeros of the mask. | ||||||
2084 | unsigned AMShiftAmt = countTrailingZeros(Mask); | ||||||
2085 | |||||||
2086 | // There is nothing we can do here unless the mask is removing some bits. | ||||||
2087 | // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. | ||||||
2088 | if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; | ||||||
2089 | |||||||
2090 | MVT VT = N.getSimpleValueType(); | ||||||
2091 | SDLoc DL(N); | ||||||
2092 | SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); | ||||||
2093 | SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); | ||||||
2094 | SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT); | ||||||
2095 | SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask); | ||||||
2096 | SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); | ||||||
2097 | SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt); | ||||||
2098 | |||||||
2099 | // Insert the new nodes into the topological ordering. We must do this in | ||||||
2100 | // a valid topological ordering as nothing is going to go back and re-sort | ||||||
2101 | // these nodes. We continually insert before 'N' in sequence as this is | ||||||
2102 | // essentially a pre-flattened and pre-sorted sequence of nodes. There is no | ||||||
2103 | // hierarchy left to express. | ||||||
2104 | insertDAGNode(DAG, N, NewSRLAmt); | ||||||
2105 | insertDAGNode(DAG, N, NewSRL); | ||||||
2106 | insertDAGNode(DAG, N, NewMask); | ||||||
2107 | insertDAGNode(DAG, N, NewAnd); | ||||||
2108 | insertDAGNode(DAG, N, NewSHLAmt); | ||||||
2109 | insertDAGNode(DAG, N, NewSHL); | ||||||
2110 | DAG.ReplaceAllUsesWith(N, NewSHL); | ||||||
2111 | DAG.RemoveDeadNode(N.getNode()); | ||||||
2112 | |||||||
2113 | AM.Scale = 1 << AMShiftAmt; | ||||||
2114 | AM.IndexReg = NewAnd; | ||||||
2115 | return false; | ||||||
2116 | } | ||||||
2117 | |||||||
/// Recursively try to fold the operation rooted at N into the addressing mode
/// AM. Returns false on success (AM updated) and true if N could not be
/// matched at this level; callers that speculate keep a Backup copy of AM.
/// NOTE(review): the analyzer-rendered LLVM_DEBUG/assert/LLVM_FALLTHROUGH
/// expansions have been restored to their original macro form here.
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  LLVM_DEBUG({
    dbgs() << "MatchAddress: ";
    AM.dump(CurDAG);
  });
  // Limit recursion.
  if (Depth > 5)
    return matchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it. Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements. It isn't very important, but this should be fixed for
    // consistency.
    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
      return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::LOCAL_RECOVER: {
    // Take the MCSymbol directly, but only if nothing symbolic or a
    // displacement has been folded into AM yet.
    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
        // Use the symbol and don't prefix it.
        AM.MCSym = ESNode->getMCSymbol();
        return false;
      }
    break;
  }
  case ISD::Constant: {
    // Fold the constant into the displacement if it fits.
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    // Wrapped symbolic addresses (globals, constant pool, jump tables, ...).
    if (!matchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    // Use the frame index as the base if the base slot is still free and the
    // current displacement remains encodeable next to it.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getOperand(0);
          ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!foldOffsetIntoAddress(Disp, AM))
            return false;
        }

        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
           "Unexpected value size!");

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    LLVM_FALLTHROUGH;
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getOperand(1))) {
            Reg = MulVal.getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            if (foldOffsetIntoAddress(Disp, AM))
              Reg = N.getOperand(0);
          } else {
            Reg = N.getOperand(0);
          }

          // Reg goes in both base and index: X*3 == X + X*2, etc.
          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if a has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
      // LHS didn't fold; restore AM (the recursion may have CSE'd N, so
      // re-fetch it through the handle).
      N = Handle.getValue();
      AM = Backup;
      break;
    }
    N = Handle.getValue();
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = N.getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    // Negation will be emitted later to avoid creating dangling nodes if this
    // was an unprofitable LEA.
    AM.IndexReg = RHS;
    AM.NegateIndex = true;
    AM.Scale = 1;
    return false;
  }

  case ISD::ADD:
    if (!matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::OR:
    // We want to look through a transform in InstCombine and DAGCombiner that
    // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
    // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
    // An 'lea' can then be used to match the shift (multiply) and add:
    // and $1, %esi
    // lea (%rsi, %rdi, 8), %rax
    if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
        !matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
           "Unexpected value size!");

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;

    if (N.getOperand(0).getOpcode() == ISD::SRL) {
      SDValue Shift = N.getOperand(0);
      SDValue X = Shift.getOperand(0);

      uint64_t Mask = N.getConstantOperandVal(1);

      // Try to fold the mask and shift into an extract and scale.
      if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
        return false;

      // Try to fold the mask and shift directly into the scale.
      if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
        return false;

      // Try to fold the mask and shift into BEXTR and scale.
      if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
        return false;
    }

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
      return false;

    break;
  }
  case ISD::ZERO_EXTEND: {
    // Try to widen a zexted shift left to the same size as its use, so we can
    // match the shift as a scale factor.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;
    if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
      break;

    // Give up if the shift is not a valid scale factor [1,2,3].
    SDValue Shl = N.getOperand(0);
    auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
    if (!ShAmtC || ShAmtC->getZExtValue() > 3)
      break;

    // The narrow shift must only shift out zero bits (it must be 'nuw').
    // That makes it safe to widen to the destination type.
    APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
                                            ShAmtC->getZExtValue());
    if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
      break;

    // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
    MVT VT = N.getSimpleValueType();
    SDLoc DL(N);
    SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
    SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));

    // Convert the shift to scale factor.
    AM.Scale = 1 << ShAmtC->getZExtValue();
    AM.IndexReg = Zext;

    insertDAGNode(*CurDAG, N, Zext);
    insertDAGNode(*CurDAG, N, NewShl);
    CurDAG->ReplaceAllUsesWith(N, NewShl);
    CurDAG->RemoveDeadNode(N.getNode());
    return false;
  }
  }

  // Nothing special matched; consume N as a plain base/index register.
  return matchAddressBase(N, AM);
}
2447 | |||||||
2448 | /// Helper for MatchAddress. Add the specified node to the | ||||||
2449 | /// specified addressing mode without any further recursion. | ||||||
2450 | bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { | ||||||
2451 | // Is the base register already occupied? | ||||||
2452 | if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { | ||||||
2453 | // If so, check to see if the scale index register is set. | ||||||
2454 | if (!AM.IndexReg.getNode()) { | ||||||
2455 | AM.IndexReg = N; | ||||||
2456 | AM.Scale = 1; | ||||||
2457 | return false; | ||||||
2458 | } | ||||||
2459 | |||||||
2460 | // Otherwise, we cannot select it. | ||||||
2461 | return true; | ||||||
2462 | } | ||||||
2463 | |||||||
2464 | // Default, generate it as a register. | ||||||
2465 | AM.BaseType = X86ISelAddressMode::RegBase; | ||||||
2466 | AM.Base_Reg = N; | ||||||
2467 | return false; | ||||||
2468 | } | ||||||
2469 | |||||||
2470 | /// Helper for selectVectorAddr. Handles things that can be folded into a | ||||||
2471 | /// gather scatter address. The index register and scale should have already | ||||||
2472 | /// been handled. | ||||||
2473 | bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { | ||||||
2474 | // TODO: Support other operations. | ||||||
2475 | switch (N.getOpcode()) { | ||||||
2476 | case ISD::Constant: { | ||||||
2477 | uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); | ||||||
2478 | if (!foldOffsetIntoAddress(Val, AM)) | ||||||
2479 | return false; | ||||||
2480 | break; | ||||||
2481 | } | ||||||
2482 | case X86ISD::Wrapper: | ||||||
2483 | if (!matchWrapper(N, AM)) | ||||||
2484 | return false; | ||||||
2485 | break; | ||||||
2486 | } | ||||||
2487 | |||||||
2488 | return matchAddressBase(N, AM); | ||||||
2489 | } | ||||||
2490 | |||||||
2491 | bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, | ||||||
2492 | SDValue IndexOp, SDValue ScaleOp, | ||||||
2493 | SDValue &Base, SDValue &Scale, | ||||||
2494 | SDValue &Index, SDValue &Disp, | ||||||
2495 | SDValue &Segment) { | ||||||
2496 | X86ISelAddressMode AM; | ||||||
2497 | AM.IndexReg = IndexOp; | ||||||
2498 | AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue(); | ||||||
2499 | |||||||
2500 | unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace(); | ||||||
2501 | if (AddrSpace == X86AS::GS) | ||||||
2502 | AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); | ||||||
2503 | if (AddrSpace == X86AS::FS) | ||||||
2504 | AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); | ||||||
2505 | if (AddrSpace == X86AS::SS) | ||||||
2506 | AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); | ||||||
2507 | |||||||
2508 | SDLoc DL(BasePtr); | ||||||
2509 | MVT VT = BasePtr.getSimpleValueType(); | ||||||
2510 | |||||||
2511 | // Try to match into the base and displacement fields. | ||||||
2512 | if (matchVectorAddress(BasePtr, AM)) | ||||||
2513 | return false; | ||||||
2514 | |||||||
2515 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); | ||||||
2516 | return true; | ||||||
2517 | } | ||||||
2518 | |||||||
2519 | /// Returns true if it is able to pattern match an addressing mode. | ||||||
2520 | /// It returns the operands which make up the maximal addressing mode it can | ||||||
2521 | /// match by reference. | ||||||
2522 | /// | ||||||
2523 | /// Parent is the parent node of the addr operand that is being matched. It | ||||||
2524 | /// is always a load, store, atomic node, or null. It is only null when | ||||||
2525 | /// checking memory operands for inline asm nodes. | ||||||
bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  if (Parent &&
      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
      // that are not a MemSDNode, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
      Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    // Every parent opcode not excluded above is assumed to be a MemSDNode,
    // which is what makes this unchecked cast safe.
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // Segmented address spaces select the corresponding segment register.
    if (AddrSpace == X86AS::GS)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == X86AS::FS)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
    if (AddrSpace == X86AS::SS)
      AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
  }

  // Save the DL and VT before calling matchAddress, it can invalidate N.
  SDLoc DL(N);
  MVT VT = N.getSimpleValueType();

  if (matchAddress(N, AM))
    return false;

  // Matching succeeded: materialize the addressing-mode components.
  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
  return true;
}
2561 | |||||||
2562 | bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { | ||||||
2563 | // In static codegen with small code model, we can get the address of a label | ||||||
2564 | // into a register with 'movl' | ||||||
2565 | if (N->getOpcode() != X86ISD::Wrapper) | ||||||
2566 | return false; | ||||||
2567 | |||||||
2568 | N = N.getOperand(0); | ||||||
2569 | |||||||
2570 | // At least GNU as does not accept 'movl' for TPOFF relocations. | ||||||
2571 | // FIXME: We could use 'movl' when we know we are targeting MC. | ||||||
2572 | if (N->getOpcode() == ISD::TargetGlobalTLSAddress) | ||||||
2573 | return false; | ||||||
2574 | |||||||
2575 | Imm = N; | ||||||
2576 | if (N->getOpcode() != ISD::TargetGlobalAddress) | ||||||
2577 | return TM.getCodeModel() == CodeModel::Small; | ||||||
2578 | |||||||
2579 | Optional<ConstantRange> CR = | ||||||
2580 | cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange(); | ||||||
2581 | if (!CR) | ||||||
2582 | return TM.getCodeModel() == CodeModel::Small; | ||||||
2583 | |||||||
2584 | return CR->getUnsignedMax().ult(1ull << 32); | ||||||
2585 | } | ||||||
2586 | |||||||
2587 | bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, | ||||||
2588 | SDValue &Scale, SDValue &Index, | ||||||
2589 | SDValue &Disp, SDValue &Segment) { | ||||||
2590 | // Save the debug loc before calling selectLEAAddr, in case it invalidates N. | ||||||
2591 | SDLoc DL(N); | ||||||
2592 | |||||||
2593 | if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) | ||||||
| |||||||
2594 | return false; | ||||||
2595 | |||||||
2596 | RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base); | ||||||
2597 | if (RN && RN->getReg() == 0) | ||||||
2598 | Base = CurDAG->getRegister(0, MVT::i64); | ||||||
2599 | else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) { | ||||||
2600 | // Base could already be %rip, particularly in the x32 ABI. | ||||||
2601 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, | ||||||
2602 | MVT::i64), 0); | ||||||
2603 | Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, | ||||||
2604 | Base); | ||||||
2605 | } | ||||||
2606 | |||||||
2607 | RN = dyn_cast<RegisterSDNode>(Index); | ||||||
2608 | if (RN && RN->getReg() == 0) | ||||||
2609 | Index = CurDAG->getRegister(0, MVT::i64); | ||||||
2610 | else { | ||||||
2611 | assert(Index.getValueType() == MVT::i32 &&(static_cast <bool> (Index.getValueType() == MVT::i32 && "Expect to be extending 32-bit registers for use in LEA") ? void (0) : __assert_fail ("Index.getValueType() == MVT::i32 && \"Expect to be extending 32-bit registers for use in LEA\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2612, __extension__ __PRETTY_FUNCTION__)) | ||||||
2612 | "Expect to be extending 32-bit registers for use in LEA")(static_cast <bool> (Index.getValueType() == MVT::i32 && "Expect to be extending 32-bit registers for use in LEA") ? void (0) : __assert_fail ("Index.getValueType() == MVT::i32 && \"Expect to be extending 32-bit registers for use in LEA\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2612, __extension__ __PRETTY_FUNCTION__)); | ||||||
2613 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, | ||||||
2614 | MVT::i64), 0); | ||||||
2615 | Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, | ||||||
2616 | Index); | ||||||
2617 | } | ||||||
2618 | |||||||
2619 | return true; | ||||||
2620 | } | ||||||
2621 | |||||||
2622 | /// Calls SelectAddr and determines if the maximal addressing | ||||||
2623 | /// mode it matches can be cost effectively emitted as an LEA instruction. | ||||||
2624 | bool X86DAGToDAGISel::selectLEAAddr(SDValue N, | ||||||
2625 | SDValue &Base, SDValue &Scale, | ||||||
2626 | SDValue &Index, SDValue &Disp, | ||||||
2627 | SDValue &Segment) { | ||||||
2628 | X86ISelAddressMode AM; | ||||||
2629 | |||||||
2630 | // Save the DL and VT before calling matchAddress, it can invalidate N. | ||||||
2631 | SDLoc DL(N); | ||||||
2632 | MVT VT = N.getSimpleValueType(); | ||||||
2633 | |||||||
2634 | // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support | ||||||
2635 | // segments. | ||||||
2636 | SDValue Copy = AM.Segment; | ||||||
2637 | SDValue T = CurDAG->getRegister(0, MVT::i32); | ||||||
2638 | AM.Segment = T; | ||||||
2639 | if (matchAddress(N, AM)) | ||||||
2640 | return false; | ||||||
2641 | assert (T == AM.Segment)(static_cast <bool> (T == AM.Segment) ? void (0) : __assert_fail ("T == AM.Segment", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2641, __extension__ __PRETTY_FUNCTION__)); | ||||||
2642 | AM.Segment = Copy; | ||||||
2643 | |||||||
2644 | unsigned Complexity = 0; | ||||||
2645 | if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode()) | ||||||
2646 | Complexity = 1; | ||||||
2647 | else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) | ||||||
2648 | Complexity = 4; | ||||||
2649 | |||||||
2650 | if (AM.IndexReg.getNode()) | ||||||
2651 | Complexity++; | ||||||
2652 | |||||||
2653 | // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with | ||||||
2654 | // a simple shift. | ||||||
2655 | if (AM.Scale > 1) | ||||||
2656 | Complexity++; | ||||||
2657 | |||||||
2658 | // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA | ||||||
2659 | // to a LEA. This is determined with some experimentation but is by no means | ||||||
2660 | // optimal (especially for code size consideration). LEA is nice because of | ||||||
2661 | // its three-address nature. Tweak the cost function again when we can run | ||||||
2662 | // convertToThreeAddress() at register allocation time. | ||||||
2663 | if (AM.hasSymbolicDisplacement()) { | ||||||
2664 | // For X86-64, always use LEA to materialize RIP-relative addresses. | ||||||
2665 | if (Subtarget->is64Bit()) | ||||||
2666 | Complexity = 4; | ||||||
2667 | else | ||||||
2668 | Complexity += 2; | ||||||
2669 | } | ||||||
2670 | |||||||
2671 | // Heuristic: try harder to form an LEA from ADD if the operands set flags. | ||||||
2672 | // Unlike ADD, LEA does not affect flags, so we will be less likely to require | ||||||
2673 | // duplicating flag-producing instructions later in the pipeline. | ||||||
2674 | if (N.getOpcode() == ISD::ADD) { | ||||||
2675 | auto isMathWithFlags = [](SDValue V) { | ||||||
2676 | switch (V.getOpcode()) { | ||||||
2677 | case X86ISD::ADD: | ||||||
2678 | case X86ISD::SUB: | ||||||
2679 | case X86ISD::ADC: | ||||||
2680 | case X86ISD::SBB: | ||||||
2681 | /* TODO: These opcodes can be added safely, but we may want to justify | ||||||
2682 | their inclusion for different reasons (better for reg-alloc). | ||||||
2683 | case X86ISD::SMUL: | ||||||
2684 | case X86ISD::UMUL: | ||||||
2685 | case X86ISD::OR: | ||||||
2686 | case X86ISD::XOR: | ||||||
2687 | case X86ISD::AND: | ||||||
2688 | */ | ||||||
2689 | // Value 1 is the flag output of the node - verify it's not dead. | ||||||
2690 | return !SDValue(V.getNode(), 1).use_empty(); | ||||||
2691 | default: | ||||||
2692 | return false; | ||||||
2693 | } | ||||||
2694 | }; | ||||||
2695 | // TODO: This could be an 'or' rather than 'and' to make the transform more | ||||||
2696 | // likely to happen. We might want to factor in whether there's a | ||||||
2697 | // load folding opportunity for the math op that disappears with LEA. | ||||||
2698 | if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) | ||||||
2699 | Complexity++; | ||||||
2700 | } | ||||||
2701 | |||||||
2702 | if (AM.Disp) | ||||||
2703 | Complexity++; | ||||||
2704 | |||||||
2705 | // If it isn't worth using an LEA, reject it. | ||||||
2706 | if (Complexity
| ||||||
2707 | return false; | ||||||
2708 | |||||||
2709 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); | ||||||
2710 | return true; | ||||||
2711 | } | ||||||
2712 | |||||||
2713 | /// This is only run on TargetGlobalTLSAddress nodes. | ||||||
2714 | bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, | ||||||
2715 | SDValue &Scale, SDValue &Index, | ||||||
2716 | SDValue &Disp, SDValue &Segment) { | ||||||
2717 | assert(N.getOpcode() == ISD::TargetGlobalTLSAddress)(static_cast <bool> (N.getOpcode() == ISD::TargetGlobalTLSAddress ) ? void (0) : __assert_fail ("N.getOpcode() == ISD::TargetGlobalTLSAddress" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2717, __extension__ __PRETTY_FUNCTION__)); | ||||||
2718 | const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); | ||||||
2719 | |||||||
2720 | X86ISelAddressMode AM; | ||||||
2721 | AM.GV = GA->getGlobal(); | ||||||
2722 | AM.Disp += GA->getOffset(); | ||||||
2723 | AM.SymbolFlags = GA->getTargetFlags(); | ||||||
2724 | |||||||
2725 | if (Subtarget->is32Bit()) { | ||||||
2726 | AM.Scale = 1; | ||||||
2727 | AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); | ||||||
2728 | } | ||||||
2729 | |||||||
2730 | MVT VT = N.getSimpleValueType(); | ||||||
2731 | getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment); | ||||||
2732 | return true; | ||||||
2733 | } | ||||||
2734 | |||||||
bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
  // Try to select a wrapped symbolic value as a relocatable immediate operand.
  // Keep track of the original value type and whether this value was
  // truncated. If we see a truncation from pointer type to VT that truncates
  // bits that are known to be zero, we can use a narrow reference.
  EVT VT = N.getValueType();
  bool WasTruncated = false;
  if (N.getOpcode() == ISD::TRUNCATE) {
    WasTruncated = true;
    N = N.getOperand(0);
  }

  if (N.getOpcode() != X86ISD::Wrapper)
    return false;

  // We can only use non-GlobalValues as immediates if they were not truncated,
  // as we do not have any range information. If we have a GlobalValue and the
  // address was not truncated, we can select it as an operand directly.
  unsigned Opc = N.getOperand(0)->getOpcode();
  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
    Op = N.getOperand(0);
    // We can only select the operand directly if we didn't have to look past a
    // truncate.
    return !WasTruncated;
  }

  // Check that the global's range fits into VT.
  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
    return false;

  // Okay, we can use a narrow reference. Re-emit the target global address at
  // the narrower type, preserving its offset and target flags.
  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
                                      GA->getOffset(), GA->getTargetFlags());
  return true;
}
2771 | |||||||
2772 | bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, | ||||||
2773 | SDValue &Base, SDValue &Scale, | ||||||
2774 | SDValue &Index, SDValue &Disp, | ||||||
2775 | SDValue &Segment) { | ||||||
2776 | assert(Root && P && "Unknown root/parent nodes")(static_cast <bool> (Root && P && "Unknown root/parent nodes" ) ? void (0) : __assert_fail ("Root && P && \"Unknown root/parent nodes\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2776, __extension__ __PRETTY_FUNCTION__)); | ||||||
2777 | if (!ISD::isNON_EXTLoad(N.getNode()) || | ||||||
2778 | !IsProfitableToFold(N, P, Root) || | ||||||
2779 | !IsLegalToFold(N, P, Root, OptLevel)) | ||||||
2780 | return false; | ||||||
2781 | |||||||
2782 | return selectAddr(N.getNode(), | ||||||
2783 | N.getOperand(1), Base, Scale, Index, Disp, Segment); | ||||||
2784 | } | ||||||
2785 | |||||||
2786 | bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, | ||||||
2787 | SDValue &Base, SDValue &Scale, | ||||||
2788 | SDValue &Index, SDValue &Disp, | ||||||
2789 | SDValue &Segment) { | ||||||
2790 | assert(Root && P && "Unknown root/parent nodes")(static_cast <bool> (Root && P && "Unknown root/parent nodes" ) ? void (0) : __assert_fail ("Root && P && \"Unknown root/parent nodes\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2790, __extension__ __PRETTY_FUNCTION__)); | ||||||
2791 | if (N->getOpcode() != X86ISD::VBROADCAST_LOAD || | ||||||
2792 | !IsProfitableToFold(N, P, Root) || | ||||||
2793 | !IsLegalToFold(N, P, Root, OptLevel)) | ||||||
2794 | return false; | ||||||
2795 | |||||||
2796 | return selectAddr(N.getNode(), | ||||||
2797 | N.getOperand(1), Base, Scale, Index, Disp, Segment); | ||||||
2798 | } | ||||||
2799 | |||||||
2800 | /// Return an SDNode that returns the value of the global base register. | ||||||
2801 | /// Output instructions required to initialize the global base register, | ||||||
2802 | /// if necessary. | ||||||
2803 | SDNode *X86DAGToDAGISel::getGlobalBaseReg() { | ||||||
2804 | unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); | ||||||
2805 | auto &DL = MF->getDataLayout(); | ||||||
2806 | return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); | ||||||
2807 | } | ||||||
2808 | |||||||
2809 | bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { | ||||||
2810 | if (N->getOpcode() == ISD::TRUNCATE) | ||||||
2811 | N = N->getOperand(0).getNode(); | ||||||
2812 | if (N->getOpcode() != X86ISD::Wrapper) | ||||||
2813 | return false; | ||||||
2814 | |||||||
2815 | auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0)); | ||||||
2816 | if (!GA) | ||||||
2817 | return false; | ||||||
2818 | |||||||
2819 | Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange(); | ||||||
2820 | if (!CR) | ||||||
2821 | return Width == 32 && TM.getCodeModel() == CodeModel::Small; | ||||||
2822 | |||||||
2823 | return CR->getSignedMin().sge(-1ull << Width) && | ||||||
2824 | CR->getSignedMax().slt(1ull << Width); | ||||||
2825 | } | ||||||
2826 | |||||||
2827 | static X86::CondCode getCondFromNode(SDNode *N) { | ||||||
2828 | assert(N->isMachineOpcode() && "Unexpected node")(static_cast <bool> (N->isMachineOpcode() && "Unexpected node") ? void (0) : __assert_fail ("N->isMachineOpcode() && \"Unexpected node\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2828, __extension__ __PRETTY_FUNCTION__)); | ||||||
2829 | X86::CondCode CC = X86::COND_INVALID; | ||||||
2830 | unsigned Opc = N->getMachineOpcode(); | ||||||
2831 | if (Opc == X86::JCC_1) | ||||||
2832 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1)); | ||||||
2833 | else if (Opc == X86::SETCCr) | ||||||
2834 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0)); | ||||||
2835 | else if (Opc == X86::SETCCm) | ||||||
2836 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5)); | ||||||
2837 | else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || | ||||||
2838 | Opc == X86::CMOV64rr) | ||||||
2839 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2)); | ||||||
2840 | else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm || | ||||||
2841 | Opc == X86::CMOV64rm) | ||||||
2842 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6)); | ||||||
2843 | |||||||
2844 | return CC; | ||||||
2845 | } | ||||||
2846 | |||||||
2847 | /// Test whether the given X86ISD::CMP node has any users that use a flag | ||||||
2848 | /// other than ZF. | ||||||
bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
  // Returns true only if every flag consumer reaches EFLAGS through a
  // CopyToReg and then tests nothing but ZF (COND_E/COND_NE).
  // Examine each user of the node.
  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    // Only check things that use the flags.
    if (UI.getUse().getResNo() != Flags.getResNo())
      continue;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (UI->getOpcode() != ISD::CopyToReg ||
        cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the condition code of the user.
      X86::CondCode CC = getCondFromNode(*FlagUI);

      switch (CC) {
      // Comparisons which only use the zero flag.
      case X86::COND_E: case X86::COND_NE:
        continue;
      // Anything else: assume conservatively.
      default:
        return false;
      }
    }
  }
  return true;
}
2882 | |||||||
2883 | /// Test whether the given X86ISD::CMP node has any uses which require the SF | ||||||
2884 | /// flag to be accurate. | ||||||
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
  // Returns true only if no flag consumer reads SF: every consumer must reach
  // EFLAGS via CopyToReg and use a condition code that ignores the sign flag.
  // Examine each user of the node.
  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    // Only check things that use the flags.
    if (UI.getUse().getResNo() != Flags.getResNo())
      continue;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (UI->getOpcode() != ISD::CopyToReg ||
        cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the condition code of the user.
      X86::CondCode CC = getCondFromNode(*FlagUI);

      switch (CC) {
      // Comparisons which don't examine the SF flag.
      case X86::COND_A: case X86::COND_AE:
      case X86::COND_B: case X86::COND_BE:
      case X86::COND_E: case X86::COND_NE:
      case X86::COND_O: case X86::COND_NO:
      case X86::COND_P: case X86::COND_NP:
        continue;
      // Anything else: assume conservatively.
      default:
        return false;
      }
    }
  }
  return true;
}
2922 | |||||||
2923 | static bool mayUseCarryFlag(X86::CondCode CC) { | ||||||
2924 | switch (CC) { | ||||||
2925 | // Comparisons which don't examine the CF flag. | ||||||
2926 | case X86::COND_O: case X86::COND_NO: | ||||||
2927 | case X86::COND_E: case X86::COND_NE: | ||||||
2928 | case X86::COND_S: case X86::COND_NS: | ||||||
2929 | case X86::COND_P: case X86::COND_NP: | ||||||
2930 | case X86::COND_L: case X86::COND_GE: | ||||||
2931 | case X86::COND_G: case X86::COND_LE: | ||||||
2932 | return false; | ||||||
2933 | // Anything else: assume conservatively. | ||||||
2934 | default: | ||||||
2935 | return true; | ||||||
2936 | } | ||||||
2937 | } | ||||||
2938 | |||||||
2939 | /// Test whether the given node which sets flags has any uses which require the | ||||||
2940 | /// CF flag to be accurate. | ||||||
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
  // Returns true only if no flag consumer reads CF. Unlike the ZF/SF helpers
  // above, this also handles not-yet-selected X86ISD flag users directly.
  // Examine each user of the node.
  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    // Only check things that use the flags.
    if (UI.getUse().getResNo() != Flags.getResNo())
      continue;

    unsigned UIOpc = UI->getOpcode();

    if (UIOpc == ISD::CopyToReg) {
      // Only examine CopyToReg uses that copy to EFLAGS.
      if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
        return false;
      // Examine each user of the CopyToReg use.
      for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
           FlagUI != FlagUE; ++FlagUI) {
        // Only examine the Flag result.
        if (FlagUI.getUse().getResNo() != 1)
          continue;
        // Anything unusual: assume conservatively.
        if (!FlagUI->isMachineOpcode())
          return false;
        // Examine the condition code of the user.
        X86::CondCode CC = getCondFromNode(*FlagUI);

        if (mayUseCarryFlag(CC))
          return false;
      }

      // This CopyToReg is ok. Move on to the next user.
      continue;
    }

    // This might be an unselected node. So look for the pre-isel opcodes that
    // use flags.
    unsigned CCOpNo;
    switch (UIOpc) {
    default:
      // Something unusual. Be conservative.
      return false;
    case X86ISD::SETCC:       CCOpNo = 0; break;
    case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
    case X86ISD::CMOV:        CCOpNo = 2; break;
    case X86ISD::BRCOND:      CCOpNo = 2; break;
    }

    // Read the condition-code operand off the pre-isel node and vet it.
    X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
    if (mayUseCarryFlag(CC))
      return false;
  }
  return true;
}
2994 | |||||||
2995 | /// Check whether or not the chain ending in StoreNode is suitable for doing | ||||||
2996 | /// the {load; op; store} to modify transformation. | ||||||
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
                                        SDValue StoredVal, SelectionDAG *CurDAG,
                                        unsigned LoadOpNo,
                                        LoadSDNode *&LoadNode,
                                        SDValue &InputChain) {
  // On success, LoadNode is the matched load and InputChain is a TokenFactor
  // suitable as the chain input of the fused load-op-store instruction.
  // Is the stored value result 0 of the operation?
  if (StoredVal.getResNo() != 0) return false;

  // Are there other uses of the operation other than the store?
  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;

  // Is the store non-extending and non-indexed?
  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
    return false;

  SDValue Load = StoredVal->getOperand(LoadOpNo);
  // Is the stored value a non-extending and non-indexed load?
  if (!ISD::isNormalLoad(Load.getNode())) return false;

  // Return LoadNode by reference.
  LoadNode = cast<LoadSDNode>(Load);

  // Is store the only read of the loaded value?
  if (!Load.hasOneUse())
    return false;

  // Is the address of the store the same as the load?
  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
      LoadNode->getOffset() != StoreNode->getOffset())
    return false;

  bool FoundLoad = false;
  SmallVector<SDValue, 4> ChainOps;
  SmallVector<const SDNode *, 4> LoopWorklist;
  SmallPtrSet<const SDNode *, 16> Visited;
  // Cap on the predecessor search below, to bound compile time.
  const unsigned int Max = 1024;

  // Visualization of Load-Op-Store fusion:
  // -------------------------
  // Legend:
  //    *-lines = Chain operand dependencies.
  //    |-lines = Normal operand dependencies.
  //    Dependencies flow down and right. n-suffix references multiple nodes.
  //
  // C                        Xn  C
  // *                         *  *
  // *                          * *
  // Xn  A-LD    Yn                TF         Yn
  // *    * \    |                  *         |
  //  *   *  \   |                   *        |
  //   *  *   \  |             =>       A--LD_OP_ST
  //    * *    \ |                     / \
  //     TF    OP                     /   \
  //      *   | \                    Zn    \
  //       *  |  \                          Zn
  //        A-ST  Zn
  //

  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
  //                                      #2: Yn -> LD
  //                                      #3: ST -> Zn

  // Ensure the transform is safe by checking for the dual
  // dependencies to make sure we do not induce a loop.

  // As LD is a predecessor to both OP and ST we can do this by checking:
  //  a). if LD is a predecessor to a member of Xn or Yn.
  //  b). if a Zn is a predecessor to ST.

  // However, (b) can only occur through being a chain predecessor to
  // ST, which is the same as Zn being a member or predecessor of Xn,
  // which is a subset of LD being a predecessor of Xn. So it's
  // subsumed by check (a).

  SDValue Chain = StoreNode->getChain();

  // Gather X elements in ChainOps.
  if (Chain == Load.getValue(1)) {
    FoundLoad = true;
    ChainOps.push_back(Load.getOperand(0));
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
      SDValue Op = Chain.getOperand(i);
      if (Op == Load.getValue(1)) {
        FoundLoad = true;
        // Drop Load, but keep its chain. No cycle check necessary.
        ChainOps.push_back(Load.getOperand(0));
        continue;
      }
      LoopWorklist.push_back(Op.getNode());
      ChainOps.push_back(Op);
    }
  }

  if (!FoundLoad)
    return false;

  // Worklist is currently Xn. Add Yn to worklist.
  for (SDValue Op : StoredVal->ops())
    if (Op.getNode() != LoadNode)
      LoopWorklist.push_back(Op.getNode());

  // Check (a) if Load is a predecessor to Xn + Yn
  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
                                   true))
    return false;

  // Safe: build the fused node's input chain from the gathered X elements.
  InputChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
  return true;
}
3108 | |||||||
3109 | // Change a chain of {load; op; store} of the same value into a simple op | ||||||
3110 | // through memory of that value, if the uses of the modified value and its | ||||||
3111 | // address are suitable. | ||||||
3112 | // | ||||||
3113 | // The tablegen pattern memory operand pattern is currently not able to match | ||||||
3114 | // the case where the EFLAGS on the original operation are used. | ||||||
3115 | // | ||||||
3116 | // To move this to tablegen, we'll need to improve tablegen to allow flags to | ||||||
3117 | // be transferred from a node in the pattern to the result node, probably with | ||||||
3118 | // a new keyword. For example, we have this | ||||||
3119 | // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", | ||||||
3120 | // [(store (add (loadi64 addr:$dst), -1), addr:$dst), | ||||||
3121 | // (implicit EFLAGS)]>; | ||||||
3122 | // but maybe need something like this | ||||||
3123 | // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", | ||||||
3124 | // [(store (add (loadi64 addr:$dst), -1), addr:$dst), | ||||||
3125 | // (transferrable EFLAGS)]>; | ||||||
3126 | // | ||||||
3127 | // Until then, we manually fold these and instruction select the operation | ||||||
3128 | // here. | ||||||
3129 | bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { | ||||||
3130 | StoreSDNode *StoreNode = cast<StoreSDNode>(Node); | ||||||
3131 | SDValue StoredVal = StoreNode->getOperand(1); | ||||||
3132 | unsigned Opc = StoredVal->getOpcode(); | ||||||
3133 | |||||||
3134 | // Before we try to select anything, make sure this is memory operand size | ||||||
3135 | // and opcode we can handle. Note that this must match the code below that | ||||||
3136 | // actually lowers the opcodes. | ||||||
3137 | EVT MemVT = StoreNode->getMemoryVT(); | ||||||
3138 | if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 && | ||||||
3139 | MemVT != MVT::i8) | ||||||
3140 | return false; | ||||||
3141 | |||||||
3142 | bool IsCommutable = false; | ||||||
3143 | bool IsNegate = false; | ||||||
3144 | switch (Opc) { | ||||||
3145 | default: | ||||||
3146 | return false; | ||||||
3147 | case X86ISD::SUB: | ||||||
3148 | IsNegate = isNullConstant(StoredVal.getOperand(0)); | ||||||
3149 | break; | ||||||
3150 | case X86ISD::SBB: | ||||||
3151 | break; | ||||||
3152 | case X86ISD::ADD: | ||||||
3153 | case X86ISD::ADC: | ||||||
3154 | case X86ISD::AND: | ||||||
3155 | case X86ISD::OR: | ||||||
3156 | case X86ISD::XOR: | ||||||
3157 | IsCommutable = true; | ||||||
3158 | break; | ||||||
3159 | } | ||||||
3160 | |||||||
3161 | unsigned LoadOpNo = IsNegate ? 1 : 0; | ||||||
3162 | LoadSDNode *LoadNode = nullptr; | ||||||
3163 | SDValue InputChain; | ||||||
3164 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, | ||||||
3165 | LoadNode, InputChain)) { | ||||||
3166 | if (!IsCommutable) | ||||||
3167 | return false; | ||||||
3168 | |||||||
3169 | // This operation is commutable, try the other operand. | ||||||
3170 | LoadOpNo = 1; | ||||||
3171 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, | ||||||
3172 | LoadNode, InputChain)) | ||||||
3173 | return false; | ||||||
3174 | } | ||||||
3175 | |||||||
3176 | SDValue Base, Scale, Index, Disp, Segment; | ||||||
3177 | if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, | ||||||
3178 | Segment)) | ||||||
3179 | return false; | ||||||
3180 | |||||||
3181 | auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16, | ||||||
3182 | unsigned Opc8) { | ||||||
3183 | switch (MemVT.getSimpleVT().SimpleTy) { | ||||||
3184 | case MVT::i64: | ||||||
3185 | return Opc64; | ||||||
3186 | case MVT::i32: | ||||||
3187 | return Opc32; | ||||||
3188 | case MVT::i16: | ||||||
3189 | return Opc16; | ||||||
3190 | case MVT::i8: | ||||||
3191 | return Opc8; | ||||||
3192 | default: | ||||||
3193 | llvm_unreachable("Invalid size!")::llvm::llvm_unreachable_internal("Invalid size!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3193); | ||||||
3194 | } | ||||||
3195 | }; | ||||||
3196 | |||||||
3197 | MachineSDNode *Result; | ||||||
3198 | switch (Opc) { | ||||||
3199 | case X86ISD::SUB: | ||||||
3200 | // Handle negate. | ||||||
3201 | if (IsNegate) { | ||||||
3202 | unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m, | ||||||
3203 | X86::NEG8m); | ||||||
3204 | const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; | ||||||
3205 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, | ||||||
3206 | MVT::Other, Ops); | ||||||
3207 | break; | ||||||
3208 | } | ||||||
3209 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
3210 | case X86ISD::ADD: | ||||||
3211 | // Try to match inc/dec. | ||||||
3212 | if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) { | ||||||
3213 | bool IsOne = isOneConstant(StoredVal.getOperand(1)); | ||||||
3214 | bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); | ||||||
3215 | // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec. | ||||||
3216 | if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) { | ||||||
3217 | unsigned NewOpc = | ||||||
3218 | ((Opc == X86ISD::ADD) == IsOne) | ||||||
3219 | ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) | ||||||
3220 | : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); | ||||||
3221 | const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; | ||||||
3222 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, | ||||||
3223 | MVT::Other, Ops); | ||||||
3224 | break; | ||||||
3225 | } | ||||||
3226 | } | ||||||
3227 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
3228 | case X86ISD::ADC: | ||||||
3229 | case X86ISD::SBB: | ||||||
3230 | case X86ISD::AND: | ||||||
3231 | case X86ISD::OR: | ||||||
3232 | case X86ISD::XOR: { | ||||||
3233 | auto SelectRegOpcode = [SelectOpcode](unsigned Opc) { | ||||||
3234 | switch (Opc) { | ||||||
3235 | case X86ISD::ADD: | ||||||
3236 | return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr, | ||||||
3237 | X86::ADD8mr); | ||||||
3238 | case X86ISD::ADC: | ||||||
3239 | return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr, | ||||||
3240 | X86::ADC8mr); | ||||||
3241 | case X86ISD::SUB: | ||||||
3242 | return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr, | ||||||
3243 | X86::SUB8mr); | ||||||
3244 | case X86ISD::SBB: | ||||||
3245 | return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr, | ||||||
3246 | X86::SBB8mr); | ||||||
3247 | case X86ISD::AND: | ||||||
3248 | return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr, | ||||||
3249 | X86::AND8mr); | ||||||
3250 | case X86ISD::OR: | ||||||
3251 | return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr); | ||||||
3252 | case X86ISD::XOR: | ||||||
3253 | return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr, | ||||||
3254 | X86::XOR8mr); | ||||||
3255 | default: | ||||||
3256 | llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3256); | ||||||
3257 | } | ||||||
3258 | }; | ||||||
3259 | auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) { | ||||||
3260 | switch (Opc) { | ||||||
3261 | case X86ISD::ADD: | ||||||
3262 | return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0); | ||||||
3263 | case X86ISD::ADC: | ||||||
3264 | return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0); | ||||||
3265 | case X86ISD::SUB: | ||||||
3266 | return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0); | ||||||
3267 | case X86ISD::SBB: | ||||||
3268 | return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0); | ||||||
3269 | case X86ISD::AND: | ||||||
3270 | return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0); | ||||||
3271 | case X86ISD::OR: | ||||||
3272 | return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0); | ||||||
3273 | case X86ISD::XOR: | ||||||
3274 | return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0); | ||||||
3275 | default: | ||||||
3276 | llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3276); | ||||||
3277 | } | ||||||
3278 | }; | ||||||
3279 | auto SelectImmOpcode = [SelectOpcode](unsigned Opc) { | ||||||
3280 | switch (Opc) { | ||||||
3281 | case X86ISD::ADD: | ||||||
3282 | return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi, | ||||||
3283 | X86::ADD8mi); | ||||||
3284 | case X86ISD::ADC: | ||||||
3285 | return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi, | ||||||
3286 | X86::ADC8mi); | ||||||
3287 | case X86ISD::SUB: | ||||||
3288 | return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi, | ||||||
3289 | X86::SUB8mi); | ||||||
3290 | case X86ISD::SBB: | ||||||
3291 | return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi, | ||||||
3292 | X86::SBB8mi); | ||||||
3293 | case X86ISD::AND: | ||||||
3294 | return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi, | ||||||
3295 | X86::AND8mi); | ||||||
3296 | case X86ISD::OR: | ||||||
3297 | return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi, | ||||||
3298 | X86::OR8mi); | ||||||
3299 | case X86ISD::XOR: | ||||||
3300 | return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi, | ||||||
3301 | X86::XOR8mi); | ||||||
3302 | default: | ||||||
3303 | llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3303); | ||||||
3304 | } | ||||||
3305 | }; | ||||||
3306 | |||||||
3307 | unsigned NewOpc = SelectRegOpcode(Opc); | ||||||
3308 | SDValue Operand = StoredVal->getOperand(1-LoadOpNo); | ||||||
3309 | |||||||
3310 | // See if the operand is a constant that we can fold into an immediate | ||||||
3311 | // operand. | ||||||
3312 | if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) { | ||||||
3313 | int64_t OperandV = OperandC->getSExtValue(); | ||||||
3314 | |||||||
3315 | // Check if we can shrink the operand enough to fit in an immediate (or | ||||||
3316 | // fit into a smaller immediate) by negating it and switching the | ||||||
3317 | // operation. | ||||||
3318 | if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) && | ||||||
3319 | ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) || | ||||||
3320 | (MemVT == MVT::i64 && !isInt<32>(OperandV) && | ||||||
3321 | isInt<32>(-OperandV))) && | ||||||
3322 | hasNoCarryFlagUses(StoredVal.getValue(1))) { | ||||||
3323 | OperandV = -OperandV; | ||||||
3324 | Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD; | ||||||
3325 | } | ||||||
3326 | |||||||
3327 | // First try to fit this into an Imm8 operand. If it doesn't fit, then try | ||||||
3328 | // the larger immediate operand. | ||||||
3329 | if (MemVT != MVT::i8 && isInt<8>(OperandV)) { | ||||||
3330 | Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); | ||||||
3331 | NewOpc = SelectImm8Opcode(Opc); | ||||||
3332 | } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) { | ||||||
3333 | Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); | ||||||
3334 | NewOpc = SelectImmOpcode(Opc); | ||||||
3335 | } | ||||||
3336 | } | ||||||
3337 | |||||||
3338 | if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) { | ||||||
3339 | SDValue CopyTo = | ||||||
3340 | CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS, | ||||||
3341 | StoredVal.getOperand(2), SDValue()); | ||||||
3342 | |||||||
3343 | const SDValue Ops[] = {Base, Scale, Index, Disp, | ||||||
3344 | Segment, Operand, CopyTo, CopyTo.getValue(1)}; | ||||||
3345 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, | ||||||
3346 | Ops); | ||||||
3347 | } else { | ||||||
3348 | const SDValue Ops[] = {Base, Scale, Index, Disp, | ||||||
3349 | Segment, Operand, InputChain}; | ||||||
3350 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, | ||||||
3351 | Ops); | ||||||
3352 | } | ||||||
3353 | break; | ||||||
3354 | } | ||||||
3355 | default: | ||||||
3356 | llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3356); | ||||||
3357 | } | ||||||
3358 | |||||||
3359 | MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(), | ||||||
3360 | LoadNode->getMemOperand()}; | ||||||
3361 | CurDAG->setNodeMemRefs(Result, MemOps); | ||||||
3362 | |||||||
3363 | // Update Load Chain uses as well. | ||||||
3364 | ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1)); | ||||||
3365 | ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); | ||||||
3366 | ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); | ||||||
3367 | CurDAG->RemoveDeadNode(Node); | ||||||
3368 | return true; | ||||||
3369 | } | ||||||
3370 | |||||||
3371 | // See if this is an X & Mask that we can match to BEXTR/BZHI. | ||||||
3372 | // Where Mask is one of the following patterns: | ||||||
3373 | // a) x & (1 << nbits) - 1 | ||||||
3374 | // b) x & ~(-1 << nbits) | ||||||
3375 | // c) x & (-1 >> (32 - y)) | ||||||
3376 | // d) x << (32 - y) >> (32 - y) | ||||||
3377 | bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { | ||||||
3378 | assert((static_cast <bool> ((Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && "Should be either an and-mask, or right-shift after clearing high bits." ) ? void (0) : __assert_fail ("(Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && \"Should be either an and-mask, or right-shift after clearing high bits.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3380, __extension__ __PRETTY_FUNCTION__)) | ||||||
3379 | (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&(static_cast <bool> ((Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && "Should be either an and-mask, or right-shift after clearing high bits." ) ? void (0) : __assert_fail ("(Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && \"Should be either an and-mask, or right-shift after clearing high bits.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3380, __extension__ __PRETTY_FUNCTION__)) | ||||||
3380 | "Should be either an and-mask, or right-shift after clearing high bits.")(static_cast <bool> ((Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && "Should be either an and-mask, or right-shift after clearing high bits." ) ? void (0) : __assert_fail ("(Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && \"Should be either an and-mask, or right-shift after clearing high bits.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3380, __extension__ __PRETTY_FUNCTION__)); | ||||||
3381 | |||||||
3382 | // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one. | ||||||
3383 | if (!Subtarget->hasBMI() && !Subtarget->hasBMI2()) | ||||||
3384 | return false; | ||||||
3385 | |||||||
3386 | MVT NVT = Node->getSimpleValueType(0); | ||||||
3387 | |||||||
3388 | // Only supported for 32 and 64 bits. | ||||||
3389 | if (NVT != MVT::i32 && NVT != MVT::i64) | ||||||
3390 | return false; | ||||||
3391 | |||||||
3392 | SDValue NBits; | ||||||
3393 | |||||||
3394 | // If we have BMI2's BZHI, we are ok with muti-use patterns. | ||||||
3395 | // Else, if we only have BMI1's BEXTR, we require one-use. | ||||||
3396 | const bool CanHaveExtraUses = Subtarget->hasBMI2(); | ||||||
3397 | auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) { | ||||||
3398 | return CanHaveExtraUses || | ||||||
3399 | Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo()); | ||||||
3400 | }; | ||||||
3401 | auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); }; | ||||||
3402 | auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); }; | ||||||
3403 | |||||||
3404 | auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) { | ||||||
3405 | if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) { | ||||||
3406 | assert(V.getSimpleValueType() == MVT::i32 &&(static_cast <bool> (V.getSimpleValueType() == MVT::i32 && V.getOperand(0).getSimpleValueType() == MVT::i64 && "Expected i64 -> i32 truncation") ? void (0) : __assert_fail ("V.getSimpleValueType() == MVT::i32 && V.getOperand(0).getSimpleValueType() == MVT::i64 && \"Expected i64 -> i32 truncation\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3408, __extension__ __PRETTY_FUNCTION__)) | ||||||
3407 | V.getOperand(0).getSimpleValueType() == MVT::i64 &&(static_cast <bool> (V.getSimpleValueType() == MVT::i32 && V.getOperand(0).getSimpleValueType() == MVT::i64 && "Expected i64 -> i32 truncation") ? void (0) : __assert_fail ("V.getSimpleValueType() == MVT::i32 && V.getOperand(0).getSimpleValueType() == MVT::i64 && \"Expected i64 -> i32 truncation\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3408, __extension__ __PRETTY_FUNCTION__)) | ||||||
3408 | "Expected i64 -> i32 truncation")(static_cast <bool> (V.getSimpleValueType() == MVT::i32 && V.getOperand(0).getSimpleValueType() == MVT::i64 && "Expected i64 -> i32 truncation") ? void (0) : __assert_fail ("V.getSimpleValueType() == MVT::i32 && V.getOperand(0).getSimpleValueType() == MVT::i64 && \"Expected i64 -> i32 truncation\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3408, __extension__ __PRETTY_FUNCTION__)); | ||||||
3409 | V = V.getOperand(0); | ||||||
3410 | } | ||||||
3411 | return V; | ||||||
3412 | }; | ||||||
3413 | |||||||
3414 | // a) x & ((1 << nbits) + (-1)) | ||||||
3415 | auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, | ||||||
3416 | &NBits](SDValue Mask) -> bool { | ||||||
3417 | // Match `add`. Must only have one use! | ||||||
3418 | if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask)) | ||||||
3419 | return false; | ||||||
3420 | // We should be adding all-ones constant (i.e. subtracting one.) | ||||||
3421 | if (!isAllOnesConstant(Mask->getOperand(1))) | ||||||
3422 | return false; | ||||||
3423 | // Match `1 << nbits`. Might be truncated. Must only have one use! | ||||||
3424 | SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); | ||||||
3425 | if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) | ||||||
3426 | return false; | ||||||
3427 | if (!isOneConstant(M0->getOperand(0))) | ||||||
3428 | return false; | ||||||
3429 | NBits = M0->getOperand(1); | ||||||
3430 | return true; | ||||||
3431 | }; | ||||||
3432 | |||||||
3433 | auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) { | ||||||
3434 | V = peekThroughOneUseTruncation(V); | ||||||
3435 | return CurDAG->MaskedValueIsAllOnes( | ||||||
3436 | V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(), | ||||||
3437 | NVT.getSizeInBits())); | ||||||
3438 | }; | ||||||
3439 | |||||||
3440 | // b) x & ~(-1 << nbits) | ||||||
3441 | auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation, | ||||||
3442 | &NBits](SDValue Mask) -> bool { | ||||||
3443 | // Match `~()`. Must only have one use! | ||||||
3444 | if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask)) | ||||||
3445 | return false; | ||||||
3446 | // The -1 only has to be all-ones for the final Node's NVT. | ||||||
3447 | if (!isAllOnes(Mask->getOperand(1))) | ||||||
3448 | return false; | ||||||
3449 | // Match `-1 << nbits`. Might be truncated. Must only have one use! | ||||||
3450 | SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); | ||||||
3451 | if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) | ||||||
3452 | return false; | ||||||
3453 | // The -1 only has to be all-ones for the final Node's NVT. | ||||||
3454 | if (!isAllOnes(M0->getOperand(0))) | ||||||
3455 | return false; | ||||||
3456 | NBits = M0->getOperand(1); | ||||||
3457 | return true; | ||||||
3458 | }; | ||||||
3459 | |||||||
3460 | // Match potentially-truncated (bitwidth - y) | ||||||
3461 | auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt, | ||||||
3462 | unsigned Bitwidth) { | ||||||
3463 | // Skip over a truncate of the shift amount. | ||||||
3464 | if (ShiftAmt.getOpcode() == ISD::TRUNCATE) { | ||||||
3465 | ShiftAmt = ShiftAmt.getOperand(0); | ||||||
3466 | // The trunc should have been the only user of the real shift amount. | ||||||
3467 | if (!checkOneUse(ShiftAmt)) | ||||||
3468 | return false; | ||||||
3469 | } | ||||||
3470 | // Match the shift amount as: (bitwidth - y). It should go away, too. | ||||||
3471 | if (ShiftAmt.getOpcode() != ISD::SUB) | ||||||
3472 | return false; | ||||||
3473 | auto *V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0)); | ||||||
3474 | if (!V0 || V0->getZExtValue() != Bitwidth) | ||||||
3475 | return false; | ||||||
3476 | NBits = ShiftAmt.getOperand(1); | ||||||
3477 | return true; | ||||||
3478 | }; | ||||||
3479 | |||||||
3480 | // c) x & (-1 >> (32 - y)) | ||||||
3481 | auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, | ||||||
3482 | matchShiftAmt](SDValue Mask) -> bool { | ||||||
3483 | // The mask itself may be truncated. | ||||||
3484 | Mask = peekThroughOneUseTruncation(Mask); | ||||||
3485 | unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits(); | ||||||
3486 | // Match `l>>`. Must only have one use! | ||||||
3487 | if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask)) | ||||||
3488 | return false; | ||||||
3489 | // We should be shifting truly all-ones constant. | ||||||
3490 | if (!isAllOnesConstant(Mask.getOperand(0))) | ||||||
3491 | return false; | ||||||
3492 | SDValue M1 = Mask.getOperand(1); | ||||||
3493 | // The shift amount should not be used externally. | ||||||
3494 | if (!checkOneUse(M1)) | ||||||
3495 | return false; | ||||||
3496 | return matchShiftAmt(M1, Bitwidth); | ||||||
3497 | }; | ||||||
3498 | |||||||
3499 | SDValue X; | ||||||
3500 | |||||||
3501 | // d) x << (32 - y) >> (32 - y) | ||||||
3502 | auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt, | ||||||
3503 | &X](SDNode *Node) -> bool { | ||||||
3504 | if (Node->getOpcode() != ISD::SRL) | ||||||
3505 | return false; | ||||||
3506 | SDValue N0 = Node->getOperand(0); | ||||||
3507 | if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0)) | ||||||
3508 | return false; | ||||||
3509 | unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits(); | ||||||
3510 | SDValue N1 = Node->getOperand(1); | ||||||
3511 | SDValue N01 = N0->getOperand(1); | ||||||
3512 | // Both of the shifts must be by the exact same value. | ||||||
3513 | // There should not be any uses of the shift amount outside of the pattern. | ||||||
3514 | if (N1 != N01 || !checkTwoUse(N1)) | ||||||
3515 | return false; | ||||||
3516 | if (!matchShiftAmt(N1, Bitwidth)) | ||||||
3517 | return false; | ||||||
3518 | X = N0->getOperand(0); | ||||||
3519 | return true; | ||||||
3520 | }; | ||||||
3521 | |||||||
3522 | auto matchLowBitMask = [matchPatternA, matchPatternB, | ||||||
3523 | matchPatternC](SDValue Mask) -> bool { | ||||||
3524 | return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask); | ||||||
3525 | }; | ||||||
3526 | |||||||
3527 | if (Node->getOpcode() == ISD::AND) { | ||||||
3528 | X = Node->getOperand(0); | ||||||
3529 | SDValue Mask = Node->getOperand(1); | ||||||
3530 | |||||||
3531 | if (matchLowBitMask(Mask)) { | ||||||
3532 | // Great. | ||||||
3533 | } else { | ||||||
3534 | std::swap(X, Mask); | ||||||
3535 | if (!matchLowBitMask(Mask)) | ||||||
3536 | return false; | ||||||
3537 | } | ||||||
3538 | } else if (!matchPatternD(Node)) | ||||||
3539 | return false; | ||||||
3540 | |||||||
3541 | SDLoc DL(Node); | ||||||
3542 | |||||||
3543 | // Truncate the shift amount. | ||||||
3544 | NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits); | ||||||
3545 | insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); | ||||||
3546 | |||||||
3547 | // Insert 8-bit NBits into lowest 8 bits of 32-bit register. | ||||||
3548 | // All the other bits are undefined, we do not care about them. | ||||||
3549 | SDValue ImplDef = SDValue( | ||||||
3550 | CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0); | ||||||
3551 | insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef); | ||||||
3552 | |||||||
3553 | SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32); | ||||||
3554 | insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal); | ||||||
3555 | NBits = SDValue( | ||||||
3556 | CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef, | ||||||
3557 | NBits, SRIdxVal), 0); | ||||||
3558 | insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); | ||||||
3559 | |||||||
3560 | if (Subtarget->hasBMI2()) { | ||||||
3561 | // Great, just emit the the BZHI.. | ||||||
3562 | if (NVT != MVT::i32) { | ||||||
3563 | // But have to place the bit count into the wide-enough register first. | ||||||
3564 | NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits); | ||||||
3565 | insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); | ||||||
3566 | } | ||||||
3567 | |||||||
3568 | SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits); | ||||||
3569 | ReplaceNode(Node, Extract.getNode()); | ||||||
3570 | SelectCode(Extract.getNode()); | ||||||
3571 | return true; | ||||||
3572 | } | ||||||
3573 | |||||||
3574 | // Else, if we do *NOT* have BMI2, let's find out if the if the 'X' is | ||||||
3575 | // *logically* shifted (potentially with one-use trunc inbetween), | ||||||
3576 | // and the truncation was the only use of the shift, | ||||||
3577 | // and if so look past one-use truncation. | ||||||
3578 | { | ||||||
3579 | SDValue RealX = peekThroughOneUseTruncation(X); | ||||||
3580 | // FIXME: only if the shift is one-use? | ||||||
3581 | if (RealX != X && RealX.getOpcode() == ISD::SRL) | ||||||
3582 | X = RealX; | ||||||
3583 | } | ||||||
3584 | |||||||
3585 | MVT XVT = X.getSimpleValueType(); | ||||||
3586 | |||||||
3587 | // Else, emitting BEXTR requires one more step. | ||||||
3588 | // The 'control' of BEXTR has the pattern of: | ||||||
3589 | // [15...8 bit][ 7...0 bit] location | ||||||
3590 | // [ bit count][ shift] name | ||||||
3591 | // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11 | ||||||
3592 | |||||||
3593 | // Shift NBits left by 8 bits, thus producing 'control'. | ||||||
3594 | // This makes the low 8 bits to be zero. | ||||||
3595 | SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8); | ||||||
3596 | insertDAGNode(*CurDAG, SDValue(Node, 0), C8); | ||||||
3597 | SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8); | ||||||
3598 | insertDAGNode(*CurDAG, SDValue(Node, 0), Control); | ||||||
3599 | |||||||
3600 | // If the 'X' is *logically* shifted, we can fold that shift into 'control'. | ||||||
3601 | // FIXME: only if the shift is one-use? | ||||||
3602 | if (X.getOpcode() == ISD::SRL) { | ||||||
3603 | SDValue ShiftAmt = X.getOperand(1); | ||||||
3604 | X = X.getOperand(0); | ||||||
3605 | |||||||
3606 | assert(ShiftAmt.getValueType() == MVT::i8 &&(static_cast <bool> (ShiftAmt.getValueType() == MVT::i8 && "Expected shift amount to be i8") ? void (0) : __assert_fail ("ShiftAmt.getValueType() == MVT::i8 && \"Expected shift amount to be i8\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3607, __extension__ __PRETTY_FUNCTION__)) | ||||||
3607 | "Expected shift amount to be i8")(static_cast <bool> (ShiftAmt.getValueType() == MVT::i8 && "Expected shift amount to be i8") ? void (0) : __assert_fail ("ShiftAmt.getValueType() == MVT::i8 && \"Expected shift amount to be i8\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3607, __extension__ __PRETTY_FUNCTION__)); | ||||||
3608 | |||||||
3609 | // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero! | ||||||
3610 | // We could zext to i16 in some form, but we intentionally don't do that. | ||||||
3611 | SDValue OrigShiftAmt = ShiftAmt; | ||||||
3612 | ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt); | ||||||
3613 | insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt); | ||||||
3614 | |||||||
3615 | // And now 'or' these low 8 bits of shift amount into the 'control'. | ||||||
3616 | Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt); | ||||||
3617 | insertDAGNode(*CurDAG, SDValue(Node, 0), Control); | ||||||
3618 | } | ||||||
3619 | |||||||
3620 | // But have to place the 'control' into the wide-enough register first. | ||||||
3621 | if (XVT != MVT::i32) { | ||||||
3622 | Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control); | ||||||
3623 | insertDAGNode(*CurDAG, SDValue(Node, 0), Control); | ||||||
3624 | } | ||||||
3625 | |||||||
3626 | // And finally, form the BEXTR itself. | ||||||
3627 | SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control); | ||||||
3628 | |||||||
3629 | // The 'X' was originally truncated. Do that now. | ||||||
3630 | if (XVT != NVT) { | ||||||
3631 | insertDAGNode(*CurDAG, SDValue(Node, 0), Extract); | ||||||
3632 | Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract); | ||||||
3633 | } | ||||||
3634 | |||||||
3635 | ReplaceNode(Node, Extract.getNode()); | ||||||
3636 | SelectCode(Extract.getNode()); | ||||||
3637 | |||||||
3638 | return true; | ||||||
3639 | } | ||||||
3640 | |||||||
// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
// Returns the new machine node on success (BEXTR/BEXTRI, or a BZHI followed
// by a SHR when only BMI2 is profitable), or nullptr so the caller can try
// other patterns.
MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
  MVT NVT = Node->getSimpleValueType(0);
  SDLoc dl(Node);

  SDValue N0 = Node->getOperand(0); // expected shift
  SDValue N1 = Node->getOperand(1); // expected mask constant

  // If we have TBM we can use an immediate for the control. If we have BMI
  // we should only do this if the BEXTR instruction is implemented well.
  // Otherwise moving the control into a register makes this more costly.
  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
  // hoisting the move immediate would make it worthwhile with a less optimal
  // BEXTR?
  bool PreferBEXTR =
      Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
  if (!PreferBEXTR && !Subtarget->hasBMI2())
    return nullptr;

  // Must have a shift right.
  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
    return nullptr;

  // Shift can't have additional users.
  if (!N0->hasOneUse())
    return nullptr;

  // Only supported for 32 and 64 bits.
  if (NVT != MVT::i32 && NVT != MVT::i64)
    return nullptr;

  // Shift amount and RHS of and must be constant.
  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  if (!MaskCst || !ShiftCst)
    return nullptr;

  // And RHS must be a mask (contiguous low bits).
  uint64_t Mask = MaskCst->getZExtValue();
  if (!isMask_64(Mask))
    return nullptr;

  uint64_t Shift = ShiftCst->getZExtValue();
  uint64_t MaskSize = countPopulation(Mask);

  // Don't interfere with something that can be handled by extracting AH.
  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
  if (Shift == 8 && MaskSize == 8)
    return nullptr;

  // Make sure we are only using bits that were in the original value, not
  // shifted in.
  if (Shift + MaskSize > NVT.getSizeInBits())
    return nullptr;

  // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
  // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
  // does not fit into 32 bits. Load folding is not a sufficient reason.
  if (!PreferBEXTR && MaskSize <= 32)
    return nullptr;

  SDValue Control;
  unsigned ROpc, MOpc; // register-form and memory(load-folded)-form opcodes

  if (!PreferBEXTR) {
    assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
    // If we can't make use of BEXTR then we can't fuse shift+mask stages.
    // Let's perform the mask first, and apply shift later. Note that we need to
    // widen the mask to account for the fact that we'll apply shift afterwards!
    Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
    ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
    MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
    // BZHI takes its control in a register; materialize the immediate.
    unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
    Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
  } else {
    // The 'control' of BEXTR has the pattern of:
    // [15...8 bit][ 7...0 bit] location
    // [ bit count][     shift] name
    // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
    Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
    if (Subtarget->hasTBM()) {
      ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
      MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
    } else {
      assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
      // BMI requires the immediate to placed in a register.
      ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
      MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
      unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
      Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
    }
  }

  MachineSDNode *NewNode;
  SDValue Input = N0->getOperand(0);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    // Load-folded form: the five address operands, the control, then the
    // load's chain.
    SDValue Ops[] = {
        Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
    SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
    NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
    // Update the chain.
    ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
    // Record the mem-refs
    CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
  } else {
    NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
  }

  if (!PreferBEXTR) {
    // We still need to apply the shift.
    SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
    unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
    NewNode =
        CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
  }

  return NewNode;
}
3760 | |||||||
// Emit a PCMPISTR(I/M) instruction.
// ROpc/MOpc are the register-operand and load-folded opcodes; the caller
// chooses which result (VT or the i32 flags) it actually uses.
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,
                                             MVT VT, SDNode *Node) {
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);
  SDValue Imm = Node->getOperand(2);
  // Rebuild the immediate as a TargetConstant so isel won't select it again.
  // NOTE(review): assumes operand 2 is always a ConstantSDNode (intrinsic
  // signature enforces an immediate) — the cast is unchecked in release.
  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());

  // Try to fold a load. No need to check alignment.
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
                      N1.getOperand(0) };
    SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
    MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
    // Update the chain.
    ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
    // Record the mem-refs
    CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
    return CNode;
  }

  // Register form: no chain result.
  SDValue Ops[] = { N0, N1, Imm };
  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
  return CNode;
}
3790 | |||||||
// Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
// to emit a second instruction after this one. This is needed since we have two
// copyToReg nodes glued before this and we need to continue that glue through.
// InFlag is read as the incoming glue and overwritten with this node's glue.
MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,
                                             MVT VT, SDNode *Node,
                                             SDValue &InFlag) {
  SDValue N0 = Node->getOperand(0);
  SDValue N2 = Node->getOperand(2);
  SDValue Imm = Node->getOperand(4);
  // Rebuild the immediate as a TargetConstant so isel won't select it again.
  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());

  // Try to fold a load. No need to check alignment.
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
                      N2.getOperand(0), InFlag };
    SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
    MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
    // Pass this node's glue back to the caller.
    InFlag = SDValue(CNode, 3);
    // Update the chain.
    ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
    // Record the mem-refs
    CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
    return CNode;
  }

  // Register form: glue is result 2 (no chain result here).
  SDValue Ops[] = { N0, N2, Imm, InFlag };
  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
  InFlag = SDValue(CNode, 2);
  return CNode;
}
3825 | |||||||
// Try to simplify a shift amount that is an ADD/SUB modulo the shift width:
// hardware masks the amount to Size-1 anyway, so X+/-(k*Size) can shift by X,
// and (k*Size)-X can shift by -X (a NEG). Returns true if the node was
// rewritten and selected.
bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  // Only handle scalar shifts.
  if (VT.isVector())
    return false;

  // Narrower shifts only mask to 5 bits in hardware.
  unsigned Size = VT == MVT::i64 ? 64 : 32;

  SDValue OrigShiftAmt = N->getOperand(1);
  SDValue ShiftAmt = OrigShiftAmt;
  SDLoc DL(N);

  // Skip over a truncate of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
    ShiftAmt = ShiftAmt->getOperand(0);

  // This function is called after X86DAGToDAGISel::matchBitExtract(),
  // so we are not afraid that we might mess up BZHI/BEXTR pattern.

  SDValue NewShiftAmt;
  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
    auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
    // to avoid the ADD/SUB.
    if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
      NewShiftAmt = Add0;
      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
      // to generate a NEG instead of a SUB of a constant.
    } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
               Add0C->getZExtValue() != 0) {
      EVT SubVT = ShiftAmt.getValueType();
      SDValue X;
      if (Add0C->getZExtValue() % Size == 0)
        X = Add1;
      else if (ShiftAmt.hasOneUse() && Size == 64 &&
               Add0C->getZExtValue() % 32 == 0) {
        // We have a 64-bit shift by (n*32-x), turn it into -(x+n*32).
        // This is mainly beneficial if we already compute (x+n*32).
        if (Add1.getOpcode() == ISD::TRUNCATE) {
          Add1 = Add1.getOperand(0);
          SubVT = Add1.getValueType();
        }
        if (Add0.getValueType() != SubVT) {
          Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT);
          insertDAGNode(*CurDAG, OrigShiftAmt, Add0);
        }

        X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0);
        insertDAGNode(*CurDAG, OrigShiftAmt, X);
      } else
        return false;
      // Insert a negate op.
      // TODO: This isn't guaranteed to replace the sub if there is a logic cone
      // that uses it that's not a shift.
      SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
      SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
      NewShiftAmt = Neg;

      // Insert these operands into a valid topological order so they can
      // get selected independently.
      insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
      insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
    } else
      return false;
  } else
    return false;

  if (NewShiftAmt.getValueType() != MVT::i8) {
    // Need to truncate the shift amount.
    NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
    // Add to a correct topological ordering.
    insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
  }

  // Insert a new mask to keep the shift amount legal. This should be removed
  // by isel patterns.
  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
                                CurDAG->getConstant(Size - 1, DL, MVT::i8));
  // Place in a correct topological ordering.
  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);

  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
                                                   NewShiftAmt);
  if (UpdatedNode != N) {
    // If we found an existing node, we should replace ourselves with that node
    // and wait for it to be selected after its other users.
    ReplaceNode(N, UpdatedNode);
    return true;
  }

  // If the original shift amount is now dead, delete it so that we don't run
  // it through isel.
  if (OrigShiftAmt.getNode()->use_empty())
    CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());

  // Now that we've optimized the shift amount, defer to normal isel to get
  // load folding and legacy vs BMI2 selection without repeating it here.
  SelectCode(N);
  return true;
}
3931 | |||||||
// For (x << C1) op C2 with op in {AND, OR, XOR}, try to shrink the immediate
// encoding by rewriting to (x op (C2 >> C1)) << C1. Returns true if the node
// was replaced and selected.
bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
  MVT NVT = N->getSimpleValueType(0);
  unsigned Opcode = N->getOpcode();
  SDLoc dl(N);

  // For operations of the form (x << C1) op C2, check if we can use a smaller
  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
  SDValue Shift = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // If we have an any_extend feeding the AND, look through it to see if there
  // is a shift behind it. But only if the AND doesn't use the extended bits.
  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
  bool FoundAnyExtend = false;
  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
      Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
      isUInt<32>(Val)) {
    FoundAnyExtend = true;
    Shift = Shift.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  // i8 is unshrinkable, i16 should be promoted to i32.
  if (NVT != MVT::i32 && NVT != MVT::i64)
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR, AND is unaffected.
  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  // Check the minimum bitwidth for the new constant.
  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
  // Writes the shifted constant to ShiftedVal; returns true if that constant
  // has a cheaper encoding than Val.
  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
    if (Opcode == ISD::AND) {
      // AND32ri is the same as AND64ri32 with zext imm.
      // Try this before sign extended immediates below.
      ShiftedVal = (uint64_t)Val >> ShAmt;
      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
        return true;
      // Also swap order when the AND can become MOVZX.
      if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
        return true;
    }
    ShiftedVal = Val >> ShAmt; // arithmetic shift: preserves sign for isInt<>
    if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
        (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
      return true;
    if (Opcode != ISD::AND) {
      // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
      ShiftedVal = (uint64_t)Val >> ShAmt;
      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
        return true;
    }
    return false;
  };

  int64_t ShiftedVal;
  if (!CanShrinkImmediate(ShiftedVal))
    return false;

  // Ok, we can reorder to get a smaller immediate.

  // But, it's possible the original immediate allowed an AND to become MOVZX.
  // Doing this late to delay the MaskedValueIsZero call as long as possible.
  if (Opcode == ISD::AND) {
    // Find the smallest zext this could possibly be.
    unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
    ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));

    // Figure out which bits need to be zero to achieve that mask.
    APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
                                            ZExtWidth);
    NeededMask &= ~Cst->getAPIntValue();

    // If the AND can already be selected as a MOVZX, keep it as-is.
    if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
      return false;
  }

  SDValue X = Shift.getOperand(0);
  if (FoundAnyExtend) {
    // Reapply the any_extend we looked through, now around x directly.
    SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
    insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
    X = NewX;
  }

  // Build (x op ShiftedVal) << C1 and hand it back to normal isel.
  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
  insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
  insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
                                   Shift.getOperand(1));
  ReplaceNode(N, NewSHL.getNode());
  SelectCode(NewSHL.getNode());
  return true;
}
4044 | |||||||
// Emit a VPTERNLOG machine node computing the ternary truth table Imm over
// inputs A, B, C, replacing Root. A memory operand can only be folded into
// operand C, so if A or B is the foldable one it is swapped into C and the
// immediate's truth-table bits are permuted to match. Always returns true.
bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
                                     SDNode *ParentBC, SDValue A, SDValue B,
                                     SDValue C, uint8_t Imm) {
  assert(A.isOperandOf(ParentA));
  assert(B.isOperandOf(ParentBC));
  assert(C.isOperandOf(ParentBC));

  // Try to fold L as a plain load, else as a 32/64-bit broadcast load
  // (possibly behind a single-use bitcast). Fills the five address operands.
  auto tryFoldLoadOrBCast =
      [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
             SDValue &Index, SDValue &Disp, SDValue &Segment) {
        if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
          return true;

        // Not a load, check for broadcast which may be behind a bitcast.
        if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
          P = L.getNode();
          L = L.getOperand(0);
        }

        if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
          return false;

        // Only 32 and 64 bit broadcasts are supported.
        auto *MemIntr = cast<MemIntrinsicSDNode>(L);
        unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
        if (Size != 32 && Size != 64)
          return false;

        return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
      };

  bool FoldedLoad = false;
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (tryFoldLoadOrBCast(Root, ParentBC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    FoldedLoad = true;
  } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
                                Tmp4)) {
    FoldedLoad = true;
    std::swap(A, C);
    // Swapping A<->C exchanges the roles of the 0xf0 and 0xaa truth-table
    // masks: swap bits 1/4 and 3/6 of the immediate.
    uint8_t OldImm = Imm;
    Imm = OldImm & 0xa5;
    if (OldImm & 0x02) Imm |= 0x10;
    if (OldImm & 0x10) Imm |= 0x02;
    if (OldImm & 0x08) Imm |= 0x40;
    if (OldImm & 0x40) Imm |= 0x08;
  } else if (tryFoldLoadOrBCast(Root, ParentBC, B, Tmp0, Tmp1, Tmp2, Tmp3,
                                Tmp4)) {
    FoldedLoad = true;
    std::swap(B, C);
    // Swapping B<->C: swap bits 1/2 and 5/6 of the immediate.
    uint8_t OldImm = Imm;
    Imm = OldImm & 0x99;
    if (OldImm & 0x02) Imm |= 0x04;
    if (OldImm & 0x04) Imm |= 0x02;
    if (OldImm & 0x20) Imm |= 0x40;
    if (OldImm & 0x40) Imm |= 0x20;
  }

  SDLoc DL(Root);

  SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);

  MVT NVT = Root->getSimpleValueType(0);

  MachineSDNode *MNode;
  if (FoldedLoad) {
    SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);

    unsigned Opc;
    if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) {
      // Broadcast form: element size comes from the broadcast's memory VT.
      auto *MemIntr = cast<MemIntrinsicSDNode>(C);
      unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
      assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!");

      bool UseD = EltSize == 32;
      if (NVT.is128BitVector())
        Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
      else if (NVT.is256BitVector())
        Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
      else if (NVT.is512BitVector())
        Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
      else
        llvm_unreachable("Unexpected vector size!");
    } else {
      bool UseD = NVT.getVectorElementType() == MVT::i32;
      if (NVT.is128BitVector())
        Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
      else if (NVT.is256BitVector())
        Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
      else if (NVT.is512BitVector())
        Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
      else
        llvm_unreachable("Unexpected vector size!");
    }

    SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)};
    MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);

    // Update the chain.
    ReplaceUses(C.getValue(1), SDValue(MNode, 1));
    // Record the mem-refs
    CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()});
  } else {
    bool UseD = NVT.getVectorElementType() == MVT::i32;
    unsigned Opc;
    if (NVT.is128BitVector())
      Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
    else if (NVT.is256BitVector())
      Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
    else if (NVT.is512BitVector())
      Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
    else
      llvm_unreachable("Unexpected vector size!");

    MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
  }

  ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0));
  CurDAG->RemoveDeadNode(Root);
  return true;
}
4167 | |||||||
4168 | // Try to match two logic ops to a VPTERNLOG. | ||||||
4169 | // FIXME: Handle inverted inputs? | ||||||
4170 | // FIXME: Handle more complex patterns that use an operand more than once? | ||||||
4171 | bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { | ||||||
4172 | MVT NVT = N->getSimpleValueType(0); | ||||||
4173 | |||||||
4174 | // Make sure we support VPTERNLOG. | ||||||
4175 | if (!NVT.isVector() || !Subtarget->hasAVX512() || | ||||||
4176 | NVT.getVectorElementType() == MVT::i1) | ||||||
4177 | return false; | ||||||
4178 | |||||||
4179 | // We need VLX for 128/256-bit. | ||||||
4180 | if (!(Subtarget->hasVLX() || NVT.is512BitVector())) | ||||||
4181 | return false; | ||||||
4182 | |||||||
4183 | SDValue N0 = N->getOperand(0); | ||||||
4184 | SDValue N1 = N->getOperand(1); | ||||||
4185 | |||||||
4186 | auto getFoldableLogicOp = [](SDValue Op) { | ||||||
4187 | // Peek through single use bitcast. | ||||||
4188 | if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) | ||||||
4189 | Op = Op.getOperand(0); | ||||||
4190 | |||||||
4191 | if (!Op.hasOneUse()) | ||||||
4192 | return SDValue(); | ||||||
4193 | |||||||
4194 | unsigned Opc = Op.getOpcode(); | ||||||
4195 | if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR || | ||||||
4196 | Opc == X86ISD::ANDNP) | ||||||
4197 | return Op; | ||||||
4198 | |||||||
4199 | return SDValue(); | ||||||
4200 | }; | ||||||
4201 | |||||||
4202 | SDValue A, FoldableOp; | ||||||
4203 | if ((FoldableOp = getFoldableLogicOp(N1))) { | ||||||
4204 | A = N0; | ||||||
4205 | } else if ((FoldableOp = getFoldableLogicOp(N0))) { | ||||||
4206 | A = N1; | ||||||
4207 | } else | ||||||
4208 | return false; | ||||||
4209 | |||||||
4210 | SDValue B = FoldableOp.getOperand(0); | ||||||
4211 | SDValue C = FoldableOp.getOperand(1); | ||||||
4212 | |||||||
4213 | // We can build the appropriate control immediate by performing the logic | ||||||
4214 | // operation we're matching using these constants for A, B, and C. | ||||||
4215 | const uint8_t TernlogMagicA = 0xf0; | ||||||
4216 | const uint8_t TernlogMagicB = 0xcc; | ||||||
4217 | const uint8_t TernlogMagicC = 0xaa; | ||||||
4218 | |||||||
4219 | uint8_t Imm; | ||||||
4220 | switch (FoldableOp.getOpcode()) { | ||||||
4221 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4221); | ||||||
4222 | case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break; | ||||||
4223 | case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break; | ||||||
4224 | case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break; | ||||||
4225 | case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break; | ||||||
4226 | } | ||||||
4227 | |||||||
4228 | switch (N->getOpcode()) { | ||||||
4229 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4229); | ||||||
4230 | case X86ISD::ANDNP: | ||||||
4231 | if (A == N0) | ||||||
4232 | Imm &= ~TernlogMagicA; | ||||||
4233 | else | ||||||
4234 | Imm = ~(Imm) & TernlogMagicA; | ||||||
4235 | break; | ||||||
4236 | case ISD::AND: Imm &= TernlogMagicA; break; | ||||||
4237 | case ISD::OR: Imm |= TernlogMagicA; break; | ||||||
4238 | case ISD::XOR: Imm ^= TernlogMagicA; break; | ||||||
4239 | } | ||||||
4240 | |||||||
4241 | return matchVPTERNLOG(N, N, FoldableOp.getNode(), A, B, C, Imm); | ||||||
4242 | } | ||||||
4243 | |||||||
4244 | /// If the high bits of an 'and' operand are known zero, try setting the | ||||||
4245 | /// high bits of an 'and' constant operand to produce a smaller encoding by | ||||||
4246 | /// creating a small, sign-extended negative immediate rather than a large | ||||||
4247 | /// positive one. This reverses a transform in SimplifyDemandedBits that | ||||||
4248 | /// shrinks mask constants by clearing bits. There is also a possibility that | ||||||
4249 | /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that | ||||||
4250 | /// case, just replace the 'and'. Return 'true' if the node is replaced. | ||||||
4251 | bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { | ||||||
4252 | // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't | ||||||
4253 | // have immediate operands. | ||||||
4254 | MVT VT = And->getSimpleValueType(0); | ||||||
4255 | if (VT != MVT::i32 && VT != MVT::i64) | ||||||
4256 | return false; | ||||||
4257 | |||||||
4258 | auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1)); | ||||||
4259 | if (!And1C) | ||||||
4260 | return false; | ||||||
4261 | |||||||
4262 | // Bail out if the mask constant is already negative. It's can't shrink more. | ||||||
4263 | // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel | ||||||
4264 | // patterns to use a 32-bit and instead of a 64-bit and by relying on the | ||||||
4265 | // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits | ||||||
4266 | // are negative too. | ||||||
4267 | APInt MaskVal = And1C->getAPIntValue(); | ||||||
4268 | unsigned MaskLZ = MaskVal.countLeadingZeros(); | ||||||
4269 | if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32)) | ||||||
4270 | return false; | ||||||
4271 | |||||||
4272 | // Don't extend into the upper 32 bits of a 64 bit mask. | ||||||
4273 | if (VT == MVT::i64 && MaskLZ >= 32) { | ||||||
4274 | MaskLZ -= 32; | ||||||
4275 | MaskVal = MaskVal.trunc(32); | ||||||
4276 | } | ||||||
4277 | |||||||
4278 | SDValue And0 = And->getOperand(0); | ||||||
4279 | APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ); | ||||||
4280 | APInt NegMaskVal = MaskVal | HighZeros; | ||||||
4281 | |||||||
4282 | // If a negative constant would not allow a smaller encoding, there's no need | ||||||
4283 | // to continue. Only change the constant when we know it's a win. | ||||||
4284 | unsigned MinWidth = NegMaskVal.getMinSignedBits(); | ||||||
4285 | if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32)) | ||||||
4286 | return false; | ||||||
4287 | |||||||
4288 | // Extend masks if we truncated above. | ||||||
4289 | if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) { | ||||||
4290 | NegMaskVal = NegMaskVal.zext(64); | ||||||
4291 | HighZeros = HighZeros.zext(64); | ||||||
4292 | } | ||||||
4293 | |||||||
4294 | // The variable operand must be all zeros in the top bits to allow using the | ||||||
4295 | // new, negative constant as the mask. | ||||||
4296 | if (!CurDAG->MaskedValueIsZero(And0, HighZeros)) | ||||||
4297 | return false; | ||||||
4298 | |||||||
4299 | // Check if the mask is -1. In that case, this is an unnecessary instruction | ||||||
4300 | // that escaped earlier analysis. | ||||||
4301 | if (NegMaskVal.isAllOnesValue()) { | ||||||
4302 | ReplaceNode(And, And0.getNode()); | ||||||
4303 | return true; | ||||||
4304 | } | ||||||
4305 | |||||||
4306 | // A negative mask allows a smaller encoding. Create a new 'and' node. | ||||||
4307 | SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT); | ||||||
4308 | insertDAGNode(*CurDAG, SDValue(And, 0), NewMask); | ||||||
4309 | SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask); | ||||||
4310 | ReplaceNode(And, NewAnd.getNode()); | ||||||
4311 | SelectCode(NewAnd.getNode()); | ||||||
4312 | return true; | ||||||
4313 | } | ||||||
4314 | |||||||
4315 | static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad, | ||||||
4316 | bool FoldedBCast, bool Masked) { | ||||||
4317 | #define VPTESTM_CASE(VT, SUFFIX) \ | ||||||
4318 | case MVT::VT: \ | ||||||
4319 | if (Masked) \ | ||||||
4320 | return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \ | ||||||
4321 | return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX; | ||||||
4322 | |||||||
4323 | |||||||
4324 | #define VPTESTM_BROADCAST_CASES(SUFFIX) \ | ||||||
4325 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4325); \ | ||||||
4326 | VPTESTM_CASE(v4i32, DZ128##SUFFIX) \ | ||||||
4327 | VPTESTM_CASE(v2i64, QZ128##SUFFIX) \ | ||||||
4328 | VPTESTM_CASE(v8i32, DZ256##SUFFIX) \ | ||||||
4329 | VPTESTM_CASE(v4i64, QZ256##SUFFIX) \ | ||||||
4330 | VPTESTM_CASE(v16i32, DZ##SUFFIX) \ | ||||||
4331 | VPTESTM_CASE(v8i64, QZ##SUFFIX) | ||||||
4332 | |||||||
4333 | #define VPTESTM_FULL_CASES(SUFFIX) \ | ||||||
4334 | VPTESTM_BROADCAST_CASES(SUFFIX) \ | ||||||
4335 | VPTESTM_CASE(v16i8, BZ128##SUFFIX) \ | ||||||
4336 | VPTESTM_CASE(v8i16, WZ128##SUFFIX) \ | ||||||
4337 | VPTESTM_CASE(v32i8, BZ256##SUFFIX) \ | ||||||
4338 | VPTESTM_CASE(v16i16, WZ256##SUFFIX) \ | ||||||
4339 | VPTESTM_CASE(v64i8, BZ##SUFFIX) \ | ||||||
4340 | VPTESTM_CASE(v32i16, WZ##SUFFIX) | ||||||
4341 | |||||||
4342 | if (FoldedBCast) { | ||||||
4343 | switch (TestVT.SimpleTy) { | ||||||
4344 | VPTESTM_BROADCAST_CASES(rmb) | ||||||
4345 | } | ||||||
4346 | } | ||||||
4347 | |||||||
4348 | if (FoldedLoad) { | ||||||
4349 | switch (TestVT.SimpleTy) { | ||||||
4350 | VPTESTM_FULL_CASES(rm) | ||||||
4351 | } | ||||||
4352 | } | ||||||
4353 | |||||||
4354 | switch (TestVT.SimpleTy) { | ||||||
4355 | VPTESTM_FULL_CASES(rr) | ||||||
4356 | } | ||||||
4357 | |||||||
4358 | #undef VPTESTM_FULL_CASES | ||||||
4359 | #undef VPTESTM_BROADCAST_CASES | ||||||
4360 | #undef VPTESTM_CASE | ||||||
4361 | } | ||||||
4362 | |||||||
4363 | // Try to create VPTESTM instruction. If InMask is not null, it will be used | ||||||
4364 | // to form a masked operation. | ||||||
4365 | bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, | ||||||
4366 | SDValue InMask) { | ||||||
4367 | assert(Subtarget->hasAVX512() && "Expected AVX512!")(static_cast <bool> (Subtarget->hasAVX512() && "Expected AVX512!") ? void (0) : __assert_fail ("Subtarget->hasAVX512() && \"Expected AVX512!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4367, __extension__ __PRETTY_FUNCTION__)); | ||||||
4368 | assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&(static_cast <bool> (Setcc.getSimpleValueType().getVectorElementType () == MVT::i1 && "Unexpected VT!") ? void (0) : __assert_fail ("Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4369, __extension__ __PRETTY_FUNCTION__)) | ||||||
4369 | "Unexpected VT!")(static_cast <bool> (Setcc.getSimpleValueType().getVectorElementType () == MVT::i1 && "Unexpected VT!") ? void (0) : __assert_fail ("Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4369, __extension__ __PRETTY_FUNCTION__)); | ||||||
4370 | |||||||
4371 | // Look for equal and not equal compares. | ||||||
4372 | ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get(); | ||||||
4373 | if (CC != ISD::SETEQ && CC != ISD::SETNE) | ||||||
4374 | return false; | ||||||
4375 | |||||||
4376 | SDValue SetccOp0 = Setcc.getOperand(0); | ||||||
4377 | SDValue SetccOp1 = Setcc.getOperand(1); | ||||||
4378 | |||||||
4379 | // Canonicalize the all zero vector to the RHS. | ||||||
4380 | if (ISD::isBuildVectorAllZeros(SetccOp0.getNode())) | ||||||
4381 | std::swap(SetccOp0, SetccOp1); | ||||||
4382 | |||||||
4383 | // See if we're comparing against zero. | ||||||
4384 | if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode())) | ||||||
4385 | return false; | ||||||
4386 | |||||||
4387 | SDValue N0 = SetccOp0; | ||||||
4388 | |||||||
4389 | MVT CmpVT = N0.getSimpleValueType(); | ||||||
4390 | MVT CmpSVT = CmpVT.getVectorElementType(); | ||||||
4391 | |||||||
4392 | // Start with both operands the same. We'll try to refine this. | ||||||
4393 | SDValue Src0 = N0; | ||||||
4394 | SDValue Src1 = N0; | ||||||
4395 | |||||||
4396 | { | ||||||
4397 | // Look through single use bitcasts. | ||||||
4398 | SDValue N0Temp = N0; | ||||||
4399 | if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse()) | ||||||
4400 | N0Temp = N0.getOperand(0); | ||||||
4401 | |||||||
4402 | // Look for single use AND. | ||||||
4403 | if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) { | ||||||
4404 | Src0 = N0Temp.getOperand(0); | ||||||
4405 | Src1 = N0Temp.getOperand(1); | ||||||
4406 | } | ||||||
4407 | } | ||||||
4408 | |||||||
4409 | // Without VLX we need to widen the operation. | ||||||
4410 | bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector(); | ||||||
4411 | |||||||
4412 | auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L, | ||||||
4413 | SDValue &Base, SDValue &Scale, SDValue &Index, | ||||||
4414 | SDValue &Disp, SDValue &Segment) { | ||||||
4415 | // If we need to widen, we can't fold the load. | ||||||
4416 | if (!Widen) | ||||||
4417 | if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment)) | ||||||
4418 | return true; | ||||||
4419 | |||||||
4420 | // If we didn't fold a load, try to match broadcast. No widening limitation | ||||||
4421 | // for this. But only 32 and 64 bit types are supported. | ||||||
4422 | if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64) | ||||||
4423 | return false; | ||||||
4424 | |||||||
4425 | // Look through single use bitcasts. | ||||||
4426 | if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { | ||||||
4427 | P = L.getNode(); | ||||||
4428 | L = L.getOperand(0); | ||||||
4429 | } | ||||||
4430 | |||||||
4431 | if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) | ||||||
4432 | return false; | ||||||
4433 | |||||||
4434 | auto *MemIntr = cast<MemIntrinsicSDNode>(L); | ||||||
4435 | if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits()) | ||||||
4436 | return false; | ||||||
4437 | |||||||
4438 | return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment); | ||||||
4439 | }; | ||||||
4440 | |||||||
4441 | // We can only fold loads if the sources are unique. | ||||||
4442 | bool CanFoldLoads = Src0 != Src1; | ||||||
4443 | |||||||
4444 | bool FoldedLoad = false; | ||||||
4445 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
4446 | if (CanFoldLoads) { | ||||||
4447 | FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2, | ||||||
4448 | Tmp3, Tmp4); | ||||||
4449 | if (!FoldedLoad) { | ||||||
4450 | // And is commutative. | ||||||
4451 | FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1, | ||||||
4452 | Tmp2, Tmp3, Tmp4); | ||||||
4453 | if (FoldedLoad) | ||||||
4454 | std::swap(Src0, Src1); | ||||||
4455 | } | ||||||
4456 | } | ||||||
4457 | |||||||
4458 | bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD; | ||||||
4459 | |||||||
4460 | bool IsMasked = InMask.getNode() != nullptr; | ||||||
4461 | |||||||
4462 | SDLoc dl(Root); | ||||||
4463 | |||||||
4464 | MVT ResVT = Setcc.getSimpleValueType(); | ||||||
4465 | MVT MaskVT = ResVT; | ||||||
4466 | if (Widen) { | ||||||
4467 | // Widen the inputs using insert_subreg or copy_to_regclass. | ||||||
4468 | unsigned Scale = CmpVT.is128BitVector() ? 4 : 2; | ||||||
4469 | unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm; | ||||||
4470 | unsigned NumElts = CmpVT.getVectorNumElements() * Scale; | ||||||
4471 | CmpVT = MVT::getVectorVT(CmpSVT, NumElts); | ||||||
4472 | MaskVT = MVT::getVectorVT(MVT::i1, NumElts); | ||||||
4473 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl, | ||||||
4474 | CmpVT), 0); | ||||||
4475 | Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0); | ||||||
4476 | |||||||
4477 | if (!FoldedBCast) | ||||||
4478 | Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1); | ||||||
4479 | |||||||
4480 | if (IsMasked) { | ||||||
4481 | // Widen the mask. | ||||||
4482 | unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID(); | ||||||
4483 | SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); | ||||||
4484 | InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, | ||||||
4485 | dl, MaskVT, InMask, RC), 0); | ||||||
4486 | } | ||||||
4487 | } | ||||||
4488 | |||||||
4489 | bool IsTestN = CC == ISD::SETEQ; | ||||||
4490 | unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast, | ||||||
4491 | IsMasked); | ||||||
4492 | |||||||
4493 | MachineSDNode *CNode; | ||||||
4494 | if (FoldedLoad) { | ||||||
4495 | SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other); | ||||||
4496 | |||||||
4497 | if (IsMasked) { | ||||||
4498 | SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, | ||||||
4499 | Src1.getOperand(0) }; | ||||||
4500 | CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); | ||||||
4501 | } else { | ||||||
4502 | SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, | ||||||
4503 | Src1.getOperand(0) }; | ||||||
4504 | CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); | ||||||
4505 | } | ||||||
4506 | |||||||
4507 | // Update the chain. | ||||||
4508 | ReplaceUses(Src1.getValue(1), SDValue(CNode, 1)); | ||||||
4509 | // Record the mem-refs | ||||||
4510 | CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()}); | ||||||
4511 | } else { | ||||||
4512 | if (IsMasked) | ||||||
4513 | CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1); | ||||||
4514 | else | ||||||
4515 | CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1); | ||||||
4516 | } | ||||||
4517 | |||||||
4518 | // If we widened, we need to shrink the mask VT. | ||||||
4519 | if (Widen) { | ||||||
4520 | unsigned RegClass = TLI->getRegClassFor(ResVT)->getID(); | ||||||
4521 | SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); | ||||||
4522 | CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, | ||||||
4523 | dl, ResVT, SDValue(CNode, 0), RC); | ||||||
4524 | } | ||||||
4525 | |||||||
4526 | ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0)); | ||||||
4527 | CurDAG->RemoveDeadNode(Root); | ||||||
4528 | return true; | ||||||
4529 | } | ||||||
4530 | |||||||
4531 | // Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it | ||||||
4532 | // into vpternlog. | ||||||
4533 | bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { | ||||||
4534 | assert(N->getOpcode() == ISD::OR && "Unexpected opcode!")(static_cast <bool> (N->getOpcode() == ISD::OR && "Unexpected opcode!") ? void (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Unexpected opcode!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4534, __extension__ __PRETTY_FUNCTION__)); | ||||||
4535 | |||||||
4536 | MVT NVT = N->getSimpleValueType(0); | ||||||
4537 | |||||||
4538 | // Make sure we support VPTERNLOG. | ||||||
4539 | if (!NVT.isVector() || !Subtarget->hasAVX512()) | ||||||
4540 | return false; | ||||||
4541 | |||||||
4542 | // We need VLX for 128/256-bit. | ||||||
4543 | if (!(Subtarget->hasVLX() || NVT.is512BitVector())) | ||||||
4544 | return false; | ||||||
4545 | |||||||
4546 | SDValue N0 = N->getOperand(0); | ||||||
4547 | SDValue N1 = N->getOperand(1); | ||||||
4548 | |||||||
4549 | // Canonicalize AND to LHS. | ||||||
4550 | if (N1.getOpcode() == ISD::AND) | ||||||
4551 | std::swap(N0, N1); | ||||||
4552 | |||||||
4553 | if (N0.getOpcode() != ISD::AND || | ||||||
4554 | N1.getOpcode() != X86ISD::ANDNP || | ||||||
4555 | !N0.hasOneUse() || !N1.hasOneUse()) | ||||||
4556 | return false; | ||||||
4557 | |||||||
4558 | // ANDN is not commutable, use it to pick down A and C. | ||||||
4559 | SDValue A = N1.getOperand(0); | ||||||
4560 | SDValue C = N1.getOperand(1); | ||||||
4561 | |||||||
4562 | // AND is commutable, if one operand matches A, the other operand is B. | ||||||
4563 | // Otherwise this isn't a match. | ||||||
4564 | SDValue B; | ||||||
4565 | if (N0.getOperand(0) == A) | ||||||
4566 | B = N0.getOperand(1); | ||||||
4567 | else if (N0.getOperand(1) == A) | ||||||
4568 | B = N0.getOperand(0); | ||||||
4569 | else | ||||||
4570 | return false; | ||||||
4571 | |||||||
4572 | SDLoc dl(N); | ||||||
4573 | SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); | ||||||
4574 | SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm); | ||||||
4575 | ReplaceNode(N, Ternlog.getNode()); | ||||||
4576 | |||||||
4577 | return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(), | ||||||
4578 | A, B, C, 0xCA); | ||||||
4579 | } | ||||||
4580 | |||||||
4581 | void X86DAGToDAGISel::Select(SDNode *Node) { | ||||||
4582 | MVT NVT = Node->getSimpleValueType(0); | ||||||
4583 | unsigned Opcode = Node->getOpcode(); | ||||||
4584 | SDLoc dl(Node); | ||||||
4585 | |||||||
4586 | if (Node->isMachineOpcode()) { | ||||||
4587 | LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
4588 | Node->setNodeId(-1); | ||||||
4589 | return; // Already selected. | ||||||
4590 | } | ||||||
4591 | |||||||
4592 | switch (Opcode) { | ||||||
4593 | default: break; | ||||||
4594 | case ISD::INTRINSIC_W_CHAIN: { | ||||||
4595 | unsigned IntNo = Node->getConstantOperandVal(1); | ||||||
4596 | switch (IntNo) { | ||||||
4597 | default: break; | ||||||
4598 | case Intrinsic::x86_encodekey128: | ||||||
4599 | case Intrinsic::x86_encodekey256: { | ||||||
4600 | if (!Subtarget->hasKL()) | ||||||
4601 | break; | ||||||
4602 | |||||||
4603 | unsigned Opcode; | ||||||
4604 | switch (IntNo) { | ||||||
4605 | default: llvm_unreachable("Impossible intrinsic")::llvm::llvm_unreachable_internal("Impossible intrinsic", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4605); | ||||||
4606 | case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break; | ||||||
4607 | case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break; | ||||||
4608 | } | ||||||
4609 | |||||||
4610 | SDValue Chain = Node->getOperand(0); | ||||||
4611 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3), | ||||||
4612 | SDValue()); | ||||||
4613 | if (Opcode == X86::ENCODEKEY256) | ||||||
4614 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4), | ||||||
4615 | Chain.getValue(1)); | ||||||
4616 | |||||||
4617 | MachineSDNode *Res = CurDAG->getMachineNode( | ||||||
4618 | Opcode, dl, Node->getVTList(), | ||||||
4619 | {Node->getOperand(2), Chain, Chain.getValue(1)}); | ||||||
4620 | ReplaceNode(Node, Res); | ||||||
4621 | return; | ||||||
4622 | } | ||||||
4623 | case Intrinsic::x86_tileloadd64_internal: | ||||||
4624 | case Intrinsic::x86_tileloaddt164_internal: { | ||||||
4625 | if (!Subtarget->hasAMXTILE()) | ||||||
4626 | break; | ||||||
4627 | unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal | ||||||
4628 | ? X86::PTILELOADDV | ||||||
4629 | : X86::PTILELOADDT1V; | ||||||
4630 | // _tile_loadd_internal(row, col, buf, STRIDE) | ||||||
4631 | SDValue Base = Node->getOperand(4); | ||||||
4632 | SDValue Scale = getI8Imm(1, dl); | ||||||
4633 | SDValue Index = Node->getOperand(5); | ||||||
4634 | SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); | ||||||
4635 | SDValue Segment = CurDAG->getRegister(0, MVT::i16); | ||||||
4636 | SDValue Chain = Node->getOperand(0); | ||||||
4637 | MachineSDNode *CNode; | ||||||
4638 | SDValue Ops[] = {Node->getOperand(2), | ||||||
4639 | Node->getOperand(3), | ||||||
4640 | Base, | ||||||
4641 | Scale, | ||||||
4642 | Index, | ||||||
4643 | Disp, | ||||||
4644 | Segment, | ||||||
4645 | Chain}; | ||||||
4646 | CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops); | ||||||
4647 | ReplaceNode(Node, CNode); | ||||||
4648 | return; | ||||||
4649 | } | ||||||
4650 | } | ||||||
4651 | break; | ||||||
4652 | } | ||||||
4653 | case ISD::INTRINSIC_VOID: { | ||||||
4654 | unsigned IntNo = Node->getConstantOperandVal(1); | ||||||
4655 | switch (IntNo) { | ||||||
4656 | default: break; | ||||||
4657 | case Intrinsic::x86_sse3_monitor: | ||||||
4658 | case Intrinsic::x86_monitorx: | ||||||
4659 | case Intrinsic::x86_clzero: { | ||||||
4660 | bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64; | ||||||
4661 | |||||||
4662 | unsigned Opc = 0; | ||||||
4663 | switch (IntNo) { | ||||||
4664 | default: llvm_unreachable("Unexpected intrinsic!")::llvm::llvm_unreachable_internal("Unexpected intrinsic!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4664); | ||||||
4665 | case Intrinsic::x86_sse3_monitor: | ||||||
4666 | if (!Subtarget->hasSSE3()) | ||||||
4667 | break; | ||||||
4668 | Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr; | ||||||
4669 | break; | ||||||
4670 | case Intrinsic::x86_monitorx: | ||||||
4671 | if (!Subtarget->hasMWAITX()) | ||||||
4672 | break; | ||||||
4673 | Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr; | ||||||
4674 | break; | ||||||
4675 | case Intrinsic::x86_clzero: | ||||||
4676 | if (!Subtarget->hasCLZERO()) | ||||||
4677 | break; | ||||||
4678 | Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r; | ||||||
4679 | break; | ||||||
4680 | } | ||||||
4681 | |||||||
4682 | if (Opc) { | ||||||
4683 | unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX; | ||||||
4684 | SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg, | ||||||
4685 | Node->getOperand(2), SDValue()); | ||||||
4686 | SDValue InFlag = Chain.getValue(1); | ||||||
4687 | |||||||
4688 | if (IntNo == Intrinsic::x86_sse3_monitor || | ||||||
4689 | IntNo == Intrinsic::x86_monitorx) { | ||||||
4690 | // Copy the other two operands to ECX and EDX. | ||||||
4691 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3), | ||||||
4692 | InFlag); | ||||||
4693 | InFlag = Chain.getValue(1); | ||||||
4694 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4), | ||||||
4695 | InFlag); | ||||||
4696 | InFlag = Chain.getValue(1); | ||||||
4697 | } | ||||||
4698 | |||||||
4699 | MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, | ||||||
4700 | { Chain, InFlag}); | ||||||
4701 | ReplaceNode(Node, CNode); | ||||||
4702 | return; | ||||||
4703 | } | ||||||
4704 | |||||||
4705 | break; | ||||||
4706 | } | ||||||
4707 | case Intrinsic::x86_tilestored64_internal: { | ||||||
4708 | unsigned Opc = X86::PTILESTOREDV; | ||||||
4709 | // _tile_stored_internal(row, col, buf, STRIDE, c) | ||||||
4710 | SDValue Base = Node->getOperand(4); | ||||||
4711 | SDValue Scale = getI8Imm(1, dl); | ||||||
4712 | SDValue Index = Node->getOperand(5); | ||||||
4713 | SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); | ||||||
4714 | SDValue Segment = CurDAG->getRegister(0, MVT::i16); | ||||||
4715 | SDValue Chain = Node->getOperand(0); | ||||||
4716 | MachineSDNode *CNode; | ||||||
4717 | SDValue Ops[] = {Node->getOperand(2), | ||||||
4718 | Node->getOperand(3), | ||||||
4719 | Base, | ||||||
4720 | Scale, | ||||||
4721 | Index, | ||||||
4722 | Disp, | ||||||
4723 | Segment, | ||||||
4724 | Node->getOperand(6), | ||||||
4725 | Chain}; | ||||||
4726 | CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); | ||||||
4727 | ReplaceNode(Node, CNode); | ||||||
4728 | return; | ||||||
4729 | } | ||||||
4730 | case Intrinsic::x86_tileloadd64: | ||||||
4731 | case Intrinsic::x86_tileloaddt164: | ||||||
4732 | case Intrinsic::x86_tilestored64: { | ||||||
4733 | if (!Subtarget->hasAMXTILE()) | ||||||
4734 | break; | ||||||
4735 | unsigned Opc; | ||||||
4736 | switch (IntNo) { | ||||||
4737 | default: llvm_unreachable("Unexpected intrinsic!")::llvm::llvm_unreachable_internal("Unexpected intrinsic!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4737); | ||||||
4738 | case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break; | ||||||
4739 | case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break; | ||||||
4740 | case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break; | ||||||
4741 | } | ||||||
4742 | // FIXME: Match displacement and scale. | ||||||
4743 | unsigned TIndex = Node->getConstantOperandVal(2); | ||||||
4744 | SDValue TReg = getI8Imm(TIndex, dl); | ||||||
4745 | SDValue Base = Node->getOperand(3); | ||||||
4746 | SDValue Scale = getI8Imm(1, dl); | ||||||
4747 | SDValue Index = Node->getOperand(4); | ||||||
4748 | SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); | ||||||
4749 | SDValue Segment = CurDAG->getRegister(0, MVT::i16); | ||||||
4750 | SDValue Chain = Node->getOperand(0); | ||||||
4751 | MachineSDNode *CNode; | ||||||
4752 | if (Opc == X86::PTILESTORED) { | ||||||
4753 | SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain }; | ||||||
4754 | CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); | ||||||
4755 | } else { | ||||||
4756 | SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain }; | ||||||
4757 | CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); | ||||||
4758 | } | ||||||
4759 | ReplaceNode(Node, CNode); | ||||||
4760 | return; | ||||||
4761 | } | ||||||
4762 | } | ||||||
4763 | break; | ||||||
4764 | } | ||||||
4765 | case ISD::BRIND: | ||||||
4766 | case X86ISD::NT_BRIND: { | ||||||
4767 | if (Subtarget->isTargetNaCl()) | ||||||
4768 | // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We | ||||||
4769 | // leave the instruction alone. | ||||||
4770 | break; | ||||||
4771 | if (Subtarget->isTarget64BitILP32()) { | ||||||
4772 | // Converts a 32-bit register to a 64-bit, zero-extended version of | ||||||
4773 | // it. This is needed because x86-64 can do many things, but jmp %r32 | ||||||
4774 | // ain't one of them. | ||||||
4775 | SDValue Target = Node->getOperand(1); | ||||||
4776 | assert(Target.getValueType() == MVT::i32 && "Unexpected VT!")(static_cast <bool> (Target.getValueType() == MVT::i32 && "Unexpected VT!") ? void (0) : __assert_fail ("Target.getValueType() == MVT::i32 && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4776, __extension__ __PRETTY_FUNCTION__)); | ||||||
4777 | SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64); | ||||||
4778 | SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other, | ||||||
4779 | Node->getOperand(0), ZextTarget); | ||||||
4780 | ReplaceNode(Node, Brind.getNode()); | ||||||
4781 | SelectCode(ZextTarget.getNode()); | ||||||
4782 | SelectCode(Brind.getNode()); | ||||||
4783 | return; | ||||||
4784 | } | ||||||
4785 | break; | ||||||
4786 | } | ||||||
4787 | case X86ISD::GlobalBaseReg: | ||||||
4788 | ReplaceNode(Node, getGlobalBaseReg()); | ||||||
4789 | return; | ||||||
4790 | |||||||
4791 | case ISD::BITCAST: | ||||||
4792 | // Just drop all 128/256/512-bit bitcasts. | ||||||
4793 | if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() || | ||||||
4794 | NVT == MVT::f128) { | ||||||
4795 | ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); | ||||||
4796 | CurDAG->RemoveDeadNode(Node); | ||||||
4797 | return; | ||||||
4798 | } | ||||||
4799 | break; | ||||||
4800 | |||||||
4801 | case ISD::SRL: | ||||||
4802 | if (matchBitExtract(Node)) | ||||||
4803 | return; | ||||||
4804 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
4805 | case ISD::SRA: | ||||||
4806 | case ISD::SHL: | ||||||
4807 | if (tryShiftAmountMod(Node)) | ||||||
4808 | return; | ||||||
4809 | break; | ||||||
4810 | |||||||
4811 | case X86ISD::VPTERNLOG: { | ||||||
4812 | uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue(); | ||||||
4813 | if (matchVPTERNLOG(Node, Node, Node, Node->getOperand(0), | ||||||
4814 | Node->getOperand(1), Node->getOperand(2), Imm)) | ||||||
4815 | return; | ||||||
4816 | break; | ||||||
4817 | } | ||||||
4818 | |||||||
4819 | case X86ISD::ANDNP: | ||||||
4820 | if (tryVPTERNLOG(Node)) | ||||||
4821 | return; | ||||||
4822 | break; | ||||||
4823 | |||||||
4824 | case ISD::AND: | ||||||
4825 | if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) { | ||||||
4826 | // Try to form a masked VPTESTM. Operands can be in either order. | ||||||
4827 | SDValue N0 = Node->getOperand(0); | ||||||
4828 | SDValue N1 = Node->getOperand(1); | ||||||
4829 | if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() && | ||||||
4830 | tryVPTESTM(Node, N0, N1)) | ||||||
4831 | return; | ||||||
4832 | if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && | ||||||
4833 | tryVPTESTM(Node, N1, N0)) | ||||||
4834 | return; | ||||||
4835 | } | ||||||
4836 | |||||||
4837 | if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) { | ||||||
4838 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); | ||||||
4839 | CurDAG->RemoveDeadNode(Node); | ||||||
4840 | return; | ||||||
4841 | } | ||||||
4842 | if (matchBitExtract(Node)) | ||||||
4843 | return; | ||||||
4844 | if (AndImmShrink && shrinkAndImmediate(Node)) | ||||||
4845 | return; | ||||||
4846 | |||||||
4847 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
4848 | case ISD::OR: | ||||||
4849 | case ISD::XOR: | ||||||
4850 | if (tryShrinkShlLogicImm(Node)) | ||||||
4851 | return; | ||||||
4852 | if (Opcode == ISD::OR && tryMatchBitSelect(Node)) | ||||||
4853 | return; | ||||||
4854 | if (tryVPTERNLOG(Node)) | ||||||
4855 | return; | ||||||
4856 | |||||||
4857 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
4858 | case ISD::ADD: | ||||||
4859 | case ISD::SUB: { | ||||||
4860 | // Try to avoid folding immediates with multiple uses for optsize. | ||||||
4861 | // This code tries to select to register form directly to avoid going | ||||||
4862 | // through the isel table which might fold the immediate. We can't change | ||||||
4863 | // the patterns on the add/sub/and/or/xor with immediate paterns in the | ||||||
4864 | // tablegen files to check immediate use count without making the patterns | ||||||
4865 | // unavailable to the fast-isel table. | ||||||
4866 | if (!CurDAG->shouldOptForSize()) | ||||||
4867 | break; | ||||||
4868 | |||||||
4869 | // Only handle i8/i16/i32/i64. | ||||||
4870 | if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64) | ||||||
4871 | break; | ||||||
4872 | |||||||
4873 | SDValue N0 = Node->getOperand(0); | ||||||
4874 | SDValue N1 = Node->getOperand(1); | ||||||
4875 | |||||||
4876 | ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); | ||||||
4877 | if (!Cst) | ||||||
4878 | break; | ||||||
4879 | |||||||
4880 | int64_t Val = Cst->getSExtValue(); | ||||||
4881 | |||||||
4882 | // Make sure its an immediate that is considered foldable. | ||||||
4883 | // FIXME: Handle unsigned 32 bit immediates for 64-bit AND. | ||||||
4884 | if (!isInt<8>(Val) && !isInt<32>(Val)) | ||||||
4885 | break; | ||||||
4886 | |||||||
4887 | // If this can match to INC/DEC, let it go. | ||||||
4888 | if (Opcode == ISD::ADD && (Val == 1 || Val == -1)) | ||||||
4889 | break; | ||||||
4890 | |||||||
4891 | // Check if we should avoid folding this immediate. | ||||||
4892 | if (!shouldAvoidImmediateInstFormsForSize(N1.getNode())) | ||||||
4893 | break; | ||||||
4894 | |||||||
4895 | // We should not fold the immediate. So we need a register form instead. | ||||||
4896 | unsigned ROpc, MOpc; | ||||||
4897 | switch (NVT.SimpleTy) { | ||||||
4898 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4898); | ||||||
4899 | case MVT::i8: | ||||||
4900 | switch (Opcode) { | ||||||
4901 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4901); | ||||||
4902 | case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break; | ||||||
4903 | case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break; | ||||||
4904 | case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break; | ||||||
4905 | case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break; | ||||||
4906 | case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break; | ||||||
4907 | } | ||||||
4908 | break; | ||||||
4909 | case MVT::i16: | ||||||
4910 | switch (Opcode) { | ||||||
4911 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4911); | ||||||
4912 | case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break; | ||||||
4913 | case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break; | ||||||
4914 | case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break; | ||||||
4915 | case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break; | ||||||
4916 | case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break; | ||||||
4917 | } | ||||||
4918 | break; | ||||||
4919 | case MVT::i32: | ||||||
4920 | switch (Opcode) { | ||||||
4921 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4921); | ||||||
4922 | case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break; | ||||||
4923 | case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break; | ||||||
4924 | case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break; | ||||||
4925 | case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break; | ||||||
4926 | case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break; | ||||||
4927 | } | ||||||
4928 | break; | ||||||
4929 | case MVT::i64: | ||||||
4930 | switch (Opcode) { | ||||||
4931 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4931); | ||||||
4932 | case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break; | ||||||
4933 | case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break; | ||||||
4934 | case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break; | ||||||
4935 | case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break; | ||||||
4936 | case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break; | ||||||
4937 | } | ||||||
4938 | break; | ||||||
4939 | } | ||||||
4940 | |||||||
4941 | // Ok this is an AND/OR/XOR/ADD/SUB with constant. | ||||||
4942 | |||||||
4943 | // If this is not a subtract, we can still try to fold a load. | ||||||
4944 | if (Opcode != ISD::SUB) { | ||||||
4945 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
4946 | if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | ||||||
4947 | SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; | ||||||
4948 | SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); | ||||||
4949 | MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
4950 | // Update the chain. | ||||||
4951 | ReplaceUses(N0.getValue(1), SDValue(CNode, 2)); | ||||||
4952 | // Record the mem-refs | ||||||
4953 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()}); | ||||||
4954 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); | ||||||
4955 | CurDAG->RemoveDeadNode(Node); | ||||||
4956 | return; | ||||||
4957 | } | ||||||
4958 | } | ||||||
4959 | |||||||
4960 | CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1); | ||||||
4961 | return; | ||||||
4962 | } | ||||||
4963 | |||||||
4964 | case X86ISD::SMUL: | ||||||
4965 | // i16/i32/i64 are handled with isel patterns. | ||||||
4966 | if (NVT != MVT::i8) | ||||||
4967 | break; | ||||||
4968 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
4969 | case X86ISD::UMUL: { | ||||||
4970 | SDValue N0 = Node->getOperand(0); | ||||||
4971 | SDValue N1 = Node->getOperand(1); | ||||||
4972 | |||||||
4973 | unsigned LoReg, ROpc, MOpc; | ||||||
4974 | switch (NVT.SimpleTy) { | ||||||
4975 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4975); | ||||||
4976 | case MVT::i8: | ||||||
4977 | LoReg = X86::AL; | ||||||
4978 | ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r; | ||||||
4979 | MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m; | ||||||
4980 | break; | ||||||
4981 | case MVT::i16: | ||||||
4982 | LoReg = X86::AX; | ||||||
4983 | ROpc = X86::MUL16r; | ||||||
4984 | MOpc = X86::MUL16m; | ||||||
4985 | break; | ||||||
4986 | case MVT::i32: | ||||||
4987 | LoReg = X86::EAX; | ||||||
4988 | ROpc = X86::MUL32r; | ||||||
4989 | MOpc = X86::MUL32m; | ||||||
4990 | break; | ||||||
4991 | case MVT::i64: | ||||||
4992 | LoReg = X86::RAX; | ||||||
4993 | ROpc = X86::MUL64r; | ||||||
4994 | MOpc = X86::MUL64m; | ||||||
4995 | break; | ||||||
4996 | } | ||||||
4997 | |||||||
4998 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
4999 | bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
5000 | // Multiply is commutative. | ||||||
5001 | if (!FoldedLoad) { | ||||||
5002 | FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
5003 | if (FoldedLoad) | ||||||
5004 | std::swap(N0, N1); | ||||||
5005 | } | ||||||
5006 | |||||||
5007 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, | ||||||
5008 | N0, SDValue()).getValue(1); | ||||||
5009 | |||||||
5010 | MachineSDNode *CNode; | ||||||
5011 | if (FoldedLoad) { | ||||||
5012 | // i16/i32/i64 use an instruction that produces a low and high result even | ||||||
5013 | // though only the low result is used. | ||||||
5014 | SDVTList VTs; | ||||||
5015 | if (NVT == MVT::i8) | ||||||
5016 | VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); | ||||||
5017 | else | ||||||
5018 | VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other); | ||||||
5019 | |||||||
5020 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), | ||||||
5021 | InFlag }; | ||||||
5022 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
5023 | |||||||
5024 | // Update the chain. | ||||||
5025 | ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3)); | ||||||
5026 | // Record the mem-refs | ||||||
5027 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); | ||||||
5028 | } else { | ||||||
5029 | // i16/i32/i64 use an instruction that produces a low and high result even | ||||||
5030 | // though only the low result is used. | ||||||
5031 | SDVTList VTs; | ||||||
5032 | if (NVT == MVT::i8) | ||||||
5033 | VTs = CurDAG->getVTList(NVT, MVT::i32); | ||||||
5034 | else | ||||||
5035 | VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); | ||||||
5036 | |||||||
5037 | CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag}); | ||||||
5038 | } | ||||||
5039 | |||||||
5040 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); | ||||||
5041 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2)); | ||||||
5042 | CurDAG->RemoveDeadNode(Node); | ||||||
5043 | return; | ||||||
5044 | } | ||||||
5045 | |||||||
5046 | case ISD::SMUL_LOHI: | ||||||
5047 | case ISD::UMUL_LOHI: { | ||||||
5048 | SDValue N0 = Node->getOperand(0); | ||||||
5049 | SDValue N1 = Node->getOperand(1); | ||||||
5050 | |||||||
5051 | unsigned Opc, MOpc; | ||||||
5052 | unsigned LoReg, HiReg; | ||||||
5053 | bool IsSigned = Opcode == ISD::SMUL_LOHI; | ||||||
5054 | bool UseMULX = !IsSigned && Subtarget->hasBMI2(); | ||||||
5055 | bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty(); | ||||||
5056 | switch (NVT.SimpleTy) { | ||||||
5057 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5057); | ||||||
5058 | case MVT::i32: | ||||||
5059 | Opc = UseMULXHi ? X86::MULX32Hrr : | ||||||
5060 | UseMULX ? X86::MULX32rr : | ||||||
5061 | IsSigned ? X86::IMUL32r : X86::MUL32r; | ||||||
5062 | MOpc = UseMULXHi ? X86::MULX32Hrm : | ||||||
5063 | UseMULX ? X86::MULX32rm : | ||||||
5064 | IsSigned ? X86::IMUL32m : X86::MUL32m; | ||||||
5065 | LoReg = UseMULX ? X86::EDX : X86::EAX; | ||||||
5066 | HiReg = X86::EDX; | ||||||
5067 | break; | ||||||
5068 | case MVT::i64: | ||||||
5069 | Opc = UseMULXHi ? X86::MULX64Hrr : | ||||||
5070 | UseMULX ? X86::MULX64rr : | ||||||
5071 | IsSigned ? X86::IMUL64r : X86::MUL64r; | ||||||
5072 | MOpc = UseMULXHi ? X86::MULX64Hrm : | ||||||
5073 | UseMULX ? X86::MULX64rm : | ||||||
5074 | IsSigned ? X86::IMUL64m : X86::MUL64m; | ||||||
5075 | LoReg = UseMULX ? X86::RDX : X86::RAX; | ||||||
5076 | HiReg = X86::RDX; | ||||||
5077 | break; | ||||||
5078 | } | ||||||
5079 | |||||||
5080 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
5081 | bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
5082 | // Multiply is commutative. | ||||||
5083 | if (!foldedLoad) { | ||||||
5084 | foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
5085 | if (foldedLoad) | ||||||
5086 | std::swap(N0, N1); | ||||||
5087 | } | ||||||
5088 | |||||||
5089 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, | ||||||
5090 | N0, SDValue()).getValue(1); | ||||||
5091 | SDValue ResHi, ResLo; | ||||||
5092 | if (foldedLoad) { | ||||||
5093 | SDValue Chain; | ||||||
5094 | MachineSDNode *CNode = nullptr; | ||||||
5095 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), | ||||||
5096 | InFlag }; | ||||||
5097 | if (UseMULXHi) { | ||||||
5098 | SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); | ||||||
5099 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
5100 | ResHi = SDValue(CNode, 0); | ||||||
5101 | Chain = SDValue(CNode, 1); | ||||||
5102 | } else if (UseMULX) { | ||||||
5103 | SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other); | ||||||
5104 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
5105 | ResHi = SDValue(CNode, 0); | ||||||
5106 | ResLo = SDValue(CNode, 1); | ||||||
5107 | Chain = SDValue(CNode, 2); | ||||||
5108 | } else { | ||||||
5109 | SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); | ||||||
5110 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
5111 | Chain = SDValue(CNode, 0); | ||||||
5112 | InFlag = SDValue(CNode, 1); | ||||||
5113 | } | ||||||
5114 | |||||||
5115 | // Update the chain. | ||||||
5116 | ReplaceUses(N1.getValue(1), Chain); | ||||||
5117 | // Record the mem-refs | ||||||
5118 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); | ||||||
5119 | } else { | ||||||
5120 | SDValue Ops[] = { N1, InFlag }; | ||||||
5121 | if (UseMULXHi) { | ||||||
5122 | SDVTList VTs = CurDAG->getVTList(NVT); | ||||||
5123 | SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); | ||||||
5124 | ResHi = SDValue(CNode, 0); | ||||||
5125 | } else if (UseMULX) { | ||||||
5126 | SDVTList VTs = CurDAG->getVTList(NVT, NVT); | ||||||
5127 | SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); | ||||||
5128 | ResHi = SDValue(CNode, 0); | ||||||
5129 | ResLo = SDValue(CNode, 1); | ||||||
5130 | } else { | ||||||
5131 | SDVTList VTs = CurDAG->getVTList(MVT::Glue); | ||||||
5132 | SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); | ||||||
5133 | InFlag = SDValue(CNode, 0); | ||||||
5134 | } | ||||||
5135 | } | ||||||
5136 | |||||||
5137 | // Copy the low half of the result, if it is needed. | ||||||
5138 | if (!SDValue(Node, 0).use_empty()) { | ||||||
5139 | if (!ResLo) { | ||||||
5140 | assert(LoReg && "Register for low half is not defined!")(static_cast <bool> (LoReg && "Register for low half is not defined!" ) ? void (0) : __assert_fail ("LoReg && \"Register for low half is not defined!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5140, __extension__ __PRETTY_FUNCTION__)); | ||||||
5141 | ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, | ||||||
5142 | NVT, InFlag); | ||||||
5143 | InFlag = ResLo.getValue(2); | ||||||
5144 | } | ||||||
5145 | ReplaceUses(SDValue(Node, 0), ResLo); | ||||||
5146 | LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; ResLo.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
5147 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; ResLo.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
5148 | } | ||||||
5149 | // Copy the high half of the result, if it is needed. | ||||||
5150 | if (!SDValue(Node, 1).use_empty()) { | ||||||
5151 | if (!ResHi) { | ||||||
5152 | assert(HiReg && "Register for high half is not defined!")(static_cast <bool> (HiReg && "Register for high half is not defined!" ) ? void (0) : __assert_fail ("HiReg && \"Register for high half is not defined!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5152, __extension__ __PRETTY_FUNCTION__)); | ||||||
5153 | ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, | ||||||
5154 | NVT, InFlag); | ||||||
5155 | InFlag = ResHi.getValue(2); | ||||||
5156 | } | ||||||
5157 | ReplaceUses(SDValue(Node, 1), ResHi); | ||||||
5158 | LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; ResHi.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
5159 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; ResHi.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
5160 | } | ||||||
5161 | |||||||
5162 | CurDAG->RemoveDeadNode(Node); | ||||||
5163 | return; | ||||||
5164 | } | ||||||
5165 | |||||||
5166 | case ISD::SDIVREM: | ||||||
5167 | case ISD::UDIVREM: { | ||||||
5168 | SDValue N0 = Node->getOperand(0); | ||||||
5169 | SDValue N1 = Node->getOperand(1); | ||||||
5170 | |||||||
5171 | unsigned ROpc, MOpc; | ||||||
5172 | bool isSigned = Opcode == ISD::SDIVREM; | ||||||
5173 | if (!isSigned) { | ||||||
5174 | switch (NVT.SimpleTy) { | ||||||
5175 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5175); | ||||||
5176 | case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break; | ||||||
5177 | case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break; | ||||||
5178 | case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break; | ||||||
5179 | case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break; | ||||||
5180 | } | ||||||
5181 | } else { | ||||||
5182 | switch (NVT.SimpleTy) { | ||||||
5183 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5183); | ||||||
5184 | case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break; | ||||||
5185 | case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break; | ||||||
5186 | case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break; | ||||||
5187 | case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break; | ||||||
5188 | } | ||||||
5189 | } | ||||||
5190 | |||||||
5191 | unsigned LoReg, HiReg, ClrReg; | ||||||
5192 | unsigned SExtOpcode; | ||||||
5193 | switch (NVT.SimpleTy) { | ||||||
5194 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5194); | ||||||
5195 | case MVT::i8: | ||||||
5196 | LoReg = X86::AL; ClrReg = HiReg = X86::AH; | ||||||
5197 | SExtOpcode = 0; // Not used. | ||||||
5198 | break; | ||||||
5199 | case MVT::i16: | ||||||
5200 | LoReg = X86::AX; HiReg = X86::DX; | ||||||
5201 | ClrReg = X86::DX; | ||||||
5202 | SExtOpcode = X86::CWD; | ||||||
5203 | break; | ||||||
5204 | case MVT::i32: | ||||||
5205 | LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; | ||||||
5206 | SExtOpcode = X86::CDQ; | ||||||
5207 | break; | ||||||
5208 | case MVT::i64: | ||||||
5209 | LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; | ||||||
5210 | SExtOpcode = X86::CQO; | ||||||
5211 | break; | ||||||
5212 | } | ||||||
5213 | |||||||
5214 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
5215 | bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
5216 | bool signBitIsZero = CurDAG->SignBitIsZero(N0); | ||||||
5217 | |||||||
5218 | SDValue InFlag; | ||||||
5219 | if (NVT == MVT::i8) { | ||||||
5220 | // Special case for div8, just use a move with zero extension to AX to | ||||||
5221 | // clear the upper 8 bits (AH). | ||||||
5222 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain; | ||||||
5223 | MachineSDNode *Move; | ||||||
5224 | if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | ||||||
5225 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; | ||||||
5226 | unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8 | ||||||
5227 | : X86::MOVZX16rm8; | ||||||
5228 | Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops); | ||||||
5229 | Chain = SDValue(Move, 1); | ||||||
5230 | ReplaceUses(N0.getValue(1), Chain); | ||||||
5231 | // Record the mem-refs | ||||||
5232 | CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()}); | ||||||
5233 | } else { | ||||||
5234 | unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8 | ||||||
5235 | : X86::MOVZX16rr8; | ||||||
5236 | Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0); | ||||||
5237 | Chain = CurDAG->getEntryNode(); | ||||||
5238 | } | ||||||
5239 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0), | ||||||
5240 | SDValue()); | ||||||
5241 | InFlag = Chain.getValue(1); | ||||||
5242 | } else { | ||||||
5243 | InFlag = | ||||||
5244 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, | ||||||
5245 | LoReg, N0, SDValue()).getValue(1); | ||||||
5246 | if (isSigned && !signBitIsZero) { | ||||||
5247 | // Sign extend the low part into the high part. | ||||||
5248 | InFlag = | ||||||
5249 | SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); | ||||||
5250 | } else { | ||||||
5251 | // Zero out the high part, effectively zero extending the input. | ||||||
5252 | SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); | ||||||
5253 | SDValue ClrNode = | ||||||
5254 | SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); | ||||||
5255 | switch (NVT.SimpleTy) { | ||||||
5256 | case MVT::i16: | ||||||
5257 | ClrNode = | ||||||
5258 | SDValue(CurDAG->getMachineNode( | ||||||
5259 | TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, | ||||||
5260 | CurDAG->getTargetConstant(X86::sub_16bit, dl, | ||||||
5261 | MVT::i32)), | ||||||
5262 | 0); | ||||||
5263 | break; | ||||||
5264 | case MVT::i32: | ||||||
5265 | break; | ||||||
5266 | case MVT::i64: | ||||||
5267 | ClrNode = | ||||||
5268 | SDValue(CurDAG->getMachineNode( | ||||||
5269 | TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, | ||||||
5270 | CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode, | ||||||
5271 | CurDAG->getTargetConstant(X86::sub_32bit, dl, | ||||||
5272 | MVT::i32)), | ||||||
5273 | 0); | ||||||
5274 | break; | ||||||
5275 | default: | ||||||
5276 | llvm_unreachable("Unexpected division source")::llvm::llvm_unreachable_internal("Unexpected division source" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5276); | ||||||
5277 | } | ||||||
5278 | |||||||
5279 | InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, | ||||||
5280 | ClrNode, InFlag).getValue(1); | ||||||
5281 | } | ||||||
5282 | } | ||||||
5283 | |||||||
5284 | if (foldedLoad) { | ||||||
5285 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), | ||||||
5286 | InFlag }; | ||||||
5287 | MachineSDNode *CNode = | ||||||
5288 | CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); | ||||||
5289 | InFlag = SDValue(CNode, 1); | ||||||
5290 | // Update the chain. | ||||||
5291 | ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); | ||||||
5292 | // Record the mem-refs | ||||||
5293 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); | ||||||
5294 | } else { | ||||||
5295 | InFlag = | ||||||
5296 | SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0); | ||||||
5297 | } | ||||||
5298 | |||||||
5299 | // Prevent use of AH in a REX instruction by explicitly copying it to | ||||||
5300 | // an ABCD_L register. | ||||||
5301 | // | ||||||
5302 | // The current assumption of the register allocator is that isel | ||||||
5303 | // won't generate explicit references to the GR8_ABCD_H registers. If | ||||||
5304 | // the allocator and/or the backend get enhanced to be more robust in | ||||||
5305 | // that regard, this can be, and should be, removed. | ||||||
5306 | if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) { | ||||||
5307 | SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8); | ||||||
5308 | unsigned AHExtOpcode = | ||||||
5309 | isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX; | ||||||
5310 | |||||||
5311 | SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32, | ||||||
5312 | MVT::Glue, AHCopy, InFlag); | ||||||
5313 | SDValue Result(RNode, 0); | ||||||
5314 | InFlag = SDValue(RNode, 1); | ||||||
5315 | |||||||
5316 | Result = | ||||||
5317 | CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); | ||||||
5318 | |||||||
5319 | ReplaceUses(SDValue(Node, 1), Result); | ||||||
5320 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
5321 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
5322 | } | ||||||
5323 | // Copy the division (low) result, if it is needed. | ||||||
5324 | if (!SDValue(Node, 0).use_empty()) { | ||||||
5325 | SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, | ||||||
5326 | LoReg, NVT, InFlag); | ||||||
5327 | InFlag = Result.getValue(2); | ||||||
5328 | ReplaceUses(SDValue(Node, 0), Result); | ||||||
5329 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
5330 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
5331 | } | ||||||
5332 | // Copy the remainder (high) result, if it is needed. | ||||||
5333 | if (!SDValue(Node, 1).use_empty()) { | ||||||
5334 | SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, | ||||||
5335 | HiReg, NVT, InFlag); | ||||||
5336 | InFlag = Result.getValue(2); | ||||||
5337 | ReplaceUses(SDValue(Node, 1), Result); | ||||||
5338 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
5339 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
5340 | } | ||||||
5341 | CurDAG->RemoveDeadNode(Node); | ||||||
5342 | return; | ||||||
5343 | } | ||||||
5344 | |||||||
5345 | case X86ISD::FCMP: | ||||||
5346 | case X86ISD::STRICT_FCMP: | ||||||
5347 | case X86ISD::STRICT_FCMPS: { | ||||||
5348 | bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP || | ||||||
5349 | Node->getOpcode() == X86ISD::STRICT_FCMPS; | ||||||
5350 | SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0); | ||||||
5351 | SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1); | ||||||
5352 | |||||||
5353 | // Save the original VT of the compare. | ||||||
5354 | MVT CmpVT = N0.getSimpleValueType(); | ||||||
5355 | |||||||
5356 | // Floating point needs special handling if we don't have FCOMI. | ||||||
5357 | if (Subtarget->hasCMov()) | ||||||
5358 | break; | ||||||
5359 | |||||||
5360 | bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS; | ||||||
5361 | |||||||
5362 | unsigned Opc; | ||||||
5363 | switch (CmpVT.SimpleTy) { | ||||||
5364 | default: llvm_unreachable("Unexpected type!")::llvm::llvm_unreachable_internal("Unexpected type!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5364); | ||||||
5365 | case MVT::f32: | ||||||
5366 | Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32; | ||||||
5367 | break; | ||||||
5368 | case MVT::f64: | ||||||
5369 | Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64; | ||||||
5370 | break; | ||||||
5371 | case MVT::f80: | ||||||
5372 | Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80; | ||||||
5373 | break; | ||||||
5374 | } | ||||||
5375 | |||||||
5376 | SDValue Chain = | ||||||
5377 | IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode(); | ||||||
5378 | SDValue Glue; | ||||||
5379 | if (IsStrictCmp) { | ||||||
5380 | SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); | ||||||
5381 | Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0); | ||||||
5382 | Glue = Chain.getValue(1); | ||||||
5383 | } else { | ||||||
5384 | Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0); | ||||||
5385 | } | ||||||
5386 | |||||||
5387 | // Move FPSW to AX. | ||||||
5388 | SDValue FNSTSW = | ||||||
5389 | SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0); | ||||||
5390 | |||||||
5391 | // Extract upper 8-bits of AX. | ||||||
5392 | SDValue Extract = | ||||||
5393 | CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW); | ||||||
5394 | |||||||
5395 | // Move AH into flags. | ||||||
5396 | // Some 64-bit targets lack SAHF support, but they do support FCOMI. | ||||||
5397 | assert(Subtarget->hasLAHFSAHF() &&(static_cast <bool> (Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?") ? void (0) : __assert_fail ("Subtarget->hasLAHFSAHF() && \"Target doesn't support SAHF or FCOMI?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5398, __extension__ __PRETTY_FUNCTION__)) | ||||||
5398 | "Target doesn't support SAHF or FCOMI?")(static_cast <bool> (Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?") ? void (0) : __assert_fail ("Subtarget->hasLAHFSAHF() && \"Target doesn't support SAHF or FCOMI?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5398, __extension__ __PRETTY_FUNCTION__)); | ||||||
5399 | SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue()); | ||||||
5400 | Chain = AH; | ||||||
5401 | SDValue SAHF = SDValue( | ||||||
5402 | CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0); | ||||||
5403 | |||||||
5404 | if (IsStrictCmp) | ||||||
5405 | ReplaceUses(SDValue(Node, 1), Chain); | ||||||
5406 | |||||||
5407 | ReplaceUses(SDValue(Node, 0), SAHF); | ||||||
5408 | CurDAG->RemoveDeadNode(Node); | ||||||
5409 | return; | ||||||
5410 | } | ||||||
5411 | |||||||
5412 | case X86ISD::CMP: { | ||||||
5413 | SDValue N0 = Node->getOperand(0); | ||||||
5414 | SDValue N1 = Node->getOperand(1); | ||||||
5415 | |||||||
5416 | // Optimizations for TEST compares. | ||||||
5417 | if (!isNullConstant(N1)) | ||||||
5418 | break; | ||||||
5419 | |||||||
5420 | // Save the original VT of the compare. | ||||||
5421 | MVT CmpVT = N0.getSimpleValueType(); | ||||||
5422 | |||||||
5423 | // If we are comparing (and (shr X, C, Mask) with 0, emit a BEXTR followed | ||||||
5424 | // by a test instruction. The test should be removed later by | ||||||
5425 | // analyzeCompare if we are using only the zero flag. | ||||||
5426 | // TODO: Should we check the users and use the BEXTR flags directly? | ||||||
5427 | if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { | ||||||
5428 | if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) { | ||||||
5429 | unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr | ||||||
5430 | : X86::TEST32rr; | ||||||
5431 | SDValue BEXTR = SDValue(NewNode, 0); | ||||||
5432 | NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR); | ||||||
5433 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); | ||||||
5434 | CurDAG->RemoveDeadNode(Node); | ||||||
5435 | return; | ||||||
5436 | } | ||||||
5437 | } | ||||||
5438 | |||||||
5439 | // We can peek through truncates, but we need to be careful below. | ||||||
5440 | if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) | ||||||
5441 | N0 = N0.getOperand(0); | ||||||
5442 | |||||||
5443 | // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to | ||||||
5444 | // use a smaller encoding. | ||||||
5445 | // Look past the truncate if CMP is the only use of it. | ||||||
5446 | if (N0.getOpcode() == ISD::AND && | ||||||
5447 | N0.getNode()->hasOneUse() && | ||||||
5448 | N0.getValueType() != MVT::i8) { | ||||||
5449 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); | ||||||
5450 | if (!C) break; | ||||||
5451 | uint64_t Mask = C->getZExtValue(); | ||||||
5452 | // We may have looked through a truncate so mask off any bits that | ||||||
5453 | // shouldn't be part of the compare. | ||||||
5454 | Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits()); | ||||||
5455 | |||||||
5456 | // Check if we can replace AND+IMM64 with a shift. This is possible for | ||||||
5457 | // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero | ||||||
5458 | // flag. | ||||||
5459 | if (CmpVT == MVT::i64 && !isInt<32>(Mask) && | ||||||
5460 | onlyUsesZeroFlag(SDValue(Node, 0))) { | ||||||
5461 | if (isMask_64(~Mask)) { | ||||||
5462 | unsigned TrailingZeros = countTrailingZeros(Mask); | ||||||
5463 | SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64); | ||||||
5464 | SDValue Shift = | ||||||
5465 | SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32, | ||||||
5466 | N0.getOperand(0), Imm), 0); | ||||||
5467 | MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, | ||||||
5468 | MVT::i32, Shift, Shift); | ||||||
5469 | ReplaceNode(Node, Test); | ||||||
5470 | return; | ||||||
5471 | } | ||||||
5472 | if (isMask_64(Mask)) { | ||||||
5473 | unsigned LeadingZeros = countLeadingZeros(Mask); | ||||||
5474 | SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64); | ||||||
5475 | SDValue Shift = | ||||||
5476 | SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32, | ||||||
5477 | N0.getOperand(0), Imm), 0); | ||||||
5478 | MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, | ||||||
5479 | MVT::i32, Shift, Shift); | ||||||
5480 | ReplaceNode(Node, Test); | ||||||
5481 | return; | ||||||
5482 | } | ||||||
5483 | } | ||||||
5484 | |||||||
5485 | MVT VT; | ||||||
5486 | int SubRegOp; | ||||||
5487 | unsigned ROpc, MOpc; | ||||||
5488 | |||||||
5489 | // For each of these checks we need to be careful if the sign flag is | ||||||
5490 | // being used. It is only safe to use the sign flag in two conditions, | ||||||
5491 | // either the sign bit in the shrunken mask is zero or the final test | ||||||
5492 | // size is equal to the original compare size. | ||||||
5493 | |||||||
5494 | if (isUInt<8>(Mask) && | ||||||
5495 | (!(Mask & 0x80) || CmpVT == MVT::i8 || | ||||||
5496 | hasNoSignFlagUses(SDValue(Node, 0)))) { | ||||||
5497 | // For example, convert "testl %eax, $8" to "testb %al, $8" | ||||||
5498 | VT = MVT::i8; | ||||||
5499 | SubRegOp = X86::sub_8bit; | ||||||
5500 | ROpc = X86::TEST8ri; | ||||||
5501 | MOpc = X86::TEST8mi; | ||||||
5502 | } else if (OptForMinSize && isUInt<16>(Mask) && | ||||||
5503 | (!(Mask & 0x8000) || CmpVT == MVT::i16 || | ||||||
5504 | hasNoSignFlagUses(SDValue(Node, 0)))) { | ||||||
5505 | // For example, "testl %eax, $32776" to "testw %ax, $32776". | ||||||
5506 | // NOTE: We only want to form TESTW instructions if optimizing for | ||||||
5507 | // min size. Otherwise we only save one byte and possibly get a length | ||||||
5508 | // changing prefix penalty in the decoders. | ||||||
5509 | VT = MVT::i16; | ||||||
5510 | SubRegOp = X86::sub_16bit; | ||||||
5511 | ROpc = X86::TEST16ri; | ||||||
5512 | MOpc = X86::TEST16mi; | ||||||
5513 | } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 && | ||||||
5514 | ((!(Mask & 0x80000000) && | ||||||
5515 | // Without minsize 16-bit Cmps can get here so we need to | ||||||
5516 | // be sure we calculate the correct sign flag if needed. | ||||||
5517 | (CmpVT != MVT::i16 || !(Mask & 0x8000))) || | ||||||
5518 | CmpVT == MVT::i32 || | ||||||
5519 | hasNoSignFlagUses(SDValue(Node, 0)))) { | ||||||
5520 | // For example, "testq %rax, $268468232" to "testl %eax, $268468232". | ||||||
5521 | // NOTE: We only want to run that transform if N0 is 32 or 64 bits. | ||||||
5522 | // Otherwise, we find ourselves in a position where we have to do | ||||||
5523 | // promotion. If previous passes did not promote the and, we assume | ||||||
5524 | // they had a good reason not to and do not promote here. | ||||||
5525 | VT = MVT::i32; | ||||||
5526 | SubRegOp = X86::sub_32bit; | ||||||
5527 | ROpc = X86::TEST32ri; | ||||||
5528 | MOpc = X86::TEST32mi; | ||||||
5529 | } else { | ||||||
5530 | // No eligible transformation was found. | ||||||
5531 | break; | ||||||
5532 | } | ||||||
5533 | |||||||
5534 | SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT); | ||||||
5535 | SDValue Reg = N0.getOperand(0); | ||||||
5536 | |||||||
5537 | // Emit a testl or testw. | ||||||
5538 | MachineSDNode *NewNode; | ||||||
5539 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
5540 | if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | ||||||
5541 | if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) { | ||||||
5542 | if (!LoadN->isSimple()) { | ||||||
5543 | unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits(); | ||||||
5544 | if ((MOpc == X86::TEST8mi && NumVolBits != 8) || | ||||||
5545 | (MOpc == X86::TEST16mi && NumVolBits != 16) || | ||||||
5546 | (MOpc == X86::TEST32mi && NumVolBits != 32)) | ||||||
5547 | break; | ||||||
5548 | } | ||||||
5549 | } | ||||||
5550 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, | ||||||
5551 | Reg.getOperand(0) }; | ||||||
5552 | NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops); | ||||||
5553 | // Update the chain. | ||||||
5554 | ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1)); | ||||||
5555 | // Record the mem-refs | ||||||
5556 | CurDAG->setNodeMemRefs(NewNode, | ||||||
5557 | {cast<LoadSDNode>(Reg)->getMemOperand()}); | ||||||
5558 | } else { | ||||||
5559 | // Extract the subregister if necessary. | ||||||
5560 | if (N0.getValueType() != VT) | ||||||
5561 | Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg); | ||||||
5562 | |||||||
5563 | NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm); | ||||||
5564 | } | ||||||
5565 | // Replace CMP with TEST. | ||||||
5566 | ReplaceNode(Node, NewNode); | ||||||
5567 | return; | ||||||
5568 | } | ||||||
5569 | break; | ||||||
5570 | } | ||||||
5571 | case X86ISD::PCMPISTR: { | ||||||
5572 | if (!Subtarget->hasSSE42()) | ||||||
5573 | break; | ||||||
5574 | |||||||
5575 | bool NeedIndex = !SDValue(Node, 0).use_empty(); | ||||||
5576 | bool NeedMask = !SDValue(Node, 1).use_empty(); | ||||||
5577 | // We can't fold a load if we are going to make two instructions. | ||||||
5578 | bool MayFoldLoad = !NeedIndex || !NeedMask; | ||||||
5579 | |||||||
5580 | MachineSDNode *CNode; | ||||||
5581 | if (NeedMask) { | ||||||
5582 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr; | ||||||
5583 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm; | ||||||
5584 | CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node); | ||||||
5585 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); | ||||||
5586 | } | ||||||
5587 | if (NeedIndex || !NeedMask) { | ||||||
5588 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr; | ||||||
5589 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm; | ||||||
5590 | CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node); | ||||||
5591 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); | ||||||
5592 | } | ||||||
5593 | |||||||
5594 | // Connect the flag usage to the last instruction created. | ||||||
5595 | ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); | ||||||
5596 | CurDAG->RemoveDeadNode(Node); | ||||||
5597 | return; | ||||||
5598 | } | ||||||
5599 | case X86ISD::PCMPESTR: { | ||||||
5600 | if (!Subtarget->hasSSE42()) | ||||||
5601 | break; | ||||||
5602 | |||||||
5603 | // Copy the two implicit register inputs. | ||||||
5604 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX, | ||||||
5605 | Node->getOperand(1), | ||||||
5606 | SDValue()).getValue(1); | ||||||
5607 | InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, | ||||||
5608 | Node->getOperand(3), InFlag).getValue(1); | ||||||
5609 | |||||||
5610 | bool NeedIndex = !SDValue(Node, 0).use_empty(); | ||||||
5611 | bool NeedMask = !SDValue(Node, 1).use_empty(); | ||||||
5612 | // We can't fold a load if we are going to make two instructions. | ||||||
5613 | bool MayFoldLoad = !NeedIndex || !NeedMask; | ||||||
5614 | |||||||
5615 | MachineSDNode *CNode; | ||||||
5616 | if (NeedMask) { | ||||||
5617 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr; | ||||||
5618 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm; | ||||||
5619 | CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node, | ||||||
5620 | InFlag); | ||||||
5621 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); | ||||||
5622 | } | ||||||
5623 | if (NeedIndex || !NeedMask) { | ||||||
5624 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr; | ||||||
5625 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm; | ||||||
5626 | CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag); | ||||||
5627 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); | ||||||
5628 | } | ||||||
5629 | // Connect the flag usage to the last instruction created. | ||||||
5630 | ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); | ||||||
5631 | CurDAG->RemoveDeadNode(Node); | ||||||
5632 | return; | ||||||
5633 | } | ||||||
5634 | |||||||
5635 | case ISD::SETCC: { | ||||||
5636 | if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue())) | ||||||
5637 | return; | ||||||
5638 | |||||||
5639 | break; | ||||||
5640 | } | ||||||
5641 | |||||||
5642 | case ISD::STORE: | ||||||
5643 | if (foldLoadStoreIntoMemOperand(Node)) | ||||||
5644 | return; | ||||||
5645 | break; | ||||||
5646 | |||||||
5647 | case X86ISD::SETCC_CARRY: { | ||||||
5648 | // We have to do this manually because tblgen will put the eflags copy in | ||||||
5649 | // the wrong place if we use an extract_subreg in the pattern. | ||||||
5650 | MVT VT = Node->getSimpleValueType(0); | ||||||
5651 | |||||||
5652 | // Copy flags to the EFLAGS register and glue it to next node. | ||||||
5653 | SDValue EFLAGS = | ||||||
5654 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, | ||||||
5655 | Node->getOperand(1), SDValue()); | ||||||
5656 | |||||||
5657 | // Create a 64-bit instruction if the result is 64-bits otherwise use the | ||||||
5658 | // 32-bit version. | ||||||
5659 | unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; | ||||||
5660 | MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; | ||||||
5661 | SDValue Result = SDValue( | ||||||
5662 | CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0); | ||||||
5663 | |||||||
5664 | // For less than 32-bits we need to extract from the 32-bit node. | ||||||
5665 | if (VT == MVT::i8 || VT == MVT::i16) { | ||||||
5666 | int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; | ||||||
5667 | Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); | ||||||
5668 | } | ||||||
5669 | |||||||
5670 | ReplaceUses(SDValue(Node, 0), Result); | ||||||
5671 | CurDAG->RemoveDeadNode(Node); | ||||||
5672 | return; | ||||||
5673 | } | ||||||
5674 | case X86ISD::SBB: { | ||||||
5675 | if (isNullConstant(Node->getOperand(0)) && | ||||||
5676 | isNullConstant(Node->getOperand(1))) { | ||||||
5677 | MVT VT = Node->getSimpleValueType(0); | ||||||
5678 | |||||||
5679 | // Create zero. | ||||||
5680 | SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); | ||||||
5681 | SDValue Zero = | ||||||
5682 | SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); | ||||||
5683 | if (VT == MVT::i64) { | ||||||
5684 | Zero = SDValue( | ||||||
5685 | CurDAG->getMachineNode( | ||||||
5686 | TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, | ||||||
5687 | CurDAG->getTargetConstant(0, dl, MVT::i64), Zero, | ||||||
5688 | CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)), | ||||||
5689 | 0); | ||||||
5690 | } | ||||||
5691 | |||||||
5692 | // Copy flags to the EFLAGS register and glue it to next node. | ||||||
5693 | SDValue EFLAGS = | ||||||
5694 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, | ||||||
5695 | Node->getOperand(2), SDValue()); | ||||||
5696 | |||||||
5697 | // Create a 64-bit instruction if the result is 64-bits otherwise use the | ||||||
5698 | // 32-bit version. | ||||||
5699 | unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr; | ||||||
5700 | MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; | ||||||
5701 | VTs = CurDAG->getVTList(SBBVT, MVT::i32); | ||||||
5702 | SDValue Result = | ||||||
5703 | SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS, | ||||||
5704 | EFLAGS.getValue(1)}), | ||||||
5705 | 0); | ||||||
5706 | |||||||
5707 | // Replace the flag use. | ||||||
5708 | ReplaceUses(SDValue(Node, 1), Result.getValue(1)); | ||||||
5709 | |||||||
5710 | // Replace the result use. | ||||||
5711 | if (!SDValue(Node, 0).use_empty()) { | ||||||
5712 | // For less than 32-bits we need to extract from the 32-bit node. | ||||||
5713 | if (VT == MVT::i8 || VT == MVT::i16) { | ||||||
5714 | int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; | ||||||
5715 | Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); | ||||||
5716 | } | ||||||
5717 | ReplaceUses(SDValue(Node, 0), Result); | ||||||
5718 | } | ||||||
5719 | |||||||
5720 | CurDAG->RemoveDeadNode(Node); | ||||||
5721 | return; | ||||||
5722 | } | ||||||
5723 | break; | ||||||
5724 | } | ||||||
5725 | case X86ISD::MGATHER: { | ||||||
5726 | auto *Mgt = cast<X86MaskedGatherSDNode>(Node); | ||||||
5727 | SDValue IndexOp = Mgt->getIndex(); | ||||||
5728 | SDValue Mask = Mgt->getMask(); | ||||||
5729 | MVT IndexVT = IndexOp.getSimpleValueType(); | ||||||
5730 | MVT ValueVT = Node->getSimpleValueType(0); | ||||||
5731 | MVT MaskVT = Mask.getSimpleValueType(); | ||||||
5732 | |||||||
5733 | // This is just to prevent crashes if the nodes are malformed somehow. We're | ||||||
5734 | // otherwise only doing loose type checking in here based on what | ||||||
5735 | // a type constraint would say, just like table-based isel. | ||||||
5736 | if (!ValueVT.isVector() || !MaskVT.isVector()) | ||||||
5737 | break; | ||||||
5738 | |||||||
5739 | unsigned NumElts = ValueVT.getVectorNumElements(); | ||||||
5740 | MVT ValueSVT = ValueVT.getVectorElementType(); | ||||||
5741 | |||||||
5742 | bool IsFP = ValueSVT.isFloatingPoint(); | ||||||
5743 | unsigned EltSize = ValueSVT.getSizeInBits(); | ||||||
5744 | |||||||
5745 | unsigned Opc = 0; | ||||||
5746 | bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1; | ||||||
5747 | if (AVX512Gather) { | ||||||
5748 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | ||||||
5749 | Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm; | ||||||
5750 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | ||||||
5751 | Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm; | ||||||
5752 | else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) | ||||||
5753 | Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm; | ||||||
5754 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | ||||||
5755 | Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm; | ||||||
5756 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | ||||||
5757 | Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm; | ||||||
5758 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) | ||||||
5759 | Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm; | ||||||
5760 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | ||||||
5761 | Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm; | ||||||
5762 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | ||||||
5763 | Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm; | ||||||
5764 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) | ||||||
5765 | Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm; | ||||||
5766 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | ||||||
5767 | Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm; | ||||||
5768 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | ||||||
5769 | Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm; | ||||||
5770 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) | ||||||
5771 | Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm; | ||||||
5772 | } else { | ||||||
5773 | assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&(static_cast <bool> (EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger () && "Unexpected mask VT!") ? void (0) : __assert_fail ("EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() && \"Unexpected mask VT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5774, __extension__ __PRETTY_FUNCTION__)) | ||||||
5774 | "Unexpected mask VT!")(static_cast <bool> (EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger () && "Unexpected mask VT!") ? void (0) : __assert_fail ("EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() && \"Unexpected mask VT!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5774, __extension__ __PRETTY_FUNCTION__)); | ||||||
5775 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | ||||||
5776 | Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm; | ||||||
5777 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | ||||||
5778 | Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm; | ||||||
5779 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | ||||||
5780 | Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm; | ||||||
5781 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | ||||||
5782 | Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm; | ||||||
5783 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | ||||||
5784 | Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm; | ||||||
5785 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | ||||||
5786 | Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm; | ||||||
5787 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | ||||||
5788 | Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm; | ||||||
5789 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | ||||||
5790 | Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm; | ||||||
5791 | } | ||||||
5792 | |||||||
5793 | if (!Opc) | ||||||
5794 | break; | ||||||
5795 | |||||||
5796 | SDValue Base, Scale, Index, Disp, Segment; | ||||||
5797 | if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(), | ||||||
5798 | Base, Scale, Index, Disp, Segment)) | ||||||
5799 | break; | ||||||
5800 | |||||||
5801 | SDValue PassThru = Mgt->getPassThru(); | ||||||
5802 | SDValue Chain = Mgt->getChain(); | ||||||
5803 | // Gather instructions have a mask output not in the ISD node. | ||||||
5804 | SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other); | ||||||
5805 | |||||||
5806 | MachineSDNode *NewNode; | ||||||
5807 | if (AVX512Gather) { | ||||||
5808 | SDValue Ops[] = {PassThru, Mask, Base, Scale, | ||||||
5809 | Index, Disp, Segment, Chain}; | ||||||
5810 | NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); | ||||||
5811 | } else { | ||||||
5812 | SDValue Ops[] = {PassThru, Base, Scale, Index, | ||||||
5813 | Disp, Segment, Mask, Chain}; | ||||||
5814 | NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); | ||||||
5815 | } | ||||||
5816 | CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()}); | ||||||
5817 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); | ||||||
5818 | ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2)); | ||||||
5819 | CurDAG->RemoveDeadNode(Node); | ||||||
5820 | return; | ||||||
5821 | } | ||||||
5822 | case X86ISD::MSCATTER: { | ||||||
5823 | auto *Sc = cast<X86MaskedScatterSDNode>(Node); | ||||||
5824 | SDValue Value = Sc->getValue(); | ||||||
5825 | SDValue IndexOp = Sc->getIndex(); | ||||||
5826 | MVT IndexVT = IndexOp.getSimpleValueType(); | ||||||
5827 | MVT ValueVT = Value.getSimpleValueType(); | ||||||
5828 | |||||||
5829 | // This is just to prevent crashes if the nodes are malformed somehow. We're | ||||||
5830 | // otherwise only doing loose type checking in here based on what | ||||||
5831 | // a type constraint would say, just like table-based isel. | ||||||
5832 | if (!ValueVT.isVector()) | ||||||
5833 | break; | ||||||
5834 | |||||||
5835 | unsigned NumElts = ValueVT.getVectorNumElements(); | ||||||
5836 | MVT ValueSVT = ValueVT.getVectorElementType(); | ||||||
5837 | |||||||
5838 | bool IsFP = ValueSVT.isFloatingPoint(); | ||||||
5839 | unsigned EltSize = ValueSVT.getSizeInBits(); | ||||||
5840 | |||||||
5841 | unsigned Opc; | ||||||
5842 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | ||||||
5843 | Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr; | ||||||
5844 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | ||||||
5845 | Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr; | ||||||
5846 | else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) | ||||||
5847 | Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr; | ||||||
5848 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | ||||||
5849 | Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr; | ||||||
5850 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | ||||||
5851 | Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr; | ||||||
5852 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) | ||||||
5853 | Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr; | ||||||
5854 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | ||||||
5855 | Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr; | ||||||
5856 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | ||||||
5857 | Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr; | ||||||
5858 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) | ||||||
5859 | Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr; | ||||||
5860 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | ||||||
5861 | Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr; | ||||||
5862 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | ||||||
5863 | Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr; | ||||||
5864 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) | ||||||
5865 | Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr; | ||||||
5866 | else | ||||||
5867 | break; | ||||||
5868 | |||||||
5869 | SDValue Base, Scale, Index, Disp, Segment; | ||||||
5870 | if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(), | ||||||
5871 | Base, Scale, Index, Disp, Segment)) | ||||||
5872 | break; | ||||||
5873 | |||||||
5874 | SDValue Mask = Sc->getMask(); | ||||||
5875 | SDValue Chain = Sc->getChain(); | ||||||
5876 | // Scatter instructions have a mask output not in the ISD node. | ||||||
5877 | SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other); | ||||||
5878 | SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain}; | ||||||
5879 | |||||||
5880 | MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); | ||||||
5881 | CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()}); | ||||||
5882 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1)); | ||||||
5883 | CurDAG->RemoveDeadNode(Node); | ||||||
5884 | return; | ||||||
5885 | } | ||||||
5886 | case ISD::PREALLOCATED_SETUP: { | ||||||
5887 | auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); | ||||||
5888 | auto CallId = MFI->getPreallocatedIdForCallSite( | ||||||
5889 | cast<SrcValueSDNode>(Node->getOperand(1))->getValue()); | ||||||
5890 | SDValue Chain = Node->getOperand(0); | ||||||
5891 | SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32); | ||||||
5892 | MachineSDNode *New = CurDAG->getMachineNode( | ||||||
5893 | TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain); | ||||||
5894 | ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain | ||||||
5895 | CurDAG->RemoveDeadNode(Node); | ||||||
5896 | return; | ||||||
5897 | } | ||||||
5898 | case ISD::PREALLOCATED_ARG: { | ||||||
5899 | auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); | ||||||
5900 | auto CallId = MFI->getPreallocatedIdForCallSite( | ||||||
5901 | cast<SrcValueSDNode>(Node->getOperand(1))->getValue()); | ||||||
5902 | SDValue Chain = Node->getOperand(0); | ||||||
5903 | SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32); | ||||||
5904 | SDValue ArgIndex = Node->getOperand(2); | ||||||
5905 | SDValue Ops[3]; | ||||||
5906 | Ops[0] = CallIdValue; | ||||||
5907 | Ops[1] = ArgIndex; | ||||||
5908 | Ops[2] = Chain; | ||||||
5909 | MachineSDNode *New = CurDAG->getMachineNode( | ||||||
5910 | TargetOpcode::PREALLOCATED_ARG, dl, | ||||||
5911 | CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()), | ||||||
5912 | MVT::Other), | ||||||
5913 | Ops); | ||||||
5914 | ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer | ||||||
5915 | ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain | ||||||
5916 | CurDAG->RemoveDeadNode(Node); | ||||||
5917 | return; | ||||||
5918 | } | ||||||
5919 | case X86ISD::AESENCWIDE128KL: | ||||||
5920 | case X86ISD::AESDECWIDE128KL: | ||||||
5921 | case X86ISD::AESENCWIDE256KL: | ||||||
5922 | case X86ISD::AESDECWIDE256KL: { | ||||||
5923 | if (!Subtarget->hasWIDEKL()) | ||||||
5924 | break; | ||||||
5925 | |||||||
5926 | unsigned Opcode; | ||||||
5927 | switch (Node->getOpcode()) { | ||||||
5928 | default: | ||||||
5929 | llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5929); | ||||||
5930 | case X86ISD::AESENCWIDE128KL: | ||||||
5931 | Opcode = X86::AESENCWIDE128KL; | ||||||
5932 | break; | ||||||
5933 | case X86ISD::AESDECWIDE128KL: | ||||||
5934 | Opcode = X86::AESDECWIDE128KL; | ||||||
5935 | break; | ||||||
5936 | case X86ISD::AESENCWIDE256KL: | ||||||
5937 | Opcode = X86::AESENCWIDE256KL; | ||||||
5938 | break; | ||||||
5939 | case X86ISD::AESDECWIDE256KL: | ||||||
5940 | Opcode = X86::AESDECWIDE256KL; | ||||||
5941 | break; | ||||||
5942 | } | ||||||
5943 | |||||||
5944 | SDValue Chain = Node->getOperand(0); | ||||||
5945 | SDValue Addr = Node->getOperand(1); | ||||||
5946 | |||||||
5947 | SDValue Base, Scale, Index, Disp, Segment; | ||||||
5948 | if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment)) | ||||||
5949 | break; | ||||||
5950 | |||||||
5951 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2), | ||||||
5952 | SDValue()); | ||||||
5953 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3), | ||||||
5954 | Chain.getValue(1)); | ||||||
5955 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4), | ||||||
5956 | Chain.getValue(1)); | ||||||
5957 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5), | ||||||
5958 | Chain.getValue(1)); | ||||||
5959 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6), | ||||||
5960 | Chain.getValue(1)); | ||||||
5961 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7), | ||||||
5962 | Chain.getValue(1)); | ||||||
5963 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8), | ||||||
5964 | Chain.getValue(1)); | ||||||
5965 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9), | ||||||
5966 | Chain.getValue(1)); | ||||||
5967 | |||||||
5968 | MachineSDNode *Res = CurDAG->getMachineNode( | ||||||
5969 | Opcode, dl, Node->getVTList(), | ||||||
5970 | {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)}); | ||||||
5971 | CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand()); | ||||||
5972 | ReplaceNode(Node, Res); | ||||||
5973 | return; | ||||||
5974 | } | ||||||
5975 | } | ||||||
5976 | |||||||
5977 | SelectCode(Node); | ||||||
5978 | } | ||||||
5979 | |||||||
5980 | bool X86DAGToDAGISel:: | ||||||
5981 | SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, | ||||||
5982 | std::vector<SDValue> &OutOps) { | ||||||
5983 | SDValue Op0, Op1, Op2, Op3, Op4; | ||||||
5984 | switch (ConstraintID) { | ||||||
5985 | default: | ||||||
5986 | llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5986); | ||||||
5987 | case InlineAsm::Constraint_o: // offsetable ?? | ||||||
5988 | case InlineAsm::Constraint_v: // not offsetable ?? | ||||||
5989 | case InlineAsm::Constraint_m: // memory | ||||||
5990 | case InlineAsm::Constraint_X: | ||||||
5991 | if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) | ||||||
5992 | return true; | ||||||
5993 | break; | ||||||
5994 | } | ||||||
5995 | |||||||
5996 | OutOps.push_back(Op0); | ||||||
5997 | OutOps.push_back(Op1); | ||||||
5998 | OutOps.push_back(Op2); | ||||||
5999 | OutOps.push_back(Op3); | ||||||
6000 | OutOps.push_back(Op4); | ||||||
6001 | return false; | ||||||
6002 | } | ||||||
6003 | |||||||
6004 | /// This pass converts a legalized DAG into a X86-specific DAG, | ||||||
6005 | /// ready for instruction scheduling. | ||||||
6006 | FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, | ||||||
6007 | CodeGenOpt::Level OptLevel) { | ||||||
6008 | return new X86DAGToDAGISel(TM, OptLevel); | ||||||
6009 | } |
1 | //===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(), |
10 | // and dyn_cast_or_null<X>() templates. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_SUPPORT_CASTING_H |
15 | #define LLVM_SUPPORT_CASTING_H |
16 | |
17 | #include "llvm/Support/Compiler.h" |
18 | #include "llvm/Support/type_traits.h" |
19 | #include <cassert> |
20 | #include <memory> |
21 | #include <type_traits> |
22 | |
23 | namespace llvm { |
24 | |
25 | //===----------------------------------------------------------------------===// |
26 | // isa<x> Support Templates |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | // Define a template that can be specialized by smart pointers to reflect the |
30 | // fact that they are automatically dereferenced, and are not involved with the |
31 | // template selection process... the default implementation is a noop. |
32 | // |
// Traits hook that lets smart-pointer-like wrappers participate in the cast
// machinery by exposing the type they wrap. The unspecialized form performs
// no unwrapping at all: the "simple" type is the type itself.
template <typename From> struct simplify_type {
  using SimpleType = From; // The real type this represents...

  // Identity accessor: with no wrapper involved, the value is already simple.
  static SimpleType &getSimplifiedValue(From &Value) { return Value; }
};
39 | |
// Specialization for const-qualified values: simplify the underlying
// non-const type, then re-apply constness in the appropriate position.
template<typename From> struct simplify_type<const From> {
  // The simplified type of the underlying (non-const) value.
  using NonConstSimpleType = typename simplify_type<From>::SimpleType;
  // Same type with const re-applied; the trait name suggests that for a
  // pointer simple type this yields `const T *` rather than `T *const`
  // (trait defined in llvm/Support/type_traits.h — confirm there).
  using SimpleType =
      typename add_const_past_pointer<NonConstSimpleType>::type;
  // Return an lvalue reference unless the simple type is a pointer,
  // which is cheap to return by value.
  using RetType =
      typename add_lvalue_reference_if_not_pointer<SimpleType>::type;

  // The const_cast is safe: the value is only forwarded through the
  // non-const simplification and handed back with const restored via
  // RetType above; nothing is mutated.
  static RetType getSimplifiedValue(const From& Val) {
    return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
  }
};
51 | |
52 | // The core of the implementation of isa<X> is here; To and From should be |
53 | // the names of classes. This template can be specialized to customize the |
54 | // implementation of isa<> without rewriting it from scratch. |
// Core of isa<X>: decides whether a value is an instance of To.
//
// The unspecialized form defers to the target class's static classof()
// hook; specialize this template to customize isa<> for a type pair
// without rewriting the machinery from scratch.
template <typename To, typename From, typename Enabler = void>
struct isa_impl {
  static inline bool doit(const From &Value) { return To::classof(&Value); }
};

/// Always allow upcasts, and perform no dynamic check for them.
template <typename To, typename From>
struct isa_impl<To, From, std::enable_if_t<std::is_base_of<To, From>::value>> {
  static inline bool doit(const From &) { return true; }
};
67 | |
68 | template <typename To, typename From> struct isa_impl_cl { |
69 | static inline bool doit(const From &Val) { |
70 | return isa_impl<To, From>::doit(Val); |
71 | } |
72 | }; |
73 | |
// isa_impl_cl: strips cv/pointer/unique_ptr wrappers off the From type and
// forwards the pointee to isa_impl. The pointer/unique_ptr forms assert
// non-null; use isa_and_nonnull<> when null must be tolerated.
74 | template <typename To, typename From> struct isa_impl_cl<To, const From> { |
75 | static inline bool doit(const From &Val) { |
76 | return isa_impl<To, From>::doit(Val); |
77 | } |
78 | }; |
79 | |
// Specialization for const std::unique_ptr<From>: asserts the smart pointer
// is non-null, then classifies the pointee.
80 | template <typename To, typename From> |
81 | struct isa_impl_cl<To, const std::unique_ptr<From>> { |
82 | static inline bool doit(const std::unique_ptr<From> &Val) { |
83 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 83, __extension__ __PRETTY_FUNCTION__)); |
84 | return isa_impl_cl<To, From>::doit(*Val); |
85 | } |
86 | }; |
87 | |
// Raw-pointer specializations: one per cv/const-pointer combination so every
// spelling of From* dereferences safely after the null assert.
88 | template <typename To, typename From> struct isa_impl_cl<To, From*> { |
89 | static inline bool doit(const From *Val) { |
90 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 90, __extension__ __PRETTY_FUNCTION__)); |
91 | return isa_impl<To, From>::doit(*Val); |
92 | } |
93 | }; |
94 | |
95 | template <typename To, typename From> struct isa_impl_cl<To, From*const> { |
96 | static inline bool doit(const From *Val) { |
97 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 97, __extension__ __PRETTY_FUNCTION__)); |
98 | return isa_impl<To, From>::doit(*Val); |
99 | } |
100 | }; |
101 | |
102 | template <typename To, typename From> struct isa_impl_cl<To, const From*> { |
103 | static inline bool doit(const From *Val) { |
104 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 104, __extension__ __PRETTY_FUNCTION__)); |
105 | return isa_impl<To, From>::doit(*Val); |
106 | } |
107 | }; |
108 | |
109 | template <typename To, typename From> struct isa_impl_cl<To, const From*const> { |
110 | static inline bool doit(const From *Val) { |
111 | assert(Val && "isa<> used on a null pointer")(static_cast <bool> (Val && "isa<> used on a null pointer" ) ? void (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 111, __extension__ __PRETTY_FUNCTION__)); |
112 | return isa_impl<To, From>::doit(*Val); |
113 | } |
114 | }; |
115 | |
// isa_impl_wrap: recursively applies simplify_type until From reaches its
// fixed point (SimpleType == From), then hands off to isa_impl_cl.
116 | template<typename To, typename From, typename SimpleFrom> |
117 | struct isa_impl_wrap { |
118 | // When From != SimplifiedType, we can simplify the type some more by using |
119 | // the simplify_type template. |
120 | static bool doit(const From &Val) { |
121 | return isa_impl_wrap<To, SimpleFrom, |
122 | typename simplify_type<SimpleFrom>::SimpleType>::doit( |
123 | simplify_type<const From>::getSimplifiedValue(Val)); |
124 | } |
125 | }; |
126 | |
127 | template<typename To, typename FromTy> |
128 | struct isa_impl_wrap<To, FromTy, FromTy> { |
129 | // When From == SimpleType, we are as simple as we are going to get. |
130 | static bool doit(const FromTy &Val) { |
131 | return isa_impl_cl<To,FromTy>::doit(Val); |
132 | } |
133 | }; |
134 | |
135 | // isa<X> - Return true if the parameter to the template is an instance of one |
136 | // of the template type arguments. Used like this: |
137 | // |
138 | // if (isa<Type>(myVal)) { ... } |
139 | // if (isa<Type0, Type1, Type2>(myVal)) { ... } |
140 | // |
141 | template <class X, class Y> LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa(const Y &Val) { |
142 | return isa_impl_wrap<X, const Y, |
143 | typename simplify_type<const Y>::SimpleType>::doit(Val); |
144 | } |
145 | |
// Variadic overload: true if Val is an instance of ANY listed type
// (short-circuiting left to right via the single-type overload above).
146 | template <typename First, typename Second, typename... Rest, typename Y> |
147 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa(const Y &Val) { |
148 | return isa<First>(Val) || isa<Second, Rest...>(Val); |
149 | } |
150 | |
151 | // isa_and_nonnull<X> - Functionally identical to isa, except that a null value |
152 | // is accepted. |
153 | // |
154 | template <typename... X, class Y> |
155 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa_and_nonnull(const Y &Val) { |
156 | if (!Val) |
157 | return false; |
158 | return isa<X...>(Val); |
159 | } |
160 | |
161 | //===----------------------------------------------------------------------===// |
162 | // cast<x> Support Templates |
163 | //===----------------------------------------------------------------------===// |
164 | |
// cast_retty / cast_retty_impl / cast_retty_wrap: compile-time computation of
// the return type of cast<To>(From): T&, const T&, T*, const T*, or
// unique_ptr<T>, mirroring how the argument was passed.
165 | template<class To, class From> struct cast_retty; |
166 | |
167 | // Calculate what type the 'cast' function should return, based on a requested |
168 | // type of To and a source type of From. |
169 | template<class To, class From> struct cast_retty_impl { |
170 | using ret_type = To &; // Normal case, return Ty& |
171 | }; |
172 | template<class To, class From> struct cast_retty_impl<To, const From> { |
173 | using ret_type = const To &; // Normal case, return Ty& |
174 | }; |
175 | |
176 | template<class To, class From> struct cast_retty_impl<To, From*> { |
177 | using ret_type = To *; // Pointer arg case, return Ty* |
178 | }; |
179 | |
180 | template<class To, class From> struct cast_retty_impl<To, const From*> { |
181 | using ret_type = const To *; // Constant pointer arg case, return const Ty* |
182 | }; |
183 | |
184 | template<class To, class From> struct cast_retty_impl<To, const From*const> { |
185 | using ret_type = const To *; // Constant pointer arg case, return const Ty* |
186 | }; |
187 | |
// unique_ptr case: casting an owning pointer yields an owning pointer to the
// target type (constness of the pointee is derived from the To/From pair).
188 | template <class To, class From> |
189 | struct cast_retty_impl<To, std::unique_ptr<From>> { |
190 | private: |
191 | using PointerType = typename cast_retty_impl<To, From *>::ret_type; |
192 | using ResultType = std::remove_pointer_t<PointerType>; |
193 | |
194 | public: |
195 | using ret_type = std::unique_ptr<ResultType>; |
196 | }; |
197 | |
198 | template<class To, class From, class SimpleFrom> |
199 | struct cast_retty_wrap { |
200 | // When the simplified type and the from type are not the same, use the type |
201 | // simplifier to reduce the type, then reuse cast_retty_impl to get the |
202 | // resultant type. |
203 | using ret_type = typename cast_retty<To, SimpleFrom>::ret_type; |
204 | }; |
205 | |
206 | template<class To, class FromTy> |
207 | struct cast_retty_wrap<To, FromTy, FromTy> { |
208 | // When the simplified type is equal to the from type, use it directly. |
209 | using ret_type = typename cast_retty_impl<To,FromTy>::ret_type; |
210 | }; |
211 | |
212 | template<class To, class From> |
213 | struct cast_retty { |
214 | using ret_type = typename cast_retty_wrap< |
215 | To, From, typename simplify_type<From>::SimpleType>::ret_type; |
216 | }; |
217 | |
218 | // Ensure the non-simple values are converted using the simplify_type template |
219 | // that may be specialized by smart pointers... |
220 | // |
// cast_convert_val: runtime half of cast<>; peels simplify_type layers, then
// performs the final (unchecked) conversion in the FromTy==SimpleType case.
221 | template<class To, class From, class SimpleFrom> struct cast_convert_val { |
222 | // This is not a simple type, use the template to simplify it... |
223 | static typename cast_retty<To, From>::ret_type doit(From &Val) { |
224 | return cast_convert_val<To, SimpleFrom, |
225 | typename simplify_type<SimpleFrom>::SimpleType>::doit( |
226 | simplify_type<From>::getSimplifiedValue(Val)); |
227 | } |
228 | }; |
229 | |
230 | template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> { |
231 | // This _is_ a simple type, just cast it. |
// NOTE: the C-style cast plus const_cast below is deliberate legacy code;
// safety is guaranteed by the isa<> assert in the public cast<> entry points.
232 | static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) { |
233 | typename cast_retty<To, FromTy>::ret_type Res2 |
234 | = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val); |
235 | return Res2; |
236 | } |
237 | }; |
238 | |
// is_simple_type<X>: true when simplify_type<X> is the identity, i.e. X has no
// further wrapper to strip. Used to partition the cast/dyn_cast overload sets.
239 | template <class X> struct is_simple_type { |
240 | static const bool value = |
241 | std::is_same<X, typename simplify_type<X>::SimpleType>::value; |
242 | }; |
243 | |
244 | // cast<X> - Return the argument parameter cast to the specified type. This |
245 | // casting operator asserts that the type is correct, so it does not return null |
246 | // on failure. It does not allow a null argument (use cast_or_null for that). |
247 | // It is typically used like this: |
248 | // |
249 | // cast<Instruction>(myVal)->getParent() |
250 | // |
// Overload for non-simple const references (e.g. const SDValue&); enable_if
// keeps it out of the overload set for already-simple types.
251 | template <class X, class Y> |
252 | inline std::enable_if_t<!is_simple_type<Y>::value, |
253 | typename cast_retty<X, const Y>::ret_type> |
254 | cast(const Y &Val) { |
255 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 255, __extension__ __PRETTY_FUNCTION__)); |
256 | return cast_convert_val< |
257 | X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val); |
258 | } |
259 | |
// Mutable-reference overload.
260 | template <class X, class Y> |
261 | inline typename cast_retty<X, Y>::ret_type cast(Y &Val) { |
262 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 262, __extension__ __PRETTY_FUNCTION__)); |
263 | return cast_convert_val<X, Y, |
264 | typename simplify_type<Y>::SimpleType>::doit(Val); |
265 | } |
266 | |
// Raw-pointer overload; asserts non-null via isa<>'s own null assert.
267 | template <class X, class Y> |
268 | inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) { |
269 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 269, __extension__ __PRETTY_FUNCTION__)); |
270 | return cast_convert_val<X, Y*, |
271 | typename simplify_type<Y*>::SimpleType>::doit(Val); |
272 | } |
273 | |
// unique_ptr overload: releases ownership from Val and transfers it to the
// returned unique_ptr of the target type.
274 | template <class X, class Y> |
275 | inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type |
276 | cast(std::unique_ptr<Y> &&Val) { |
277 | assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!") ? void (0 ) : __assert_fail ("isa<X>(Val.get()) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 277, __extension__ __PRETTY_FUNCTION__)); |
278 | using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type; |
279 | return ret_type( |
280 | cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit( |
281 | Val.release())); |
282 | } |
283 | |
284 | // cast_or_null<X> - Functionally identical to cast, except that a null value is |
285 | // accepted. |
286 | // |
287 | template <class X, class Y> |
288 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
289 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
290 | cast_or_null(const Y &Val) { |
291 | if (!Val) |
292 | return nullptr; |
293 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 293, __extension__ __PRETTY_FUNCTION__)); |
294 | return cast<X>(Val); |
295 | } |
296 | |
297 | template <class X, class Y> |
298 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t<!is_simple_type<Y>::value, |
299 | typename cast_retty<X, Y>::ret_type> |
300 | cast_or_null(Y &Val) { |
301 | if (!Val) |
302 | return nullptr; |
303 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 303, __extension__ __PRETTY_FUNCTION__)); |
304 | return cast<X>(Val); |
305 | } |
306 | |
307 | template <class X, class Y> |
308 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type |
309 | cast_or_null(Y *Val) { |
310 | if (!Val) return nullptr; |
311 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")(static_cast <bool> (isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? void (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/Casting.h" , 311, __extension__ __PRETTY_FUNCTION__)); |
312 | return cast<X>(Val); |
313 | } |
314 | |
// unique_ptr form: a null owning pointer maps to a null result; otherwise
// ownership transfers through cast<X>(std::move(Val)).
315 | template <class X, class Y> |
316 | inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type |
317 | cast_or_null(std::unique_ptr<Y> &&Val) { |
318 | if (!Val) |
319 | return nullptr; |
320 | return cast<X>(std::move(Val)); |
321 | } |
322 | |
323 | // dyn_cast<X> - Return the argument parameter cast to the specified type. This |
324 | // casting operator returns null if the argument is of the wrong type, so it can |
325 | // be used to test for a type as well as cast if successful. This should be |
326 | // used in the context of an if statement like this: |
327 | // |
328 | // if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... } |
329 | // |
330 | |
// NOTE: dyn_cast still requires a non-null argument (isa<> asserts on null);
// use dyn_cast_or_null when the value may be null.
331 | template <class X, class Y> |
332 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
333 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
334 | dyn_cast(const Y &Val) { |
335 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
336 | } |
337 | |
338 | template <class X, class Y> |
339 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) { |
340 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
341 | } |
342 | |
343 | template <class X, class Y> |
344 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) { |
345 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
346 | } |
347 | |
348 | // dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null |
349 | // value is accepted. |
350 | // |
351 | template <class X, class Y> |
352 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
353 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
354 | dyn_cast_or_null(const Y &Val) { |
355 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
356 | } |
357 | |
358 | template <class X, class Y> |
359 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t<!is_simple_type<Y>::value, |
360 | typename cast_retty<X, Y>::ret_type> |
361 | dyn_cast_or_null(Y &Val) { |
362 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
363 | } |
364 | |
365 | template <class X, class Y> |
366 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type |
367 | dyn_cast_or_null(Y *Val) { |
368 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
369 | } |
370 | |
371 | // unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>, |
372 | // taking ownership of the input pointer iff isa<X>(Val) is true. If the |
373 | // cast is successful, From refers to nullptr on exit and the casted value |
374 | // is returned. If the cast is unsuccessful, the function returns nullptr |
375 | // and From is unchanged. |
376 | template <class X, class Y> |
377 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast(std::unique_ptr<Y> &Val) |
378 | -> decltype(cast<X>(Val)) { |
379 | if (!isa<X>(Val)) |
380 | return nullptr; |
381 | return cast<X>(std::move(Val)); |
382 | } |
383 | |
// Rvalue overload: forwards to the lvalue overload above (the moved-from
// temporary is discarded either way).
384 | template <class X, class Y> |
385 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val) { |
386 | return unique_dyn_cast<X, Y>(Val); |
387 | } |
388 | |
389 | // unique_dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast, except that |
390 | // a null value is accepted. |
391 | template <class X, class Y> |
392 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &Val) |
393 | -> decltype(cast<X>(Val)) { |
394 | if (!Val) |
395 | return nullptr; |
396 | return unique_dyn_cast<X, Y>(Val); |
397 | } |
398 | |
399 | template <class X, class Y> |
400 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val) { |
401 | return unique_dyn_cast_or_null<X, Y>(Val); |
402 | } |
403 | |
404 | } // end namespace llvm |
405 | |
406 | #endif // LLVM_SUPPORT_CASTING_H |
1 | //===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file declares the SDNode class and derived classes, which are used to | |||
10 | // represent the nodes and operations present in a SelectionDAG. These nodes | |||
11 | // and operations are machine code level operations, with some similarities to | |||
12 | // the GCC RTL representation. | |||
13 | // | |||
14 | // Clients should include the SelectionDAG.h file instead of this file directly. | |||
15 | // | |||
16 | //===----------------------------------------------------------------------===// | |||
17 | ||||
18 | #ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
19 | #define LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
20 | ||||
21 | #include "llvm/ADT/APFloat.h" | |||
22 | #include "llvm/ADT/ArrayRef.h" | |||
23 | #include "llvm/ADT/BitVector.h" | |||
24 | #include "llvm/ADT/FoldingSet.h" | |||
25 | #include "llvm/ADT/GraphTraits.h" | |||
26 | #include "llvm/ADT/SmallPtrSet.h" | |||
27 | #include "llvm/ADT/SmallVector.h" | |||
28 | #include "llvm/ADT/ilist_node.h" | |||
29 | #include "llvm/ADT/iterator.h" | |||
30 | #include "llvm/ADT/iterator_range.h" | |||
31 | #include "llvm/CodeGen/ISDOpcodes.h" | |||
32 | #include "llvm/CodeGen/MachineMemOperand.h" | |||
33 | #include "llvm/CodeGen/Register.h" | |||
34 | #include "llvm/CodeGen/ValueTypes.h" | |||
35 | #include "llvm/IR/Constants.h" | |||
36 | #include "llvm/IR/DebugLoc.h" | |||
37 | #include "llvm/IR/Instruction.h" | |||
38 | #include "llvm/IR/Instructions.h" | |||
39 | #include "llvm/IR/Metadata.h" | |||
40 | #include "llvm/IR/Operator.h" | |||
41 | #include "llvm/Support/AlignOf.h" | |||
42 | #include "llvm/Support/AtomicOrdering.h" | |||
43 | #include "llvm/Support/Casting.h" | |||
44 | #include "llvm/Support/ErrorHandling.h" | |||
45 | #include "llvm/Support/MachineValueType.h" | |||
46 | #include "llvm/Support/TypeSize.h" | |||
47 | #include <algorithm> | |||
48 | #include <cassert> | |||
49 | #include <climits> | |||
50 | #include <cstddef> | |||
51 | #include <cstdint> | |||
52 | #include <cstring> | |||
53 | #include <iterator> | |||
54 | #include <string> | |||
55 | #include <tuple> | |||
56 | ||||
57 | namespace llvm { | |||
58 | ||||
59 | class APInt; | |||
60 | class Constant; | |||
61 | template <typename T> struct DenseMapInfo; | |||
62 | class GlobalValue; | |||
63 | class MachineBasicBlock; | |||
64 | class MachineConstantPoolValue; | |||
65 | class MCSymbol; | |||
66 | class raw_ostream; | |||
67 | class SDNode; | |||
68 | class SelectionDAG; | |||
69 | class Type; | |||
70 | class Value; | |||
71 | ||||
72 | void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr, | |||
73 | bool force = false); | |||
74 | ||||
75 | /// This represents a list of ValueType's that has been intern'd by | |||
76 | /// a SelectionDAG. Instances of this simple value class are returned by | |||
77 | /// SelectionDAG::getVTList(...). | |||
78 | /// | |||
79 | struct SDVTList { | |||
80 | const EVT *VTs; | |||
81 | unsigned int NumVTs; | |||
82 | }; | |||
83 | ||||
84 | namespace ISD { | |||
85 | ||||
86 | /// Node predicates | |||
87 | ||||
88 | /// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the | |||
89 | /// same constant or undefined, return true and return the constant value in | |||
90 | /// \p SplatValue. | |||
91 | bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); | |||
92 | ||||
93 | /// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where | |||
94 | /// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to | |||
95 | /// true, it only checks BUILD_VECTOR. | |||
96 | bool isConstantSplatVectorAllOnes(const SDNode *N, | |||
97 | bool BuildVectorOnly = false); | |||
98 | ||||
99 | /// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where | |||
100 | /// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it | |||
101 | /// only checks BUILD_VECTOR. | |||
102 | bool isConstantSplatVectorAllZeros(const SDNode *N, | |||
103 | bool BuildVectorOnly = false); | |||
104 | ||||
105 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
106 | /// elements are ~0 or undef. | |||
107 | bool isBuildVectorAllOnes(const SDNode *N); | |||
108 | ||||
109 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
110 | /// elements are 0 or undef. | |||
111 | bool isBuildVectorAllZeros(const SDNode *N); | |||
112 | ||||
113 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
114 | /// ConstantSDNode or undef. | |||
115 | bool isBuildVectorOfConstantSDNodes(const SDNode *N); | |||
116 | ||||
117 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
118 | /// ConstantFPSDNode or undef. | |||
119 | bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); | |||
120 | ||||
121 | /// Return true if the node has at least one operand and all operands of the | |||
122 | /// specified node are ISD::UNDEF. | |||
123 | bool allOperandsUndef(const SDNode *N); | |||
124 | ||||
125 | } // end namespace ISD | |||
126 | ||||
127 | //===----------------------------------------------------------------------===// | |||
128 | /// Unlike LLVM values, Selection DAG nodes may return multiple | |||
129 | /// values as the result of a computation. Many nodes return multiple values, | |||
130 | /// from loads (which define a token and a return value) to ADDC (which returns | |||
131 | /// a result and a carry value), to calls (which may return an arbitrary number | |||
132 | /// of values). | |||
133 | /// | |||
134 | /// As such, each use of a SelectionDAG computation must indicate the node that | |||
135 | /// computes it as well as which return value to use from that node. This pair | |||
136 | /// of information is represented with the SDValue value type. | |||
137 | /// | |||
138 | class SDValue { | |||
139 | friend struct DenseMapInfo<SDValue>; | |||
140 | ||||
141 | SDNode *Node = nullptr; // The node defining the value we are using. | |||
142 | unsigned ResNo = 0; // Which return value of the node we are using. | |||
143 | ||||
144 | public: | |||
145 | SDValue() = default; | |||
146 | SDValue(SDNode *node, unsigned resno); | |||
147 | ||||
148 | /// get the index which selects a specific result in the SDNode | |||
149 | unsigned getResNo() const { return ResNo; } | |||
150 | ||||
151 | /// get the SDNode which holds the desired result | |||
152 | SDNode *getNode() const { return Node; } | |||
153 | ||||
154 | /// set the SDNode | |||
155 | void setNode(SDNode *N) { Node = N; } | |||
156 | ||||
157 | inline SDNode *operator->() const { return Node; } | |||
158 | ||||
159 | bool operator==(const SDValue &O) const { | |||
160 | return Node == O.Node && ResNo == O.ResNo; | |||
161 | } | |||
162 | bool operator!=(const SDValue &O) const { | |||
163 | return !operator==(O); | |||
164 | } | |||
165 | bool operator<(const SDValue &O) const { | |||
166 | return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo); | |||
167 | } | |||
168 | explicit operator bool() const { | |||
169 | return Node != nullptr; | |||
170 | } | |||
171 | ||||
172 | SDValue getValue(unsigned R) const { | |||
173 | return SDValue(Node, R); | |||
174 | } | |||
175 | ||||
176 | /// Return true if this node is an operand of N. | |||
177 | bool isOperandOf(const SDNode *N) const; | |||
178 | ||||
179 | /// Return the ValueType of the referenced return value. | |||
180 | inline EVT getValueType() const; | |||
181 | ||||
182 | /// Return the simple ValueType of the referenced return value. | |||
183 | MVT getSimpleValueType() const { | |||
184 | return getValueType().getSimpleVT(); | |||
185 | } | |||
186 | ||||
187 | /// Returns the size of the value in bits. | |||
188 | /// | |||
189 | /// If the value type is a scalable vector type, the scalable property will | |||
190 | /// be set and the runtime size will be a positive integer multiple of the | |||
191 | /// base size. | |||
192 | TypeSize getValueSizeInBits() const { | |||
193 | return getValueType().getSizeInBits(); | |||
194 | } | |||
195 | ||||
196 | uint64_t getScalarValueSizeInBits() const { | |||
197 | return getValueType().getScalarType().getFixedSizeInBits(); | |||
198 | } | |||
199 | ||||
200 | // Forwarding methods - These forward to the corresponding methods in SDNode. | |||
201 | inline unsigned getOpcode() const; | |||
202 | inline unsigned getNumOperands() const; | |||
203 | inline const SDValue &getOperand(unsigned i) const; | |||
204 | inline uint64_t getConstantOperandVal(unsigned i) const; | |||
205 | inline const APInt &getConstantOperandAPInt(unsigned i) const; | |||
206 | inline bool isTargetMemoryOpcode() const; | |||
207 | inline bool isTargetOpcode() const; | |||
208 | inline bool isMachineOpcode() const; | |||
209 | inline bool isUndef() const; | |||
210 | inline unsigned getMachineOpcode() const; | |||
211 | inline const DebugLoc &getDebugLoc() const; | |||
212 | inline void dump() const; | |||
213 | inline void dump(const SelectionDAG *G) const; | |||
214 | inline void dumpr() const; | |||
215 | inline void dumpr(const SelectionDAG *G) const; | |||
216 | ||||
217 | /// Return true if this operand (which must be a chain) reaches the | |||
218 | /// specified operand without crossing any side-effecting instructions. | |||
219 | /// In practice, this looks through token factors and non-volatile loads. | |||
220 | /// In order to remain efficient, this only | |||
221 | /// looks a couple of nodes in, it does not do an exhaustive search. | |||
222 | bool reachesChainWithoutSideEffects(SDValue Dest, | |||
223 | unsigned Depth = 2) const; | |||
224 | ||||
225 | /// Return true if there are no nodes using value ResNo of Node. | |||
226 | inline bool use_empty() const; | |||
227 | ||||
228 | /// Return true if there is exactly one node using value ResNo of Node. | |||
229 | inline bool hasOneUse() const; | |||
230 | }; | |||
231 | ||||
232 | template<> struct DenseMapInfo<SDValue> { | |||
233 | static inline SDValue getEmptyKey() { | |||
234 | SDValue V; | |||
235 | V.ResNo = -1U; | |||
236 | return V; | |||
237 | } | |||
238 | ||||
239 | static inline SDValue getTombstoneKey() { | |||
240 | SDValue V; | |||
241 | V.ResNo = -2U; | |||
242 | return V; | |||
243 | } | |||
244 | ||||
245 | static unsigned getHashValue(const SDValue &Val) { | |||
246 | return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^ | |||
247 | (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo(); | |||
248 | } | |||
249 | ||||
250 | static bool isEqual(const SDValue &LHS, const SDValue &RHS) { | |||
251 | return LHS == RHS; | |||
252 | } | |||
253 | }; | |||
254 | ||||
255 | /// Allow casting operators to work directly on |||
256 | /// SDValues as if they were SDNode*'s. |||
// isa<>/cast<>/dyn_cast<> simplify an SDValue to its underlying SDNode*.
257 | template<> struct simplify_type<SDValue> { |||
258 | using SimpleType = SDNode *; |||
259 | ||||
260 | static SimpleType getSimplifiedValue(SDValue &Val) { |||
261 | return Val.getNode(); |||
262 | } |||
263 | }; |||
// const variant: still yields a mutable SDNode* (see the /*const*/ note) so
// the cast machinery above compiles unchanged for const SDValue.
264 | template<> struct simplify_type<const SDValue> { |||
265 | using SimpleType = /*const*/ SDNode *; |||
266 | ||||
267 | static SimpleType getSimplifiedValue(const SDValue &Val) { |||
268 | return Val.getNode(); |||
269 | } |||
270 | }; |||
271 | ||||
272 | /// Represents a use of a SDNode. This class holds an SDValue, |||
273 | /// which records the SDNode being used and the result number, a |||
274 | /// pointer to the SDNode using the value, and Next and Prev pointers, |||
275 | /// which link together all the uses of an SDNode. |||
276 | /// |||
// SDUse is non-copyable: each instance is a node in an intrusive
// doubly-linked use list, so copying would corrupt the list links.
277 | class SDUse { |||
278 | /// Val - The value being used. |||
279 | SDValue Val; |||
280 | /// User - The user of this value. |||
281 | SDNode *User = nullptr; |||
282 | /// Prev, Next - Pointers to the uses list of the SDNode referred by |||
283 | /// this operand. |||
284 | SDUse **Prev = nullptr; |||
285 | SDUse *Next = nullptr; |||
286 | ||||
287 | public: |||
288 | SDUse() = default; |||
289 | SDUse(const SDUse &U) = delete; |||
290 | SDUse &operator=(const SDUse &) = delete; |||
291 | ||||
292 | /// Normally SDUse will just implicitly convert to an SDValue that it holds. |||
293 | operator const SDValue&() const { return Val; } |||
294 | ||||
295 | /// If implicit conversion to SDValue doesn't work, the get() method returns |||
296 | /// the SDValue. |||
297 | const SDValue &get() const { return Val; } |||
298 | ||||
299 | /// This returns the SDNode that contains this Use. |||
300 | SDNode *getUser() { return User; } |||
301 | ||||
302 | /// Get the next SDUse in the use list. |||
303 | SDUse *getNext() const { return Next; } |||
304 | ||||
305 | /// Convenience function for get().getNode(). |||
306 | SDNode *getNode() const { return Val.getNode(); } |||
307 | /// Convenience function for get().getResNo(). |||
308 | unsigned getResNo() const { return Val.getResNo(); } |||
309 | /// Convenience function for get().getValueType(). |||
310 | EVT getValueType() const { return Val.getValueType(); } |||
311 | ||||
312 | /// Convenience function for get().operator== |||
313 | bool operator==(const SDValue &V) const { |||
314 | return Val == V; |||
315 | } |||
316 | ||||
317 | /// Convenience function for get().operator!= |||
318 | bool operator!=(const SDValue &V) const { |||
319 | return Val != V; |||
320 | } |||
321 | ||||
322 | /// Convenience function for get().operator< |||
323 | bool operator<(const SDValue &V) const { |||
324 | return Val < V; |||
325 | } |||
326 | ||||
  // List maintenance is private: only the DAG machinery below may relink
  // uses, keeping the use lists consistent with node operands.
327 | private: |||
328 | friend class SelectionDAG; |||
329 | friend class SDNode; |||
330 | // TODO: unfriend HandleSDNode once we fix its operand handling. |||
331 | friend class HandleSDNode; |||
332 | ||||
333 | void setUser(SDNode *p) { User = p; } |||
334 | ||||
335 | /// Remove this use from its existing use list, assign it the |||
336 | /// given value, and add it to the new value's node's use list. |||
337 | inline void set(const SDValue &V); |||
338 | /// Like set, but only supports initializing a newly-allocated |||
339 | /// SDUse with a non-null value. |||
340 | inline void setInitial(const SDValue &V); |||
341 | /// Like set, but only sets the Node portion of the value, |||
342 | /// leaving the ResNo portion unmodified. |||
343 | inline void setNode(SDNode *N); |||
344 | ||||
  // Insert this use at the head of the given use list (List points at the
  // list head pointer); Prev always points at the slot that points to us.
345 | void addToList(SDUse **List) { |||
346 | Next = *List; |||
347 | if (Next) Next->Prev = &Next; |||
348 | Prev = List; |||
349 | *List = this; |||
350 | } |||
351 | ||||
  // Unlink this use in O(1); assumes the use is currently on a list
  // (Prev != nullptr).
352 | void removeFromList() { |||
353 | *Prev = Next; |||
354 | if (Next) Next->Prev = Prev; |||
355 | } |||
356 | }; |||
357 | ||||
358 | /// simplify_type specializations - Allow casting operators to work directly on |||
359 | /// SDValues as if they were SDNode*'s. |||
// Lets isa<>/cast<>/dyn_cast<> accept an SDUse (e.g. an operand reference)
// by simplifying it to the used SDNode*.
360 | template<> struct simplify_type<SDUse> { |||
361 | using SimpleType = SDNode *; |||
362 | ||||
363 | static SimpleType getSimplifiedValue(SDUse &Val) { |||
364 | return Val.getNode(); |||
365 | } |||
366 | }; |||
367 | ||||
368 | /// These are IR-level optimization flags that may be propagated to SDNodes. | |||
369 | /// TODO: This data structure should be shared by the IR optimizer and the | |||
370 | /// the backend. | |||
371 | struct SDNodeFlags { | |||
372 | private: | |||
373 | bool NoUnsignedWrap : 1; | |||
374 | bool NoSignedWrap : 1; | |||
375 | bool Exact : 1; | |||
376 | bool NoNaNs : 1; | |||
377 | bool NoInfs : 1; | |||
378 | bool NoSignedZeros : 1; | |||
379 | bool AllowReciprocal : 1; | |||
380 | bool AllowContract : 1; | |||
381 | bool ApproximateFuncs : 1; | |||
382 | bool AllowReassociation : 1; | |||
383 | ||||
384 | // We assume instructions do not raise floating-point exceptions by default, | |||
385 | // and only those marked explicitly may do so. We could choose to represent | |||
386 | // this via a positive "FPExcept" flags like on the MI level, but having a | |||
387 | // negative "NoFPExcept" flag here (that defaults to true) makes the flag | |||
388 | // intersection logic more straightforward. | |||
389 | bool NoFPExcept : 1; | |||
390 | ||||
391 | public: | |||
392 | /// Default constructor turns off all optimization flags. | |||
393 | SDNodeFlags() | |||
394 | : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), | |||
395 | NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), | |||
396 | AllowContract(false), ApproximateFuncs(false), | |||
397 | AllowReassociation(false), NoFPExcept(false) {} | |||
398 | ||||
399 | /// Propagate the fast-math-flags from an IR FPMathOperator. | |||
400 | void copyFMF(const FPMathOperator &FPMO) { | |||
401 | setNoNaNs(FPMO.hasNoNaNs()); | |||
402 | setNoInfs(FPMO.hasNoInfs()); | |||
403 | setNoSignedZeros(FPMO.hasNoSignedZeros()); | |||
404 | setAllowReciprocal(FPMO.hasAllowReciprocal()); | |||
405 | setAllowContract(FPMO.hasAllowContract()); | |||
406 | setApproximateFuncs(FPMO.hasApproxFunc()); | |||
407 | setAllowReassociation(FPMO.hasAllowReassoc()); | |||
408 | } | |||
409 | ||||
410 | // These are mutators for each flag. | |||
411 | void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } | |||
412 | void setNoSignedWrap(bool b) { NoSignedWrap = b; } | |||
413 | void setExact(bool b) { Exact = b; } | |||
414 | void setNoNaNs(bool b) { NoNaNs = b; } | |||
415 | void setNoInfs(bool b) { NoInfs = b; } | |||
416 | void setNoSignedZeros(bool b) { NoSignedZeros = b; } | |||
417 | void setAllowReciprocal(bool b) { AllowReciprocal = b; } | |||
418 | void setAllowContract(bool b) { AllowContract = b; } | |||
419 | void setApproximateFuncs(bool b) { ApproximateFuncs = b; } | |||
420 | void setAllowReassociation(bool b) { AllowReassociation = b; } | |||
421 | void setNoFPExcept(bool b) { NoFPExcept = b; } | |||
422 | ||||
423 | // These are accessors for each flag. | |||
424 | bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } | |||
425 | bool hasNoSignedWrap() const { return NoSignedWrap; } | |||
426 | bool hasExact() const { return Exact; } | |||
427 | bool hasNoNaNs() const { return NoNaNs; } | |||
428 | bool hasNoInfs() const { return NoInfs; } | |||
429 | bool hasNoSignedZeros() const { return NoSignedZeros; } | |||
430 | bool hasAllowReciprocal() const { return AllowReciprocal; } | |||
431 | bool hasAllowContract() const { return AllowContract; } | |||
432 | bool hasApproximateFuncs() const { return ApproximateFuncs; } | |||
433 | bool hasAllowReassociation() const { return AllowReassociation; } | |||
434 | bool hasNoFPExcept() const { return NoFPExcept; } | |||
435 | ||||
436 | /// Clear any flags in this flag set that aren't also set in Flags. All | |||
437 | /// flags will be cleared if Flags are undefined. | |||
438 | void intersectWith(const SDNodeFlags Flags) { | |||
439 | NoUnsignedWrap &= Flags.NoUnsignedWrap; | |||
440 | NoSignedWrap &= Flags.NoSignedWrap; | |||
441 | Exact &= Flags.Exact; | |||
442 | NoNaNs &= Flags.NoNaNs; | |||
443 | NoInfs &= Flags.NoInfs; | |||
444 | NoSignedZeros &= Flags.NoSignedZeros; | |||
445 | AllowReciprocal &= Flags.AllowReciprocal; | |||
446 | AllowContract &= Flags.AllowContract; | |||
447 | ApproximateFuncs &= Flags.ApproximateFuncs; | |||
448 | AllowReassociation &= Flags.AllowReassociation; | |||
449 | NoFPExcept &= Flags.NoFPExcept; | |||
450 | } | |||
451 | }; | |||
452 | ||||
/// Represents one node in the SelectionDAG.
///
class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
private:
  /// The operation that this node performs.
  /// Pre-isel opcodes are non-negative ISD / <target>ISD values; post-isel
  /// machine opcodes are stored bitwise-complemented, so a negative NodeType
  /// identifies a machine node (see isMachineOpcode / getMachineOpcode).
  int16_t NodeType;

protected:
  // We define a set of mini-helper classes to help us interpret the bits in our
  // SubclassData.  These are designed to fit within a uint16_t so they pack
  // with NodeType.

#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
// and give the `pack` pragma push semantics.
#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
#else
#define BEGIN_TWO_BYTE_PACK()
#define END_TWO_BYTE_PACK()
#endif

BEGIN_TWO_BYTE_PACK()
  class SDNodeBitfields {
    friend class SDNode;
    friend class MemIntrinsicSDNode;
    friend class MemSDNode;
    friend class SelectionDAG;

    uint16_t HasDebugValue : 1;
    uint16_t IsMemIntrinsic : 1;
    uint16_t IsDivergent : 1;
  };
  enum { NumSDNodeBits = 3 };

  class ConstantSDNodeBitfields {
    friend class ConstantSDNode;

    // Skip over the bits used by the base SDNodeBitfields so the layouts of
    // all members of the union below stay disjoint.
    uint16_t : NumSDNodeBits;

    uint16_t IsOpaque : 1;
  };

  class MemSDNodeBitfields {
    friend class MemSDNode;
    friend class MemIntrinsicSDNode;
    friend class AtomicSDNode;

    uint16_t : NumSDNodeBits;

    uint16_t IsVolatile : 1;
    uint16_t IsNonTemporal : 1;
    uint16_t IsDereferenceable : 1;
    uint16_t IsInvariant : 1;
  };
  enum { NumMemSDNodeBits = NumSDNodeBits + 4 };

  class LSBaseSDNodeBitfields {
    friend class LSBaseSDNode;
    friend class MaskedLoadStoreSDNode;
    friend class MaskedGatherScatterSDNode;

    uint16_t : NumMemSDNodeBits;

    // This storage is shared between disparate class hierarchies to hold an
    // enumeration specific to the class hierarchy in use.
    //   LSBaseSDNode => enum ISD::MemIndexedMode
    //   MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
    //   MaskedGatherScatterSDNode => enum ISD::MemIndexType
    uint16_t AddressingMode : 3;
  };
  enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };

  class LoadSDNodeBitfields {
    friend class LoadSDNode;
    friend class MaskedLoadSDNode;
    friend class MaskedGatherSDNode;

    uint16_t : NumLSBaseSDNodeBits;

    uint16_t ExtTy : 2; // enum ISD::LoadExtType
    uint16_t IsExpanding : 1;
  };

  class StoreSDNodeBitfields {
    friend class StoreSDNode;
    friend class MaskedStoreSDNode;
    friend class MaskedScatterSDNode;

    uint16_t : NumLSBaseSDNodeBits;

    uint16_t IsTruncating : 1;
    uint16_t IsCompressing : 1;
  };

  union {
    char RawSDNodeBits[sizeof(uint16_t)];
    SDNodeBitfields SDNodeBits;
    ConstantSDNodeBitfields ConstantSDNodeBits;
    MemSDNodeBitfields MemSDNodeBits;
    LSBaseSDNodeBitfields LSBaseSDNodeBits;
    LoadSDNodeBitfields LoadSDNodeBits;
    StoreSDNodeBitfields StoreSDNodeBits;
  };
END_TWO_BYTE_PACK()
#undef BEGIN_TWO_BYTE_PACK
#undef END_TWO_BYTE_PACK

  // RawSDNodeBits must cover the entirety of the union.  This means that all of
  // the union's members must have size <= RawSDNodeBits.  We write the RHS as
  // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
  static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");

private:
  friend class SelectionDAG;
  // TODO: unfriend HandleSDNode once we fix its operand handling.
  friend class HandleSDNode;

  /// Unique id per SDNode in the DAG.
  int NodeId = -1;

  /// The values that are used by this operation.
  /// NOTE(review): storage is owned by SelectionDAG (see the constructor
  /// comment below), not by this node.
  SDUse *OperandList = nullptr;

  /// The types of the values this node defines.  SDNode's may
  /// define multiple values simultaneously.
  const EVT *ValueList;

  /// List of uses for this SDNode.  Maintained intrusively through
  /// SDUse; new uses are linked in via addUse().
  SDUse *UseList = nullptr;

  /// The number of entries in the Operand/Value list.
  unsigned short NumOperands = 0;
  unsigned short NumValues;

  // The ordering of the SDNodes. It roughly corresponds to the ordering of the
  // original LLVM instructions.
  // This is used for turning off scheduling, because we'll forgo
  // the normal scheduling algorithms and output the instructions according to
  // this ordering.
  unsigned IROrder;

  /// Source line information.
  DebugLoc debugLoc;

  /// Return a pointer to the specified value type.
  static const EVT *getValueTypeList(EVT VT);

  SDNodeFlags Flags;

public:
  /// Unique and persistent id per SDNode in the DAG.
  /// Used for debug printing.
  uint16_t PersistentId;

  //===--------------------------------------------------------------------===//
  //  Accessors
  //

  /// Return the SelectionDAG opcode value for this node. For
  /// pre-isel nodes (those for which isMachineOpcode returns false), these
  /// are the opcode values in the ISD and <target>ISD namespaces. For
  /// post-isel opcodes, see getMachineOpcode.
  unsigned getOpcode() const { return (unsigned short)NodeType; }

  /// Test if this node has a target-specific opcode (in the
  /// \<target\>ISD namespace).
  bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }

  /// Test if this node has a target-specific opcode that may raise
  /// FP exceptions (in the \<target\>ISD namespace and greater than
  /// FIRST_TARGET_STRICTFP_OPCODE).  Note that all target memory
  /// opcode are currently automatically considered to possibly raise
  /// FP exceptions as well.
  bool isTargetStrictFPOpcode() const {
    return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
  }

  /// Test if this node has a target-specific
  /// memory-referencing opcode (in the \<target\>ISD namespace and
  /// greater than FIRST_TARGET_MEMORY_OPCODE).
  bool isTargetMemoryOpcode() const {
    return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
  }

  /// Return true if the type of the node type undefined.
  bool isUndef() const { return NodeType == ISD::UNDEF; }

  /// Test if this node is a memory intrinsic (with valid pointer information).
  /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
  /// non-memory intrinsics (with chains) that are not really instances of
  /// MemSDNode. For such nodes, we need some extra state to determine the
  /// proper classof relationship.
  bool isMemIntrinsic() const {
    return (NodeType == ISD::INTRINSIC_W_CHAIN ||
            NodeType == ISD::INTRINSIC_VOID) &&
           SDNodeBits.IsMemIntrinsic;
  }

  /// Test if this node is a strict floating point pseudo-op.
  bool isStrictFPOpcode() {
    switch (NodeType) {
      default:
        return false;
      case ISD::STRICT_FP16_TO_FP:
      case ISD::STRICT_FP_TO_FP16:
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
      case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
        return true;
    }
  }

  /// Test if this node has a post-isel opcode, directly
  /// corresponding to a MachineInstr opcode.
  bool isMachineOpcode() const { return NodeType < 0; }

  /// This may only be called if isMachineOpcode returns
  /// true. It returns the MachineInstr opcode value that the node's opcode
  /// corresponds to.
  unsigned getMachineOpcode() const {
    assert(isMachineOpcode() && "Not a MachineInstr opcode!");
    // Machine opcodes are stored bitwise-complemented in NodeType.
    return ~NodeType;
  }

  bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
  void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }

  bool isDivergent() const { return SDNodeBits.IsDivergent; }

  /// Return true if there are no uses of this node.
  bool use_empty() const { return UseList == nullptr; }

  /// Return true if there is exactly one use of this node.
  bool hasOneUse() const { return hasSingleElement(uses()); }

  /// Return the number of uses of this node. This method takes
  /// time proportional to the number of uses.
  size_t use_size() const { return std::distance(use_begin(), use_end()); }

  /// Return the unique node id.
  int getNodeId() const { return NodeId; }

  /// Set unique node id.
  void setNodeId(int Id) { NodeId = Id; }

  /// Return the node ordering.
  unsigned getIROrder() const { return IROrder; }

  /// Set the node ordering.
  void setIROrder(unsigned Order) { IROrder = Order; }

  /// Return the source location info.
  const DebugLoc &getDebugLoc() const { return debugLoc; }

  /// Set source location info.  Try to avoid this, putting
  /// it in the constructor is preferable.
  void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }

  /// This class provides iterator support for SDUse
  /// operands that use a specific SDNode.
  class use_iterator {
    friend class SDNode;

    // Current position in the use list; nullptr represents the end iterator.
    SDUse *Op = nullptr;

    explicit use_iterator(SDUse *op) : Op(op) {}

  public:
    using iterator_category = std::forward_iterator_tag;
    using value_type = SDUse;
    using difference_type = std::ptrdiff_t;
    using pointer = value_type *;
    using reference = value_type &;

    use_iterator() = default;
    use_iterator(const use_iterator &I) : Op(I.Op) {}

    bool operator==(const use_iterator &x) const {
      return Op == x.Op;
    }
    bool operator!=(const use_iterator &x) const {
      return !operator==(x);
    }

    /// Return true if this iterator is at the end of uses list.
    bool atEnd() const { return Op == nullptr; }

    // Iterator traversal: forward iteration only.
    use_iterator &operator++() {          // Preincrement
      assert(Op && "Cannot increment end iterator!");
      Op = Op->getNext();
      return *this;
    }

    use_iterator operator++(int) {        // Postincrement
      use_iterator tmp = *this; ++*this; return tmp;
    }

    /// Retrieve a pointer to the current user node.
    SDNode *operator*() const {
      assert(Op && "Cannot dereference end iterator!");
      return Op->getUser();
    }

    SDNode *operator->() const { return operator*(); }

    SDUse &getUse() const { return *Op; }

    /// Retrieve the operand # of this use in its user.
    unsigned getOperandNo() const {
      assert(Op && "Cannot dereference end iterator!");
      // SDUse objects are laid out contiguously in the user's OperandList,
      // so pointer arithmetic yields the operand index.
      return (unsigned)(Op - Op->getUser()->OperandList);
    }
  };

  /// Provide iteration support to walk over all uses of an SDNode.
  use_iterator use_begin() const {
    return use_iterator(UseList);
  }

  static use_iterator use_end() { return use_iterator(nullptr); }

  inline iterator_range<use_iterator> uses() {
    return make_range(use_begin(), use_end());
  }
  inline iterator_range<use_iterator> uses() const {
    return make_range(use_begin(), use_end());
  }

  /// Return true if there are exactly NUSES uses of the indicated value.
  /// This method ignores uses of other values defined by this operation.
  bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;

  /// Return true if there are any use of the indicated value.
  /// This method ignores uses of other values defined by this operation.
  bool hasAnyUseOfValue(unsigned Value) const;

  /// Return true if this node is the only use of N.
  bool isOnlyUserOf(const SDNode *N) const;

  /// Return true if this node is an operand of N.
  bool isOperandOf(const SDNode *N) const;

  /// Return true if this node is a predecessor of N.
  /// NOTE: Implemented on top of hasPredecessor and every bit as
  /// expensive. Use carefully.
  bool isPredecessorOf(const SDNode *N) const {
    return N->hasPredecessor(this);
  }

  /// Return true if N is a predecessor of this node.
  /// N is either an operand of this node, or can be reached by recursively
  /// traversing up the operands.
  /// NOTE: This is an expensive method. Use it carefully.
  bool hasPredecessor(const SDNode *N) const;

  /// Returns true if N is a predecessor of any node in Worklist. This
  /// helper keeps Visited and Worklist sets externally to allow unions
  /// searches to be performed in parallel, caching of results across
  /// queries and incremental addition to Worklist. Stops early if N is
  /// found but will resume. Remember to clear Visited and Worklists
  /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
  /// giving up. The TopologicalPrune flag signals that positive NodeIds are
  /// topologically ordered (Operands have strictly smaller node id) and search
  /// can be pruned leveraging this.
  static bool hasPredecessorHelper(const SDNode *N,
                                   SmallPtrSetImpl<const SDNode *> &Visited,
                                   SmallVectorImpl<const SDNode *> &Worklist,
                                   unsigned int MaxSteps = 0,
                                   bool TopologicalPrune = false) {
    SmallVector<const SDNode *, 8> DeferredNodes;
    if (Visited.count(N))
      return true;

    // Node Id's are assigned in three places: As a topological
    // ordering (> 0), during legalization (results in values set to
    // 0), new nodes (set to -1). If N has a topolgical id then we
    // know that all nodes with ids smaller than it cannot be
    // successors and we need not check them. Filter out all node
    // that can't be matches. We add them to the worklist before exit
    // in case of multiple calls. Note that during selection the topological id
    // may be violated if a node's predecessor is selected before it. We mark
    // this at selection negating the id of unselected successors and
    // restricting topological pruning to positive ids.

    int NId = N->getNodeId();
    // If we Invalidated the Id, reconstruct original NId.
    if (NId < -1)
      NId = -(NId + 1);

    bool Found = false;
    while (!Worklist.empty()) {
      const SDNode *M = Worklist.pop_back_val();
      int MId = M->getNodeId();
      if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
          (MId > 0) && (MId < NId)) {
        DeferredNodes.push_back(M);
        continue;
      }
      for (const SDValue &OpV : M->op_values()) {
        SDNode *Op = OpV.getNode();
        if (Visited.insert(Op).second)
          Worklist.push_back(Op);
        if (Op == N)
          Found = true;
      }
      if (Found)
        break;
      if (MaxSteps != 0 && Visited.size() >= MaxSteps)
        break;
    }
    // Push deferred nodes back on worklist.
    Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
    // If we bailed early, conservatively return found.
    if (MaxSteps != 0 && Visited.size() >= MaxSteps)
      return true;
    return Found;
  }

  /// Return true if all the users of N are contained in Nodes.
  /// NOTE: Requires at least one match, but doesn't require them all.
  static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);

  /// Return the number of values used by this operation.
  unsigned getNumOperands() const { return NumOperands; }

  /// Return the maximum number of operands that a SDNode can hold.
  static constexpr size_t getMaxNumOperands() {
    return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
  }

  /// Helper method returns the integer value of a ConstantSDNode operand.
  inline uint64_t getConstantOperandVal(unsigned Num) const;

  /// Helper method returns the APInt of a ConstantSDNode operand.
  inline const APInt &getConstantOperandAPInt(unsigned Num) const;

  const SDValue &getOperand(unsigned Num) const {
    assert(Num < NumOperands && "Invalid child # of SDNode!");
    return OperandList[Num];
  }

  using op_iterator = SDUse *;

  op_iterator op_begin() const { return OperandList; }
  op_iterator op_end() const { return OperandList+NumOperands; }
  ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }

  /// Iterator for directly iterating over the operand SDValue's.
  struct value_op_iterator
      : iterator_adaptor_base<value_op_iterator, op_iterator,
                              std::random_access_iterator_tag, SDValue,
                              ptrdiff_t, value_op_iterator *,
                              value_op_iterator *> {
    explicit value_op_iterator(SDUse *U = nullptr)
      : iterator_adaptor_base(U) {}

    const SDValue &operator*() const { return I->get(); }
  };

  iterator_range<value_op_iterator> op_values() const {
    return make_range(value_op_iterator(op_begin()),
                      value_op_iterator(op_end()));
  }

  SDVTList getVTList() const {
    SDVTList X = { ValueList, NumValues };
    return X;
  }

  /// If this node has a glue operand, return the node
  /// to which the glue operand points. Otherwise return NULL.
  SDNode *getGluedNode() const {
    // A glue operand, if present, is always the last operand.
    if (getNumOperands() != 0 &&
        getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
      return getOperand(getNumOperands()-1).getNode();
    return nullptr;
  }

  /// If this node has a glue value with a user, return
  /// the user (there is at most one). Otherwise return NULL.
  SDNode *getGluedUser() const {
    for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
      if (UI.getUse().get().getValueType() == MVT::Glue)
        return *UI;
    return nullptr;
  }

  SDNodeFlags getFlags() const { return Flags; }
  void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }

  /// Clear any flags in this node that aren't also set in Flags.
  /// If Flags is not in a defined state then this has no effect.
  void intersectFlagsWith(const SDNodeFlags Flags);

  /// Return the number of values defined/returned by this operator.
  unsigned getNumValues() const { return NumValues; }

  /// Return the type of a specified result.
  EVT getValueType(unsigned ResNo) const {
    assert(ResNo < NumValues && "Illegal result number!");
    return ValueList[ResNo];
  }

  /// Return the type of a specified result as a simple type.
  MVT getSimpleValueType(unsigned ResNo) const {
    return getValueType(ResNo).getSimpleVT();
  }

  /// Returns MVT::getSizeInBits(getValueType(ResNo)).
  ///
  /// If the value type is a scalable vector type, the scalable property will
  /// be set and the runtime size will be a positive integer multiple of the
  /// base size.
  TypeSize getValueSizeInBits(unsigned ResNo) const {
    return getValueType(ResNo).getSizeInBits();
  }

  using value_iterator = const EVT *;

  value_iterator value_begin() const { return ValueList; }
  value_iterator value_end() const { return ValueList+NumValues; }
  iterator_range<value_iterator> values() const {
    return llvm::make_range(value_begin(), value_end());
  }

  /// Return the opcode of this operation for printing.
  std::string getOperationName(const SelectionDAG *G = nullptr) const;
  static const char* getIndexedModeName(ISD::MemIndexedMode AM);
  void print_types(raw_ostream &OS, const SelectionDAG *G) const;
  void print_details(raw_ostream &OS, const SelectionDAG *G) const;
  void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
  void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;

  /// Print a SelectionDAG node and all children down to
  /// the leaves.  The given SelectionDAG allows target-specific nodes
  /// to be printed in human-readable form.  Unlike printr, this will
  /// print the whole DAG, including children that appear multiple
  /// times.
  ///
  void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;

  /// Print a SelectionDAG node and children up to
  /// depth "depth."  The given SelectionDAG allows target-specific
  /// nodes to be printed in human-readable form.  Unlike printr, this
  /// will print children that appear multiple times wherever they are
  /// used.
  ///
  void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
                       unsigned depth = 100) const;

  /// Dump this node, for debugging.
  void dump() const;

  /// Dump (recursively) this node and its use-def subgraph.
  void dumpr() const;

  /// Dump this node, for debugging.
  /// The given SelectionDAG allows target-specific nodes to be printed
  /// in human-readable form.
  void dump(const SelectionDAG *G) const;

  /// Dump (recursively) this node and its use-def subgraph.
  /// The given SelectionDAG allows target-specific nodes to be printed
  /// in human-readable form.
  void dumpr(const SelectionDAG *G) const;

  /// printrFull to dbgs().  The given SelectionDAG allows
  /// target-specific nodes to be printed in human-readable form.
  /// Unlike dumpr, this will print the whole DAG, including children
  /// that appear multiple times.
  void dumprFull(const SelectionDAG *G = nullptr) const;

  /// printrWithDepth to dbgs().  The given
  /// SelectionDAG allows target-specific nodes to be printed in
  /// human-readable form.  Unlike dumpr, this will print children
  /// that appear multiple times wherever they are used.
  ///
  void dumprWithDepth(const SelectionDAG *G = nullptr,
                      unsigned depth = 100) const;

  /// Gather unique data for the node.
  void Profile(FoldingSetNodeID &ID) const;

  /// This method should only be used by the SDUse class.
  void addUse(SDUse &U) { U.addToList(&UseList); }

protected:
  static SDVTList getSDVTList(EVT VT) {
    SDVTList Ret = { getValueTypeList(VT), 1 };
    return Ret;
  }

  /// Create an SDNode.
  ///
  /// SDNodes are created without any operands, and never own the operand
  /// storage. To add operands, see SelectionDAG::createOperands.
  SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
      : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
        IROrder(Order), debugLoc(std::move(dl)) {
    // Start with all subclass bits cleared; subclasses set their own bits.
    memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
    assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
    assert(NumValues == VTs.NumVTs &&
           "NumValues wasn't wide enough for its operands!");
  }

  /// Release the operands and set this node to have zero operands.
  void DropOperands();
};
1068 | ||||
1069 | /// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed | |||
1070 | /// into SDNode creation functions. | |||
1071 | /// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted | |||
1072 | /// from the original Instruction, and IROrder is the ordinal position of | |||
1073 | /// the instruction. | |||
1074 | /// When an SDNode is created after the DAG is being built, both DebugLoc and | |||
1075 | /// the IROrder are propagated from the original SDNode. | |||
1076 | /// So SDLoc class provides two constructors besides the default one, one to | |||
1077 | /// be used by the DAGBuilder, the other to be used by others. | |||
1078 | class SDLoc { | |||
1079 | private: | |||
1080 | DebugLoc DL; | |||
1081 | int IROrder = 0; | |||
1082 | ||||
1083 | public: | |||
1084 | SDLoc() = default; | |||
1085 | SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {} | |||
1086 | SDLoc(const SDValue V) : SDLoc(V.getNode()) {} | |||
1087 | SDLoc(const Instruction *I, int Order) : IROrder(Order) { | |||
1088 | assert(Order >= 0 && "bad IROrder")(static_cast <bool> (Order >= 0 && "bad IROrder" ) ? void (0) : __assert_fail ("Order >= 0 && \"bad IROrder\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1088, __extension__ __PRETTY_FUNCTION__)); | |||
1089 | if (I) | |||
1090 | DL = I->getDebugLoc(); | |||
1091 | } | |||
1092 | ||||
1093 | unsigned getIROrder() const { return IROrder; } | |||
1094 | const DebugLoc &getDebugLoc() const { return DL; } | |||
1095 | }; | |||
1096 | ||||
1097 | // Define inline functions from the SDValue class. | |||
1098 | ||||
1099 | inline SDValue::SDValue(SDNode *node, unsigned resno) | |||
1100 | : Node(node), ResNo(resno) { | |||
1101 | // Explicitly check for !ResNo to avoid use-after-free, because there are | |||
1102 | // callers that use SDValue(N, 0) with a deleted N to indicate successful | |||
1103 | // combines. | |||
1104 | assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&(static_cast <bool> ((!Node || !ResNo || ResNo < Node ->getNumValues()) && "Invalid result number for the given node!" ) ? void (0) : __assert_fail ("(!Node || !ResNo || ResNo < Node->getNumValues()) && \"Invalid result number for the given node!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1105, __extension__ __PRETTY_FUNCTION__)) | |||
1105 | "Invalid result number for the given node!")(static_cast <bool> ((!Node || !ResNo || ResNo < Node ->getNumValues()) && "Invalid result number for the given node!" ) ? void (0) : __assert_fail ("(!Node || !ResNo || ResNo < Node->getNumValues()) && \"Invalid result number for the given node!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1105, __extension__ __PRETTY_FUNCTION__)); | |||
1106 | assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.")(static_cast <bool> (ResNo < -2U && "Cannot use result numbers reserved for DenseMaps." ) ? void (0) : __assert_fail ("ResNo < -2U && \"Cannot use result numbers reserved for DenseMaps.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1106, __extension__ __PRETTY_FUNCTION__)); | |||
1107 | } | |||
1108 | ||||
1109 | inline unsigned SDValue::getOpcode() const { | |||
1110 | return Node->getOpcode(); | |||
1111 | } | |||
1112 | ||||
1113 | inline EVT SDValue::getValueType() const { | |||
1114 | return Node->getValueType(ResNo); | |||
| ||||
1115 | } | |||
1116 | ||||
1117 | inline unsigned SDValue::getNumOperands() const { | |||
1118 | return Node->getNumOperands(); | |||
1119 | } | |||
1120 | ||||
1121 | inline const SDValue &SDValue::getOperand(unsigned i) const { | |||
1122 | return Node->getOperand(i); | |||
1123 | } | |||
1124 | ||||
1125 | inline uint64_t SDValue::getConstantOperandVal(unsigned i) const { | |||
1126 | return Node->getConstantOperandVal(i); | |||
1127 | } | |||
1128 | ||||
1129 | inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const { | |||
1130 | return Node->getConstantOperandAPInt(i); | |||
1131 | } | |||
1132 | ||||
1133 | inline bool SDValue::isTargetOpcode() const { | |||
1134 | return Node->isTargetOpcode(); | |||
1135 | } | |||
1136 | ||||
1137 | inline bool SDValue::isTargetMemoryOpcode() const { | |||
1138 | return Node->isTargetMemoryOpcode(); | |||
1139 | } | |||
1140 | ||||
1141 | inline bool SDValue::isMachineOpcode() const { | |||
1142 | return Node->isMachineOpcode(); | |||
1143 | } | |||
1144 | ||||
1145 | inline unsigned SDValue::getMachineOpcode() const { | |||
1146 | return Node->getMachineOpcode(); | |||
1147 | } | |||
1148 | ||||
1149 | inline bool SDValue::isUndef() const { | |||
1150 | return Node->isUndef(); | |||
1151 | } | |||
1152 | ||||
1153 | inline bool SDValue::use_empty() const { | |||
1154 | return !Node->hasAnyUseOfValue(ResNo); | |||
1155 | } | |||
1156 | ||||
1157 | inline bool SDValue::hasOneUse() const { | |||
1158 | return Node->hasNUsesOfValue(1, ResNo); | |||
1159 | } | |||
1160 | ||||
1161 | inline const DebugLoc &SDValue::getDebugLoc() const { | |||
1162 | return Node->getDebugLoc(); | |||
1163 | } | |||
1164 | ||||
1165 | inline void SDValue::dump() const { | |||
1166 | return Node->dump(); | |||
1167 | } | |||
1168 | ||||
1169 | inline void SDValue::dump(const SelectionDAG *G) const { | |||
1170 | return Node->dump(G); | |||
1171 | } | |||
1172 | ||||
1173 | inline void SDValue::dumpr() const { | |||
1174 | return Node->dumpr(); | |||
1175 | } | |||
1176 | ||||
1177 | inline void SDValue::dumpr(const SelectionDAG *G) const { | |||
1178 | return Node->dumpr(G); | |||
1179 | } | |||
1180 | ||||
1181 | // Define inline functions from the SDUse class. | |||
1182 | ||||
1183 | inline void SDUse::set(const SDValue &V) { | |||
1184 | if (Val.getNode()) removeFromList(); | |||
1185 | Val = V; | |||
1186 | if (V.getNode()) V.getNode()->addUse(*this); | |||
1187 | } | |||
1188 | ||||
1189 | inline void SDUse::setInitial(const SDValue &V) { | |||
1190 | Val = V; | |||
1191 | V.getNode()->addUse(*this); | |||
1192 | } | |||
1193 | ||||
1194 | inline void SDUse::setNode(SDNode *N) { | |||
1195 | if (Val.getNode()) removeFromList(); | |||
1196 | Val.setNode(N); | |||
1197 | if (N) N->addUse(*this); | |||
1198 | } | |||
1199 | ||||
1200 | /// This class is used to form a handle around another node that | |||
1201 | /// is persistent and is updated across invocations of replaceAllUsesWith on its | |||
1202 | /// operand. This node should be directly created by end-users and not added to | |||
1203 | /// the AllNodes list. | |||
1204 | class HandleSDNode : public SDNode { | |||
1205 | SDUse Op; | |||
1206 | ||||
1207 | public: | |||
1208 | explicit HandleSDNode(SDValue X) | |||
1209 | : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) { | |||
1210 | // HandleSDNodes are never inserted into the DAG, so they won't be | |||
1211 | // auto-numbered. Use ID 65535 as a sentinel. | |||
1212 | PersistentId = 0xffff; | |||
1213 | ||||
1214 | // Manually set up the operand list. This node type is special in that it's | |||
1215 | // always stack allocated and SelectionDAG does not manage its operands. | |||
1216 | // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not | |||
1217 | // be so special. | |||
1218 | Op.setUser(this); | |||
1219 | Op.setInitial(X); | |||
1220 | NumOperands = 1; | |||
1221 | OperandList = &Op; | |||
1222 | } | |||
1223 | ~HandleSDNode(); | |||
1224 | ||||
1225 | const SDValue &getValue() const { return Op; } | |||
1226 | }; | |||
1227 | ||||
1228 | class AddrSpaceCastSDNode : public SDNode { | |||
1229 | private: | |||
1230 | unsigned SrcAddrSpace; | |||
1231 | unsigned DestAddrSpace; | |||
1232 | ||||
1233 | public: | |||
1234 | AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT, | |||
1235 | unsigned SrcAS, unsigned DestAS); | |||
1236 | ||||
1237 | unsigned getSrcAddressSpace() const { return SrcAddrSpace; } | |||
1238 | unsigned getDestAddressSpace() const { return DestAddrSpace; } | |||
1239 | ||||
1240 | static bool classof(const SDNode *N) { | |||
1241 | return N->getOpcode() == ISD::ADDRSPACECAST; | |||
1242 | } | |||
1243 | }; | |||
1244 | ||||
1245 | /// This is an abstract virtual class for memory operations. | |||
1246 | class MemSDNode : public SDNode { | |||
1247 | private: | |||
1248 | // VT of in-memory value. | |||
1249 | EVT MemoryVT; | |||
1250 | ||||
1251 | protected: | |||
1252 | /// Memory reference information. | |||
1253 | MachineMemOperand *MMO; | |||
1254 | ||||
1255 | public: | |||
1256 | MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
1257 | EVT memvt, MachineMemOperand *MMO); | |||
1258 | ||||
1259 | bool readMem() const { return MMO->isLoad(); } | |||
1260 | bool writeMem() const { return MMO->isStore(); } | |||
1261 | ||||
1262 | /// Returns alignment and volatility of the memory access | |||
1263 | Align getOriginalAlign() const { return MMO->getBaseAlign(); } | |||
1264 | Align getAlign() const { return MMO->getAlign(); } | |||
1265 | // FIXME: Remove once transition to getAlign is over. | |||
1266 | unsigned getAlignment() const { return MMO->getAlign().value(); } | |||
1267 | ||||
1268 | /// Return the SubclassData value, without HasDebugValue. This contains an | |||
1269 | /// encoding of the volatile flag, as well as bits used by subclasses. This | |||
1270 | /// function should only be used to compute a FoldingSetNodeID value. | |||
1271 | /// The HasDebugValue bit is masked out because CSE map needs to match | |||
1272 | /// nodes with debug info with nodes without debug info. Same is about | |||
1273 | /// isDivergent bit. | |||
1274 | unsigned getRawSubclassData() const { | |||
1275 | uint16_t Data; | |||
1276 | union { | |||
1277 | char RawSDNodeBits[sizeof(uint16_t)]; | |||
1278 | SDNodeBitfields SDNodeBits; | |||
1279 | }; | |||
1280 | memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits)); | |||
1281 | SDNodeBits.HasDebugValue = 0; | |||
1282 | SDNodeBits.IsDivergent = false; | |||
1283 | memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits)); | |||
1284 | return Data; | |||
1285 | } | |||
1286 | ||||
1287 | bool isVolatile() const { return MemSDNodeBits.IsVolatile; } | |||
1288 | bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; } | |||
1289 | bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; } | |||
1290 | bool isInvariant() const { return MemSDNodeBits.IsInvariant; } | |||
1291 | ||||
1292 | // Returns the offset from the location of the access. | |||
1293 | int64_t getSrcValueOffset() const { return MMO->getOffset(); } | |||
1294 | ||||
1295 | /// Returns the AA info that describes the dereference. | |||
1296 | AAMDNodes getAAInfo() const { return MMO->getAAInfo(); } | |||
1297 | ||||
1298 | /// Returns the Ranges that describes the dereference. | |||
1299 | const MDNode *getRanges() const { return MMO->getRanges(); } | |||
1300 | ||||
1301 | /// Returns the synchronization scope ID for this memory operation. | |||
1302 | SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); } | |||
1303 | ||||
1304 | /// Return the atomic ordering requirements for this memory operation. For | |||
1305 | /// cmpxchg atomic operations, return the atomic ordering requirements when | |||
1306 | /// store occurs. | |||
1307 | AtomicOrdering getSuccessOrdering() const { | |||
1308 | return MMO->getSuccessOrdering(); | |||
1309 | } | |||
1310 | ||||
1311 | /// Return a single atomic ordering that is at least as strong as both the | |||
1312 | /// success and failure orderings for an atomic operation. (For operations | |||
1313 | /// other than cmpxchg, this is equivalent to getSuccessOrdering().) | |||
1314 | AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); } | |||
1315 | ||||
1316 | /// Return true if the memory operation ordering is Unordered or higher. | |||
1317 | bool isAtomic() const { return MMO->isAtomic(); } | |||
1318 | ||||
1319 | /// Returns true if the memory operation doesn't imply any ordering | |||
1320 | /// constraints on surrounding memory operations beyond the normal memory | |||
1321 | /// aliasing rules. | |||
1322 | bool isUnordered() const { return MMO->isUnordered(); } | |||
1323 | ||||
1324 | /// Returns true if the memory operation is neither atomic or volatile. | |||
1325 | bool isSimple() const { return !isAtomic() && !isVolatile(); } | |||
1326 | ||||
1327 | /// Return the type of the in-memory value. | |||
1328 | EVT getMemoryVT() const { return MemoryVT; } | |||
1329 | ||||
1330 | /// Return a MachineMemOperand object describing the memory | |||
1331 | /// reference performed by operation. | |||
1332 | MachineMemOperand *getMemOperand() const { return MMO; } | |||
1333 | ||||
1334 | const MachinePointerInfo &getPointerInfo() const { | |||
1335 | return MMO->getPointerInfo(); | |||
1336 | } | |||
1337 | ||||
1338 | /// Return the address space for the associated pointer | |||
1339 | unsigned getAddressSpace() const { | |||
1340 | return getPointerInfo().getAddrSpace(); | |||
1341 | } | |||
1342 | ||||
1343 | /// Update this MemSDNode's MachineMemOperand information | |||
1344 | /// to reflect the alignment of NewMMO, if it has a greater alignment. | |||
1345 | /// This must only be used when the new alignment applies to all users of | |||
1346 | /// this MachineMemOperand. | |||
1347 | void refineAlignment(const MachineMemOperand *NewMMO) { | |||
1348 | MMO->refineAlignment(NewMMO); | |||
1349 | } | |||
1350 | ||||
1351 | const SDValue &getChain() const { return getOperand(0); } | |||
1352 | ||||
1353 | const SDValue &getBasePtr() const { | |||
1354 | switch (getOpcode()) { | |||
1355 | case ISD::STORE: | |||
1356 | case ISD::MSTORE: | |||
1357 | return getOperand(2); | |||
1358 | case ISD::MGATHER: | |||
1359 | case ISD::MSCATTER: | |||
1360 | return getOperand(3); | |||
1361 | default: | |||
1362 | return getOperand(1); | |||
1363 | } | |||
1364 | } | |||
1365 | ||||
1366 | // Methods to support isa and dyn_cast | |||
1367 | static bool classof(const SDNode *N) { | |||
1368 | // For some targets, we lower some target intrinsics to a MemIntrinsicNode | |||
1369 | // with either an intrinsic or a target opcode. | |||
1370 | switch (N->getOpcode()) { | |||
1371 | case ISD::LOAD: | |||
1372 | case ISD::STORE: | |||
1373 | case ISD::PREFETCH: | |||
1374 | case ISD::ATOMIC_CMP_SWAP: | |||
1375 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: | |||
1376 | case ISD::ATOMIC_SWAP: | |||
1377 | case ISD::ATOMIC_LOAD_ADD: | |||
1378 | case ISD::ATOMIC_LOAD_SUB: | |||
1379 | case ISD::ATOMIC_LOAD_AND: | |||
1380 | case ISD::ATOMIC_LOAD_CLR: | |||
1381 | case ISD::ATOMIC_LOAD_OR: | |||
1382 | case ISD::ATOMIC_LOAD_XOR: | |||
1383 | case ISD::ATOMIC_LOAD_NAND: | |||
1384 | case ISD::ATOMIC_LOAD_MIN: | |||
1385 | case ISD::ATOMIC_LOAD_MAX: | |||
1386 | case ISD::ATOMIC_LOAD_UMIN: | |||
1387 | case ISD::ATOMIC_LOAD_UMAX: | |||
1388 | case ISD::ATOMIC_LOAD_FADD: | |||
1389 | case ISD::ATOMIC_LOAD_FSUB: | |||
1390 | case ISD::ATOMIC_LOAD: | |||
1391 | case ISD::ATOMIC_STORE: | |||
1392 | case ISD::MLOAD: | |||
1393 | case ISD::MSTORE: | |||
1394 | case ISD::MGATHER: | |||
1395 | case ISD::MSCATTER: | |||
1396 | return true; | |||
1397 | default: | |||
1398 | return N->isMemIntrinsic() || N->isTargetMemoryOpcode(); | |||
1399 | } | |||
1400 | } | |||
1401 | }; | |||
1402 | ||||
1403 | /// This is an SDNode representing atomic operations. | |||
1404 | class AtomicSDNode : public MemSDNode { | |||
1405 | public: | |||
1406 | AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL, | |||
1407 | EVT MemVT, MachineMemOperand *MMO) | |||
1408 | : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { | |||
1409 | assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||(static_cast <bool> (((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) || MMO->isAtomic()) && "then why are we using an AtomicSDNode?" ) ? void (0) : __assert_fail ("((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) || MMO->isAtomic()) && \"then why are we using an AtomicSDNode?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1410, __extension__ __PRETTY_FUNCTION__)) | |||
1410 | MMO->isAtomic()) && "then why are we using an AtomicSDNode?")(static_cast <bool> (((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) || MMO->isAtomic()) && "then why are we using an AtomicSDNode?" ) ? void (0) : __assert_fail ("((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) || MMO->isAtomic()) && \"then why are we using an AtomicSDNode?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1410, __extension__ __PRETTY_FUNCTION__)); | |||
1411 | } | |||
1412 | ||||
1413 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
1414 | const SDValue &getVal() const { return getOperand(2); } | |||
1415 | ||||
1416 | /// Returns true if this SDNode represents cmpxchg atomic operation, false | |||
1417 | /// otherwise. | |||
1418 | bool isCompareAndSwap() const { | |||
1419 | unsigned Op = getOpcode(); | |||
1420 | return Op == ISD::ATOMIC_CMP_SWAP || | |||
1421 | Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS; | |||
1422 | } | |||
1423 | ||||
1424 | /// For cmpxchg atomic operations, return the atomic ordering requirements | |||
1425 | /// when store does not occur. | |||
1426 | AtomicOrdering getFailureOrdering() const { | |||
1427 | assert(isCompareAndSwap() && "Must be cmpxchg operation")(static_cast <bool> (isCompareAndSwap() && "Must be cmpxchg operation" ) ? void (0) : __assert_fail ("isCompareAndSwap() && \"Must be cmpxchg operation\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1427, __extension__ __PRETTY_FUNCTION__)); | |||
1428 | return MMO->getFailureOrdering(); | |||
1429 | } | |||
1430 | ||||
1431 | // Methods to support isa and dyn_cast | |||
1432 | static bool classof(const SDNode *N) { | |||
1433 | return N->getOpcode() == ISD::ATOMIC_CMP_SWAP || | |||
1434 | N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS || | |||
1435 | N->getOpcode() == ISD::ATOMIC_SWAP || | |||
1436 | N->getOpcode() == ISD::ATOMIC_LOAD_ADD || | |||
1437 | N->getOpcode() == ISD::ATOMIC_LOAD_SUB || | |||
1438 | N->getOpcode() == ISD::ATOMIC_LOAD_AND || | |||
1439 | N->getOpcode() == ISD::ATOMIC_LOAD_CLR || | |||
1440 | N->getOpcode() == ISD::ATOMIC_LOAD_OR || | |||
1441 | N->getOpcode() == ISD::ATOMIC_LOAD_XOR || | |||
1442 | N->getOpcode() == ISD::ATOMIC_LOAD_NAND || | |||
1443 | N->getOpcode() == ISD::ATOMIC_LOAD_MIN || | |||
1444 | N->getOpcode() == ISD::ATOMIC_LOAD_MAX || | |||
1445 | N->getOpcode() == ISD::ATOMIC_LOAD_UMIN || | |||
1446 | N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || | |||
1447 | N->getOpcode() == ISD::ATOMIC_LOAD_FADD || | |||
1448 | N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || | |||
1449 | N->getOpcode() == ISD::ATOMIC_LOAD || | |||
1450 | N->getOpcode() == ISD::ATOMIC_STORE; | |||
1451 | } | |||
1452 | }; | |||
1453 | ||||
1454 | /// This SDNode is used for target intrinsics that touch | |||
1455 | /// memory and need an associated MachineMemOperand. Its opcode may be | |||
1456 | /// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode | |||
1457 | /// with a value not less than FIRST_TARGET_MEMORY_OPCODE. | |||
1458 | class MemIntrinsicSDNode : public MemSDNode { | |||
1459 | public: | |||
1460 | MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, | |||
1461 | SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO) | |||
1462 | : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) { | |||
1463 | SDNodeBits.IsMemIntrinsic = true; | |||
1464 | } | |||
1465 | ||||
1466 | // Methods to support isa and dyn_cast | |||
1467 | static bool classof(const SDNode *N) { | |||
1468 | // We lower some target intrinsics to their target opcode | |||
1469 | // early a node with a target opcode can be of this class | |||
1470 | return N->isMemIntrinsic() || | |||
1471 | N->getOpcode() == ISD::PREFETCH || | |||
1472 | N->isTargetMemoryOpcode(); | |||
1473 | } | |||
1474 | }; | |||
1475 | ||||
1476 | /// This SDNode is used to implement the code generator | |||
1477 | /// support for the llvm IR shufflevector instruction. It combines elements | |||
1478 | /// from two input vectors into a new input vector, with the selection and | |||
1479 | /// ordering of elements determined by an array of integers, referred to as | |||
1480 | /// the shuffle mask. For input vectors of width N, mask indices of 0..N-1 | |||
1481 | /// refer to elements from the LHS input, and indices from N to 2N-1 the RHS. | |||
1482 | /// An index of -1 is treated as undef, such that the code generator may put | |||
1483 | /// any value in the corresponding element of the result. | |||
1484 | class ShuffleVectorSDNode : public SDNode { | |||
1485 | // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and | |||
1486 | // is freed when the SelectionDAG object is destroyed. | |||
1487 | const int *Mask; | |||
1488 | ||||
1489 | protected: | |||
1490 | friend class SelectionDAG; | |||
1491 | ||||
1492 | ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M) | |||
1493 | : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {} | |||
1494 | ||||
1495 | public: | |||
1496 | ArrayRef<int> getMask() const { | |||
1497 | EVT VT = getValueType(0); | |||
1498 | return makeArrayRef(Mask, VT.getVectorNumElements()); | |||
1499 | } | |||
1500 | ||||
1501 | int getMaskElt(unsigned Idx) const { | |||
1502 | assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!")(static_cast <bool> (Idx < getValueType(0).getVectorNumElements () && "Idx out of range!") ? void (0) : __assert_fail ("Idx < getValueType(0).getVectorNumElements() && \"Idx out of range!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1502, __extension__ __PRETTY_FUNCTION__)); | |||
1503 | return Mask[Idx]; | |||
1504 | } | |||
1505 | ||||
1506 | bool isSplat() const { return isSplatMask(Mask, getValueType(0)); } | |||
1507 | ||||
1508 | int getSplatIndex() const { | |||
1509 | assert(isSplat() && "Cannot get splat index for non-splat!")(static_cast <bool> (isSplat() && "Cannot get splat index for non-splat!" ) ? void (0) : __assert_fail ("isSplat() && \"Cannot get splat index for non-splat!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1509, __extension__ __PRETTY_FUNCTION__)); | |||
1510 | EVT VT = getValueType(0); | |||
1511 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) | |||
1512 | if (Mask[i] >= 0) | |||
1513 | return Mask[i]; | |||
1514 | ||||
1515 | // We can choose any index value here and be correct because all elements | |||
1516 | // are undefined. Return 0 for better potential for callers to simplify. | |||
1517 | return 0; | |||
1518 | } | |||
1519 | ||||
1520 | static bool isSplatMask(const int *Mask, EVT VT); | |||
1521 | ||||
1522 | /// Change values in a shuffle permute mask assuming | |||
1523 | /// the two vector operands have swapped position. | |||
1524 | static void commuteMask(MutableArrayRef<int> Mask) { | |||
1525 | unsigned NumElems = Mask.size(); | |||
1526 | for (unsigned i = 0; i != NumElems; ++i) { | |||
1527 | int idx = Mask[i]; | |||
1528 | if (idx < 0) | |||
1529 | continue; | |||
1530 | else if (idx < (int)NumElems) | |||
1531 | Mask[i] = idx + NumElems; | |||
1532 | else | |||
1533 | Mask[i] = idx - NumElems; | |||
1534 | } | |||
1535 | } | |||
1536 | ||||
1537 | static bool classof(const SDNode *N) { | |||
1538 | return N->getOpcode() == ISD::VECTOR_SHUFFLE; | |||
1539 | } | |||
1540 | }; | |||
1541 | ||||
1542 | class ConstantSDNode : public SDNode { | |||
1543 | friend class SelectionDAG; | |||
1544 | ||||
1545 | const ConstantInt *Value; | |||
1546 | ||||
1547 | ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT) | |||
1548 | : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(), | |||
1549 | getSDVTList(VT)), | |||
1550 | Value(val) { | |||
1551 | ConstantSDNodeBits.IsOpaque = isOpaque; | |||
1552 | } | |||
1553 | ||||
1554 | public: | |||
1555 | const ConstantInt *getConstantIntValue() const { return Value; } | |||
1556 | const APInt &getAPIntValue() const { return Value->getValue(); } | |||
1557 | uint64_t getZExtValue() const { return Value->getZExtValue(); } | |||
1558 | int64_t getSExtValue() const { return Value->getSExtValue(); } | |||
1559 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) { | |||
1560 | return Value->getLimitedValue(Limit); | |||
1561 | } | |||
1562 | MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); } | |||
1563 | Align getAlignValue() const { return Value->getAlignValue(); } | |||
1564 | ||||
1565 | bool isOne() const { return Value->isOne(); } | |||
1566 | bool isNullValue() const { return Value->isZero(); } | |||
1567 | bool isAllOnesValue() const { return Value->isMinusOne(); } | |||
1568 | bool isMaxSignedValue() const { return Value->isMaxValue(true); } | |||
1569 | bool isMinSignedValue() const { return Value->isMinValue(true); } | |||
1570 | ||||
1571 | bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; } | |||
1572 | ||||
1573 | static bool classof(const SDNode *N) { | |||
1574 | return N->getOpcode() == ISD::Constant || | |||
1575 | N->getOpcode() == ISD::TargetConstant; | |||
1576 | } | |||
1577 | }; | |||
1578 | ||||
1579 | uint64_t SDNode::getConstantOperandVal(unsigned Num) const { | |||
1580 | return cast<ConstantSDNode>(getOperand(Num))->getZExtValue(); | |||
1581 | } | |||
1582 | ||||
1583 | const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const { | |||
1584 | return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue(); | |||
1585 | } | |||
1586 | ||||
1587 | class ConstantFPSDNode : public SDNode { | |||
1588 | friend class SelectionDAG; | |||
1589 | ||||
1590 | const ConstantFP *Value; | |||
1591 | ||||
1592 | ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT) | |||
1593 | : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, | |||
1594 | DebugLoc(), getSDVTList(VT)), | |||
1595 | Value(val) {} | |||
1596 | ||||
1597 | public: | |||
1598 | const APFloat& getValueAPF() const { return Value->getValueAPF(); } | |||
1599 | const ConstantFP *getConstantFPValue() const { return Value; } | |||
1600 | ||||
1601 | /// Return true if the value is positive or negative zero. | |||
1602 | bool isZero() const { return Value->isZero(); } | |||
1603 | ||||
1604 | /// Return true if the value is a NaN. | |||
1605 | bool isNaN() const { return Value->isNaN(); } | |||
1606 | ||||
1607 | /// Return true if the value is an infinity | |||
1608 | bool isInfinity() const { return Value->isInfinity(); } | |||
1609 | ||||
1610 | /// Return true if the value is negative. | |||
1611 | bool isNegative() const { return Value->isNegative(); } | |||
1612 | ||||
1613 | /// We don't rely on operator== working on double values, as | |||
1614 | /// it returns true for things that are clearly not equal, like -0.0 and 0.0. | |||
1615 | /// As such, this method can be used to do an exact bit-for-bit comparison of | |||
1616 | /// two floating point values. | |||
1617 | ||||
1618 | /// We leave the version with the double argument here because it's just so | |||
1619 | /// convenient to write "2.0" and the like. Without this function we'd | |||
1620 | /// have to duplicate its logic everywhere it's called. | |||
1621 | bool isExactlyValue(double V) const { | |||
1622 | return Value->getValueAPF().isExactlyValue(V); | |||
1623 | } | |||
1624 | bool isExactlyValue(const APFloat& V) const; | |||
1625 | ||||
1626 | static bool isValueValidForType(EVT VT, const APFloat& Val); | |||
1627 | ||||
1628 | static bool classof(const SDNode *N) { | |||
1629 | return N->getOpcode() == ISD::ConstantFP || | |||
1630 | N->getOpcode() == ISD::TargetConstantFP; | |||
1631 | } | |||
1632 | }; | |||
1633 | ||||
1634 | /// Returns true if \p V is a constant integer zero. | |||
1635 | bool isNullConstant(SDValue V); | |||
1636 | ||||
1637 | /// Returns true if \p V is an FP constant with a value of positive zero. | |||
1638 | bool isNullFPConstant(SDValue V); | |||
1639 | ||||
1640 | /// Returns true if \p V is an integer constant with all bits set. | |||
1641 | bool isAllOnesConstant(SDValue V); | |||
1642 | ||||
1643 | /// Returns true if \p V is a constant integer one. | |||
1644 | bool isOneConstant(SDValue V); | |||
1645 | ||||
1646 | /// Return the non-bitcasted source operand of \p V if it exists. | |||
1647 | /// If \p V is not a bitcasted value, it is returned as-is. | |||
1648 | SDValue peekThroughBitcasts(SDValue V); | |||
1649 | ||||
1650 | /// Return the non-bitcasted and one-use source operand of \p V if it exists. | |||
1651 | /// If \p V is not a bitcasted one-use value, it is returned as-is. | |||
1652 | SDValue peekThroughOneUseBitcasts(SDValue V); | |||
1653 | ||||
1654 | /// Return the non-extracted vector source operand of \p V if it exists. | |||
1655 | /// If \p V is not an extracted subvector, it is returned as-is. | |||
1656 | SDValue peekThroughExtractSubvectors(SDValue V); | |||
1657 | ||||
1658 | /// Returns true if \p V is a bitwise not operation. Assumes that an all ones | |||
1659 | /// constant is canonicalized to be operand 1. | |||
1660 | bool isBitwiseNot(SDValue V, bool AllowUndefs = false); | |||
1661 | ||||
1662 | /// Returns the SDNode if it is a constant splat BuildVector or constant int. | |||
1663 | ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false, | |||
1664 | bool AllowTruncation = false); | |||
1665 | ||||
1666 | /// Returns the SDNode if it is a demanded constant splat BuildVector or | |||
1667 | /// constant int. | |||
1668 | ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts, | |||
1669 | bool AllowUndefs = false, | |||
1670 | bool AllowTruncation = false); | |||
1671 | ||||
1672 | /// Returns the SDNode if it is a constant splat BuildVector or constant float. | |||
1673 | ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false); | |||
1674 | ||||
1675 | /// Returns the SDNode if it is a demanded constant splat BuildVector or | |||
1676 | /// constant float. | |||
1677 | ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts, | |||
1678 | bool AllowUndefs = false); | |||
1679 | ||||
1680 | /// Return true if the value is a constant 0 integer or a splatted vector of | |||
1681 | /// a constant 0 integer (with no undefs by default). | |||
1682 | /// Build vector implicit truncation is not an issue for null values. | |||
1683 | bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false); | |||
1684 | ||||
1685 | /// Return true if the value is a constant 1 integer or a splatted vector of a | |||
1686 | /// constant 1 integer (with no undefs). | |||
1687 | /// Does not permit build vector implicit truncation. | |||
1688 | bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false); | |||
1689 | ||||
1690 | /// Return true if the value is a constant -1 integer or a splatted vector of a | |||
1691 | /// constant -1 integer (with no undefs). | |||
1692 | /// Does not permit build vector implicit truncation. | |||
1693 | bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false); | |||
1694 | ||||
1695 | /// Return true if \p V is either a integer or FP constant. | |||
1696 | inline bool isIntOrFPConstant(SDValue V) { | |||
1697 | return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V); | |||
1698 | } | |||
1699 | ||||
1700 | class GlobalAddressSDNode : public SDNode { | |||
1701 | friend class SelectionDAG; | |||
1702 | ||||
1703 | const GlobalValue *TheGlobal; | |||
1704 | int64_t Offset; | |||
1705 | unsigned TargetFlags; | |||
1706 | ||||
1707 | GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, | |||
1708 | const GlobalValue *GA, EVT VT, int64_t o, | |||
1709 | unsigned TF); | |||
1710 | ||||
1711 | public: | |||
1712 | const GlobalValue *getGlobal() const { return TheGlobal; } | |||
1713 | int64_t getOffset() const { return Offset; } | |||
1714 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1715 | // Return the address space this GlobalAddress belongs to. | |||
1716 | unsigned getAddressSpace() const; | |||
1717 | ||||
1718 | static bool classof(const SDNode *N) { | |||
1719 | return N->getOpcode() == ISD::GlobalAddress || | |||
1720 | N->getOpcode() == ISD::TargetGlobalAddress || | |||
1721 | N->getOpcode() == ISD::GlobalTLSAddress || | |||
1722 | N->getOpcode() == ISD::TargetGlobalTLSAddress; | |||
1723 | } | |||
1724 | }; | |||
1725 | ||||
1726 | class FrameIndexSDNode : public SDNode { | |||
1727 | friend class SelectionDAG; | |||
1728 | ||||
1729 | int FI; | |||
1730 | ||||
1731 | FrameIndexSDNode(int fi, EVT VT, bool isTarg) | |||
1732 | : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex, | |||
1733 | 0, DebugLoc(), getSDVTList(VT)), FI(fi) { | |||
1734 | } | |||
1735 | ||||
1736 | public: | |||
1737 | int getIndex() const { return FI; } | |||
1738 | ||||
1739 | static bool classof(const SDNode *N) { | |||
1740 | return N->getOpcode() == ISD::FrameIndex || | |||
1741 | N->getOpcode() == ISD::TargetFrameIndex; | |||
1742 | } | |||
1743 | }; | |||
1744 | ||||
1745 | /// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate | |||
1746 | /// the offet and size that are started/ended in the underlying FrameIndex. | |||
1747 | class LifetimeSDNode : public SDNode { | |||
1748 | friend class SelectionDAG; | |||
1749 | int64_t Size; | |||
1750 | int64_t Offset; // -1 if offset is unknown. | |||
1751 | ||||
1752 | LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, | |||
1753 | SDVTList VTs, int64_t Size, int64_t Offset) | |||
1754 | : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {} | |||
1755 | public: | |||
1756 | int64_t getFrameIndex() const { | |||
1757 | return cast<FrameIndexSDNode>(getOperand(1))->getIndex(); | |||
1758 | } | |||
1759 | ||||
1760 | bool hasOffset() const { return Offset >= 0; } | |||
1761 | int64_t getOffset() const { | |||
1762 | assert(hasOffset() && "offset is unknown")(static_cast <bool> (hasOffset() && "offset is unknown" ) ? void (0) : __assert_fail ("hasOffset() && \"offset is unknown\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1762, __extension__ __PRETTY_FUNCTION__)); | |||
1763 | return Offset; | |||
1764 | } | |||
1765 | int64_t getSize() const { | |||
1766 | assert(hasOffset() && "offset is unknown")(static_cast <bool> (hasOffset() && "offset is unknown" ) ? void (0) : __assert_fail ("hasOffset() && \"offset is unknown\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1766, __extension__ __PRETTY_FUNCTION__)); | |||
1767 | return Size; | |||
1768 | } | |||
1769 | ||||
1770 | // Methods to support isa and dyn_cast | |||
1771 | static bool classof(const SDNode *N) { | |||
1772 | return N->getOpcode() == ISD::LIFETIME_START || | |||
1773 | N->getOpcode() == ISD::LIFETIME_END; | |||
1774 | } | |||
1775 | }; | |||
1776 | ||||
1777 | /// This SDNode is used for PSEUDO_PROBE values, which are the function guid and | |||
1778 | /// the index of the basic block being probed. A pseudo probe serves as a place | |||
1779 | /// holder and will be removed at the end of compilation. It does not have any | |||
1780 | /// operand because we do not want the instruction selection to deal with any. | |||
1781 | class PseudoProbeSDNode : public SDNode { | |||
1782 | friend class SelectionDAG; | |||
1783 | uint64_t Guid; | |||
1784 | uint64_t Index; | |||
1785 | uint32_t Attributes; | |||
1786 | ||||
1787 | PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl, | |||
1788 | SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr) | |||
1789 | : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index), | |||
1790 | Attributes(Attr) {} | |||
1791 | ||||
1792 | public: | |||
1793 | uint64_t getGuid() const { return Guid; } | |||
1794 | uint64_t getIndex() const { return Index; } | |||
1795 | uint32_t getAttributes() const { return Attributes; } | |||
1796 | ||||
1797 | // Methods to support isa and dyn_cast | |||
1798 | static bool classof(const SDNode *N) { | |||
1799 | return N->getOpcode() == ISD::PSEUDO_PROBE; | |||
1800 | } | |||
1801 | }; | |||
1802 | ||||
1803 | class JumpTableSDNode : public SDNode { | |||
1804 | friend class SelectionDAG; | |||
1805 | ||||
1806 | int JTI; | |||
1807 | unsigned TargetFlags; | |||
1808 | ||||
1809 | JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF) | |||
1810 | : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, | |||
1811 | 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { | |||
1812 | } | |||
1813 | ||||
1814 | public: | |||
1815 | int getIndex() const { return JTI; } | |||
1816 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1817 | ||||
1818 | static bool classof(const SDNode *N) { | |||
1819 | return N->getOpcode() == ISD::JumpTable || | |||
1820 | N->getOpcode() == ISD::TargetJumpTable; | |||
1821 | } | |||
1822 | }; | |||
1823 | ||||
1824 | class ConstantPoolSDNode : public SDNode { | |||
1825 | friend class SelectionDAG; | |||
1826 | ||||
1827 | union { | |||
1828 | const Constant *ConstVal; | |||
1829 | MachineConstantPoolValue *MachineCPVal; | |||
1830 | } Val; | |||
1831 | int Offset; // It's a MachineConstantPoolValue if top bit is set. | |||
1832 | Align Alignment; // Minimum alignment requirement of CP. | |||
1833 | unsigned TargetFlags; | |||
1834 | ||||
1835 | ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o, | |||
1836 | Align Alignment, unsigned TF) | |||
1837 | : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, | |||
1838 | DebugLoc(), getSDVTList(VT)), | |||
1839 | Offset(o), Alignment(Alignment), TargetFlags(TF) { | |||
1840 | assert(Offset >= 0 && "Offset is too large")(static_cast <bool> (Offset >= 0 && "Offset is too large" ) ? void (0) : __assert_fail ("Offset >= 0 && \"Offset is too large\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1840, __extension__ __PRETTY_FUNCTION__)); | |||
1841 | Val.ConstVal = c; | |||
1842 | } | |||
1843 | ||||
1844 | ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o, | |||
1845 | Align Alignment, unsigned TF) | |||
1846 | : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, | |||
1847 | DebugLoc(), getSDVTList(VT)), | |||
1848 | Offset(o), Alignment(Alignment), TargetFlags(TF) { | |||
1849 | assert(Offset >= 0 && "Offset is too large")(static_cast <bool> (Offset >= 0 && "Offset is too large" ) ? void (0) : __assert_fail ("Offset >= 0 && \"Offset is too large\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1849, __extension__ __PRETTY_FUNCTION__)); | |||
1850 | Val.MachineCPVal = v; | |||
1851 | Offset |= 1 << (sizeof(unsigned)*CHAR_BIT8-1); | |||
1852 | } | |||
1853 | ||||
1854 | public: | |||
1855 | bool isMachineConstantPoolEntry() const { | |||
1856 | return Offset < 0; | |||
1857 | } | |||
1858 | ||||
1859 | const Constant *getConstVal() const { | |||
1860 | assert(!isMachineConstantPoolEntry() && "Wrong constantpool type")(static_cast <bool> (!isMachineConstantPoolEntry() && "Wrong constantpool type") ? void (0) : __assert_fail ("!isMachineConstantPoolEntry() && \"Wrong constantpool type\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1860, __extension__ __PRETTY_FUNCTION__)); | |||
1861 | return Val.ConstVal; | |||
1862 | } | |||
1863 | ||||
1864 | MachineConstantPoolValue *getMachineCPVal() const { | |||
1865 | assert(isMachineConstantPoolEntry() && "Wrong constantpool type")(static_cast <bool> (isMachineConstantPoolEntry() && "Wrong constantpool type") ? void (0) : __assert_fail ("isMachineConstantPoolEntry() && \"Wrong constantpool type\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1865, __extension__ __PRETTY_FUNCTION__)); | |||
1866 | return Val.MachineCPVal; | |||
1867 | } | |||
1868 | ||||
1869 | int getOffset() const { | |||
1870 | return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT8-1)); | |||
1871 | } | |||
1872 | ||||
1873 | // Return the alignment of this constant pool object, which is either 0 (for | |||
1874 | // default alignment) or the desired value. | |||
1875 | Align getAlign() const { return Alignment; } | |||
1876 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1877 | ||||
1878 | Type *getType() const; | |||
1879 | ||||
1880 | static bool classof(const SDNode *N) { | |||
1881 | return N->getOpcode() == ISD::ConstantPool || | |||
1882 | N->getOpcode() == ISD::TargetConstantPool; | |||
1883 | } | |||
1884 | }; | |||
1885 | ||||
1886 | /// Completely target-dependent object reference. | |||
1887 | class TargetIndexSDNode : public SDNode { | |||
1888 | friend class SelectionDAG; | |||
1889 | ||||
1890 | unsigned TargetFlags; | |||
1891 | int Index; | |||
1892 | int64_t Offset; | |||
1893 | ||||
1894 | public: | |||
1895 | TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF) | |||
1896 | : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), | |||
1897 | TargetFlags(TF), Index(Idx), Offset(Ofs) {} | |||
1898 | ||||
1899 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1900 | int getIndex() const { return Index; } | |||
1901 | int64_t getOffset() const { return Offset; } | |||
1902 | ||||
1903 | static bool classof(const SDNode *N) { | |||
1904 | return N->getOpcode() == ISD::TargetIndex; | |||
1905 | } | |||
1906 | }; | |||
1907 | ||||
1908 | class BasicBlockSDNode : public SDNode { | |||
1909 | friend class SelectionDAG; | |||
1910 | ||||
1911 | MachineBasicBlock *MBB; | |||
1912 | ||||
1913 | /// Debug info is meaningful and potentially useful here, but we create | |||
1914 | /// blocks out of order when they're jumped to, which makes it a bit | |||
1915 | /// harder. Let's see if we need it first. | |||
1916 | explicit BasicBlockSDNode(MachineBasicBlock *mbb) | |||
1917 | : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb) | |||
1918 | {} | |||
1919 | ||||
1920 | public: | |||
1921 | MachineBasicBlock *getBasicBlock() const { return MBB; } | |||
1922 | ||||
1923 | static bool classof(const SDNode *N) { | |||
1924 | return N->getOpcode() == ISD::BasicBlock; | |||
1925 | } | |||
1926 | }; | |||
1927 | ||||
/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
class BuildVectorSDNode : public SDNode {
public:
  // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
  // The deleted constructor makes direct construction impossible.
  explicit BuildVectorSDNode() = delete;

  /// Check if this is a constant splat, and if so, find the
  /// smallest element size that splats the vector. If MinSplatBits is
  /// nonzero, the element size must be at least that large. Note that the
  /// splat element may be the entire vector (i.e., a one element vector).
  /// Returns the splat element value in SplatValue. Any undefined bits in
  /// that value are zero, and the corresponding bits in the SplatUndef mask
  /// are set. The SplatBitSize value is set to the splat element size in
  /// bits. HasAnyUndefs is set to true if any bits in the vector are
  /// undefined. isBigEndian describes the endianness of the target.
  bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
                       unsigned &SplatBitSize, bool &HasAnyUndefs,
                       unsigned MinSplatBits = 0,
                       bool isBigEndian = false) const;

  /// Returns the demanded splatted value or a null value if this is not a
  /// splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  SDValue getSplatValue(const APInt &DemandedElts,
                        BitVector *UndefElements = nullptr) const;

  /// Returns the splatted value or a null value if this is not a splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  SDValue getSplatValue(BitVector *UndefElements = nullptr) const;

  /// Find the shortest repeating sequence of values in the build vector.
  ///
  /// e.g. { u, X, u, X, u, u, X, u } -> { X }
  ///      { X, Y, u, Y, u, u, X, u } -> { X, Y }
  ///
  /// Currently this must be a power-of-2 build vector.
  /// The DemandedElts mask indicates the elements that must be present,
  /// undemanded elements in Sequence may be null (SDValue()). If passed a
  /// non-null UndefElements bitvector, it will resize it to match the original
  /// vector width and set the bits where elements are undef. If result is
  /// false, Sequence will be empty.
  bool getRepeatedSequence(const APInt &DemandedElts,
                           SmallVectorImpl<SDValue> &Sequence,
                           BitVector *UndefElements = nullptr) const;

  /// Find the shortest repeating sequence of values in the build vector.
  ///
  /// e.g. { u, X, u, X, u, u, X, u } -> { X }
  ///      { X, Y, u, Y, u, u, X, u } -> { X, Y }
  ///
  /// Currently this must be a power-of-2 build vector.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the original vector width and set the bits where elements are undef.
  /// If result is false, Sequence will be empty.
  bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
                           BitVector *UndefElements = nullptr) const;

  /// Returns the demanded splatted constant or null if this is not a constant
  /// splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantSDNode *
  getConstantSplatNode(const APInt &DemandedElts,
                       BitVector *UndefElements = nullptr) const;

  /// Returns the splatted constant or null if this is not a constant
  /// splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantSDNode *
  getConstantSplatNode(BitVector *UndefElements = nullptr) const;

  /// Returns the demanded splatted constant FP or null if this is not a
  /// constant FP splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantFPSDNode *
  getConstantFPSplatNode(const APInt &DemandedElts,
                         BitVector *UndefElements = nullptr) const;

  /// Returns the splatted constant FP or null if this is not a constant
  /// FP splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantFPSDNode *
  getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;

  /// If this is a constant FP splat and the splatted constant FP is an
  /// exact power or 2, return the log base 2 integer value. Otherwise,
  /// return -1.
  ///
  /// The BitWidth specifies the necessary bit precision.
  int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
                                          uint32_t BitWidth) const;

  // Declaration only here; see the implementation for the exact criteria.
  bool isConstant() const;

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::BUILD_VECTOR;
  }
};
2040 | ||||
2041 | /// An SDNode that holds an arbitrary LLVM IR Value. This is | |||
2042 | /// used when the SelectionDAG needs to make a simple reference to something | |||
2043 | /// in the LLVM IR representation. | |||
2044 | /// | |||
2045 | class SrcValueSDNode : public SDNode { | |||
2046 | friend class SelectionDAG; | |||
2047 | ||||
2048 | const Value *V; | |||
2049 | ||||
2050 | /// Create a SrcValue for a general value. | |||
2051 | explicit SrcValueSDNode(const Value *v) | |||
2052 | : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {} | |||
2053 | ||||
2054 | public: | |||
2055 | /// Return the contained Value. | |||
2056 | const Value *getValue() const { return V; } | |||
2057 | ||||
2058 | static bool classof(const SDNode *N) { | |||
2059 | return N->getOpcode() == ISD::SRCVALUE; | |||
2060 | } | |||
2061 | }; | |||
2062 | ||||
2063 | class MDNodeSDNode : public SDNode { | |||
2064 | friend class SelectionDAG; | |||
2065 | ||||
2066 | const MDNode *MD; | |||
2067 | ||||
2068 | explicit MDNodeSDNode(const MDNode *md) | |||
2069 | : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md) | |||
2070 | {} | |||
2071 | ||||
2072 | public: | |||
2073 | const MDNode *getMD() const { return MD; } | |||
2074 | ||||
2075 | static bool classof(const SDNode *N) { | |||
2076 | return N->getOpcode() == ISD::MDNODE_SDNODE; | |||
2077 | } | |||
2078 | }; | |||
2079 | ||||
2080 | class RegisterSDNode : public SDNode { | |||
2081 | friend class SelectionDAG; | |||
2082 | ||||
2083 | Register Reg; | |||
2084 | ||||
2085 | RegisterSDNode(Register reg, EVT VT) | |||
2086 | : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {} | |||
2087 | ||||
2088 | public: | |||
2089 | Register getReg() const { return Reg; } | |||
2090 | ||||
2091 | static bool classof(const SDNode *N) { | |||
2092 | return N->getOpcode() == ISD::Register; | |||
2093 | } | |||
2094 | }; | |||
2095 | ||||
2096 | class RegisterMaskSDNode : public SDNode { | |||
2097 | friend class SelectionDAG; | |||
2098 | ||||
2099 | // The memory for RegMask is not owned by the node. | |||
2100 | const uint32_t *RegMask; | |||
2101 | ||||
2102 | RegisterMaskSDNode(const uint32_t *mask) | |||
2103 | : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)), | |||
2104 | RegMask(mask) {} | |||
2105 | ||||
2106 | public: | |||
2107 | const uint32_t *getRegMask() const { return RegMask; } | |||
2108 | ||||
2109 | static bool classof(const SDNode *N) { | |||
2110 | return N->getOpcode() == ISD::RegisterMask; | |||
2111 | } | |||
2112 | }; | |||
2113 | ||||
2114 | class BlockAddressSDNode : public SDNode { | |||
2115 | friend class SelectionDAG; | |||
2116 | ||||
2117 | const BlockAddress *BA; | |||
2118 | int64_t Offset; | |||
2119 | unsigned TargetFlags; | |||
2120 | ||||
2121 | BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, | |||
2122 | int64_t o, unsigned Flags) | |||
2123 | : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), | |||
2124 | BA(ba), Offset(o), TargetFlags(Flags) {} | |||
2125 | ||||
2126 | public: | |||
2127 | const BlockAddress *getBlockAddress() const { return BA; } | |||
2128 | int64_t getOffset() const { return Offset; } | |||
2129 | unsigned getTargetFlags() const { return TargetFlags; } | |||
2130 | ||||
2131 | static bool classof(const SDNode *N) { | |||
2132 | return N->getOpcode() == ISD::BlockAddress || | |||
2133 | N->getOpcode() == ISD::TargetBlockAddress; | |||
2134 | } | |||
2135 | }; | |||
2136 | ||||
2137 | class LabelSDNode : public SDNode { | |||
2138 | friend class SelectionDAG; | |||
2139 | ||||
2140 | MCSymbol *Label; | |||
2141 | ||||
2142 | LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L) | |||
2143 | : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) { | |||
2144 | assert(LabelSDNode::classof(this) && "not a label opcode")(static_cast <bool> (LabelSDNode::classof(this) && "not a label opcode") ? void (0) : __assert_fail ("LabelSDNode::classof(this) && \"not a label opcode\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2144, __extension__ __PRETTY_FUNCTION__)); | |||
2145 | } | |||
2146 | ||||
2147 | public: | |||
2148 | MCSymbol *getLabel() const { return Label; } | |||
2149 | ||||
2150 | static bool classof(const SDNode *N) { | |||
2151 | return N->getOpcode() == ISD::EH_LABEL || | |||
2152 | N->getOpcode() == ISD::ANNOTATION_LABEL; | |||
2153 | } | |||
2154 | }; | |||
2155 | ||||
2156 | class ExternalSymbolSDNode : public SDNode { | |||
2157 | friend class SelectionDAG; | |||
2158 | ||||
2159 | const char *Symbol; | |||
2160 | unsigned TargetFlags; | |||
2161 | ||||
2162 | ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT) | |||
2163 | : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0, | |||
2164 | DebugLoc(), getSDVTList(VT)), | |||
2165 | Symbol(Sym), TargetFlags(TF) {} | |||
2166 | ||||
2167 | public: | |||
2168 | const char *getSymbol() const { return Symbol; } | |||
2169 | unsigned getTargetFlags() const { return TargetFlags; } | |||
2170 | ||||
2171 | static bool classof(const SDNode *N) { | |||
2172 | return N->getOpcode() == ISD::ExternalSymbol || | |||
2173 | N->getOpcode() == ISD::TargetExternalSymbol; | |||
2174 | } | |||
2175 | }; | |||
2176 | ||||
2177 | class MCSymbolSDNode : public SDNode { | |||
2178 | friend class SelectionDAG; | |||
2179 | ||||
2180 | MCSymbol *Symbol; | |||
2181 | ||||
2182 | MCSymbolSDNode(MCSymbol *Symbol, EVT VT) | |||
2183 | : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {} | |||
2184 | ||||
2185 | public: | |||
2186 | MCSymbol *getMCSymbol() const { return Symbol; } | |||
2187 | ||||
2188 | static bool classof(const SDNode *N) { | |||
2189 | return N->getOpcode() == ISD::MCSymbol; | |||
2190 | } | |||
2191 | }; | |||
2192 | ||||
2193 | class CondCodeSDNode : public SDNode { | |||
2194 | friend class SelectionDAG; | |||
2195 | ||||
2196 | ISD::CondCode Condition; | |||
2197 | ||||
2198 | explicit CondCodeSDNode(ISD::CondCode Cond) | |||
2199 | : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)), | |||
2200 | Condition(Cond) {} | |||
2201 | ||||
2202 | public: | |||
2203 | ISD::CondCode get() const { return Condition; } | |||
2204 | ||||
2205 | static bool classof(const SDNode *N) { | |||
2206 | return N->getOpcode() == ISD::CONDCODE; | |||
2207 | } | |||
2208 | }; | |||
2209 | ||||
2210 | /// This class is used to represent EVT's, which are used | |||
2211 | /// to parameterize some operations. | |||
2212 | class VTSDNode : public SDNode { | |||
2213 | friend class SelectionDAG; | |||
2214 | ||||
2215 | EVT ValueType; | |||
2216 | ||||
2217 | explicit VTSDNode(EVT VT) | |||
2218 | : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)), | |||
2219 | ValueType(VT) {} | |||
2220 | ||||
2221 | public: | |||
2222 | EVT getVT() const { return ValueType; } | |||
2223 | ||||
2224 | static bool classof(const SDNode *N) { | |||
2225 | return N->getOpcode() == ISD::VALUETYPE; | |||
2226 | } | |||
2227 | }; | |||
2228 | ||||
2229 | /// Base class for LoadSDNode and StoreSDNode | |||
2230 | class LSBaseSDNode : public MemSDNode { | |||
2231 | public: | |||
2232 | LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, | |||
2233 | SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT, | |||
2234 | MachineMemOperand *MMO) | |||
2235 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2236 | LSBaseSDNodeBits.AddressingMode = AM; | |||
2237 | assert(getAddressingMode() == AM && "Value truncated")(static_cast <bool> (getAddressingMode() == AM && "Value truncated") ? void (0) : __assert_fail ("getAddressingMode() == AM && \"Value truncated\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2237, __extension__ __PRETTY_FUNCTION__)); | |||
2238 | } | |||
2239 | ||||
2240 | const SDValue &getOffset() const { | |||
2241 | return getOperand(getOpcode() == ISD::LOAD ? 2 : 3); | |||
2242 | } | |||
2243 | ||||
2244 | /// Return the addressing mode for this load or store: | |||
2245 | /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. | |||
2246 | ISD::MemIndexedMode getAddressingMode() const { | |||
2247 | return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); | |||
2248 | } | |||
2249 | ||||
2250 | /// Return true if this is a pre/post inc/dec load/store. | |||
2251 | bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } | |||
2252 | ||||
2253 | /// Return true if this is NOT a pre/post inc/dec load/store. | |||
2254 | bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } | |||
2255 | ||||
2256 | static bool classof(const SDNode *N) { | |||
2257 | return N->getOpcode() == ISD::LOAD || | |||
2258 | N->getOpcode() == ISD::STORE; | |||
2259 | } | |||
2260 | }; | |||
2261 | ||||
2262 | /// This class is used to represent ISD::LOAD nodes. | |||
2263 | class LoadSDNode : public LSBaseSDNode { | |||
2264 | friend class SelectionDAG; | |||
2265 | ||||
2266 | LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2267 | ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT, | |||
2268 | MachineMemOperand *MMO) | |||
2269 | : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) { | |||
2270 | LoadSDNodeBits.ExtTy = ETy; | |||
2271 | assert(readMem() && "Load MachineMemOperand is not a load!")(static_cast <bool> (readMem() && "Load MachineMemOperand is not a load!" ) ? void (0) : __assert_fail ("readMem() && \"Load MachineMemOperand is not a load!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2271, __extension__ __PRETTY_FUNCTION__)); | |||
2272 | assert(!writeMem() && "Load MachineMemOperand is a store!")(static_cast <bool> (!writeMem() && "Load MachineMemOperand is a store!" ) ? void (0) : __assert_fail ("!writeMem() && \"Load MachineMemOperand is a store!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2272, __extension__ __PRETTY_FUNCTION__)); | |||
2273 | } | |||
2274 | ||||
2275 | public: | |||
2276 | /// Return whether this is a plain node, | |||
2277 | /// or one of the varieties of value-extending loads. | |||
2278 | ISD::LoadExtType getExtensionType() const { | |||
2279 | return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy); | |||
2280 | } | |||
2281 | ||||
2282 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
2283 | const SDValue &getOffset() const { return getOperand(2); } | |||
2284 | ||||
2285 | static bool classof(const SDNode *N) { | |||
2286 | return N->getOpcode() == ISD::LOAD; | |||
2287 | } | |||
2288 | }; | |||
2289 | ||||
2290 | /// This class is used to represent ISD::STORE nodes. | |||
/// This class represents an ISD::STORE node.
/// Operand layout (inherited from LSBaseSDNode): 0 = Chain, 1 = Value,
/// 2 = BasePtr, 3 = Offset — see the accessors below.
class StoreSDNode : public LSBaseSDNode {
  friend class SelectionDAG;

  StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
              ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
              MachineMemOperand *MMO)
      : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
    StoreSDNodeBits.IsTruncating = isTrunc;
    // Sanity-check that the supplied MachineMemOperand actually describes a
    // store and not a load.
    assert(!readMem() && "Store MachineMemOperand is a load!");
    assert(writeMem() && "Store MachineMemOperand is not a store!");
  }

public:
  /// Return true if the op does a truncation before store.
  /// For integers this is the same as doing a TRUNCATE and storing the result.
  /// For floats, it is the same as doing an FP_ROUND and storing the result.
  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
  /// Set or clear the truncating-store flag (used when a combine narrows or
  /// widens the stored value in place).
  void setTruncatingStore(bool Truncating) {
    StoreSDNodeBits.IsTruncating = Truncating;
  }

  const SDValue &getValue() const { return getOperand(1); }   // stored value
  const SDValue &getBasePtr() const { return getOperand(2); } // base address
  const SDValue &getOffset() const { return getOperand(3); }  // index offset

  // Support for LLVM-style RTTI (isa<>/cast<>/dyn_cast<>).
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::STORE;
  }
};
2320 | ||||
/// This base class is used to represent MLOAD and MSTORE nodes
class MaskedLoadStoreSDNode : public MemSDNode {
public:
  friend class SelectionDAG;

  MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
                        const DebugLoc &dl, SDVTList VTs,
                        ISD::MemIndexedMode AM, EVT MemVT,
                        MachineMemOperand *MMO)
      : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
    // The addressing mode is packed into a narrow bitfield; the assert
    // verifies the value survived the truncation into that field.
    LSBaseSDNodeBits.AddressingMode = AM;
    assert(getAddressingMode() == AM && "Value truncated");
  }

  // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
  // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
  // Mask is a vector of i1 elements
  //
  // The operand index therefore differs between MLOAD and MSTORE, which is
  // why the accessors below switch on the opcode.
  const SDValue &getOffset() const {
    return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
  }
  const SDValue &getMask() const {
    return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
  }

  /// Return the addressing mode for this load or store:
  /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
  ISD::MemIndexedMode getAddressingMode() const {
    return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
  }

  /// Return true if this is a pre/post inc/dec load/store.
  bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }

  /// Return true if this is NOT a pre/post inc/dec load/store.
  bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }

  // Support for LLVM-style RTTI.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MLOAD ||
           N->getOpcode() == ISD::MSTORE;
  }
};
2362 | ||||
/// This class is used to represent an MLOAD node
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
public:
  friend class SelectionDAG;

  MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                   ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
                   bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
      : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
    LoadSDNodeBits.ExtTy = ETy;
    LoadSDNodeBits.IsExpanding = IsExpanding;
  }

  /// Return the extension kind applied to the loaded value
  /// (NON_EXTLOAD / EXTLOAD / SEXTLOAD / ZEXTLOAD).
  ISD::LoadExtType getExtensionType() const {
    return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
  }

  // Operand layout: (Chain, BasePtr, Offset, Mask, PassThru).
  const SDValue &getBasePtr() const { return getOperand(1); }
  const SDValue &getOffset() const { return getOperand(2); }
  const SDValue &getMask() const { return getOperand(3); }
  const SDValue &getPassThru() const { return getOperand(4); }

  // Support for LLVM-style RTTI.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MLOAD;
  }

  /// Return true if this load additionally expands (decompresses) the
  /// contiguously-loaded active elements into the masked lanes.
  bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
};
2391 | ||||
/// This class is used to represent an MSTORE node
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
public:
  friend class SelectionDAG;

  MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                    ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
                    EVT MemVT, MachineMemOperand *MMO)
      : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
    StoreSDNodeBits.IsTruncating = isTrunc;
    StoreSDNodeBits.IsCompressing = isCompressing;
  }

  /// Return true if the op does a truncation before store.
  /// For integers this is the same as doing a TRUNCATE and storing the result.
  /// For floats, it is the same as doing an FP_ROUND and storing the result.
  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }

  /// Returns true if the op does a compression to the vector before storing.
  /// The node contiguously stores the active elements (integers or floats)
  /// in src (those with their respective bit set in writemask k) to unaligned
  /// memory at base_addr.
  bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }

  // Operand layout: (Chain, Value, BasePtr, Offset, Mask).
  const SDValue &getValue() const { return getOperand(1); }
  const SDValue &getBasePtr() const { return getOperand(2); }
  const SDValue &getOffset() const { return getOperand(3); }
  const SDValue &getMask() const { return getOperand(4); }

  // Support for LLVM-style RTTI.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MSTORE;
  }
};
2425 | ||||
/// This is a base class used to represent
/// MGATHER and MSCATTER nodes
///
class MaskedGatherScatterSDNode : public MemSDNode {
public:
  friend class SelectionDAG;

  MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO, ISD::MemIndexType IndexType)
      : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
    // NOTE: gather/scatter nodes reuse the AddressingMode bitfield to hold
    // the MemIndexType; the assert guards against truncation in the narrow
    // field.
    LSBaseSDNodeBits.AddressingMode = IndexType;
    assert(getIndexType() == IndexType && "Value truncated");
  }

  /// How is Index applied to BasePtr when computing addresses.
  ISD::MemIndexType getIndexType() const {
    return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
  }
  void setIndexType(ISD::MemIndexType IndexType) {
    LSBaseSDNodeBits.AddressingMode = IndexType;
  }
  /// Return true if Index is multiplied by Scale when forming addresses.
  bool isIndexScaled() const {
    return (getIndexType() == ISD::SIGNED_SCALED) ||
           (getIndexType() == ISD::UNSIGNED_SCALED);
  }
  /// Return true if Index is sign-extended rather than zero-extended.
  bool isIndexSigned() const {
    return (getIndexType() == ISD::SIGNED_SCALED) ||
           (getIndexType() == ISD::SIGNED_UNSCALED);
  }

  // In the both nodes address is Op1, mask is Op2:
  // MaskedGatherSDNode  (Chain, passthru, mask, base, index, scale)
  // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
  // Mask is a vector of i1 elements
  const SDValue &getBasePtr() const { return getOperand(3); }
  const SDValue &getIndex() const { return getOperand(4); }
  const SDValue &getMask() const { return getOperand(2); }
  const SDValue &getScale() const { return getOperand(5); }

  // Support for LLVM-style RTTI.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MGATHER ||
           N->getOpcode() == ISD::MSCATTER;
  }
};
2471 | ||||
/// This class is used to represent an MGATHER node
///
class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
public:
  friend class SelectionDAG;

  MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                     EVT MemVT, MachineMemOperand *MMO,
                     ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
      : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
                                  IndexType) {
    LoadSDNodeBits.ExtTy = ETy;
  }

  /// Value merged into the inactive (masked-off) lanes of the result.
  const SDValue &getPassThru() const { return getOperand(1); }

  /// Extension kind applied to each gathered element.
  ISD::LoadExtType getExtensionType() const {
    return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
  }

  // Support for LLVM-style RTTI.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MGATHER;
  }
};
2496 | ||||
/// This class is used to represent an MSCATTER node
///
class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
public:
  friend class SelectionDAG;

  MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                      EVT MemVT, MachineMemOperand *MMO,
                      ISD::MemIndexType IndexType, bool IsTrunc)
      : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
                                  IndexType) {
    StoreSDNodeBits.IsTruncating = IsTrunc;
  }

  /// Return true if the op does a truncation before store.
  /// For integers this is the same as doing a TRUNCATE and storing the result.
  /// For floats, it is the same as doing an FP_ROUND and storing the result.
  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }

  /// The vector of values being scattered to memory.
  const SDValue &getValue() const { return getOperand(1); }

  // Support for LLVM-style RTTI.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MSCATTER;
  }
};
2522 | ||||
/// An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the
/// instruction selection proper phase.
///
/// Note that the only supported way to set the `memoperands` is by calling the
/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
/// inside the DAG rather than in the node.
class MachineSDNode : public SDNode {
private:
  friend class SelectionDAG;

  MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
      : SDNode(Opc, Order, DL, VTs) {}

  // We use a pointer union between a single `MachineMemOperand` pointer and
  // a pointer to an array of `MachineMemOperand` pointers. This is null when
  // the number of these is zero, the single pointer variant used when the
  // number is one, and the array is used for larger numbers.
  //
  // The array is allocated via the `SelectionDAG`'s allocator and so will
  // always live until the DAG is cleaned up and doesn't require ownership here.
  //
  // We can't use something simpler like `TinyPtrVector` here because `SDNode`
  // subclasses aren't managed in a conforming C++ manner. See the comments on
  // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
  // constraint here is that these don't manage memory with their constructor or
  // destructor and can be initialized to a good state even if they start off
  // uninitialized.
  PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};

  // Note that this could be folded into the above `MemRefs` member if doing so
  // is advantageous at some point. We don't need to store this in most cases.
  // However, at the moment this doesn't appear to make the allocation any
  // smaller and makes the code somewhat simpler to read.
  int NumMemRefs = 0;

public:
  using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;

  /// Return a view of the memory references. The returned ArrayRef points
  /// either at the inline single-pointer slot or at the DAG-allocated array.
  ArrayRef<MachineMemOperand *> memoperands() const {
    // Special case the common cases.
    if (NumMemRefs == 0)
      return {};
    if (NumMemRefs == 1)
      return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);

    // Otherwise we have an actual array.
    return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
  }
  mmo_iterator memoperands_begin() const { return memoperands().begin(); }
  mmo_iterator memoperands_end() const { return memoperands().end(); }
  bool memoperands_empty() const { return memoperands().empty(); }

  /// Clear out the memory reference descriptor list.
  /// (Does not free the array — it is owned by the DAG's allocator.)
  void clearMemRefs() {
    MemRefs = nullptr;
    NumMemRefs = 0;
  }

  // Support for LLVM-style RTTI: any node carrying a target machine opcode.
  static bool classof(const SDNode *N) {
    return N->isMachineOpcode();
  }
};
2586 | ||||
/// An SDNode that records if a register contains a value that is guaranteed to
/// be aligned accordingly.
class AssertAlignSDNode : public SDNode {
  Align Alignment; // the guaranteed alignment of the wrapped value

public:
  AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
      : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}

  /// Return the alignment this node asserts for its operand.
  Align getAlign() const { return Alignment; }

  // Support for LLVM-style RTTI.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::AssertAlign;
  }
};
2602 | ||||
/// Forward iterator over the operand SDNodes of a given SDNode, used by the
/// GraphTraits specialization below to walk the DAG as a graph.
class SDNodeIterator {
  const SDNode *Node; // the node whose operands are being iterated
  unsigned Operand;   // current operand index

  SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}

public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = SDNode;
  using difference_type = std::ptrdiff_t;
  using pointer = value_type *;
  using reference = value_type &;

  // NOTE: only the operand index is compared, not the node pointer; callers
  // are expected to compare iterators over the same node (operator- below
  // asserts exactly that).
  bool operator==(const SDNodeIterator& x) const {
    return Operand == x.Operand;
  }
  bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }

  /// Dereference to the SDNode producing the current operand value.
  pointer operator*() const {
    return Node->getOperand(Operand).getNode();
  }
  pointer operator->() const { return operator*(); }

  SDNodeIterator& operator++() {  // Preincrement
    ++Operand;
    return *this;
  }
  SDNodeIterator operator++(int) { // Postincrement
    SDNodeIterator tmp = *this; ++*this; return tmp;
  }
  size_t operator-(SDNodeIterator Other) const {
    assert(Node == Other.Node &&
           "Cannot compare iterators of two different nodes!");
    return Operand - Other.Operand;
  }

  static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
  static SDNodeIterator end  (const SDNode *N) {
    return SDNodeIterator(N, N->getNumOperands());
  }

  unsigned getOperand() const { return Operand; }
  const SDNode *getNode() const { return Node; }
};
2647 | ||||
/// GraphTraits specialization so generic graph algorithms (DFS, SCC,
/// GraphWriter, etc.) can traverse a SelectionDAG starting at any SDNode.
/// Children of a node are the nodes producing its operands.
template <> struct GraphTraits<SDNode*> {
  using NodeRef = SDNode *;
  using ChildIteratorType = SDNodeIterator;

  static NodeRef getEntryNode(SDNode *N) { return N; }

  static ChildIteratorType child_begin(NodeRef N) {
    return SDNodeIterator::begin(N);
  }

  static ChildIteratorType child_end(NodeRef N) {
    return SDNodeIterator::end(N);
  }
};
2662 | ||||
/// A representation of the largest SDNode, for use in sizeof().
///
/// This needs to be a union because the largest node differs on 32 bit systems
/// with 4 and 8 byte pointer alignment, respectively.
using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
                                            BlockAddressSDNode,
                                            GlobalAddressSDNode,
                                            PseudoProbeSDNode>;

/// The SDNode class with the greatest alignment requirement.
using MostAlignedSDNode = GlobalAddressSDNode;
2674 | ||||
2675 | namespace ISD { | |||
2676 | ||||
2677 | /// Returns true if the specified node is a non-extending and unindexed load. | |||
2678 | inline bool isNormalLoad(const SDNode *N) { | |||
2679 | const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N); | |||
2680 | return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD && | |||
2681 | Ld->getAddressingMode() == ISD::UNINDEXED; | |||
2682 | } | |||
2683 | ||||
2684 | /// Returns true if the specified node is a non-extending load. | |||
2685 | inline bool isNON_EXTLoad(const SDNode *N) { | |||
2686 | return isa<LoadSDNode>(N) && | |||
2687 | cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; | |||
2688 | } | |||
2689 | ||||
2690 | /// Returns true if the specified node is a EXTLOAD. | |||
2691 | inline bool isEXTLoad(const SDNode *N) { | |||
2692 | return isa<LoadSDNode>(N) && | |||
2693 | cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD; | |||
2694 | } | |||
2695 | ||||
2696 | /// Returns true if the specified node is a SEXTLOAD. | |||
2697 | inline bool isSEXTLoad(const SDNode *N) { | |||
2698 | return isa<LoadSDNode>(N) && | |||
2699 | cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; | |||
2700 | } | |||
2701 | ||||
2702 | /// Returns true if the specified node is a ZEXTLOAD. | |||
2703 | inline bool isZEXTLoad(const SDNode *N) { | |||
2704 | return isa<LoadSDNode>(N) && | |||
2705 | cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; | |||
2706 | } | |||
2707 | ||||
2708 | /// Returns true if the specified node is an unindexed load. | |||
2709 | inline bool isUNINDEXEDLoad(const SDNode *N) { | |||
2710 | return isa<LoadSDNode>(N) && | |||
2711 | cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
2712 | } | |||
2713 | ||||
2714 | /// Returns true if the specified node is a non-truncating | |||
2715 | /// and unindexed store. | |||
2716 | inline bool isNormalStore(const SDNode *N) { | |||
2717 | const StoreSDNode *St = dyn_cast<StoreSDNode>(N); | |||
2718 | return St && !St->isTruncatingStore() && | |||
2719 | St->getAddressingMode() == ISD::UNINDEXED; | |||
2720 | } | |||
2721 | ||||
2722 | /// Returns true if the specified node is an unindexed store. | |||
2723 | inline bool isUNINDEXEDStore(const SDNode *N) { | |||
2724 | return isa<StoreSDNode>(N) && | |||
2725 | cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
2726 | } | |||
2727 | ||||
/// Attempt to match a unary predicate against a scalar/splat constant or
/// every element of a constant BUILD_VECTOR.
/// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
/// Returns true only if every (matched) element satisfies the predicate.
bool matchUnaryPredicate(SDValue Op,
                         std::function<bool(ConstantSDNode *)> Match,
                         bool AllowUndefs = false);
2734 | ||||
/// Attempt to match a binary predicate against a pair of scalar/splat
/// constants or every element of a pair of constant BUILD_VECTORs.
/// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
/// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
/// Returns true only if every element pair satisfies the predicate.
bool matchBinaryPredicate(
    SDValue LHS, SDValue RHS,
    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
    bool AllowUndefs = false, bool AllowTypeMismatch = false);
2743 | ||||
2744 | /// Returns true if the specified value is the overflow result from one | |||
2745 | /// of the overflow intrinsic nodes. | |||
2746 | inline bool isOverflowIntrOpRes(SDValue Op) { | |||
2747 | unsigned Opc = Op.getOpcode(); | |||
2748 | return (Op.getResNo() == 1 && | |||
2749 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | |||
2750 | Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)); | |||
2751 | } | |||
2752 | ||||
2753 | } // end namespace ISD | |||
2754 | ||||
2755 | } // end namespace llvm | |||
2756 | ||||
2757 | #endif // LLVM_CODEGEN_SELECTIONDAGNODES_H |