Static analyzer report context:
  File:    llvm/include/llvm/CodeGen/SelectionDAGNodes.h
  Warning: line 1149, column 10 — Called C++ object pointer is null
1 | //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// | ||||||
2 | // | ||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||
6 | // | ||||||
7 | //===----------------------------------------------------------------------===// | ||||||
8 | // | ||||||
9 | // This file defines a DAG pattern matching instruction selector for X86, | ||||||
10 | // converting from a legalized dag to a X86 dag. | ||||||
11 | // | ||||||
12 | //===----------------------------------------------------------------------===// | ||||||
13 | |||||||
14 | #include "X86.h" | ||||||
15 | #include "X86MachineFunctionInfo.h" | ||||||
16 | #include "X86RegisterInfo.h" | ||||||
17 | #include "X86Subtarget.h" | ||||||
18 | #include "X86TargetMachine.h" | ||||||
19 | #include "llvm/ADT/Statistic.h" | ||||||
20 | #include "llvm/CodeGen/MachineFrameInfo.h" | ||||||
21 | #include "llvm/CodeGen/MachineFunction.h" | ||||||
22 | #include "llvm/CodeGen/SelectionDAGISel.h" | ||||||
23 | #include "llvm/Config/llvm-config.h" | ||||||
24 | #include "llvm/IR/ConstantRange.h" | ||||||
25 | #include "llvm/IR/Function.h" | ||||||
26 | #include "llvm/IR/Instructions.h" | ||||||
27 | #include "llvm/IR/Intrinsics.h" | ||||||
28 | #include "llvm/IR/IntrinsicsX86.h" | ||||||
29 | #include "llvm/IR/Type.h" | ||||||
30 | #include "llvm/Support/Debug.h" | ||||||
31 | #include "llvm/Support/ErrorHandling.h" | ||||||
32 | #include "llvm/Support/KnownBits.h" | ||||||
33 | #include "llvm/Support/MathExtras.h" | ||||||
34 | #include "llvm/Support/raw_ostream.h" | ||||||
35 | #include "llvm/Target/TargetMachine.h" | ||||||
36 | #include "llvm/Target/TargetOptions.h" | ||||||
37 | #include <stdint.h> | ||||||
38 | using namespace llvm; | ||||||
39 | |||||||
40 | #define DEBUG_TYPE"x86-isel" "x86-isel" | ||||||
41 | |||||||
42 | STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor")static llvm::Statistic NumLoadMoved = {"x86-isel", "NumLoadMoved" , "Number of loads moved below TokenFactor"}; | ||||||
43 | |||||||
44 | static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true), | ||||||
45 | cl::desc("Enable setting constant bits to reduce size of mask immediates"), | ||||||
46 | cl::Hidden); | ||||||
47 | |||||||
48 | //===----------------------------------------------------------------------===// | ||||||
49 | // Pattern Matcher Implementation | ||||||
50 | //===----------------------------------------------------------------------===// | ||||||
51 | |||||||
52 | namespace { | ||||||
53 | /// This corresponds to X86AddressMode, but uses SDValue's instead of register | ||||||
54 | /// numbers for the leaves of the matched tree. | ||||||
55 | struct X86ISelAddressMode { | ||||||
56 | enum { | ||||||
57 | RegBase, | ||||||
58 | FrameIndexBase | ||||||
59 | } BaseType; | ||||||
60 | |||||||
61 | // This is really a union, discriminated by BaseType! | ||||||
62 | SDValue Base_Reg; | ||||||
63 | int Base_FrameIndex; | ||||||
64 | |||||||
65 | unsigned Scale; | ||||||
66 | SDValue IndexReg; | ||||||
67 | int32_t Disp; | ||||||
68 | SDValue Segment; | ||||||
69 | const GlobalValue *GV; | ||||||
70 | const Constant *CP; | ||||||
71 | const BlockAddress *BlockAddr; | ||||||
72 | const char *ES; | ||||||
73 | MCSymbol *MCSym; | ||||||
74 | int JT; | ||||||
75 | unsigned Align; // CP alignment. | ||||||
76 | unsigned char SymbolFlags; // X86II::MO_* | ||||||
77 | bool NegateIndex = false; | ||||||
78 | |||||||
79 | X86ISelAddressMode() | ||||||
80 | : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), | ||||||
81 | Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), | ||||||
82 | MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {} | ||||||
83 | |||||||
84 | bool hasSymbolicDisplacement() const { | ||||||
85 | return GV != nullptr || CP != nullptr || ES != nullptr || | ||||||
86 | MCSym != nullptr || JT != -1 || BlockAddr != nullptr; | ||||||
87 | } | ||||||
88 | |||||||
89 | bool hasBaseOrIndexReg() const { | ||||||
90 | return BaseType == FrameIndexBase || | ||||||
91 | IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; | ||||||
92 | } | ||||||
93 | |||||||
94 | /// Return true if this addressing mode is already RIP-relative. | ||||||
95 | bool isRIPRelative() const { | ||||||
96 | if (BaseType != RegBase) return false; | ||||||
97 | if (RegisterSDNode *RegNode = | ||||||
98 | dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode())) | ||||||
99 | return RegNode->getReg() == X86::RIP; | ||||||
100 | return false; | ||||||
101 | } | ||||||
102 | |||||||
103 | void setBaseReg(SDValue Reg) { | ||||||
104 | BaseType = RegBase; | ||||||
105 | Base_Reg = Reg; | ||||||
106 | } | ||||||
107 | |||||||
108 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||
109 | void dump(SelectionDAG *DAG = nullptr) { | ||||||
110 | dbgs() << "X86ISelAddressMode " << this << '\n'; | ||||||
111 | dbgs() << "Base_Reg "; | ||||||
112 | if (Base_Reg.getNode()) | ||||||
113 | Base_Reg.getNode()->dump(DAG); | ||||||
114 | else | ||||||
115 | dbgs() << "nul\n"; | ||||||
116 | if (BaseType == FrameIndexBase) | ||||||
117 | dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'; | ||||||
118 | dbgs() << " Scale " << Scale << '\n' | ||||||
119 | << "IndexReg "; | ||||||
120 | if (NegateIndex) | ||||||
121 | dbgs() << "negate "; | ||||||
122 | if (IndexReg.getNode()) | ||||||
123 | IndexReg.getNode()->dump(DAG); | ||||||
124 | else | ||||||
125 | dbgs() << "nul\n"; | ||||||
126 | dbgs() << " Disp " << Disp << '\n' | ||||||
127 | << "GV "; | ||||||
128 | if (GV) | ||||||
129 | GV->dump(); | ||||||
130 | else | ||||||
131 | dbgs() << "nul"; | ||||||
132 | dbgs() << " CP "; | ||||||
133 | if (CP) | ||||||
134 | CP->dump(); | ||||||
135 | else | ||||||
136 | dbgs() << "nul"; | ||||||
137 | dbgs() << '\n' | ||||||
138 | << "ES "; | ||||||
139 | if (ES) | ||||||
140 | dbgs() << ES; | ||||||
141 | else | ||||||
142 | dbgs() << "nul"; | ||||||
143 | dbgs() << " MCSym "; | ||||||
144 | if (MCSym) | ||||||
145 | dbgs() << MCSym; | ||||||
146 | else | ||||||
147 | dbgs() << "nul"; | ||||||
148 | dbgs() << " JT" << JT << " Align" << Align << '\n'; | ||||||
149 | } | ||||||
150 | #endif | ||||||
151 | }; | ||||||
152 | } | ||||||
153 | |||||||
154 | namespace { | ||||||
155 | //===--------------------------------------------------------------------===// | ||||||
156 | /// ISel - X86-specific code to select X86 machine instructions for | ||||||
157 | /// SelectionDAG operations. | ||||||
158 | /// | ||||||
159 | class X86DAGToDAGISel final : public SelectionDAGISel { | ||||||
160 | /// Keep a pointer to the X86Subtarget around so that we can | ||||||
161 | /// make the right decision when generating code for different targets. | ||||||
162 | const X86Subtarget *Subtarget; | ||||||
163 | |||||||
164 | /// If true, selector should try to optimize for code size instead of | ||||||
165 | /// performance. | ||||||
166 | bool OptForSize; | ||||||
167 | |||||||
168 | /// If true, selector should try to optimize for minimum code size. | ||||||
169 | bool OptForMinSize; | ||||||
170 | |||||||
171 | /// Disable direct TLS access through segment registers. | ||||||
172 | bool IndirectTlsSegRefs; | ||||||
173 | |||||||
174 | public: | ||||||
175 | explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) | ||||||
176 | : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false), | ||||||
177 | OptForMinSize(false), IndirectTlsSegRefs(false) {} | ||||||
178 | |||||||
179 | StringRef getPassName() const override { | ||||||
180 | return "X86 DAG->DAG Instruction Selection"; | ||||||
181 | } | ||||||
182 | |||||||
183 | bool runOnMachineFunction(MachineFunction &MF) override { | ||||||
184 | // Reset the subtarget each time through. | ||||||
185 | Subtarget = &MF.getSubtarget<X86Subtarget>(); | ||||||
186 | IndirectTlsSegRefs = MF.getFunction().hasFnAttribute( | ||||||
187 | "indirect-tls-seg-refs"); | ||||||
188 | |||||||
189 | // OptFor[Min]Size are used in pattern predicates that isel is matching. | ||||||
190 | OptForSize = MF.getFunction().hasOptSize(); | ||||||
191 | OptForMinSize = MF.getFunction().hasMinSize(); | ||||||
192 | assert((!OptForMinSize || OptForSize) &&(((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize" ) ? static_cast<void> (0) : __assert_fail ("(!OptForMinSize || OptForSize) && \"OptForMinSize implies OptForSize\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 193, __PRETTY_FUNCTION__)) | ||||||
193 | "OptForMinSize implies OptForSize")(((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize" ) ? static_cast<void> (0) : __assert_fail ("(!OptForMinSize || OptForSize) && \"OptForMinSize implies OptForSize\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 193, __PRETTY_FUNCTION__)); | ||||||
194 | |||||||
195 | SelectionDAGISel::runOnMachineFunction(MF); | ||||||
196 | return true; | ||||||
197 | } | ||||||
198 | |||||||
199 | void emitFunctionEntryCode() override; | ||||||
200 | |||||||
201 | bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; | ||||||
202 | |||||||
203 | void PreprocessISelDAG() override; | ||||||
204 | void PostprocessISelDAG() override; | ||||||
205 | |||||||
206 | // Include the pieces autogenerated from the target description. | ||||||
207 | #include "X86GenDAGISel.inc" | ||||||
208 | |||||||
209 | private: | ||||||
210 | void Select(SDNode *N) override; | ||||||
211 | |||||||
212 | bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); | ||||||
213 | bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); | ||||||
214 | bool matchWrapper(SDValue N, X86ISelAddressMode &AM); | ||||||
215 | bool matchAddress(SDValue N, X86ISelAddressMode &AM); | ||||||
216 | bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM); | ||||||
217 | bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth); | ||||||
218 | bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, | ||||||
219 | unsigned Depth); | ||||||
220 | bool matchAddressBase(SDValue N, X86ISelAddressMode &AM); | ||||||
221 | bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base, | ||||||
222 | SDValue &Scale, SDValue &Index, SDValue &Disp, | ||||||
223 | SDValue &Segment); | ||||||
224 | bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp, | ||||||
225 | SDValue ScaleOp, SDValue &Base, SDValue &Scale, | ||||||
226 | SDValue &Index, SDValue &Disp, SDValue &Segment); | ||||||
227 | bool selectMOV64Imm32(SDValue N, SDValue &Imm); | ||||||
228 | bool selectLEAAddr(SDValue N, SDValue &Base, | ||||||
229 | SDValue &Scale, SDValue &Index, SDValue &Disp, | ||||||
230 | SDValue &Segment); | ||||||
231 | bool selectLEA64_32Addr(SDValue N, SDValue &Base, | ||||||
232 | SDValue &Scale, SDValue &Index, SDValue &Disp, | ||||||
233 | SDValue &Segment); | ||||||
234 | bool selectTLSADDRAddr(SDValue N, SDValue &Base, | ||||||
235 | SDValue &Scale, SDValue &Index, SDValue &Disp, | ||||||
236 | SDValue &Segment); | ||||||
237 | bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N, | ||||||
238 | SDValue &Base, SDValue &Scale, | ||||||
239 | SDValue &Index, SDValue &Disp, | ||||||
240 | SDValue &Segment, | ||||||
241 | SDValue &NodeWithChain); | ||||||
242 | bool selectRelocImm(SDValue N, SDValue &Op); | ||||||
243 | |||||||
244 | bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, | ||||||
245 | SDValue &Base, SDValue &Scale, | ||||||
246 | SDValue &Index, SDValue &Disp, | ||||||
247 | SDValue &Segment); | ||||||
248 | |||||||
249 | // Convenience method where P is also root. | ||||||
250 | bool tryFoldLoad(SDNode *P, SDValue N, | ||||||
251 | SDValue &Base, SDValue &Scale, | ||||||
252 | SDValue &Index, SDValue &Disp, | ||||||
253 | SDValue &Segment) { | ||||||
254 | return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment); | ||||||
255 | } | ||||||
256 | |||||||
257 | bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, | ||||||
258 | SDValue &Base, SDValue &Scale, | ||||||
259 | SDValue &Index, SDValue &Disp, | ||||||
260 | SDValue &Segment); | ||||||
261 | |||||||
262 | /// Implement addressing mode selection for inline asm expressions. | ||||||
263 | bool SelectInlineAsmMemoryOperand(const SDValue &Op, | ||||||
264 | unsigned ConstraintID, | ||||||
265 | std::vector<SDValue> &OutOps) override; | ||||||
266 | |||||||
267 | void emitSpecialCodeForMain(); | ||||||
268 | |||||||
269 | inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL, | ||||||
270 | MVT VT, SDValue &Base, SDValue &Scale, | ||||||
271 | SDValue &Index, SDValue &Disp, | ||||||
272 | SDValue &Segment) { | ||||||
273 | if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) | ||||||
274 | Base = CurDAG->getTargetFrameIndex( | ||||||
275 | AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout())); | ||||||
276 | else if (AM.Base_Reg.getNode()) | ||||||
277 | Base = AM.Base_Reg; | ||||||
278 | else | ||||||
279 | Base = CurDAG->getRegister(0, VT); | ||||||
280 | |||||||
281 | Scale = getI8Imm(AM.Scale, DL); | ||||||
282 | |||||||
283 | // Negate the index if needed. | ||||||
284 | if (AM.NegateIndex) { | ||||||
285 | unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r; | ||||||
286 | SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32, | ||||||
287 | AM.IndexReg), 0); | ||||||
288 | AM.IndexReg = Neg; | ||||||
289 | } | ||||||
290 | |||||||
291 | if (AM.IndexReg.getNode()) | ||||||
292 | Index = AM.IndexReg; | ||||||
293 | else | ||||||
294 | Index = CurDAG->getRegister(0, VT); | ||||||
295 | |||||||
296 | // These are 32-bit even in 64-bit mode since RIP-relative offset | ||||||
297 | // is 32-bit. | ||||||
298 | if (AM.GV) | ||||||
299 | Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), | ||||||
300 | MVT::i32, AM.Disp, | ||||||
301 | AM.SymbolFlags); | ||||||
302 | else if (AM.CP) | ||||||
303 | Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, | ||||||
304 | AM.Align, AM.Disp, AM.SymbolFlags); | ||||||
305 | else if (AM.ES) { | ||||||
306 | assert(!AM.Disp && "Non-zero displacement is ignored with ES.")((!AM.Disp && "Non-zero displacement is ignored with ES." ) ? static_cast<void> (0) : __assert_fail ("!AM.Disp && \"Non-zero displacement is ignored with ES.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 306, __PRETTY_FUNCTION__)); | ||||||
307 | Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); | ||||||
308 | } else if (AM.MCSym) { | ||||||
309 | assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.")((!AM.Disp && "Non-zero displacement is ignored with MCSym." ) ? static_cast<void> (0) : __assert_fail ("!AM.Disp && \"Non-zero displacement is ignored with MCSym.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 309, __PRETTY_FUNCTION__)); | ||||||
310 | assert(AM.SymbolFlags == 0 && "oo")((AM.SymbolFlags == 0 && "oo") ? static_cast<void> (0) : __assert_fail ("AM.SymbolFlags == 0 && \"oo\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 310, __PRETTY_FUNCTION__)); | ||||||
311 | Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32); | ||||||
312 | } else if (AM.JT != -1) { | ||||||
313 | assert(!AM.Disp && "Non-zero displacement is ignored with JT.")((!AM.Disp && "Non-zero displacement is ignored with JT." ) ? static_cast<void> (0) : __assert_fail ("!AM.Disp && \"Non-zero displacement is ignored with JT.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 313, __PRETTY_FUNCTION__)); | ||||||
314 | Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); | ||||||
315 | } else if (AM.BlockAddr) | ||||||
316 | Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, | ||||||
317 | AM.SymbolFlags); | ||||||
318 | else | ||||||
319 | Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32); | ||||||
320 | |||||||
321 | if (AM.Segment.getNode()) | ||||||
322 | Segment = AM.Segment; | ||||||
323 | else | ||||||
324 | Segment = CurDAG->getRegister(0, MVT::i16); | ||||||
325 | } | ||||||
326 | |||||||
327 | // Utility function to determine whether we should avoid selecting | ||||||
328 | // immediate forms of instructions for better code size or not. | ||||||
329 | // At a high level, we'd like to avoid such instructions when | ||||||
330 | // we have similar constants used within the same basic block | ||||||
331 | // that can be kept in a register. | ||||||
332 | // | ||||||
333 | bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { | ||||||
334 | uint32_t UseCount = 0; | ||||||
335 | |||||||
336 | // Do not want to hoist if we're not optimizing for size. | ||||||
337 | // TODO: We'd like to remove this restriction. | ||||||
338 | // See the comment in X86InstrInfo.td for more info. | ||||||
339 | if (!CurDAG->shouldOptForSize()) | ||||||
340 | return false; | ||||||
341 | |||||||
342 | // Walk all the users of the immediate. | ||||||
343 | for (SDNode::use_iterator UI = N->use_begin(), | ||||||
344 | UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) { | ||||||
345 | |||||||
346 | SDNode *User = *UI; | ||||||
347 | |||||||
348 | // This user is already selected. Count it as a legitimate use and | ||||||
349 | // move on. | ||||||
350 | if (User->isMachineOpcode()) { | ||||||
351 | UseCount++; | ||||||
352 | continue; | ||||||
353 | } | ||||||
354 | |||||||
355 | // We want to count stores of immediates as real uses. | ||||||
356 | if (User->getOpcode() == ISD::STORE && | ||||||
357 | User->getOperand(1).getNode() == N) { | ||||||
358 | UseCount++; | ||||||
359 | continue; | ||||||
360 | } | ||||||
361 | |||||||
362 | // We don't currently match users that have > 2 operands (except | ||||||
363 | // for stores, which are handled above) | ||||||
364 | // Those instruction won't match in ISEL, for now, and would | ||||||
365 | // be counted incorrectly. | ||||||
366 | // This may change in the future as we add additional instruction | ||||||
367 | // types. | ||||||
368 | if (User->getNumOperands() != 2) | ||||||
369 | continue; | ||||||
370 | |||||||
371 | // If this can match to INC/DEC, don't count it as a use. | ||||||
372 | if (User->getOpcode() == ISD::ADD && | ||||||
373 | (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0)))) | ||||||
374 | continue; | ||||||
375 | |||||||
376 | // Immediates that are used for offsets as part of stack | ||||||
377 | // manipulation should be left alone. These are typically | ||||||
378 | // used to indicate SP offsets for argument passing and | ||||||
379 | // will get pulled into stores/pushes (implicitly). | ||||||
380 | if (User->getOpcode() == X86ISD::ADD || | ||||||
381 | User->getOpcode() == ISD::ADD || | ||||||
382 | User->getOpcode() == X86ISD::SUB || | ||||||
383 | User->getOpcode() == ISD::SUB) { | ||||||
384 | |||||||
385 | // Find the other operand of the add/sub. | ||||||
386 | SDValue OtherOp = User->getOperand(0); | ||||||
387 | if (OtherOp.getNode() == N) | ||||||
388 | OtherOp = User->getOperand(1); | ||||||
389 | |||||||
390 | // Don't count if the other operand is SP. | ||||||
391 | RegisterSDNode *RegNode; | ||||||
392 | if (OtherOp->getOpcode() == ISD::CopyFromReg && | ||||||
393 | (RegNode = dyn_cast_or_null<RegisterSDNode>( | ||||||
394 | OtherOp->getOperand(1).getNode()))) | ||||||
395 | if ((RegNode->getReg() == X86::ESP) || | ||||||
396 | (RegNode->getReg() == X86::RSP)) | ||||||
397 | continue; | ||||||
398 | } | ||||||
399 | |||||||
400 | // ... otherwise, count this and move on. | ||||||
401 | UseCount++; | ||||||
402 | } | ||||||
403 | |||||||
404 | // If we have more than 1 use, then recommend for hoisting. | ||||||
405 | return (UseCount > 1); | ||||||
406 | } | ||||||
407 | |||||||
408 | /// Return a target constant with the specified value of type i8. | ||||||
409 | inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) { | ||||||
410 | return CurDAG->getTargetConstant(Imm, DL, MVT::i8); | ||||||
411 | } | ||||||
412 | |||||||
413 | /// Return a target constant with the specified value, of type i32. | ||||||
414 | inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) { | ||||||
415 | return CurDAG->getTargetConstant(Imm, DL, MVT::i32); | ||||||
416 | } | ||||||
417 | |||||||
418 | /// Return a target constant with the specified value, of type i64. | ||||||
419 | inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) { | ||||||
420 | return CurDAG->getTargetConstant(Imm, DL, MVT::i64); | ||||||
421 | } | ||||||
422 | |||||||
423 | SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth, | ||||||
424 | const SDLoc &DL) { | ||||||
425 | assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width")(((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width" ) ? static_cast<void> (0) : __assert_fail ("(VecWidth == 128 || VecWidth == 256) && \"Unexpected vector width\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 425, __PRETTY_FUNCTION__)); | ||||||
426 | uint64_t Index = N->getConstantOperandVal(1); | ||||||
427 | MVT VecVT = N->getOperand(0).getSimpleValueType(); | ||||||
428 | return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); | ||||||
429 | } | ||||||
430 | |||||||
431 | SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth, | ||||||
432 | const SDLoc &DL) { | ||||||
433 | assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width")(((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width" ) ? static_cast<void> (0) : __assert_fail ("(VecWidth == 128 || VecWidth == 256) && \"Unexpected vector width\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 433, __PRETTY_FUNCTION__)); | ||||||
434 | uint64_t Index = N->getConstantOperandVal(2); | ||||||
435 | MVT VecVT = N->getSimpleValueType(0); | ||||||
436 | return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); | ||||||
437 | } | ||||||
438 | |||||||
439 | // Helper to detect unneeded and instructions on shift amounts. Called | ||||||
440 | // from PatFrags in tablegen. | ||||||
441 | bool isUnneededShiftMask(SDNode *N, unsigned Width) const { | ||||||
442 | assert(N->getOpcode() == ISD::AND && "Unexpected opcode")((N->getOpcode() == ISD::AND && "Unexpected opcode" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::AND && \"Unexpected opcode\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 442, __PRETTY_FUNCTION__)); | ||||||
443 | const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); | ||||||
444 | |||||||
445 | if (Val.countTrailingOnes() >= Width) | ||||||
446 | return true; | ||||||
447 | |||||||
448 | APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero; | ||||||
449 | return Mask.countTrailingOnes() >= Width; | ||||||
450 | } | ||||||
451 | |||||||
452 | /// Return an SDNode that returns the value of the global base register. | ||||||
453 | /// Output instructions required to initialize the global base register, | ||||||
454 | /// if necessary. | ||||||
455 | SDNode *getGlobalBaseReg(); | ||||||
456 | |||||||
457 | /// Return a reference to the TargetMachine, casted to the target-specific | ||||||
458 | /// type. | ||||||
459 | const X86TargetMachine &getTargetMachine() const { | ||||||
460 | return static_cast<const X86TargetMachine &>(TM); | ||||||
461 | } | ||||||
462 | |||||||
463 | /// Return a reference to the TargetInstrInfo, casted to the target-specific | ||||||
464 | /// type. | ||||||
465 | const X86InstrInfo *getInstrInfo() const { | ||||||
466 | return Subtarget->getInstrInfo(); | ||||||
467 | } | ||||||
468 | |||||||
469 | /// Address-mode matching performs shift-of-and to and-of-shift | ||||||
470 | /// reassociation in order to expose more scaled addressing | ||||||
471 | /// opportunities. | ||||||
472 | bool ComplexPatternFuncMutatesDAG() const override { | ||||||
473 | return true; | ||||||
474 | } | ||||||
475 | |||||||
476 | bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const; | ||||||
477 | |||||||
478 | /// Returns whether this is a relocatable immediate in the range | ||||||
479 | /// [-2^Width .. 2^Width-1]. | ||||||
480 | template <unsigned Width> bool isSExtRelocImm(SDNode *N) const { | ||||||
481 | if (auto *CN = dyn_cast<ConstantSDNode>(N)) | ||||||
482 | return isInt<Width>(CN->getSExtValue()); | ||||||
483 | return isSExtAbsoluteSymbolRef(Width, N); | ||||||
484 | } | ||||||
485 | |||||||
486 | // Indicates we should prefer to use a non-temporal load for this load. | ||||||
487 | bool useNonTemporalLoad(LoadSDNode *N) const { | ||||||
488 | if (!N->isNonTemporal()) | ||||||
489 | return false; | ||||||
490 | |||||||
491 | unsigned StoreSize = N->getMemoryVT().getStoreSize(); | ||||||
492 | |||||||
493 | if (N->getAlignment() < StoreSize) | ||||||
494 | return false; | ||||||
495 | |||||||
496 | switch (StoreSize) { | ||||||
497 | default: llvm_unreachable("Unsupported store size")::llvm::llvm_unreachable_internal("Unsupported store size", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 497); | ||||||
498 | case 4: | ||||||
499 | case 8: | ||||||
500 | return false; | ||||||
501 | case 16: | ||||||
502 | return Subtarget->hasSSE41(); | ||||||
503 | case 32: | ||||||
504 | return Subtarget->hasAVX2(); | ||||||
505 | case 64: | ||||||
506 | return Subtarget->hasAVX512(); | ||||||
507 | } | ||||||
508 | } | ||||||
509 | |||||||
510 | bool foldLoadStoreIntoMemOperand(SDNode *Node); | ||||||
511 | MachineSDNode *matchBEXTRFromAndImm(SDNode *Node); | ||||||
512 | bool matchBitExtract(SDNode *Node); | ||||||
513 | bool shrinkAndImmediate(SDNode *N); | ||||||
514 | bool isMaskZeroExtended(SDNode *N) const; | ||||||
515 | bool tryShiftAmountMod(SDNode *N); | ||||||
516 | bool tryShrinkShlLogicImm(SDNode *N); | ||||||
517 | bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); | ||||||
518 | bool tryMatchBitSelect(SDNode *N); | ||||||
519 | |||||||
520 | MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, | ||||||
521 | const SDLoc &dl, MVT VT, SDNode *Node); | ||||||
522 | MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, | ||||||
523 | const SDLoc &dl, MVT VT, SDNode *Node, | ||||||
524 | SDValue &InFlag); | ||||||
525 | |||||||
526 | bool tryOptimizeRem8Extend(SDNode *N); | ||||||
527 | |||||||
528 | bool onlyUsesZeroFlag(SDValue Flags) const; | ||||||
529 | bool hasNoSignFlagUses(SDValue Flags) const; | ||||||
530 | bool hasNoCarryFlagUses(SDValue Flags) const; | ||||||
531 | }; | ||||||
532 | } | ||||||
533 | |||||||
534 | |||||||
535 | // Returns true if this masked compare can be implemented legally with this | ||||||
536 | // type. | ||||||
537 | static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { | ||||||
538 | unsigned Opcode = N->getOpcode(); | ||||||
539 | if (Opcode == X86ISD::CMPM || Opcode == X86ISD::STRICT_CMPM || | ||||||
540 | Opcode == ISD::SETCC || Opcode == X86ISD::CMPM_SAE || | ||||||
541 | Opcode == X86ISD::VFPCLASS) { | ||||||
542 | // We can get 256-bit 8 element types here without VLX being enabled. When | ||||||
543 | // this happens we will use 512-bit operations and the mask will not be | ||||||
544 | // zero extended. | ||||||
545 | EVT OpVT = N->getOperand(0).getValueType(); | ||||||
546 | // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the | ||||||
547 | // second operand. | ||||||
548 | if (Opcode == X86ISD::STRICT_CMPM) | ||||||
549 | OpVT = N->getOperand(1).getValueType(); | ||||||
550 | if (OpVT.is256BitVector() || OpVT.is128BitVector()) | ||||||
551 | return Subtarget->hasVLX(); | ||||||
552 | |||||||
553 | return true; | ||||||
554 | } | ||||||
555 | // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check. | ||||||
556 | if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM || | ||||||
557 | Opcode == X86ISD::FSETCCM_SAE) | ||||||
558 | return true; | ||||||
559 | |||||||
560 | return false; | ||||||
561 | } | ||||||
562 | |||||||
563 | // Returns true if we can assume the writer of the mask has zero extended it | ||||||
564 | // for us. | ||||||
565 | bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const { | ||||||
566 | // If this is an AND, check if we have a compare on either side. As long as | ||||||
567 | // one side guarantees the mask is zero extended, the AND will preserve those | ||||||
568 | // zeros. | ||||||
569 | if (N->getOpcode() == ISD::AND) | ||||||
570 | return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) || | ||||||
571 | isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget); | ||||||
572 | |||||||
573 | return isLegalMaskCompare(N, Subtarget); | ||||||
574 | } | ||||||
575 | |||||||
576 | bool | ||||||
577 | X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { | ||||||
578 | if (OptLevel == CodeGenOpt::None) return false; | ||||||
579 | |||||||
580 | if (!N.hasOneUse()) | ||||||
581 | return false; | ||||||
582 | |||||||
583 | if (N.getOpcode() != ISD::LOAD) | ||||||
584 | return true; | ||||||
585 | |||||||
586 | // Don't fold non-temporal loads if we have an instruction for them. | ||||||
587 | if (useNonTemporalLoad(cast<LoadSDNode>(N))) | ||||||
588 | return false; | ||||||
589 | |||||||
590 | // If N is a load, do additional profitability checks. | ||||||
591 | if (U
| ||||||
592 | switch (U->getOpcode()) { | ||||||
593 | default: break; | ||||||
594 | case X86ISD::ADD: | ||||||
595 | case X86ISD::ADC: | ||||||
596 | case X86ISD::SUB: | ||||||
597 | case X86ISD::SBB: | ||||||
598 | case X86ISD::AND: | ||||||
599 | case X86ISD::XOR: | ||||||
600 | case X86ISD::OR: | ||||||
601 | case ISD::ADD: | ||||||
602 | case ISD::ADDCARRY: | ||||||
603 | case ISD::AND: | ||||||
604 | case ISD::OR: | ||||||
605 | case ISD::XOR: { | ||||||
606 | SDValue Op1 = U->getOperand(1); | ||||||
607 | |||||||
608 | // If the other operand is a 8-bit immediate we should fold the immediate | ||||||
609 | // instead. This reduces code size. | ||||||
610 | // e.g. | ||||||
611 | // movl 4(%esp), %eax | ||||||
612 | // addl $4, %eax | ||||||
613 | // vs. | ||||||
614 | // movl $4, %eax | ||||||
615 | // addl 4(%esp), %eax | ||||||
616 | // The former is 2 bytes shorter. In case where the increment is 1, then | ||||||
617 | // the saving can be 4 bytes (by using incl %eax). | ||||||
618 | if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) { | ||||||
619 | if (Imm->getAPIntValue().isSignedIntN(8)) | ||||||
620 | return false; | ||||||
621 | |||||||
622 | // If this is a 64-bit AND with an immediate that fits in 32-bits, | ||||||
623 | // prefer using the smaller and over folding the load. This is needed to | ||||||
624 | // make sure immediates created by shrinkAndImmediate are always folded. | ||||||
625 | // Ideally we would narrow the load during DAG combine and get the | ||||||
626 | // best of both worlds. | ||||||
627 | if (U->getOpcode() == ISD::AND && | ||||||
628 | Imm->getAPIntValue().getBitWidth() == 64 && | ||||||
629 | Imm->getAPIntValue().isIntN(32)) | ||||||
630 | return false; | ||||||
631 | |||||||
632 | // If this really a zext_inreg that can be represented with a movzx | ||||||
633 | // instruction, prefer that. | ||||||
634 | // TODO: We could shrink the load and fold if it is non-volatile. | ||||||
635 | if (U->getOpcode() == ISD::AND && | ||||||
636 | (Imm->getAPIntValue() == UINT8_MAX(255) || | ||||||
637 | Imm->getAPIntValue() == UINT16_MAX(65535) || | ||||||
638 | Imm->getAPIntValue() == UINT32_MAX(4294967295U))) | ||||||
639 | return false; | ||||||
640 | |||||||
641 | // ADD/SUB with can negate the immediate and use the opposite operation | ||||||
642 | // to fit 128 into a sign extended 8 bit immediate. | ||||||
643 | if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) && | ||||||
644 | (-Imm->getAPIntValue()).isSignedIntN(8)) | ||||||
645 | return false; | ||||||
646 | |||||||
647 | if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) && | ||||||
648 | (-Imm->getAPIntValue()).isSignedIntN(8) && | ||||||
649 | hasNoCarryFlagUses(SDValue(U, 1))) | ||||||
650 | return false; | ||||||
651 | } | ||||||
652 | |||||||
653 | // If the other operand is a TLS address, we should fold it instead. | ||||||
654 | // This produces | ||||||
655 | // movl %gs:0, %eax | ||||||
656 | // leal i@NTPOFF(%eax), %eax | ||||||
657 | // instead of | ||||||
658 | // movl $i@NTPOFF, %eax | ||||||
659 | // addl %gs:0, %eax | ||||||
660 | // if the block also has an access to a second TLS address this will save | ||||||
661 | // a load. | ||||||
662 | // FIXME: This is probably also true for non-TLS addresses. | ||||||
663 | if (Op1.getOpcode() == X86ISD::Wrapper) { | ||||||
664 | SDValue Val = Op1.getOperand(0); | ||||||
665 | if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) | ||||||
666 | return false; | ||||||
667 | } | ||||||
668 | |||||||
669 | // Don't fold load if this matches the BTS/BTR/BTC patterns. | ||||||
670 | // BTS: (or X, (shl 1, n)) | ||||||
671 | // BTR: (and X, (rotl -2, n)) | ||||||
672 | // BTC: (xor X, (shl 1, n)) | ||||||
673 | if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) { | ||||||
674 | if (U->getOperand(0).getOpcode() == ISD::SHL && | ||||||
675 | isOneConstant(U->getOperand(0).getOperand(0))) | ||||||
676 | return false; | ||||||
677 | |||||||
678 | if (U->getOperand(1).getOpcode() == ISD::SHL && | ||||||
679 | isOneConstant(U->getOperand(1).getOperand(0))) | ||||||
680 | return false; | ||||||
681 | } | ||||||
682 | if (U->getOpcode() == ISD::AND) { | ||||||
683 | SDValue U0 = U->getOperand(0); | ||||||
684 | SDValue U1 = U->getOperand(1); | ||||||
685 | if (U0.getOpcode() == ISD::ROTL) { | ||||||
686 | auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0)); | ||||||
687 | if (C && C->getSExtValue() == -2) | ||||||
688 | return false; | ||||||
689 | } | ||||||
690 | |||||||
691 | if (U1.getOpcode() == ISD::ROTL) { | ||||||
692 | auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0)); | ||||||
693 | if (C && C->getSExtValue() == -2) | ||||||
694 | return false; | ||||||
695 | } | ||||||
696 | } | ||||||
697 | |||||||
698 | break; | ||||||
699 | } | ||||||
700 | case ISD::SHL: | ||||||
701 | case ISD::SRA: | ||||||
702 | case ISD::SRL: | ||||||
703 | // Don't fold a load into a shift by immediate. The BMI2 instructions | ||||||
704 | // support folding a load, but not an immediate. The legacy instructions | ||||||
705 | // support folding an immediate, but can't fold a load. Folding an | ||||||
706 | // immediate is preferable to folding a load. | ||||||
707 | if (isa<ConstantSDNode>(U->getOperand(1))) | ||||||
708 | return false; | ||||||
709 | |||||||
710 | break; | ||||||
711 | } | ||||||
712 | } | ||||||
713 | |||||||
714 | // Prevent folding a load if this can implemented with an insert_subreg or | ||||||
715 | // a move that implicitly zeroes. | ||||||
716 | if (Root->getOpcode() == ISD::INSERT_SUBVECTOR && | ||||||
717 | isNullConstant(Root->getOperand(2)) && | ||||||
718 | (Root->getOperand(0).isUndef() || | ||||||
719 | ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode()))) | ||||||
720 | return false; | ||||||
721 | |||||||
722 | return true; | ||||||
723 | } | ||||||
724 | |||||||
725 | /// Replace the original chain operand of the call with | ||||||
726 | /// load's chain operand and move load below the call's chain operand. | ||||||
727 | static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, | ||||||
728 | SDValue Call, SDValue OrigChain) { | ||||||
729 | SmallVector<SDValue, 8> Ops; | ||||||
730 | SDValue Chain = OrigChain.getOperand(0); | ||||||
731 | if (Chain.getNode() == Load.getNode()) | ||||||
732 | Ops.push_back(Load.getOperand(0)); | ||||||
733 | else { | ||||||
734 | assert(Chain.getOpcode() == ISD::TokenFactor &&((Chain.getOpcode() == ISD::TokenFactor && "Unexpected chain operand" ) ? static_cast<void> (0) : __assert_fail ("Chain.getOpcode() == ISD::TokenFactor && \"Unexpected chain operand\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 735, __PRETTY_FUNCTION__)) | ||||||
735 | "Unexpected chain operand")((Chain.getOpcode() == ISD::TokenFactor && "Unexpected chain operand" ) ? static_cast<void> (0) : __assert_fail ("Chain.getOpcode() == ISD::TokenFactor && \"Unexpected chain operand\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 735, __PRETTY_FUNCTION__)); | ||||||
736 | for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) | ||||||
737 | if (Chain.getOperand(i).getNode() == Load.getNode()) | ||||||
738 | Ops.push_back(Load.getOperand(0)); | ||||||
739 | else | ||||||
740 | Ops.push_back(Chain.getOperand(i)); | ||||||
741 | SDValue NewChain = | ||||||
742 | CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops); | ||||||
743 | Ops.clear(); | ||||||
744 | Ops.push_back(NewChain); | ||||||
745 | } | ||||||
746 | Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end()); | ||||||
747 | CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops); | ||||||
748 | CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), | ||||||
749 | Load.getOperand(1), Load.getOperand(2)); | ||||||
750 | |||||||
751 | Ops.clear(); | ||||||
752 | Ops.push_back(SDValue(Load.getNode(), 1)); | ||||||
753 | Ops.append(Call->op_begin() + 1, Call->op_end()); | ||||||
754 | CurDAG->UpdateNodeOperands(Call.getNode(), Ops); | ||||||
755 | } | ||||||
756 | |||||||
757 | /// Return true if call address is a load and it can be | ||||||
758 | /// moved below CALLSEQ_START and the chains leading up to the call. | ||||||
759 | /// Return the CALLSEQ_START by reference as a second output. | ||||||
760 | /// In the case of a tail call, there isn't a callseq node between the call | ||||||
761 | /// chain and the load. | ||||||
762 | static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { | ||||||
763 | // The transformation is somewhat dangerous if the call's chain was glued to | ||||||
764 | // the call. After MoveBelowOrigChain the load is moved between the call and | ||||||
765 | // the chain, this can create a cycle if the load is not folded. So it is | ||||||
766 | // *really* important that we are sure the load will be folded. | ||||||
767 | if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) | ||||||
768 | return false; | ||||||
769 | LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); | ||||||
770 | if (!LD || | ||||||
771 | !LD->isSimple() || | ||||||
772 | LD->getAddressingMode() != ISD::UNINDEXED || | ||||||
773 | LD->getExtensionType() != ISD::NON_EXTLOAD) | ||||||
774 | return false; | ||||||
775 | |||||||
776 | // Now let's find the callseq_start. | ||||||
777 | while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) { | ||||||
778 | if (!Chain.hasOneUse()) | ||||||
779 | return false; | ||||||
780 | Chain = Chain.getOperand(0); | ||||||
781 | } | ||||||
782 | |||||||
783 | if (!Chain.getNumOperands()) | ||||||
784 | return false; | ||||||
785 | // Since we are not checking for AA here, conservatively abort if the chain | ||||||
786 | // writes to memory. It's not safe to move the callee (a load) across a store. | ||||||
787 | if (isa<MemSDNode>(Chain.getNode()) && | ||||||
788 | cast<MemSDNode>(Chain.getNode())->writeMem()) | ||||||
789 | return false; | ||||||
790 | if (Chain.getOperand(0).getNode() == Callee.getNode()) | ||||||
791 | return true; | ||||||
792 | if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && | ||||||
793 | Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && | ||||||
794 | Callee.getValue(1).hasOneUse()) | ||||||
795 | return true; | ||||||
796 | return false; | ||||||
797 | } | ||||||
798 | |||||||
799 | void X86DAGToDAGISel::PreprocessISelDAG() { | ||||||
800 | for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), | ||||||
801 | E = CurDAG->allnodes_end(); I != E; ) { | ||||||
802 | SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. | ||||||
803 | |||||||
804 | // If this is a target specific AND node with no flag usages, turn it back | ||||||
805 | // into ISD::AND to enable test instruction matching. | ||||||
806 | if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) { | ||||||
807 | SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0), | ||||||
808 | N->getOperand(0), N->getOperand(1)); | ||||||
809 | --I; | ||||||
810 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||||
811 | ++I; | ||||||
812 | CurDAG->DeleteNode(N); | ||||||
813 | continue; | ||||||
814 | } | ||||||
815 | |||||||
816 | /// Convert vector increment or decrement to sub/add with an all-ones | ||||||
817 | /// constant: | ||||||
818 | /// add X, <1, 1...> --> sub X, <-1, -1...> | ||||||
819 | /// sub X, <1, 1...> --> add X, <-1, -1...> | ||||||
820 | /// The all-ones vector constant can be materialized using a pcmpeq | ||||||
821 | /// instruction that is commonly recognized as an idiom (has no register | ||||||
822 | /// dependency), so that's better/smaller than loading a splat 1 constant. | ||||||
823 | if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && | ||||||
824 | N->getSimpleValueType(0).isVector()) { | ||||||
825 | |||||||
826 | APInt SplatVal; | ||||||
827 | if (X86::isConstantSplat(N->getOperand(1), SplatVal) && | ||||||
828 | SplatVal.isOneValue()) { | ||||||
829 | SDLoc DL(N); | ||||||
830 | |||||||
831 | MVT VT = N->getSimpleValueType(0); | ||||||
832 | unsigned NumElts = VT.getSizeInBits() / 32; | ||||||
833 | SDValue AllOnes = | ||||||
834 | CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts)); | ||||||
835 | AllOnes = CurDAG->getBitcast(VT, AllOnes); | ||||||
836 | |||||||
837 | unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; | ||||||
838 | SDValue Res = | ||||||
839 | CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes); | ||||||
840 | --I; | ||||||
841 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); | ||||||
842 | ++I; | ||||||
843 | CurDAG->DeleteNode(N); | ||||||
844 | continue; | ||||||
845 | } | ||||||
846 | } | ||||||
847 | |||||||
848 | switch (N->getOpcode()) { | ||||||
849 | case ISD::FP_ROUND: | ||||||
850 | case ISD::STRICT_FP_ROUND: | ||||||
851 | case ISD::FP_TO_SINT: | ||||||
852 | case ISD::FP_TO_UINT: | ||||||
853 | case ISD::STRICT_FP_TO_SINT: | ||||||
854 | case ISD::STRICT_FP_TO_UINT: { | ||||||
855 | // Replace vector fp_to_s/uint with their X86 specific equivalent so we | ||||||
856 | // don't need 2 sets of patterns. | ||||||
857 | if (!N->getSimpleValueType(0).isVector()) | ||||||
858 | break; | ||||||
859 | |||||||
860 | unsigned NewOpc; | ||||||
861 | switch (N->getOpcode()) { | ||||||
862 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 862); | ||||||
863 | case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break; | ||||||
864 | case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break; | ||||||
865 | case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break; | ||||||
866 | case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break; | ||||||
867 | case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break; | ||||||
868 | case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break; | ||||||
869 | } | ||||||
870 | SDValue Res; | ||||||
871 | if (N->isStrictFPOpcode()) | ||||||
872 | Res = | ||||||
873 | CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other}, | ||||||
874 | {N->getOperand(0), N->getOperand(1)}); | ||||||
875 | else | ||||||
876 | Res = | ||||||
877 | CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), | ||||||
878 | N->getOperand(0)); | ||||||
879 | --I; | ||||||
880 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); | ||||||
881 | ++I; | ||||||
882 | CurDAG->DeleteNode(N); | ||||||
883 | continue; | ||||||
884 | } | ||||||
885 | case ISD::SHL: | ||||||
886 | case ISD::SRA: | ||||||
887 | case ISD::SRL: { | ||||||
888 | // Replace vector shifts with their X86 specific equivalent so we don't | ||||||
889 | // need 2 sets of patterns. | ||||||
890 | if (!N->getValueType(0).isVector()) | ||||||
891 | break; | ||||||
892 | |||||||
893 | unsigned NewOpc; | ||||||
894 | switch (N->getOpcode()) { | ||||||
895 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 895); | ||||||
896 | case ISD::SHL: NewOpc = X86ISD::VSHLV; break; | ||||||
897 | case ISD::SRA: NewOpc = X86ISD::VSRAV; break; | ||||||
898 | case ISD::SRL: NewOpc = X86ISD::VSRLV; break; | ||||||
899 | } | ||||||
900 | SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), | ||||||
901 | N->getOperand(0), N->getOperand(1)); | ||||||
902 | --I; | ||||||
903 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||||
904 | ++I; | ||||||
905 | CurDAG->DeleteNode(N); | ||||||
906 | continue; | ||||||
907 | } | ||||||
908 | case ISD::ANY_EXTEND: | ||||||
909 | case ISD::ANY_EXTEND_VECTOR_INREG: { | ||||||
910 | // Replace vector any extend with the zero extend equivalents so we don't | ||||||
911 | // need 2 sets of patterns. Ignore vXi1 extensions. | ||||||
912 | if (!N->getValueType(0).isVector()) | ||||||
913 | break; | ||||||
914 | |||||||
915 | unsigned NewOpc; | ||||||
916 | if (N->getOperand(0).getScalarValueSizeInBits() == 1) { | ||||||
917 | assert(N->getOpcode() == ISD::ANY_EXTEND &&((N->getOpcode() == ISD::ANY_EXTEND && "Unexpected opcode for mask vector!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::ANY_EXTEND && \"Unexpected opcode for mask vector!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 918, __PRETTY_FUNCTION__)) | ||||||
918 | "Unexpected opcode for mask vector!")((N->getOpcode() == ISD::ANY_EXTEND && "Unexpected opcode for mask vector!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::ANY_EXTEND && \"Unexpected opcode for mask vector!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 918, __PRETTY_FUNCTION__)); | ||||||
919 | NewOpc = ISD::SIGN_EXTEND; | ||||||
920 | } else { | ||||||
921 | NewOpc = N->getOpcode() == ISD::ANY_EXTEND | ||||||
922 | ? ISD::ZERO_EXTEND | ||||||
923 | : ISD::ZERO_EXTEND_VECTOR_INREG; | ||||||
924 | } | ||||||
925 | |||||||
926 | SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), | ||||||
927 | N->getOperand(0)); | ||||||
928 | --I; | ||||||
929 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||||
930 | ++I; | ||||||
931 | CurDAG->DeleteNode(N); | ||||||
932 | continue; | ||||||
933 | } | ||||||
934 | case ISD::FCEIL: | ||||||
935 | case ISD::STRICT_FCEIL: | ||||||
936 | case ISD::FFLOOR: | ||||||
937 | case ISD::STRICT_FFLOOR: | ||||||
938 | case ISD::FTRUNC: | ||||||
939 | case ISD::STRICT_FTRUNC: | ||||||
940 | case ISD::FNEARBYINT: | ||||||
941 | case ISD::STRICT_FNEARBYINT: | ||||||
942 | case ISD::FRINT: | ||||||
943 | case ISD::STRICT_FRINT: { | ||||||
944 | // Replace fp rounding with their X86 specific equivalent so we don't | ||||||
945 | // need 2 sets of patterns. | ||||||
946 | unsigned Imm; | ||||||
947 | switch (N->getOpcode()) { | ||||||
948 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 948); | ||||||
949 | case ISD::STRICT_FCEIL: | ||||||
950 | case ISD::FCEIL: Imm = 0xA; break; | ||||||
951 | case ISD::STRICT_FFLOOR: | ||||||
952 | case ISD::FFLOOR: Imm = 0x9; break; | ||||||
953 | case ISD::STRICT_FTRUNC: | ||||||
954 | case ISD::FTRUNC: Imm = 0xB; break; | ||||||
955 | case ISD::STRICT_FNEARBYINT: | ||||||
956 | case ISD::FNEARBYINT: Imm = 0xC; break; | ||||||
957 | case ISD::STRICT_FRINT: | ||||||
958 | case ISD::FRINT: Imm = 0x4; break; | ||||||
959 | } | ||||||
960 | SDLoc dl(N); | ||||||
961 | bool IsStrict = N->isStrictFPOpcode(); | ||||||
962 | SDValue Res; | ||||||
963 | if (IsStrict) | ||||||
964 | Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, | ||||||
965 | {N->getValueType(0), MVT::Other}, | ||||||
966 | {N->getOperand(0), N->getOperand(1), | ||||||
967 | CurDAG->getTargetConstant(Imm, dl, MVT::i8)}); | ||||||
968 | else | ||||||
969 | Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0), | ||||||
970 | N->getOperand(0), | ||||||
971 | CurDAG->getTargetConstant(Imm, dl, MVT::i8)); | ||||||
972 | --I; | ||||||
973 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); | ||||||
974 | ++I; | ||||||
975 | CurDAG->DeleteNode(N); | ||||||
976 | continue; | ||||||
977 | } | ||||||
978 | case X86ISD::FANDN: | ||||||
979 | case X86ISD::FAND: | ||||||
980 | case X86ISD::FOR: | ||||||
981 | case X86ISD::FXOR: { | ||||||
982 | // Widen scalar fp logic ops to vector to reduce isel patterns. | ||||||
983 | // FIXME: Can we do this during lowering/combine. | ||||||
984 | MVT VT = N->getSimpleValueType(0); | ||||||
985 | if (VT.isVector() || VT == MVT::f128) | ||||||
986 | break; | ||||||
987 | |||||||
988 | MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32; | ||||||
989 | SDLoc dl(N); | ||||||
990 | SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, | ||||||
991 | N->getOperand(0)); | ||||||
992 | SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, | ||||||
993 | N->getOperand(1)); | ||||||
994 | |||||||
995 | SDValue Res; | ||||||
996 | if (Subtarget->hasSSE2()) { | ||||||
997 | EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger(); | ||||||
998 | Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0); | ||||||
999 | Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1); | ||||||
1000 | unsigned Opc; | ||||||
1001 | switch (N->getOpcode()) { | ||||||
1002 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1002); | ||||||
1003 | case X86ISD::FANDN: Opc = X86ISD::ANDNP; break; | ||||||
1004 | case X86ISD::FAND: Opc = ISD::AND; break; | ||||||
1005 | case X86ISD::FOR: Opc = ISD::OR; break; | ||||||
1006 | case X86ISD::FXOR: Opc = ISD::XOR; break; | ||||||
1007 | } | ||||||
1008 | Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1); | ||||||
1009 | Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res); | ||||||
1010 | } else { | ||||||
1011 | Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1); | ||||||
1012 | } | ||||||
1013 | Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, | ||||||
1014 | CurDAG->getIntPtrConstant(0, dl)); | ||||||
1015 | --I; | ||||||
1016 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); | ||||||
1017 | ++I; | ||||||
1018 | CurDAG->DeleteNode(N); | ||||||
1019 | continue; | ||||||
1020 | } | ||||||
1021 | } | ||||||
1022 | |||||||
1023 | if (OptLevel != CodeGenOpt::None && | ||||||
1024 | // Only do this when the target can fold the load into the call or | ||||||
1025 | // jmp. | ||||||
1026 | !Subtarget->useRetpolineIndirectCalls() && | ||||||
1027 | ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || | ||||||
1028 | (N->getOpcode() == X86ISD::TC_RETURN && | ||||||
1029 | (Subtarget->is64Bit() || | ||||||
1030 | !getTargetMachine().isPositionIndependent())))) { | ||||||
1031 | /// Also try moving call address load from outside callseq_start to just | ||||||
1032 | /// before the call to allow it to be folded. | ||||||
1033 | /// | ||||||
1034 | /// [Load chain] | ||||||
1035 | /// ^ | ||||||
1036 | /// | | ||||||
1037 | /// [Load] | ||||||
1038 | /// ^ ^ | ||||||
1039 | /// | | | ||||||
1040 | /// / \-- | ||||||
1041 | /// / | | ||||||
1042 | ///[CALLSEQ_START] | | ||||||
1043 | /// ^ | | ||||||
1044 | /// | | | ||||||
1045 | /// [LOAD/C2Reg] | | ||||||
1046 | /// | | | ||||||
1047 | /// \ / | ||||||
1048 | /// \ / | ||||||
1049 | /// [CALL] | ||||||
1050 | bool HasCallSeq = N->getOpcode() == X86ISD::CALL; | ||||||
1051 | SDValue Chain = N->getOperand(0); | ||||||
1052 | SDValue Load = N->getOperand(1); | ||||||
1053 | if (!isCalleeLoad(Load, Chain, HasCallSeq)) | ||||||
1054 | continue; | ||||||
1055 | moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); | ||||||
1056 | ++NumLoadMoved; | ||||||
1057 | continue; | ||||||
1058 | } | ||||||
1059 | |||||||
1060 | // Lower fpround and fpextend nodes that target the FP stack to be store and | ||||||
1061 | // load to the stack. This is a gross hack. We would like to simply mark | ||||||
1062 | // these as being illegal, but when we do that, legalize produces these when | ||||||
1063 | // it expands calls, then expands these in the same legalize pass. We would | ||||||
1064 | // like dag combine to be able to hack on these between the call expansion | ||||||
1065 | // and the node legalization. As such this pass basically does "really | ||||||
1066 | // late" legalization of these inline with the X86 isel pass. | ||||||
1067 | // FIXME: This should only happen when not compiled with -O0. | ||||||
1068 | switch (N->getOpcode()) { | ||||||
1069 | default: continue; | ||||||
1070 | case ISD::FP_ROUND: | ||||||
1071 | case ISD::FP_EXTEND: | ||||||
1072 | { | ||||||
1073 | MVT SrcVT = N->getOperand(0).getSimpleValueType(); | ||||||
1074 | MVT DstVT = N->getSimpleValueType(0); | ||||||
1075 | |||||||
1076 | // If any of the sources are vectors, no fp stack involved. | ||||||
1077 | if (SrcVT.isVector() || DstVT.isVector()) | ||||||
1078 | continue; | ||||||
1079 | |||||||
1080 | // If the source and destination are SSE registers, then this is a legal | ||||||
1081 | // conversion that should not be lowered. | ||||||
1082 | const X86TargetLowering *X86Lowering = | ||||||
1083 | static_cast<const X86TargetLowering *>(TLI); | ||||||
1084 | bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); | ||||||
1085 | bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); | ||||||
1086 | if (SrcIsSSE && DstIsSSE) | ||||||
1087 | continue; | ||||||
1088 | |||||||
1089 | if (!SrcIsSSE && !DstIsSSE) { | ||||||
1090 | // If this is an FPStack extension, it is a noop. | ||||||
1091 | if (N->getOpcode() == ISD::FP_EXTEND) | ||||||
1092 | continue; | ||||||
1093 | // If this is a value-preserving FPStack truncation, it is a noop. | ||||||
1094 | if (N->getConstantOperandVal(1)) | ||||||
1095 | continue; | ||||||
1096 | } | ||||||
1097 | |||||||
1098 | // Here we could have an FP stack truncation or an FPStack <-> SSE convert. | ||||||
1099 | // FPStack has extload and truncstore. SSE can fold direct loads into other | ||||||
1100 | // operations. Based on this, decide what we want to do. | ||||||
1101 | MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT; | ||||||
1102 | SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); | ||||||
1103 | int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex(); | ||||||
1104 | MachinePointerInfo MPI = | ||||||
1105 | MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); | ||||||
1106 | SDLoc dl(N); | ||||||
1107 | |||||||
1108 | // FIXME: optimize the case where the src/dest is a load or store? | ||||||
1109 | |||||||
1110 | SDValue Store = CurDAG->getTruncStore( | ||||||
1111 | CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT); | ||||||
1112 | SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, | ||||||
1113 | MemTmp, MPI, MemVT); | ||||||
1114 | |||||||
1115 | // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the | ||||||
1116 | // extload we created. This will cause general havok on the dag because | ||||||
1117 | // anything below the conversion could be folded into other existing nodes. | ||||||
1118 | // To avoid invalidating 'I', back it up to the convert node. | ||||||
1119 | --I; | ||||||
1120 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); | ||||||
1121 | break; | ||||||
1122 | } | ||||||
1123 | |||||||
1124 | //The sequence of events for lowering STRICT_FP versions of these nodes requires | ||||||
1125 | //dealing with the chain differently, as there is already a preexisting chain. | ||||||
1126 | case ISD::STRICT_FP_ROUND: | ||||||
1127 | case ISD::STRICT_FP_EXTEND: | ||||||
1128 | { | ||||||
1129 | MVT SrcVT = N->getOperand(1).getSimpleValueType(); | ||||||
1130 | MVT DstVT = N->getSimpleValueType(0); | ||||||
1131 | |||||||
1132 | // If any of the sources are vectors, no fp stack involved. | ||||||
1133 | if (SrcVT.isVector() || DstVT.isVector()) | ||||||
1134 | continue; | ||||||
1135 | |||||||
1136 | // If the source and destination are SSE registers, then this is a legal | ||||||
1137 | // conversion that should not be lowered. | ||||||
1138 | const X86TargetLowering *X86Lowering = | ||||||
1139 | static_cast<const X86TargetLowering *>(TLI); | ||||||
1140 | bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); | ||||||
1141 | bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); | ||||||
1142 | if (SrcIsSSE && DstIsSSE) | ||||||
1143 | continue; | ||||||
1144 | |||||||
1145 | if (!SrcIsSSE && !DstIsSSE) { | ||||||
1146 | // If this is an FPStack extension, it is a noop. | ||||||
1147 | if (N->getOpcode() == ISD::STRICT_FP_EXTEND) | ||||||
1148 | continue; | ||||||
1149 | // If this is a value-preserving FPStack truncation, it is a noop. | ||||||
1150 | if (N->getConstantOperandVal(2)) | ||||||
1151 | continue; | ||||||
1152 | } | ||||||
1153 | |||||||
1154 | // Here we could have an FP stack truncation or an FPStack <-> SSE convert. | ||||||
1155 | // FPStack has extload and truncstore. SSE can fold direct loads into other | ||||||
1156 | // operations. Based on this, decide what we want to do. | ||||||
1157 | MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT; | ||||||
1158 | SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); | ||||||
1159 | int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex(); | ||||||
1160 | MachinePointerInfo MPI = | ||||||
1161 | MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); | ||||||
1162 | SDLoc dl(N); | ||||||
1163 | |||||||
1164 | // FIXME: optimize the case where the src/dest is a load or store? | ||||||
1165 | |||||||
1166 | //Since the operation is StrictFP, use the preexisting chain. | ||||||
1167 | SDValue Store, Result; | ||||||
1168 | if (!SrcIsSSE) { | ||||||
1169 | SDVTList VTs = CurDAG->getVTList(MVT::Other); | ||||||
1170 | SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp}; | ||||||
1171 | Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT, | ||||||
1172 | MPI, /*Align*/ 0, | ||||||
1173 | MachineMemOperand::MOStore); | ||||||
1174 | if (N->getFlags().hasNoFPExcept()) { | ||||||
1175 | SDNodeFlags Flags = Store->getFlags(); | ||||||
1176 | Flags.setNoFPExcept(true); | ||||||
1177 | Store->setFlags(Flags); | ||||||
1178 | } | ||||||
1179 | } else { | ||||||
1180 | assert(SrcVT == MemVT && "Unexpected VT!")((SrcVT == MemVT && "Unexpected VT!") ? static_cast< void> (0) : __assert_fail ("SrcVT == MemVT && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1180, __PRETTY_FUNCTION__)); | ||||||
1181 | Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp, | ||||||
1182 | MPI); | ||||||
1183 | } | ||||||
1184 | |||||||
1185 | if (!DstIsSSE) { | ||||||
1186 | SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other); | ||||||
1187 | SDValue Ops[] = {Store, MemTmp}; | ||||||
1188 | Result = | ||||||
1189 | CurDAG->getMemIntrinsicNode(X86ISD::FLD, dl, VTs, Ops, MemVT, MPI, | ||||||
1190 | /*Align*/ 0, MachineMemOperand::MOLoad); | ||||||
1191 | if (N->getFlags().hasNoFPExcept()) { | ||||||
1192 | SDNodeFlags Flags = Result->getFlags(); | ||||||
1193 | Flags.setNoFPExcept(true); | ||||||
1194 | Result->setFlags(Flags); | ||||||
1195 | } | ||||||
1196 | } else { | ||||||
1197 | assert(DstVT == MemVT && "Unexpected VT!")((DstVT == MemVT && "Unexpected VT!") ? static_cast< void> (0) : __assert_fail ("DstVT == MemVT && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1197, __PRETTY_FUNCTION__)); | ||||||
1198 | Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI); | ||||||
1199 | } | ||||||
1200 | |||||||
1201 | // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the | ||||||
1202 | // extload we created. This will cause general havok on the dag because | ||||||
1203 | // anything below the conversion could be folded into other existing nodes. | ||||||
1204 | // To avoid invalidating 'I', back it up to the convert node. | ||||||
1205 | --I; | ||||||
1206 | CurDAG->ReplaceAllUsesWith(N, Result.getNode()); | ||||||
1207 | break; | ||||||
1208 | } | ||||||
1209 | } | ||||||
1210 | |||||||
1211 | |||||||
1212 | // Now that we did that, the node is dead. Increment the iterator to the | ||||||
1213 | // next node to process, then delete N. | ||||||
1214 | ++I; | ||||||
1215 | CurDAG->DeleteNode(N); | ||||||
1216 | } | ||||||
1217 | |||||||
1218 | // The load+call transform above can leave some dead nodes in the graph. Make | ||||||
1219 | // sure we remove them. Its possible some of the other transforms do to so | ||||||
1220 | // just remove dead nodes unconditionally. | ||||||
1221 | CurDAG->RemoveDeadNodes(); | ||||||
1222 | } | ||||||
1223 | |||||||
1224 | // Look for a redundant movzx/movsx that can occur after an 8-bit divrem. | ||||||
1225 | bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) { | ||||||
1226 | unsigned Opc = N->getMachineOpcode(); | ||||||
1227 | if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 && | ||||||
1228 | Opc != X86::MOVSX64rr8) | ||||||
1229 | return false; | ||||||
1230 | |||||||
1231 | SDValue N0 = N->getOperand(0); | ||||||
1232 | |||||||
1233 | // We need to be extracting the lower bit of an extend. | ||||||
1234 | if (!N0.isMachineOpcode() || | ||||||
1235 | N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG || | ||||||
1236 | N0.getConstantOperandVal(1) != X86::sub_8bit) | ||||||
1237 | return false; | ||||||
1238 | |||||||
1239 | // We're looking for either a movsx or movzx to match the original opcode. | ||||||
1240 | unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX | ||||||
1241 | : X86::MOVSX32rr8_NOREX; | ||||||
1242 | SDValue N00 = N0.getOperand(0); | ||||||
1243 | if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc) | ||||||
1244 | return false; | ||||||
1245 | |||||||
1246 | if (Opc == X86::MOVSX64rr8) { | ||||||
1247 | // If we had a sign extend from 8 to 64 bits. We still need to go from 32 | ||||||
1248 | // to 64. | ||||||
1249 | MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N), | ||||||
1250 | MVT::i64, N00); | ||||||
1251 | ReplaceUses(N, Extend); | ||||||
1252 | } else { | ||||||
1253 | // Ok we can drop this extend and just use the original extend. | ||||||
1254 | ReplaceUses(N, N00.getNode()); | ||||||
1255 | } | ||||||
1256 | |||||||
1257 | return true; | ||||||
1258 | } | ||||||
1259 | |||||||
1260 | void X86DAGToDAGISel::PostprocessISelDAG() { | ||||||
1261 | // Skip peepholes at -O0. | ||||||
1262 | if (TM.getOptLevel() == CodeGenOpt::None) | ||||||
1263 | return; | ||||||
1264 | |||||||
1265 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||||
1266 | |||||||
1267 | bool MadeChange = false; | ||||||
1268 | while (Position != CurDAG->allnodes_begin()) { | ||||||
1269 | SDNode *N = &*--Position; | ||||||
1270 | // Skip dead nodes and any non-machine opcodes. | ||||||
1271 | if (N->use_empty() || !N->isMachineOpcode()) | ||||||
1272 | continue; | ||||||
1273 | |||||||
1274 | if (tryOptimizeRem8Extend(N)) { | ||||||
1275 | MadeChange = true; | ||||||
1276 | continue; | ||||||
1277 | } | ||||||
1278 | |||||||
1279 | // Look for a TESTrr+ANDrr pattern where both operands of the test are | ||||||
1280 | // the same. Rewrite to remove the AND. | ||||||
1281 | unsigned Opc = N->getMachineOpcode(); | ||||||
1282 | if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr || | ||||||
1283 | Opc == X86::TEST32rr || Opc == X86::TEST64rr) && | ||||||
1284 | N->getOperand(0) == N->getOperand(1) && | ||||||
1285 | N->isOnlyUserOf(N->getOperand(0).getNode()) && | ||||||
1286 | N->getOperand(0).isMachineOpcode()) { | ||||||
1287 | SDValue And = N->getOperand(0); | ||||||
1288 | unsigned N0Opc = And.getMachineOpcode(); | ||||||
1289 | if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr || | ||||||
1290 | N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) { | ||||||
1291 | MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N), | ||||||
1292 | MVT::i32, | ||||||
1293 | And.getOperand(0), | ||||||
1294 | And.getOperand(1)); | ||||||
1295 | ReplaceUses(N, Test); | ||||||
1296 | MadeChange = true; | ||||||
1297 | continue; | ||||||
1298 | } | ||||||
1299 | if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm || | ||||||
1300 | N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) { | ||||||
1301 | unsigned NewOpc; | ||||||
1302 | switch (N0Opc) { | ||||||
1303 | case X86::AND8rm: NewOpc = X86::TEST8mr; break; | ||||||
1304 | case X86::AND16rm: NewOpc = X86::TEST16mr; break; | ||||||
1305 | case X86::AND32rm: NewOpc = X86::TEST32mr; break; | ||||||
1306 | case X86::AND64rm: NewOpc = X86::TEST64mr; break; | ||||||
1307 | } | ||||||
1308 | |||||||
1309 | // Need to swap the memory and register operand. | ||||||
1310 | SDValue Ops[] = { And.getOperand(1), | ||||||
1311 | And.getOperand(2), | ||||||
1312 | And.getOperand(3), | ||||||
1313 | And.getOperand(4), | ||||||
1314 | And.getOperand(5), | ||||||
1315 | And.getOperand(0), | ||||||
1316 | And.getOperand(6) /* Chain */ }; | ||||||
1317 | MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N), | ||||||
1318 | MVT::i32, MVT::Other, Ops); | ||||||
1319 | ReplaceUses(N, Test); | ||||||
1320 | MadeChange = true; | ||||||
1321 | continue; | ||||||
1322 | } | ||||||
1323 | } | ||||||
1324 | |||||||
1325 | // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is | ||||||
1326 | // used. We're doing this late so we can prefer to fold the AND into masked | ||||||
1327 | // comparisons. Doing that can be better for the live range of the mask | ||||||
1328 | // register. | ||||||
1329 | if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr || | ||||||
1330 | Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) && | ||||||
1331 | N->getOperand(0) == N->getOperand(1) && | ||||||
1332 | N->isOnlyUserOf(N->getOperand(0).getNode()) && | ||||||
1333 | N->getOperand(0).isMachineOpcode() && | ||||||
1334 | onlyUsesZeroFlag(SDValue(N, 0))) { | ||||||
1335 | SDValue And = N->getOperand(0); | ||||||
1336 | unsigned N0Opc = And.getMachineOpcode(); | ||||||
1337 | // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other | ||||||
1338 | // KAND instructions and KTEST use the same ISA feature. | ||||||
1339 | if (N0Opc == X86::KANDBrr || | ||||||
1340 | (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) || | ||||||
1341 | N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) { | ||||||
1342 | unsigned NewOpc; | ||||||
1343 | switch (Opc) { | ||||||
1344 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 1344); | ||||||
1345 | case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break; | ||||||
1346 | case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break; | ||||||
1347 | case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break; | ||||||
1348 | case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break; | ||||||
1349 | } | ||||||
1350 | MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N), | ||||||
1351 | MVT::i32, | ||||||
1352 | And.getOperand(0), | ||||||
1353 | And.getOperand(1)); | ||||||
1354 | ReplaceUses(N, KTest); | ||||||
1355 | MadeChange = true; | ||||||
1356 | continue; | ||||||
1357 | } | ||||||
1358 | } | ||||||
1359 | |||||||
1360 | // Attempt to remove vectors moves that were inserted to zero upper bits. | ||||||
1361 | if (Opc != TargetOpcode::SUBREG_TO_REG) | ||||||
1362 | continue; | ||||||
1363 | |||||||
1364 | unsigned SubRegIdx = N->getConstantOperandVal(2); | ||||||
1365 | if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm) | ||||||
1366 | continue; | ||||||
1367 | |||||||
1368 | SDValue Move = N->getOperand(1); | ||||||
1369 | if (!Move.isMachineOpcode()) | ||||||
1370 | continue; | ||||||
1371 | |||||||
1372 | // Make sure its one of the move opcodes we recognize. | ||||||
1373 | switch (Move.getMachineOpcode()) { | ||||||
1374 | default: | ||||||
1375 | continue; | ||||||
1376 | case X86::VMOVAPDrr: case X86::VMOVUPDrr: | ||||||
1377 | case X86::VMOVAPSrr: case X86::VMOVUPSrr: | ||||||
1378 | case X86::VMOVDQArr: case X86::VMOVDQUrr: | ||||||
1379 | case X86::VMOVAPDYrr: case X86::VMOVUPDYrr: | ||||||
1380 | case X86::VMOVAPSYrr: case X86::VMOVUPSYrr: | ||||||
1381 | case X86::VMOVDQAYrr: case X86::VMOVDQUYrr: | ||||||
1382 | case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr: | ||||||
1383 | case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr: | ||||||
1384 | case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr: | ||||||
1385 | case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr: | ||||||
1386 | case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr: | ||||||
1387 | case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr: | ||||||
1388 | case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr: | ||||||
1389 | case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr: | ||||||
1390 | break; | ||||||
1391 | } | ||||||
1392 | |||||||
1393 | SDValue In = Move.getOperand(0); | ||||||
1394 | if (!In.isMachineOpcode() || | ||||||
1395 | In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END) | ||||||
1396 | continue; | ||||||
1397 | |||||||
1398 | // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers | ||||||
1399 | // the SHA instructions which use a legacy encoding. | ||||||
1400 | uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags; | ||||||
1401 | if ((TSFlags & X86II::EncodingMask) != X86II::VEX && | ||||||
1402 | (TSFlags & X86II::EncodingMask) != X86II::EVEX && | ||||||
1403 | (TSFlags & X86II::EncodingMask) != X86II::XOP) | ||||||
1404 | continue; | ||||||
1405 | |||||||
1406 | // Producing instruction is another vector instruction. We can drop the | ||||||
1407 | // move. | ||||||
1408 | CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2)); | ||||||
1409 | MadeChange = true; | ||||||
1410 | } | ||||||
1411 | |||||||
1412 | if (MadeChange) | ||||||
1413 | CurDAG->RemoveDeadNodes(); | ||||||
1414 | } | ||||||
1415 | |||||||
1416 | |||||||
1417 | /// Emit any code that needs to be executed only in the main function. | ||||||
1418 | void X86DAGToDAGISel::emitSpecialCodeForMain() { | ||||||
1419 | if (Subtarget->isTargetCygMing()) { | ||||||
1420 | TargetLowering::ArgListTy Args; | ||||||
1421 | auto &DL = CurDAG->getDataLayout(); | ||||||
1422 | |||||||
1423 | TargetLowering::CallLoweringInfo CLI(*CurDAG); | ||||||
1424 | CLI.setChain(CurDAG->getRoot()) | ||||||
1425 | .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()), | ||||||
1426 | CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)), | ||||||
1427 | std::move(Args)); | ||||||
1428 | const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); | ||||||
1429 | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); | ||||||
1430 | CurDAG->setRoot(Result.second); | ||||||
1431 | } | ||||||
1432 | } | ||||||
1433 | |||||||
1434 | void X86DAGToDAGISel::emitFunctionEntryCode() { | ||||||
1435 | // If this is main, emit special code for main. | ||||||
1436 | const Function &F = MF->getFunction(); | ||||||
1437 | if (F.hasExternalLinkage() && F.getName() == "main") | ||||||
1438 | emitSpecialCodeForMain(); | ||||||
1439 | } | ||||||
1440 | |||||||
// On 64-bit platforms, we can run into an issue where a frame index
// includes a displacement that, when added to the explicit displacement,
// will overflow the displacement field. Assuming that the frame index
// displacement fits into a 31-bit integer (which is only slightly more
// aggressive than the current fundamental assumption that it fits into
// a 32-bit integer), a 31-bit disp should always be safe.
static bool isDispSafeForFrameIndex(int64_t Val) {
  // Equivalent to isInt<31>(Val): Val must lie in [-2^30, 2^30).
  const int64_t Limit = int64_t(1) << 30;
  return Val >= -Limit && Val < Limit;
}
1450 | |||||||
1451 | bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset, | ||||||
1452 | X86ISelAddressMode &AM) { | ||||||
1453 | // We may have already matched a displacement and the caller just added the | ||||||
1454 | // symbolic displacement. So we still need to do the checks even if Offset | ||||||
1455 | // is zero. | ||||||
1456 | |||||||
1457 | int64_t Val = AM.Disp + Offset; | ||||||
1458 | |||||||
1459 | // Cannot combine ExternalSymbol displacements with integer offsets. | ||||||
1460 | if (Val != 0 && (AM.ES || AM.MCSym)) | ||||||
1461 | return true; | ||||||
1462 | |||||||
1463 | CodeModel::Model M = TM.getCodeModel(); | ||||||
1464 | if (Subtarget->is64Bit()) { | ||||||
1465 | if (Val != 0 && | ||||||
1466 | !X86::isOffsetSuitableForCodeModel(Val, M, | ||||||
1467 | AM.hasSymbolicDisplacement())) | ||||||
1468 | return true; | ||||||
1469 | // In addition to the checks required for a register base, check that | ||||||
1470 | // we do not try to use an unsafe Disp with a frame index. | ||||||
1471 | if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && | ||||||
1472 | !isDispSafeForFrameIndex(Val)) | ||||||
1473 | return true; | ||||||
1474 | } | ||||||
1475 | AM.Disp = Val; | ||||||
1476 | return false; | ||||||
1477 | |||||||
1478 | } | ||||||
1479 | |||||||
1480 | bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ | ||||||
1481 | SDValue Address = N->getOperand(1); | ||||||
1482 | |||||||
1483 | // load gs:0 -> GS segment register. | ||||||
1484 | // load fs:0 -> FS segment register. | ||||||
1485 | // | ||||||
1486 | // This optimization is valid because the GNU TLS model defines that | ||||||
1487 | // gs:0 (or fs:0 on X86-64) contains its own address. | ||||||
1488 | // For more information see http://people.redhat.com/drepper/tls.pdf | ||||||
1489 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) | ||||||
1490 | if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr && | ||||||
1491 | !IndirectTlsSegRefs && | ||||||
1492 | (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() || | ||||||
1493 | Subtarget->isTargetFuchsia())) | ||||||
1494 | switch (N->getPointerInfo().getAddrSpace()) { | ||||||
1495 | case 256: | ||||||
1496 | AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); | ||||||
1497 | return false; | ||||||
1498 | case 257: | ||||||
1499 | AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); | ||||||
1500 | return false; | ||||||
1501 | // Address space 258 is not handled here, because it is not used to | ||||||
1502 | // address TLS areas. | ||||||
1503 | } | ||||||
1504 | |||||||
1505 | return true; | ||||||
1506 | } | ||||||
1507 | |||||||
/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
/// mode. These wrap things that will resolve down into a symbol reference.
/// If no match is possible, this returns true, otherwise it returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  bool IsRIPRelTLS = false;
  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
  if (IsRIPRel) {
    SDValue Val = N.getOperand(0);
    if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
      IsRIPRelTLS = true;
  }

  // We can't use an addressing mode in the 64-bit large code model.
  // Global TLS addressing is an exception. In the medium code model,
  // we can use a mode when RIP wrappers are present.
  // That signifies access to globals that are known to be "near",
  // such as the GOT itself.
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit() &&
      ((M == CodeModel::Large && !IsRIPRelTLS) ||
       (M == CodeModel::Medium && !IsRIPRel)))
    return true;

  // Base and index reg must be 0 in order to use %rip as base.
  if (IsRIPRel && AM.hasBaseOrIndexReg())
    return true;

  // Make a local copy in case we can't do this fold.
  X86ISelAddressMode Backup = AM;

  // Record the symbol (and any constant offset it carries) into AM,
  // dispatching on the concrete node kind wrapped by the Wrapper/WrapperRIP.
  int64_t Offset = 0;
  SDValue N0 = N.getOperand(0);
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
    AM.GV = G->getGlobal();
    AM.SymbolFlags = G->getTargetFlags();
    Offset = G->getOffset();
  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
    AM.CP = CP->getConstVal();
    AM.Align = CP->getAlignment();
    AM.SymbolFlags = CP->getTargetFlags();
    Offset = CP->getOffset();
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
    AM.ES = S->getSymbol();
    AM.SymbolFlags = S->getTargetFlags();
  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
    AM.MCSym = S->getMCSymbol();
  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
    AM.JT = J->getIndex();
    AM.SymbolFlags = J->getTargetFlags();
  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
    AM.BlockAddr = BA->getBlockAddress();
    AM.SymbolFlags = BA->getTargetFlags();
    Offset = BA->getOffset();
  } else
    llvm_unreachable("Unhandled symbol reference node.");

  // Folding the offset may make the displacement unrepresentable; if so,
  // undo everything recorded above and report failure.
  if (foldOffsetIntoAddress(Offset, AM)) {
    AM = Backup;
    return true;
  }

  if (IsRIPRel)
    AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));

  // Commit the changes now that we know this fold is safe.
  return false;
}
1580 | |||||||
1581 | /// Add the specified node to the specified addressing mode, returning true if | ||||||
1582 | /// it cannot be done. This just pattern matches for the addressing mode. | ||||||
1583 | bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { | ||||||
1584 | if (matchAddressRecursively(N, AM, 0)) | ||||||
1585 | return true; | ||||||
1586 | |||||||
1587 | // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has | ||||||
1588 | // a smaller encoding and avoids a scaled-index. | ||||||
1589 | if (AM.Scale == 2 && | ||||||
1590 | AM.BaseType == X86ISelAddressMode::RegBase && | ||||||
1591 | AM.Base_Reg.getNode() == nullptr) { | ||||||
1592 | AM.Base_Reg = AM.IndexReg; | ||||||
1593 | AM.Scale = 1; | ||||||
1594 | } | ||||||
1595 | |||||||
1596 | // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, | ||||||
1597 | // because it has a smaller encoding. | ||||||
1598 | // TODO: Which other code models can use this? | ||||||
1599 | switch (TM.getCodeModel()) { | ||||||
1600 | default: break; | ||||||
1601 | case CodeModel::Small: | ||||||
1602 | case CodeModel::Kernel: | ||||||
1603 | if (Subtarget->is64Bit() && | ||||||
1604 | AM.Scale == 1 && | ||||||
1605 | AM.BaseType == X86ISelAddressMode::RegBase && | ||||||
1606 | AM.Base_Reg.getNode() == nullptr && | ||||||
1607 | AM.IndexReg.getNode() == nullptr && | ||||||
1608 | AM.SymbolFlags == X86II::MO_NO_FLAG && | ||||||
1609 | AM.hasSymbolicDisplacement()) | ||||||
1610 | AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); | ||||||
1611 | break; | ||||||
1612 | } | ||||||
1613 | |||||||
1614 | return false; | ||||||
1615 | } | ||||||
1616 | |||||||
// Fold an ISD::ADD node N into the addressing mode AM. Returns true on
// failure, false on success. May rewrite N (it is taken by reference) to the
// possibly-CSE'd value tracked by the handle.
bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  // First try folding operand 0 and then operand 1 into AM. Operands are
  // re-read through the handle after each recursive call in case N was
  // CSE'd during recursion.
  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commutating the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
                               Depth + 1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}
1651 | |||||||
1652 | // Insert a node into the DAG at least before the Pos node's position. This | ||||||
1653 | // will reposition the node as needed, and will assign it a node ID that is <= | ||||||
1654 | // the Pos node's ID. Note that this does *not* preserve the uniqueness of node | ||||||
1655 | // IDs! The selection DAG must no longer depend on their uniqueness when this | ||||||
1656 | // is used. | ||||||
1657 | static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { | ||||||
1658 | if (N->getNodeId() == -1 || | ||||||
1659 | (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > | ||||||
1660 | SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) { | ||||||
1661 | DAG.RepositionNode(Pos->getIterator(), N.getNode()); | ||||||
1662 | // Mark Node as invalid for pruning as after this it may be a successor to a | ||||||
1663 | // selected node but otherwise be in the same position of Pos. | ||||||
1664 | // Conservatively mark it with the same -abs(Id) to assure node id | ||||||
1665 | // invariant is preserved. | ||||||
1666 | N->setNodeId(Pos->getNodeId()); | ||||||
1667 | SelectionDAGISel::InvalidateNodeId(N.getNode()); | ||||||
1668 | } | ||||||
1669 | } | ||||||
1670 | |||||||
// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  // Only handle a one-use SRL by a constant amount.
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  // The mask must be exactly 0xff shifted left by the scale, and the scale
  // must be 1, 2, or 3 bits (i.e. x2/x4/x8 — all the index scales that an
  // x86 addressing mode can express).
  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  // Build ((X >> 8) & 0xff) << ScaleLog.
  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, Eight);
  insertDAGNode(DAG, N, Srl);
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, And);
  insertDAGNode(DAG, N, ShlCount);
  insertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  DAG.RemoveDeadNode(N.getNode());
  // The trailing SHL is absorbed into the addressing-mode scale; the AND
  // becomes the index register.
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}
1715 | |||||||
// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        X86ISelAddressMode &AM) {
  // N is the AND; its first operand should be (or wrap) the SHL.
  SDValue Shift = N.getOperand(0);

  // Use a signed mask so that shifting right will insert sign bits. These
  // bits will be removed when we shift the result left so it doesn't matter
  // what we use. This might allow a smaller immediate encoding.
  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();

  // If we have an any_extend feeding the AND, look through it to see if there
  // is a shift behind it. But only if the AND doesn't use the extended bits.
  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
  bool FoundAnyExtend = false;
  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
      Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
      isUInt<32>(Mask)) {
    FoundAnyExtend = true;
    Shift = Shift.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  SDValue X = Shift.getOperand(0);

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  if (FoundAnyExtend) {
    // Re-materialize the any_extend we looked through, now applied to X.
    SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
    insertDAGNode(DAG, N, NewX);
    X = NewX;
  }

  // Build (X & (Mask >> ShiftAmt)) << ShiftAmt.
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, NewAnd);
  insertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);
  DAG.RemoveDeadNode(N.getNode());

  // The SHL is absorbed into the addressing-mode scale; the narrowed AND
  // becomes the index register.
  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}
1783 | |||||||
// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
//
// Returns false when the transform was performed (AM updated), true otherwise.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  // Only handle one-use SRL-by-constant shifts.
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that mask is a continuous run of bits.
  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
  if (MaskLZ < ScaleDown)
    return true;
  MaskLZ -= ScaleDown;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
      APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  KnownBits Known = DAG.computeKnownBits(X);
  if (MaskedHighBits != Known.Zero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    insertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  // Build (X >> (ShiftAmt + AMShiftAmt)) << AMShiftAmt; the trailing SHL is
  // then folded into the addressing-mode scale below.
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewSRLAmt);
  insertDAGNode(DAG, N, NewSRL);
  insertDAGNode(DAG, N, NewSHLAmt);
  insertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);
  DAG.RemoveDeadNode(N.getNode());

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}
1894 | |||||||
1895 | // Transform "(X >> SHIFT) & (MASK << C1)" to | ||||||
1896 | // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be | ||||||
1897 | // matched to a BEXTR later. Returns false if the simplification is performed. | ||||||
1898 | static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, | ||||||
1899 | uint64_t Mask, | ||||||
1900 | SDValue Shift, SDValue X, | ||||||
1901 | X86ISelAddressMode &AM, | ||||||
1902 | const X86Subtarget &Subtarget) { | ||||||
1903 | if (Shift.getOpcode() != ISD::SRL || | ||||||
1904 | !isa<ConstantSDNode>(Shift.getOperand(1)) || | ||||||
1905 | !Shift.hasOneUse() || !N.hasOneUse()) | ||||||
1906 | return true; | ||||||
1907 | |||||||
1908 | // Only do this if BEXTR will be matched by matchBEXTRFromAndImm. | ||||||
1909 | if (!Subtarget.hasTBM() && | ||||||
1910 | !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR())) | ||||||
1911 | return true; | ||||||
1912 | |||||||
1913 | // We need to ensure that mask is a continuous run of bits. | ||||||
1914 | if (!isShiftedMask_64(Mask)) return true; | ||||||
1915 | |||||||
1916 | unsigned ShiftAmt = Shift.getConstantOperandVal(1); | ||||||
1917 | |||||||
1918 | // The amount of shift we're trying to fit into the addressing mode is taken | ||||||
1919 | // from the trailing zeros of the mask. | ||||||
1920 | unsigned AMShiftAmt = countTrailingZeros(Mask); | ||||||
1921 | |||||||
1922 | // There is nothing we can do here unless the mask is removing some bits. | ||||||
1923 | // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. | ||||||
1924 | if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; | ||||||
1925 | |||||||
1926 | MVT VT = N.getSimpleValueType(); | ||||||
1927 | SDLoc DL(N); | ||||||
1928 | SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); | ||||||
1929 | SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); | ||||||
1930 | SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT); | ||||||
1931 | SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask); | ||||||
1932 | SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); | ||||||
1933 | SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt); | ||||||
1934 | |||||||
1935 | // Insert the new nodes into the topological ordering. We must do this in | ||||||
1936 | // a valid topological ordering as nothing is going to go back and re-sort | ||||||
1937 | // these nodes. We continually insert before 'N' in sequence as this is | ||||||
1938 | // essentially a pre-flattened and pre-sorted sequence of nodes. There is no | ||||||
1939 | // hierarchy left to express. | ||||||
1940 | insertDAGNode(DAG, N, NewSRLAmt); | ||||||
1941 | insertDAGNode(DAG, N, NewSRL); | ||||||
1942 | insertDAGNode(DAG, N, NewMask); | ||||||
1943 | insertDAGNode(DAG, N, NewAnd); | ||||||
1944 | insertDAGNode(DAG, N, NewSHLAmt); | ||||||
1945 | insertDAGNode(DAG, N, NewSHL); | ||||||
1946 | DAG.ReplaceAllUsesWith(N, NewSHL); | ||||||
1947 | DAG.RemoveDeadNode(N.getNode()); | ||||||
1948 | |||||||
1949 | AM.Scale = 1 << AMShiftAmt; | ||||||
1950 | AM.IndexReg = NewAnd; | ||||||
1951 | return false; | ||||||
1952 | } | ||||||
1953 | |||||||
1954 | bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, | ||||||
1955 | unsigned Depth) { | ||||||
1956 | SDLoc dl(N); | ||||||
1957 | LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { { dbgs() << "MatchAddress: "; AM.dump(CurDAG ); }; } } while (false) | ||||||
1958 | dbgs() << "MatchAddress: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { { dbgs() << "MatchAddress: "; AM.dump(CurDAG ); }; } } while (false) | ||||||
1959 | AM.dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { { dbgs() << "MatchAddress: "; AM.dump(CurDAG ); }; } } while (false) | ||||||
1960 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { { dbgs() << "MatchAddress: "; AM.dump(CurDAG ); }; } } while (false); | ||||||
1961 | // Limit recursion. | ||||||
1962 | if (Depth > 5) | ||||||
1963 | return matchAddressBase(N, AM); | ||||||
1964 | |||||||
1965 | // If this is already a %rip relative address, we can only merge immediates | ||||||
1966 | // into it. Instead of handling this in every case, we handle it here. | ||||||
1967 | // RIP relative addressing: %rip + 32-bit displacement! | ||||||
1968 | if (AM.isRIPRelative()) { | ||||||
1969 | // FIXME: JumpTable and ExternalSymbol address currently don't like | ||||||
1970 | // displacements. It isn't very important, but this should be fixed for | ||||||
1971 | // consistency. | ||||||
1972 | if (!(AM.ES || AM.MCSym) && AM.JT != -1) | ||||||
1973 | return true; | ||||||
1974 | |||||||
1975 | if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) | ||||||
1976 | if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM)) | ||||||
1977 | return false; | ||||||
1978 | return true; | ||||||
1979 | } | ||||||
1980 | |||||||
1981 | switch (N.getOpcode()) { | ||||||
1982 | default: break; | ||||||
1983 | case ISD::LOCAL_RECOVER: { | ||||||
1984 | if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) | ||||||
1985 | if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) { | ||||||
1986 | // Use the symbol and don't prefix it. | ||||||
1987 | AM.MCSym = ESNode->getMCSymbol(); | ||||||
1988 | return false; | ||||||
1989 | } | ||||||
1990 | break; | ||||||
1991 | } | ||||||
1992 | case ISD::Constant: { | ||||||
1993 | uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); | ||||||
1994 | if (!foldOffsetIntoAddress(Val, AM)) | ||||||
1995 | return false; | ||||||
1996 | break; | ||||||
1997 | } | ||||||
1998 | |||||||
1999 | case X86ISD::Wrapper: | ||||||
2000 | case X86ISD::WrapperRIP: | ||||||
2001 | if (!matchWrapper(N, AM)) | ||||||
2002 | return false; | ||||||
2003 | break; | ||||||
2004 | |||||||
2005 | case ISD::LOAD: | ||||||
2006 | if (!matchLoadInAddress(cast<LoadSDNode>(N), AM)) | ||||||
2007 | return false; | ||||||
2008 | break; | ||||||
2009 | |||||||
2010 | case ISD::FrameIndex: | ||||||
2011 | if (AM.BaseType == X86ISelAddressMode::RegBase && | ||||||
2012 | AM.Base_Reg.getNode() == nullptr && | ||||||
2013 | (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { | ||||||
2014 | AM.BaseType = X86ISelAddressMode::FrameIndexBase; | ||||||
2015 | AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); | ||||||
2016 | return false; | ||||||
2017 | } | ||||||
2018 | break; | ||||||
2019 | |||||||
2020 | case ISD::SHL: | ||||||
2021 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) | ||||||
2022 | break; | ||||||
2023 | |||||||
2024 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { | ||||||
2025 | unsigned Val = CN->getZExtValue(); | ||||||
2026 | // Note that we handle x<<1 as (,x,2) rather than (x,x) here so | ||||||
2027 | // that the base operand remains free for further matching. If | ||||||
2028 | // the base doesn't end up getting used, a post-processing step | ||||||
2029 | // in MatchAddress turns (,x,2) into (x,x), which is cheaper. | ||||||
2030 | if (Val == 1 || Val == 2 || Val == 3) { | ||||||
2031 | AM.Scale = 1 << Val; | ||||||
2032 | SDValue ShVal = N.getOperand(0); | ||||||
2033 | |||||||
2034 | // Okay, we know that we have a scale by now. However, if the scaled | ||||||
2035 | // value is an add of something and a constant, we can fold the | ||||||
2036 | // constant into the disp field here. | ||||||
2037 | if (CurDAG->isBaseWithConstantOffset(ShVal)) { | ||||||
2038 | AM.IndexReg = ShVal.getOperand(0); | ||||||
2039 | ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1)); | ||||||
2040 | uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val; | ||||||
2041 | if (!foldOffsetIntoAddress(Disp, AM)) | ||||||
2042 | return false; | ||||||
2043 | } | ||||||
2044 | |||||||
2045 | AM.IndexReg = ShVal; | ||||||
2046 | return false; | ||||||
2047 | } | ||||||
2048 | } | ||||||
2049 | break; | ||||||
2050 | |||||||
2051 | case ISD::SRL: { | ||||||
2052 | // Scale must not be used already. | ||||||
2053 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; | ||||||
2054 | |||||||
2055 | // We only handle up to 64-bit values here as those are what matter for | ||||||
2056 | // addressing mode optimizations. | ||||||
2057 | assert(N.getSimpleValueType().getSizeInBits() <= 64 &&((N.getSimpleValueType().getSizeInBits() <= 64 && "Unexpected value size!" ) ? static_cast<void> (0) : __assert_fail ("N.getSimpleValueType().getSizeInBits() <= 64 && \"Unexpected value size!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2058, __PRETTY_FUNCTION__)) | ||||||
2058 | "Unexpected value size!")((N.getSimpleValueType().getSizeInBits() <= 64 && "Unexpected value size!" ) ? static_cast<void> (0) : __assert_fail ("N.getSimpleValueType().getSizeInBits() <= 64 && \"Unexpected value size!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2058, __PRETTY_FUNCTION__)); | ||||||
2059 | |||||||
2060 | SDValue And = N.getOperand(0); | ||||||
2061 | if (And.getOpcode() != ISD::AND) break; | ||||||
2062 | SDValue X = And.getOperand(0); | ||||||
2063 | |||||||
2064 | // The mask used for the transform is expected to be post-shift, but we | ||||||
2065 | // found the shift first so just apply the shift to the mask before passing | ||||||
2066 | // it down. | ||||||
2067 | if (!isa<ConstantSDNode>(N.getOperand(1)) || | ||||||
2068 | !isa<ConstantSDNode>(And.getOperand(1))) | ||||||
2069 | break; | ||||||
2070 | uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1); | ||||||
2071 | |||||||
2072 | // Try to fold the mask and shift into the scale, and return false if we | ||||||
2073 | // succeed. | ||||||
2074 | if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM)) | ||||||
2075 | return false; | ||||||
2076 | break; | ||||||
2077 | } | ||||||
2078 | |||||||
2079 | case ISD::SMUL_LOHI: | ||||||
2080 | case ISD::UMUL_LOHI: | ||||||
2081 | // A mul_lohi where we need the low part can be folded as a plain multiply. | ||||||
2082 | if (N.getResNo() != 0) break; | ||||||
2083 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
2084 | case ISD::MUL: | ||||||
2085 | case X86ISD::MUL_IMM: | ||||||
2086 | // X*[3,5,9] -> X+X*[2,4,8] | ||||||
2087 | if (AM.BaseType == X86ISelAddressMode::RegBase && | ||||||
2088 | AM.Base_Reg.getNode() == nullptr && | ||||||
2089 | AM.IndexReg.getNode() == nullptr) { | ||||||
2090 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) | ||||||
2091 | if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || | ||||||
2092 | CN->getZExtValue() == 9) { | ||||||
2093 | AM.Scale = unsigned(CN->getZExtValue())-1; | ||||||
2094 | |||||||
2095 | SDValue MulVal = N.getOperand(0); | ||||||
2096 | SDValue Reg; | ||||||
2097 | |||||||
2098 | // Okay, we know that we have a scale by now. However, if the scaled | ||||||
2099 | // value is an add of something and a constant, we can fold the | ||||||
2100 | // constant into the disp field here. | ||||||
2101 | if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && | ||||||
2102 | isa<ConstantSDNode>(MulVal.getOperand(1))) { | ||||||
2103 | Reg = MulVal.getOperand(0); | ||||||
2104 | ConstantSDNode *AddVal = | ||||||
2105 | cast<ConstantSDNode>(MulVal.getOperand(1)); | ||||||
2106 | uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); | ||||||
2107 | if (foldOffsetIntoAddress(Disp, AM)) | ||||||
2108 | Reg = N.getOperand(0); | ||||||
2109 | } else { | ||||||
2110 | Reg = N.getOperand(0); | ||||||
2111 | } | ||||||
2112 | |||||||
2113 | AM.IndexReg = AM.Base_Reg = Reg; | ||||||
2114 | return false; | ||||||
2115 | } | ||||||
2116 | } | ||||||
2117 | break; | ||||||
2118 | |||||||
2119 | case ISD::SUB: { | ||||||
2120 | // Given A-B, if A can be completely folded into the address and | ||||||
2121 | // the index field with the index field unused, use -B as the index. | ||||||
2122 | // This is a win if a has multiple parts that can be folded into | ||||||
2123 | // the address. Also, this saves a mov if the base register has | ||||||
2124 | // other uses, since it avoids a two-address sub instruction, however | ||||||
2125 | // it costs an additional mov if the index register has other uses. | ||||||
2126 | |||||||
2127 | // Add an artificial use to this node so that we can keep track of | ||||||
2128 | // it if it gets CSE'd with a different node. | ||||||
2129 | HandleSDNode Handle(N); | ||||||
2130 | |||||||
2131 | // Test if the LHS of the sub can be folded. | ||||||
2132 | X86ISelAddressMode Backup = AM; | ||||||
2133 | if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) { | ||||||
2134 | N = Handle.getValue(); | ||||||
2135 | AM = Backup; | ||||||
2136 | break; | ||||||
2137 | } | ||||||
2138 | N = Handle.getValue(); | ||||||
2139 | // Test if the index field is free for use. | ||||||
2140 | if (AM.IndexReg.getNode() || AM.isRIPRelative()) { | ||||||
2141 | AM = Backup; | ||||||
2142 | break; | ||||||
2143 | } | ||||||
2144 | |||||||
2145 | int Cost = 0; | ||||||
2146 | SDValue RHS = N.getOperand(1); | ||||||
2147 | // If the RHS involves a register with multiple uses, this | ||||||
2148 | // transformation incurs an extra mov, due to the neg instruction | ||||||
2149 | // clobbering its operand. | ||||||
2150 | if (!RHS.getNode()->hasOneUse() || | ||||||
2151 | RHS.getNode()->getOpcode() == ISD::CopyFromReg || | ||||||
2152 | RHS.getNode()->getOpcode() == ISD::TRUNCATE || | ||||||
2153 | RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || | ||||||
2154 | (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && | ||||||
2155 | RHS.getOperand(0).getValueType() == MVT::i32)) | ||||||
2156 | ++Cost; | ||||||
2157 | // If the base is a register with multiple uses, this | ||||||
2158 | // transformation may save a mov. | ||||||
2159 | if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && | ||||||
2160 | !AM.Base_Reg.getNode()->hasOneUse()) || | ||||||
2161 | AM.BaseType == X86ISelAddressMode::FrameIndexBase) | ||||||
2162 | --Cost; | ||||||
2163 | // If the folded LHS was interesting, this transformation saves | ||||||
2164 | // address arithmetic. | ||||||
2165 | if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + | ||||||
2166 | ((AM.Disp != 0) && (Backup.Disp == 0)) + | ||||||
2167 | (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) | ||||||
2168 | --Cost; | ||||||
2169 | // If it doesn't look like it may be an overall win, don't do it. | ||||||
2170 | if (Cost >= 0) { | ||||||
2171 | AM = Backup; | ||||||
2172 | break; | ||||||
2173 | } | ||||||
2174 | |||||||
2175 | // Ok, the transformation is legal and appears profitable. Go for it. | ||||||
2176 | // Negation will be emitted later to avoid creating dangling nodes if this | ||||||
2177 | // was an unprofitable LEA. | ||||||
2178 | AM.IndexReg = RHS; | ||||||
2179 | AM.NegateIndex = true; | ||||||
2180 | AM.Scale = 1; | ||||||
2181 | return false; | ||||||
2182 | } | ||||||
2183 | |||||||
2184 | case ISD::ADD: | ||||||
2185 | if (!matchAdd(N, AM, Depth)) | ||||||
2186 | return false; | ||||||
2187 | break; | ||||||
2188 | |||||||
2189 | case ISD::OR: | ||||||
2190 | // We want to look through a transform in InstCombine and DAGCombiner that | ||||||
2191 | // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'. | ||||||
2192 | // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3)) | ||||||
2193 | // An 'lea' can then be used to match the shift (multiply) and add: | ||||||
2194 | // and $1, %esi | ||||||
2195 | // lea (%rsi, %rdi, 8), %rax | ||||||
2196 | if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) && | ||||||
2197 | !matchAdd(N, AM, Depth)) | ||||||
2198 | return false; | ||||||
2199 | break; | ||||||
2200 | |||||||
2201 | case ISD::AND: { | ||||||
2202 | // Perform some heroic transforms on an and of a constant-count shift | ||||||
2203 | // with a constant to enable use of the scaled offset field. | ||||||
2204 | |||||||
2205 | // Scale must not be used already. | ||||||
2206 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; | ||||||
2207 | |||||||
2208 | // We only handle up to 64-bit values here as those are what matter for | ||||||
2209 | // addressing mode optimizations. | ||||||
2210 | assert(N.getSimpleValueType().getSizeInBits() <= 64 &&((N.getSimpleValueType().getSizeInBits() <= 64 && "Unexpected value size!" ) ? static_cast<void> (0) : __assert_fail ("N.getSimpleValueType().getSizeInBits() <= 64 && \"Unexpected value size!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2211, __PRETTY_FUNCTION__)) | ||||||
2211 | "Unexpected value size!")((N.getSimpleValueType().getSizeInBits() <= 64 && "Unexpected value size!" ) ? static_cast<void> (0) : __assert_fail ("N.getSimpleValueType().getSizeInBits() <= 64 && \"Unexpected value size!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2211, __PRETTY_FUNCTION__)); | ||||||
2212 | |||||||
2213 | if (!isa<ConstantSDNode>(N.getOperand(1))) | ||||||
2214 | break; | ||||||
2215 | |||||||
2216 | if (N.getOperand(0).getOpcode() == ISD::SRL) { | ||||||
2217 | SDValue Shift = N.getOperand(0); | ||||||
2218 | SDValue X = Shift.getOperand(0); | ||||||
2219 | |||||||
2220 | uint64_t Mask = N.getConstantOperandVal(1); | ||||||
2221 | |||||||
2222 | // Try to fold the mask and shift into an extract and scale. | ||||||
2223 | if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) | ||||||
2224 | return false; | ||||||
2225 | |||||||
2226 | // Try to fold the mask and shift directly into the scale. | ||||||
2227 | if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) | ||||||
2228 | return false; | ||||||
2229 | |||||||
2230 | // Try to fold the mask and shift into BEXTR and scale. | ||||||
2231 | if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget)) | ||||||
2232 | return false; | ||||||
2233 | } | ||||||
2234 | |||||||
2235 | // Try to swap the mask and shift to place shifts which can be done as | ||||||
2236 | // a scale on the outside of the mask. | ||||||
2237 | if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM)) | ||||||
2238 | return false; | ||||||
2239 | |||||||
2240 | break; | ||||||
2241 | } | ||||||
2242 | case ISD::ZERO_EXTEND: { | ||||||
2243 | // Try to widen a zexted shift left to the same size as its use, so we can | ||||||
2244 | // match the shift as a scale factor. | ||||||
2245 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) | ||||||
2246 | break; | ||||||
2247 | if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse()) | ||||||
2248 | break; | ||||||
2249 | |||||||
2250 | // Give up if the shift is not a valid scale factor [1,2,3]. | ||||||
2251 | SDValue Shl = N.getOperand(0); | ||||||
2252 | auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1)); | ||||||
2253 | if (!ShAmtC || ShAmtC->getZExtValue() > 3) | ||||||
2254 | break; | ||||||
2255 | |||||||
2256 | // The narrow shift must only shift out zero bits (it must be 'nuw'). | ||||||
2257 | // That makes it safe to widen to the destination type. | ||||||
2258 | APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(), | ||||||
2259 | ShAmtC->getZExtValue()); | ||||||
2260 | if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros)) | ||||||
2261 | break; | ||||||
2262 | |||||||
2263 | // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C) | ||||||
2264 | MVT VT = N.getSimpleValueType(); | ||||||
2265 | SDLoc DL(N); | ||||||
2266 | SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0)); | ||||||
2267 | SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1)); | ||||||
2268 | |||||||
2269 | // Convert the shift to scale factor. | ||||||
2270 | AM.Scale = 1 << ShAmtC->getZExtValue(); | ||||||
2271 | AM.IndexReg = Zext; | ||||||
2272 | |||||||
2273 | insertDAGNode(*CurDAG, N, Zext); | ||||||
2274 | insertDAGNode(*CurDAG, N, NewShl); | ||||||
2275 | CurDAG->ReplaceAllUsesWith(N, NewShl); | ||||||
2276 | CurDAG->RemoveDeadNode(N.getNode()); | ||||||
2277 | return false; | ||||||
2278 | } | ||||||
2279 | } | ||||||
2280 | |||||||
2281 | return matchAddressBase(N, AM); | ||||||
2282 | } | ||||||
2283 | |||||||
2284 | /// Helper for MatchAddress. Add the specified node to the | ||||||
2285 | /// specified addressing mode without any further recursion. | ||||||
2286 | bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { | ||||||
2287 | // Is the base register already occupied? | ||||||
2288 | if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { | ||||||
2289 | // If so, check to see if the scale index register is set. | ||||||
2290 | if (!AM.IndexReg.getNode()) { | ||||||
2291 | AM.IndexReg = N; | ||||||
2292 | AM.Scale = 1; | ||||||
2293 | return false; | ||||||
2294 | } | ||||||
2295 | |||||||
2296 | // Otherwise, we cannot select it. | ||||||
2297 | return true; | ||||||
2298 | } | ||||||
2299 | |||||||
2300 | // Default, generate it as a register. | ||||||
2301 | AM.BaseType = X86ISelAddressMode::RegBase; | ||||||
2302 | AM.Base_Reg = N; | ||||||
2303 | return false; | ||||||
2304 | } | ||||||
2305 | |||||||
2306 | /// Helper for selectVectorAddr. Handles things that can be folded into a | ||||||
2307 | /// gather scatter address. The index register and scale should have already | ||||||
2308 | /// been handled. | ||||||
2309 | bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { | ||||||
2310 | // TODO: Support other operations. | ||||||
2311 | switch (N.getOpcode()) { | ||||||
2312 | case ISD::Constant: { | ||||||
2313 | uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); | ||||||
2314 | if (!foldOffsetIntoAddress(Val, AM)) | ||||||
2315 | return false; | ||||||
2316 | break; | ||||||
2317 | } | ||||||
2318 | case X86ISD::Wrapper: | ||||||
2319 | if (!matchWrapper(N, AM)) | ||||||
2320 | return false; | ||||||
2321 | break; | ||||||
2322 | } | ||||||
2323 | |||||||
2324 | return matchAddressBase(N, AM); | ||||||
2325 | } | ||||||
2326 | |||||||
2327 | bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, | ||||||
2328 | SDValue IndexOp, SDValue ScaleOp, | ||||||
2329 | SDValue &Base, SDValue &Scale, | ||||||
2330 | SDValue &Index, SDValue &Disp, | ||||||
2331 | SDValue &Segment) { | ||||||
2332 | X86ISelAddressMode AM; | ||||||
2333 | AM.IndexReg = IndexOp; | ||||||
2334 | AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue(); | ||||||
2335 | |||||||
2336 | unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace(); | ||||||
2337 | if (AddrSpace == X86AS::GS) | ||||||
2338 | AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); | ||||||
2339 | if (AddrSpace == X86AS::FS) | ||||||
2340 | AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); | ||||||
2341 | if (AddrSpace == X86AS::SS) | ||||||
2342 | AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); | ||||||
2343 | |||||||
2344 | SDLoc DL(BasePtr); | ||||||
2345 | MVT VT = BasePtr.getSimpleValueType(); | ||||||
2346 | |||||||
2347 | // Try to match into the base and displacement fields. | ||||||
2348 | if (matchVectorAddress(BasePtr, AM)) | ||||||
2349 | return false; | ||||||
2350 | |||||||
2351 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); | ||||||
2352 | return true; | ||||||
2353 | } | ||||||
2354 | |||||||
2355 | /// Returns true if it is able to pattern match an addressing mode. | ||||||
2356 | /// It returns the operands which make up the maximal addressing mode it can | ||||||
2357 | /// match by reference. | ||||||
2358 | /// | ||||||
2359 | /// Parent is the parent node of the addr operand that is being matched. It | ||||||
2360 | /// is always a load, store, atomic node, or null. It is only null when | ||||||
2361 | /// checking memory operands for inline asm nodes. | ||||||
2362 | bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, | ||||||
2363 | SDValue &Scale, SDValue &Index, | ||||||
2364 | SDValue &Disp, SDValue &Segment) { | ||||||
2365 | X86ISelAddressMode AM; | ||||||
2366 | |||||||
2367 | if (Parent && | ||||||
2368 | // This list of opcodes are all the nodes that have an "addr:$ptr" operand | ||||||
2369 | // that are not a MemSDNode, and thus don't have proper addrspace info. | ||||||
2370 | Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme | ||||||
2371 | Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores | ||||||
2372 | Parent->getOpcode() != X86ISD::TLSCALL && // Fixme | ||||||
2373 | Parent->getOpcode() != X86ISD::ENQCMD && // Fixme | ||||||
2374 | Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme | ||||||
2375 | Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp | ||||||
2376 | Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp | ||||||
2377 | unsigned AddrSpace = | ||||||
2378 | cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace(); | ||||||
2379 | // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS. | ||||||
2380 | if (AddrSpace == 256) | ||||||
2381 | AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); | ||||||
2382 | if (AddrSpace == 257) | ||||||
2383 | AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); | ||||||
2384 | if (AddrSpace == 258) | ||||||
2385 | AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); | ||||||
2386 | } | ||||||
2387 | |||||||
2388 | // Save the DL and VT before calling matchAddress, it can invalidate N. | ||||||
2389 | SDLoc DL(N); | ||||||
2390 | MVT VT = N.getSimpleValueType(); | ||||||
2391 | |||||||
2392 | if (matchAddress(N, AM)) | ||||||
2393 | return false; | ||||||
2394 | |||||||
2395 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); | ||||||
2396 | return true; | ||||||
2397 | } | ||||||
2398 | |||||||
2399 | // We can only fold a load if all nodes between it and the root node have a | ||||||
2400 | // single use. If there are additional uses, we could end up duplicating the | ||||||
2401 | // load. | ||||||
2402 | static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) { | ||||||
2403 | while (User != Root) { | ||||||
2404 | if (!User->hasOneUse()) | ||||||
2405 | return false; | ||||||
2406 | User = *User->use_begin(); | ||||||
2407 | } | ||||||
2408 | |||||||
2409 | return true; | ||||||
2410 | } | ||||||
2411 | |||||||
/// Match a scalar SSE load. In particular, we want to match a load whose top
/// elements are either undef or zeros. The load flavor is derived from the
/// type of N, which is either v4f32 or v2f64.
///
/// We also return:
///   PatternNodeWithChain: this is the matched node that has a chain input
///   and output.
bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &PatternNodeWithChain) {
  // Folding is only safe when every node between Parent and Root is
  // single-use; otherwise the folded load could be duplicated.
  if (!hasSingleUsesFromRoot(Root, Parent))
    return false;

  // We can allow a full vector load here since narrowing a load is ok unless
  // it's volatile or atomic.
  if (ISD::isNON_EXTLoad(N.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    // isSimple() rejects volatile/atomic loads, which must not be narrowed.
    if (LD->isSimple() &&
        IsProfitableToFold(N, LD, Root) &&
        IsLegalToFold(N, Parent, Root, OptLevel)) {
      PatternNodeWithChain = N;
      return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
                        Segment);
    }
  }

  // We can also match the special zero extended load opcode.
  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
    PatternNodeWithChain = N;
    if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
        IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
      auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
      return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
                        Segment);
    }
  }

  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
  // once. Otherwise the load might get duplicated and the chain output of the
  // duplicate load will not be observed by all dependencies.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
    PatternNodeWithChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
        IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
      return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
                        Segment);
    }
  }

  // No foldable scalar-load pattern was found.
  return false;
}
2467 | |||||||
2468 | |||||||
2469 | bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { | ||||||
2470 | if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { | ||||||
2471 | uint64_t ImmVal = CN->getZExtValue(); | ||||||
2472 | if (!isUInt<32>(ImmVal)) | ||||||
2473 | return false; | ||||||
2474 | |||||||
2475 | Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64); | ||||||
2476 | return true; | ||||||
2477 | } | ||||||
2478 | |||||||
2479 | // In static codegen with small code model, we can get the address of a label | ||||||
2480 | // into a register with 'movl' | ||||||
2481 | if (N->getOpcode() != X86ISD::Wrapper) | ||||||
2482 | return false; | ||||||
2483 | |||||||
2484 | N = N.getOperand(0); | ||||||
2485 | |||||||
2486 | // At least GNU as does not accept 'movl' for TPOFF relocations. | ||||||
2487 | // FIXME: We could use 'movl' when we know we are targeting MC. | ||||||
2488 | if (N->getOpcode() == ISD::TargetGlobalTLSAddress) | ||||||
2489 | return false; | ||||||
2490 | |||||||
2491 | Imm = N; | ||||||
2492 | if (N->getOpcode() != ISD::TargetGlobalAddress) | ||||||
2493 | return TM.getCodeModel() == CodeModel::Small; | ||||||
2494 | |||||||
2495 | Optional<ConstantRange> CR = | ||||||
2496 | cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange(); | ||||||
2497 | if (!CR) | ||||||
2498 | return TM.getCodeModel() == CodeModel::Small; | ||||||
2499 | |||||||
2500 | return CR->getUnsignedMax().ult(1ull << 32); | ||||||
2501 | } | ||||||
2502 | |||||||
2503 | bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, | ||||||
2504 | SDValue &Scale, SDValue &Index, | ||||||
2505 | SDValue &Disp, SDValue &Segment) { | ||||||
2506 | // Save the debug loc before calling selectLEAAddr, in case it invalidates N. | ||||||
2507 | SDLoc DL(N); | ||||||
2508 | |||||||
2509 | if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) | ||||||
2510 | return false; | ||||||
2511 | |||||||
2512 | RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base); | ||||||
2513 | if (RN && RN->getReg() == 0) | ||||||
2514 | Base = CurDAG->getRegister(0, MVT::i64); | ||||||
2515 | else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) { | ||||||
2516 | // Base could already be %rip, particularly in the x32 ABI. | ||||||
2517 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, | ||||||
2518 | MVT::i64), 0); | ||||||
2519 | Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, | ||||||
2520 | Base); | ||||||
2521 | } | ||||||
2522 | |||||||
2523 | RN = dyn_cast<RegisterSDNode>(Index); | ||||||
2524 | if (RN && RN->getReg() == 0) | ||||||
2525 | Index = CurDAG->getRegister(0, MVT::i64); | ||||||
2526 | else { | ||||||
2527 | assert(Index.getValueType() == MVT::i32 &&((Index.getValueType() == MVT::i32 && "Expect to be extending 32-bit registers for use in LEA" ) ? static_cast<void> (0) : __assert_fail ("Index.getValueType() == MVT::i32 && \"Expect to be extending 32-bit registers for use in LEA\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2528, __PRETTY_FUNCTION__)) | ||||||
2528 | "Expect to be extending 32-bit registers for use in LEA")((Index.getValueType() == MVT::i32 && "Expect to be extending 32-bit registers for use in LEA" ) ? static_cast<void> (0) : __assert_fail ("Index.getValueType() == MVT::i32 && \"Expect to be extending 32-bit registers for use in LEA\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2528, __PRETTY_FUNCTION__)); | ||||||
2529 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, | ||||||
2530 | MVT::i64), 0); | ||||||
2531 | Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, | ||||||
2532 | Index); | ||||||
2533 | } | ||||||
2534 | |||||||
2535 | return true; | ||||||
2536 | } | ||||||
2537 | |||||||
2538 | /// Calls SelectAddr and determines if the maximal addressing | ||||||
2539 | /// mode it matches can be cost effectively emitted as an LEA instruction. | ||||||
2540 | bool X86DAGToDAGISel::selectLEAAddr(SDValue N, | ||||||
2541 | SDValue &Base, SDValue &Scale, | ||||||
2542 | SDValue &Index, SDValue &Disp, | ||||||
2543 | SDValue &Segment) { | ||||||
2544 | X86ISelAddressMode AM; | ||||||
2545 | |||||||
2546 | // Save the DL and VT before calling matchAddress, it can invalidate N. | ||||||
2547 | SDLoc DL(N); | ||||||
2548 | MVT VT = N.getSimpleValueType(); | ||||||
2549 | |||||||
2550 | // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support | ||||||
2551 | // segments. | ||||||
2552 | SDValue Copy = AM.Segment; | ||||||
2553 | SDValue T = CurDAG->getRegister(0, MVT::i32); | ||||||
2554 | AM.Segment = T; | ||||||
2555 | if (matchAddress(N, AM)) | ||||||
2556 | return false; | ||||||
2557 | assert (T == AM.Segment)((T == AM.Segment) ? static_cast<void> (0) : __assert_fail ("T == AM.Segment", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2557, __PRETTY_FUNCTION__)); | ||||||
2558 | AM.Segment = Copy; | ||||||
2559 | |||||||
2560 | unsigned Complexity = 0; | ||||||
2561 | if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode()) | ||||||
2562 | Complexity = 1; | ||||||
2563 | else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) | ||||||
2564 | Complexity = 4; | ||||||
2565 | |||||||
2566 | if (AM.IndexReg.getNode()) | ||||||
2567 | Complexity++; | ||||||
2568 | |||||||
2569 | // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with | ||||||
2570 | // a simple shift. | ||||||
2571 | if (AM.Scale > 1) | ||||||
2572 | Complexity++; | ||||||
2573 | |||||||
2574 | // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA | ||||||
2575 | // to a LEA. This is determined with some experimentation but is by no means | ||||||
2576 | // optimal (especially for code size consideration). LEA is nice because of | ||||||
2577 | // its three-address nature. Tweak the cost function again when we can run | ||||||
2578 | // convertToThreeAddress() at register allocation time. | ||||||
2579 | if (AM.hasSymbolicDisplacement()) { | ||||||
2580 | // For X86-64, always use LEA to materialize RIP-relative addresses. | ||||||
2581 | if (Subtarget->is64Bit()) | ||||||
2582 | Complexity = 4; | ||||||
2583 | else | ||||||
2584 | Complexity += 2; | ||||||
2585 | } | ||||||
2586 | |||||||
2587 | // Heuristic: try harder to form an LEA from ADD if the operands set flags. | ||||||
2588 | // Unlike ADD, LEA does not affect flags, so we will be less likely to require | ||||||
2589 | // duplicating flag-producing instructions later in the pipeline. | ||||||
2590 | if (N.getOpcode() == ISD::ADD) { | ||||||
2591 | auto isMathWithFlags = [](SDValue V) { | ||||||
2592 | switch (V.getOpcode()) { | ||||||
2593 | case X86ISD::ADD: | ||||||
2594 | case X86ISD::SUB: | ||||||
2595 | case X86ISD::ADC: | ||||||
2596 | case X86ISD::SBB: | ||||||
2597 | /* TODO: These opcodes can be added safely, but we may want to justify | ||||||
2598 | their inclusion for different reasons (better for reg-alloc). | ||||||
2599 | case X86ISD::SMUL: | ||||||
2600 | case X86ISD::UMUL: | ||||||
2601 | case X86ISD::OR: | ||||||
2602 | case X86ISD::XOR: | ||||||
2603 | case X86ISD::AND: | ||||||
2604 | */ | ||||||
2605 | // Value 1 is the flag output of the node - verify it's not dead. | ||||||
2606 | return !SDValue(V.getNode(), 1).use_empty(); | ||||||
2607 | default: | ||||||
2608 | return false; | ||||||
2609 | } | ||||||
2610 | }; | ||||||
2611 | // TODO: This could be an 'or' rather than 'and' to make the transform more | ||||||
2612 | // likely to happen. We might want to factor in whether there's a | ||||||
2613 | // load folding opportunity for the math op that disappears with LEA. | ||||||
2614 | if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) | ||||||
2615 | Complexity++; | ||||||
2616 | } | ||||||
2617 | |||||||
2618 | if (AM.Disp) | ||||||
2619 | Complexity++; | ||||||
2620 | |||||||
2621 | // If it isn't worth using an LEA, reject it. | ||||||
2622 | if (Complexity <= 2) | ||||||
2623 | return false; | ||||||
2624 | |||||||
2625 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); | ||||||
2626 | return true; | ||||||
2627 | } | ||||||
2628 | |||||||
2629 | /// This is only run on TargetGlobalTLSAddress nodes. | ||||||
2630 | bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, | ||||||
2631 | SDValue &Scale, SDValue &Index, | ||||||
2632 | SDValue &Disp, SDValue &Segment) { | ||||||
2633 | assert(N.getOpcode() == ISD::TargetGlobalTLSAddress)((N.getOpcode() == ISD::TargetGlobalTLSAddress) ? static_cast <void> (0) : __assert_fail ("N.getOpcode() == ISD::TargetGlobalTLSAddress" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2633, __PRETTY_FUNCTION__)); | ||||||
2634 | const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); | ||||||
2635 | |||||||
2636 | X86ISelAddressMode AM; | ||||||
2637 | AM.GV = GA->getGlobal(); | ||||||
2638 | AM.Disp += GA->getOffset(); | ||||||
2639 | AM.SymbolFlags = GA->getTargetFlags(); | ||||||
2640 | |||||||
2641 | MVT VT = N.getSimpleValueType(); | ||||||
2642 | if (VT == MVT::i32) { | ||||||
2643 | AM.Scale = 1; | ||||||
2644 | AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); | ||||||
2645 | } | ||||||
2646 | |||||||
2647 | getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment); | ||||||
2648 | return true; | ||||||
2649 | } | ||||||
2650 | |||||||
2651 | bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) { | ||||||
2652 | if (auto *CN = dyn_cast<ConstantSDNode>(N)) { | ||||||
2653 | Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN), | ||||||
2654 | N.getValueType()); | ||||||
2655 | return true; | ||||||
2656 | } | ||||||
2657 | |||||||
2658 | // Keep track of the original value type and whether this value was | ||||||
2659 | // truncated. If we see a truncation from pointer type to VT that truncates | ||||||
2660 | // bits that are known to be zero, we can use a narrow reference. | ||||||
2661 | EVT VT = N.getValueType(); | ||||||
2662 | bool WasTruncated = false; | ||||||
2663 | if (N.getOpcode() == ISD::TRUNCATE) { | ||||||
2664 | WasTruncated = true; | ||||||
2665 | N = N.getOperand(0); | ||||||
2666 | } | ||||||
2667 | |||||||
2668 | if (N.getOpcode() != X86ISD::Wrapper) | ||||||
2669 | return false; | ||||||
2670 | |||||||
2671 | // We can only use non-GlobalValues as immediates if they were not truncated, | ||||||
2672 | // as we do not have any range information. If we have a GlobalValue and the | ||||||
2673 | // address was not truncated, we can select it as an operand directly. | ||||||
2674 | unsigned Opc = N.getOperand(0)->getOpcode(); | ||||||
2675 | if (Opc != ISD::TargetGlobalAddress || !WasTruncated) { | ||||||
2676 | Op = N.getOperand(0); | ||||||
2677 | // We can only select the operand directly if we didn't have to look past a | ||||||
2678 | // truncate. | ||||||
2679 | return !WasTruncated; | ||||||
2680 | } | ||||||
2681 | |||||||
2682 | // Check that the global's range fits into VT. | ||||||
2683 | auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0)); | ||||||
2684 | Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange(); | ||||||
2685 | if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits())) | ||||||
2686 | return false; | ||||||
2687 | |||||||
2688 | // Okay, we can use a narrow reference. | ||||||
2689 | Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT, | ||||||
2690 | GA->getOffset(), GA->getTargetFlags()); | ||||||
2691 | return true; | ||||||
2692 | } | ||||||
2693 | |||||||
2694 | bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, | ||||||
2695 | SDValue &Base, SDValue &Scale, | ||||||
2696 | SDValue &Index, SDValue &Disp, | ||||||
2697 | SDValue &Segment) { | ||||||
2698 | assert(Root && P && "Unknown root/parent nodes")((Root && P && "Unknown root/parent nodes") ? static_cast<void> (0) : __assert_fail ("Root && P && \"Unknown root/parent nodes\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2698, __PRETTY_FUNCTION__)); | ||||||
2699 | if (!ISD::isNON_EXTLoad(N.getNode()) || | ||||||
2700 | !IsProfitableToFold(N, P, Root) || | ||||||
2701 | !IsLegalToFold(N, P, Root, OptLevel)) | ||||||
2702 | return false; | ||||||
2703 | |||||||
2704 | return selectAddr(N.getNode(), | ||||||
2705 | N.getOperand(1), Base, Scale, Index, Disp, Segment); | ||||||
2706 | } | ||||||
2707 | |||||||
2708 | bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, | ||||||
2709 | SDValue &Base, SDValue &Scale, | ||||||
2710 | SDValue &Index, SDValue &Disp, | ||||||
2711 | SDValue &Segment) { | ||||||
2712 | assert(Root && P && "Unknown root/parent nodes")((Root && P && "Unknown root/parent nodes") ? static_cast<void> (0) : __assert_fail ("Root && P && \"Unknown root/parent nodes\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2712, __PRETTY_FUNCTION__)); | ||||||
2713 | if (N->getOpcode() != X86ISD::VBROADCAST_LOAD || | ||||||
2714 | !IsProfitableToFold(N, P, Root) || | ||||||
2715 | !IsLegalToFold(N, P, Root, OptLevel)) | ||||||
2716 | return false; | ||||||
2717 | |||||||
2718 | return selectAddr(N.getNode(), | ||||||
2719 | N.getOperand(1), Base, Scale, Index, Disp, Segment); | ||||||
2720 | } | ||||||
2721 | |||||||
2722 | /// Return an SDNode that returns the value of the global base register. | ||||||
2723 | /// Output instructions required to initialize the global base register, | ||||||
2724 | /// if necessary. | ||||||
2725 | SDNode *X86DAGToDAGISel::getGlobalBaseReg() { | ||||||
2726 | unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); | ||||||
2727 | auto &DL = MF->getDataLayout(); | ||||||
2728 | return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); | ||||||
2729 | } | ||||||
2730 | |||||||
2731 | bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { | ||||||
2732 | if (N->getOpcode() == ISD::TRUNCATE) | ||||||
2733 | N = N->getOperand(0).getNode(); | ||||||
2734 | if (N->getOpcode() != X86ISD::Wrapper) | ||||||
2735 | return false; | ||||||
2736 | |||||||
2737 | auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0)); | ||||||
2738 | if (!GA) | ||||||
2739 | return false; | ||||||
2740 | |||||||
2741 | Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange(); | ||||||
2742 | return CR && CR->getSignedMin().sge(-1ull << Width) && | ||||||
2743 | CR->getSignedMax().slt(1ull << Width); | ||||||
2744 | } | ||||||
2745 | |||||||
2746 | static X86::CondCode getCondFromNode(SDNode *N) { | ||||||
2747 | assert(N->isMachineOpcode() && "Unexpected node")((N->isMachineOpcode() && "Unexpected node") ? static_cast <void> (0) : __assert_fail ("N->isMachineOpcode() && \"Unexpected node\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 2747, __PRETTY_FUNCTION__)); | ||||||
2748 | X86::CondCode CC = X86::COND_INVALID; | ||||||
2749 | unsigned Opc = N->getMachineOpcode(); | ||||||
2750 | if (Opc == X86::JCC_1) | ||||||
2751 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1)); | ||||||
2752 | else if (Opc == X86::SETCCr) | ||||||
2753 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0)); | ||||||
2754 | else if (Opc == X86::SETCCm) | ||||||
2755 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5)); | ||||||
2756 | else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || | ||||||
2757 | Opc == X86::CMOV64rr) | ||||||
2758 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2)); | ||||||
2759 | else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm || | ||||||
2760 | Opc == X86::CMOV64rm) | ||||||
2761 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6)); | ||||||
2762 | |||||||
2763 | return CC; | ||||||
2764 | } | ||||||
2765 | |||||||
2766 | /// Test whether the given X86ISD::CMP node has any users that use a flag | ||||||
2767 | /// other than ZF. | ||||||
2768 | bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { | ||||||
2769 | // Examine each user of the node. | ||||||
2770 | for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); | ||||||
2771 | UI != UE; ++UI) { | ||||||
2772 | // Only check things that use the flags. | ||||||
2773 | if (UI.getUse().getResNo() != Flags.getResNo()) | ||||||
2774 | continue; | ||||||
2775 | // Only examine CopyToReg uses that copy to EFLAGS. | ||||||
2776 | if (UI->getOpcode() != ISD::CopyToReg || | ||||||
2777 | cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS) | ||||||
2778 | return false; | ||||||
2779 | // Examine each user of the CopyToReg use. | ||||||
2780 | for (SDNode::use_iterator FlagUI = UI->use_begin(), | ||||||
2781 | FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { | ||||||
2782 | // Only examine the Flag result. | ||||||
2783 | if (FlagUI.getUse().getResNo() != 1) continue; | ||||||
2784 | // Anything unusual: assume conservatively. | ||||||
2785 | if (!FlagUI->isMachineOpcode()) return false; | ||||||
2786 | // Examine the condition code of the user. | ||||||
2787 | X86::CondCode CC = getCondFromNode(*FlagUI); | ||||||
2788 | |||||||
2789 | switch (CC) { | ||||||
2790 | // Comparisons which only use the zero flag. | ||||||
2791 | case X86::COND_E: case X86::COND_NE: | ||||||
2792 | continue; | ||||||
2793 | // Anything else: assume conservatively. | ||||||
2794 | default: | ||||||
2795 | return false; | ||||||
2796 | } | ||||||
2797 | } | ||||||
2798 | } | ||||||
2799 | return true; | ||||||
2800 | } | ||||||
2801 | |||||||
/// Test whether the given X86ISD::CMP node has any uses which require the SF
/// flag to be accurate.
///
/// Returns true only when every transitive flag consumer is a selected
/// machine node whose condition code is in the SF-ignoring list below.
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    // Only check things that use the flags.
    if (UI.getUse().getResNo() != Flags.getResNo())
      continue;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (UI->getOpcode() != ISD::CopyToReg ||
        cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the condition code of the user.
      X86::CondCode CC = getCondFromNode(*FlagUI);

      switch (CC) {
      // Comparisons which don't examine the SF flag.
      case X86::COND_A: case X86::COND_AE:
      case X86::COND_B: case X86::COND_BE:
      case X86::COND_E: case X86::COND_NE:
      case X86::COND_O: case X86::COND_NO:
      case X86::COND_P: case X86::COND_NP:
        continue;
      // Anything else: assume conservatively.
      default:
        return false;
      }
    }
  }
  return true;
}
2841 | |||||||
2842 | static bool mayUseCarryFlag(X86::CondCode CC) { | ||||||
2843 | switch (CC) { | ||||||
2844 | // Comparisons which don't examine the CF flag. | ||||||
2845 | case X86::COND_O: case X86::COND_NO: | ||||||
2846 | case X86::COND_E: case X86::COND_NE: | ||||||
2847 | case X86::COND_S: case X86::COND_NS: | ||||||
2848 | case X86::COND_P: case X86::COND_NP: | ||||||
2849 | case X86::COND_L: case X86::COND_GE: | ||||||
2850 | case X86::COND_G: case X86::COND_LE: | ||||||
2851 | return false; | ||||||
2852 | // Anything else: assume conservatively. | ||||||
2853 | default: | ||||||
2854 | return true; | ||||||
2855 | } | ||||||
2856 | } | ||||||
2857 | |||||||
/// Test whether the given node which sets flags has any uses which require the
/// CF flag to be accurate.
///
/// Unlike the ZF/SF variants above, this also understands pre-isel X86ISD
/// flag consumers (SETCC/SETCC_CARRY/CMOV/BRCOND), not just CopyToReg.
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    // Only check things that use the flags.
    if (UI.getUse().getResNo() != Flags.getResNo())
      continue;

    unsigned UIOpc = UI->getOpcode();

    if (UIOpc == ISD::CopyToReg) {
      // Only examine CopyToReg uses that copy to EFLAGS.
      if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
        return false;
      // Examine each user of the CopyToReg use.
      for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
           FlagUI != FlagUE; ++FlagUI) {
        // Only examine the Flag result.
        if (FlagUI.getUse().getResNo() != 1)
          continue;
        // Anything unusual: assume conservatively.
        if (!FlagUI->isMachineOpcode())
          return false;
        // Examine the condition code of the user.
        X86::CondCode CC = getCondFromNode(*FlagUI);

        if (mayUseCarryFlag(CC))
          return false;
      }

      // This CopyToReg is ok. Move on to the next user.
      continue;
    }

    // This might be an unselected node. So look for the pre-isel opcodes that
    // use flags.
    unsigned CCOpNo;
    switch (UIOpc) {
    default:
      // Something unusual. Be conservative.
      return false;
    // Operand index of the condition code for each pre-isel consumer.
    case X86ISD::SETCC: CCOpNo = 0; break;
    case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
    case X86ISD::CMOV: CCOpNo = 2; break;
    case X86ISD::BRCOND: CCOpNo = 2; break;
    }

    X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
    if (mayUseCarryFlag(CC))
      return false;
  }
  return true;
}
2913 | |||||||
/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
///
/// \param StoreNode  the candidate store.
/// \param StoredVal  the value being stored (result 0 of the op).
/// \param LoadOpNo   which operand of the op is expected to be the load.
/// \param LoadNode   [out] the matched load, set on success.
/// \param InputChain [out] the rebuilt chain (a TokenFactor) that the fused
///                   node should use, set on success.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
                                        SDValue StoredVal, SelectionDAG *CurDAG,
                                        unsigned LoadOpNo,
                                        LoadSDNode *&LoadNode,
                                        SDValue &InputChain) {
  // Is the stored value result 0 of the operation?
  if (StoredVal.getResNo() != 0) return false;

  // Are there other uses of the operation other than the store?
  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;

  // Is the store non-extending and non-indexed?
  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
    return false;

  SDValue Load = StoredVal->getOperand(LoadOpNo);
  // Is the stored value a non-extending and non-indexed load?
  if (!ISD::isNormalLoad(Load.getNode())) return false;

  // Return LoadNode by reference.
  LoadNode = cast<LoadSDNode>(Load);

  // Is store the only read of the loaded value?
  if (!Load.hasOneUse())
    return false;

  // Is the address of the store the same as the load?
  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
      LoadNode->getOffset() != StoreNode->getOffset())
    return false;

  bool FoundLoad = false;
  SmallVector<SDValue, 4> ChainOps;
  SmallVector<const SDNode *, 4> LoopWorklist;
  SmallPtrSet<const SDNode *, 16> Visited;
  // Cap on the number of nodes hasPredecessorHelper may visit before giving
  // up (conservatively rejecting the fusion).
  const unsigned int Max = 1024;

  // Visualization of Load-Op-Store fusion:
  // -------------------------
  // Legend:
  //    *-lines = Chain operand dependencies.
  //    |-lines = Normal operand dependencies.
  //    Dependencies flow down and right. n-suffix references multiple nodes.
  //
  //        C         Xn  C
  //        *         *   *
  //        *         *   *
  //  Xn  A-LD  Yn  TF    Yn
  //   *   * \  |   *      |
  //    *  *  \ |   *      |
  //     * *   \|   * =>  A--LD_OP_ST
  //      * *   \|         \
  //       TF   OP          \
  //        *   |            \ Zn
  //        *   |             \
  //       A-ST Zn
  //

  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
  //                                      #2: Yn -> LD
  //                                      #3: ST -> Zn

  // Ensure the transform is safe by checking for the dual
  // dependencies to make sure we do not induce a loop.

  // As LD is a predecessor to both OP and ST we can do this by checking:
  //  a). if LD is a predecessor to a member of Xn or Yn.
  //  b). if a Zn is a predecessor to ST.

  // However, (b) can only occur through being a chain predecessor to
  // ST, which is the same as Zn being a member or predecessor of Xn,
  // which is a subset of LD being a predecessor of Xn. So it's
  // subsumed by check (a).

  SDValue Chain = StoreNode->getChain();

  // Gather X elements in ChainOps.
  if (Chain == Load.getValue(1)) {
    FoundLoad = true;
    ChainOps.push_back(Load.getOperand(0));
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
      SDValue Op = Chain.getOperand(i);
      if (Op == Load.getValue(1)) {
        FoundLoad = true;
        // Drop Load, but keep its chain. No cycle check necessary.
        ChainOps.push_back(Load.getOperand(0));
        continue;
      }
      LoopWorklist.push_back(Op.getNode());
      ChainOps.push_back(Op);
    }
  }

  if (!FoundLoad)
    return false;

  // Worklist is currently Xn. Add Yn to worklist.
  for (SDValue Op : StoredVal->ops())
    if (Op.getNode() != LoadNode)
      LoopWorklist.push_back(Op.getNode());

  // Check (a) if Load is a predecessor to Xn + Yn
  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
                                   true))
    return false;

  InputChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
  return true;
}
3027 | |||||||
3028 | // Change a chain of {load; op; store} of the same value into a simple op | ||||||
3029 | // through memory of that value, if the uses of the modified value and its | ||||||
3030 | // address are suitable. | ||||||
3031 | // | ||||||
3032 | // The tablegen pattern memory operand pattern is currently not able to match | ||||||
3033 | // the case where the EFLAGS on the original operation are used. | ||||||
3034 | // | ||||||
3035 | // To move this to tablegen, we'll need to improve tablegen to allow flags to | ||||||
3036 | // be transferred from a node in the pattern to the result node, probably with | ||||||
3037 | // a new keyword. For example, we have this | ||||||
3038 | // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", | ||||||
3039 | // [(store (add (loadi64 addr:$dst), -1), addr:$dst), | ||||||
3040 | // (implicit EFLAGS)]>; | ||||||
3041 | // but maybe need something like this | ||||||
3042 | // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", | ||||||
3043 | // [(store (add (loadi64 addr:$dst), -1), addr:$dst), | ||||||
3044 | // (transferrable EFLAGS)]>; | ||||||
3045 | // | ||||||
3046 | // Until then, we manually fold these and instruction select the operation | ||||||
3047 | // here. | ||||||
3048 | bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { | ||||||
3049 | StoreSDNode *StoreNode = cast<StoreSDNode>(Node); | ||||||
3050 | SDValue StoredVal = StoreNode->getOperand(1); | ||||||
3051 | unsigned Opc = StoredVal->getOpcode(); | ||||||
3052 | |||||||
3053 | // Before we try to select anything, make sure this is memory operand size | ||||||
3054 | // and opcode we can handle. Note that this must match the code below that | ||||||
3055 | // actually lowers the opcodes. | ||||||
3056 | EVT MemVT = StoreNode->getMemoryVT(); | ||||||
3057 | if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 && | ||||||
3058 | MemVT != MVT::i8) | ||||||
3059 | return false; | ||||||
3060 | |||||||
3061 | bool IsCommutable = false; | ||||||
3062 | bool IsNegate = false; | ||||||
3063 | switch (Opc) { | ||||||
3064 | default: | ||||||
3065 | return false; | ||||||
3066 | case X86ISD::SUB: | ||||||
3067 | IsNegate = isNullConstant(StoredVal.getOperand(0)); | ||||||
3068 | break; | ||||||
3069 | case X86ISD::SBB: | ||||||
3070 | break; | ||||||
3071 | case X86ISD::ADD: | ||||||
3072 | case X86ISD::ADC: | ||||||
3073 | case X86ISD::AND: | ||||||
3074 | case X86ISD::OR: | ||||||
3075 | case X86ISD::XOR: | ||||||
3076 | IsCommutable = true; | ||||||
3077 | break; | ||||||
3078 | } | ||||||
3079 | |||||||
3080 | unsigned LoadOpNo = IsNegate ? 1 : 0; | ||||||
3081 | LoadSDNode *LoadNode = nullptr; | ||||||
3082 | SDValue InputChain; | ||||||
3083 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, | ||||||
3084 | LoadNode, InputChain)) { | ||||||
3085 | if (!IsCommutable) | ||||||
3086 | return false; | ||||||
3087 | |||||||
3088 | // This operation is commutable, try the other operand. | ||||||
3089 | LoadOpNo = 1; | ||||||
3090 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, | ||||||
3091 | LoadNode, InputChain)) | ||||||
3092 | return false; | ||||||
3093 | } | ||||||
3094 | |||||||
3095 | SDValue Base, Scale, Index, Disp, Segment; | ||||||
3096 | if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, | ||||||
3097 | Segment)) | ||||||
3098 | return false; | ||||||
3099 | |||||||
3100 | auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16, | ||||||
3101 | unsigned Opc8) { | ||||||
3102 | switch (MemVT.getSimpleVT().SimpleTy) { | ||||||
3103 | case MVT::i64: | ||||||
3104 | return Opc64; | ||||||
3105 | case MVT::i32: | ||||||
3106 | return Opc32; | ||||||
3107 | case MVT::i16: | ||||||
3108 | return Opc16; | ||||||
3109 | case MVT::i8: | ||||||
3110 | return Opc8; | ||||||
3111 | default: | ||||||
3112 | llvm_unreachable("Invalid size!")::llvm::llvm_unreachable_internal("Invalid size!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3112); | ||||||
3113 | } | ||||||
3114 | }; | ||||||
3115 | |||||||
3116 | MachineSDNode *Result; | ||||||
3117 | switch (Opc) { | ||||||
3118 | case X86ISD::SUB: | ||||||
3119 | // Handle negate. | ||||||
3120 | if (IsNegate) { | ||||||
3121 | unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m, | ||||||
3122 | X86::NEG8m); | ||||||
3123 | const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; | ||||||
3124 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, | ||||||
3125 | MVT::Other, Ops); | ||||||
3126 | break; | ||||||
3127 | } | ||||||
3128 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
3129 | case X86ISD::ADD: | ||||||
3130 | // Try to match inc/dec. | ||||||
3131 | if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) { | ||||||
3132 | bool IsOne = isOneConstant(StoredVal.getOperand(1)); | ||||||
3133 | bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); | ||||||
3134 | // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec. | ||||||
3135 | if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) { | ||||||
3136 | unsigned NewOpc = | ||||||
3137 | ((Opc == X86ISD::ADD) == IsOne) | ||||||
3138 | ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) | ||||||
3139 | : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); | ||||||
3140 | const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; | ||||||
3141 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, | ||||||
3142 | MVT::Other, Ops); | ||||||
3143 | break; | ||||||
3144 | } | ||||||
3145 | } | ||||||
3146 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
3147 | case X86ISD::ADC: | ||||||
3148 | case X86ISD::SBB: | ||||||
3149 | case X86ISD::AND: | ||||||
3150 | case X86ISD::OR: | ||||||
3151 | case X86ISD::XOR: { | ||||||
3152 | auto SelectRegOpcode = [SelectOpcode](unsigned Opc) { | ||||||
3153 | switch (Opc) { | ||||||
3154 | case X86ISD::ADD: | ||||||
3155 | return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr, | ||||||
3156 | X86::ADD8mr); | ||||||
3157 | case X86ISD::ADC: | ||||||
3158 | return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr, | ||||||
3159 | X86::ADC8mr); | ||||||
3160 | case X86ISD::SUB: | ||||||
3161 | return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr, | ||||||
3162 | X86::SUB8mr); | ||||||
3163 | case X86ISD::SBB: | ||||||
3164 | return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr, | ||||||
3165 | X86::SBB8mr); | ||||||
3166 | case X86ISD::AND: | ||||||
3167 | return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr, | ||||||
3168 | X86::AND8mr); | ||||||
3169 | case X86ISD::OR: | ||||||
3170 | return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr); | ||||||
3171 | case X86ISD::XOR: | ||||||
3172 | return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr, | ||||||
3173 | X86::XOR8mr); | ||||||
3174 | default: | ||||||
3175 | llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3175); | ||||||
3176 | } | ||||||
3177 | }; | ||||||
3178 | auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) { | ||||||
3179 | switch (Opc) { | ||||||
3180 | case X86ISD::ADD: | ||||||
3181 | return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0); | ||||||
3182 | case X86ISD::ADC: | ||||||
3183 | return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0); | ||||||
3184 | case X86ISD::SUB: | ||||||
3185 | return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0); | ||||||
3186 | case X86ISD::SBB: | ||||||
3187 | return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0); | ||||||
3188 | case X86ISD::AND: | ||||||
3189 | return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0); | ||||||
3190 | case X86ISD::OR: | ||||||
3191 | return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0); | ||||||
3192 | case X86ISD::XOR: | ||||||
3193 | return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0); | ||||||
3194 | default: | ||||||
3195 | llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3195); | ||||||
3196 | } | ||||||
3197 | }; | ||||||
3198 | auto SelectImmOpcode = [SelectOpcode](unsigned Opc) { | ||||||
3199 | switch (Opc) { | ||||||
3200 | case X86ISD::ADD: | ||||||
3201 | return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi, | ||||||
3202 | X86::ADD8mi); | ||||||
3203 | case X86ISD::ADC: | ||||||
3204 | return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi, | ||||||
3205 | X86::ADC8mi); | ||||||
3206 | case X86ISD::SUB: | ||||||
3207 | return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi, | ||||||
3208 | X86::SUB8mi); | ||||||
3209 | case X86ISD::SBB: | ||||||
3210 | return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi, | ||||||
3211 | X86::SBB8mi); | ||||||
3212 | case X86ISD::AND: | ||||||
3213 | return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi, | ||||||
3214 | X86::AND8mi); | ||||||
3215 | case X86ISD::OR: | ||||||
3216 | return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi, | ||||||
3217 | X86::OR8mi); | ||||||
3218 | case X86ISD::XOR: | ||||||
3219 | return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi, | ||||||
3220 | X86::XOR8mi); | ||||||
3221 | default: | ||||||
3222 | llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3222); | ||||||
3223 | } | ||||||
3224 | }; | ||||||
3225 | |||||||
3226 | unsigned NewOpc = SelectRegOpcode(Opc); | ||||||
3227 | SDValue Operand = StoredVal->getOperand(1-LoadOpNo); | ||||||
3228 | |||||||
3229 | // See if the operand is a constant that we can fold into an immediate | ||||||
3230 | // operand. | ||||||
3231 | if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) { | ||||||
3232 | int64_t OperandV = OperandC->getSExtValue(); | ||||||
3233 | |||||||
3234 | // Check if we can shrink the operand enough to fit in an immediate (or | ||||||
3235 | // fit into a smaller immediate) by negating it and switching the | ||||||
3236 | // operation. | ||||||
3237 | if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) && | ||||||
3238 | ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) || | ||||||
3239 | (MemVT == MVT::i64 && !isInt<32>(OperandV) && | ||||||
3240 | isInt<32>(-OperandV))) && | ||||||
3241 | hasNoCarryFlagUses(StoredVal.getValue(1))) { | ||||||
3242 | OperandV = -OperandV; | ||||||
3243 | Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD; | ||||||
3244 | } | ||||||
3245 | |||||||
3246 | // First try to fit this into an Imm8 operand. If it doesn't fit, then try | ||||||
3247 | // the larger immediate operand. | ||||||
3248 | if (MemVT != MVT::i8 && isInt<8>(OperandV)) { | ||||||
3249 | Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); | ||||||
3250 | NewOpc = SelectImm8Opcode(Opc); | ||||||
3251 | } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) { | ||||||
3252 | Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); | ||||||
3253 | NewOpc = SelectImmOpcode(Opc); | ||||||
3254 | } | ||||||
3255 | } | ||||||
3256 | |||||||
3257 | if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) { | ||||||
3258 | SDValue CopyTo = | ||||||
3259 | CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS, | ||||||
3260 | StoredVal.getOperand(2), SDValue()); | ||||||
3261 | |||||||
3262 | const SDValue Ops[] = {Base, Scale, Index, Disp, | ||||||
3263 | Segment, Operand, CopyTo, CopyTo.getValue(1)}; | ||||||
3264 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, | ||||||
3265 | Ops); | ||||||
3266 | } else { | ||||||
3267 | const SDValue Ops[] = {Base, Scale, Index, Disp, | ||||||
3268 | Segment, Operand, InputChain}; | ||||||
3269 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, | ||||||
3270 | Ops); | ||||||
3271 | } | ||||||
3272 | break; | ||||||
3273 | } | ||||||
3274 | default: | ||||||
3275 | llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 3275); | ||||||
3276 | } | ||||||
3277 | |||||||
3278 | MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(), | ||||||
3279 | LoadNode->getMemOperand()}; | ||||||
3280 | CurDAG->setNodeMemRefs(Result, MemOps); | ||||||
3281 | |||||||
3282 | // Update Load Chain uses as well. | ||||||
3283 | ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1)); | ||||||
3284 | ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); | ||||||
3285 | ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); | ||||||
3286 | CurDAG->RemoveDeadNode(Node); | ||||||
3287 | return true; | ||||||
3288 | } | ||||||
3289 | |||||||
// See if this is an X & Mask that we can match to BEXTR/BZHI.
// Where Mask is one of the following patterns:
//   a) x &  (1 << nbits) - 1
//   b) x & ~(-1 << nbits)
//   c) x &  (-1 >> (32 - y))
//   d) x << (32 - y) >> (32 - y)
// On success the node is replaced with a BZHI (if BMI2 is available) or a
// BEXTR (BMI1) that extracts the low `nbits` of x, and true is returned.
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
  assert(
      (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
      "Should be either an and-mask, or right-shift after clearing high bits.");

  // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
    return false;

  MVT NVT = Node->getSimpleValueType(0);

  // Only supported for 32 and 64 bits.
  if (NVT != MVT::i32 && NVT != MVT::i64)
    return false;

  // The bit-count operand ('nbits'/'y' in the patterns above); filled in by
  // whichever pattern lambda below succeeds.
  SDValue NBits;

  // If we have BMI2's BZHI, we are ok with multi-use patterns.
  // Else, if we only have BMI1's BEXTR, we require one-use.
  const bool CanHaveExtraUses = Subtarget->hasBMI2();
  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
    return CanHaveExtraUses ||
           Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
  };
  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };

  // Look through a one-use i64 -> i32 truncate, returning its wide operand.
  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
    if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
      assert(V.getSimpleValueType() == MVT::i32 &&
             V.getOperand(0).getSimpleValueType() == MVT::i64 &&
             "Expected i64 -> i32 truncation");
      V = V.getOperand(0);
    }
    return V;
  };

  // a) x & ((1 << nbits) + (-1))
  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
                        &NBits](SDValue Mask) -> bool {
    // Match `add`. Must only have one use!
    if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
      return false;
    // We should be adding all-ones constant (i.e. subtracting one.)
    if (!isAllOnesConstant(Mask->getOperand(1)))
      return false;
    // Match `1 << nbits`. Might be truncated. Must only have one use!
    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
    if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
      return false;
    if (!isOneConstant(M0->getOperand(0)))
      return false;
    NBits = M0->getOperand(1);
    return true;
  };

  // True if V (looked through one-use trunc) is all-ones in the low
  // NVT-many bits; the high bits (beyond the final node's width) may be
  // anything.
  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
    V = peekThroughOneUseTruncation(V);
    return CurDAG->MaskedValueIsAllOnes(
        V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
                                NVT.getSizeInBits()));
  };

  // b) x & ~(-1 << nbits)
  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
                        &NBits](SDValue Mask) -> bool {
    // Match `~()`. Must only have one use!
    if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
      return false;
    // The -1 only has to be all-ones for the final Node's NVT.
    if (!isAllOnes(Mask->getOperand(1)))
      return false;
    // Match `-1 << nbits`. Might be truncated. Must only have one use!
    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
    if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
      return false;
    // The -1 only has to be all-ones for the final Node's NVT.
    if (!isAllOnes(M0->getOperand(0)))
      return false;
    NBits = M0->getOperand(1);
    return true;
  };

  // Match potentially-truncated (bitwidth - y), capturing y into NBits.
  auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
                                             unsigned Bitwidth) {
    // Skip over a truncate of the shift amount.
    if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
      ShiftAmt = ShiftAmt.getOperand(0);
      // The trunc should have been the only user of the real shift amount.
      if (!checkOneUse(ShiftAmt))
        return false;
    }
    // Match the shift amount as: (bitwidth - y). It should go away, too.
    if (ShiftAmt.getOpcode() != ISD::SUB)
      return false;
    auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
    if (!V0 || V0->getZExtValue() != Bitwidth)
      return false;
    NBits = ShiftAmt.getOperand(1);
    return true;
  };

  // c) x & (-1 >> (32 - y))
  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
                        matchShiftAmt](SDValue Mask) -> bool {
    // The mask itself may be truncated.
    Mask = peekThroughOneUseTruncation(Mask);
    unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
    // Match `l>>`. Must only have one use!
    if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
      return false;
    // We should be shifting truly all-ones constant.
    if (!isAllOnesConstant(Mask.getOperand(0)))
      return false;
    SDValue M1 = Mask.getOperand(1);
    // The shift amount should not be used externally.
    if (!checkOneUse(M1))
      return false;
    return matchShiftAmt(M1, Bitwidth);
  };

  // The value whose low bits are being extracted; set by pattern (d) or by
  // the AND handling below.
  SDValue X;

  // d) x << (32 - y) >> (32 - y)
  auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
                        &X](SDNode *Node) -> bool {
    if (Node->getOpcode() != ISD::SRL)
      return false;
    SDValue N0 = Node->getOperand(0);
    if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
      return false;
    unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
    SDValue N1 = Node->getOperand(1);
    SDValue N01 = N0->getOperand(1);
    // Both of the shifts must be by the exact same value.
    // There should not be any uses of the shift amount outside of the pattern.
    if (N1 != N01 || !checkTwoUse(N1))
      return false;
    if (!matchShiftAmt(N1, Bitwidth))
      return false;
    X = N0->getOperand(0);
    return true;
  };

  auto matchLowBitMask = [matchPatternA, matchPatternB,
                          matchPatternC](SDValue Mask) -> bool {
    return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
  };

  if (Node->getOpcode() == ISD::AND) {
    X = Node->getOperand(0);
    SDValue Mask = Node->getOperand(1);

    // AND is commutative: try the mask on either operand.
    if (matchLowBitMask(Mask)) {
      // Great.
    } else {
      std::swap(X, Mask);
      if (!matchLowBitMask(Mask))
        return false;
    }
  } else if (!matchPatternD(Node))
    return false;

  SDLoc DL(Node);

  // Truncate the shift amount.
  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);

  // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
  // All the other bits are undefined, we do not care about them.
  SDValue ImplDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);

  SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
  insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
  NBits = SDValue(
      CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
                             NBits, SRIdxVal), 0);
  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);

  if (Subtarget->hasBMI2()) {
    // Great, just emit the BZHI..
    if (NVT != MVT::i32) {
      // But have to place the bit count into the wide-enough register first.
      NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
      insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
    }

    SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
    ReplaceNode(Node, Extract.getNode());
    SelectCode(Extract.getNode());
    return true;
  }

  // Else, if we do *NOT* have BMI2, let's find out if the 'X' is
  // *logically* shifted (potentially with one-use trunc inbetween),
  // and the truncation was the only use of the shift,
  // and if so look past one-use truncation.
  {
    SDValue RealX = peekThroughOneUseTruncation(X);
    // FIXME: only if the shift is one-use?
    if (RealX != X && RealX.getOpcode() == ISD::SRL)
      X = RealX;
  }

  MVT XVT = X.getSimpleValueType();

  // Else, emitting BEXTR requires one more step.
  // The 'control' of BEXTR has the pattern of:
  // [15...8 bit][ 7...0 bit] location
  // [ bit count][     shift] name
  // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11

  // Shift NBits left by 8 bits, thus producing 'control'.
  // This makes the low 8 bits to be zero.
  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);

  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
  // FIXME: only if the shift is one-use?
  if (X.getOpcode() == ISD::SRL) {
    SDValue ShiftAmt = X.getOperand(1);
    X = X.getOperand(0);

    assert(ShiftAmt.getValueType() == MVT::i8 &&
           "Expected shift amount to be i8");

    // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
    // We could zext to i16 in some form, but we intentionally don't do that.
    SDValue OrigShiftAmt = ShiftAmt;
    ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
    insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);

    // And now 'or' these low 8 bits of shift amount into the 'control'.
    Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
    insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
  }

  // But have to place the 'control' into the wide-enough register first.
  if (XVT != MVT::i32) {
    Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
    insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
  }

  // And finally, form the BEXTR itself.
  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);

  // The 'X' was originally truncated. Do that now.
  if (XVT != NVT) {
    insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
    Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
  }

  ReplaceNode(Node, Extract.getNode());
  SelectCode(Extract.getNode());

  return true;
}
3558 | |||||||
// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
// Returns the newly created machine node (the caller replaces Node with it),
// or nullptr if the pattern does not match or is not profitable.
MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
  MVT NVT = Node->getSimpleValueType(0);
  SDLoc dl(Node);

  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  // If we have TBM we can use an immediate for the control. If we have BMI
  // we should only do this if the BEXTR instruction is implemented well.
  // Otherwise moving the control into a register makes this more costly.
  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of
  // LICM hoisting the move immediate would make it worthwhile with a less
  // optimal BEXTR?
  bool PreferBEXTR =
      Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
  if (!PreferBEXTR && !Subtarget->hasBMI2())
    return nullptr;

  // Must have a shift right.
  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
    return nullptr;

  // Shift can't have additional users.
  if (!N0->hasOneUse())
    return nullptr;

  // Only supported for 32 and 64 bits.
  if (NVT != MVT::i32 && NVT != MVT::i64)
    return nullptr;

  // Shift amount and RHS of and must be constant.
  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  if (!MaskCst || !ShiftCst)
    return nullptr;

  // And RHS must be a mask (a run of set bits starting at bit 0).
  uint64_t Mask = MaskCst->getZExtValue();
  if (!isMask_64(Mask))
    return nullptr;

  uint64_t Shift = ShiftCst->getZExtValue();
  uint64_t MaskSize = countPopulation(Mask);

  // Don't interfere with something that can be handled by extracting AH.
  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
  if (Shift == 8 && MaskSize == 8)
    return nullptr;

  // Make sure we are only using bits that were in the original value, not
  // shifted in.
  if (Shift + MaskSize > NVT.getSizeInBits())
    return nullptr;

  // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
  // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
  // does not fit into 32 bits. Load folding is not a sufficient reason.
  if (!PreferBEXTR && MaskSize <= 32)
    return nullptr;

  SDValue Control;
  unsigned ROpc, MOpc; // register-form and memory(load-folded)-form opcodes.

  if (!PreferBEXTR) {
    assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
    // If we can't make use of BEXTR then we can't fuse shift+mask stages.
    // Let's perform the mask first, and apply shift later. Note that we need
    // to widen the mask to account for the fact that we'll apply shift
    // afterwards!
    Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
    ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
    MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
    // BZHI takes its bit count in a register, not an immediate.
    unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
    Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
  } else {
    // The 'control' of BEXTR has the pattern of:
    // [15...8 bit][ 7...0 bit] location
    // [ bit count][     shift] name
    // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
    Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
    if (Subtarget->hasTBM()) {
      ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
      MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
    } else {
      assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
      // BMI requires the immediate to be placed in a register.
      ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
      MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
      unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
      Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
    }
  }

  MachineSDNode *NewNode;
  SDValue Input = N0->getOperand(0);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
    SDValue Ops[] = {
        Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
    SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
    NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
    // Update the chain.
    ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
    // Record the mem-refs
    CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
  } else {
    NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
  }

  if (!PreferBEXTR) {
    // We still need to apply the shift.
    SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
    unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
    NewNode =
        CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
  }

  return NewNode;
}
3678 | |||||||
3679 | // Emit a PCMISTR(I/M) instruction. | ||||||
3680 | MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc, | ||||||
3681 | bool MayFoldLoad, const SDLoc &dl, | ||||||
3682 | MVT VT, SDNode *Node) { | ||||||
3683 | SDValue N0 = Node->getOperand(0); | ||||||
3684 | SDValue N1 = Node->getOperand(1); | ||||||
3685 | SDValue Imm = Node->getOperand(2); | ||||||
3686 | const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue(); | ||||||
3687 | Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType()); | ||||||
3688 | |||||||
3689 | // Try to fold a load. No need to check alignment. | ||||||
3690 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
3691 | if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | ||||||
3692 | SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, | ||||||
3693 | N1.getOperand(0) }; | ||||||
3694 | SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other); | ||||||
3695 | MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
3696 | // Update the chain. | ||||||
3697 | ReplaceUses(N1.getValue(1), SDValue(CNode, 2)); | ||||||
3698 | // Record the mem-refs | ||||||
3699 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); | ||||||
3700 | return CNode; | ||||||
3701 | } | ||||||
3702 | |||||||
3703 | SDValue Ops[] = { N0, N1, Imm }; | ||||||
3704 | SDVTList VTs = CurDAG->getVTList(VT, MVT::i32); | ||||||
3705 | MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops); | ||||||
3706 | return CNode; | ||||||
3707 | } | ||||||
3708 | |||||||
3709 | // Emit a PCMESTR(I/M) instruction. Also return the Glue result in case we need | ||||||
3710 | // to emit a second instruction after this one. This is needed since we have two | ||||||
3711 | // copyToReg nodes glued before this and we need to continue that glue through. | ||||||
3712 | MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc, | ||||||
3713 | bool MayFoldLoad, const SDLoc &dl, | ||||||
3714 | MVT VT, SDNode *Node, | ||||||
3715 | SDValue &InFlag) { | ||||||
3716 | SDValue N0 = Node->getOperand(0); | ||||||
3717 | SDValue N2 = Node->getOperand(2); | ||||||
3718 | SDValue Imm = Node->getOperand(4); | ||||||
3719 | const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue(); | ||||||
3720 | Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType()); | ||||||
3721 | |||||||
3722 | // Try to fold a load. No need to check alignment. | ||||||
3723 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
3724 | if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | ||||||
| |||||||
3725 | SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, | ||||||
3726 | N2.getOperand(0), InFlag }; | ||||||
3727 | SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue); | ||||||
3728 | MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
3729 | InFlag = SDValue(CNode, 3); | ||||||
3730 | // Update the chain. | ||||||
3731 | ReplaceUses(N2.getValue(1), SDValue(CNode, 2)); | ||||||
3732 | // Record the mem-refs | ||||||
3733 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()}); | ||||||
3734 | return CNode; | ||||||
3735 | } | ||||||
3736 | |||||||
3737 | SDValue Ops[] = { N0, N2, Imm, InFlag }; | ||||||
3738 | SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue); | ||||||
3739 | MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops); | ||||||
3740 | InFlag = SDValue(CNode, 2); | ||||||
3741 | return CNode; | ||||||
3742 | } | ||||||
3743 | |||||||
// Try to simplify the amount operand of a scalar shift node N. If the amount
// is (X + C), (X - C) or (C - X) with C % Size == 0, shifting by X (or -X
// for the C - X form) is equivalent, because the new amount is explicitly
// masked with Size - 1 below and hardware ignores the higher count bits.
// Returns true if N was replaced or re-selected.
bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  // Only handle scalar shifts.
  if (VT.isVector())
    return false;

  // Narrower shifts only mask to 5 bits in hardware.
  unsigned Size = VT == MVT::i64 ? 64 : 32;

  SDValue OrigShiftAmt = N->getOperand(1);
  SDValue ShiftAmt = OrigShiftAmt;
  SDLoc DL(N);

  // Skip over a truncate of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
    ShiftAmt = ShiftAmt->getOperand(0);

  // This function is called after X86DAGToDAGISel::matchBitExtract(),
  // so we are not afraid that we might mess up BZHI/BEXTR pattern.

  SDValue NewShiftAmt;
  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
    // to avoid the ADD/SUB.
    if (isa<ConstantSDNode>(Add1) &&
        cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
      NewShiftAmt = Add0;
      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
      // to generate a NEG instead of a SUB of a constant.
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isa<ConstantSDNode>(Add0) &&
               cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
               cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
      // Insert a negate op.
      // TODO: This isn't guaranteed to replace the sub if there is a logic cone
      // that uses it that's not a shift.
      EVT SubVT = ShiftAmt.getValueType();
      SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
      SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
      NewShiftAmt = Neg;

      // Insert these operands into a valid topological order so they can
      // get selected independently.
      insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
      insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
    } else
      return false;
  } else
    return false;

  if (NewShiftAmt.getValueType() != MVT::i8) {
    // Need to truncate the shift amount.
    NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
    // Add to a correct topological ordering.
    insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
  }

  // Insert a new mask to keep the shift amount legal. This should be removed
  // by isel patterns.
  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
                                CurDAG->getConstant(Size - 1, DL, MVT::i8));
  // Place in a correct topological ordering.
  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);

  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
                                                   NewShiftAmt);
  if (UpdatedNode != N) {
    // If we found an existing node, we should replace ourselves with that node
    // and wait for it to be selected after its other users.
    ReplaceNode(N, UpdatedNode);
    return true;
  }

  // If the original shift amount is now dead, delete it so that we don't run
  // it through isel.
  if (OrigShiftAmt.getNode()->use_empty())
    CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());

  // Now that we've optimized the shift amount, defer to normal isel to get
  // load folding and legacy vs BMI2 selection without repeating it here.
  SelectCode(N);
  return true;
}
3830 | |||||||
3831 | bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { | ||||||
3832 | MVT NVT = N->getSimpleValueType(0); | ||||||
3833 | unsigned Opcode = N->getOpcode(); | ||||||
3834 | SDLoc dl(N); | ||||||
3835 | |||||||
3836 | // For operations of the form (x << C1) op C2, check if we can use a smaller | ||||||
3837 | // encoding for C2 by transforming it into (x op (C2>>C1)) << C1. | ||||||
3838 | SDValue Shift = N->getOperand(0); | ||||||
3839 | SDValue N1 = N->getOperand(1); | ||||||
3840 | |||||||
3841 | ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); | ||||||
3842 | if (!Cst) | ||||||
3843 | return false; | ||||||
3844 | |||||||
3845 | int64_t Val = Cst->getSExtValue(); | ||||||
3846 | |||||||
3847 | // If we have an any_extend feeding the AND, look through it to see if there | ||||||
3848 | // is a shift behind it. But only if the AND doesn't use the extended bits. | ||||||
3849 | // FIXME: Generalize this to other ANY_EXTEND than i32 to i64? | ||||||
3850 | bool FoundAnyExtend = false; | ||||||
3851 | if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && | ||||||
3852 | Shift.getOperand(0).getSimpleValueType() == MVT::i32 && | ||||||
3853 | isUInt<32>(Val)) { | ||||||
3854 | FoundAnyExtend = true; | ||||||
3855 | Shift = Shift.getOperand(0); | ||||||
3856 | } | ||||||
3857 | |||||||
3858 | if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) | ||||||
3859 | return false; | ||||||
3860 | |||||||
3861 | // i8 is unshrinkable, i16 should be promoted to i32. | ||||||
3862 | if (NVT != MVT::i32 && NVT != MVT::i64) | ||||||
3863 | return false; | ||||||
3864 | |||||||
3865 | ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); | ||||||
3866 | if (!ShlCst) | ||||||
3867 | return false; | ||||||
3868 | |||||||
3869 | uint64_t ShAmt = ShlCst->getZExtValue(); | ||||||
3870 | |||||||
3871 | // Make sure that we don't change the operation by removing bits. | ||||||
3872 | // This only matters for OR and XOR, AND is unaffected. | ||||||
3873 | uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1; | ||||||
3874 | if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) | ||||||
3875 | return false; | ||||||
3876 | |||||||
3877 | // Check the minimum bitwidth for the new constant. | ||||||
3878 | // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. | ||||||
3879 | auto CanShrinkImmediate = [&](int64_t &ShiftedVal) { | ||||||
3880 | if (Opcode == ISD::AND) { | ||||||
3881 | // AND32ri is the same as AND64ri32 with zext imm. | ||||||
3882 | // Try this before sign extended immediates below. | ||||||
3883 | ShiftedVal = (uint64_t)Val >> ShAmt; | ||||||
3884 | if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal)) | ||||||
3885 | return true; | ||||||
3886 | // Also swap order when the AND can become MOVZX. | ||||||
3887 | if (ShiftedVal == UINT8_MAX(255) || ShiftedVal == UINT16_MAX(65535)) | ||||||
3888 | return true; | ||||||
3889 | } | ||||||
3890 | ShiftedVal = Val >> ShAmt; | ||||||
3891 | if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) || | ||||||
3892 | (!isInt<32>(Val) && isInt<32>(ShiftedVal))) | ||||||
3893 | return true; | ||||||
3894 | if (Opcode != ISD::AND) { | ||||||
3895 | // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr | ||||||
3896 | ShiftedVal = (uint64_t)Val >> ShAmt; | ||||||
3897 | if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal)) | ||||||
3898 | return true; | ||||||
3899 | } | ||||||
3900 | return false; | ||||||
3901 | }; | ||||||
3902 | |||||||
3903 | int64_t ShiftedVal; | ||||||
3904 | if (!CanShrinkImmediate(ShiftedVal)) | ||||||
3905 | return false; | ||||||
3906 | |||||||
3907 | // Ok, we can reorder to get a smaller immediate. | ||||||
3908 | |||||||
3909 | // But, its possible the original immediate allowed an AND to become MOVZX. | ||||||
3910 | // Doing this late due to avoid the MakedValueIsZero call as late as | ||||||
3911 | // possible. | ||||||
3912 | if (Opcode == ISD::AND) { | ||||||
3913 | // Find the smallest zext this could possibly be. | ||||||
3914 | unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits(); | ||||||
3915 | ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U)); | ||||||
3916 | |||||||
3917 | // Figure out which bits need to be zero to achieve that mask. | ||||||
3918 | APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(), | ||||||
3919 | ZExtWidth); | ||||||
3920 | NeededMask &= ~Cst->getAPIntValue(); | ||||||
3921 | |||||||
3922 | if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask)) | ||||||
3923 | return false; | ||||||
3924 | } | ||||||
3925 | |||||||
3926 | SDValue X = Shift.getOperand(0); | ||||||
3927 | if (FoundAnyExtend) { | ||||||
3928 | SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X); | ||||||
3929 | insertDAGNode(*CurDAG, SDValue(N, 0), NewX); | ||||||
3930 | X = NewX; | ||||||
3931 | } | ||||||
3932 | |||||||
3933 | SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT); | ||||||
3934 | insertDAGNode(*CurDAG, SDValue(N, 0), NewCst); | ||||||
3935 | SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst); | ||||||
3936 | insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp); | ||||||
3937 | SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp, | ||||||
3938 | Shift.getOperand(1)); | ||||||
3939 | ReplaceNode(N, NewSHL.getNode()); | ||||||
3940 | SelectCode(NewSHL.getNode()); | ||||||
3941 | return true; | ||||||
3942 | } | ||||||
3943 | |||||||
3944 | /// If the high bits of an 'and' operand are known zero, try setting the | ||||||
3945 | /// high bits of an 'and' constant operand to produce a smaller encoding by | ||||||
3946 | /// creating a small, sign-extended negative immediate rather than a large | ||||||
3947 | /// positive one. This reverses a transform in SimplifyDemandedBits that | ||||||
3948 | /// shrinks mask constants by clearing bits. There is also a possibility that | ||||||
3949 | /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that | ||||||
3950 | /// case, just replace the 'and'. Return 'true' if the node is replaced. | ||||||
3951 | bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { | ||||||
3952 | // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't | ||||||
3953 | // have immediate operands. | ||||||
3954 | MVT VT = And->getSimpleValueType(0); | ||||||
3955 | if (VT != MVT::i32 && VT != MVT::i64) | ||||||
3956 | return false; | ||||||
3957 | |||||||
3958 | auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1)); | ||||||
3959 | if (!And1C) | ||||||
3960 | return false; | ||||||
3961 | |||||||
3962 | // Bail out if the mask constant is already negative. It's can't shrink more. | ||||||
3963 | // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel | ||||||
3964 | // patterns to use a 32-bit and instead of a 64-bit and by relying on the | ||||||
3965 | // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits | ||||||
3966 | // are negative too. | ||||||
3967 | APInt MaskVal = And1C->getAPIntValue(); | ||||||
3968 | unsigned MaskLZ = MaskVal.countLeadingZeros(); | ||||||
3969 | if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32)) | ||||||
3970 | return false; | ||||||
3971 | |||||||
3972 | // Don't extend into the upper 32 bits of a 64 bit mask. | ||||||
3973 | if (VT == MVT::i64 && MaskLZ >= 32) { | ||||||
3974 | MaskLZ -= 32; | ||||||
3975 | MaskVal = MaskVal.trunc(32); | ||||||
3976 | } | ||||||
3977 | |||||||
3978 | SDValue And0 = And->getOperand(0); | ||||||
3979 | APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ); | ||||||
3980 | APInt NegMaskVal = MaskVal | HighZeros; | ||||||
3981 | |||||||
3982 | // If a negative constant would not allow a smaller encoding, there's no need | ||||||
3983 | // to continue. Only change the constant when we know it's a win. | ||||||
3984 | unsigned MinWidth = NegMaskVal.getMinSignedBits(); | ||||||
3985 | if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32)) | ||||||
3986 | return false; | ||||||
3987 | |||||||
3988 | // Extend masks if we truncated above. | ||||||
3989 | if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) { | ||||||
3990 | NegMaskVal = NegMaskVal.zext(64); | ||||||
3991 | HighZeros = HighZeros.zext(64); | ||||||
3992 | } | ||||||
3993 | |||||||
3994 | // The variable operand must be all zeros in the top bits to allow using the | ||||||
3995 | // new, negative constant as the mask. | ||||||
3996 | if (!CurDAG->MaskedValueIsZero(And0, HighZeros)) | ||||||
3997 | return false; | ||||||
3998 | |||||||
3999 | // Check if the mask is -1. In that case, this is an unnecessary instruction | ||||||
4000 | // that escaped earlier analysis. | ||||||
4001 | if (NegMaskVal.isAllOnesValue()) { | ||||||
4002 | ReplaceNode(And, And0.getNode()); | ||||||
4003 | return true; | ||||||
4004 | } | ||||||
4005 | |||||||
4006 | // A negative mask allows a smaller encoding. Create a new 'and' node. | ||||||
4007 | SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT); | ||||||
4008 | SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask); | ||||||
4009 | ReplaceNode(And, NewAnd.getNode()); | ||||||
4010 | SelectCode(NewAnd.getNode()); | ||||||
4011 | return true; | ||||||
4012 | } | ||||||
4013 | |||||||
4014 | static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad, | ||||||
4015 | bool FoldedBCast, bool Masked) { | ||||||
4016 | if (Masked) { | ||||||
4017 | if (FoldedLoad) { | ||||||
4018 | switch (TestVT.SimpleTy) { | ||||||
4019 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4019); | ||||||
4020 | case MVT::v16i8: | ||||||
4021 | return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk; | ||||||
4022 | case MVT::v8i16: | ||||||
4023 | return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk; | ||||||
4024 | case MVT::v4i32: | ||||||
4025 | return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk; | ||||||
4026 | case MVT::v2i64: | ||||||
4027 | return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk; | ||||||
4028 | case MVT::v32i8: | ||||||
4029 | return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk; | ||||||
4030 | case MVT::v16i16: | ||||||
4031 | return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk; | ||||||
4032 | case MVT::v8i32: | ||||||
4033 | return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk; | ||||||
4034 | case MVT::v4i64: | ||||||
4035 | return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk; | ||||||
4036 | case MVT::v64i8: | ||||||
4037 | return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk; | ||||||
4038 | case MVT::v32i16: | ||||||
4039 | return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk; | ||||||
4040 | case MVT::v16i32: | ||||||
4041 | return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk; | ||||||
4042 | case MVT::v8i64: | ||||||
4043 | return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk; | ||||||
4044 | } | ||||||
4045 | } | ||||||
4046 | |||||||
4047 | if (FoldedBCast) { | ||||||
4048 | switch (TestVT.SimpleTy) { | ||||||
4049 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4049); | ||||||
4050 | case MVT::v4i32: | ||||||
4051 | return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk; | ||||||
4052 | case MVT::v2i64: | ||||||
4053 | return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk; | ||||||
4054 | case MVT::v8i32: | ||||||
4055 | return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk; | ||||||
4056 | case MVT::v4i64: | ||||||
4057 | return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk; | ||||||
4058 | case MVT::v16i32: | ||||||
4059 | return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk; | ||||||
4060 | case MVT::v8i64: | ||||||
4061 | return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk; | ||||||
4062 | } | ||||||
4063 | } | ||||||
4064 | |||||||
4065 | switch (TestVT.SimpleTy) { | ||||||
4066 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4066); | ||||||
4067 | case MVT::v16i8: | ||||||
4068 | return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk; | ||||||
4069 | case MVT::v8i16: | ||||||
4070 | return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk; | ||||||
4071 | case MVT::v4i32: | ||||||
4072 | return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk; | ||||||
4073 | case MVT::v2i64: | ||||||
4074 | return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk; | ||||||
4075 | case MVT::v32i8: | ||||||
4076 | return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk; | ||||||
4077 | case MVT::v16i16: | ||||||
4078 | return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk; | ||||||
4079 | case MVT::v8i32: | ||||||
4080 | return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk; | ||||||
4081 | case MVT::v4i64: | ||||||
4082 | return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk; | ||||||
4083 | case MVT::v64i8: | ||||||
4084 | return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk; | ||||||
4085 | case MVT::v32i16: | ||||||
4086 | return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk; | ||||||
4087 | case MVT::v16i32: | ||||||
4088 | return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk; | ||||||
4089 | case MVT::v8i64: | ||||||
4090 | return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk; | ||||||
4091 | } | ||||||
4092 | } | ||||||
4093 | |||||||
4094 | if (FoldedLoad) { | ||||||
4095 | switch (TestVT.SimpleTy) { | ||||||
4096 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4096); | ||||||
4097 | case MVT::v16i8: | ||||||
4098 | return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm; | ||||||
4099 | case MVT::v8i16: | ||||||
4100 | return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm; | ||||||
4101 | case MVT::v4i32: | ||||||
4102 | return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm; | ||||||
4103 | case MVT::v2i64: | ||||||
4104 | return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm; | ||||||
4105 | case MVT::v32i8: | ||||||
4106 | return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm; | ||||||
4107 | case MVT::v16i16: | ||||||
4108 | return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm; | ||||||
4109 | case MVT::v8i32: | ||||||
4110 | return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm; | ||||||
4111 | case MVT::v4i64: | ||||||
4112 | return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm; | ||||||
4113 | case MVT::v64i8: | ||||||
4114 | return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm; | ||||||
4115 | case MVT::v32i16: | ||||||
4116 | return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm; | ||||||
4117 | case MVT::v16i32: | ||||||
4118 | return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm; | ||||||
4119 | case MVT::v8i64: | ||||||
4120 | return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm; | ||||||
4121 | } | ||||||
4122 | } | ||||||
4123 | |||||||
4124 | if (FoldedBCast) { | ||||||
4125 | switch (TestVT.SimpleTy) { | ||||||
4126 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4126); | ||||||
4127 | case MVT::v4i32: | ||||||
4128 | return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb; | ||||||
4129 | case MVT::v2i64: | ||||||
4130 | return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb; | ||||||
4131 | case MVT::v8i32: | ||||||
4132 | return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb; | ||||||
4133 | case MVT::v4i64: | ||||||
4134 | return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb; | ||||||
4135 | case MVT::v16i32: | ||||||
4136 | return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb; | ||||||
4137 | case MVT::v8i64: | ||||||
4138 | return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb; | ||||||
4139 | } | ||||||
4140 | } | ||||||
4141 | |||||||
4142 | switch (TestVT.SimpleTy) { | ||||||
4143 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4143); | ||||||
4144 | case MVT::v16i8: | ||||||
4145 | return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr; | ||||||
4146 | case MVT::v8i16: | ||||||
4147 | return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr; | ||||||
4148 | case MVT::v4i32: | ||||||
4149 | return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr; | ||||||
4150 | case MVT::v2i64: | ||||||
4151 | return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr; | ||||||
4152 | case MVT::v32i8: | ||||||
4153 | return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr; | ||||||
4154 | case MVT::v16i16: | ||||||
4155 | return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr; | ||||||
4156 | case MVT::v8i32: | ||||||
4157 | return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr; | ||||||
4158 | case MVT::v4i64: | ||||||
4159 | return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr; | ||||||
4160 | case MVT::v64i8: | ||||||
4161 | return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr; | ||||||
4162 | case MVT::v32i16: | ||||||
4163 | return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr; | ||||||
4164 | case MVT::v16i32: | ||||||
4165 | return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr; | ||||||
4166 | case MVT::v8i64: | ||||||
4167 | return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr; | ||||||
4168 | } | ||||||
4169 | } | ||||||
4170 | |||||||
4171 | // Try to create VPTESTM instruction. If InMask is not null, it will be used | ||||||
4172 | // to form a masked operation. | ||||||
4173 | bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, | ||||||
4174 | SDValue InMask) { | ||||||
4175 | assert(Subtarget->hasAVX512() && "Expected AVX512!")((Subtarget->hasAVX512() && "Expected AVX512!") ? static_cast <void> (0) : __assert_fail ("Subtarget->hasAVX512() && \"Expected AVX512!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4175, __PRETTY_FUNCTION__)); | ||||||
4176 | assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&((Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && "Unexpected VT!") ? static_cast<void> (0) : __assert_fail ("Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4177, __PRETTY_FUNCTION__)) | ||||||
4177 | "Unexpected VT!")((Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && "Unexpected VT!") ? static_cast<void> (0) : __assert_fail ("Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && \"Unexpected VT!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4177, __PRETTY_FUNCTION__)); | ||||||
4178 | |||||||
4179 | // Look for equal and not equal compares. | ||||||
4180 | ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get(); | ||||||
4181 | if (CC != ISD::SETEQ && CC != ISD::SETNE) | ||||||
4182 | return false; | ||||||
4183 | |||||||
4184 | SDValue SetccOp0 = Setcc.getOperand(0); | ||||||
4185 | SDValue SetccOp1 = Setcc.getOperand(1); | ||||||
4186 | |||||||
4187 | // Canonicalize the all zero vector to the RHS. | ||||||
4188 | if (ISD::isBuildVectorAllZeros(SetccOp0.getNode())) | ||||||
4189 | std::swap(SetccOp0, SetccOp1); | ||||||
4190 | |||||||
4191 | // See if we're comparing against zero. | ||||||
4192 | if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode())) | ||||||
4193 | return false; | ||||||
4194 | |||||||
4195 | SDValue N0 = SetccOp0; | ||||||
4196 | |||||||
4197 | MVT CmpVT = N0.getSimpleValueType(); | ||||||
4198 | MVT CmpSVT = CmpVT.getVectorElementType(); | ||||||
4199 | |||||||
4200 | // Start with both operands the same. We'll try to refine this. | ||||||
4201 | SDValue Src0 = N0; | ||||||
4202 | SDValue Src1 = N0; | ||||||
4203 | |||||||
4204 | { | ||||||
4205 | // Look through single use bitcasts. | ||||||
4206 | SDValue N0Temp = N0; | ||||||
4207 | if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse()) | ||||||
4208 | N0Temp = N0.getOperand(0); | ||||||
4209 | |||||||
4210 | // Look for single use AND. | ||||||
4211 | if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) { | ||||||
4212 | Src0 = N0Temp.getOperand(0); | ||||||
4213 | Src1 = N0Temp.getOperand(1); | ||||||
4214 | } | ||||||
4215 | } | ||||||
4216 | |||||||
4217 | // Without VLX we need to widen the load. | ||||||
4218 | bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector(); | ||||||
4219 | |||||||
4220 | // We can only fold loads if the sources are unique. | ||||||
4221 | bool CanFoldLoads = Src0 != Src1; | ||||||
4222 | |||||||
4223 | // Try to fold loads unless we need to widen. | ||||||
4224 | bool FoldedLoad = false; | ||||||
4225 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load; | ||||||
4226 | if (!Widen && CanFoldLoads) { | ||||||
4227 | Load = Src1; | ||||||
4228 | FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3, | ||||||
4229 | Tmp4); | ||||||
4230 | if (!FoldedLoad) { | ||||||
      // AND is commutative, so also try folding a load from the other operand.
4232 | Load = Src0; | ||||||
4233 | FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, | ||||||
4234 | Tmp3, Tmp4); | ||||||
4235 | if (FoldedLoad) | ||||||
4236 | std::swap(Src0, Src1); | ||||||
4237 | } | ||||||
4238 | } | ||||||
4239 | |||||||
4240 | auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) { | ||||||
4241 | // Look through single use bitcasts. | ||||||
4242 | if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) { | ||||||
4243 | Parent = Src.getNode(); | ||||||
4244 | Src = Src.getOperand(0); | ||||||
4245 | } | ||||||
4246 | |||||||
4247 | if (Src.getOpcode() == X86ISD::VBROADCAST_LOAD && Src.hasOneUse()) { | ||||||
4248 | auto *MemIntr = cast<MemIntrinsicSDNode>(Src); | ||||||
4249 | if (MemIntr->getMemoryVT().getSizeInBits() == CmpSVT.getSizeInBits()) | ||||||
4250 | return Src; | ||||||
4251 | } | ||||||
4252 | |||||||
4253 | return SDValue(); | ||||||
4254 | }; | ||||||
4255 | |||||||
4256 | // If we didn't fold a load, try to match broadcast. No widening limitation | ||||||
4257 | // for this. But only 32 and 64 bit types are supported. | ||||||
4258 | bool FoldedBCast = false; | ||||||
4259 | if (!FoldedLoad && CanFoldLoads && | ||||||
4260 | (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) { | ||||||
4261 | SDNode *ParentNode = N0.getNode(); | ||||||
4262 | if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) { | ||||||
4263 | FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0, | ||||||
4264 | Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
4265 | } | ||||||
4266 | |||||||
4267 | // Try the other operand. | ||||||
4268 | if (!FoldedBCast) { | ||||||
4269 | SDNode *ParentNode = N0.getNode(); | ||||||
4270 | if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) { | ||||||
4271 | FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0, | ||||||
4272 | Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
4273 | if (FoldedBCast) | ||||||
4274 | std::swap(Src0, Src1); | ||||||
4275 | } | ||||||
4276 | } | ||||||
4277 | } | ||||||
4278 | |||||||
4279 | auto getMaskRC = [](MVT MaskVT) { | ||||||
4280 | switch (MaskVT.SimpleTy) { | ||||||
4281 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4281); | ||||||
4282 | case MVT::v2i1: return X86::VK2RegClassID; | ||||||
4283 | case MVT::v4i1: return X86::VK4RegClassID; | ||||||
4284 | case MVT::v8i1: return X86::VK8RegClassID; | ||||||
4285 | case MVT::v16i1: return X86::VK16RegClassID; | ||||||
4286 | case MVT::v32i1: return X86::VK32RegClassID; | ||||||
4287 | case MVT::v64i1: return X86::VK64RegClassID; | ||||||
4288 | } | ||||||
4289 | }; | ||||||
4290 | |||||||
4291 | bool IsMasked = InMask.getNode() != nullptr; | ||||||
4292 | |||||||
4293 | SDLoc dl(Root); | ||||||
4294 | |||||||
4295 | MVT ResVT = Setcc.getSimpleValueType(); | ||||||
4296 | MVT MaskVT = ResVT; | ||||||
4297 | if (Widen) { | ||||||
4298 | // Widen the inputs using insert_subreg or copy_to_regclass. | ||||||
4299 | unsigned Scale = CmpVT.is128BitVector() ? 4 : 2; | ||||||
4300 | unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm; | ||||||
4301 | unsigned NumElts = CmpVT.getVectorNumElements() * Scale; | ||||||
4302 | CmpVT = MVT::getVectorVT(CmpSVT, NumElts); | ||||||
4303 | MaskVT = MVT::getVectorVT(MVT::i1, NumElts); | ||||||
4304 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl, | ||||||
4305 | CmpVT), 0); | ||||||
4306 | Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0); | ||||||
4307 | |||||||
4308 | assert(!FoldedLoad && "Shouldn't have folded the load")((!FoldedLoad && "Shouldn't have folded the load") ? static_cast <void> (0) : __assert_fail ("!FoldedLoad && \"Shouldn't have folded the load\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4308, __PRETTY_FUNCTION__)); | ||||||
4309 | if (!FoldedBCast) | ||||||
4310 | Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1); | ||||||
4311 | |||||||
4312 | if (IsMasked) { | ||||||
4313 | // Widen the mask. | ||||||
4314 | unsigned RegClass = getMaskRC(MaskVT); | ||||||
4315 | SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); | ||||||
4316 | InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, | ||||||
4317 | dl, MaskVT, InMask, RC), 0); | ||||||
4318 | } | ||||||
4319 | } | ||||||
4320 | |||||||
4321 | bool IsTestN = CC == ISD::SETEQ; | ||||||
4322 | unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast, | ||||||
4323 | IsMasked); | ||||||
4324 | |||||||
4325 | MachineSDNode *CNode; | ||||||
4326 | if (FoldedLoad || FoldedBCast) { | ||||||
4327 | SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other); | ||||||
4328 | |||||||
4329 | if (IsMasked) { | ||||||
4330 | SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, | ||||||
4331 | Load.getOperand(0) }; | ||||||
4332 | CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); | ||||||
4333 | } else { | ||||||
4334 | SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, | ||||||
4335 | Load.getOperand(0) }; | ||||||
4336 | CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); | ||||||
4337 | } | ||||||
4338 | |||||||
4339 | // Update the chain. | ||||||
4340 | ReplaceUses(Load.getValue(1), SDValue(CNode, 1)); | ||||||
4341 | // Record the mem-refs | ||||||
4342 | CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Load)->getMemOperand()}); | ||||||
4343 | } else { | ||||||
4344 | if (IsMasked) | ||||||
4345 | CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1); | ||||||
4346 | else | ||||||
4347 | CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1); | ||||||
4348 | } | ||||||
4349 | |||||||
4350 | // If we widened, we need to shrink the mask VT. | ||||||
4351 | if (Widen) { | ||||||
4352 | unsigned RegClass = getMaskRC(ResVT); | ||||||
4353 | SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); | ||||||
4354 | CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, | ||||||
4355 | dl, ResVT, SDValue(CNode, 0), RC); | ||||||
4356 | } | ||||||
4357 | |||||||
4358 | ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0)); | ||||||
4359 | CurDAG->RemoveDeadNode(Root); | ||||||
4360 | return true; | ||||||
4361 | } | ||||||
4362 | |||||||
4363 | // Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it | ||||||
4364 | // into vpternlog. | ||||||
4365 | bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { | ||||||
4366 | assert(N->getOpcode() == ISD::OR && "Unexpected opcode!")((N->getOpcode() == ISD::OR && "Unexpected opcode!" ) ? static_cast<void> (0) : __assert_fail ("N->getOpcode() == ISD::OR && \"Unexpected opcode!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4366, __PRETTY_FUNCTION__)); | ||||||
4367 | |||||||
4368 | MVT NVT = N->getSimpleValueType(0); | ||||||
4369 | |||||||
4370 | // Make sure we support VPTERNLOG. | ||||||
4371 | if (!NVT.isVector() || !Subtarget->hasAVX512()) | ||||||
4372 | return false; | ||||||
4373 | |||||||
4374 | // We need VLX for 128/256-bit. | ||||||
4375 | if (!(Subtarget->hasVLX() || NVT.is512BitVector())) | ||||||
4376 | return false; | ||||||
4377 | |||||||
4378 | SDValue N0 = N->getOperand(0); | ||||||
4379 | SDValue N1 = N->getOperand(1); | ||||||
4380 | |||||||
4381 | // Canonicalize AND to LHS. | ||||||
4382 | if (N1.getOpcode() == ISD::AND) | ||||||
4383 | std::swap(N0, N1); | ||||||
4384 | |||||||
4385 | if (N0.getOpcode() != ISD::AND || | ||||||
4386 | N1.getOpcode() != X86ISD::ANDNP || | ||||||
4387 | !N0.hasOneUse() || !N1.hasOneUse()) | ||||||
4388 | return false; | ||||||
4389 | |||||||
4390 | // ANDN is not commutable, use it to pick down A and C. | ||||||
4391 | SDValue A = N1.getOperand(0); | ||||||
4392 | SDValue C = N1.getOperand(1); | ||||||
4393 | |||||||
4394 | // AND is commutable, if one operand matches A, the other operand is B. | ||||||
4395 | // Otherwise this isn't a match. | ||||||
4396 | SDValue B; | ||||||
4397 | if (N0.getOperand(0) == A) | ||||||
4398 | B = N0.getOperand(1); | ||||||
4399 | else if (N0.getOperand(1) == A) | ||||||
4400 | B = N0.getOperand(0); | ||||||
4401 | else | ||||||
4402 | return false; | ||||||
4403 | |||||||
4404 | SDLoc dl(N); | ||||||
4405 | SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); | ||||||
4406 | SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm); | ||||||
4407 | ReplaceNode(N, Ternlog.getNode()); | ||||||
4408 | SelectCode(Ternlog.getNode()); | ||||||
4409 | return true; | ||||||
4410 | } | ||||||
4411 | |||||||
4412 | void X86DAGToDAGISel::Select(SDNode *Node) { | ||||||
4413 | MVT NVT = Node->getSimpleValueType(0); | ||||||
4414 | unsigned Opcode = Node->getOpcode(); | ||||||
4415 | SDLoc dl(Node); | ||||||
4416 | |||||||
4417 | if (Node->isMachineOpcode()) { | ||||||
4418 | LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
4419 | Node->setNodeId(-1); | ||||||
4420 | return; // Already selected. | ||||||
4421 | } | ||||||
4422 | |||||||
4423 | switch (Opcode) { | ||||||
4424 | default: break; | ||||||
4425 | case ISD::INTRINSIC_VOID: { | ||||||
4426 | unsigned IntNo = Node->getConstantOperandVal(1); | ||||||
4427 | switch (IntNo) { | ||||||
4428 | default: break; | ||||||
4429 | case Intrinsic::x86_sse3_monitor: | ||||||
4430 | case Intrinsic::x86_monitorx: | ||||||
4431 | case Intrinsic::x86_clzero: { | ||||||
4432 | bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64; | ||||||
4433 | |||||||
4434 | unsigned Opc = 0; | ||||||
4435 | switch (IntNo) { | ||||||
4436 | default: llvm_unreachable("Unexpected intrinsic!")::llvm::llvm_unreachable_internal("Unexpected intrinsic!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4436); | ||||||
4437 | case Intrinsic::x86_sse3_monitor: | ||||||
4438 | if (!Subtarget->hasSSE3()) | ||||||
4439 | break; | ||||||
4440 | Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr; | ||||||
4441 | break; | ||||||
4442 | case Intrinsic::x86_monitorx: | ||||||
4443 | if (!Subtarget->hasMWAITX()) | ||||||
4444 | break; | ||||||
4445 | Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr; | ||||||
4446 | break; | ||||||
4447 | case Intrinsic::x86_clzero: | ||||||
4448 | if (!Subtarget->hasCLZERO()) | ||||||
4449 | break; | ||||||
4450 | Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r; | ||||||
4451 | break; | ||||||
4452 | } | ||||||
4453 | |||||||
4454 | if (Opc) { | ||||||
4455 | unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX; | ||||||
4456 | SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg, | ||||||
4457 | Node->getOperand(2), SDValue()); | ||||||
4458 | SDValue InFlag = Chain.getValue(1); | ||||||
4459 | |||||||
4460 | if (IntNo == Intrinsic::x86_sse3_monitor || | ||||||
4461 | IntNo == Intrinsic::x86_monitorx) { | ||||||
4462 | // Copy the other two operands to ECX and EDX. | ||||||
4463 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3), | ||||||
4464 | InFlag); | ||||||
4465 | InFlag = Chain.getValue(1); | ||||||
4466 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4), | ||||||
4467 | InFlag); | ||||||
4468 | InFlag = Chain.getValue(1); | ||||||
4469 | } | ||||||
4470 | |||||||
4471 | MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, | ||||||
4472 | { Chain, InFlag}); | ||||||
4473 | ReplaceNode(Node, CNode); | ||||||
4474 | return; | ||||||
4475 | } | ||||||
4476 | |||||||
4477 | break; | ||||||
4478 | } | ||||||
4479 | } | ||||||
4480 | |||||||
4481 | break; | ||||||
4482 | } | ||||||
4483 | case ISD::BRIND: { | ||||||
4484 | if (Subtarget->isTargetNaCl()) | ||||||
4485 | // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We | ||||||
4486 | // leave the instruction alone. | ||||||
4487 | break; | ||||||
4488 | if (Subtarget->isTarget64BitILP32()) { | ||||||
4489 | // Converts a 32-bit register to a 64-bit, zero-extended version of | ||||||
4490 | // it. This is needed because x86-64 can do many things, but jmp %r32 | ||||||
4491 | // ain't one of them. | ||||||
4492 | const SDValue &Target = Node->getOperand(1); | ||||||
4493 | assert(Target.getSimpleValueType() == llvm::MVT::i32)((Target.getSimpleValueType() == llvm::MVT::i32) ? static_cast <void> (0) : __assert_fail ("Target.getSimpleValueType() == llvm::MVT::i32" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4493, __PRETTY_FUNCTION__)); | ||||||
4494 | SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64)); | ||||||
4495 | SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other, | ||||||
4496 | Node->getOperand(0), ZextTarget); | ||||||
4497 | ReplaceNode(Node, Brind.getNode()); | ||||||
4498 | SelectCode(ZextTarget.getNode()); | ||||||
4499 | SelectCode(Brind.getNode()); | ||||||
4500 | return; | ||||||
4501 | } | ||||||
4502 | break; | ||||||
4503 | } | ||||||
4504 | case X86ISD::GlobalBaseReg: | ||||||
4505 | ReplaceNode(Node, getGlobalBaseReg()); | ||||||
4506 | return; | ||||||
4507 | |||||||
4508 | case ISD::BITCAST: | ||||||
4509 | // Just drop all 128/256/512-bit bitcasts. | ||||||
4510 | if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() || | ||||||
4511 | NVT == MVT::f128) { | ||||||
4512 | ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); | ||||||
4513 | CurDAG->RemoveDeadNode(Node); | ||||||
4514 | return; | ||||||
4515 | } | ||||||
4516 | break; | ||||||
4517 | |||||||
4518 | case ISD::VSELECT: { | ||||||
4519 | // Replace VSELECT with non-mask conditions with with BLENDV. | ||||||
4520 | if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1) | ||||||
4521 | break; | ||||||
4522 | |||||||
4523 | assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!")((Subtarget->hasSSE41() && "Expected SSE4.1 support!" ) ? static_cast<void> (0) : __assert_fail ("Subtarget->hasSSE41() && \"Expected SSE4.1 support!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4523, __PRETTY_FUNCTION__)); | ||||||
4524 | SDValue Blendv = CurDAG->getNode( | ||||||
4525 | X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0), | ||||||
4526 | Node->getOperand(1), Node->getOperand(2)); | ||||||
4527 | ReplaceNode(Node, Blendv.getNode()); | ||||||
4528 | SelectCode(Blendv.getNode()); | ||||||
4529 | // We already called ReplaceUses. | ||||||
4530 | return; | ||||||
4531 | } | ||||||
4532 | |||||||
4533 | case ISD::SRL: | ||||||
4534 | if (matchBitExtract(Node)) | ||||||
4535 | return; | ||||||
4536 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
4537 | case ISD::SRA: | ||||||
4538 | case ISD::SHL: | ||||||
4539 | if (tryShiftAmountMod(Node)) | ||||||
4540 | return; | ||||||
4541 | break; | ||||||
4542 | |||||||
4543 | case ISD::AND: | ||||||
4544 | if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) { | ||||||
4545 | // Try to form a masked VPTESTM. Operands can be in either order. | ||||||
4546 | SDValue N0 = Node->getOperand(0); | ||||||
4547 | SDValue N1 = Node->getOperand(1); | ||||||
4548 | if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() && | ||||||
4549 | tryVPTESTM(Node, N0, N1)) | ||||||
4550 | return; | ||||||
4551 | if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && | ||||||
4552 | tryVPTESTM(Node, N1, N0)) | ||||||
4553 | return; | ||||||
4554 | } | ||||||
4555 | |||||||
4556 | if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) { | ||||||
4557 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); | ||||||
4558 | CurDAG->RemoveDeadNode(Node); | ||||||
4559 | return; | ||||||
4560 | } | ||||||
4561 | if (matchBitExtract(Node)) | ||||||
4562 | return; | ||||||
4563 | if (AndImmShrink && shrinkAndImmediate(Node)) | ||||||
4564 | return; | ||||||
4565 | |||||||
4566 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
4567 | case ISD::OR: | ||||||
4568 | case ISD::XOR: | ||||||
4569 | if (tryShrinkShlLogicImm(Node)) | ||||||
4570 | return; | ||||||
4571 | |||||||
4572 | if (Opcode == ISD::OR && tryMatchBitSelect(Node)) | ||||||
4573 | return; | ||||||
4574 | |||||||
4575 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
4576 | case ISD::ADD: | ||||||
4577 | case ISD::SUB: { | ||||||
4578 | // Try to avoid folding immediates with multiple uses for optsize. | ||||||
4579 | // This code tries to select to register form directly to avoid going | ||||||
4580 | // through the isel table which might fold the immediate. We can't change | ||||||
4581 | // the patterns on the add/sub/and/or/xor with immediate paterns in the | ||||||
4582 | // tablegen files to check immediate use count without making the patterns | ||||||
4583 | // unavailable to the fast-isel table. | ||||||
4584 | if (!OptForSize) | ||||||
4585 | break; | ||||||
4586 | |||||||
4587 | // Only handle i8/i16/i32/i64. | ||||||
4588 | if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64) | ||||||
4589 | break; | ||||||
4590 | |||||||
4591 | SDValue N0 = Node->getOperand(0); | ||||||
4592 | SDValue N1 = Node->getOperand(1); | ||||||
4593 | |||||||
4594 | ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); | ||||||
4595 | if (!Cst) | ||||||
4596 | break; | ||||||
4597 | |||||||
4598 | int64_t Val = Cst->getSExtValue(); | ||||||
4599 | |||||||
4600 | // Make sure its an immediate that is considered foldable. | ||||||
4601 | // FIXME: Handle unsigned 32 bit immediates for 64-bit AND. | ||||||
4602 | if (!isInt<8>(Val) && !isInt<32>(Val)) | ||||||
4603 | break; | ||||||
4604 | |||||||
4605 | // If this can match to INC/DEC, let it go. | ||||||
4606 | if (Opcode == ISD::ADD && (Val == 1 || Val == -1)) | ||||||
4607 | break; | ||||||
4608 | |||||||
4609 | // Check if we should avoid folding this immediate. | ||||||
4610 | if (!shouldAvoidImmediateInstFormsForSize(N1.getNode())) | ||||||
4611 | break; | ||||||
4612 | |||||||
4613 | // We should not fold the immediate. So we need a register form instead. | ||||||
4614 | unsigned ROpc, MOpc; | ||||||
4615 | switch (NVT.SimpleTy) { | ||||||
4616 | default: llvm_unreachable("Unexpected VT!")::llvm::llvm_unreachable_internal("Unexpected VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4616); | ||||||
4617 | case MVT::i8: | ||||||
4618 | switch (Opcode) { | ||||||
4619 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4619); | ||||||
4620 | case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break; | ||||||
4621 | case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break; | ||||||
4622 | case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break; | ||||||
4623 | case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break; | ||||||
4624 | case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break; | ||||||
4625 | } | ||||||
4626 | break; | ||||||
4627 | case MVT::i16: | ||||||
4628 | switch (Opcode) { | ||||||
4629 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4629); | ||||||
4630 | case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break; | ||||||
4631 | case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break; | ||||||
4632 | case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break; | ||||||
4633 | case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break; | ||||||
4634 | case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break; | ||||||
4635 | } | ||||||
4636 | break; | ||||||
4637 | case MVT::i32: | ||||||
4638 | switch (Opcode) { | ||||||
4639 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4639); | ||||||
4640 | case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break; | ||||||
4641 | case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break; | ||||||
4642 | case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break; | ||||||
4643 | case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break; | ||||||
4644 | case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break; | ||||||
4645 | } | ||||||
4646 | break; | ||||||
4647 | case MVT::i64: | ||||||
4648 | switch (Opcode) { | ||||||
4649 | default: llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4649); | ||||||
4650 | case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break; | ||||||
4651 | case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break; | ||||||
4652 | case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break; | ||||||
4653 | case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break; | ||||||
4654 | case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break; | ||||||
4655 | } | ||||||
4656 | break; | ||||||
4657 | } | ||||||
4658 | |||||||
4659 | // Ok this is a AND/OR/XOR/ADD/SUB with constant. | ||||||
4660 | |||||||
4661 | // If this is a not a subtract, we can still try to fold a load. | ||||||
4662 | if (Opcode != ISD::SUB) { | ||||||
4663 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
4664 | if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | ||||||
4665 | SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; | ||||||
4666 | SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); | ||||||
4667 | MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
4668 | // Update the chain. | ||||||
4669 | ReplaceUses(N0.getValue(1), SDValue(CNode, 2)); | ||||||
4670 | // Record the mem-refs | ||||||
4671 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()}); | ||||||
4672 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); | ||||||
4673 | CurDAG->RemoveDeadNode(Node); | ||||||
4674 | return; | ||||||
4675 | } | ||||||
4676 | } | ||||||
4677 | |||||||
4678 | CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1); | ||||||
4679 | return; | ||||||
4680 | } | ||||||
4681 | |||||||
4682 | case X86ISD::SMUL: | ||||||
4683 | // i16/i32/i64 are handled with isel patterns. | ||||||
4684 | if (NVT != MVT::i8) | ||||||
4685 | break; | ||||||
4686 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||
4687 | case X86ISD::UMUL: { | ||||||
4688 | SDValue N0 = Node->getOperand(0); | ||||||
4689 | SDValue N1 = Node->getOperand(1); | ||||||
4690 | |||||||
4691 | unsigned LoReg, ROpc, MOpc; | ||||||
4692 | switch (NVT.SimpleTy) { | ||||||
4693 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4693); | ||||||
4694 | case MVT::i8: | ||||||
4695 | LoReg = X86::AL; | ||||||
4696 | ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r; | ||||||
4697 | MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m; | ||||||
4698 | break; | ||||||
4699 | case MVT::i16: | ||||||
4700 | LoReg = X86::AX; | ||||||
4701 | ROpc = X86::MUL16r; | ||||||
4702 | MOpc = X86::MUL16m; | ||||||
4703 | break; | ||||||
4704 | case MVT::i32: | ||||||
4705 | LoReg = X86::EAX; | ||||||
4706 | ROpc = X86::MUL32r; | ||||||
4707 | MOpc = X86::MUL32m; | ||||||
4708 | break; | ||||||
4709 | case MVT::i64: | ||||||
4710 | LoReg = X86::RAX; | ||||||
4711 | ROpc = X86::MUL64r; | ||||||
4712 | MOpc = X86::MUL64m; | ||||||
4713 | break; | ||||||
4714 | } | ||||||
4715 | |||||||
4716 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
4717 | bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
4718 | // Multiply is commmutative. | ||||||
4719 | if (!FoldedLoad) { | ||||||
4720 | FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
4721 | if (FoldedLoad) | ||||||
4722 | std::swap(N0, N1); | ||||||
4723 | } | ||||||
4724 | |||||||
4725 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, | ||||||
4726 | N0, SDValue()).getValue(1); | ||||||
4727 | |||||||
4728 | MachineSDNode *CNode; | ||||||
4729 | if (FoldedLoad) { | ||||||
4730 | // i16/i32/i64 use an instruction that produces a low and high result even | ||||||
4731 | // though only the low result is used. | ||||||
4732 | SDVTList VTs; | ||||||
4733 | if (NVT == MVT::i8) | ||||||
4734 | VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); | ||||||
4735 | else | ||||||
4736 | VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other); | ||||||
4737 | |||||||
4738 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), | ||||||
4739 | InFlag }; | ||||||
4740 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
4741 | |||||||
4742 | // Update the chain. | ||||||
4743 | ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3)); | ||||||
4744 | // Record the mem-refs | ||||||
4745 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); | ||||||
4746 | } else { | ||||||
4747 | // i16/i32/i64 use an instruction that produces a low and high result even | ||||||
4748 | // though only the low result is used. | ||||||
4749 | SDVTList VTs; | ||||||
4750 | if (NVT == MVT::i8) | ||||||
4751 | VTs = CurDAG->getVTList(NVT, MVT::i32); | ||||||
4752 | else | ||||||
4753 | VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); | ||||||
4754 | |||||||
4755 | CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag}); | ||||||
4756 | } | ||||||
4757 | |||||||
4758 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); | ||||||
4759 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2)); | ||||||
4760 | CurDAG->RemoveDeadNode(Node); | ||||||
4761 | return; | ||||||
4762 | } | ||||||
4763 | |||||||
4764 | case ISD::SMUL_LOHI: | ||||||
4765 | case ISD::UMUL_LOHI: { | ||||||
4766 | SDValue N0 = Node->getOperand(0); | ||||||
4767 | SDValue N1 = Node->getOperand(1); | ||||||
4768 | |||||||
4769 | unsigned Opc, MOpc; | ||||||
4770 | unsigned LoReg, HiReg; | ||||||
4771 | bool IsSigned = Opcode == ISD::SMUL_LOHI; | ||||||
4772 | switch (NVT.SimpleTy) { | ||||||
4773 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4773); | ||||||
4774 | case MVT::i32: | ||||||
4775 | Opc = IsSigned ? X86::IMUL32r : X86::MUL32r; | ||||||
4776 | MOpc = IsSigned ? X86::IMUL32m : X86::MUL32m; | ||||||
4777 | LoReg = X86::EAX; HiReg = X86::EDX; | ||||||
4778 | break; | ||||||
4779 | case MVT::i64: | ||||||
4780 | Opc = IsSigned ? X86::IMUL64r : X86::MUL64r; | ||||||
4781 | MOpc = IsSigned ? X86::IMUL64m : X86::MUL64m; | ||||||
4782 | LoReg = X86::RAX; HiReg = X86::RDX; | ||||||
4783 | break; | ||||||
4784 | } | ||||||
4785 | |||||||
4786 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
4787 | bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
4788 | // Multiply is commmutative. | ||||||
4789 | if (!foldedLoad) { | ||||||
4790 | foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
4791 | if (foldedLoad) | ||||||
4792 | std::swap(N0, N1); | ||||||
4793 | } | ||||||
4794 | |||||||
4795 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, | ||||||
4796 | N0, SDValue()).getValue(1); | ||||||
4797 | if (foldedLoad) { | ||||||
4798 | SDValue Chain; | ||||||
4799 | MachineSDNode *CNode = nullptr; | ||||||
4800 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), | ||||||
4801 | InFlag }; | ||||||
4802 | SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); | ||||||
4803 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); | ||||||
4804 | Chain = SDValue(CNode, 0); | ||||||
4805 | InFlag = SDValue(CNode, 1); | ||||||
4806 | |||||||
4807 | // Update the chain. | ||||||
4808 | ReplaceUses(N1.getValue(1), Chain); | ||||||
4809 | // Record the mem-refs | ||||||
4810 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); | ||||||
4811 | } else { | ||||||
4812 | SDValue Ops[] = { N1, InFlag }; | ||||||
4813 | SDVTList VTs = CurDAG->getVTList(MVT::Glue); | ||||||
4814 | SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); | ||||||
4815 | InFlag = SDValue(CNode, 0); | ||||||
4816 | } | ||||||
4817 | |||||||
4818 | // Copy the low half of the result, if it is needed. | ||||||
4819 | if (!SDValue(Node, 0).use_empty()) { | ||||||
4820 | assert(LoReg && "Register for low half is not defined!")((LoReg && "Register for low half is not defined!") ? static_cast<void> (0) : __assert_fail ("LoReg && \"Register for low half is not defined!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4820, __PRETTY_FUNCTION__)); | ||||||
4821 | SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, | ||||||
4822 | NVT, InFlag); | ||||||
4823 | InFlag = ResLo.getValue(2); | ||||||
4824 | ReplaceUses(SDValue(Node, 0), ResLo); | ||||||
4825 | LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; ResLo.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
4826 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; ResLo.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
4827 | } | ||||||
4828 | // Copy the high half of the result, if it is needed. | ||||||
4829 | if (!SDValue(Node, 1).use_empty()) { | ||||||
4830 | assert(HiReg && "Register for high half is not defined!")((HiReg && "Register for high half is not defined!") ? static_cast<void> (0) : __assert_fail ("HiReg && \"Register for high half is not defined!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4830, __PRETTY_FUNCTION__)); | ||||||
4831 | SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, | ||||||
4832 | NVT, InFlag); | ||||||
4833 | InFlag = ResHi.getValue(2); | ||||||
4834 | ReplaceUses(SDValue(Node, 1), ResHi); | ||||||
4835 | LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; ResHi.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
4836 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; ResHi.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
4837 | } | ||||||
4838 | |||||||
4839 | CurDAG->RemoveDeadNode(Node); | ||||||
4840 | return; | ||||||
4841 | } | ||||||
4842 | |||||||
4843 | case ISD::SDIVREM: | ||||||
4844 | case ISD::UDIVREM: { | ||||||
4845 | SDValue N0 = Node->getOperand(0); | ||||||
4846 | SDValue N1 = Node->getOperand(1); | ||||||
4847 | |||||||
4848 | unsigned ROpc, MOpc; | ||||||
4849 | bool isSigned = Opcode == ISD::SDIVREM; | ||||||
4850 | if (!isSigned) { | ||||||
4851 | switch (NVT.SimpleTy) { | ||||||
4852 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4852); | ||||||
4853 | case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break; | ||||||
4854 | case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break; | ||||||
4855 | case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break; | ||||||
4856 | case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break; | ||||||
4857 | } | ||||||
4858 | } else { | ||||||
4859 | switch (NVT.SimpleTy) { | ||||||
4860 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4860); | ||||||
4861 | case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break; | ||||||
4862 | case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break; | ||||||
4863 | case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break; | ||||||
4864 | case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break; | ||||||
4865 | } | ||||||
4866 | } | ||||||
4867 | |||||||
4868 | unsigned LoReg, HiReg, ClrReg; | ||||||
4869 | unsigned SExtOpcode; | ||||||
4870 | switch (NVT.SimpleTy) { | ||||||
4871 | default: llvm_unreachable("Unsupported VT!")::llvm::llvm_unreachable_internal("Unsupported VT!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4871); | ||||||
4872 | case MVT::i8: | ||||||
4873 | LoReg = X86::AL; ClrReg = HiReg = X86::AH; | ||||||
4874 | SExtOpcode = 0; // Not used. | ||||||
4875 | break; | ||||||
4876 | case MVT::i16: | ||||||
4877 | LoReg = X86::AX; HiReg = X86::DX; | ||||||
4878 | ClrReg = X86::DX; | ||||||
4879 | SExtOpcode = X86::CWD; | ||||||
4880 | break; | ||||||
4881 | case MVT::i32: | ||||||
4882 | LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; | ||||||
4883 | SExtOpcode = X86::CDQ; | ||||||
4884 | break; | ||||||
4885 | case MVT::i64: | ||||||
4886 | LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; | ||||||
4887 | SExtOpcode = X86::CQO; | ||||||
4888 | break; | ||||||
4889 | } | ||||||
4890 | |||||||
4891 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
4892 | bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); | ||||||
4893 | bool signBitIsZero = CurDAG->SignBitIsZero(N0); | ||||||
4894 | |||||||
4895 | SDValue InFlag; | ||||||
4896 | if (NVT == MVT::i8) { | ||||||
4897 | // Special case for div8, just use a move with zero extension to AX to | ||||||
4898 | // clear the upper 8 bits (AH). | ||||||
4899 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain; | ||||||
4900 | MachineSDNode *Move; | ||||||
4901 | if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | ||||||
4902 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; | ||||||
4903 | unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8 | ||||||
4904 | : X86::MOVZX16rm8; | ||||||
4905 | Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops); | ||||||
4906 | Chain = SDValue(Move, 1); | ||||||
4907 | ReplaceUses(N0.getValue(1), Chain); | ||||||
4908 | // Record the mem-refs | ||||||
4909 | CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()}); | ||||||
4910 | } else { | ||||||
4911 | unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8 | ||||||
4912 | : X86::MOVZX16rr8; | ||||||
4913 | Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0); | ||||||
4914 | Chain = CurDAG->getEntryNode(); | ||||||
4915 | } | ||||||
4916 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0), | ||||||
4917 | SDValue()); | ||||||
4918 | InFlag = Chain.getValue(1); | ||||||
4919 | } else { | ||||||
4920 | InFlag = | ||||||
4921 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, | ||||||
4922 | LoReg, N0, SDValue()).getValue(1); | ||||||
4923 | if (isSigned && !signBitIsZero) { | ||||||
4924 | // Sign extend the low part into the high part. | ||||||
4925 | InFlag = | ||||||
4926 | SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); | ||||||
4927 | } else { | ||||||
4928 | // Zero out the high part, effectively zero extending the input. | ||||||
4929 | SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); | ||||||
4930 | SDValue ClrNode = | ||||||
4931 | SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); | ||||||
4932 | switch (NVT.SimpleTy) { | ||||||
4933 | case MVT::i16: | ||||||
4934 | ClrNode = | ||||||
4935 | SDValue(CurDAG->getMachineNode( | ||||||
4936 | TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, | ||||||
4937 | CurDAG->getTargetConstant(X86::sub_16bit, dl, | ||||||
4938 | MVT::i32)), | ||||||
4939 | 0); | ||||||
4940 | break; | ||||||
4941 | case MVT::i32: | ||||||
4942 | break; | ||||||
4943 | case MVT::i64: | ||||||
4944 | ClrNode = | ||||||
4945 | SDValue(CurDAG->getMachineNode( | ||||||
4946 | TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, | ||||||
4947 | CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode, | ||||||
4948 | CurDAG->getTargetConstant(X86::sub_32bit, dl, | ||||||
4949 | MVT::i32)), | ||||||
4950 | 0); | ||||||
4951 | break; | ||||||
4952 | default: | ||||||
4953 | llvm_unreachable("Unexpected division source")::llvm::llvm_unreachable_internal("Unexpected division source" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 4953); | ||||||
4954 | } | ||||||
4955 | |||||||
4956 | InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, | ||||||
4957 | ClrNode, InFlag).getValue(1); | ||||||
4958 | } | ||||||
4959 | } | ||||||
4960 | |||||||
4961 | if (foldedLoad) { | ||||||
4962 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), | ||||||
4963 | InFlag }; | ||||||
4964 | MachineSDNode *CNode = | ||||||
4965 | CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); | ||||||
4966 | InFlag = SDValue(CNode, 1); | ||||||
4967 | // Update the chain. | ||||||
4968 | ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); | ||||||
4969 | // Record the mem-refs | ||||||
4970 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); | ||||||
4971 | } else { | ||||||
4972 | InFlag = | ||||||
4973 | SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0); | ||||||
4974 | } | ||||||
4975 | |||||||
4976 | // Prevent use of AH in a REX instruction by explicitly copying it to | ||||||
4977 | // an ABCD_L register. | ||||||
4978 | // | ||||||
4979 | // The current assumption of the register allocator is that isel | ||||||
4980 | // won't generate explicit references to the GR8_ABCD_H registers. If | ||||||
4981 | // the allocator and/or the backend get enhanced to be more robust in | ||||||
4982 | // that regard, this can be, and should be, removed. | ||||||
4983 | if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) { | ||||||
4984 | SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8); | ||||||
4985 | unsigned AHExtOpcode = | ||||||
4986 | isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX; | ||||||
4987 | |||||||
4988 | SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32, | ||||||
4989 | MVT::Glue, AHCopy, InFlag); | ||||||
4990 | SDValue Result(RNode, 0); | ||||||
4991 | InFlag = SDValue(RNode, 1); | ||||||
4992 | |||||||
4993 | Result = | ||||||
4994 | CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); | ||||||
4995 | |||||||
4996 | ReplaceUses(SDValue(Node, 1), Result); | ||||||
4997 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
4998 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
4999 | } | ||||||
5000 | // Copy the division (low) result, if it is needed. | ||||||
5001 | if (!SDValue(Node, 0).use_empty()) { | ||||||
5002 | SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, | ||||||
5003 | LoReg, NVT, InFlag); | ||||||
5004 | InFlag = Result.getValue(2); | ||||||
5005 | ReplaceUses(SDValue(Node, 0), Result); | ||||||
5006 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
5007 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
5008 | } | ||||||
5009 | // Copy the remainder (high) result, if it is needed. | ||||||
5010 | if (!SDValue(Node, 1).use_empty()) { | ||||||
5011 | SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, | ||||||
5012 | HiReg, NVT, InFlag); | ||||||
5013 | InFlag = Result.getValue(2); | ||||||
5014 | ReplaceUses(SDValue(Node, 1), Result); | ||||||
5015 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false) | ||||||
5016 | dbgs() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-isel")) { dbgs() << "=> "; Result.getNode()-> dump(CurDAG); dbgs() << '\n'; } } while (false); | ||||||
5017 | } | ||||||
5018 | CurDAG->RemoveDeadNode(Node); | ||||||
5019 | return; | ||||||
5020 | } | ||||||
5021 | |||||||
5022 | case X86ISD::FCMP: | ||||||
5023 | case X86ISD::STRICT_FCMP: | ||||||
5024 | case X86ISD::STRICT_FCMPS: { | ||||||
5025 | bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP || | ||||||
5026 | Node->getOpcode() == X86ISD::STRICT_FCMPS; | ||||||
5027 | SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0); | ||||||
5028 | SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1); | ||||||
5029 | |||||||
5030 | // Save the original VT of the compare. | ||||||
5031 | MVT CmpVT = N0.getSimpleValueType(); | ||||||
5032 | |||||||
5033 | // Floating point needs special handling if we don't have FCOMI. | ||||||
5034 | if (Subtarget->hasCMov()) | ||||||
5035 | break; | ||||||
5036 | |||||||
5037 | bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS; | ||||||
5038 | |||||||
5039 | unsigned Opc; | ||||||
5040 | switch (CmpVT.SimpleTy) { | ||||||
5041 | default: llvm_unreachable("Unexpected type!")::llvm::llvm_unreachable_internal("Unexpected type!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5041); | ||||||
5042 | case MVT::f32: | ||||||
5043 | Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32; | ||||||
5044 | break; | ||||||
5045 | case MVT::f64: | ||||||
5046 | Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64; | ||||||
5047 | break; | ||||||
5048 | case MVT::f80: | ||||||
5049 | Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80; | ||||||
5050 | break; | ||||||
5051 | } | ||||||
5052 | |||||||
5053 | SDValue Cmp; | ||||||
5054 | SDValue Chain = | ||||||
5055 | IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode(); | ||||||
5056 | if (IsStrictCmp) { | ||||||
5057 | SDVTList VTs = CurDAG->getVTList(MVT::i16, MVT::Other); | ||||||
5058 | Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0); | ||||||
5059 | Chain = Cmp.getValue(1); | ||||||
5060 | } else { | ||||||
5061 | Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i16, N0, N1), 0); | ||||||
5062 | } | ||||||
5063 | |||||||
5064 | // Move FPSW to AX. | ||||||
5065 | SDValue FPSW = CurDAG->getCopyToReg(Chain, dl, X86::FPSW, Cmp, SDValue()); | ||||||
5066 | Chain = FPSW; | ||||||
5067 | SDValue FNSTSW = | ||||||
5068 | SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, FPSW, | ||||||
5069 | FPSW.getValue(1)), | ||||||
5070 | 0); | ||||||
5071 | |||||||
5072 | // Extract upper 8-bits of AX. | ||||||
5073 | SDValue Extract = | ||||||
5074 | CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW); | ||||||
5075 | |||||||
5076 | // Move AH into flags. | ||||||
5077 | // Some 64-bit targets lack SAHF support, but they do support FCOMI. | ||||||
5078 | assert(Subtarget->hasLAHFSAHF() &&((Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?" ) ? static_cast<void> (0) : __assert_fail ("Subtarget->hasLAHFSAHF() && \"Target doesn't support SAHF or FCOMI?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5079, __PRETTY_FUNCTION__)) | ||||||
5079 | "Target doesn't support SAHF or FCOMI?")((Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?" ) ? static_cast<void> (0) : __assert_fail ("Subtarget->hasLAHFSAHF() && \"Target doesn't support SAHF or FCOMI?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5079, __PRETTY_FUNCTION__)); | ||||||
5080 | SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue()); | ||||||
5081 | Chain = AH; | ||||||
5082 | SDValue SAHF = SDValue( | ||||||
5083 | CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0); | ||||||
5084 | |||||||
5085 | if (IsStrictCmp) | ||||||
5086 | ReplaceUses(SDValue(Node, 1), Chain); | ||||||
5087 | |||||||
5088 | ReplaceUses(SDValue(Node, 0), SAHF); | ||||||
5089 | CurDAG->RemoveDeadNode(Node); | ||||||
5090 | return; | ||||||
5091 | } | ||||||
5092 | |||||||
5093 | case X86ISD::CMP: { | ||||||
5094 | SDValue N0 = Node->getOperand(0); | ||||||
5095 | SDValue N1 = Node->getOperand(1); | ||||||
5096 | |||||||
5097 | // Optimizations for TEST compares. | ||||||
5098 | if (!isNullConstant(N1)) | ||||||
5099 | break; | ||||||
5100 | |||||||
5101 | // Save the original VT of the compare. | ||||||
5102 | MVT CmpVT = N0.getSimpleValueType(); | ||||||
5103 | |||||||
5104 | // If we are comparing (and (shr X, C, Mask) with 0, emit a BEXTR followed | ||||||
5105 | // by a test instruction. The test should be removed later by | ||||||
5106 | // analyzeCompare if we are using only the zero flag. | ||||||
5107 | // TODO: Should we check the users and use the BEXTR flags directly? | ||||||
5108 | if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { | ||||||
5109 | if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) { | ||||||
5110 | unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr | ||||||
5111 | : X86::TEST32rr; | ||||||
5112 | SDValue BEXTR = SDValue(NewNode, 0); | ||||||
5113 | NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR); | ||||||
5114 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); | ||||||
5115 | CurDAG->RemoveDeadNode(Node); | ||||||
5116 | return; | ||||||
5117 | } | ||||||
5118 | } | ||||||
5119 | |||||||
5120 | // We can peek through truncates, but we need to be careful below. | ||||||
5121 | if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) | ||||||
5122 | N0 = N0.getOperand(0); | ||||||
5123 | |||||||
5124 | // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to | ||||||
5125 | // use a smaller encoding. | ||||||
5126 | // Look past the truncate if CMP is the only use of it. | ||||||
5127 | if (N0.getOpcode() == ISD::AND && | ||||||
5128 | N0.getNode()->hasOneUse() && | ||||||
5129 | N0.getValueType() != MVT::i8) { | ||||||
5130 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); | ||||||
5131 | if (!C) break; | ||||||
5132 | uint64_t Mask = C->getZExtValue(); | ||||||
5133 | |||||||
5134 | // Check if we can replace AND+IMM64 with a shift. This is possible for | ||||||
5135 | // masks/ like 0xFF000000 or 0x00FFFFFF and if we care only about the zero | ||||||
5136 | // flag. | ||||||
5137 | if (CmpVT == MVT::i64 && !isInt<32>(Mask) && | ||||||
5138 | onlyUsesZeroFlag(SDValue(Node, 0))) { | ||||||
5139 | if (isMask_64(~Mask)) { | ||||||
5140 | unsigned TrailingZeros = countTrailingZeros(Mask); | ||||||
5141 | SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64); | ||||||
5142 | SDValue Shift = | ||||||
5143 | SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32, | ||||||
5144 | N0.getOperand(0), Imm), 0); | ||||||
5145 | MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, | ||||||
5146 | MVT::i32, Shift, Shift); | ||||||
5147 | ReplaceNode(Node, Test); | ||||||
5148 | return; | ||||||
5149 | } | ||||||
5150 | if (isMask_64(Mask)) { | ||||||
5151 | unsigned LeadingZeros = countLeadingZeros(Mask); | ||||||
5152 | SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64); | ||||||
5153 | SDValue Shift = | ||||||
5154 | SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32, | ||||||
5155 | N0.getOperand(0), Imm), 0); | ||||||
5156 | MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, | ||||||
5157 | MVT::i32, Shift, Shift); | ||||||
5158 | ReplaceNode(Node, Test); | ||||||
5159 | return; | ||||||
5160 | } | ||||||
5161 | } | ||||||
5162 | |||||||
5163 | MVT VT; | ||||||
5164 | int SubRegOp; | ||||||
5165 | unsigned ROpc, MOpc; | ||||||
5166 | |||||||
5167 | // For each of these checks we need to be careful if the sign flag is | ||||||
5168 | // being used. It is only safe to use the sign flag in two conditions, | ||||||
5169 | // either the sign bit in the shrunken mask is zero or the final test | ||||||
5170 | // size is equal to the original compare size. | ||||||
5171 | |||||||
5172 | if (isUInt<8>(Mask) && | ||||||
5173 | (!(Mask & 0x80) || CmpVT == MVT::i8 || | ||||||
5174 | hasNoSignFlagUses(SDValue(Node, 0)))) { | ||||||
5175 | // For example, convert "testl %eax, $8" to "testb %al, $8" | ||||||
5176 | VT = MVT::i8; | ||||||
5177 | SubRegOp = X86::sub_8bit; | ||||||
5178 | ROpc = X86::TEST8ri; | ||||||
5179 | MOpc = X86::TEST8mi; | ||||||
5180 | } else if (OptForMinSize && isUInt<16>(Mask) && | ||||||
5181 | (!(Mask & 0x8000) || CmpVT == MVT::i16 || | ||||||
5182 | hasNoSignFlagUses(SDValue(Node, 0)))) { | ||||||
5183 | // For example, "testl %eax, $32776" to "testw %ax, $32776". | ||||||
5184 | // NOTE: We only want to form TESTW instructions if optimizing for | ||||||
5185 | // min size. Otherwise we only save one byte and possibly get a length | ||||||
5186 | // changing prefix penalty in the decoders. | ||||||
5187 | VT = MVT::i16; | ||||||
5188 | SubRegOp = X86::sub_16bit; | ||||||
5189 | ROpc = X86::TEST16ri; | ||||||
5190 | MOpc = X86::TEST16mi; | ||||||
5191 | } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 && | ||||||
5192 | ((!(Mask & 0x80000000) && | ||||||
5193 | // Without minsize 16-bit Cmps can get here so we need to | ||||||
5194 | // be sure we calculate the correct sign flag if needed. | ||||||
5195 | (CmpVT != MVT::i16 || !(Mask & 0x8000))) || | ||||||
5196 | CmpVT == MVT::i32 || | ||||||
5197 | hasNoSignFlagUses(SDValue(Node, 0)))) { | ||||||
5198 | // For example, "testq %rax, $268468232" to "testl %eax, $268468232". | ||||||
5199 | // NOTE: We only want to run that transform if N0 is 32 or 64 bits. | ||||||
5200 | // Otherwize, we find ourselves in a position where we have to do | ||||||
5201 | // promotion. If previous passes did not promote the and, we assume | ||||||
5202 | // they had a good reason not to and do not promote here. | ||||||
5203 | VT = MVT::i32; | ||||||
5204 | SubRegOp = X86::sub_32bit; | ||||||
5205 | ROpc = X86::TEST32ri; | ||||||
5206 | MOpc = X86::TEST32mi; | ||||||
5207 | } else { | ||||||
5208 | // No eligible transformation was found. | ||||||
5209 | break; | ||||||
5210 | } | ||||||
5211 | |||||||
5212 | SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT); | ||||||
5213 | SDValue Reg = N0.getOperand(0); | ||||||
5214 | |||||||
5215 | // Emit a testl or testw. | ||||||
5216 | MachineSDNode *NewNode; | ||||||
5217 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; | ||||||
5218 | if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { | ||||||
5219 | if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) { | ||||||
5220 | if (!LoadN->isSimple()) { | ||||||
5221 | unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits(); | ||||||
5222 | if (MOpc == X86::TEST8mi && NumVolBits != 8) | ||||||
5223 | break; | ||||||
5224 | else if (MOpc == X86::TEST16mi && NumVolBits != 16) | ||||||
5225 | break; | ||||||
5226 | else if (MOpc == X86::TEST32mi && NumVolBits != 32) | ||||||
5227 | break; | ||||||
5228 | } | ||||||
5229 | } | ||||||
5230 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, | ||||||
5231 | Reg.getOperand(0) }; | ||||||
5232 | NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops); | ||||||
5233 | // Update the chain. | ||||||
5234 | ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1)); | ||||||
5235 | // Record the mem-refs | ||||||
5236 | CurDAG->setNodeMemRefs(NewNode, | ||||||
5237 | {cast<LoadSDNode>(Reg)->getMemOperand()}); | ||||||
5238 | } else { | ||||||
5239 | // Extract the subregister if necessary. | ||||||
5240 | if (N0.getValueType() != VT) | ||||||
5241 | Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg); | ||||||
5242 | |||||||
5243 | NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm); | ||||||
5244 | } | ||||||
5245 | // Replace CMP with TEST. | ||||||
5246 | ReplaceNode(Node, NewNode); | ||||||
5247 | return; | ||||||
5248 | } | ||||||
5249 | break; | ||||||
5250 | } | ||||||
5251 | case X86ISD::PCMPISTR: { | ||||||
5252 | if (!Subtarget->hasSSE42()) | ||||||
5253 | break; | ||||||
5254 | |||||||
5255 | bool NeedIndex = !SDValue(Node, 0).use_empty(); | ||||||
5256 | bool NeedMask = !SDValue(Node, 1).use_empty(); | ||||||
5257 | // We can't fold a load if we are going to make two instructions. | ||||||
5258 | bool MayFoldLoad = !NeedIndex || !NeedMask; | ||||||
5259 | |||||||
5260 | MachineSDNode *CNode; | ||||||
5261 | if (NeedMask) { | ||||||
5262 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr; | ||||||
5263 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm; | ||||||
5264 | CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node); | ||||||
5265 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); | ||||||
5266 | } | ||||||
5267 | if (NeedIndex || !NeedMask) { | ||||||
5268 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr; | ||||||
5269 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm; | ||||||
5270 | CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node); | ||||||
5271 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); | ||||||
5272 | } | ||||||
5273 | |||||||
5274 | // Connect the flag usage to the last instruction created. | ||||||
5275 | ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); | ||||||
5276 | CurDAG->RemoveDeadNode(Node); | ||||||
5277 | return; | ||||||
5278 | } | ||||||
5279 | case X86ISD::PCMPESTR: { | ||||||
5280 | if (!Subtarget->hasSSE42()) | ||||||
5281 | break; | ||||||
5282 | |||||||
5283 | // Copy the two implicit register inputs. | ||||||
5284 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX, | ||||||
5285 | Node->getOperand(1), | ||||||
5286 | SDValue()).getValue(1); | ||||||
5287 | InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, | ||||||
5288 | Node->getOperand(3), InFlag).getValue(1); | ||||||
5289 | |||||||
5290 | bool NeedIndex = !SDValue(Node, 0).use_empty(); | ||||||
5291 | bool NeedMask = !SDValue(Node, 1).use_empty(); | ||||||
5292 | // We can't fold a load if we are going to make two instructions. | ||||||
5293 | bool MayFoldLoad = !NeedIndex || !NeedMask; | ||||||
5294 | |||||||
5295 | MachineSDNode *CNode; | ||||||
5296 | if (NeedMask) { | ||||||
5297 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr; | ||||||
5298 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm; | ||||||
5299 | CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node, | ||||||
5300 | InFlag); | ||||||
5301 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); | ||||||
5302 | } | ||||||
5303 | if (NeedIndex || !NeedMask) { | ||||||
5304 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr; | ||||||
5305 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm; | ||||||
5306 | CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag); | ||||||
5307 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); | ||||||
5308 | } | ||||||
5309 | // Connect the flag usage to the last instruction created. | ||||||
5310 | ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); | ||||||
5311 | CurDAG->RemoveDeadNode(Node); | ||||||
5312 | return; | ||||||
5313 | } | ||||||
5314 | |||||||
5315 | case ISD::SETCC: { | ||||||
5316 | if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue())) | ||||||
5317 | return; | ||||||
5318 | |||||||
5319 | break; | ||||||
5320 | } | ||||||
5321 | |||||||
5322 | case ISD::STORE: | ||||||
5323 | if (foldLoadStoreIntoMemOperand(Node)) | ||||||
5324 | return; | ||||||
5325 | break; | ||||||
5326 | |||||||
5327 | case X86ISD::SETCC_CARRY: { | ||||||
5328 | // We have to do this manually because tblgen will put the eflags copy in | ||||||
5329 | // the wrong place if we use an extract_subreg in the pattern. | ||||||
5330 | MVT VT = Node->getSimpleValueType(0); | ||||||
5331 | |||||||
5332 | // Copy flags to the EFLAGS register and glue it to next node. | ||||||
5333 | SDValue EFLAGS = | ||||||
5334 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, | ||||||
5335 | Node->getOperand(1), SDValue()); | ||||||
5336 | |||||||
5337 | // Create a 64-bit instruction if the result is 64-bits otherwise use the | ||||||
5338 | // 32-bit version. | ||||||
5339 | unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; | ||||||
5340 | MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; | ||||||
5341 | SDValue Result = SDValue( | ||||||
5342 | CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0); | ||||||
5343 | |||||||
5344 | // For less than 32-bits we need to extract from the 32-bit node. | ||||||
5345 | if (VT == MVT::i8 || VT == MVT::i16) { | ||||||
5346 | int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; | ||||||
5347 | Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); | ||||||
5348 | } | ||||||
5349 | |||||||
5350 | ReplaceUses(SDValue(Node, 0), Result); | ||||||
5351 | CurDAG->RemoveDeadNode(Node); | ||||||
5352 | return; | ||||||
5353 | } | ||||||
5354 | case X86ISD::SBB: { | ||||||
5355 | if (isNullConstant(Node->getOperand(0)) && | ||||||
5356 | isNullConstant(Node->getOperand(1))) { | ||||||
5357 | MVT VT = Node->getSimpleValueType(0); | ||||||
5358 | |||||||
5359 | // Create zero. | ||||||
5360 | SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); | ||||||
5361 | SDValue Zero = | ||||||
5362 | SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); | ||||||
5363 | if (VT == MVT::i64) { | ||||||
5364 | Zero = SDValue( | ||||||
5365 | CurDAG->getMachineNode( | ||||||
5366 | TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, | ||||||
5367 | CurDAG->getTargetConstant(0, dl, MVT::i64), Zero, | ||||||
5368 | CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)), | ||||||
5369 | 0); | ||||||
5370 | } | ||||||
5371 | |||||||
5372 | // Copy flags to the EFLAGS register and glue it to next node. | ||||||
5373 | SDValue EFLAGS = | ||||||
5374 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, | ||||||
5375 | Node->getOperand(2), SDValue()); | ||||||
5376 | |||||||
5377 | // Create a 64-bit instruction if the result is 64-bits otherwise use the | ||||||
5378 | // 32-bit version. | ||||||
5379 | unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr; | ||||||
5380 | MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; | ||||||
5381 | VTs = CurDAG->getVTList(SBBVT, MVT::i32); | ||||||
5382 | SDValue Result = | ||||||
5383 | SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS, | ||||||
5384 | EFLAGS.getValue(1)}), | ||||||
5385 | 0); | ||||||
5386 | |||||||
5387 | // Replace the flag use. | ||||||
5388 | ReplaceUses(SDValue(Node, 1), Result.getValue(1)); | ||||||
5389 | |||||||
5390 | // Replace the result use. | ||||||
5391 | if (!SDValue(Node, 0).use_empty()) { | ||||||
5392 | // For less than 32-bits we need to extract from the 32-bit node. | ||||||
5393 | if (VT == MVT::i8 || VT == MVT::i16) { | ||||||
5394 | int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; | ||||||
5395 | Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); | ||||||
5396 | } | ||||||
5397 | ReplaceUses(SDValue(Node, 0), Result); | ||||||
5398 | } | ||||||
5399 | |||||||
5400 | CurDAG->RemoveDeadNode(Node); | ||||||
5401 | return; | ||||||
5402 | } | ||||||
5403 | break; | ||||||
5404 | } | ||||||
5405 | case X86ISD::MGATHER: { | ||||||
5406 | auto *Mgt = cast<X86MaskedGatherSDNode>(Node); | ||||||
5407 | SDValue IndexOp = Mgt->getIndex(); | ||||||
5408 | SDValue Mask = Mgt->getMask(); | ||||||
5409 | MVT IndexVT = IndexOp.getSimpleValueType(); | ||||||
5410 | MVT ValueVT = Node->getSimpleValueType(0); | ||||||
5411 | MVT MaskVT = Mask.getSimpleValueType(); | ||||||
5412 | |||||||
5413 | // This is just to prevent crashes if the nodes are malformed somehow. We're | ||||||
5414 | // otherwise only doing loose type checking in here based on type what | ||||||
5415 | // a type constraint would say just like table based isel. | ||||||
5416 | if (!ValueVT.isVector() || !MaskVT.isVector()) | ||||||
5417 | break; | ||||||
5418 | |||||||
5419 | unsigned NumElts = ValueVT.getVectorNumElements(); | ||||||
5420 | MVT ValueSVT = ValueVT.getVectorElementType(); | ||||||
5421 | |||||||
5422 | bool IsFP = ValueSVT.isFloatingPoint(); | ||||||
5423 | unsigned EltSize = ValueSVT.getSizeInBits(); | ||||||
5424 | |||||||
5425 | unsigned Opc = 0; | ||||||
5426 | bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1; | ||||||
5427 | if (AVX512Gather) { | ||||||
5428 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | ||||||
5429 | Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm; | ||||||
5430 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | ||||||
5431 | Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm; | ||||||
5432 | else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) | ||||||
5433 | Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm; | ||||||
5434 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | ||||||
5435 | Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm; | ||||||
5436 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | ||||||
5437 | Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm; | ||||||
5438 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) | ||||||
5439 | Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm; | ||||||
5440 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | ||||||
5441 | Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm; | ||||||
5442 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | ||||||
5443 | Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm; | ||||||
5444 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) | ||||||
5445 | Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm; | ||||||
5446 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | ||||||
5447 | Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm; | ||||||
5448 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | ||||||
5449 | Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm; | ||||||
5450 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) | ||||||
5451 | Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm; | ||||||
5452 | } else { | ||||||
5453 | assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&((EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger () && "Unexpected mask VT!") ? static_cast<void> (0) : __assert_fail ("EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() && \"Unexpected mask VT!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5454, __PRETTY_FUNCTION__)) | ||||||
5454 | "Unexpected mask VT!")((EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger () && "Unexpected mask VT!") ? static_cast<void> (0) : __assert_fail ("EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() && \"Unexpected mask VT!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5454, __PRETTY_FUNCTION__)); | ||||||
5455 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | ||||||
5456 | Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm; | ||||||
5457 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | ||||||
5458 | Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm; | ||||||
5459 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | ||||||
5460 | Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm; | ||||||
5461 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | ||||||
5462 | Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm; | ||||||
5463 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | ||||||
5464 | Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm; | ||||||
5465 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | ||||||
5466 | Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm; | ||||||
5467 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | ||||||
5468 | Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm; | ||||||
5469 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | ||||||
5470 | Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm; | ||||||
5471 | } | ||||||
5472 | |||||||
5473 | if (!Opc) | ||||||
5474 | break; | ||||||
5475 | |||||||
5476 | SDValue Base, Scale, Index, Disp, Segment; | ||||||
5477 | if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(), | ||||||
5478 | Base, Scale, Index, Disp, Segment)) | ||||||
5479 | break; | ||||||
5480 | |||||||
5481 | SDValue PassThru = Mgt->getPassThru(); | ||||||
5482 | SDValue Chain = Mgt->getChain(); | ||||||
5483 | // Gather instructions have a mask output not in the ISD node. | ||||||
5484 | SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other); | ||||||
5485 | |||||||
5486 | MachineSDNode *NewNode; | ||||||
5487 | if (AVX512Gather) { | ||||||
5488 | SDValue Ops[] = {PassThru, Mask, Base, Scale, | ||||||
5489 | Index, Disp, Segment, Chain}; | ||||||
5490 | NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); | ||||||
5491 | } else { | ||||||
5492 | SDValue Ops[] = {PassThru, Base, Scale, Index, | ||||||
5493 | Disp, Segment, Mask, Chain}; | ||||||
5494 | NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); | ||||||
5495 | } | ||||||
5496 | CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()}); | ||||||
5497 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); | ||||||
5498 | ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2)); | ||||||
5499 | CurDAG->RemoveDeadNode(Node); | ||||||
5500 | return; | ||||||
5501 | } | ||||||
5502 | case X86ISD::MSCATTER: { | ||||||
5503 | auto *Sc = cast<X86MaskedScatterSDNode>(Node); | ||||||
5504 | SDValue Value = Sc->getValue(); | ||||||
5505 | SDValue IndexOp = Sc->getIndex(); | ||||||
5506 | MVT IndexVT = IndexOp.getSimpleValueType(); | ||||||
5507 | MVT ValueVT = Value.getSimpleValueType(); | ||||||
5508 | |||||||
5509 | // This is just to prevent crashes if the nodes are malformed somehow. We're | ||||||
5510 | // otherwise only doing loose type checking in here based on type what | ||||||
5511 | // a type constraint would say just like table based isel. | ||||||
5512 | if (!ValueVT.isVector()) | ||||||
5513 | break; | ||||||
5514 | |||||||
5515 | unsigned NumElts = ValueVT.getVectorNumElements(); | ||||||
5516 | MVT ValueSVT = ValueVT.getVectorElementType(); | ||||||
5517 | |||||||
5518 | bool IsFP = ValueSVT.isFloatingPoint(); | ||||||
5519 | unsigned EltSize = ValueSVT.getSizeInBits(); | ||||||
5520 | |||||||
5521 | unsigned Opc; | ||||||
5522 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) | ||||||
5523 | Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr; | ||||||
5524 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) | ||||||
5525 | Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr; | ||||||
5526 | else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) | ||||||
5527 | Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr; | ||||||
5528 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) | ||||||
5529 | Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr; | ||||||
5530 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) | ||||||
5531 | Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr; | ||||||
5532 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) | ||||||
5533 | Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr; | ||||||
5534 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) | ||||||
5535 | Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr; | ||||||
5536 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) | ||||||
5537 | Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr; | ||||||
5538 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) | ||||||
5539 | Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr; | ||||||
5540 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) | ||||||
5541 | Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr; | ||||||
5542 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) | ||||||
5543 | Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr; | ||||||
5544 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) | ||||||
5545 | Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr; | ||||||
5546 | else | ||||||
5547 | break; | ||||||
5548 | |||||||
5549 | SDValue Base, Scale, Index, Disp, Segment; | ||||||
5550 | if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(), | ||||||
5551 | Base, Scale, Index, Disp, Segment)) | ||||||
5552 | break; | ||||||
5553 | |||||||
5554 | SDValue Mask = Sc->getMask(); | ||||||
5555 | SDValue Chain = Sc->getChain(); | ||||||
5556 | // Scatter instructions have a mask output not in the ISD node. | ||||||
5557 | SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other); | ||||||
5558 | SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain}; | ||||||
5559 | |||||||
5560 | MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); | ||||||
5561 | CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()}); | ||||||
5562 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1)); | ||||||
5563 | CurDAG->RemoveDeadNode(Node); | ||||||
5564 | return; | ||||||
5565 | } | ||||||
5566 | } | ||||||
5567 | |||||||
5568 | SelectCode(Node); | ||||||
5569 | } | ||||||
5570 | |||||||
5571 | bool X86DAGToDAGISel:: | ||||||
5572 | SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, | ||||||
5573 | std::vector<SDValue> &OutOps) { | ||||||
5574 | SDValue Op0, Op1, Op2, Op3, Op4; | ||||||
5575 | switch (ConstraintID) { | ||||||
5576 | default: | ||||||
5577 | llvm_unreachable("Unexpected asm memory constraint")::llvm::llvm_unreachable_internal("Unexpected asm memory constraint" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp" , 5577); | ||||||
5578 | case InlineAsm::Constraint_o: // offsetable ?? | ||||||
5579 | case InlineAsm::Constraint_v: // not offsetable ?? | ||||||
5580 | case InlineAsm::Constraint_m: // memory | ||||||
5581 | case InlineAsm::Constraint_X: | ||||||
5582 | if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) | ||||||
5583 | return true; | ||||||
5584 | break; | ||||||
5585 | } | ||||||
5586 | |||||||
5587 | OutOps.push_back(Op0); | ||||||
5588 | OutOps.push_back(Op1); | ||||||
5589 | OutOps.push_back(Op2); | ||||||
5590 | OutOps.push_back(Op3); | ||||||
5591 | OutOps.push_back(Op4); | ||||||
5592 | return false; | ||||||
5593 | } | ||||||
5594 | |||||||
5595 | /// This pass converts a legalized DAG into a X86-specific DAG, | ||||||
5596 | /// ready for instruction scheduling. | ||||||
5597 | FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, | ||||||
5598 | CodeGenOpt::Level OptLevel) { | ||||||
5599 | return new X86DAGToDAGISel(TM, OptLevel); | ||||||
5600 | } |
1 | //===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(), |
10 | // and dyn_cast_or_null<X>() templates. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_SUPPORT_CASTING_H |
15 | #define LLVM_SUPPORT_CASTING_H |
16 | |
17 | #include "llvm/Support/Compiler.h" |
18 | #include "llvm/Support/type_traits.h" |
19 | #include <cassert> |
20 | #include <memory> |
21 | #include <type_traits> |
22 | |
23 | namespace llvm { |
24 | |
25 | //===----------------------------------------------------------------------===// |
26 | // isa<x> Support Templates |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | // Define a template that can be specialized by smart pointers to reflect the |
30 | // fact that they are automatically dereferenced, and are not involved with the |
31 | // template selection process... the default implementation is a noop. |
32 | // |
33 | template<typename From> struct simplify_type { |
34 | using SimpleType = From; // The real type this represents... |
35 | |
36 | // An accessor to get the real value... |
37 | static SimpleType &getSimplifiedValue(From &Val) { return Val; } |
38 | }; |
39 | |
40 | template<typename From> struct simplify_type<const From> { |
41 | using NonConstSimpleType = typename simplify_type<From>::SimpleType; |
42 | using SimpleType = |
43 | typename add_const_past_pointer<NonConstSimpleType>::type; |
44 | using RetType = |
45 | typename add_lvalue_reference_if_not_pointer<SimpleType>::type; |
46 | |
47 | static RetType getSimplifiedValue(const From& Val) { |
48 | return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val)); |
49 | } |
50 | }; |
51 | |
52 | // The core of the implementation of isa<X> is here; To and From should be |
53 | // the names of classes. This template can be specialized to customize the |
54 | // implementation of isa<> without rewriting it from scratch. |
55 | template <typename To, typename From, typename Enabler = void> |
56 | struct isa_impl { |
57 | static inline bool doit(const From &Val) { |
58 | return To::classof(&Val); |
59 | } |
60 | }; |
61 | |
62 | /// Always allow upcasts, and perform no dynamic check for them. |
63 | template <typename To, typename From> |
64 | struct isa_impl<To, From, std::enable_if_t<std::is_base_of<To, From>::value>> { |
65 | static inline bool doit(const From &) { return true; } |
66 | }; |
67 | |
68 | template <typename To, typename From> struct isa_impl_cl { |
69 | static inline bool doit(const From &Val) { |
70 | return isa_impl<To, From>::doit(Val); |
71 | } |
72 | }; |
73 | |
74 | template <typename To, typename From> struct isa_impl_cl<To, const From> { |
75 | static inline bool doit(const From &Val) { |
76 | return isa_impl<To, From>::doit(Val); |
77 | } |
78 | }; |
79 | |
80 | template <typename To, typename From> |
81 | struct isa_impl_cl<To, const std::unique_ptr<From>> { |
82 | static inline bool doit(const std::unique_ptr<From> &Val) { |
83 | assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 83, __PRETTY_FUNCTION__)); |
84 | return isa_impl_cl<To, From>::doit(*Val); |
85 | } |
86 | }; |
87 | |
88 | template <typename To, typename From> struct isa_impl_cl<To, From*> { |
89 | static inline bool doit(const From *Val) { |
90 | assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 90, __PRETTY_FUNCTION__)); |
91 | return isa_impl<To, From>::doit(*Val); |
92 | } |
93 | }; |
94 | |
95 | template <typename To, typename From> struct isa_impl_cl<To, From*const> { |
96 | static inline bool doit(const From *Val) { |
97 | assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 97, __PRETTY_FUNCTION__)); |
98 | return isa_impl<To, From>::doit(*Val); |
99 | } |
100 | }; |
101 | |
102 | template <typename To, typename From> struct isa_impl_cl<To, const From*> { |
103 | static inline bool doit(const From *Val) { |
104 | assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 104, __PRETTY_FUNCTION__)); |
105 | return isa_impl<To, From>::doit(*Val); |
106 | } |
107 | }; |
108 | |
109 | template <typename To, typename From> struct isa_impl_cl<To, const From*const> { |
110 | static inline bool doit(const From *Val) { |
111 | assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 111, __PRETTY_FUNCTION__)); |
112 | return isa_impl<To, From>::doit(*Val); |
113 | } |
114 | }; |
115 | |
116 | template<typename To, typename From, typename SimpleFrom> |
117 | struct isa_impl_wrap { |
118 | // When From != SimplifiedType, we can simplify the type some more by using |
119 | // the simplify_type template. |
120 | static bool doit(const From &Val) { |
121 | return isa_impl_wrap<To, SimpleFrom, |
122 | typename simplify_type<SimpleFrom>::SimpleType>::doit( |
123 | simplify_type<const From>::getSimplifiedValue(Val)); |
124 | } |
125 | }; |
126 | |
127 | template<typename To, typename FromTy> |
128 | struct isa_impl_wrap<To, FromTy, FromTy> { |
129 | // When From == SimpleType, we are as simple as we are going to get. |
130 | static bool doit(const FromTy &Val) { |
131 | return isa_impl_cl<To,FromTy>::doit(Val); |
132 | } |
133 | }; |
134 | |
135 | // isa<X> - Return true if the parameter to the template is an instance of the |
136 | // template type argument. Used like this: |
137 | // |
138 | // if (isa<Type>(myVal)) { ... } |
139 | // |
140 | template <class X, class Y> LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa(const Y &Val) { |
141 | return isa_impl_wrap<X, const Y, |
142 | typename simplify_type<const Y>::SimpleType>::doit(Val); |
143 | } |
144 | |
145 | // isa_and_nonnull<X> - Functionally identical to isa, except that a null value |
146 | // is accepted. |
147 | // |
148 | template <class X, class Y> |
149 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa_and_nonnull(const Y &Val) { |
150 | if (!Val) |
151 | return false; |
152 | return isa<X>(Val); |
153 | } |
154 | |
155 | //===----------------------------------------------------------------------===// |
156 | // cast<x> Support Templates |
157 | //===----------------------------------------------------------------------===// |
158 | |
159 | template<class To, class From> struct cast_retty; |
160 | |
161 | // Calculate what type the 'cast' function should return, based on a requested |
162 | // type of To and a source type of From. |
163 | template<class To, class From> struct cast_retty_impl { |
164 | using ret_type = To &; // Normal case, return Ty& |
165 | }; |
166 | template<class To, class From> struct cast_retty_impl<To, const From> { |
167 | using ret_type = const To &; // Normal case, return Ty& |
168 | }; |
169 | |
170 | template<class To, class From> struct cast_retty_impl<To, From*> { |
171 | using ret_type = To *; // Pointer arg case, return Ty* |
172 | }; |
173 | |
174 | template<class To, class From> struct cast_retty_impl<To, const From*> { |
175 | using ret_type = const To *; // Constant pointer arg case, return const Ty* |
176 | }; |
177 | |
178 | template<class To, class From> struct cast_retty_impl<To, const From*const> { |
179 | using ret_type = const To *; // Constant pointer arg case, return const Ty* |
180 | }; |
181 | |
182 | template <class To, class From> |
183 | struct cast_retty_impl<To, std::unique_ptr<From>> { |
184 | private: |
185 | using PointerType = typename cast_retty_impl<To, From *>::ret_type; |
186 | using ResultType = std::remove_pointer_t<PointerType>; |
187 | |
188 | public: |
189 | using ret_type = std::unique_ptr<ResultType>; |
190 | }; |
191 | |
192 | template<class To, class From, class SimpleFrom> |
193 | struct cast_retty_wrap { |
194 | // When the simplified type and the from type are not the same, use the type |
195 | // simplifier to reduce the type, then reuse cast_retty_impl to get the |
196 | // resultant type. |
197 | using ret_type = typename cast_retty<To, SimpleFrom>::ret_type; |
198 | }; |
199 | |
200 | template<class To, class FromTy> |
201 | struct cast_retty_wrap<To, FromTy, FromTy> { |
202 | // When the simplified type is equal to the from type, use it directly. |
203 | using ret_type = typename cast_retty_impl<To,FromTy>::ret_type; |
204 | }; |
205 | |
206 | template<class To, class From> |
207 | struct cast_retty { |
208 | using ret_type = typename cast_retty_wrap< |
209 | To, From, typename simplify_type<From>::SimpleType>::ret_type; |
210 | }; |
211 | |
212 | // Ensure the non-simple values are converted using the simplify_type template |
213 | // that may be specialized by smart pointers... |
214 | // |
215 | template<class To, class From, class SimpleFrom> struct cast_convert_val { |
216 | // This is not a simple type, use the template to simplify it... |
217 | static typename cast_retty<To, From>::ret_type doit(From &Val) { |
218 | return cast_convert_val<To, SimpleFrom, |
219 | typename simplify_type<SimpleFrom>::SimpleType>::doit( |
220 | simplify_type<From>::getSimplifiedValue(Val)); |
221 | } |
222 | }; |
223 | |
224 | template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> { |
225 | // This _is_ a simple type, just cast it. |
226 | static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) { |
227 | typename cast_retty<To, FromTy>::ret_type Res2 |
228 | = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val); |
229 | return Res2; |
230 | } |
231 | }; |
232 | |
233 | template <class X> struct is_simple_type { |
234 | static const bool value = |
235 | std::is_same<X, typename simplify_type<X>::SimpleType>::value; |
236 | }; |
237 | |
238 | // cast<X> - Return the argument parameter cast to the specified type. This |
239 | // casting operator asserts that the type is correct, so it does not return null |
240 | // on failure. It does not allow a null argument (use cast_or_null for that). |
241 | // It is typically used like this: |
242 | // |
243 | // cast<Instruction>(myVal)->getParent() |
244 | // |
245 | template <class X, class Y> |
246 | inline std::enable_if_t<!is_simple_type<Y>::value, |
247 | typename cast_retty<X, const Y>::ret_type> |
248 | cast(const Y &Val) { |
249 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")((isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? static_cast<void> (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 249, __PRETTY_FUNCTION__)); |
250 | return cast_convert_val< |
251 | X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val); |
252 | } |
253 | |
254 | template <class X, class Y> |
255 | inline typename cast_retty<X, Y>::ret_type cast(Y &Val) { |
256 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")((isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? static_cast<void> (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 256, __PRETTY_FUNCTION__)); |
257 | return cast_convert_val<X, Y, |
258 | typename simplify_type<Y>::SimpleType>::doit(Val); |
259 | } |
260 | |
261 | template <class X, class Y> |
262 | inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) { |
263 | assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!")((isa<X>(Val) && "cast<Ty>() argument of incompatible type!" ) ? static_cast<void> (0) : __assert_fail ("isa<X>(Val) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 263, __PRETTY_FUNCTION__)); |
264 | return cast_convert_val<X, Y*, |
265 | typename simplify_type<Y*>::SimpleType>::doit(Val); |
266 | } |
267 | |
268 | template <class X, class Y> |
269 | inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type |
270 | cast(std::unique_ptr<Y> &&Val) { |
271 | assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!")((isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!" ) ? static_cast<void> (0) : __assert_fail ("isa<X>(Val.get()) && \"cast<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 271, __PRETTY_FUNCTION__)); |
272 | using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type; |
273 | return ret_type( |
274 | cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit( |
275 | Val.release())); |
276 | } |
277 | |
278 | // cast_or_null<X> - Functionally identical to cast, except that a null value is |
279 | // accepted. |
280 | // |
281 | template <class X, class Y> |
282 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
283 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
284 | cast_or_null(const Y &Val) { |
285 | if (!Val) |
286 | return nullptr; |
287 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")((isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? static_cast<void> (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 287, __PRETTY_FUNCTION__)); |
288 | return cast<X>(Val); |
289 | } |
290 | |
291 | template <class X, class Y> |
292 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t<!is_simple_type<Y>::value, |
293 | typename cast_retty<X, Y>::ret_type> |
294 | cast_or_null(Y &Val) { |
295 | if (!Val) |
296 | return nullptr; |
297 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")((isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? static_cast<void> (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 297, __PRETTY_FUNCTION__)); |
298 | return cast<X>(Val); |
299 | } |
300 | |
301 | template <class X, class Y> |
302 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type |
303 | cast_or_null(Y *Val) { |
304 | if (!Val) return nullptr; |
305 | assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!")((isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!" ) ? static_cast<void> (0) : __assert_fail ("isa<X>(Val) && \"cast_or_null<Ty>() argument of incompatible type!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h" , 305, __PRETTY_FUNCTION__)); |
306 | return cast<X>(Val); |
307 | } |
308 | |
309 | template <class X, class Y> |
310 | inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type |
311 | cast_or_null(std::unique_ptr<Y> &&Val) { |
312 | if (!Val) |
313 | return nullptr; |
314 | return cast<X>(std::move(Val)); |
315 | } |
316 | |
317 | // dyn_cast<X> - Return the argument parameter cast to the specified type. This |
318 | // casting operator returns null if the argument is of the wrong type, so it can |
319 | // be used to test for a type as well as cast if successful. This should be |
320 | // used in the context of an if statement like this: |
321 | // |
322 | // if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... } |
323 | // |
324 | |
325 | template <class X, class Y> |
326 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
327 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
328 | dyn_cast(const Y &Val) { |
329 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
330 | } |
331 | |
332 | template <class X, class Y> |
333 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) { |
334 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
335 | } |
336 | |
337 | template <class X, class Y> |
338 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) { |
339 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
340 | } |
341 | |
342 | // dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null |
343 | // value is accepted. |
344 | // |
345 | template <class X, class Y> |
346 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t< |
347 | !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type> |
348 | dyn_cast_or_null(const Y &Val) { |
349 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
350 | } |
351 | |
352 | template <class X, class Y> |
353 | LLVM_NODISCARD[[clang::warn_unused_result]] inline std::enable_if_t<!is_simple_type<Y>::value, |
354 | typename cast_retty<X, Y>::ret_type> |
355 | dyn_cast_or_null(Y &Val) { |
356 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
357 | } |
358 | |
359 | template <class X, class Y> |
360 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type |
361 | dyn_cast_or_null(Y *Val) { |
362 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
363 | } |
364 | |
365 | // unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>, |
366 | // taking ownership of the input pointer iff isa<X>(Val) is true. If the |
367 | // cast is successful, From refers to nullptr on exit and the casted value |
368 | // is returned. If the cast is unsuccessful, the function returns nullptr |
369 | // and From is unchanged. |
370 | template <class X, class Y> |
371 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast(std::unique_ptr<Y> &Val) |
372 | -> decltype(cast<X>(Val)) { |
373 | if (!isa<X>(Val)) |
374 | return nullptr; |
375 | return cast<X>(std::move(Val)); |
376 | } |
377 | |
378 | template <class X, class Y> |
379 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val) { |
380 | return unique_dyn_cast<X, Y>(Val); |
381 | } |
382 | |
383 | // dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast, except that |
384 | // a null value is accepted. |
385 | template <class X, class Y> |
386 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &Val) |
387 | -> decltype(cast<X>(Val)) { |
388 | if (!Val) |
389 | return nullptr; |
390 | return unique_dyn_cast<X, Y>(Val); |
391 | } |
392 | |
393 | template <class X, class Y> |
394 | LLVM_NODISCARD[[clang::warn_unused_result]] inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val) { |
395 | return unique_dyn_cast_or_null<X, Y>(Val); |
396 | } |
397 | |
398 | } // end namespace llvm |
399 | |
400 | #endif // LLVM_SUPPORT_CASTING_H |
1 | //===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file declares the SDNode class and derived classes, which are used to | |||
10 | // represent the nodes and operations present in a SelectionDAG. These nodes | |||
11 | // and operations are machine code level operations, with some similarities to | |||
12 | // the GCC RTL representation. | |||
13 | // | |||
14 | // Clients should include the SelectionDAG.h file instead of this file directly. | |||
15 | // | |||
16 | //===----------------------------------------------------------------------===// | |||
17 | ||||
18 | #ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
19 | #define LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
20 | ||||
21 | #include "llvm/ADT/APFloat.h" | |||
22 | #include "llvm/ADT/ArrayRef.h" | |||
23 | #include "llvm/ADT/BitVector.h" | |||
24 | #include "llvm/ADT/FoldingSet.h" | |||
25 | #include "llvm/ADT/GraphTraits.h" | |||
26 | #include "llvm/ADT/SmallPtrSet.h" | |||
27 | #include "llvm/ADT/SmallVector.h" | |||
28 | #include "llvm/ADT/ilist_node.h" | |||
29 | #include "llvm/ADT/iterator.h" | |||
30 | #include "llvm/ADT/iterator_range.h" | |||
31 | #include "llvm/CodeGen/ISDOpcodes.h" | |||
32 | #include "llvm/CodeGen/MachineMemOperand.h" | |||
33 | #include "llvm/CodeGen/ValueTypes.h" | |||
34 | #include "llvm/IR/Constants.h" | |||
35 | #include "llvm/IR/DebugLoc.h" | |||
36 | #include "llvm/IR/Instruction.h" | |||
37 | #include "llvm/IR/Instructions.h" | |||
38 | #include "llvm/IR/Metadata.h" | |||
39 | #include "llvm/IR/Operator.h" | |||
40 | #include "llvm/Support/AlignOf.h" | |||
41 | #include "llvm/Support/AtomicOrdering.h" | |||
42 | #include "llvm/Support/Casting.h" | |||
43 | #include "llvm/Support/ErrorHandling.h" | |||
44 | #include "llvm/Support/MachineValueType.h" | |||
45 | #include "llvm/Support/TypeSize.h" | |||
46 | #include <algorithm> | |||
47 | #include <cassert> | |||
48 | #include <climits> | |||
49 | #include <cstddef> | |||
50 | #include <cstdint> | |||
51 | #include <cstring> | |||
52 | #include <iterator> | |||
53 | #include <string> | |||
54 | #include <tuple> | |||
55 | ||||
56 | namespace llvm { | |||
57 | ||||
58 | class APInt; | |||
59 | class Constant; | |||
60 | template <typename T> struct DenseMapInfo; | |||
61 | class GlobalValue; | |||
62 | class MachineBasicBlock; | |||
63 | class MachineConstantPoolValue; | |||
64 | class MCSymbol; | |||
65 | class raw_ostream; | |||
66 | class SDNode; | |||
67 | class SelectionDAG; | |||
68 | class Type; | |||
69 | class Value; | |||
70 | ||||
71 | void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr, | |||
72 | bool force = false); | |||
73 | ||||
74 | /// This represents a list of ValueType's that has been intern'd by | |||
75 | /// a SelectionDAG. Instances of this simple value class are returned by | |||
76 | /// SelectionDAG::getVTList(...). | |||
77 | /// | |||
78 | struct SDVTList { | |||
79 | const EVT *VTs; | |||
80 | unsigned int NumVTs; | |||
81 | }; | |||
82 | ||||
83 | namespace ISD { | |||
84 | ||||
85 | /// Node predicates | |||
86 | ||||
87 | /// If N is a BUILD_VECTOR node whose elements are all the same constant or | |||
88 | /// undefined, return true and return the constant value in \p SplatValue. | |||
89 | bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); | |||
90 | ||||
91 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
92 | /// elements are ~0 or undef. | |||
93 | bool isBuildVectorAllOnes(const SDNode *N); | |||
94 | ||||
95 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
96 | /// elements are 0 or undef. | |||
97 | bool isBuildVectorAllZeros(const SDNode *N); | |||
98 | ||||
99 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
100 | /// ConstantSDNode or undef. | |||
101 | bool isBuildVectorOfConstantSDNodes(const SDNode *N); | |||
102 | ||||
103 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
104 | /// ConstantFPSDNode or undef. | |||
105 | bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); | |||
106 | ||||
107 | /// Return true if the node has at least one operand and all operands of the | |||
108 | /// specified node are ISD::UNDEF. | |||
109 | bool allOperandsUndef(const SDNode *N); | |||
110 | ||||
111 | } // end namespace ISD | |||
112 | ||||
113 | //===----------------------------------------------------------------------===// | |||
114 | /// Unlike LLVM values, Selection DAG nodes may return multiple | |||
115 | /// values as the result of a computation. Many nodes return multiple values, | |||
116 | /// from loads (which define a token and a return value) to ADDC (which returns | |||
117 | /// a result and a carry value), to calls (which may return an arbitrary number | |||
118 | /// of values). | |||
119 | /// | |||
120 | /// As such, each use of a SelectionDAG computation must indicate the node that | |||
121 | /// computes it as well as which return value to use from that node. This pair | |||
122 | /// of information is represented with the SDValue value type. | |||
123 | /// | |||
class SDValue {
  friend struct DenseMapInfo<SDValue>;

  SDNode *Node = nullptr;       // The node defining the value we are using.
  unsigned ResNo = 0;           // Which return value of the node we are using.

public:
  SDValue() = default;
  SDValue(SDNode *node, unsigned resno);

  /// get the index which selects a specific result in the SDNode
  unsigned getResNo() const { return ResNo; }

  /// get the SDNode which holds the desired result
  SDNode *getNode() const { return Node; }

  /// set the SDNode
  void setNode(SDNode *N) { Node = N; }

  // NOTE(review): no null check -- dereferencing a default-constructed
  // (empty) SDValue through this operator is undefined behavior. Callers
  // must guarantee Node != nullptr (see the explicit bool conversion below);
  // the static analyzer's "called C++ object pointer is null" report points
  // at exactly this kind of unchecked forwarding.
  inline SDNode *operator->() const { return Node; }

  // Two SDValues are equal only if they name the same result of the same
  // node.
  bool operator==(const SDValue &O) const {
    return Node == O.Node && ResNo == O.ResNo;
  }
  bool operator!=(const SDValue &O) const {
    return !operator==(O);
  }
  // Lexicographic (Node, ResNo) order; used for sorted containers.
  bool operator<(const SDValue &O) const {
    return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
  }
  // True iff this SDValue refers to an actual node.
  explicit operator bool() const {
    return Node != nullptr;
  }

  // Return an SDValue naming result number R of the same node.
  SDValue getValue(unsigned R) const {
    return SDValue(Node, R);
  }

  /// Return true if this node is an operand of N.
  bool isOperandOf(const SDNode *N) const;

  /// Return the ValueType of the referenced return value.
  inline EVT getValueType() const;

  /// Return the simple ValueType of the referenced return value.
  MVT getSimpleValueType() const {
    return getValueType().getSimpleVT();
  }

  /// Returns the size of the value in bits.
  ///
  /// If the value type is a scalable vector type, the scalable property will
  /// be set and the runtime size will be a positive integer multiple of the
  /// base size.
  TypeSize getValueSizeInBits() const {
    return getValueType().getSizeInBits();
  }

  TypeSize getScalarValueSizeInBits() const {
    return getValueType().getScalarType().getSizeInBits();
  }

  // Forwarding methods - These forward to the corresponding methods in SDNode.
  // All of them dereference Node without a null check, so they carry the same
  // non-null precondition as operator-> above.
  inline unsigned getOpcode() const;
  inline unsigned getNumOperands() const;
  inline const SDValue &getOperand(unsigned i) const;
  inline uint64_t getConstantOperandVal(unsigned i) const;
  inline const APInt &getConstantOperandAPInt(unsigned i) const;
  inline bool isTargetMemoryOpcode() const;
  inline bool isTargetOpcode() const;
  inline bool isMachineOpcode() const;
  inline bool isUndef() const;
  inline unsigned getMachineOpcode() const;
  inline const DebugLoc &getDebugLoc() const;
  inline void dump() const;
  inline void dump(const SelectionDAG *G) const;
  inline void dumpr() const;
  inline void dumpr(const SelectionDAG *G) const;

  /// Return true if this operand (which must be a chain) reaches the
  /// specified operand without crossing any side-effecting instructions.
  /// In practice, this looks through token factors and non-volatile loads.
  /// In order to remain efficient, this only
  /// looks a couple of nodes in, it does not do an exhaustive search.
  bool reachesChainWithoutSideEffects(SDValue Dest,
                                      unsigned Depth = 2) const;

  /// Return true if there are no nodes using value ResNo of Node.
  inline bool use_empty() const;

  /// Return true if there is exactly one node using value ResNo of Node.
  inline bool hasOneUse() const;
};
217 | ||||
218 | template<> struct DenseMapInfo<SDValue> { | |||
219 | static inline SDValue getEmptyKey() { | |||
220 | SDValue V; | |||
221 | V.ResNo = -1U; | |||
222 | return V; | |||
223 | } | |||
224 | ||||
225 | static inline SDValue getTombstoneKey() { | |||
226 | SDValue V; | |||
227 | V.ResNo = -2U; | |||
228 | return V; | |||
229 | } | |||
230 | ||||
231 | static unsigned getHashValue(const SDValue &Val) { | |||
232 | return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^ | |||
233 | (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo(); | |||
234 | } | |||
235 | ||||
236 | static bool isEqual(const SDValue &LHS, const SDValue &RHS) { | |||
237 | return LHS == RHS; | |||
238 | } | |||
239 | }; | |||
240 | ||||
241 | /// Allow casting operators to work directly on | |||
242 | /// SDValues as if they were SDNode*'s. | |||
template<> struct simplify_type<SDValue> {
  using SimpleType = SDNode *;

  // cast<>/dyn_cast<>/isa<> applied to an SDValue operate on the underlying
  // SDNode pointer.
  static SimpleType getSimplifiedValue(SDValue &Val) {
    return Val.getNode();
  }
};
template<> struct simplify_type<const SDValue> {
  using SimpleType = /*const*/ SDNode *;

  // Const variant: note SimpleType is deliberately non-const SDNode* (the
  // constness of the SDValue does not propagate to the pointed-to node).
  static SimpleType getSimplifiedValue(const SDValue &Val) {
    return Val.getNode();
  }
};
257 | ||||
258 | /// Represents a use of a SDNode. This class holds an SDValue, | |||
259 | /// which records the SDNode being used and the result number, a | |||
260 | /// pointer to the SDNode using the value, and Next and Prev pointers, | |||
261 | /// which link together all the uses of an SDNode. | |||
262 | /// | |||
class SDUse {
  /// Val - The value being used.
  SDValue Val;
  /// User - The user of this value.
  SDNode *User = nullptr;
  /// Prev, Next - Pointers to the uses list of the SDNode referred by
  /// this operand.
  /// This is an intrusive doubly-linked list: Next points at the following
  /// SDUse, while Prev points at whatever pointer points at *this* SDUse
  /// (either the list head or the previous node's Next field), which makes
  /// unlinking O(1) without a special case for the head.
  SDUse **Prev = nullptr;
  SDUse *Next = nullptr;

public:
  SDUse() = default;
  // Non-copyable: copying would corrupt the intrusive use list.
  SDUse(const SDUse &U) = delete;
  SDUse &operator=(const SDUse &) = delete;

  /// Normally SDUse will just implicitly convert to an SDValue that it holds.
  operator const SDValue&() const { return Val; }

  /// If implicit conversion to SDValue doesn't work, the get() method returns
  /// the SDValue.
  const SDValue &get() const { return Val; }

  /// This returns the SDNode that contains this Use.
  SDNode *getUser() { return User; }

  /// Get the next SDUse in the use list.
  SDUse *getNext() const { return Next; }

  /// Convenience function for get().getNode().
  SDNode *getNode() const { return Val.getNode(); }
  /// Convenience function for get().getResNo().
  unsigned getResNo() const { return Val.getResNo(); }
  /// Convenience function for get().getValueType().
  EVT getValueType() const { return Val.getValueType(); }

  /// Convenience function for get().operator==
  bool operator==(const SDValue &V) const {
    return Val == V;
  }

  /// Convenience function for get().operator!=
  bool operator!=(const SDValue &V) const {
    return Val != V;
  }

  /// Convenience function for get().operator<
  bool operator<(const SDValue &V) const {
    return Val < V;
  }

private:
  friend class SelectionDAG;
  friend class SDNode;
  // TODO: unfriend HandleSDNode once we fix its operand handling.
  friend class HandleSDNode;

  void setUser(SDNode *p) { User = p; }

  /// Remove this use from its existing use list, assign it the
  /// given value, and add it to the new value's node's use list.
  inline void set(const SDValue &V);
  /// Like set, but only supports initializing a newly-allocated
  /// SDUse with a non-null value.
  inline void setInitial(const SDValue &V);
  /// Like set, but only sets the Node portion of the value,
  /// leaving the ResNo portion unmodified.
  inline void setNode(SDNode *N);

  // Push this use onto the front of the list headed by *List.
  void addToList(SDUse **List) {
    Next = *List;
    if (Next) Next->Prev = &Next;  // old head's back-pointer now targets our Next
    Prev = List;                   // our back-pointer targets the list head
    *List = this;
  }

  // Unlink this use in O(1); Prev always points at the pointer that points
  // at us, so no head/non-head distinction is needed.
  void removeFromList() {
    *Prev = Next;
    if (Next) Next->Prev = Prev;
  }
};
343 | ||||
344 | /// simplify_type specializations - Allow casting operators to work directly on | |||
345 | /// SDValues as if they were SDNode*'s. | |||
template<> struct simplify_type<SDUse> {
  using SimpleType = SDNode *;

  // cast<>/dyn_cast<>/isa<> applied to an SDUse operate on the SDNode that
  // the held SDValue refers to.
  static SimpleType getSimplifiedValue(SDUse &Val) {
    return Val.getNode();
  }
};
353 | ||||
354 | /// These are IR-level optimization flags that may be propagated to SDNodes. | |||
/// TODO: This data structure should be shared by the IR optimizer and the
/// backend.
357 | struct SDNodeFlags { | |||
358 | private: | |||
359 | // This bit is used to determine if the flags are in a defined state. | |||
360 | // Flag bits can only be masked out during intersection if the masking flags | |||
361 | // are defined. | |||
362 | bool AnyDefined : 1; | |||
363 | ||||
364 | bool NoUnsignedWrap : 1; | |||
365 | bool NoSignedWrap : 1; | |||
366 | bool Exact : 1; | |||
367 | bool NoNaNs : 1; | |||
368 | bool NoInfs : 1; | |||
369 | bool NoSignedZeros : 1; | |||
370 | bool AllowReciprocal : 1; | |||
371 | bool VectorReduction : 1; | |||
372 | bool AllowContract : 1; | |||
373 | bool ApproximateFuncs : 1; | |||
374 | bool AllowReassociation : 1; | |||
375 | ||||
376 | // We assume instructions do not raise floating-point exceptions by default, | |||
377 | // and only those marked explicitly may do so. We could choose to represent | |||
378 | // this via a positive "FPExcept" flags like on the MI level, but having a | |||
379 | // negative "NoFPExcept" flag here (that defaults to true) makes the flag | |||
380 | // intersection logic more straightforward. | |||
381 | bool NoFPExcept : 1; | |||
382 | ||||
383 | public: | |||
384 | /// Default constructor turns off all optimization flags. | |||
385 | SDNodeFlags() | |||
386 | : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), | |||
387 | Exact(false), NoNaNs(false), NoInfs(false), | |||
388 | NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false), | |||
389 | AllowContract(false), ApproximateFuncs(false), | |||
390 | AllowReassociation(false), NoFPExcept(false) {} | |||
391 | ||||
392 | /// Propagate the fast-math-flags from an IR FPMathOperator. | |||
393 | void copyFMF(const FPMathOperator &FPMO) { | |||
394 | setNoNaNs(FPMO.hasNoNaNs()); | |||
395 | setNoInfs(FPMO.hasNoInfs()); | |||
396 | setNoSignedZeros(FPMO.hasNoSignedZeros()); | |||
397 | setAllowReciprocal(FPMO.hasAllowReciprocal()); | |||
398 | setAllowContract(FPMO.hasAllowContract()); | |||
399 | setApproximateFuncs(FPMO.hasApproxFunc()); | |||
400 | setAllowReassociation(FPMO.hasAllowReassoc()); | |||
401 | } | |||
402 | ||||
403 | /// Sets the state of the flags to the defined state. | |||
404 | void setDefined() { AnyDefined = true; } | |||
405 | /// Returns true if the flags are in a defined state. | |||
406 | bool isDefined() const { return AnyDefined; } | |||
407 | ||||
408 | // These are mutators for each flag. | |||
409 | void setNoUnsignedWrap(bool b) { | |||
410 | setDefined(); | |||
411 | NoUnsignedWrap = b; | |||
412 | } | |||
413 | void setNoSignedWrap(bool b) { | |||
414 | setDefined(); | |||
415 | NoSignedWrap = b; | |||
416 | } | |||
417 | void setExact(bool b) { | |||
418 | setDefined(); | |||
419 | Exact = b; | |||
420 | } | |||
421 | void setNoNaNs(bool b) { | |||
422 | setDefined(); | |||
423 | NoNaNs = b; | |||
424 | } | |||
425 | void setNoInfs(bool b) { | |||
426 | setDefined(); | |||
427 | NoInfs = b; | |||
428 | } | |||
429 | void setNoSignedZeros(bool b) { | |||
430 | setDefined(); | |||
431 | NoSignedZeros = b; | |||
432 | } | |||
433 | void setAllowReciprocal(bool b) { | |||
434 | setDefined(); | |||
435 | AllowReciprocal = b; | |||
436 | } | |||
437 | void setVectorReduction(bool b) { | |||
438 | setDefined(); | |||
439 | VectorReduction = b; | |||
440 | } | |||
441 | void setAllowContract(bool b) { | |||
442 | setDefined(); | |||
443 | AllowContract = b; | |||
444 | } | |||
445 | void setApproximateFuncs(bool b) { | |||
446 | setDefined(); | |||
447 | ApproximateFuncs = b; | |||
448 | } | |||
449 | void setAllowReassociation(bool b) { | |||
450 | setDefined(); | |||
451 | AllowReassociation = b; | |||
452 | } | |||
453 | void setNoFPExcept(bool b) { | |||
454 | setDefined(); | |||
455 | NoFPExcept = b; | |||
456 | } | |||
457 | ||||
458 | // These are accessors for each flag. | |||
459 | bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } | |||
460 | bool hasNoSignedWrap() const { return NoSignedWrap; } | |||
461 | bool hasExact() const { return Exact; } | |||
462 | bool hasNoNaNs() const { return NoNaNs; } | |||
463 | bool hasNoInfs() const { return NoInfs; } | |||
464 | bool hasNoSignedZeros() const { return NoSignedZeros; } | |||
465 | bool hasAllowReciprocal() const { return AllowReciprocal; } | |||
466 | bool hasVectorReduction() const { return VectorReduction; } | |||
467 | bool hasAllowContract() const { return AllowContract; } | |||
468 | bool hasApproximateFuncs() const { return ApproximateFuncs; } | |||
469 | bool hasAllowReassociation() const { return AllowReassociation; } | |||
470 | bool hasNoFPExcept() const { return NoFPExcept; } | |||
471 | ||||
472 | /// Clear any flags in this flag set that aren't also set in Flags. | |||
473 | /// If the given Flags are undefined then don't do anything. | |||
474 | void intersectWith(const SDNodeFlags Flags) { | |||
475 | if (!Flags.isDefined()) | |||
476 | return; | |||
477 | NoUnsignedWrap &= Flags.NoUnsignedWrap; | |||
478 | NoSignedWrap &= Flags.NoSignedWrap; | |||
479 | Exact &= Flags.Exact; | |||
480 | NoNaNs &= Flags.NoNaNs; | |||
481 | NoInfs &= Flags.NoInfs; | |||
482 | NoSignedZeros &= Flags.NoSignedZeros; | |||
483 | AllowReciprocal &= Flags.AllowReciprocal; | |||
484 | VectorReduction &= Flags.VectorReduction; | |||
485 | AllowContract &= Flags.AllowContract; | |||
486 | ApproximateFuncs &= Flags.ApproximateFuncs; | |||
487 | AllowReassociation &= Flags.AllowReassociation; | |||
488 | NoFPExcept &= Flags.NoFPExcept; | |||
489 | } | |||
490 | }; | |||
491 | ||||
492 | /// Represents one node in the SelectionDAG. | |||
493 | /// | |||
494 | class SDNode : public FoldingSetNode, public ilist_node<SDNode> { | |||
495 | private: | |||
496 | /// The operation that this node performs. | |||
497 | int16_t NodeType; | |||
498 | ||||
499 | protected: | |||
500 | // We define a set of mini-helper classes to help us interpret the bits in our | |||
501 | // SubclassData. These are designed to fit within a uint16_t so they pack | |||
502 | // with NodeType. | |||
503 | ||||
504 | #if defined(_AIX) && (!defined(__GNUC__4) || defined(__ibmxl__)) | |||
505 | // Except for GCC; by default, AIX compilers store bit-fields in 4-byte words | |||
506 | // and give the `pack` pragma push semantics. | |||
507 | #define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")pack(2) | |||
508 | #define END_TWO_BYTE_PACK() _Pragma("pack(pop)")pack(pop) | |||
509 | #else | |||
510 | #define BEGIN_TWO_BYTE_PACK() | |||
511 | #define END_TWO_BYTE_PACK() | |||
512 | #endif | |||
513 | ||||
514 | BEGIN_TWO_BYTE_PACK() | |||
515 | class SDNodeBitfields { | |||
516 | friend class SDNode; | |||
517 | friend class MemIntrinsicSDNode; | |||
518 | friend class MemSDNode; | |||
519 | friend class SelectionDAG; | |||
520 | ||||
521 | uint16_t HasDebugValue : 1; | |||
522 | uint16_t IsMemIntrinsic : 1; | |||
523 | uint16_t IsDivergent : 1; | |||
524 | }; | |||
525 | enum { NumSDNodeBits = 3 }; | |||
526 | ||||
527 | class ConstantSDNodeBitfields { | |||
528 | friend class ConstantSDNode; | |||
529 | ||||
530 | uint16_t : NumSDNodeBits; | |||
531 | ||||
532 | uint16_t IsOpaque : 1; | |||
533 | }; | |||
534 | ||||
535 | class MemSDNodeBitfields { | |||
536 | friend class MemSDNode; | |||
537 | friend class MemIntrinsicSDNode; | |||
538 | friend class AtomicSDNode; | |||
539 | ||||
540 | uint16_t : NumSDNodeBits; | |||
541 | ||||
542 | uint16_t IsVolatile : 1; | |||
543 | uint16_t IsNonTemporal : 1; | |||
544 | uint16_t IsDereferenceable : 1; | |||
545 | uint16_t IsInvariant : 1; | |||
546 | }; | |||
547 | enum { NumMemSDNodeBits = NumSDNodeBits + 4 }; | |||
548 | ||||
549 | class LSBaseSDNodeBitfields { | |||
550 | friend class LSBaseSDNode; | |||
551 | friend class MaskedLoadStoreSDNode; | |||
552 | friend class MaskedGatherScatterSDNode; | |||
553 | ||||
554 | uint16_t : NumMemSDNodeBits; | |||
555 | ||||
556 | // This storage is shared between disparate class hierarchies to hold an | |||
557 | // enumeration specific to the class hierarchy in use. | |||
558 | // LSBaseSDNode => enum ISD::MemIndexedMode | |||
559 | // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode | |||
560 | // MaskedGatherScatterSDNode => enum ISD::MemIndexType | |||
561 | uint16_t AddressingMode : 3; | |||
562 | }; | |||
563 | enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 }; | |||
564 | ||||
565 | class LoadSDNodeBitfields { | |||
566 | friend class LoadSDNode; | |||
567 | friend class MaskedLoadSDNode; | |||
568 | ||||
569 | uint16_t : NumLSBaseSDNodeBits; | |||
570 | ||||
571 | uint16_t ExtTy : 2; // enum ISD::LoadExtType | |||
572 | uint16_t IsExpanding : 1; | |||
573 | }; | |||
574 | ||||
575 | class StoreSDNodeBitfields { | |||
576 | friend class StoreSDNode; | |||
577 | friend class MaskedStoreSDNode; | |||
578 | ||||
579 | uint16_t : NumLSBaseSDNodeBits; | |||
580 | ||||
581 | uint16_t IsTruncating : 1; | |||
582 | uint16_t IsCompressing : 1; | |||
583 | }; | |||
584 | ||||
585 | union { | |||
586 | char RawSDNodeBits[sizeof(uint16_t)]; | |||
587 | SDNodeBitfields SDNodeBits; | |||
588 | ConstantSDNodeBitfields ConstantSDNodeBits; | |||
589 | MemSDNodeBitfields MemSDNodeBits; | |||
590 | LSBaseSDNodeBitfields LSBaseSDNodeBits; | |||
591 | LoadSDNodeBitfields LoadSDNodeBits; | |||
592 | StoreSDNodeBitfields StoreSDNodeBits; | |||
593 | }; | |||
594 | END_TWO_BYTE_PACK() | |||
595 | #undef BEGIN_TWO_BYTE_PACK | |||
596 | #undef END_TWO_BYTE_PACK | |||
597 | ||||
598 | // RawSDNodeBits must cover the entirety of the union. This means that all of | |||
599 | // the union's members must have size <= RawSDNodeBits. We write the RHS as | |||
600 | // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter. | |||
601 | static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide"); | |||
602 | static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide"); | |||
603 | static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide"); | |||
604 | static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide"); | |||
605 | static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide"); | |||
606 | static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide"); | |||
607 | ||||
608 | private: | |||
609 | friend class SelectionDAG; | |||
610 | // TODO: unfriend HandleSDNode once we fix its operand handling. | |||
611 | friend class HandleSDNode; | |||
612 | ||||
613 | /// Unique id per SDNode in the DAG. | |||
614 | int NodeId = -1; | |||
615 | ||||
616 | /// The values that are used by this operation. | |||
617 | SDUse *OperandList = nullptr; | |||
618 | ||||
619 | /// The types of the values this node defines. SDNode's may | |||
620 | /// define multiple values simultaneously. | |||
621 | const EVT *ValueList; | |||
622 | ||||
623 | /// List of uses for this SDNode. | |||
624 | SDUse *UseList = nullptr; | |||
625 | ||||
626 | /// The number of entries in the Operand/Value list. | |||
627 | unsigned short NumOperands = 0; | |||
628 | unsigned short NumValues; | |||
629 | ||||
630 | // The ordering of the SDNodes. It roughly corresponds to the ordering of the | |||
631 | // original LLVM instructions. | |||
632 | // This is used for turning off scheduling, because we'll forgo | |||
633 | // the normal scheduling algorithms and output the instructions according to | |||
634 | // this ordering. | |||
635 | unsigned IROrder; | |||
636 | ||||
637 | /// Source line information. | |||
638 | DebugLoc debugLoc; | |||
639 | ||||
640 | /// Return a pointer to the specified value type. | |||
641 | static const EVT *getValueTypeList(EVT VT); | |||
642 | ||||
643 | SDNodeFlags Flags; | |||
644 | ||||
645 | public: | |||
646 | /// Unique and persistent id per SDNode in the DAG. | |||
647 | /// Used for debug printing. | |||
648 | uint16_t PersistentId; | |||
649 | ||||
650 | //===--------------------------------------------------------------------===// | |||
651 | // Accessors | |||
652 | // | |||
653 | ||||
654 | /// Return the SelectionDAG opcode value for this node. For | |||
655 | /// pre-isel nodes (those for which isMachineOpcode returns false), these | |||
656 | /// are the opcode values in the ISD and <target>ISD namespaces. For | |||
657 | /// post-isel opcodes, see getMachineOpcode. | |||
658 | unsigned getOpcode() const { return (unsigned short)NodeType; } | |||
659 | ||||
660 | /// Test if this node has a target-specific opcode (in the | |||
661 | /// \<target\>ISD namespace). | |||
662 | bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; } | |||
663 | ||||
664 | /// Test if this node has a target-specific opcode that may raise | |||
665 | /// FP exceptions (in the \<target\>ISD namespace and greater than | |||
666 | /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory | |||
667 | /// opcode are currently automatically considered to possibly raise | |||
668 | /// FP exceptions as well. | |||
669 | bool isTargetStrictFPOpcode() const { | |||
670 | return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE; | |||
671 | } | |||
672 | ||||
673 | /// Test if this node has a target-specific | |||
674 | /// memory-referencing opcode (in the \<target\>ISD namespace and | |||
675 | /// greater than FIRST_TARGET_MEMORY_OPCODE). | |||
676 | bool isTargetMemoryOpcode() const { | |||
677 | return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE; | |||
678 | } | |||
679 | ||||
  /// Return true if this node is an ISD::UNDEF value (i.e. the value it
  /// produces is undefined).
  bool isUndef() const { return NodeType == ISD::UNDEF; }
682 | ||||
683 | /// Test if this node is a memory intrinsic (with valid pointer information). | |||
684 | /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for | |||
685 | /// non-memory intrinsics (with chains) that are not really instances of | |||
686 | /// MemSDNode. For such nodes, we need some extra state to determine the | |||
687 | /// proper classof relationship. | |||
688 | bool isMemIntrinsic() const { | |||
689 | return (NodeType == ISD::INTRINSIC_W_CHAIN || | |||
690 | NodeType == ISD::INTRINSIC_VOID) && | |||
691 | SDNodeBits.IsMemIntrinsic; | |||
692 | } | |||
693 | ||||
694 | /// Test if this node is a strict floating point pseudo-op. | |||
695 | bool isStrictFPOpcode() { | |||
696 | switch (NodeType) { | |||
697 | default: | |||
698 | return false; | |||
699 | case ISD::STRICT_FP16_TO_FP: | |||
700 | case ISD::STRICT_FP_TO_FP16: | |||
701 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ | |||
702 | case ISD::STRICT_##DAGN: | |||
703 | #include "llvm/IR/ConstrainedOps.def" | |||
704 | return true; | |||
705 | } | |||
706 | } | |||
707 | ||||
708 | /// Test if this node has a post-isel opcode, directly | |||
709 | /// corresponding to a MachineInstr opcode. | |||
710 | bool isMachineOpcode() const { return NodeType < 0; } | |||
711 | ||||
712 | /// This may only be called if isMachineOpcode returns | |||
713 | /// true. It returns the MachineInstr opcode value that the node's opcode | |||
714 | /// corresponds to. | |||
715 | unsigned getMachineOpcode() const { | |||
716 | assert(isMachineOpcode() && "Not a MachineInstr opcode!")((isMachineOpcode() && "Not a MachineInstr opcode!") ? static_cast<void> (0) : __assert_fail ("isMachineOpcode() && \"Not a MachineInstr opcode!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 716, __PRETTY_FUNCTION__)); | |||
717 | return ~NodeType; | |||
718 | } | |||
719 | ||||
720 | bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; } | |||
721 | void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; } | |||
722 | ||||
723 | bool isDivergent() const { return SDNodeBits.IsDivergent; } | |||
724 | ||||
725 | /// Return true if there are no uses of this node. | |||
726 | bool use_empty() const { return UseList == nullptr; } | |||
727 | ||||
728 | /// Return true if there is exactly one use of this node. | |||
729 | bool hasOneUse() const { | |||
730 | return !use_empty() && std::next(use_begin()) == use_end(); | |||
731 | } | |||
732 | ||||
733 | /// Return the number of uses of this node. This method takes | |||
734 | /// time proportional to the number of uses. | |||
735 | size_t use_size() const { return std::distance(use_begin(), use_end()); } | |||
736 | ||||
737 | /// Return the unique node id. | |||
738 | int getNodeId() const { return NodeId; } | |||
739 | ||||
740 | /// Set unique node id. | |||
741 | void setNodeId(int Id) { NodeId = Id; } | |||
742 | ||||
743 | /// Return the node ordering. | |||
744 | unsigned getIROrder() const { return IROrder; } | |||
745 | ||||
746 | /// Set the node ordering. | |||
747 | void setIROrder(unsigned Order) { IROrder = Order; } | |||
748 | ||||
749 | /// Return the source location info. | |||
750 | const DebugLoc &getDebugLoc() const { return debugLoc; } | |||
751 | ||||
752 | /// Set source location info. Try to avoid this, putting | |||
753 | /// it in the constructor is preferable. | |||
754 | void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); } | |||
755 | ||||
756 | /// This class provides iterator support for SDUse | |||
757 | /// operands that use a specific SDNode. | |||
758 | class use_iterator | |||
759 | : public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> { | |||
760 | friend class SDNode; | |||
761 | ||||
762 | SDUse *Op = nullptr; | |||
763 | ||||
764 | explicit use_iterator(SDUse *op) : Op(op) {} | |||
765 | ||||
766 | public: | |||
767 | using reference = std::iterator<std::forward_iterator_tag, | |||
768 | SDUse, ptrdiff_t>::reference; | |||
769 | using pointer = std::iterator<std::forward_iterator_tag, | |||
770 | SDUse, ptrdiff_t>::pointer; | |||
771 | ||||
772 | use_iterator() = default; | |||
773 | use_iterator(const use_iterator &I) : Op(I.Op) {} | |||
774 | ||||
775 | bool operator==(const use_iterator &x) const { | |||
776 | return Op == x.Op; | |||
777 | } | |||
778 | bool operator!=(const use_iterator &x) const { | |||
779 | return !operator==(x); | |||
780 | } | |||
781 | ||||
782 | /// Return true if this iterator is at the end of uses list. | |||
783 | bool atEnd() const { return Op == nullptr; } | |||
784 | ||||
785 | // Iterator traversal: forward iteration only. | |||
786 | use_iterator &operator++() { // Preincrement | |||
787 | assert(Op && "Cannot increment end iterator!")((Op && "Cannot increment end iterator!") ? static_cast <void> (0) : __assert_fail ("Op && \"Cannot increment end iterator!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 787, __PRETTY_FUNCTION__)); | |||
788 | Op = Op->getNext(); | |||
789 | return *this; | |||
790 | } | |||
791 | ||||
792 | use_iterator operator++(int) { // Postincrement | |||
793 | use_iterator tmp = *this; ++*this; return tmp; | |||
794 | } | |||
795 | ||||
796 | /// Retrieve a pointer to the current user node. | |||
797 | SDNode *operator*() const { | |||
798 | assert(Op && "Cannot dereference end iterator!")((Op && "Cannot dereference end iterator!") ? static_cast <void> (0) : __assert_fail ("Op && \"Cannot dereference end iterator!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 798, __PRETTY_FUNCTION__)); | |||
799 | return Op->getUser(); | |||
800 | } | |||
801 | ||||
802 | SDNode *operator->() const { return operator*(); } | |||
803 | ||||
804 | SDUse &getUse() const { return *Op; } | |||
805 | ||||
806 | /// Retrieve the operand # of this use in its user. | |||
807 | unsigned getOperandNo() const { | |||
808 | assert(Op && "Cannot dereference end iterator!")((Op && "Cannot dereference end iterator!") ? static_cast <void> (0) : __assert_fail ("Op && \"Cannot dereference end iterator!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 808, __PRETTY_FUNCTION__)); | |||
809 | return (unsigned)(Op - Op->getUser()->OperandList); | |||
810 | } | |||
811 | }; | |||
812 | ||||
813 | /// Provide iteration support to walk over all uses of an SDNode. | |||
814 | use_iterator use_begin() const { | |||
815 | return use_iterator(UseList); | |||
816 | } | |||
817 | ||||
818 | static use_iterator use_end() { return use_iterator(nullptr); } | |||
819 | ||||
820 | inline iterator_range<use_iterator> uses() { | |||
821 | return make_range(use_begin(), use_end()); | |||
822 | } | |||
823 | inline iterator_range<use_iterator> uses() const { | |||
824 | return make_range(use_begin(), use_end()); | |||
825 | } | |||
826 | ||||
827 | /// Return true if there are exactly NUSES uses of the indicated value. | |||
828 | /// This method ignores uses of other values defined by this operation. | |||
829 | bool hasNUsesOfValue(unsigned NUses, unsigned Value) const; | |||
830 | ||||
831 | /// Return true if there are any use of the indicated value. | |||
832 | /// This method ignores uses of other values defined by this operation. | |||
833 | bool hasAnyUseOfValue(unsigned Value) const; | |||
834 | ||||
835 | /// Return true if this node is the only use of N. | |||
836 | bool isOnlyUserOf(const SDNode *N) const; | |||
837 | ||||
838 | /// Return true if this node is an operand of N. | |||
839 | bool isOperandOf(const SDNode *N) const; | |||
840 | ||||
841 | /// Return true if this node is a predecessor of N. | |||
842 | /// NOTE: Implemented on top of hasPredecessor and every bit as | |||
843 | /// expensive. Use carefully. | |||
844 | bool isPredecessorOf(const SDNode *N) const { | |||
845 | return N->hasPredecessor(this); | |||
846 | } | |||
847 | ||||
848 | /// Return true if N is a predecessor of this node. | |||
849 | /// N is either an operand of this node, or can be reached by recursively | |||
850 | /// traversing up the operands. | |||
851 | /// NOTE: This is an expensive method. Use it carefully. | |||
852 | bool hasPredecessor(const SDNode *N) const; | |||
853 | ||||
854 | /// Returns true if N is a predecessor of any node in Worklist. This | |||
855 | /// helper keeps Visited and Worklist sets externally to allow unions | |||
856 | /// searches to be performed in parallel, caching of results across | |||
857 | /// queries and incremental addition to Worklist. Stops early if N is | |||
858 | /// found but will resume. Remember to clear Visited and Worklists | |||
859 | /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before | |||
860 | /// giving up. The TopologicalPrune flag signals that positive NodeIds are | |||
861 | /// topologically ordered (Operands have strictly smaller node id) and search | |||
862 | /// can be pruned leveraging this. | |||
  static bool hasPredecessorHelper(const SDNode *N,
                                   SmallPtrSetImpl<const SDNode *> &Visited,
                                   SmallVectorImpl<const SDNode *> &Worklist,
                                   unsigned int MaxSteps = 0,
                                   bool TopologicalPrune = false) {
    SmallVector<const SDNode *, 8> DeferredNodes;
    // A previous call may already have visited (and therefore proved
    // reachability of) N.
    if (Visited.count(N))
      return true;

    // Node Id's are assigned in three places: As a topological
    // ordering (> 0), during legalization (results in values set to
    // 0), new nodes (set to -1). If N has a topological id then we
    // know that all nodes with ids smaller than it cannot be
    // successors and we need not check them. Filter out all nodes
    // that can't be matched. We add them to the worklist before exit
    // in case of multiple calls. Note that during selection the topological id
    // may be violated if a node's predecessor is selected before it. We mark
    // this at selection negating the id of unselected successors and
    // restricting topological pruning to positive ids.

    int NId = N->getNodeId();
    // If we Invalidated the Id, reconstruct original NId.
    if (NId < -1)
      NId = -(NId + 1);

    // Breadth-style search over the operand (predecessor) edges of every
    // node on the worklist.
    bool Found = false;
    while (!Worklist.empty()) {
      const SDNode *M = Worklist.pop_back_val();
      int MId = M->getNodeId();
      // Prunable node: its topological id proves N cannot be among its
      // predecessors. Defer it rather than drop it, so a later call (with a
      // different N) can still expand it.
      if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
          (MId > 0) && (MId < NId)) {
        DeferredNodes.push_back(M);
        continue;
      }
      for (const SDValue &OpV : M->op_values()) {
        SDNode *Op = OpV.getNode();
        if (Visited.insert(Op).second)
          Worklist.push_back(Op);
        if (Op == N)
          Found = true;
      }
      if (Found)
        break;
      // Honor the visit budget (0 means unlimited).
      if (MaxSteps != 0 && Visited.size() >= MaxSteps)
        break;
    }
    // Push deferred nodes back on worklist.
    Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
    // If we bailed early, conservatively return found.
    if (MaxSteps != 0 && Visited.size() >= MaxSteps)
      return true;
    return Found;
  }
916 | ||||
917 | /// Return true if all the users of N are contained in Nodes. | |||
918 | /// NOTE: Requires at least one match, but doesn't require them all. | |||
919 | static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N); | |||
920 | ||||
  /// Return the number of values used by this operation.
  unsigned getNumOperands() const { return NumOperands; }

  /// Return the maximum number of operands that a SDNode can hold.
  static constexpr size_t getMaxNumOperands() {
    return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
  }

  /// Helper method returns the integer value of a ConstantSDNode operand.
  inline uint64_t getConstantOperandVal(unsigned Num) const;

  /// Helper method returns the APInt of a ConstantSDNode operand.
  inline const APInt &getConstantOperandAPInt(unsigned Num) const;

  /// Return the Num'th operand of this node (asserts Num is in range).
  const SDValue &getOperand(unsigned Num) const {
    assert(Num < NumOperands && "Invalid child # of SDNode!");
    return OperandList[Num];
  }
939 | ||||
  // Operands are stored as a contiguous SDUse array, so a raw pointer
  // serves as the operand iterator.
  using op_iterator = SDUse *;

  op_iterator op_begin() const { return OperandList; }
  op_iterator op_end() const { return OperandList+NumOperands; }
  /// Return all operands of this node as an ArrayRef of SDUse.
  ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }

  /// Iterator for directly iterating over the operand SDValue's.
  struct value_op_iterator
      : iterator_adaptor_base<value_op_iterator, op_iterator,
                              std::random_access_iterator_tag, SDValue,
                              ptrdiff_t, value_op_iterator *,
                              value_op_iterator *> {
    explicit value_op_iterator(SDUse *U = nullptr)
      : iterator_adaptor_base(U) {}

    // Dereference yields the SDValue held by the wrapped SDUse.
    const SDValue &operator*() const { return I->get(); }
  };

  /// Range over the operands viewed as SDValues rather than SDUses.
  iterator_range<value_op_iterator> op_values() const {
    return make_range(value_op_iterator(op_begin()),
                      value_op_iterator(op_end()));
  }

  /// Return this node's list of result value types.
  SDVTList getVTList() const {
    SDVTList X = { ValueList, NumValues };
    return X;
  }
967 | ||||
968 | /// If this node has a glue operand, return the node | |||
969 | /// to which the glue operand points. Otherwise return NULL. | |||
970 | SDNode *getGluedNode() const { | |||
971 | if (getNumOperands() != 0 && | |||
972 | getOperand(getNumOperands()-1).getValueType() == MVT::Glue) | |||
973 | return getOperand(getNumOperands()-1).getNode(); | |||
974 | return nullptr; | |||
975 | } | |||
976 | ||||
977 | /// If this node has a glue value with a user, return | |||
978 | /// the user (there is at most one). Otherwise return NULL. | |||
979 | SDNode *getGluedUser() const { | |||
980 | for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI) | |||
981 | if (UI.getUse().get().getValueType() == MVT::Glue) | |||
982 | return *UI; | |||
983 | return nullptr; | |||
984 | } | |||
985 | ||||
  /// Return the node-level flags (e.g. fast-math flags) attached to this node.
  const SDNodeFlags getFlags() const { return Flags; }
  /// Replace this node's flags wholesale with NewFlags.
  void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }

  /// Clear any flags in this node that aren't also set in Flags.
  /// If Flags is not in a defined state then this has no effect.
  void intersectFlagsWith(const SDNodeFlags Flags);

  /// Return the number of values defined/returned by this operator.
  unsigned getNumValues() const { return NumValues; }

  /// Return the type of a specified result.
  EVT getValueType(unsigned ResNo) const {
    assert(ResNo < NumValues && "Illegal result number!");
    return ValueList[ResNo];
  }

  /// Return the type of a specified result as a simple type.
  MVT getSimpleValueType(unsigned ResNo) const {
    return getValueType(ResNo).getSimpleVT();
  }

  /// Returns MVT::getSizeInBits(getValueType(ResNo)).
  ///
  /// If the value type is a scalable vector type, the scalable property will
  /// be set and the runtime size will be a positive integer multiple of the
  /// base size.
  TypeSize getValueSizeInBits(unsigned ResNo) const {
    return getValueType(ResNo).getSizeInBits();
  }

  // Result types are stored as a contiguous EVT array.
  using value_iterator = const EVT *;

  value_iterator value_begin() const { return ValueList; }
  value_iterator value_end() const { return ValueList+NumValues; }
  /// Range over this node's result value types.
  iterator_range<value_iterator> values() const {
    return llvm::make_range(value_begin(), value_end());
  }
1023 | ||||
1024 | /// Return the opcode of this operation for printing. | |||
1025 | std::string getOperationName(const SelectionDAG *G = nullptr) const; | |||
1026 | static const char* getIndexedModeName(ISD::MemIndexedMode AM); | |||
1027 | void print_types(raw_ostream &OS, const SelectionDAG *G) const; | |||
1028 | void print_details(raw_ostream &OS, const SelectionDAG *G) const; | |||
1029 | void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const; | |||
1030 | void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const; | |||
1031 | ||||
1032 | /// Print a SelectionDAG node and all children down to | |||
1033 | /// the leaves. The given SelectionDAG allows target-specific nodes | |||
1034 | /// to be printed in human-readable form. Unlike printr, this will | |||
1035 | /// print the whole DAG, including children that appear multiple | |||
1036 | /// times. | |||
1037 | /// | |||
1038 | void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const; | |||
1039 | ||||
1040 | /// Print a SelectionDAG node and children up to | |||
1041 | /// depth "depth." The given SelectionDAG allows target-specific | |||
1042 | /// nodes to be printed in human-readable form. Unlike printr, this | |||
1043 | /// will print children that appear multiple times wherever they are | |||
1044 | /// used. | |||
1045 | /// | |||
1046 | void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr, | |||
1047 | unsigned depth = 100) const; | |||
1048 | ||||
1049 | /// Dump this node, for debugging. | |||
1050 | void dump() const; | |||
1051 | ||||
1052 | /// Dump (recursively) this node and its use-def subgraph. | |||
1053 | void dumpr() const; | |||
1054 | ||||
1055 | /// Dump this node, for debugging. | |||
1056 | /// The given SelectionDAG allows target-specific nodes to be printed | |||
1057 | /// in human-readable form. | |||
1058 | void dump(const SelectionDAG *G) const; | |||
1059 | ||||
1060 | /// Dump (recursively) this node and its use-def subgraph. | |||
1061 | /// The given SelectionDAG allows target-specific nodes to be printed | |||
1062 | /// in human-readable form. | |||
1063 | void dumpr(const SelectionDAG *G) const; | |||
1064 | ||||
1065 | /// printrFull to dbgs(). The given SelectionDAG allows | |||
1066 | /// target-specific nodes to be printed in human-readable form. | |||
1067 | /// Unlike dumpr, this will print the whole DAG, including children | |||
1068 | /// that appear multiple times. | |||
1069 | void dumprFull(const SelectionDAG *G = nullptr) const; | |||
1070 | ||||
1071 | /// printrWithDepth to dbgs(). The given | |||
1072 | /// SelectionDAG allows target-specific nodes to be printed in | |||
1073 | /// human-readable form. Unlike dumpr, this will print children | |||
1074 | /// that appear multiple times wherever they are used. | |||
1075 | /// | |||
1076 | void dumprWithDepth(const SelectionDAG *G = nullptr, | |||
1077 | unsigned depth = 100) const; | |||
1078 | ||||
  /// Gather unique data for the node.
  void Profile(FoldingSetNodeID &ID) const;

  /// This method should only be used by the SDUse class.
  /// Prepends U to this node's use list.
  void addUse(SDUse &U) { U.addToList(&UseList); }
1084 | ||||
protected:
  /// Build a single-result SDVTList for VT, using the uniqued static
  /// value-type storage.
  static SDVTList getSDVTList(EVT VT) {
    SDVTList Ret = { getValueTypeList(VT), 1 };
    return Ret;
  }

  /// Create an SDNode.
  ///
  /// SDNodes are created without any operands, and never own the operand
  /// storage. To add operands, see SelectionDAG::createOperands.
  SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
      : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
        IROrder(Order), debugLoc(std::move(dl)) {
    // Zero all subclass bitfields in one shot.
    memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
    assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
    // NumValues is a narrow bitfield; verify the count survived the store.
    assert(NumValues == VTs.NumVTs &&
           "NumValues wasn't wide enough for its operands!");
  }

  /// Release the operands and set this node to have zero operands.
  void DropOperands();
};
1107 | ||||
1108 | /// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed | |||
1109 | /// into SDNode creation functions. | |||
1110 | /// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted | |||
1111 | /// from the original Instruction, and IROrder is the ordinal position of | |||
1112 | /// the instruction. | |||
1113 | /// When an SDNode is created after the DAG is being built, both DebugLoc and | |||
1114 | /// the IROrder are propagated from the original SDNode. | |||
1115 | /// So SDLoc class provides two constructors besides the default one, one to | |||
1116 | /// be used by the DAGBuilder, the other to be used by others. | |||
1117 | class SDLoc { | |||
1118 | private: | |||
1119 | DebugLoc DL; | |||
1120 | int IROrder = 0; | |||
1121 | ||||
1122 | public: | |||
1123 | SDLoc() = default; | |||
1124 | SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {} | |||
1125 | SDLoc(const SDValue V) : SDLoc(V.getNode()) {} | |||
1126 | SDLoc(const Instruction *I, int Order) : IROrder(Order) { | |||
1127 | assert(Order >= 0 && "bad IROrder")((Order >= 0 && "bad IROrder") ? static_cast<void > (0) : __assert_fail ("Order >= 0 && \"bad IROrder\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1127, __PRETTY_FUNCTION__)); | |||
1128 | if (I) | |||
1129 | DL = I->getDebugLoc(); | |||
1130 | } | |||
1131 | ||||
1132 | unsigned getIROrder() const { return IROrder; } | |||
1133 | const DebugLoc &getDebugLoc() const { return DL; } | |||
1134 | }; | |||
1135 | ||||
1136 | // Define inline functions from the SDValue class. | |||
1137 | ||||
/// Construct an SDValue referring to result number \p resno of \p node.
inline SDValue::SDValue(SDNode *node, unsigned resno)
    : Node(node), ResNo(resno) {
  // Explicitly check for !ResNo to avoid use-after-free, because there are
  // callers that use SDValue(N, 0) with a deleted N to indicate successful
  // combines.
  assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
         "Invalid result number for the given node!");
  // -1U and -2U are DenseMap empty/tombstone keys and must never be real
  // result numbers.
  assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
}
1147 | ||||
1148 | inline unsigned SDValue::getOpcode() const { | |||
1149 | return Node->getOpcode(); | |||
| ||||
1150 | } | |||
1151 | ||||
// Inline SDValue members that simply forward to the referenced SDNode.
// All of them dereference Node, so each requires a non-null SDValue.

inline EVT SDValue::getValueType() const {
  return Node->getValueType(ResNo);
}

inline unsigned SDValue::getNumOperands() const {
  return Node->getNumOperands();
}

inline const SDValue &SDValue::getOperand(unsigned i) const {
  return Node->getOperand(i);
}

inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
  return Node->getConstantOperandVal(i);
}

inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
  return Node->getConstantOperandAPInt(i);
}

inline bool SDValue::isTargetOpcode() const {
  return Node->isTargetOpcode();
}

inline bool SDValue::isTargetMemoryOpcode() const {
  return Node->isTargetMemoryOpcode();
}

inline bool SDValue::isMachineOpcode() const {
  return Node->isMachineOpcode();
}

inline unsigned SDValue::getMachineOpcode() const {
  return Node->getMachineOpcode();
}

inline bool SDValue::isUndef() const {
  return Node->isUndef();
}

// Use queries below are per-result: they consider only uses of this
// particular result number, not of the node as a whole.

inline bool SDValue::use_empty() const {
  return !Node->hasAnyUseOfValue(ResNo);
}

inline bool SDValue::hasOneUse() const {
  return Node->hasNUsesOfValue(1, ResNo);
}

inline const DebugLoc &SDValue::getDebugLoc() const {
  return Node->getDebugLoc();
}

inline void SDValue::dump() const {
  return Node->dump();
}

inline void SDValue::dump(const SelectionDAG *G) const {
  return Node->dump(G);
}

inline void SDValue::dumpr() const {
  return Node->dumpr();
}

inline void SDValue::dumpr(const SelectionDAG *G) const {
  return Node->dumpr(G);
}
1219 | ||||
// Define inline functions from the SDUse class.

/// Repoint this use at value V, updating the use lists of both the old
/// value (if any) and the new one.
inline void SDUse::set(const SDValue &V) {
  // Detach from the previous value's use list before overwriting Val.
  if (Val.getNode()) removeFromList();
  Val = V;
  if (V.getNode()) V.getNode()->addUse(*this);
}

/// First-time initialization: assumes this use is not on any use list yet
/// and that V refers to a non-null node (no checks performed).
inline void SDUse::setInitial(const SDValue &V) {
  Val = V;
  V.getNode()->addUse(*this);
}

/// Repoint this use at node N, preserving the current result number.
inline void SDUse::setNode(SDNode *N) {
  if (Val.getNode()) removeFromList();
  Val.setNode(N);
  if (N) N->addUse(*this);
}
1238 | ||||
/// This class is used to form a handle around another node that
/// is persistent and is updated across invocations of replaceAllUsesWith on its
/// operand. This node should be directly created by end-users and not added to
/// the AllNodes list.
class HandleSDNode : public SDNode {
  // The single operand, stored inline because SelectionDAG does not manage
  // this node's operand storage.
  SDUse Op;

public:
  explicit HandleSDNode(SDValue X)
    : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
    // HandleSDNodes are never inserted into the DAG, so they won't be
    // auto-numbered. Use ID 65535 as a sentinel.
    PersistentId = 0xffff;

    // Manually set up the operand list. This node type is special in that it's
    // always stack allocated and SelectionDAG does not manage its operands.
    // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
    // be so special.
    Op.setUser(this);
    Op.setInitial(X);
    NumOperands = 1;
    OperandList = &Op;
  }
  ~HandleSDNode();

  /// Return the (possibly updated) value this handle tracks.
  const SDValue &getValue() const { return Op; }
};
1266 | ||||
/// SDNode for ISD::ADDRSPACECAST, carrying the source and destination
/// address spaces of the pointer cast.
class AddrSpaceCastSDNode : public SDNode {
private:
  unsigned SrcAddrSpace;
  unsigned DestAddrSpace;

public:
  AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
                      unsigned SrcAS, unsigned DestAS);

  unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
  unsigned getDestAddressSpace() const { return DestAddrSpace; }

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::ADDRSPACECAST;
  }
};
1283 | ||||
/// This is an abstract virtual class for memory operations.
/// Most accessors delegate to the attached MachineMemOperand (MMO), which
/// is assumed to be non-null for the lifetime of the node.
class MemSDNode : public SDNode {
private:
  // VT of in-memory value.
  EVT MemoryVT;

protected:
  /// Memory reference information.
  MachineMemOperand *MMO;

public:
  MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
            EVT memvt, MachineMemOperand *MMO);

  bool readMem() const { return MMO->isLoad(); }
  bool writeMem() const { return MMO->isStore(); }

  /// Returns alignment and volatility of the memory access
  unsigned getOriginalAlignment() const {
    return MMO->getBaseAlignment();
  }
  unsigned getAlignment() const {
    return MMO->getAlignment();
  }

  /// Return the SubclassData value, without HasDebugValue. This contains an
  /// encoding of the volatile flag, as well as bits used by subclasses. This
  /// function should only be used to compute a FoldingSetNodeID value.
  /// The HasDebugValue bit is masked out because CSE map needs to match
  /// nodes with debug info with nodes without debug info. Same is about
  /// isDivergent bit.
  unsigned getRawSubclassData() const {
    uint16_t Data;
    // Copy the raw bits through a union so the CSE-irrelevant bits can be
    // cleared without type-punning undefined behavior.
    union {
      char RawSDNodeBits[sizeof(uint16_t)];
      SDNodeBitfields SDNodeBits;
    };
    memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
    SDNodeBits.HasDebugValue = 0;
    SDNodeBits.IsDivergent = false;
    memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
    return Data;
  }

  bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
  bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
  bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
  bool isInvariant() const { return MemSDNodeBits.IsInvariant; }

  // Returns the offset from the location of the access.
  int64_t getSrcValueOffset() const { return MMO->getOffset(); }

  /// Returns the AA info that describes the dereference.
  AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }

  /// Returns the Ranges that describes the dereference.
  const MDNode *getRanges() const { return MMO->getRanges(); }

  /// Returns the synchronization scope ID for this memory operation.
  SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }

  /// Return the atomic ordering requirements for this memory operation. For
  /// cmpxchg atomic operations, return the atomic ordering requirements when
  /// store occurs.
  AtomicOrdering getOrdering() const { return MMO->getOrdering(); }

  /// Return true if the memory operation ordering is Unordered or higher.
  bool isAtomic() const { return MMO->isAtomic(); }

  /// Returns true if the memory operation doesn't imply any ordering
  /// constraints on surrounding memory operations beyond the normal memory
  /// aliasing rules.
  bool isUnordered() const { return MMO->isUnordered(); }

  /// Returns true if the memory operation is neither atomic or volatile.
  bool isSimple() const { return !isAtomic() && !isVolatile(); }

  /// Return the type of the in-memory value.
  EVT getMemoryVT() const { return MemoryVT; }

  /// Return a MachineMemOperand object describing the memory
  /// reference performed by operation.
  MachineMemOperand *getMemOperand() const { return MMO; }

  const MachinePointerInfo &getPointerInfo() const {
    return MMO->getPointerInfo();
  }

  /// Return the address space for the associated pointer
  unsigned getAddressSpace() const {
    return getPointerInfo().getAddrSpace();
  }

  /// Update this MemSDNode's MachineMemOperand information
  /// to reflect the alignment of NewMMO, if it has a greater alignment.
  /// This must only be used when the new alignment applies to all users of
  /// this MachineMemOperand.
  void refineAlignment(const MachineMemOperand *NewMMO) {
    MMO->refineAlignment(NewMMO);
  }

  // Operand 0 is always the chain; the base pointer is operand 2 for stores
  // (operand 1 is the stored value) and operand 1 otherwise.
  const SDValue &getChain() const { return getOperand(0); }
  const SDValue &getBasePtr() const {
    return getOperand(getOpcode() == ISD::STORE ? 2 : 1);
  }

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    // For some targets, we lower some target intrinsics to a MemIntrinsicNode
    // with either an intrinsic or a target opcode.
    return N->getOpcode() == ISD::LOAD ||
           N->getOpcode() == ISD::STORE ||
           N->getOpcode() == ISD::PREFETCH ||
           N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
           N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
           N->getOpcode() == ISD::ATOMIC_SWAP ||
           N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
           N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
           N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
           N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
           N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
           N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
           N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
           N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
           N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
           N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
           N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
           N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
           N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
           N->getOpcode() == ISD::ATOMIC_LOAD ||
           N->getOpcode() == ISD::ATOMIC_STORE ||
           N->getOpcode() == ISD::MLOAD ||
           N->getOpcode() == ISD::MSTORE ||
           N->getOpcode() == ISD::MGATHER ||
           N->getOpcode() == ISD::MSCATTER ||
           N->isMemIntrinsic() ||
           N->isTargetMemoryOpcode();
  }
};
1423 | ||||
/// This is an SDNode representing atomic operations.
class AtomicSDNode : public MemSDNode {
public:
  AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
               EVT MemVT, MachineMemOperand *MMO)
    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
    // ATOMIC_LOAD/ATOMIC_STORE must carry an atomic MMO; the RMW opcodes
    // imply atomicity regardless.
    assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
            MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
  }

  // Operand 0 is the chain (see MemSDNode::getChain).
  const SDValue &getBasePtr() const { return getOperand(1); }
  const SDValue &getVal() const { return getOperand(2); }

  /// Returns true if this SDNode represents cmpxchg atomic operation, false
  /// otherwise.
  bool isCompareAndSwap() const {
    unsigned Op = getOpcode();
    return Op == ISD::ATOMIC_CMP_SWAP ||
           Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
  }

  /// For cmpxchg atomic operations, return the atomic ordering requirements
  /// when store does not occur.
  AtomicOrdering getFailureOrdering() const {
    assert(isCompareAndSwap() && "Must be cmpxchg operation");
    return MMO->getFailureOrdering();
  }

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
           N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
           N->getOpcode() == ISD::ATOMIC_SWAP ||
           N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
           N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
           N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
           N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
           N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
           N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
           N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
           N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
           N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
           N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
           N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
           N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
           N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
           N->getOpcode() == ISD::ATOMIC_LOAD ||
           N->getOpcode() == ISD::ATOMIC_STORE;
  }
};
1474 | ||||
/// This SDNode is used for target intrinsics that touch
/// memory and need an associated MachineMemOperand. Its opcode may be
/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
class MemIntrinsicSDNode : public MemSDNode {
public:
  MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
                     SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
      : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
    // Mark the node so classof can recognize it even under a generic opcode.
    SDNodeBits.IsMemIntrinsic = true;
  }

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    // We lower some target intrinsics to their target opcode
    // early a node with a target opcode can be of this class
    return N->isMemIntrinsic() ||
           N->getOpcode() == ISD::PREFETCH ||
           N->isTargetMemoryOpcode();
  }
};
1496 | ||||
1497 | /// This SDNode is used to implement the code generator | |||
1498 | /// support for the llvm IR shufflevector instruction. It combines elements | |||
1499 | /// from two input vectors into a new input vector, with the selection and | |||
1500 | /// ordering of elements determined by an array of integers, referred to as | |||
1501 | /// the shuffle mask. For input vectors of width N, mask indices of 0..N-1 | |||
1502 | /// refer to elements from the LHS input, and indices from N to 2N-1 the RHS. | |||
1503 | /// An index of -1 is treated as undef, such that the code generator may put | |||
1504 | /// any value in the corresponding element of the result. | |||
1505 | class ShuffleVectorSDNode : public SDNode { | |||
1506 | // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and | |||
1507 | // is freed when the SelectionDAG object is destroyed. | |||
1508 | const int *Mask; | |||
1509 | ||||
1510 | protected: | |||
1511 | friend class SelectionDAG; | |||
1512 | ||||
1513 | ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M) | |||
1514 | : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {} | |||
1515 | ||||
1516 | public: | |||
1517 | ArrayRef<int> getMask() const { | |||
1518 | EVT VT = getValueType(0); | |||
1519 | return makeArrayRef(Mask, VT.getVectorNumElements()); | |||
1520 | } | |||
1521 | ||||
1522 | int getMaskElt(unsigned Idx) const { | |||
1523 | assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!")((Idx < getValueType(0).getVectorNumElements() && "Idx out of range!" ) ? static_cast<void> (0) : __assert_fail ("Idx < getValueType(0).getVectorNumElements() && \"Idx out of range!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1523, __PRETTY_FUNCTION__)); | |||
1524 | return Mask[Idx]; | |||
1525 | } | |||
1526 | ||||
1527 | bool isSplat() const { return isSplatMask(Mask, getValueType(0)); } | |||
1528 | ||||
1529 | int getSplatIndex() const { | |||
1530 | assert(isSplat() && "Cannot get splat index for non-splat!")((isSplat() && "Cannot get splat index for non-splat!" ) ? static_cast<void> (0) : __assert_fail ("isSplat() && \"Cannot get splat index for non-splat!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1530, __PRETTY_FUNCTION__)); | |||
1531 | EVT VT = getValueType(0); | |||
1532 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) | |||
1533 | if (Mask[i] >= 0) | |||
1534 | return Mask[i]; | |||
1535 | ||||
1536 | // We can choose any index value here and be correct because all elements | |||
1537 | // are undefined. Return 0 for better potential for callers to simplify. | |||
1538 | return 0; | |||
1539 | } | |||
1540 | ||||
1541 | static bool isSplatMask(const int *Mask, EVT VT); | |||
1542 | ||||
1543 | /// Change values in a shuffle permute mask assuming | |||
1544 | /// the two vector operands have swapped position. | |||
1545 | static void commuteMask(MutableArrayRef<int> Mask) { | |||
1546 | unsigned NumElems = Mask.size(); | |||
1547 | for (unsigned i = 0; i != NumElems; ++i) { | |||
1548 | int idx = Mask[i]; | |||
1549 | if (idx < 0) | |||
1550 | continue; | |||
1551 | else if (idx < (int)NumElems) | |||
1552 | Mask[i] = idx + NumElems; | |||
1553 | else | |||
1554 | Mask[i] = idx - NumElems; | |||
1555 | } | |||
1556 | } | |||
1557 | ||||
1558 | static bool classof(const SDNode *N) { | |||
1559 | return N->getOpcode() == ISD::VECTOR_SHUFFLE; | |||
1560 | } | |||
1561 | }; | |||
1562 | ||||
1563 | class ConstantSDNode : public SDNode { | |||
1564 | friend class SelectionDAG; | |||
1565 | ||||
1566 | const ConstantInt *Value; | |||
1567 | ||||
1568 | ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT) | |||
1569 | : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(), | |||
1570 | getSDVTList(VT)), | |||
1571 | Value(val) { | |||
1572 | ConstantSDNodeBits.IsOpaque = isOpaque; | |||
1573 | } | |||
1574 | ||||
1575 | public: | |||
1576 | const ConstantInt *getConstantIntValue() const { return Value; } | |||
1577 | const APInt &getAPIntValue() const { return Value->getValue(); } | |||
1578 | uint64_t getZExtValue() const { return Value->getZExtValue(); } | |||
1579 | int64_t getSExtValue() const { return Value->getSExtValue(); } | |||
1580 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) { | |||
1581 | return Value->getLimitedValue(Limit); | |||
1582 | } | |||
1583 | ||||
1584 | bool isOne() const { return Value->isOne(); } | |||
1585 | bool isNullValue() const { return Value->isZero(); } | |||
1586 | bool isAllOnesValue() const { return Value->isMinusOne(); } | |||
1587 | ||||
1588 | bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; } | |||
1589 | ||||
1590 | static bool classof(const SDNode *N) { | |||
1591 | return N->getOpcode() == ISD::Constant || | |||
1592 | N->getOpcode() == ISD::TargetConstant; | |||
1593 | } | |||
1594 | }; | |||
1595 | ||||
1596 | uint64_t SDNode::getConstantOperandVal(unsigned Num) const { | |||
1597 | return cast<ConstantSDNode>(getOperand(Num))->getZExtValue(); | |||
1598 | } | |||
1599 | ||||
1600 | const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const { | |||
1601 | return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue(); | |||
1602 | } | |||
1603 | ||||
1604 | class ConstantFPSDNode : public SDNode { | |||
1605 | friend class SelectionDAG; | |||
1606 | ||||
1607 | const ConstantFP *Value; | |||
1608 | ||||
1609 | ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT) | |||
1610 | : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, | |||
1611 | DebugLoc(), getSDVTList(VT)), | |||
1612 | Value(val) {} | |||
1613 | ||||
1614 | public: | |||
1615 | const APFloat& getValueAPF() const { return Value->getValueAPF(); } | |||
1616 | const ConstantFP *getConstantFPValue() const { return Value; } | |||
1617 | ||||
1618 | /// Return true if the value is positive or negative zero. | |||
1619 | bool isZero() const { return Value->isZero(); } | |||
1620 | ||||
1621 | /// Return true if the value is a NaN. | |||
1622 | bool isNaN() const { return Value->isNaN(); } | |||
1623 | ||||
1624 | /// Return true if the value is an infinity | |||
1625 | bool isInfinity() const { return Value->isInfinity(); } | |||
1626 | ||||
1627 | /// Return true if the value is negative. | |||
1628 | bool isNegative() const { return Value->isNegative(); } | |||
1629 | ||||
1630 | /// We don't rely on operator== working on double values, as | |||
1631 | /// it returns true for things that are clearly not equal, like -0.0 and 0.0. | |||
1632 | /// As such, this method can be used to do an exact bit-for-bit comparison of | |||
1633 | /// two floating point values. | |||
1634 | ||||
1635 | /// We leave the version with the double argument here because it's just so | |||
1636 | /// convenient to write "2.0" and the like. Without this function we'd | |||
1637 | /// have to duplicate its logic everywhere it's called. | |||
1638 | bool isExactlyValue(double V) const { | |||
1639 | return Value->getValueAPF().isExactlyValue(V); | |||
1640 | } | |||
1641 | bool isExactlyValue(const APFloat& V) const; | |||
1642 | ||||
1643 | static bool isValueValidForType(EVT VT, const APFloat& Val); | |||
1644 | ||||
1645 | static bool classof(const SDNode *N) { | |||
1646 | return N->getOpcode() == ISD::ConstantFP || | |||
1647 | N->getOpcode() == ISD::TargetConstantFP; | |||
1648 | } | |||
1649 | }; | |||
1650 | ||||
// Free-function helpers for matching constant SDValues and peeking through
// wrapper nodes; defined in SelectionDAG.cpp.

/// Returns true if \p V is a constant integer zero.
bool isNullConstant(SDValue V);

/// Returns true if \p V is an FP constant with a value of positive zero.
bool isNullFPConstant(SDValue V);

/// Returns true if \p V is an integer constant with all bits set.
bool isAllOnesConstant(SDValue V);

/// Returns true if \p V is a constant integer one.
bool isOneConstant(SDValue V);

/// Return the non-bitcasted source operand of \p V if it exists.
/// If \p V is not a bitcasted value, it is returned as-is.
SDValue peekThroughBitcasts(SDValue V);

/// Return the non-bitcasted and one-use source operand of \p V if it exists.
/// If \p V is not a bitcasted one-use value, it is returned as-is.
SDValue peekThroughOneUseBitcasts(SDValue V);

/// Return the non-extracted vector source operand of \p V if it exists.
/// If \p V is not an extracted subvector, it is returned as-is.
SDValue peekThroughExtractSubvectors(SDValue V);

/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V, bool AllowUndefs = false);

/// Returns the SDNode if it is a constant splat BuildVector or constant int.
ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
                                    bool AllowTruncation = false);

/// Returns the SDNode if it is a demanded constant splat BuildVector or
/// constant int.
ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
                                    bool AllowUndefs = false,
                                    bool AllowTruncation = false);

/// Returns the SDNode if it is a constant splat BuildVector or constant float.
ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);

/// Returns the SDNode if it is a demanded constant splat BuildVector or
/// constant float.
ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
                                        bool AllowUndefs = false);

/// Return true if the value is a constant 0 integer or a splatted vector of
/// a constant 0 integer (with no undefs by default).
/// Build vector implicit truncation is not an issue for null values.
bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);

/// Return true if the value is a constant 1 integer or a splatted vector of a
/// constant 1 integer (with no undefs).
/// Does not permit build vector implicit truncation.
bool isOneOrOneSplat(SDValue V);

/// Return true if the value is a constant -1 integer or a splatted vector of a
/// constant -1 integer (with no undefs).
/// Does not permit build vector implicit truncation.
bool isAllOnesOrAllOnesSplat(SDValue V);
1711 | ||||
1712 | class GlobalAddressSDNode : public SDNode { | |||
1713 | friend class SelectionDAG; | |||
1714 | ||||
1715 | const GlobalValue *TheGlobal; | |||
1716 | int64_t Offset; | |||
1717 | unsigned TargetFlags; | |||
1718 | ||||
1719 | GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, | |||
1720 | const GlobalValue *GA, EVT VT, int64_t o, | |||
1721 | unsigned TF); | |||
1722 | ||||
1723 | public: | |||
1724 | const GlobalValue *getGlobal() const { return TheGlobal; } | |||
1725 | int64_t getOffset() const { return Offset; } | |||
1726 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1727 | // Return the address space this GlobalAddress belongs to. | |||
1728 | unsigned getAddressSpace() const; | |||
1729 | ||||
1730 | static bool classof(const SDNode *N) { | |||
1731 | return N->getOpcode() == ISD::GlobalAddress || | |||
1732 | N->getOpcode() == ISD::TargetGlobalAddress || | |||
1733 | N->getOpcode() == ISD::GlobalTLSAddress || | |||
1734 | N->getOpcode() == ISD::TargetGlobalTLSAddress; | |||
1735 | } | |||
1736 | }; | |||
1737 | ||||
1738 | class FrameIndexSDNode : public SDNode { | |||
1739 | friend class SelectionDAG; | |||
1740 | ||||
1741 | int FI; | |||
1742 | ||||
1743 | FrameIndexSDNode(int fi, EVT VT, bool isTarg) | |||
1744 | : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex, | |||
1745 | 0, DebugLoc(), getSDVTList(VT)), FI(fi) { | |||
1746 | } | |||
1747 | ||||
1748 | public: | |||
1749 | int getIndex() const { return FI; } | |||
1750 | ||||
1751 | static bool classof(const SDNode *N) { | |||
1752 | return N->getOpcode() == ISD::FrameIndex || | |||
1753 | N->getOpcode() == ISD::TargetFrameIndex; | |||
1754 | } | |||
1755 | }; | |||
1756 | ||||
1757 | /// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate | |||
1758 | /// the offet and size that are started/ended in the underlying FrameIndex. | |||
1759 | class LifetimeSDNode : public SDNode { | |||
1760 | friend class SelectionDAG; | |||
1761 | int64_t Size; | |||
1762 | int64_t Offset; // -1 if offset is unknown. | |||
1763 | ||||
1764 | LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, | |||
1765 | SDVTList VTs, int64_t Size, int64_t Offset) | |||
1766 | : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {} | |||
1767 | public: | |||
1768 | int64_t getFrameIndex() const { | |||
1769 | return cast<FrameIndexSDNode>(getOperand(1))->getIndex(); | |||
1770 | } | |||
1771 | ||||
1772 | bool hasOffset() const { return Offset >= 0; } | |||
1773 | int64_t getOffset() const { | |||
1774 | assert(hasOffset() && "offset is unknown")((hasOffset() && "offset is unknown") ? static_cast< void> (0) : __assert_fail ("hasOffset() && \"offset is unknown\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1774, __PRETTY_FUNCTION__)); | |||
1775 | return Offset; | |||
1776 | } | |||
1777 | int64_t getSize() const { | |||
1778 | assert(hasOffset() && "offset is unknown")((hasOffset() && "offset is unknown") ? static_cast< void> (0) : __assert_fail ("hasOffset() && \"offset is unknown\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1778, __PRETTY_FUNCTION__)); | |||
1779 | return Size; | |||
1780 | } | |||
1781 | ||||
1782 | // Methods to support isa and dyn_cast | |||
1783 | static bool classof(const SDNode *N) { | |||
1784 | return N->getOpcode() == ISD::LIFETIME_START || | |||
1785 | N->getOpcode() == ISD::LIFETIME_END; | |||
1786 | } | |||
1787 | }; | |||
1788 | ||||
1789 | class JumpTableSDNode : public SDNode { | |||
1790 | friend class SelectionDAG; | |||
1791 | ||||
1792 | int JTI; | |||
1793 | unsigned TargetFlags; | |||
1794 | ||||
1795 | JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF) | |||
1796 | : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, | |||
1797 | 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { | |||
1798 | } | |||
1799 | ||||
1800 | public: | |||
1801 | int getIndex() const { return JTI; } | |||
1802 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1803 | ||||
1804 | static bool classof(const SDNode *N) { | |||
1805 | return N->getOpcode() == ISD::JumpTable || | |||
1806 | N->getOpcode() == ISD::TargetJumpTable; | |||
1807 | } | |||
1808 | }; | |||
1809 | ||||
1810 | class ConstantPoolSDNode : public SDNode { | |||
1811 | friend class SelectionDAG; | |||
1812 | ||||
1813 | union { | |||
1814 | const Constant *ConstVal; | |||
1815 | MachineConstantPoolValue *MachineCPVal; | |||
1816 | } Val; | |||
1817 | int Offset; // It's a MachineConstantPoolValue if top bit is set. | |||
1818 | unsigned Alignment; // Minimum alignment requirement of CP (not log2 value). | |||
1819 | unsigned TargetFlags; | |||
1820 | ||||
1821 | ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o, | |||
1822 | unsigned Align, unsigned TF) | |||
1823 | : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, | |||
1824 | DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), | |||
1825 | TargetFlags(TF) { | |||
1826 | assert(Offset >= 0 && "Offset is too large")((Offset >= 0 && "Offset is too large") ? static_cast <void> (0) : __assert_fail ("Offset >= 0 && \"Offset is too large\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1826, __PRETTY_FUNCTION__)); | |||
1827 | Val.ConstVal = c; | |||
1828 | } | |||
1829 | ||||
1830 | ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, | |||
1831 | EVT VT, int o, unsigned Align, unsigned TF) | |||
1832 | : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, | |||
1833 | DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), | |||
1834 | TargetFlags(TF) { | |||
1835 | assert(Offset >= 0 && "Offset is too large")((Offset >= 0 && "Offset is too large") ? static_cast <void> (0) : __assert_fail ("Offset >= 0 && \"Offset is too large\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1835, __PRETTY_FUNCTION__)); | |||
1836 | Val.MachineCPVal = v; | |||
1837 | Offset |= 1 << (sizeof(unsigned)*CHAR_BIT8-1); | |||
1838 | } | |||
1839 | ||||
1840 | public: | |||
1841 | bool isMachineConstantPoolEntry() const { | |||
1842 | return Offset < 0; | |||
1843 | } | |||
1844 | ||||
1845 | const Constant *getConstVal() const { | |||
1846 | assert(!isMachineConstantPoolEntry() && "Wrong constantpool type")((!isMachineConstantPoolEntry() && "Wrong constantpool type" ) ? static_cast<void> (0) : __assert_fail ("!isMachineConstantPoolEntry() && \"Wrong constantpool type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1846, __PRETTY_FUNCTION__)); | |||
1847 | return Val.ConstVal; | |||
1848 | } | |||
1849 | ||||
1850 | MachineConstantPoolValue *getMachineCPVal() const { | |||
1851 | assert(isMachineConstantPoolEntry() && "Wrong constantpool type")((isMachineConstantPoolEntry() && "Wrong constantpool type" ) ? static_cast<void> (0) : __assert_fail ("isMachineConstantPoolEntry() && \"Wrong constantpool type\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 1851, __PRETTY_FUNCTION__)); | |||
1852 | return Val.MachineCPVal; | |||
1853 | } | |||
1854 | ||||
1855 | int getOffset() const { | |||
1856 | return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT8-1)); | |||
1857 | } | |||
1858 | ||||
1859 | // Return the alignment of this constant pool object, which is either 0 (for | |||
1860 | // default alignment) or the desired value. | |||
1861 | unsigned getAlignment() const { return Alignment; } | |||
1862 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1863 | ||||
1864 | Type *getType() const; | |||
1865 | ||||
1866 | static bool classof(const SDNode *N) { | |||
1867 | return N->getOpcode() == ISD::ConstantPool || | |||
1868 | N->getOpcode() == ISD::TargetConstantPool; | |||
1869 | } | |||
1870 | }; | |||
1871 | ||||
1872 | /// Completely target-dependent object reference. | |||
1873 | class TargetIndexSDNode : public SDNode { | |||
1874 | friend class SelectionDAG; | |||
1875 | ||||
1876 | unsigned TargetFlags; | |||
1877 | int Index; | |||
1878 | int64_t Offset; | |||
1879 | ||||
1880 | public: | |||
1881 | TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF) | |||
1882 | : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), | |||
1883 | TargetFlags(TF), Index(Idx), Offset(Ofs) {} | |||
1884 | ||||
1885 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1886 | int getIndex() const { return Index; } | |||
1887 | int64_t getOffset() const { return Offset; } | |||
1888 | ||||
1889 | static bool classof(const SDNode *N) { | |||
1890 | return N->getOpcode() == ISD::TargetIndex; | |||
1891 | } | |||
1892 | }; | |||
1893 | ||||
1894 | class BasicBlockSDNode : public SDNode { | |||
1895 | friend class SelectionDAG; | |||
1896 | ||||
1897 | MachineBasicBlock *MBB; | |||
1898 | ||||
1899 | /// Debug info is meaningful and potentially useful here, but we create | |||
1900 | /// blocks out of order when they're jumped to, which makes it a bit | |||
1901 | /// harder. Let's see if we need it first. | |||
1902 | explicit BasicBlockSDNode(MachineBasicBlock *mbb) | |||
1903 | : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb) | |||
1904 | {} | |||
1905 | ||||
1906 | public: | |||
1907 | MachineBasicBlock *getBasicBlock() const { return MBB; } | |||
1908 | ||||
1909 | static bool classof(const SDNode *N) { | |||
1910 | return N->getOpcode() == ISD::BasicBlock; | |||
1911 | } | |||
1912 | }; | |||
1913 | ||||
/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
/// It adds no state of its own; SDNodes with opcode ISD::BUILD_VECTOR are
/// simply cast to this type to use the splat-query helpers below.
class BuildVectorSDNode : public SDNode {
public:
  // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
  explicit BuildVectorSDNode() = delete;

  /// Check if this is a constant splat, and if so, find the
  /// smallest element size that splats the vector. If MinSplatBits is
  /// nonzero, the element size must be at least that large. Note that the
  /// splat element may be the entire vector (i.e., a one element vector).
  /// Returns the splat element value in SplatValue. Any undefined bits in
  /// that value are zero, and the corresponding bits in the SplatUndef mask
  /// are set. The SplatBitSize value is set to the splat element size in
  /// bits. HasAnyUndefs is set to true if any bits in the vector are
  /// undefined. isBigEndian describes the endianness of the target.
  bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
                       unsigned &SplatBitSize, bool &HasAnyUndefs,
                       unsigned MinSplatBits = 0,
                       bool isBigEndian = false) const;

  /// Returns the demanded splatted value or a null value if this is not a
  /// splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  SDValue getSplatValue(const APInt &DemandedElts,
                        BitVector *UndefElements = nullptr) const;

  /// Returns the splatted value or a null value if this is not a splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  SDValue getSplatValue(BitVector *UndefElements = nullptr) const;

  /// Returns the demanded splatted constant or null if this is not a constant
  /// splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantSDNode *
  getConstantSplatNode(const APInt &DemandedElts,
                       BitVector *UndefElements = nullptr) const;

  /// Returns the splatted constant or null if this is not a constant
  /// splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantSDNode *
  getConstantSplatNode(BitVector *UndefElements = nullptr) const;

  /// Returns the demanded splatted constant FP or null if this is not a
  /// constant FP splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantFPSDNode *
  getConstantFPSplatNode(const APInt &DemandedElts,
                         BitVector *UndefElements = nullptr) const;

  /// Returns the splatted constant FP or null if this is not a constant
  /// FP splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantFPSDNode *
  getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;

  /// If this is a constant FP splat and the splatted constant FP is an
  /// exact power or 2, return the log base 2 integer value. Otherwise,
  /// return -1.
  ///
  /// The BitWidth specifies the necessary bit precision.
  int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
                                          uint32_t BitWidth) const;

  /// Returns true if every operand of this BUILD_VECTOR is a constant.
  bool isConstant() const;

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::BUILD_VECTOR;
  }
};
1999 | ||||
2000 | /// An SDNode that holds an arbitrary LLVM IR Value. This is | |||
2001 | /// used when the SelectionDAG needs to make a simple reference to something | |||
2002 | /// in the LLVM IR representation. | |||
2003 | /// | |||
2004 | class SrcValueSDNode : public SDNode { | |||
2005 | friend class SelectionDAG; | |||
2006 | ||||
2007 | const Value *V; | |||
2008 | ||||
2009 | /// Create a SrcValue for a general value. | |||
2010 | explicit SrcValueSDNode(const Value *v) | |||
2011 | : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {} | |||
2012 | ||||
2013 | public: | |||
2014 | /// Return the contained Value. | |||
2015 | const Value *getValue() const { return V; } | |||
2016 | ||||
2017 | static bool classof(const SDNode *N) { | |||
2018 | return N->getOpcode() == ISD::SRCVALUE; | |||
2019 | } | |||
2020 | }; | |||
2021 | ||||
2022 | class MDNodeSDNode : public SDNode { | |||
2023 | friend class SelectionDAG; | |||
2024 | ||||
2025 | const MDNode *MD; | |||
2026 | ||||
2027 | explicit MDNodeSDNode(const MDNode *md) | |||
2028 | : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md) | |||
2029 | {} | |||
2030 | ||||
2031 | public: | |||
2032 | const MDNode *getMD() const { return MD; } | |||
2033 | ||||
2034 | static bool classof(const SDNode *N) { | |||
2035 | return N->getOpcode() == ISD::MDNODE_SDNODE; | |||
2036 | } | |||
2037 | }; | |||
2038 | ||||
2039 | class RegisterSDNode : public SDNode { | |||
2040 | friend class SelectionDAG; | |||
2041 | ||||
2042 | unsigned Reg; | |||
2043 | ||||
2044 | RegisterSDNode(unsigned reg, EVT VT) | |||
2045 | : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {} | |||
2046 | ||||
2047 | public: | |||
2048 | unsigned getReg() const { return Reg; } | |||
2049 | ||||
2050 | static bool classof(const SDNode *N) { | |||
2051 | return N->getOpcode() == ISD::Register; | |||
2052 | } | |||
2053 | }; | |||
2054 | ||||
2055 | class RegisterMaskSDNode : public SDNode { | |||
2056 | friend class SelectionDAG; | |||
2057 | ||||
2058 | // The memory for RegMask is not owned by the node. | |||
2059 | const uint32_t *RegMask; | |||
2060 | ||||
2061 | RegisterMaskSDNode(const uint32_t *mask) | |||
2062 | : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)), | |||
2063 | RegMask(mask) {} | |||
2064 | ||||
2065 | public: | |||
2066 | const uint32_t *getRegMask() const { return RegMask; } | |||
2067 | ||||
2068 | static bool classof(const SDNode *N) { | |||
2069 | return N->getOpcode() == ISD::RegisterMask; | |||
2070 | } | |||
2071 | }; | |||
2072 | ||||
2073 | class BlockAddressSDNode : public SDNode { | |||
2074 | friend class SelectionDAG; | |||
2075 | ||||
2076 | const BlockAddress *BA; | |||
2077 | int64_t Offset; | |||
2078 | unsigned TargetFlags; | |||
2079 | ||||
2080 | BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, | |||
2081 | int64_t o, unsigned Flags) | |||
2082 | : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), | |||
2083 | BA(ba), Offset(o), TargetFlags(Flags) {} | |||
2084 | ||||
2085 | public: | |||
2086 | const BlockAddress *getBlockAddress() const { return BA; } | |||
2087 | int64_t getOffset() const { return Offset; } | |||
2088 | unsigned getTargetFlags() const { return TargetFlags; } | |||
2089 | ||||
2090 | static bool classof(const SDNode *N) { | |||
2091 | return N->getOpcode() == ISD::BlockAddress || | |||
2092 | N->getOpcode() == ISD::TargetBlockAddress; | |||
2093 | } | |||
2094 | }; | |||
2095 | ||||
2096 | class LabelSDNode : public SDNode { | |||
2097 | friend class SelectionDAG; | |||
2098 | ||||
2099 | MCSymbol *Label; | |||
2100 | ||||
2101 | LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L) | |||
2102 | : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) { | |||
2103 | assert(LabelSDNode::classof(this) && "not a label opcode")((LabelSDNode::classof(this) && "not a label opcode") ? static_cast<void> (0) : __assert_fail ("LabelSDNode::classof(this) && \"not a label opcode\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2103, __PRETTY_FUNCTION__)); | |||
2104 | } | |||
2105 | ||||
2106 | public: | |||
2107 | MCSymbol *getLabel() const { return Label; } | |||
2108 | ||||
2109 | static bool classof(const SDNode *N) { | |||
2110 | return N->getOpcode() == ISD::EH_LABEL || | |||
2111 | N->getOpcode() == ISD::ANNOTATION_LABEL; | |||
2112 | } | |||
2113 | }; | |||
2114 | ||||
2115 | class ExternalSymbolSDNode : public SDNode { | |||
2116 | friend class SelectionDAG; | |||
2117 | ||||
2118 | const char *Symbol; | |||
2119 | unsigned TargetFlags; | |||
2120 | ||||
2121 | ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT) | |||
2122 | : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0, | |||
2123 | DebugLoc(), getSDVTList(VT)), | |||
2124 | Symbol(Sym), TargetFlags(TF) {} | |||
2125 | ||||
2126 | public: | |||
2127 | const char *getSymbol() const { return Symbol; } | |||
2128 | unsigned getTargetFlags() const { return TargetFlags; } | |||
2129 | ||||
2130 | static bool classof(const SDNode *N) { | |||
2131 | return N->getOpcode() == ISD::ExternalSymbol || | |||
2132 | N->getOpcode() == ISD::TargetExternalSymbol; | |||
2133 | } | |||
2134 | }; | |||
2135 | ||||
2136 | class MCSymbolSDNode : public SDNode { | |||
2137 | friend class SelectionDAG; | |||
2138 | ||||
2139 | MCSymbol *Symbol; | |||
2140 | ||||
2141 | MCSymbolSDNode(MCSymbol *Symbol, EVT VT) | |||
2142 | : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {} | |||
2143 | ||||
2144 | public: | |||
2145 | MCSymbol *getMCSymbol() const { return Symbol; } | |||
2146 | ||||
2147 | static bool classof(const SDNode *N) { | |||
2148 | return N->getOpcode() == ISD::MCSymbol; | |||
2149 | } | |||
2150 | }; | |||
2151 | ||||
2152 | class CondCodeSDNode : public SDNode { | |||
2153 | friend class SelectionDAG; | |||
2154 | ||||
2155 | ISD::CondCode Condition; | |||
2156 | ||||
2157 | explicit CondCodeSDNode(ISD::CondCode Cond) | |||
2158 | : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)), | |||
2159 | Condition(Cond) {} | |||
2160 | ||||
2161 | public: | |||
2162 | ISD::CondCode get() const { return Condition; } | |||
2163 | ||||
2164 | static bool classof(const SDNode *N) { | |||
2165 | return N->getOpcode() == ISD::CONDCODE; | |||
2166 | } | |||
2167 | }; | |||
2168 | ||||
2169 | /// This class is used to represent EVT's, which are used | |||
2170 | /// to parameterize some operations. | |||
2171 | class VTSDNode : public SDNode { | |||
2172 | friend class SelectionDAG; | |||
2173 | ||||
2174 | EVT ValueType; | |||
2175 | ||||
2176 | explicit VTSDNode(EVT VT) | |||
2177 | : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)), | |||
2178 | ValueType(VT) {} | |||
2179 | ||||
2180 | public: | |||
2181 | EVT getVT() const { return ValueType; } | |||
2182 | ||||
2183 | static bool classof(const SDNode *N) { | |||
2184 | return N->getOpcode() == ISD::VALUETYPE; | |||
2185 | } | |||
2186 | }; | |||
2187 | ||||
2188 | /// Base class for LoadSDNode and StoreSDNode | |||
2189 | class LSBaseSDNode : public MemSDNode { | |||
2190 | public: | |||
2191 | LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, | |||
2192 | SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT, | |||
2193 | MachineMemOperand *MMO) | |||
2194 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2195 | LSBaseSDNodeBits.AddressingMode = AM; | |||
2196 | assert(getAddressingMode() == AM && "Value truncated")((getAddressingMode() == AM && "Value truncated") ? static_cast <void> (0) : __assert_fail ("getAddressingMode() == AM && \"Value truncated\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2196, __PRETTY_FUNCTION__)); | |||
2197 | } | |||
2198 | ||||
2199 | const SDValue &getOffset() const { | |||
2200 | return getOperand(getOpcode() == ISD::LOAD ? 2 : 3); | |||
2201 | } | |||
2202 | ||||
2203 | /// Return the addressing mode for this load or store: | |||
2204 | /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. | |||
2205 | ISD::MemIndexedMode getAddressingMode() const { | |||
2206 | return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); | |||
2207 | } | |||
2208 | ||||
2209 | /// Return true if this is a pre/post inc/dec load/store. | |||
2210 | bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } | |||
2211 | ||||
2212 | /// Return true if this is NOT a pre/post inc/dec load/store. | |||
2213 | bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } | |||
2214 | ||||
2215 | static bool classof(const SDNode *N) { | |||
2216 | return N->getOpcode() == ISD::LOAD || | |||
2217 | N->getOpcode() == ISD::STORE; | |||
2218 | } | |||
2219 | }; | |||
2220 | ||||
2221 | /// This class is used to represent ISD::LOAD nodes. | |||
2222 | class LoadSDNode : public LSBaseSDNode { | |||
2223 | friend class SelectionDAG; | |||
2224 | ||||
2225 | LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2226 | ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT, | |||
2227 | MachineMemOperand *MMO) | |||
2228 | : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) { | |||
2229 | LoadSDNodeBits.ExtTy = ETy; | |||
2230 | assert(readMem() && "Load MachineMemOperand is not a load!")((readMem() && "Load MachineMemOperand is not a load!" ) ? static_cast<void> (0) : __assert_fail ("readMem() && \"Load MachineMemOperand is not a load!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2230, __PRETTY_FUNCTION__)); | |||
2231 | assert(!writeMem() && "Load MachineMemOperand is a store!")((!writeMem() && "Load MachineMemOperand is a store!" ) ? static_cast<void> (0) : __assert_fail ("!writeMem() && \"Load MachineMemOperand is a store!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2231, __PRETTY_FUNCTION__)); | |||
2232 | } | |||
2233 | ||||
2234 | public: | |||
2235 | /// Return whether this is a plain node, | |||
2236 | /// or one of the varieties of value-extending loads. | |||
2237 | ISD::LoadExtType getExtensionType() const { | |||
2238 | return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy); | |||
2239 | } | |||
2240 | ||||
2241 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
2242 | const SDValue &getOffset() const { return getOperand(2); } | |||
2243 | ||||
2244 | static bool classof(const SDNode *N) { | |||
2245 | return N->getOpcode() == ISD::LOAD; | |||
2246 | } | |||
2247 | }; | |||
2248 | ||||
2249 | /// This class is used to represent ISD::STORE nodes. | |||
2250 | class StoreSDNode : public LSBaseSDNode { | |||
2251 | friend class SelectionDAG; | |||
2252 | ||||
2253 | StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2254 | ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT, | |||
2255 | MachineMemOperand *MMO) | |||
2256 | : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) { | |||
2257 | StoreSDNodeBits.IsTruncating = isTrunc; | |||
2258 | assert(!readMem() && "Store MachineMemOperand is a load!")((!readMem() && "Store MachineMemOperand is a load!") ? static_cast<void> (0) : __assert_fail ("!readMem() && \"Store MachineMemOperand is a load!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2258, __PRETTY_FUNCTION__)); | |||
2259 | assert(writeMem() && "Store MachineMemOperand is not a store!")((writeMem() && "Store MachineMemOperand is not a store!" ) ? static_cast<void> (0) : __assert_fail ("writeMem() && \"Store MachineMemOperand is not a store!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2259, __PRETTY_FUNCTION__)); | |||
2260 | } | |||
2261 | ||||
2262 | public: | |||
2263 | /// Return true if the op does a truncation before store. | |||
2264 | /// For integers this is the same as doing a TRUNCATE and storing the result. | |||
2265 | /// For floats, it is the same as doing an FP_ROUND and storing the result. | |||
2266 | bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } | |||
2267 | void setTruncatingStore(bool Truncating) { | |||
2268 | StoreSDNodeBits.IsTruncating = Truncating; | |||
2269 | } | |||
2270 | ||||
2271 | const SDValue &getValue() const { return getOperand(1); } | |||
2272 | const SDValue &getBasePtr() const { return getOperand(2); } | |||
2273 | const SDValue &getOffset() const { return getOperand(3); } | |||
2274 | ||||
2275 | static bool classof(const SDNode *N) { | |||
2276 | return N->getOpcode() == ISD::STORE; | |||
2277 | } | |||
2278 | }; | |||
2279 | ||||
2280 | /// This base class is used to represent MLOAD and MSTORE nodes | |||
2281 | class MaskedLoadStoreSDNode : public MemSDNode { | |||
2282 | public: | |||
2283 | friend class SelectionDAG; | |||
2284 | ||||
2285 | MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, | |||
2286 | const DebugLoc &dl, SDVTList VTs, | |||
2287 | ISD::MemIndexedMode AM, EVT MemVT, | |||
2288 | MachineMemOperand *MMO) | |||
2289 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2290 | LSBaseSDNodeBits.AddressingMode = AM; | |||
2291 | assert(getAddressingMode() == AM && "Value truncated")((getAddressingMode() == AM && "Value truncated") ? static_cast <void> (0) : __assert_fail ("getAddressingMode() == AM && \"Value truncated\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2291, __PRETTY_FUNCTION__)); | |||
2292 | } | |||
2293 | ||||
2294 | // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru) | |||
2295 | // MaskedStoreSDNode (Chain, data, ptr, offset, mask) | |||
2296 | // Mask is a vector of i1 elements | |||
2297 | const SDValue &getBasePtr() const { | |||
2298 | return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2); | |||
2299 | } | |||
2300 | const SDValue &getOffset() const { | |||
2301 | return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3); | |||
2302 | } | |||
2303 | const SDValue &getMask() const { | |||
2304 | return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4); | |||
2305 | } | |||
2306 | ||||
2307 | /// Return the addressing mode for this load or store: | |||
2308 | /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. | |||
2309 | ISD::MemIndexedMode getAddressingMode() const { | |||
2310 | return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); | |||
2311 | } | |||
2312 | ||||
2313 | /// Return true if this is a pre/post inc/dec load/store. | |||
2314 | bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } | |||
2315 | ||||
2316 | /// Return true if this is NOT a pre/post inc/dec load/store. | |||
2317 | bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } | |||
2318 | ||||
2319 | static bool classof(const SDNode *N) { | |||
2320 | return N->getOpcode() == ISD::MLOAD || | |||
2321 | N->getOpcode() == ISD::MSTORE; | |||
2322 | } | |||
2323 | }; | |||
2324 | ||||
2325 | /// This class is used to represent an MLOAD node | |||
2326 | class MaskedLoadSDNode : public MaskedLoadStoreSDNode { | |||
2327 | public: | |||
2328 | friend class SelectionDAG; | |||
2329 | ||||
2330 | MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2331 | ISD::MemIndexedMode AM, ISD::LoadExtType ETy, | |||
2332 | bool IsExpanding, EVT MemVT, MachineMemOperand *MMO) | |||
2333 | : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) { | |||
2334 | LoadSDNodeBits.ExtTy = ETy; | |||
2335 | LoadSDNodeBits.IsExpanding = IsExpanding; | |||
2336 | } | |||
2337 | ||||
2338 | ISD::LoadExtType getExtensionType() const { | |||
2339 | return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy); | |||
2340 | } | |||
2341 | ||||
2342 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
2343 | const SDValue &getOffset() const { return getOperand(2); } | |||
2344 | const SDValue &getMask() const { return getOperand(3); } | |||
2345 | const SDValue &getPassThru() const { return getOperand(4); } | |||
2346 | ||||
2347 | static bool classof(const SDNode *N) { | |||
2348 | return N->getOpcode() == ISD::MLOAD; | |||
2349 | } | |||
2350 | ||||
2351 | bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; } | |||
2352 | }; | |||
2353 | ||||
2354 | /// This class is used to represent an MSTORE node | |||
2355 | class MaskedStoreSDNode : public MaskedLoadStoreSDNode { | |||
2356 | public: | |||
2357 | friend class SelectionDAG; | |||
2358 | ||||
2359 | MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2360 | ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing, | |||
2361 | EVT MemVT, MachineMemOperand *MMO) | |||
2362 | : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) { | |||
2363 | StoreSDNodeBits.IsTruncating = isTrunc; | |||
2364 | StoreSDNodeBits.IsCompressing = isCompressing; | |||
2365 | } | |||
2366 | ||||
2367 | /// Return true if the op does a truncation before store. | |||
2368 | /// For integers this is the same as doing a TRUNCATE and storing the result. | |||
2369 | /// For floats, it is the same as doing an FP_ROUND and storing the result. | |||
2370 | bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } | |||
2371 | ||||
2372 | /// Returns true if the op does a compression to the vector before storing. | |||
2373 | /// The node contiguously stores the active elements (integers or floats) | |||
2374 | /// in src (those with their respective bit set in writemask k) to unaligned | |||
2375 | /// memory at base_addr. | |||
2376 | bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; } | |||
2377 | ||||
2378 | const SDValue &getValue() const { return getOperand(1); } | |||
2379 | const SDValue &getBasePtr() const { return getOperand(2); } | |||
2380 | const SDValue &getOffset() const { return getOperand(3); } | |||
2381 | const SDValue &getMask() const { return getOperand(4); } | |||
2382 | ||||
2383 | static bool classof(const SDNode *N) { | |||
2384 | return N->getOpcode() == ISD::MSTORE; | |||
2385 | } | |||
2386 | }; | |||
2387 | ||||
2388 | /// This is a base class used to represent | |||
2389 | /// MGATHER and MSCATTER nodes | |||
2390 | /// | |||
2391 | class MaskedGatherScatterSDNode : public MemSDNode { | |||
2392 | public: | |||
2393 | friend class SelectionDAG; | |||
2394 | ||||
2395 | MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, | |||
2396 | const DebugLoc &dl, SDVTList VTs, EVT MemVT, | |||
2397 | MachineMemOperand *MMO, ISD::MemIndexType IndexType) | |||
2398 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2399 | LSBaseSDNodeBits.AddressingMode = IndexType; | |||
2400 | assert(getIndexType() == IndexType && "Value truncated")((getIndexType() == IndexType && "Value truncated") ? static_cast<void> (0) : __assert_fail ("getIndexType() == IndexType && \"Value truncated\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2400, __PRETTY_FUNCTION__)); | |||
2401 | } | |||
2402 | ||||
2403 | /// How is Index applied to BasePtr when computing addresses. | |||
2404 | ISD::MemIndexType getIndexType() const { | |||
2405 | return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode); | |||
2406 | } | |||
2407 | bool isIndexScaled() const { | |||
2408 | return (getIndexType() == ISD::SIGNED_SCALED) || | |||
2409 | (getIndexType() == ISD::UNSIGNED_SCALED); | |||
2410 | } | |||
2411 | bool isIndexSigned() const { | |||
2412 | return (getIndexType() == ISD::SIGNED_SCALED) || | |||
2413 | (getIndexType() == ISD::SIGNED_UNSCALED); | |||
2414 | } | |||
2415 | ||||
2416 | // In the both nodes address is Op1, mask is Op2: | |||
2417 | // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) | |||
2418 | // MaskedScatterSDNode (Chain, value, mask, base, index, scale) | |||
2419 | // Mask is a vector of i1 elements | |||
2420 | const SDValue &getBasePtr() const { return getOperand(3); } | |||
2421 | const SDValue &getIndex() const { return getOperand(4); } | |||
2422 | const SDValue &getMask() const { return getOperand(2); } | |||
2423 | const SDValue &getScale() const { return getOperand(5); } | |||
2424 | ||||
2425 | static bool classof(const SDNode *N) { | |||
2426 | return N->getOpcode() == ISD::MGATHER || | |||
2427 | N->getOpcode() == ISD::MSCATTER; | |||
2428 | } | |||
2429 | }; | |||
2430 | ||||
2431 | /// This class is used to represent an MGATHER node | |||
2432 | /// | |||
2433 | class MaskedGatherSDNode : public MaskedGatherScatterSDNode { | |||
2434 | public: | |||
2435 | friend class SelectionDAG; | |||
2436 | ||||
2437 | MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2438 | EVT MemVT, MachineMemOperand *MMO, | |||
2439 | ISD::MemIndexType IndexType) | |||
2440 | : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO, | |||
2441 | IndexType) {} | |||
2442 | ||||
2443 | const SDValue &getPassThru() const { return getOperand(1); } | |||
2444 | ||||
2445 | static bool classof(const SDNode *N) { | |||
2446 | return N->getOpcode() == ISD::MGATHER; | |||
2447 | } | |||
2448 | }; | |||
2449 | ||||
2450 | /// This class is used to represent an MSCATTER node | |||
2451 | /// | |||
2452 | class MaskedScatterSDNode : public MaskedGatherScatterSDNode { | |||
2453 | public: | |||
2454 | friend class SelectionDAG; | |||
2455 | ||||
2456 | MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2457 | EVT MemVT, MachineMemOperand *MMO, | |||
2458 | ISD::MemIndexType IndexType) | |||
2459 | : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO, | |||
2460 | IndexType) {} | |||
2461 | ||||
2462 | const SDValue &getValue() const { return getOperand(1); } | |||
2463 | ||||
2464 | static bool classof(const SDNode *N) { | |||
2465 | return N->getOpcode() == ISD::MSCATTER; | |||
2466 | } | |||
2467 | }; | |||
2468 | ||||
2469 | /// An SDNode that represents everything that will be needed | |||
2470 | /// to construct a MachineInstr. These nodes are created during the | |||
2471 | /// instruction selection proper phase. | |||
2472 | /// | |||
2473 | /// Note that the only supported way to set the `memoperands` is by calling the | |||
2474 | /// `SelectionDAG::setNodeMemRefs` function as the memory management happens | |||
2475 | /// inside the DAG rather than in the node. | |||
2476 | class MachineSDNode : public SDNode { | |||
2477 | private: | |||
2478 | friend class SelectionDAG; | |||
2479 | ||||
2480 | MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs) | |||
2481 | : SDNode(Opc, Order, DL, VTs) {} | |||
2482 | ||||
2483 | // We use a pointer union between a single `MachineMemOperand` pointer and | |||
2484 | // a pointer to an array of `MachineMemOperand` pointers. This is null when | |||
2485 | // the number of these is zero, the single pointer variant used when the | |||
2486 | // number is one, and the array is used for larger numbers. | |||
2487 | // | |||
2488 | // The array is allocated via the `SelectionDAG`'s allocator and so will | |||
2489 | // always live until the DAG is cleaned up and doesn't require ownership here. | |||
2490 | // | |||
2491 | // We can't use something simpler like `TinyPtrVector` here because `SDNode` | |||
2492 | // subclasses aren't managed in a conforming C++ manner. See the comments on | |||
2493 | // `SelectionDAG::MorphNodeTo` which details what all goes on, but the | |||
2494 | // constraint here is that these don't manage memory with their constructor or | |||
2495 | // destructor and can be initialized to a good state even if they start off | |||
2496 | // uninitialized. | |||
2497 | PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {}; | |||
2498 | ||||
2499 | // Note that this could be folded into the above `MemRefs` member if doing so | |||
2500 | // is advantageous at some point. We don't need to store this in most cases. | |||
2501 | // However, at the moment this doesn't appear to make the allocation any | |||
2502 | // smaller and makes the code somewhat simpler to read. | |||
2503 | int NumMemRefs = 0; | |||
2504 | ||||
2505 | public: | |||
2506 | using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator; | |||
2507 | ||||
2508 | ArrayRef<MachineMemOperand *> memoperands() const { | |||
2509 | // Special case the common cases. | |||
2510 | if (NumMemRefs == 0) | |||
2511 | return {}; | |||
2512 | if (NumMemRefs == 1) | |||
2513 | return makeArrayRef(MemRefs.getAddrOfPtr1(), 1); | |||
2514 | ||||
2515 | // Otherwise we have an actual array. | |||
2516 | return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs); | |||
2517 | } | |||
2518 | mmo_iterator memoperands_begin() const { return memoperands().begin(); } | |||
2519 | mmo_iterator memoperands_end() const { return memoperands().end(); } | |||
2520 | bool memoperands_empty() const { return memoperands().empty(); } | |||
2521 | ||||
2522 | /// Clear out the memory reference descriptor list. | |||
2523 | void clearMemRefs() { | |||
2524 | MemRefs = nullptr; | |||
2525 | NumMemRefs = 0; | |||
2526 | } | |||
2527 | ||||
2528 | static bool classof(const SDNode *N) { | |||
2529 | return N->isMachineOpcode(); | |||
2530 | } | |||
2531 | }; | |||
2532 | ||||
2533 | class SDNodeIterator : public std::iterator<std::forward_iterator_tag, | |||
2534 | SDNode, ptrdiff_t> { | |||
2535 | const SDNode *Node; | |||
2536 | unsigned Operand; | |||
2537 | ||||
2538 | SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {} | |||
2539 | ||||
2540 | public: | |||
2541 | bool operator==(const SDNodeIterator& x) const { | |||
2542 | return Operand == x.Operand; | |||
2543 | } | |||
2544 | bool operator!=(const SDNodeIterator& x) const { return !operator==(x); } | |||
2545 | ||||
2546 | pointer operator*() const { | |||
2547 | return Node->getOperand(Operand).getNode(); | |||
2548 | } | |||
2549 | pointer operator->() const { return operator*(); } | |||
2550 | ||||
2551 | SDNodeIterator& operator++() { // Preincrement | |||
2552 | ++Operand; | |||
2553 | return *this; | |||
2554 | } | |||
2555 | SDNodeIterator operator++(int) { // Postincrement | |||
2556 | SDNodeIterator tmp = *this; ++*this; return tmp; | |||
2557 | } | |||
2558 | size_t operator-(SDNodeIterator Other) const { | |||
2559 | assert(Node == Other.Node &&((Node == Other.Node && "Cannot compare iterators of two different nodes!" ) ? static_cast<void> (0) : __assert_fail ("Node == Other.Node && \"Cannot compare iterators of two different nodes!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2560, __PRETTY_FUNCTION__)) | |||
2560 | "Cannot compare iterators of two different nodes!")((Node == Other.Node && "Cannot compare iterators of two different nodes!" ) ? static_cast<void> (0) : __assert_fail ("Node == Other.Node && \"Cannot compare iterators of two different nodes!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/SelectionDAGNodes.h" , 2560, __PRETTY_FUNCTION__)); | |||
2561 | return Operand - Other.Operand; | |||
2562 | } | |||
2563 | ||||
2564 | static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); } | |||
2565 | static SDNodeIterator end (const SDNode *N) { | |||
2566 | return SDNodeIterator(N, N->getNumOperands()); | |||
2567 | } | |||
2568 | ||||
2569 | unsigned getOperand() const { return Operand; } | |||
2570 | const SDNode *getNode() const { return Node; } | |||
2571 | }; | |||
2572 | ||||
2573 | template <> struct GraphTraits<SDNode*> { | |||
2574 | using NodeRef = SDNode *; | |||
2575 | using ChildIteratorType = SDNodeIterator; | |||
2576 | ||||
2577 | static NodeRef getEntryNode(SDNode *N) { return N; } | |||
2578 | ||||
2579 | static ChildIteratorType child_begin(NodeRef N) { | |||
2580 | return SDNodeIterator::begin(N); | |||
2581 | } | |||
2582 | ||||
2583 | static ChildIteratorType child_end(NodeRef N) { | |||
2584 | return SDNodeIterator::end(N); | |||
2585 | } | |||
2586 | }; | |||
2587 | ||||
2588 | /// A representation of the largest SDNode, for use in sizeof(). | |||
2589 | /// | |||
2590 | /// This needs to be a union because the largest node differs on 32 bit systems | |||
2591 | /// with 4 and 8 byte pointer alignment, respectively. | |||
2592 | using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode, | |||
2593 | BlockAddressSDNode, | |||
2594 | GlobalAddressSDNode>; | |||
2595 | ||||
2596 | /// The SDNode class with the greatest alignment requirement. | |||
2597 | using MostAlignedSDNode = GlobalAddressSDNode; | |||
2598 | ||||
2599 | namespace ISD { | |||
2600 | ||||
2601 | /// Returns true if the specified node is a non-extending and unindexed load. | |||
2602 | inline bool isNormalLoad(const SDNode *N) { | |||
2603 | const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N); | |||
2604 | return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD && | |||
2605 | Ld->getAddressingMode() == ISD::UNINDEXED; | |||
2606 | } | |||
2607 | ||||
2608 | /// Returns true if the specified node is a non-extending load. | |||
2609 | inline bool isNON_EXTLoad(const SDNode *N) { | |||
2610 | return isa<LoadSDNode>(N) && | |||
2611 | cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; | |||
2612 | } | |||
2613 | ||||
2614 | /// Returns true if the specified node is a EXTLOAD. | |||
2615 | inline bool isEXTLoad(const SDNode *N) { | |||
2616 | return isa<LoadSDNode>(N) && | |||
2617 | cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD; | |||
2618 | } | |||
2619 | ||||
2620 | /// Returns true if the specified node is a SEXTLOAD. | |||
2621 | inline bool isSEXTLoad(const SDNode *N) { | |||
2622 | return isa<LoadSDNode>(N) && | |||
2623 | cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; | |||
2624 | } | |||
2625 | ||||
2626 | /// Returns true if the specified node is a ZEXTLOAD. | |||
2627 | inline bool isZEXTLoad(const SDNode *N) { | |||
2628 | return isa<LoadSDNode>(N) && | |||
2629 | cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; | |||
2630 | } | |||
2631 | ||||
2632 | /// Returns true if the specified node is an unindexed load. | |||
2633 | inline bool isUNINDEXEDLoad(const SDNode *N) { | |||
2634 | return isa<LoadSDNode>(N) && | |||
2635 | cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
2636 | } | |||
2637 | ||||
2638 | /// Returns true if the specified node is a non-truncating | |||
2639 | /// and unindexed store. | |||
2640 | inline bool isNormalStore(const SDNode *N) { | |||
2641 | const StoreSDNode *St = dyn_cast<StoreSDNode>(N); | |||
2642 | return St && !St->isTruncatingStore() && | |||
2643 | St->getAddressingMode() == ISD::UNINDEXED; | |||
2644 | } | |||
2645 | ||||
2646 | /// Returns true if the specified node is a non-truncating store. | |||
2647 | inline bool isNON_TRUNCStore(const SDNode *N) { | |||
2648 | return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore(); | |||
2649 | } | |||
2650 | ||||
2651 | /// Returns true if the specified node is a truncating store. | |||
2652 | inline bool isTRUNCStore(const SDNode *N) { | |||
2653 | return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore(); | |||
2654 | } | |||
2655 | ||||
2656 | /// Returns true if the specified node is an unindexed store. | |||
2657 | inline bool isUNINDEXEDStore(const SDNode *N) { | |||
2658 | return isa<StoreSDNode>(N) && | |||
2659 | cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
2660 | } | |||
2661 | ||||
2662 | /// Attempt to match a unary predicate against a scalar/splat constant or | |||
2663 | /// every element of a constant BUILD_VECTOR. | |||
2664 | /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match. | |||
2665 | bool matchUnaryPredicate(SDValue Op, | |||
2666 | std::function<bool(ConstantSDNode *)> Match, | |||
2667 | bool AllowUndefs = false); | |||
2668 | ||||
2669 | /// Attempt to match a binary predicate against a pair of scalar/splat | |||
2670 | /// constants or every element of a pair of constant BUILD_VECTORs. | |||
2671 | /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match. | |||
2672 | /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match. | |||
2673 | bool matchBinaryPredicate( | |||
2674 | SDValue LHS, SDValue RHS, | |||
2675 | std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, | |||
2676 | bool AllowUndefs = false, bool AllowTypeMismatch = false); | |||
2677 | ||||
2678 | /// Returns true if the specified value is the overflow result from one | |||
2679 | /// of the overflow intrinsic nodes. | |||
2680 | inline bool isOverflowIntrOpRes(SDValue Op) { | |||
2681 | unsigned Opc = Op.getOpcode(); | |||
2682 | return (Op.getResNo() == 1 && | |||
2683 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | |||
2684 | Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)); | |||
2685 | } | |||
2686 | ||||
2687 | } // end namespace ISD | |||
2688 | ||||
2689 | } // end namespace llvm | |||
2690 | ||||
2691 | #endif // LLVM_CODEGEN_SELECTIONDAGNODES_H |