Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1149, column 10
Called C++ object pointer is null
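
For orientation, the warning points at SDValue's forwarding accessors in SelectionDAGNodes.h. The following is a rough, self-contained sketch (stand-in types, not the real LLVM headers) of the pattern the analyzer is flagging:

    // Stand-in sketch only; not the real llvm::SDNode / llvm::SDValue declarations.
    struct SDNode {
      unsigned Opcode = 0;
      unsigned getOpcode() const { return Opcode; }
    };

    struct SDValue {
      SDNode *Node = nullptr;   // may be null for a default-constructed SDValue
      unsigned ResNo = 0;
      // Forwards through the raw node pointer with no null check; calling this on
      // an SDValue whose Node is null is the "Called C++ object pointer is null"
      // condition reported above.
      unsigned getOpcode() const { return Node->getOpcode(); }
    };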

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelDAGToDAG.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DAG pattern matching instruction selector for X86,
10// converting from a legalized dag to a X86 dag.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86MachineFunctionInfo.h"
16#include "X86RegisterInfo.h"
17#include "X86Subtarget.h"
18#include "X86TargetMachine.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/IR/ConstantRange.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/Instructions.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsX86.h"
29#include "llvm/IR/Type.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/KnownBits.h"
33#include "llvm/Support/MathExtras.h"
34#include "llvm/Support/raw_ostream.h"
35#include "llvm/Target/TargetMachine.h"
36#include "llvm/Target/TargetOptions.h"
37#include <stdint.h>
38using namespace llvm;
39
40#define DEBUG_TYPE "x86-isel"
41
42STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
43
44static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
45 cl::desc("Enable setting constant bits to reduce size of mask immediates"),
46 cl::Hidden);
47
48//===----------------------------------------------------------------------===//
49// Pattern Matcher Implementation
50//===----------------------------------------------------------------------===//
51
52namespace {
53 /// This corresponds to X86AddressMode, but uses SDValue's instead of register
54 /// numbers for the leaves of the matched tree.
55 struct X86ISelAddressMode {
56 enum {
57 RegBase,
58 FrameIndexBase
59 } BaseType;
60
61 // This is really a union, discriminated by BaseType!
62 SDValue Base_Reg;
63 int Base_FrameIndex;
64
65 unsigned Scale;
66 SDValue IndexReg;
67 int32_t Disp;
68 SDValue Segment;
69 const GlobalValue *GV;
70 const Constant *CP;
71 const BlockAddress *BlockAddr;
72 const char *ES;
73 MCSymbol *MCSym;
74 int JT;
75 unsigned Align; // CP alignment.
76 unsigned char SymbolFlags; // X86II::MO_*
77 bool NegateIndex = false;
78
79 X86ISelAddressMode()
80 : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
81 Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
82 MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
83
84 bool hasSymbolicDisplacement() const {
85 return GV != nullptr || CP != nullptr || ES != nullptr ||
86 MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
87 }
88
89 bool hasBaseOrIndexReg() const {
90 return BaseType == FrameIndexBase ||
91 IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
92 }
93
94 /// Return true if this addressing mode is already RIP-relative.
95 bool isRIPRelative() const {
96 if (BaseType != RegBase) return false;
97 if (RegisterSDNode *RegNode =
98 dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
99 return RegNode->getReg() == X86::RIP;
100 return false;
101 }
102
103 void setBaseReg(SDValue Reg) {
104 BaseType = RegBase;
105 Base_Reg = Reg;
106 }
107
108#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
109 void dump(SelectionDAG *DAG = nullptr) {
110 dbgs() << "X86ISelAddressMode " << this << '\n';
111 dbgs() << "Base_Reg ";
112 if (Base_Reg.getNode())
113 Base_Reg.getNode()->dump(DAG);
114 else
115 dbgs() << "nul\n";
116 if (BaseType == FrameIndexBase)
117 dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
118 dbgs() << " Scale " << Scale << '\n'
119 << "IndexReg ";
120 if (NegateIndex)
121 dbgs() << "negate ";
122 if (IndexReg.getNode())
123 IndexReg.getNode()->dump(DAG);
124 else
125 dbgs() << "nul\n";
126 dbgs() << " Disp " << Disp << '\n'
127 << "GV ";
128 if (GV)
129 GV->dump();
130 else
131 dbgs() << "nul";
132 dbgs() << " CP ";
133 if (CP)
134 CP->dump();
135 else
136 dbgs() << "nul";
137 dbgs() << '\n'
138 << "ES ";
139 if (ES)
140 dbgs() << ES;
141 else
142 dbgs() << "nul";
143 dbgs() << " MCSym ";
144 if (MCSym)
145 dbgs() << MCSym;
146 else
147 dbgs() << "nul";
148 dbgs() << " JT" << JT << " Align" << Align << '\n';
149 }
150#endif
151 };
152}
153
154namespace {
155 //===--------------------------------------------------------------------===//
156 /// ISel - X86-specific code to select X86 machine instructions for
157 /// SelectionDAG operations.
158 ///
159 class X86DAGToDAGISel final : public SelectionDAGISel {
160 /// Keep a pointer to the X86Subtarget around so that we can
161 /// make the right decision when generating code for different targets.
162 const X86Subtarget *Subtarget;
163
164 /// If true, selector should try to optimize for code size instead of
165 /// performance.
166 bool OptForSize;
167
168 /// If true, selector should try to optimize for minimum code size.
169 bool OptForMinSize;
170
171 /// Disable direct TLS access through segment registers.
172 bool IndirectTlsSegRefs;
173
174 public:
175 explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
176 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
177 OptForMinSize(false), IndirectTlsSegRefs(false) {}
178
179 StringRef getPassName() const override {
180 return "X86 DAG->DAG Instruction Selection";
181 }
182
183 bool runOnMachineFunction(MachineFunction &MF) override {
184 // Reset the subtarget each time through.
185 Subtarget = &MF.getSubtarget<X86Subtarget>();
186 IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
187 "indirect-tls-seg-refs");
188
189 // OptFor[Min]Size are used in pattern predicates that isel is matching.
190 OptForSize = MF.getFunction().hasOptSize();
191 OptForMinSize = MF.getFunction().hasMinSize();
192 assert((!OptForMinSize || OptForSize) &&
193 "OptForMinSize implies OptForSize");
194
195 SelectionDAGISel::runOnMachineFunction(MF);
196 return true;
197 }
198
199 void emitFunctionEntryCode() override;
200
201 bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
202
203 void PreprocessISelDAG() override;
204 void PostprocessISelDAG() override;
205
206// Include the pieces autogenerated from the target description.
207#include "X86GenDAGISel.inc"
208
209 private:
210 void Select(SDNode *N) override;
211
212 bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
213 bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
214 bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
215 bool matchAddress(SDValue N, X86ISelAddressMode &AM);
216 bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
217 bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
218 bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
219 unsigned Depth);
220 bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
221 bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
222 SDValue &Scale, SDValue &Index, SDValue &Disp,
223 SDValue &Segment);
224 bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp,
225 SDValue ScaleOp, SDValue &Base, SDValue &Scale,
226 SDValue &Index, SDValue &Disp, SDValue &Segment);
227 bool selectMOV64Imm32(SDValue N, SDValue &Imm);
228 bool selectLEAAddr(SDValue N, SDValue &Base,
229 SDValue &Scale, SDValue &Index, SDValue &Disp,
230 SDValue &Segment);
231 bool selectLEA64_32Addr(SDValue N, SDValue &Base,
232 SDValue &Scale, SDValue &Index, SDValue &Disp,
233 SDValue &Segment);
234 bool selectTLSADDRAddr(SDValue N, SDValue &Base,
235 SDValue &Scale, SDValue &Index, SDValue &Disp,
236 SDValue &Segment);
237 bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
238 SDValue &Base, SDValue &Scale,
239 SDValue &Index, SDValue &Disp,
240 SDValue &Segment,
241 SDValue &NodeWithChain);
242 bool selectRelocImm(SDValue N, SDValue &Op);
243
244 bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
245 SDValue &Base, SDValue &Scale,
246 SDValue &Index, SDValue &Disp,
247 SDValue &Segment);
248
249 // Convenience method where P is also root.
250 bool tryFoldLoad(SDNode *P, SDValue N,
251 SDValue &Base, SDValue &Scale,
252 SDValue &Index, SDValue &Disp,
253 SDValue &Segment) {
254 return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
[3] Calling 'X86DAGToDAGISel::tryFoldLoad'
255 }
256
257 bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
258 SDValue &Base, SDValue &Scale,
259 SDValue &Index, SDValue &Disp,
260 SDValue &Segment);
261
262 /// Implement addressing mode selection for inline asm expressions.
263 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
264 unsigned ConstraintID,
265 std::vector<SDValue> &OutOps) override;
266
267 void emitSpecialCodeForMain();
268
269 inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
270 MVT VT, SDValue &Base, SDValue &Scale,
271 SDValue &Index, SDValue &Disp,
272 SDValue &Segment) {
273 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
274 Base = CurDAG->getTargetFrameIndex(
275 AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
276 else if (AM.Base_Reg.getNode())
277 Base = AM.Base_Reg;
278 else
279 Base = CurDAG->getRegister(0, VT);
280
281 Scale = getI8Imm(AM.Scale, DL);
282
283 // Negate the index if needed.
284 if (AM.NegateIndex) {
285 unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
286 SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
287 AM.IndexReg), 0);
288 AM.IndexReg = Neg;
289 }
290
291 if (AM.IndexReg.getNode())
292 Index = AM.IndexReg;
293 else
294 Index = CurDAG->getRegister(0, VT);
295
296 // These are 32-bit even in 64-bit mode since RIP-relative offset
297 // is 32-bit.
298 if (AM.GV)
299 Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
300 MVT::i32, AM.Disp,
301 AM.SymbolFlags);
302 else if (AM.CP)
303 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
304 AM.Align, AM.Disp, AM.SymbolFlags);
305 else if (AM.ES) {
306 assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
307 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
308 } else if (AM.MCSym) {
309 assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
310 assert(AM.SymbolFlags == 0 && "oo");
311 Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
312 } else if (AM.JT != -1) {
313 assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
314 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
315 } else if (AM.BlockAddr)
316 Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
317 AM.SymbolFlags);
318 else
319 Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
320
321 if (AM.Segment.getNode())
322 Segment = AM.Segment;
323 else
324 Segment = CurDAG->getRegister(0, MVT::i16);
325 }
326
327 // Utility function to determine whether we should avoid selecting
328 // immediate forms of instructions for better code size or not.
329 // At a high level, we'd like to avoid such instructions when
330 // we have similar constants used within the same basic block
331 // that can be kept in a register.
332 //
333 bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
334 uint32_t UseCount = 0;
335
336 // Do not want to hoist if we're not optimizing for size.
337 // TODO: We'd like to remove this restriction.
338 // See the comment in X86InstrInfo.td for more info.
339 if (!CurDAG->shouldOptForSize())
340 return false;
341
342 // Walk all the users of the immediate.
343 for (SDNode::use_iterator UI = N->use_begin(),
344 UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
345
346 SDNode *User = *UI;
347
348 // This user is already selected. Count it as a legitimate use and
349 // move on.
350 if (User->isMachineOpcode()) {
351 UseCount++;
352 continue;
353 }
354
355 // We want to count stores of immediates as real uses.
356 if (User->getOpcode() == ISD::STORE &&
357 User->getOperand(1).getNode() == N) {
358 UseCount++;
359 continue;
360 }
361
362 // We don't currently match users that have > 2 operands (except
363 // for stores, which are handled above)
364 // Those instruction won't match in ISEL, for now, and would
365 // be counted incorrectly.
366 // This may change in the future as we add additional instruction
367 // types.
368 if (User->getNumOperands() != 2)
369 continue;
370
371 // If this can match to INC/DEC, don't count it as a use.
372 if (User->getOpcode() == ISD::ADD &&
373 (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0))))
374 continue;
375
376 // Immediates that are used for offsets as part of stack
377 // manipulation should be left alone. These are typically
378 // used to indicate SP offsets for argument passing and
379 // will get pulled into stores/pushes (implicitly).
380 if (User->getOpcode() == X86ISD::ADD ||
381 User->getOpcode() == ISD::ADD ||
382 User->getOpcode() == X86ISD::SUB ||
383 User->getOpcode() == ISD::SUB) {
384
385 // Find the other operand of the add/sub.
386 SDValue OtherOp = User->getOperand(0);
387 if (OtherOp.getNode() == N)
388 OtherOp = User->getOperand(1);
389
390 // Don't count if the other operand is SP.
391 RegisterSDNode *RegNode;
392 if (OtherOp->getOpcode() == ISD::CopyFromReg &&
393 (RegNode = dyn_cast_or_null<RegisterSDNode>(
394 OtherOp->getOperand(1).getNode())))
395 if ((RegNode->getReg() == X86::ESP) ||
396 (RegNode->getReg() == X86::RSP))
397 continue;
398 }
399
400 // ... otherwise, count this and move on.
401 UseCount++;
402 }
403
404 // If we have more than 1 use, then recommend for hoisting.
405 return (UseCount > 1);
406 }
407
408 /// Return a target constant with the specified value of type i8.
409 inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
410 return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
411 }
412
413 /// Return a target constant with the specified value, of type i32.
414 inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
415 return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
416 }
417
418 /// Return a target constant with the specified value, of type i64.
419 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
420 return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
421 }
422
423 SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
424 const SDLoc &DL) {
425 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
426 uint64_t Index = N->getConstantOperandVal(1);
427 MVT VecVT = N->getOperand(0).getSimpleValueType();
428 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
429 }
430
431 SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
432 const SDLoc &DL) {
433 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
434 uint64_t Index = N->getConstantOperandVal(2);
435 MVT VecVT = N->getSimpleValueType(0);
436 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
437 }
438
439 // Helper to detect unneeded and instructions on shift amounts. Called
440 // from PatFrags in tablegen.
441 bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
442 assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
443 const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
444
445 if (Val.countTrailingOnes() >= Width)
446 return true;
447
448 APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
449 return Mask.countTrailingOnes() >= Width;
450 }
451
452 /// Return an SDNode that returns the value of the global base register.
453 /// Output instructions required to initialize the global base register,
454 /// if necessary.
455 SDNode *getGlobalBaseReg();
456
457 /// Return a reference to the TargetMachine, casted to the target-specific
458 /// type.
459 const X86TargetMachine &getTargetMachine() const {
460 return static_cast<const X86TargetMachine &>(TM);
461 }
462
463 /// Return a reference to the TargetInstrInfo, casted to the target-specific
464 /// type.
465 const X86InstrInfo *getInstrInfo() const {
466 return Subtarget->getInstrInfo();
467 }
468
469 /// Address-mode matching performs shift-of-and to and-of-shift
470 /// reassociation in order to expose more scaled addressing
471 /// opportunities.
472 bool ComplexPatternFuncMutatesDAG() const override {
473 return true;
474 }
475
476 bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
477
478 /// Returns whether this is a relocatable immediate in the range
479 /// [-2^Width .. 2^Width-1].
480 template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
481 if (auto *CN = dyn_cast<ConstantSDNode>(N))
482 return isInt<Width>(CN->getSExtValue());
483 return isSExtAbsoluteSymbolRef(Width, N);
484 }
485
486 // Indicates we should prefer to use a non-temporal load for this load.
487 bool useNonTemporalLoad(LoadSDNode *N) const {
488 if (!N->isNonTemporal())
489 return false;
490
491 unsigned StoreSize = N->getMemoryVT().getStoreSize();
492
493 if (N->getAlignment() < StoreSize)
494 return false;
495
496 switch (StoreSize) {
497 default: llvm_unreachable("Unsupported store size");
498 case 4:
499 case 8:
500 return false;
501 case 16:
502 return Subtarget->hasSSE41();
503 case 32:
504 return Subtarget->hasAVX2();
505 case 64:
506 return Subtarget->hasAVX512();
507 }
508 }
509
510 bool foldLoadStoreIntoMemOperand(SDNode *Node);
511 MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
512 bool matchBitExtract(SDNode *Node);
513 bool shrinkAndImmediate(SDNode *N);
514 bool isMaskZeroExtended(SDNode *N) const;
515 bool tryShiftAmountMod(SDNode *N);
516 bool tryShrinkShlLogicImm(SDNode *N);
517 bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
518 bool tryMatchBitSelect(SDNode *N);
519
520 MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
521 const SDLoc &dl, MVT VT, SDNode *Node);
522 MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
523 const SDLoc &dl, MVT VT, SDNode *Node,
524 SDValue &InFlag);
525
526 bool tryOptimizeRem8Extend(SDNode *N);
527
528 bool onlyUsesZeroFlag(SDValue Flags) const;
529 bool hasNoSignFlagUses(SDValue Flags) const;
530 bool hasNoCarryFlagUses(SDValue Flags) const;
531 };
532}
533
534
535// Returns true if this masked compare can be implemented legally with this
536// type.
537static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
538 unsigned Opcode = N->getOpcode();
539 if (Opcode == X86ISD::CMPM || Opcode == X86ISD::STRICT_CMPM ||
540 Opcode == ISD::SETCC || Opcode == X86ISD::CMPM_SAE ||
541 Opcode == X86ISD::VFPCLASS) {
542 // We can get 256-bit 8 element types here without VLX being enabled. When
543 // this happens we will use 512-bit operations and the mask will not be
544 // zero extended.
545 EVT OpVT = N->getOperand(0).getValueType();
546 // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the
547 // second operand.
548 if (Opcode == X86ISD::STRICT_CMPM)
549 OpVT = N->getOperand(1).getValueType();
550 if (OpVT.is256BitVector() || OpVT.is128BitVector())
551 return Subtarget->hasVLX();
552
553 return true;
554 }
555 // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
556 if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
557 Opcode == X86ISD::FSETCCM_SAE)
558 return true;
559
560 return false;
561}
562
563// Returns true if we can assume the writer of the mask has zero extended it
564// for us.
565bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
566 // If this is an AND, check if we have a compare on either side. As long as
567 // one side guarantees the mask is zero extended, the AND will preserve those
568 // zeros.
569 if (N->getOpcode() == ISD::AND)
570 return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
571 isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
572
573 return isLegalMaskCompare(N, Subtarget);
574}
575
576bool
577X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
578 if (OptLevel == CodeGenOpt::None) return false;
[6] Assuming field 'OptLevel' is not equal to None
[7] Taking false branch
579
580 if (!N.hasOneUse())
[8] Assuming the condition is false
[9] Taking false branch
581 return false;
582
583 if (N.getOpcode() != ISD::LOAD)
[10] Assuming the condition is false
[11] Taking false branch
584 return true;
585
586 // Don't fold non-temporal loads if we have an instruction for them.
587 if (useNonTemporalLoad(cast<LoadSDNode>(N)))
[12] Taking false branch
588 return false;
589
590 // If N is a load, do additional profitability checks.
591 if (U == Root) {
[12.1] 'U' is equal to 'Root'
[13] Taking true branch
592 switch (U->getOpcode()) {
[14] Control jumps to 'case XOR:' at line 605
593 default: break;
594 case X86ISD::ADD:
595 case X86ISD::ADC:
596 case X86ISD::SUB:
597 case X86ISD::SBB:
598 case X86ISD::AND:
599 case X86ISD::XOR:
600 case X86ISD::OR:
601 case ISD::ADD:
602 case ISD::ADDCARRY:
603 case ISD::AND:
604 case ISD::OR:
605 case ISD::XOR: {
606 SDValue Op1 = U->getOperand(1);
[15] Value assigned to 'Op1.Node'
607
608 // If the other operand is a 8-bit immediate we should fold the immediate
609 // instead. This reduces code size.
610 // e.g.
611 // movl 4(%esp), %eax
612 // addl $4, %eax
613 // vs.
614 // movl $4, %eax
615 // addl 4(%esp), %eax
616 // The former is 2 bytes shorter. In case where the increment is 1, then
617 // the saving can be 4 bytes (by using incl %eax).
618 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
[16] Calling 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
[31] Returning from 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
[32] Assuming 'Imm' is null
[33] Assuming pointer value is null
[34] Taking false branch
619 if (Imm->getAPIntValue().isSignedIntN(8))
620 return false;
621
622 // If this is a 64-bit AND with an immediate that fits in 32-bits,
623 // prefer using the smaller and over folding the load. This is needed to
624 // make sure immediates created by shrinkAndImmediate are always folded.
625 // Ideally we would narrow the load during DAG combine and get the
626 // best of both worlds.
627 if (U->getOpcode() == ISD::AND &&
628 Imm->getAPIntValue().getBitWidth() == 64 &&
629 Imm->getAPIntValue().isIntN(32))
630 return false;
631
632 // If this really a zext_inreg that can be represented with a movzx
633 // instruction, prefer that.
634 // TODO: We could shrink the load and fold if it is non-volatile.
635 if (U->getOpcode() == ISD::AND &&
636 (Imm->getAPIntValue() == UINT8_MAX ||
637 Imm->getAPIntValue() == UINT16_MAX ||
638 Imm->getAPIntValue() == UINT32_MAX))
639 return false;
640
641 // ADD/SUB can negate the immediate and use the opposite operation
642 // to fit 128 into a sign-extended 8-bit immediate.
643 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
644 (-Imm->getAPIntValue()).isSignedIntN(8))
645 return false;
646
647 if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) &&
648 (-Imm->getAPIntValue()).isSignedIntN(8) &&
649 hasNoCarryFlagUses(SDValue(U, 1)))
650 return false;
651 }
652
653 // If the other operand is a TLS address, we should fold it instead.
654 // This produces
655 // movl %gs:0, %eax
656 // leal i@NTPOFF(%eax), %eax
657 // instead of
658 // movl $i@NTPOFF, %eax
659 // addl %gs:0, %eax
660 // if the block also has an access to a second TLS address this will save
661 // a load.
662 // FIXME: This is probably also true for non-TLS addresses.
663 if (Op1.getOpcode() == X86ISD::Wrapper) {
[35] Calling 'SDValue::getOpcode'
664 SDValue Val = Op1.getOperand(0);
665 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
666 return false;
667 }
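
Here the path ends: step 15 assigned Op1 from U->getOperand(1), steps 32-33 assume the dyn_cast<ConstantSDNode> result is null and that the underlying pointer value is null, and step 35 enters SDValue::getOpcode(), where the null node pointer is dereferenced (SelectionDAGNodes.h, line 1149). Purely as an illustration of how this kind of report is usually silenced — a sketch, not the change made in LLVM — the condition above could check the node before forwarding through it:

    // Illustrative guard only; assumes Op1 may carry a null node, which the
    // analyzer cannot rule out at this point in the path.
    if (Op1.getNode() && Op1.getOpcode() == X86ISD::Wrapper) {
      SDValue Val = Op1.getOperand(0);
      if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
        return false;
    }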
668
669 // Don't fold load if this matches the BTS/BTR/BTC patterns.
670 // BTS: (or X, (shl 1, n))
671 // BTR: (and X, (rotl -2, n))
672 // BTC: (xor X, (shl 1, n))
673 if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
674 if (U->getOperand(0).getOpcode() == ISD::SHL &&
675 isOneConstant(U->getOperand(0).getOperand(0)))
676 return false;
677
678 if (U->getOperand(1).getOpcode() == ISD::SHL &&
679 isOneConstant(U->getOperand(1).getOperand(0)))
680 return false;
681 }
682 if (U->getOpcode() == ISD::AND) {
683 SDValue U0 = U->getOperand(0);
684 SDValue U1 = U->getOperand(1);
685 if (U0.getOpcode() == ISD::ROTL) {
686 auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
687 if (C && C->getSExtValue() == -2)
688 return false;
689 }
690
691 if (U1.getOpcode() == ISD::ROTL) {
692 auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
693 if (C && C->getSExtValue() == -2)
694 return false;
695 }
696 }
697
698 break;
699 }
700 case ISD::SHL:
701 case ISD::SRA:
702 case ISD::SRL:
703 // Don't fold a load into a shift by immediate. The BMI2 instructions
704 // support folding a load, but not an immediate. The legacy instructions
705 // support folding an immediate, but can't fold a load. Folding an
706 // immediate is preferable to folding a load.
707 if (isa<ConstantSDNode>(U->getOperand(1)))
708 return false;
709
710 break;
711 }
712 }
713
714 // Prevent folding a load if this can be implemented with an insert_subreg or
715 // a move that implicitly zeroes.
716 if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
717 isNullConstant(Root->getOperand(2)) &&
718 (Root->getOperand(0).isUndef() ||
719 ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
720 return false;
721
722 return true;
723}
724
725/// Replace the original chain operand of the call with
726/// load's chain operand and move load below the call's chain operand.
727static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
728 SDValue Call, SDValue OrigChain) {
729 SmallVector<SDValue, 8> Ops;
730 SDValue Chain = OrigChain.getOperand(0);
731 if (Chain.getNode() == Load.getNode())
732 Ops.push_back(Load.getOperand(0));
733 else {
734 assert(Chain.getOpcode() == ISD::TokenFactor &&
735 "Unexpected chain operand");
736 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
737 if (Chain.getOperand(i).getNode() == Load.getNode())
738 Ops.push_back(Load.getOperand(0));
739 else
740 Ops.push_back(Chain.getOperand(i));
741 SDValue NewChain =
742 CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
743 Ops.clear();
744 Ops.push_back(NewChain);
745 }
746 Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
747 CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
748 CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
749 Load.getOperand(1), Load.getOperand(2));
750
751 Ops.clear();
752 Ops.push_back(SDValue(Load.getNode(), 1));
753 Ops.append(Call->op_begin() + 1, Call->op_end());
754 CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
755}
756
757/// Return true if call address is a load and it can be
758/// moved below CALLSEQ_START and the chains leading up to the call.
759/// Return the CALLSEQ_START by reference as a second output.
760/// In the case of a tail call, there isn't a callseq node between the call
761/// chain and the load.
762static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
763 // The transformation is somewhat dangerous if the call's chain was glued to
764 // the call. After MoveBelowOrigChain the load is moved between the call and
765 // the chain, this can create a cycle if the load is not folded. So it is
766 // *really* important that we are sure the load will be folded.
767 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
768 return false;
769 LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
770 if (!LD ||
771 !LD->isSimple() ||
772 LD->getAddressingMode() != ISD::UNINDEXED ||
773 LD->getExtensionType() != ISD::NON_EXTLOAD)
774 return false;
775
776 // Now let's find the callseq_start.
777 while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
778 if (!Chain.hasOneUse())
779 return false;
780 Chain = Chain.getOperand(0);
781 }
782
783 if (!Chain.getNumOperands())
784 return false;
785 // Since we are not checking for AA here, conservatively abort if the chain
786 // writes to memory. It's not safe to move the callee (a load) across a store.
787 if (isa<MemSDNode>(Chain.getNode()) &&
788 cast<MemSDNode>(Chain.getNode())->writeMem())
789 return false;
790 if (Chain.getOperand(0).getNode() == Callee.getNode())
791 return true;
792 if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
793 Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
794 Callee.getValue(1).hasOneUse())
795 return true;
796 return false;
797}
798
799void X86DAGToDAGISel::PreprocessISelDAG() {
800 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
801 E = CurDAG->allnodes_end(); I != E; ) {
802 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
803
804 // If this is a target specific AND node with no flag usages, turn it back
805 // into ISD::AND to enable test instruction matching.
806 if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
807 SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
808 N->getOperand(0), N->getOperand(1));
809 --I;
810 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
811 ++I;
812 CurDAG->DeleteNode(N);
813 continue;
814 }
815
816 /// Convert vector increment or decrement to sub/add with an all-ones
817 /// constant:
818 /// add X, <1, 1...> --> sub X, <-1, -1...>
819 /// sub X, <1, 1...> --> add X, <-1, -1...>
820 /// The all-ones vector constant can be materialized using a pcmpeq
821 /// instruction that is commonly recognized as an idiom (has no register
822 /// dependency), so that's better/smaller than loading a splat 1 constant.
823 if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
824 N->getSimpleValueType(0).isVector()) {
825
826 APInt SplatVal;
827 if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
828 SplatVal.isOneValue()) {
829 SDLoc DL(N);
830
831 MVT VT = N->getSimpleValueType(0);
832 unsigned NumElts = VT.getSizeInBits() / 32;
833 SDValue AllOnes =
834 CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts));
835 AllOnes = CurDAG->getBitcast(VT, AllOnes);
836
837 unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
838 SDValue Res =
839 CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
840 --I;
841 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
842 ++I;
843 CurDAG->DeleteNode(N);
844 continue;
845 }
846 }
847
848 switch (N->getOpcode()) {
849 case ISD::FP_ROUND:
850 case ISD::STRICT_FP_ROUND:
851 case ISD::FP_TO_SINT:
852 case ISD::FP_TO_UINT:
853 case ISD::STRICT_FP_TO_SINT:
854 case ISD::STRICT_FP_TO_UINT: {
855 // Replace vector fp_to_s/uint with their X86 specific equivalent so we
856 // don't need 2 sets of patterns.
857 if (!N->getSimpleValueType(0).isVector())
858 break;
859
860 unsigned NewOpc;
861 switch (N->getOpcode()) {
862 default: llvm_unreachable("Unexpected opcode!");
863 case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break;
864 case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break;
865 case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break;
866 case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
867 case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break;
868 case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
869 }
870 SDValue Res;
871 if (N->isStrictFPOpcode())
872 Res =
873 CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
874 {N->getOperand(0), N->getOperand(1)});
875 else
876 Res =
877 CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
878 N->getOperand(0));
879 --I;
880 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
881 ++I;
882 CurDAG->DeleteNode(N);
883 continue;
884 }
885 case ISD::SHL:
886 case ISD::SRA:
887 case ISD::SRL: {
888 // Replace vector shifts with their X86 specific equivalent so we don't
889 // need 2 sets of patterns.
890 if (!N->getValueType(0).isVector())
891 break;
892
893 unsigned NewOpc;
894 switch (N->getOpcode()) {
895 default: llvm_unreachable("Unexpected opcode!");
896 case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
897 case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
898 case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
899 }
900 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
901 N->getOperand(0), N->getOperand(1));
902 --I;
903 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
904 ++I;
905 CurDAG->DeleteNode(N);
906 continue;
907 }
908 case ISD::ANY_EXTEND:
909 case ISD::ANY_EXTEND_VECTOR_INREG: {
910 // Replace vector any extend with the zero extend equivalents so we don't
911 // need 2 sets of patterns. Ignore vXi1 extensions.
912 if (!N->getValueType(0).isVector())
913 break;
914
915 unsigned NewOpc;
916 if (N->getOperand(0).getScalarValueSizeInBits() == 1) {
917 assert(N->getOpcode() == ISD::ANY_EXTEND &&
918 "Unexpected opcode for mask vector!");
919 NewOpc = ISD::SIGN_EXTEND;
920 } else {
921 NewOpc = N->getOpcode() == ISD::ANY_EXTEND
922 ? ISD::ZERO_EXTEND
923 : ISD::ZERO_EXTEND_VECTOR_INREG;
924 }
925
926 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
927 N->getOperand(0));
928 --I;
929 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
930 ++I;
931 CurDAG->DeleteNode(N);
932 continue;
933 }
934 case ISD::FCEIL:
935 case ISD::STRICT_FCEIL:
936 case ISD::FFLOOR:
937 case ISD::STRICT_FFLOOR:
938 case ISD::FTRUNC:
939 case ISD::STRICT_FTRUNC:
940 case ISD::FNEARBYINT:
941 case ISD::STRICT_FNEARBYINT:
942 case ISD::FRINT:
943 case ISD::STRICT_FRINT: {
944 // Replace fp rounding with their X86 specific equivalent so we don't
945 // need 2 sets of patterns.
946 unsigned Imm;
947 switch (N->getOpcode()) {
948 default: llvm_unreachable("Unexpected opcode!");
949 case ISD::STRICT_FCEIL:
950 case ISD::FCEIL: Imm = 0xA; break;
951 case ISD::STRICT_FFLOOR:
952 case ISD::FFLOOR: Imm = 0x9; break;
953 case ISD::STRICT_FTRUNC:
954 case ISD::FTRUNC: Imm = 0xB; break;
955 case ISD::STRICT_FNEARBYINT:
956 case ISD::FNEARBYINT: Imm = 0xC; break;
957 case ISD::STRICT_FRINT:
958 case ISD::FRINT: Imm = 0x4; break;
959 }
960 SDLoc dl(N);
961 bool IsStrict = N->isStrictFPOpcode();
962 SDValue Res;
963 if (IsStrict)
964 Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
965 {N->getValueType(0), MVT::Other},
966 {N->getOperand(0), N->getOperand(1),
967 CurDAG->getTargetConstant(Imm, dl, MVT::i8)});
968 else
969 Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
970 N->getOperand(0),
971 CurDAG->getTargetConstant(Imm, dl, MVT::i8));
972 --I;
973 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
974 ++I;
975 CurDAG->DeleteNode(N);
976 continue;
977 }
978 case X86ISD::FANDN:
979 case X86ISD::FAND:
980 case X86ISD::FOR:
981 case X86ISD::FXOR: {
982 // Widen scalar fp logic ops to vector to reduce isel patterns.
983 // FIXME: Can we do this during lowering/combine.
984 MVT VT = N->getSimpleValueType(0);
985 if (VT.isVector() || VT == MVT::f128)
986 break;
987
988 MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
989 SDLoc dl(N);
990 SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
991 N->getOperand(0));
992 SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
993 N->getOperand(1));
994
995 SDValue Res;
996 if (Subtarget->hasSSE2()) {
997 EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
998 Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
999 Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
1000 unsigned Opc;
1001 switch (N->getOpcode()) {
1002 default: llvm_unreachable("Unexpected opcode!");
1003 case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
1004 case X86ISD::FAND: Opc = ISD::AND; break;
1005 case X86ISD::FOR: Opc = ISD::OR; break;
1006 case X86ISD::FXOR: Opc = ISD::XOR; break;
1007 }
1008 Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
1009 Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
1010 } else {
1011 Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
1012 }
1013 Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
1014 CurDAG->getIntPtrConstant(0, dl));
1015 --I;
1016 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1017 ++I;
1018 CurDAG->DeleteNode(N);
1019 continue;
1020 }
1021 }
1022
1023 if (OptLevel != CodeGenOpt::None &&
1024 // Only do this when the target can fold the load into the call or
1025 // jmp.
1026 !Subtarget->useRetpolineIndirectCalls() &&
1027 ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
1028 (N->getOpcode() == X86ISD::TC_RETURN &&
1029 (Subtarget->is64Bit() ||
1030 !getTargetMachine().isPositionIndependent())))) {
1031 /// Also try moving call address load from outside callseq_start to just
1032 /// before the call to allow it to be folded.
1033 ///
1034 /// [Load chain]
1035 /// ^
1036 /// |
1037 /// [Load]
1038 /// ^ ^
1039 /// | |
1040 /// / \--
1041 /// / |
1042 ///[CALLSEQ_START] |
1043 /// ^ |
1044 /// | |
1045 /// [LOAD/C2Reg] |
1046 /// | |
1047 /// \ /
1048 /// \ /
1049 /// [CALL]
1050 bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
1051 SDValue Chain = N->getOperand(0);
1052 SDValue Load = N->getOperand(1);
1053 if (!isCalleeLoad(Load, Chain, HasCallSeq))
1054 continue;
1055 moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
1056 ++NumLoadMoved;
1057 continue;
1058 }
1059
1060 // Lower fpround and fpextend nodes that target the FP stack to be store and
1061 // load to the stack. This is a gross hack. We would like to simply mark
1062 // these as being illegal, but when we do that, legalize produces these when
1063 // it expands calls, then expands these in the same legalize pass. We would
1064 // like dag combine to be able to hack on these between the call expansion
1065 // and the node legalization. As such this pass basically does "really
1066 // late" legalization of these inline with the X86 isel pass.
1067 // FIXME: This should only happen when not compiled with -O0.
1068 switch (N->getOpcode()) {
1069 default: continue;
1070 case ISD::FP_ROUND:
1071 case ISD::FP_EXTEND:
1072 {
1073 MVT SrcVT = N->getOperand(0).getSimpleValueType();
1074 MVT DstVT = N->getSimpleValueType(0);
1075
1076 // If any of the sources are vectors, no fp stack involved.
1077 if (SrcVT.isVector() || DstVT.isVector())
1078 continue;
1079
1080 // If the source and destination are SSE registers, then this is a legal
1081 // conversion that should not be lowered.
1082 const X86TargetLowering *X86Lowering =
1083 static_cast<const X86TargetLowering *>(TLI);
1084 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1085 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1086 if (SrcIsSSE && DstIsSSE)
1087 continue;
1088
1089 if (!SrcIsSSE && !DstIsSSE) {
1090 // If this is an FPStack extension, it is a noop.
1091 if (N->getOpcode() == ISD::FP_EXTEND)
1092 continue;
1093 // If this is a value-preserving FPStack truncation, it is a noop.
1094 if (N->getConstantOperandVal(1))
1095 continue;
1096 }
1097
1098 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1099 // FPStack has extload and truncstore. SSE can fold direct loads into other
1100 // operations. Based on this, decide what we want to do.
1101 MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT;
1102 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1103 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1104 MachinePointerInfo MPI =
1105 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1106 SDLoc dl(N);
1107
1108 // FIXME: optimize the case where the src/dest is a load or store?
1109
1110 SDValue Store = CurDAG->getTruncStore(
1111 CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);
1112 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store,
1113 MemTmp, MPI, MemVT);
1114
1115 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1116 // extload we created. This will cause general havok on the dag because
1117 // anything below the conversion could be folded into other existing nodes.
1118 // To avoid invalidating 'I', back it up to the convert node.
1119 --I;
1120 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1121 break;
1122 }
1123
1124 //The sequence of events for lowering STRICT_FP versions of these nodes requires
1125 //dealing with the chain differently, as there is already a preexisting chain.
1126 case ISD::STRICT_FP_ROUND:
1127 case ISD::STRICT_FP_EXTEND:
1128 {
1129 MVT SrcVT = N->getOperand(1).getSimpleValueType();
1130 MVT DstVT = N->getSimpleValueType(0);
1131
1132 // If any of the sources are vectors, no fp stack involved.
1133 if (SrcVT.isVector() || DstVT.isVector())
1134 continue;
1135
1136 // If the source and destination are SSE registers, then this is a legal
1137 // conversion that should not be lowered.
1138 const X86TargetLowering *X86Lowering =
1139 static_cast<const X86TargetLowering *>(TLI);
1140 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1141 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1142 if (SrcIsSSE && DstIsSSE)
1143 continue;
1144
1145 if (!SrcIsSSE && !DstIsSSE) {
1146 // If this is an FPStack extension, it is a noop.
1147 if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1148 continue;
1149 // If this is a value-preserving FPStack truncation, it is a noop.
1150 if (N->getConstantOperandVal(2))
1151 continue;
1152 }
1153
1154 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1155 // FPStack has extload and truncstore. SSE can fold direct loads into other
1156 // operations. Based on this, decide what we want to do.
1157 MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT;
1158 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1159 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1160 MachinePointerInfo MPI =
1161 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1162 SDLoc dl(N);
1163
1164 // FIXME: optimize the case where the src/dest is a load or store?
1165
1166 //Since the operation is StrictFP, use the preexisting chain.
1167 SDValue Store, Result;
1168 if (!SrcIsSSE) {
1169 SDVTList VTs = CurDAG->getVTList(MVT::Other);
1170 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
1171 Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT,
1172 MPI, /*Align*/ 0,
1173 MachineMemOperand::MOStore);
1174 if (N->getFlags().hasNoFPExcept()) {
1175 SDNodeFlags Flags = Store->getFlags();
1176 Flags.setNoFPExcept(true);
1177 Store->setFlags(Flags);
1178 }
1179 } else {
1180 assert(SrcVT == MemVT && "Unexpected VT!");
1181 Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
1182 MPI);
1183 }
1184
1185 if (!DstIsSSE) {
1186 SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
1187 SDValue Ops[] = {Store, MemTmp};
1188 Result =
1189 CurDAG->getMemIntrinsicNode(X86ISD::FLD, dl, VTs, Ops, MemVT, MPI,
1190 /*Align*/ 0, MachineMemOperand::MOLoad);
1191 if (N->getFlags().hasNoFPExcept()) {
1192 SDNodeFlags Flags = Result->getFlags();
1193 Flags.setNoFPExcept(true);
1194 Result->setFlags(Flags);
1195 }
1196 } else {
1197 assert(DstVT == MemVT && "Unexpected VT!");
1198 Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);
1199 }
1200
1201 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1202 // extload we created. This will cause general havok on the dag because
1203 // anything below the conversion could be folded into other existing nodes.
1204 // To avoid invalidating 'I', back it up to the convert node.
1205 --I;
1206 CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1207 break;
1208 }
1209 }
1210
1211
1212 // Now that we did that, the node is dead. Increment the iterator to the
1213 // next node to process, then delete N.
1214 ++I;
1215 CurDAG->DeleteNode(N);
1216 }
1217
1218 // The load+call transform above can leave some dead nodes in the graph. Make
1219 // sure we remove them. Its possible some of the other transforms do to so
1220 // just remove dead nodes unconditionally.
1221 CurDAG->RemoveDeadNodes();
1222}
1223
1224// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1225bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
1226 unsigned Opc = N->getMachineOpcode();
1227 if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1228 Opc != X86::MOVSX64rr8)
1229 return false;
1230
1231 SDValue N0 = N->getOperand(0);
1232
1233 // We need to be extracting the lower bit of an extend.
1234 if (!N0.isMachineOpcode() ||
1235 N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
1236 N0.getConstantOperandVal(1) != X86::sub_8bit)
1237 return false;
1238
1239 // We're looking for either a movsx or movzx to match the original opcode.
1240 unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1241 : X86::MOVSX32rr8_NOREX;
1242 SDValue N00 = N0.getOperand(0);
1243 if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
1244 return false;
1245
1246 if (Opc == X86::MOVSX64rr8) {
1247 // If we had a sign extend from 8 to 64 bits. We still need to go from 32
1248 // to 64.
1249 MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1250 MVT::i64, N00);
1251 ReplaceUses(N, Extend);
1252 } else {
1253 // Ok we can drop this extend and just use the original extend.
1254 ReplaceUses(N, N00.getNode());
1255 }
1256
1257 return true;
1258}
1259
1260void X86DAGToDAGISel::PostprocessISelDAG() {
1261 // Skip peepholes at -O0.
1262 if (TM.getOptLevel() == CodeGenOpt::None)
1263 return;
1264
1265 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1266
1267 bool MadeChange = false;
1268 while (Position != CurDAG->allnodes_begin()) {
1269 SDNode *N = &*--Position;
1270 // Skip dead nodes and any non-machine opcodes.
1271 if (N->use_empty() || !N->isMachineOpcode())
1272 continue;
1273
1274 if (tryOptimizeRem8Extend(N)) {
1275 MadeChange = true;
1276 continue;
1277 }
1278
1279 // Look for a TESTrr+ANDrr pattern where both operands of the test are
1280 // the same. Rewrite to remove the AND.
1281 unsigned Opc = N->getMachineOpcode();
1282 if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
1283 Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
1284 N->getOperand(0) == N->getOperand(1) &&
1285 N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1286 N->getOperand(0).isMachineOpcode()) {
1287 SDValue And = N->getOperand(0);
1288 unsigned N0Opc = And.getMachineOpcode();
1289 if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
1290 N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
1291 MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
1292 MVT::i32,
1293 And.getOperand(0),
1294 And.getOperand(1));
1295 ReplaceUses(N, Test);
1296 MadeChange = true;
1297 continue;
1298 }
1299 if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
1300 N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
1301 unsigned NewOpc;
1302 switch (N0Opc) {
1303 case X86::AND8rm: NewOpc = X86::TEST8mr; break;
1304 case X86::AND16rm: NewOpc = X86::TEST16mr; break;
1305 case X86::AND32rm: NewOpc = X86::TEST32mr; break;
1306 case X86::AND64rm: NewOpc = X86::TEST64mr; break;
1307 }
1308
1309 // Need to swap the memory and register operand.
1310 SDValue Ops[] = { And.getOperand(1),
1311 And.getOperand(2),
1312 And.getOperand(3),
1313 And.getOperand(4),
1314 And.getOperand(5),
1315 And.getOperand(0),
1316 And.getOperand(6) /* Chain */ };
1317 MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1318 MVT::i32, MVT::Other, Ops);
1319 ReplaceUses(N, Test);
1320 MadeChange = true;
1321 continue;
1322 }
1323 }
1324
1325 // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1326 // used. We're doing this late so we can prefer to fold the AND into masked
1327 // comparisons. Doing that can be better for the live range of the mask
1328 // register.
1329 if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1330 Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1331 N->getOperand(0) == N->getOperand(1) &&
1332 N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1333 N->getOperand(0).isMachineOpcode() &&
1334 onlyUsesZeroFlag(SDValue(N, 0))) {
1335 SDValue And = N->getOperand(0);
1336 unsigned N0Opc = And.getMachineOpcode();
1337 // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1338 // KAND instructions and KTEST use the same ISA feature.
1339 if (N0Opc == X86::KANDBrr ||
1340 (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1341 N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1342 unsigned NewOpc;
1343 switch (Opc) {
1344 default: llvm_unreachable("Unexpected opcode!");
1345 case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
1346 case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
1347 case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
1348 case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
1349 }
1350 MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1351 MVT::i32,
1352 And.getOperand(0),
1353 And.getOperand(1));
1354 ReplaceUses(N, KTest);
1355 MadeChange = true;
1356 continue;
1357 }
1358 }
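// Editor's note, an illustrative sketch that is not part of the original
// file: the rewrite above turns
//   %k = KANDWrr %k1, %k2
//   KORTESTWrr %k, %k        ; users only read ZF
// into
//   KTESTWrr %k1, %k2
// KTEST ands its mask operands and sets ZF when the result is zero, so it is
// equivalent here; the hasDQI() guard is needed because KTESTW is an
// AVX512DQ instruction while KANDW only requires AVX512F.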
1359
1360 // Attempt to remove vector moves that were inserted to zero upper bits.
1361 if (Opc != TargetOpcode::SUBREG_TO_REG)
1362 continue;
1363
1364 unsigned SubRegIdx = N->getConstantOperandVal(2);
1365 if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1366 continue;
1367
1368 SDValue Move = N->getOperand(1);
1369 if (!Move.isMachineOpcode())
1370 continue;
1371
1372 // Make sure it's one of the move opcodes we recognize.
1373 switch (Move.getMachineOpcode()) {
1374 default:
1375 continue;
1376 case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1377 case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1378 case X86::VMOVDQArr: case X86::VMOVDQUrr:
1379 case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1380 case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1381 case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1382 case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1383 case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1384 case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1385 case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1386 case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1387 case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1388 case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1389 case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1390 break;
1391 }
1392
1393 SDValue In = Move.getOperand(0);
1394 if (!In.isMachineOpcode() ||
1395 In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1396 continue;
1397
1398 // Make sure the instruction has a VEX, XOP, or EVEX prefix. This excludes
1399 // the SHA instructions, which use a legacy encoding.
1400 uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1401 if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1402 (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1403 (TSFlags & X86II::EncodingMask) != X86II::XOP)
1404 continue;
1405
1406 // The producing instruction is another vector instruction, so we can drop
1407 // the move.
1408 CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1409 MadeChange = true;
1410 }
1411
1412 if (MadeChange)
1413 CurDAG->RemoveDeadNodes();
1414}
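// Editor's note, an illustrative sketch that is not part of the original
// file: the SUBREG_TO_REG peephole above targets sequences such as
//   %x = VADDPSrr ...                ; VEX encoded, already zeroes upper bits
//   %m = VMOVAPSrr %x                ; move inserted only to zero upper bits
//   %z = SUBREG_TO_REG 0, %m, sub_xmm
// Any VEX-, EVEX-, or XOP-encoded producer already zeroes the upper part of
// the destination register, so the move is redundant and SUBREG_TO_REG can
// take %x directly.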
1415
1416
1417/// Emit any code that needs to be executed only in the main function.
1418void X86DAGToDAGISel::emitSpecialCodeForMain() {
1419 if (Subtarget->isTargetCygMing()) {
1420 TargetLowering::ArgListTy Args;
1421 auto &DL = CurDAG->getDataLayout();
1422
1423 TargetLowering::CallLoweringInfo CLI(*CurDAG);
1424 CLI.setChain(CurDAG->getRoot())
1425 .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1426 CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1427 std::move(Args));
1428 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1429 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1430 CurDAG->setRoot(Result.second);
1431 }
1432}
1433
1434void X86DAGToDAGISel::emitFunctionEntryCode() {
1435 // If this is main, emit special code for main.
1436 const Function &F = MF->getFunction();
1437 if (F.hasExternalLinkage() && F.getName() == "main")
1438 emitSpecialCodeForMain();
1439}
1440
1441static bool isDispSafeForFrameIndex(int64_t Val) {
1442 // On 64-bit platforms, we can run into an issue where a frame index
1443 // includes a displacement that, when added to the explicit displacement,
1444 // will overflow the displacement field. Assuming that the frame index
1445 // displacement fits into a 31-bit integer (which is only slightly more
1446 // aggressive than the current fundamental assumption that it fits into
1447 // a 32-bit integer), a 31-bit disp should always be safe.
1448 return isInt<31>(Val);
1449}
1450
1451bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1452 X86ISelAddressMode &AM) {
1453 // We may have already matched a displacement and the caller just added the
1454 // symbolic displacement. So we still need to do the checks even if Offset
1455 // is zero.
1456
1457 int64_t Val = AM.Disp + Offset;
1458
1459 // Cannot combine ExternalSymbol displacements with integer offsets.
1460 if (Val != 0 && (AM.ES || AM.MCSym))
1461 return true;
1462
1463 CodeModel::Model M = TM.getCodeModel();
1464 if (Subtarget->is64Bit()) {
1465 if (Val != 0 &&
1466 !X86::isOffsetSuitableForCodeModel(Val, M,
1467 AM.hasSymbolicDisplacement()))
1468 return true;
1469 // In addition to the checks required for a register base, check that
1470 // we do not try to use an unsafe Disp with a frame index.
1471 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1472 !isDispSafeForFrameIndex(Val))
1473 return true;
1474 }
1475 AM.Disp = Val;
1476 return false;
1477
1478}
1479
1480bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1481 SDValue Address = N->getOperand(1);
1482
1483 // load gs:0 -> GS segment register.
1484 // load fs:0 -> FS segment register.
1485 //
1486 // This optimization is valid because the GNU TLS model defines that
1487 // gs:0 (or fs:0 on X86-64) contains its own address.
1488 // For more information see http://people.redhat.com/drepper/tls.pdf
1489 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1490 if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1491 !IndirectTlsSegRefs &&
1492 (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1493 Subtarget->isTargetFuchsia()))
1494 switch (N->getPointerInfo().getAddrSpace()) {
1495 case 256:
1496 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1497 return false;
1498 case 257:
1499 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1500 return false;
1501 // Address space 258 is not handled here, because it is not used to
1502 // address TLS areas.
1503 }
1504
1505 return true;
1506}
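// Editor's note, an illustrative sketch that is not part of the original
// file (register choices are assumed for x86-64): because %fs:0 holds the
// address of the TLS block itself, an access like
//   movq %fs:0, %rax
//   movl (%rax,%rdi,4), %eax
// can keep the segment in the addressing mode instead:
//   movl %fs:(,%rdi,4), %eax
// which is what folding the load into AM.Segment above makes possible.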
1507
1508/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1509/// mode. These wrap things that will resolve down into a symbol reference.
1510/// If no match is possible, this returns true, otherwise it returns false.
1511bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1512 // If the addressing mode already has a symbol as the displacement, we can
1513 // never match another symbol.
1514 if (AM.hasSymbolicDisplacement())
1515 return true;
1516
1517 bool IsRIPRelTLS = false;
1518 bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1519 if (IsRIPRel) {
1520 SDValue Val = N.getOperand(0);
1521 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1522 IsRIPRelTLS = true;
1523 }
1524
1525 // We can't use an addressing mode in the 64-bit large code model.
1526 // Global TLS addressing is an exception. In the medium code model,
1527 // we can use a mode when RIP wrappers are present.
1528 // That signifies access to globals that are known to be "near",
1529 // such as the GOT itself.
1530 CodeModel::Model M = TM.getCodeModel();
1531 if (Subtarget->is64Bit() &&
1532 ((M == CodeModel::Large && !IsRIPRelTLS) ||
1533 (M == CodeModel::Medium && !IsRIPRel)))
1534 return true;
1535
1536 // Base and index reg must be 0 in order to use %rip as base.
1537 if (IsRIPRel && AM.hasBaseOrIndexReg())
1538 return true;
1539
1540 // Make a local copy in case we can't do this fold.
1541 X86ISelAddressMode Backup = AM;
1542
1543 int64_t Offset = 0;
1544 SDValue N0 = N.getOperand(0);
1545 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1546 AM.GV = G->getGlobal();
1547 AM.SymbolFlags = G->getTargetFlags();
1548 Offset = G->getOffset();
1549 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1550 AM.CP = CP->getConstVal();
1551 AM.Align = CP->getAlignment();
1552 AM.SymbolFlags = CP->getTargetFlags();
1553 Offset = CP->getOffset();
1554 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1555 AM.ES = S->getSymbol();
1556 AM.SymbolFlags = S->getTargetFlags();
1557 } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1558 AM.MCSym = S->getMCSymbol();
1559 } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1560 AM.JT = J->getIndex();
1561 AM.SymbolFlags = J->getTargetFlags();
1562 } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1563 AM.BlockAddr = BA->getBlockAddress();
1564 AM.SymbolFlags = BA->getTargetFlags();
1565 Offset = BA->getOffset();
1566 } else
1567 llvm_unreachable("Unhandled symbol reference node.");
1568
1569 if (foldOffsetIntoAddress(Offset, AM)) {
1570 AM = Backup;
1571 return true;
1572 }
1573
1574 if (IsRIPRel)
1575 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1576
1577 // Commit the changes now that we know this fold is safe.
1578 return false;
1579}
1580
1581/// Add the specified node to the specified addressing mode, returning true if
1582/// it cannot be done. This just pattern matches for the addressing mode.
1583bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1584 if (matchAddressRecursively(N, AM, 0))
1585 return true;
1586
1587 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1588 // a smaller encoding and avoids a scaled-index.
1589 if (AM.Scale == 2 &&
1590 AM.BaseType == X86ISelAddressMode::RegBase &&
1591 AM.Base_Reg.getNode() == nullptr) {
1592 AM.Base_Reg = AM.IndexReg;
1593 AM.Scale = 1;
1594 }
1595
1596 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1597 // because it has a smaller encoding.
1598 // TODO: Which other code models can use this?
1599 switch (TM.getCodeModel()) {
1600 default: break;
1601 case CodeModel::Small:
1602 case CodeModel::Kernel:
1603 if (Subtarget->is64Bit() &&
1604 AM.Scale == 1 &&
1605 AM.BaseType == X86ISelAddressMode::RegBase &&
1606 AM.Base_Reg.getNode() == nullptr &&
1607 AM.IndexReg.getNode() == nullptr &&
1608 AM.SymbolFlags == X86II::MO_NO_FLAG &&
1609 AM.hasSymbolicDisplacement())
1610 AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1611 break;
1612 }
1613
1614 return false;
1615}
1616
1617bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1618 unsigned Depth) {
1619 // Add an artificial use to this node so that we can keep track of
1620 // it if it gets CSE'd with a different node.
1621 HandleSDNode Handle(N);
1622
1623 X86ISelAddressMode Backup = AM;
1624 if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1625 !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1626 return false;
1627 AM = Backup;
1628
1629 // Try again after commuting the operands.
1630 if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
1631 Depth + 1) &&
1632 !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
1633 return false;
1634 AM = Backup;
1635
1636 // If we couldn't fold both operands into the address at the same time,
1637 // see if we can just put each operand into a register and fold at least
1638 // the add.
1639 if (AM.BaseType == X86ISelAddressMode::RegBase &&
1640 !AM.Base_Reg.getNode() &&
1641 !AM.IndexReg.getNode()) {
1642 N = Handle.getValue();
1643 AM.Base_Reg = N.getOperand(0);
1644 AM.IndexReg = N.getOperand(1);
1645 AM.Scale = 1;
1646 return false;
1647 }
1648 N = Handle.getValue();
1649 return true;
1650}
1651
1652// Insert a node into the DAG at least before the Pos node's position. This
1653// will reposition the node as needed, and will assign it a node ID that is <=
1654// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1655// IDs! The selection DAG must no longer depend on their uniqueness when this
1656// is used.
1657static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1658 if (N->getNodeId() == -1 ||
1659 (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1660 SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1661 DAG.RepositionNode(Pos->getIterator(), N.getNode());
1662 // Mark the node as invalid for pruning, as after this it may be a successor to a
1663 // selected node but otherwise be in the same position as Pos.
1664 // Conservatively mark it with the same -abs(Id) to ensure that the node id
1665 // invariant is preserved.
1666 N->setNodeId(Pos->getNodeId());
1667 SelectionDAGISel::InvalidateNodeId(N.getNode());
1668 }
1669}
1670
1671// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1672 // safe. This allows us to convert the shift and the AND into an h-register
1673// extract and a scaled index. Returns false if the simplification is
1674// performed.
1675static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1676 uint64_t Mask,
1677 SDValue Shift, SDValue X,
1678 X86ISelAddressMode &AM) {
1679 if (Shift.getOpcode() != ISD::SRL ||
1680 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1681 !Shift.hasOneUse())
1682 return true;
1683
1684 int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1685 if (ScaleLog <= 0 || ScaleLog >= 4 ||
1686 Mask != (0xffu << ScaleLog))
1687 return true;
1688
1689 MVT VT = N.getSimpleValueType();
1690 SDLoc DL(N);
1691 SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1692 SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1693 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1694 SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1695 SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1696 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1697
1698 // Insert the new nodes into the topological ordering. We must do this in
1699 // a valid topological ordering as nothing is going to go back and re-sort
1700 // these nodes. We continually insert before 'N' in sequence as this is
1701 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1702 // hierarchy left to express.
1703 insertDAGNode(DAG, N, Eight);
1704 insertDAGNode(DAG, N, Srl);
1705 insertDAGNode(DAG, N, NewMask);
1706 insertDAGNode(DAG, N, And);
1707 insertDAGNode(DAG, N, ShlCount);
1708 insertDAGNode(DAG, N, Shl);
1709 DAG.ReplaceAllUsesWith(N, Shl);
1710 DAG.RemoveDeadNode(N.getNode());
1711 AM.IndexReg = And;
1712 AM.Scale = (1 << ScaleLog);
1713 return false;
1714}
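// Editor's note: a minimal compile-time spot check of the identity used by
// foldMaskAndShiftToExtract, not part of the original file; C1 == 2 and the
// sample value are assumed purely for illustration.
static_assert(((0xDEADBEEFu >> (8 - 2)) & (0xFFu << 2)) ==
                  (((0xDEADBEEFu >> 8) & 0xFFu) << 2),
              "(X >> (8-C1)) & (0xff << C1) == ((X >> 8) & 0xff) << C1");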
1715
1716// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1717// allows us to fold the shift into this addressing mode. Returns false if the
1718// transform succeeded.
1719static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1720 X86ISelAddressMode &AM) {
1721 SDValue Shift = N.getOperand(0);
1722
1723 // Use a signed mask so that shifting right will insert sign bits. These
1724 // bits will be removed when we shift the result left so it doesn't matter
1725 // what we use. This might allow a smaller immediate encoding.
1726 int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
1727
1728 // If we have an any_extend feeding the AND, look through it to see if there
1729 // is a shift behind it. But only if the AND doesn't use the extended bits.
1730 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
1731 bool FoundAnyExtend = false;
1732 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
1733 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
1734 isUInt<32>(Mask)) {
1735 FoundAnyExtend = true;
1736 Shift = Shift.getOperand(0);
1737 }
1738
1739 if (Shift.getOpcode() != ISD::SHL ||
1740 !isa<ConstantSDNode>(Shift.getOperand(1)))
1741 return true;
1742
1743 SDValue X = Shift.getOperand(0);
1744
1745 // Not likely to be profitable if either the AND or SHIFT node has more
1746 // than one use (unless all uses are for address computation). Besides,
1747 // isel mechanism requires their node ids to be reused.
1748 if (!N.hasOneUse() || !Shift.hasOneUse())
1749 return true;
1750
1751 // Verify that the shift amount is something we can fold.
1752 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1753 if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1754 return true;
1755
1756 MVT VT = N.getSimpleValueType();
1757 SDLoc DL(N);
1758 if (FoundAnyExtend) {
1759 SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
1760 insertDAGNode(DAG, N, NewX);
1761 X = NewX;
1762 }
1763
1764 SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1765 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1766 SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1767
1768 // Insert the new nodes into the topological ordering. We must do this in
1769 // a valid topological ordering as nothing is going to go back and re-sort
1770 // these nodes. We continually insert before 'N' in sequence as this is
1771 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1772 // hierarchy left to express.
1773 insertDAGNode(DAG, N, NewMask);
1774 insertDAGNode(DAG, N, NewAnd);
1775 insertDAGNode(DAG, N, NewShift);
1776 DAG.ReplaceAllUsesWith(N, NewShift);
1777 DAG.RemoveDeadNode(N.getNode());
1778
1779 AM.Scale = 1 << ShiftAmt;
1780 AM.IndexReg = NewAnd;
1781 return false;
1782}
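// Editor's note: a minimal compile-time spot check of the identity behind
// foldMaskedShiftToScaledMask, not part of the original file; C1 == 3,
// C2 == 0xF0 and the sample value are assumed purely for illustration.
static_assert(((0xABu << 3) & 0xF0u) == ((0xABu & (0xF0u >> 3)) << 3),
              "(X << C1) & C2 == (X & (C2 >> C1)) << C1");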
1783
1784// Implement some heroics to detect shifts of masked values where the mask can
1785// be replaced by extending the shift and undoing that in the addressing mode
1786// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1787// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1788// the addressing mode. This results in code such as:
1789//
1790// int f(short *y, int *lookup_table) {
1791// ...
1792// return *y + lookup_table[*y >> 11];
1793// }
1794//
1795// Turning into:
1796// movzwl (%rdi), %eax
1797// movl %eax, %ecx
1798// shrl $11, %ecx
1799// addl (%rsi,%rcx,4), %eax
1800//
1801// Instead of:
1802// movzwl (%rdi), %eax
1803// movl %eax, %ecx
1804// shrl $9, %ecx
1805 //   andl $124, %ecx
1806// addl (%rsi,%rcx), %eax
1807//
1808// Note that this function assumes the mask is provided as a mask *after* the
1809// value is shifted. The input chain may or may not match that, but computing
1810// such a mask is trivial.
1811static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1812 uint64_t Mask,
1813 SDValue Shift, SDValue X,
1814 X86ISelAddressMode &AM) {
1815 if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1816 !isa<ConstantSDNode>(Shift.getOperand(1)))
1817 return true;
1818
1819 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1820 unsigned MaskLZ = countLeadingZeros(Mask);
1821 unsigned MaskTZ = countTrailingZeros(Mask);
1822
1823 // The amount of shift we're trying to fit into the addressing mode is taken
1824 // from the trailing zeros of the mask.
1825 unsigned AMShiftAmt = MaskTZ;
1826
1827 // There is nothing we can do here unless the mask is removing some bits.
1828 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1829 if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
1830
1831 // We also need to ensure that the mask is a contiguous run of bits.
1832 if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1833
1834 // Scale the leading zero count down based on the actual size of the value.
1835 // Also scale it down based on the size of the shift.
1836 unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1837 if (MaskLZ < ScaleDown)
1838 return true;
1839 MaskLZ -= ScaleDown;
1840
1841 // The final check is to ensure that any masked out high bits of X are
1842 // already known to be zero. Otherwise, the mask has a semantic impact
1843 // other than masking out a couple of low bits. Unfortunately, because of
1844 // the mask, zero extensions will be removed from operands in some cases.
1845 // This code works extra hard to look through extensions because we can
1846 // replace them with zero extensions cheaply if necessary.
1847 bool ReplacingAnyExtend = false;
1848 if (X.getOpcode() == ISD::ANY_EXTEND) {
1849 unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1850 X.getOperand(0).getSimpleValueType().getSizeInBits();
1851 // Assume that we'll replace the any-extend with a zero-extend, and
1852 // narrow the search to the extended value.
1853 X = X.getOperand(0);
1854 MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1855 ReplacingAnyExtend = true;
1856 }
1857 APInt MaskedHighBits =
1858 APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1859 KnownBits Known = DAG.computeKnownBits(X);
1860 if (MaskedHighBits != Known.Zero) return true;
1861
1862 // We've identified a pattern that can be transformed into a single shift
1863 // and an addressing mode. Make it so.
1864 MVT VT = N.getSimpleValueType();
1865 if (ReplacingAnyExtend) {
1866 assert(X.getValueType() != VT);
1867 // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1868 SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1869 insertDAGNode(DAG, N, NewX);
1870 X = NewX;
1871 }
1872 SDLoc DL(N);
1873 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1874 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1875 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1876 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1877
1878 // Insert the new nodes into the topological ordering. We must do this in
1879 // a valid topological ordering as nothing is going to go back and re-sort
1880 // these nodes. We continually insert before 'N' in sequence as this is
1881 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1882 // hierarchy left to express.
1883 insertDAGNode(DAG, N, NewSRLAmt);
1884 insertDAGNode(DAG, N, NewSRL);
1885 insertDAGNode(DAG, N, NewSHLAmt);
1886 insertDAGNode(DAG, N, NewSHL);
1887 DAG.ReplaceAllUsesWith(N, NewSHL);
1888 DAG.RemoveDeadNode(N.getNode());
1889
1890 AM.Scale = 1 << AMShiftAmt;
1891 AM.IndexReg = NewSRL;
1892 return false;
1893}
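// Editor's note, an illustrative walk-through that is not part of the
// original file: for the lookup_table example above, DAGCombine produces
// (and (srl x, 9), 0x7c). Here MaskTZ == 2, so AMShiftAmt == 2, the SRL
// amount becomes 9 + 2 == 11, AM.Scale becomes 1 << 2 == 4, and the
// masked-off high bits are already known to be zero because x was
// zero-extended from i16 by the movzwl.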
1894
1895// Transform "(X >> SHIFT) & (MASK << C1)" to
1896// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1897// matched to a BEXTR later. Returns false if the simplification is performed.
1898static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1899 uint64_t Mask,
1900 SDValue Shift, SDValue X,
1901 X86ISelAddressMode &AM,
1902 const X86Subtarget &Subtarget) {
1903 if (Shift.getOpcode() != ISD::SRL ||
1904 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1905 !Shift.hasOneUse() || !N.hasOneUse())
1906 return true;
1907
1908 // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1909 if (!Subtarget.hasTBM() &&
1910 !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1911 return true;
1912
1913 // We need to ensure that the mask is a contiguous run of bits.
1914 if (!isShiftedMask_64(Mask)) return true;
1915
1916 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1917
1918 // The amount of shift we're trying to fit into the addressing mode is taken
1919 // from the trailing zeros of the mask.
1920 unsigned AMShiftAmt = countTrailingZeros(Mask);
1921
1922 // There is nothing we can do here unless the mask is removing some bits.
1923 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1924 if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
1925
1926 MVT VT = N.getSimpleValueType();
1927 SDLoc DL(N);
1928 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1929 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1930 SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1931 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1932 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1933 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1934
1935 // Insert the new nodes into the topological ordering. We must do this in
1936 // a valid topological ordering as nothing is going to go back and re-sort
1937 // these nodes. We continually insert before 'N' in sequence as this is
1938 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1939 // hierarchy left to express.
1940 insertDAGNode(DAG, N, NewSRLAmt);
1941 insertDAGNode(DAG, N, NewSRL);
1942 insertDAGNode(DAG, N, NewMask);
1943 insertDAGNode(DAG, N, NewAnd);
1944 insertDAGNode(DAG, N, NewSHLAmt);
1945 insertDAGNode(DAG, N, NewSHL);
1946 DAG.ReplaceAllUsesWith(N, NewSHL);
1947 DAG.RemoveDeadNode(N.getNode());
1948
1949 AM.Scale = 1 << AMShiftAmt;
1950 AM.IndexReg = NewAnd;
1951 return false;
1952}
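// Editor's note, an illustrative sketch that is not part of the original
// file, assuming SHIFT == 4, C1 == 2 and MASK == 0xff: the rewrite above
// turns
//   (x >> 4) & 0x3fc        ; 0x3fc == 0xff << 2
// into
//   ((x >> 6) & 0xff) << 2
// where the leading shift-and-mask is a BEXTR candidate and the trailing
// << 2 is absorbed as AM.Scale == 4.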
1953
1954bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1955 unsigned Depth) {
1956 SDLoc dl(N);
1957 LLVM_DEBUG({
1958 dbgs() << "MatchAddress: ";
1959 AM.dump(CurDAG);
1960 });
1961 // Limit recursion.
1962 if (Depth > 5)
1963 return matchAddressBase(N, AM);
1964
1965 // If this is already a %rip relative address, we can only merge immediates
1966 // into it. Instead of handling this in every case, we handle it here.
1967 // RIP relative addressing: %rip + 32-bit displacement!
1968 if (AM.isRIPRelative()) {
1969 // FIXME: JumpTable and ExternalSymbol addresses currently don't like
1970 // displacements. It isn't very important, but this should be fixed for
1971 // consistency.
1972 if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1973 return true;
1974
1975 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1976 if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1977 return false;
1978 return true;
1979 }
1980
1981 switch (N.getOpcode()) {
1982 default: break;
1983 case ISD::LOCAL_RECOVER: {
1984 if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1985 if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1986 // Use the symbol and don't prefix it.
1987 AM.MCSym = ESNode->getMCSymbol();
1988 return false;
1989 }
1990 break;
1991 }
1992 case ISD::Constant: {
1993 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1994 if (!foldOffsetIntoAddress(Val, AM))
1995 return false;
1996 break;
1997 }
1998
1999 case X86ISD::Wrapper:
2000 case X86ISD::WrapperRIP:
2001 if (!matchWrapper(N, AM))
2002 return false;
2003 break;
2004
2005 case ISD::LOAD:
2006 if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
2007 return false;
2008 break;
2009
2010 case ISD::FrameIndex:
2011 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2012 AM.Base_Reg.getNode() == nullptr &&
2013 (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
2014 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
2015 AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
2016 return false;
2017 }
2018 break;
2019
2020 case ISD::SHL:
2021 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2022 break;
2023
2024 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
2025 unsigned Val = CN->getZExtValue();
2026 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
2027 // that the base operand remains free for further matching. If
2028 // the base doesn't end up getting used, a post-processing step
2029 // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
2030 if (Val == 1 || Val == 2 || Val == 3) {
2031 AM.Scale = 1 << Val;
2032 SDValue ShVal = N.getOperand(0);
2033
2034 // Okay, we know that we have a scale by now. However, if the scaled
2035 // value is an add of something and a constant, we can fold the
2036 // constant into the disp field here.
2037 if (CurDAG->isBaseWithConstantOffset(ShVal)) {
2038 AM.IndexReg = ShVal.getOperand(0);
2039 ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
2040 uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
2041 if (!foldOffsetIntoAddress(Disp, AM))
2042 return false;
2043 }
2044
2045 AM.IndexReg = ShVal;
2046 return false;
2047 }
2048 }
2049 break;
2050
2051 case ISD::SRL: {
2052 // Scale must not be used already.
2053 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2054
2055 // We only handle up to 64-bit values here as those are what matter for
2056 // addressing mode optimizations.
2057 assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2058        "Unexpected value size!");
2059
2060 SDValue And = N.getOperand(0);
2061 if (And.getOpcode() != ISD::AND) break;
2062 SDValue X = And.getOperand(0);
2063
2064 // The mask used for the transform is expected to be post-shift, but we
2065 // found the shift first so just apply the shift to the mask before passing
2066 // it down.
2067 if (!isa<ConstantSDNode>(N.getOperand(1)) ||
2068 !isa<ConstantSDNode>(And.getOperand(1)))
2069 break;
2070 uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
2071
2072 // Try to fold the mask and shift into the scale, and return false if we
2073 // succeed.
2074 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
2075 return false;
2076 break;
2077 }
2078
2079 case ISD::SMUL_LOHI:
2080 case ISD::UMUL_LOHI:
2081 // A mul_lohi where we need the low part can be folded as a plain multiply.
2082 if (N.getResNo() != 0) break;
2083 LLVM_FALLTHROUGH;
2084 case ISD::MUL:
2085 case X86ISD::MUL_IMM:
2086 // X*[3,5,9] -> X+X*[2,4,8]
2087 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2088 AM.Base_Reg.getNode() == nullptr &&
2089 AM.IndexReg.getNode() == nullptr) {
2090 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
2091 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
2092 CN->getZExtValue() == 9) {
2093 AM.Scale = unsigned(CN->getZExtValue())-1;
2094
2095 SDValue MulVal = N.getOperand(0);
2096 SDValue Reg;
2097
2098 // Okay, we know that we have a scale by now. However, if the scaled
2099 // value is an add of something and a constant, we can fold the
2100 // constant into the disp field here.
2101 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
2102 isa<ConstantSDNode>(MulVal.getOperand(1))) {
2103 Reg = MulVal.getOperand(0);
2104 ConstantSDNode *AddVal =
2105 cast<ConstantSDNode>(MulVal.getOperand(1));
2106 uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
2107 if (foldOffsetIntoAddress(Disp, AM))
2108 Reg = N.getOperand(0);
2109 } else {
2110 Reg = N.getOperand(0);
2111 }
2112
2113 AM.IndexReg = AM.Base_Reg = Reg;
2114 return false;
2115 }
2116 }
2117 break;
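// Editor's note, an illustrative sketch that is not part of the original
// file: the multiply case above lets x*5 be matched as x + x*4, i.e. base
// and index both x with Scale == 4, which typically selects to
//   leal (%rax,%rax,4), %eax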
2118
2119 case ISD::SUB: {
2120 // Given A-B, if A can be completely folded into the address and
2121 // the index field is unused, use -B as the index.
2122 // This is a win if A has multiple parts that can be folded into
2123 // the address. Also, this saves a mov if the base register has
2124 // other uses, since it avoids a two-address sub instruction; however,
2125 // it costs an additional mov if the index register has other uses.
2126
2127 // Add an artificial use to this node so that we can keep track of
2128 // it if it gets CSE'd with a different node.
2129 HandleSDNode Handle(N);
2130
2131 // Test if the LHS of the sub can be folded.
2132 X86ISelAddressMode Backup = AM;
2133 if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2134 N = Handle.getValue();
2135 AM = Backup;
2136 break;
2137 }
2138 N = Handle.getValue();
2139 // Test if the index field is free for use.
2140 if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2141 AM = Backup;
2142 break;
2143 }
2144
2145 int Cost = 0;
2146 SDValue RHS = N.getOperand(1);
2147 // If the RHS involves a register with multiple uses, this
2148 // transformation incurs an extra mov, due to the neg instruction
2149 // clobbering its operand.
2150 if (!RHS.getNode()->hasOneUse() ||
2151 RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2152 RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2153 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2154 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2155 RHS.getOperand(0).getValueType() == MVT::i32))
2156 ++Cost;
2157 // If the base is a register with multiple uses, this
2158 // transformation may save a mov.
2159 if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2160 !AM.Base_Reg.getNode()->hasOneUse()) ||
2161 AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2162 --Cost;
2163 // If the folded LHS was interesting, this transformation saves
2164 // address arithmetic.
2165 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2166 ((AM.Disp != 0) && (Backup.Disp == 0)) +
2167 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2168 --Cost;
2169 // If it doesn't look like it may be an overall win, don't do it.
2170 if (Cost >= 0) {
2171 AM = Backup;
2172 break;
2173 }
2174
2175 // Ok, the transformation is legal and appears profitable. Go for it.
2176 // Negation will be emitted later to avoid creating dangling nodes if this
2177 // was an unprofitable LEA.
2178 AM.IndexReg = RHS;
2179 AM.NegateIndex = true;
2180 AM.Scale = 1;
2181 return false;
2182 }
2183
2184 case ISD::ADD:
2185 if (!matchAdd(N, AM, Depth))
2186 return false;
2187 break;
2188
2189 case ISD::OR:
2190 // We want to look through a transform in InstCombine and DAGCombiner that
2191 // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
2192 // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
2193 // An 'lea' can then be used to match the shift (multiply) and add:
2194 // and $1, %esi
2195 // lea (%rsi, %rdi, 8), %rax
2196 if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
2197 !matchAdd(N, AM, Depth))
2198 return false;
2199 break;
2200
2201 case ISD::AND: {
2202 // Perform some heroic transforms on an and of a constant-count shift
2203 // with a constant to enable use of the scaled offset field.
2204
2205 // Scale must not be used already.
2206 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2207
2208 // We only handle up to 64-bit values here as those are what matter for
2209 // addressing mode optimizations.
2210 assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2211        "Unexpected value size!");
2212
2213 if (!isa<ConstantSDNode>(N.getOperand(1)))
2214 break;
2215
2216 if (N.getOperand(0).getOpcode() == ISD::SRL) {
2217 SDValue Shift = N.getOperand(0);
2218 SDValue X = Shift.getOperand(0);
2219
2220 uint64_t Mask = N.getConstantOperandVal(1);
2221
2222 // Try to fold the mask and shift into an extract and scale.
2223 if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2224 return false;
2225
2226 // Try to fold the mask and shift directly into the scale.
2227 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2228 return false;
2229
2230 // Try to fold the mask and shift into BEXTR and scale.
2231 if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2232 return false;
2233 }
2234
2235 // Try to swap the mask and shift to place shifts which can be done as
2236 // a scale on the outside of the mask.
2237 if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2238 return false;
2239
2240 break;
2241 }
2242 case ISD::ZERO_EXTEND: {
2243 // Try to widen a zexted shift left to the same size as its use, so we can
2244 // match the shift as a scale factor.
2245 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2246 break;
2247 if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
2248 break;
2249
2250 // Give up if the shift is not a valid scale factor [1,2,3].
2251 SDValue Shl = N.getOperand(0);
2252 auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
2253 if (!ShAmtC || ShAmtC->getZExtValue() > 3)
2254 break;
2255
2256 // The narrow shift must only shift out zero bits (it must be 'nuw').
2257 // That makes it safe to widen to the destination type.
2258 APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
2259 ShAmtC->getZExtValue());
2260 if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
2261 break;
2262
2263 // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
2264 MVT VT = N.getSimpleValueType();
2265 SDLoc DL(N);
2266 SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
2267 SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
2268
2269 // Convert the shift to scale factor.
2270 AM.Scale = 1 << ShAmtC->getZExtValue();
2271 AM.IndexReg = Zext;
2272
2273 insertDAGNode(*CurDAG, N, Zext);
2274 insertDAGNode(*CurDAG, N, NewShl);
2275 CurDAG->ReplaceAllUsesWith(N, NewShl);
2276 CurDAG->RemoveDeadNode(N.getNode());
2277 return false;
2278 }
2279 }
2280
2281 return matchAddressBase(N, AM);
2282}
2283
2284/// Helper for MatchAddress. Add the specified node to the
2285/// specified addressing mode without any further recursion.
2286bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2287 // Is the base register already occupied?
2288 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2289 // If so, check to see if the scale index register is set.
2290 if (!AM.IndexReg.getNode()) {
2291 AM.IndexReg = N;
2292 AM.Scale = 1;
2293 return false;
2294 }
2295
2296 // Otherwise, we cannot select it.
2297 return true;
2298 }
2299
2300 // Default, generate it as a register.
2301 AM.BaseType = X86ISelAddressMode::RegBase;
2302 AM.Base_Reg = N;
2303 return false;
2304}
2305
2306/// Helper for selectVectorAddr. Handles things that can be folded into a
2307 /// gather/scatter address. The index register and scale should have already
2308/// been handled.
2309bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2310 // TODO: Support other operations.
2311 switch (N.getOpcode()) {
2312 case ISD::Constant: {
2313 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2314 if (!foldOffsetIntoAddress(Val, AM))
2315 return false;
2316 break;
2317 }
2318 case X86ISD::Wrapper:
2319 if (!matchWrapper(N, AM))
2320 return false;
2321 break;
2322 }
2323
2324 return matchAddressBase(N, AM);
2325}
2326
2327bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
2328 SDValue IndexOp, SDValue ScaleOp,
2329 SDValue &Base, SDValue &Scale,
2330 SDValue &Index, SDValue &Disp,
2331 SDValue &Segment) {
2332 X86ISelAddressMode AM;
2333 AM.IndexReg = IndexOp;
2334 AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
2335
2336 unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
2337 if (AddrSpace == X86AS::GS)
2338 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2339 if (AddrSpace == X86AS::FS)
2340 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2341 if (AddrSpace == X86AS::SS)
2342 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2343
2344 SDLoc DL(BasePtr);
2345 MVT VT = BasePtr.getSimpleValueType();
2346
2347 // Try to match into the base and displacement fields.
2348 if (matchVectorAddress(BasePtr, AM))
2349 return false;
2350
2351 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2352 return true;
2353}
2354
2355/// Returns true if it is able to pattern match an addressing mode.
2356/// It returns the operands which make up the maximal addressing mode it can
2357/// match by reference.
2358///
2359/// Parent is the parent node of the addr operand that is being matched. It
2360/// is always a load, store, atomic node, or null. It is only null when
2361/// checking memory operands for inline asm nodes.
2362bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2363 SDValue &Scale, SDValue &Index,
2364 SDValue &Disp, SDValue &Segment) {
2365 X86ISelAddressMode AM;
2366
2367 if (Parent &&
2368 // This list covers all the opcodes that have an "addr:$ptr" operand but
2369 // are not a MemSDNode, and thus don't have proper addrspace info.
2370 Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2371 Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2372 Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2373 Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2374 Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2375 Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2376 Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2377 unsigned AddrSpace =
2378 cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2379 // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2380 if (AddrSpace == 256)
2381 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2382 if (AddrSpace == 257)
2383 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2384 if (AddrSpace == 258)
2385 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2386 }
2387
2388 // Save the DL and VT before calling matchAddress, it can invalidate N.
2389 SDLoc DL(N);
2390 MVT VT = N.getSimpleValueType();
2391
2392 if (matchAddress(N, AM))
2393 return false;
2394
2395 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2396 return true;
2397}
2398
2399// We can only fold a load if all nodes between it and the root node have a
2400// single use. If there are additional uses, we could end up duplicating the
2401// load.
2402static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
2403 while (User != Root) {
2404 if (!User->hasOneUse())
2405 return false;
2406 User = *User->use_begin();
2407 }
2408
2409 return true;
2410}
2411
2412/// Match a scalar SSE load. In particular, we want to match a load whose top
2413/// elements are either undef or zeros. The load flavor is derived from the
2414/// type of N, which is either v4f32 or v2f64.
2415///
2416/// We also return:
2417/// PatternChainNode: this is the matched node that has a chain input and
2418/// output.
2419bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
2420 SDValue N, SDValue &Base,
2421 SDValue &Scale, SDValue &Index,
2422 SDValue &Disp, SDValue &Segment,
2423 SDValue &PatternNodeWithChain) {
2424 if (!hasSingleUsesFromRoot(Root, Parent))
2425 return false;
2426
2427 // We can allow a full vector load here since narrowing a load is ok unless
2428 // it's volatile or atomic.
2429 if (ISD::isNON_EXTLoad(N.getNode())) {
2430 LoadSDNode *LD = cast<LoadSDNode>(N);
2431 if (LD->isSimple() &&
2432 IsProfitableToFold(N, LD, Root) &&
2433 IsLegalToFold(N, Parent, Root, OptLevel)) {
2434 PatternNodeWithChain = N;
2435 return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2436 Segment);
2437 }
2438 }
2439
2440 // We can also match the special zero extended load opcode.
2441 if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
2442 PatternNodeWithChain = N;
2443 if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2444 IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2445 auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2446 return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2447 Segment);
2448 }
2449 }
2450
2451 // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2452 // once. Otherwise the load might get duplicated and the chain output of the
2453 // duplicate load will not be observed by all dependencies.
2454 if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2455 PatternNodeWithChain = N.getOperand(0);
2456 if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2457 IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2458 IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2459 LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2460 return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2461 Segment);
2462 }
2463 }
2464
2465 return false;
2466}
2467
2468
2469bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2470 if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2471 uint64_t ImmVal = CN->getZExtValue();
2472 if (!isUInt<32>(ImmVal))
2473 return false;
2474
2475 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2476 return true;
2477 }
2478
2479 // In static codegen with small code model, we can get the address of a label
2480 // into a register with 'movl'
2481 if (N->getOpcode() != X86ISD::Wrapper)
2482 return false;
2483
2484 N = N.getOperand(0);
2485
2486 // At least GNU as does not accept 'movl' for TPOFF relocations.
2487 // FIXME: We could use 'movl' when we know we are targeting MC.
2488 if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2489 return false;
2490
2491 Imm = N;
2492 if (N->getOpcode() != ISD::TargetGlobalAddress)
2493 return TM.getCodeModel() == CodeModel::Small;
2494
2495 Optional<ConstantRange> CR =
2496 cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2497 if (!CR)
2498 return TM.getCodeModel() == CodeModel::Small;
2499
2500 return CR->getUnsignedMax().ult(1ull << 32);
2501}
2502
2503bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2504 SDValue &Scale, SDValue &Index,
2505 SDValue &Disp, SDValue &Segment) {
2506 // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2507 SDLoc DL(N);
2508
2509 if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2510 return false;
2511
2512 RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
2513 if (RN && RN->getReg() == 0)
2514 Base = CurDAG->getRegister(0, MVT::i64);
2515 else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
2516 // Base could already be %rip, particularly in the x32 ABI.
2517 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2518 MVT::i64), 0);
2519 Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2520 Base);
2521 }
2522
2523 RN = dyn_cast<RegisterSDNode>(Index);
2524 if (RN && RN->getReg() == 0)
2525 Index = CurDAG->getRegister(0, MVT::i64);
2526 else {
2527 assert(Index.getValueType() == MVT::i32 &&
2528        "Expect to be extending 32-bit registers for use in LEA");
2529 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2530 MVT::i64), 0);
2531 Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2532 Index);
2533 }
2534
2535 return true;
2536}
2537
2538/// Calls SelectAddr and determines if the maximal addressing
2539/// mode it matches can be cost effectively emitted as an LEA instruction.
2540bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2541 SDValue &Base, SDValue &Scale,
2542 SDValue &Index, SDValue &Disp,
2543 SDValue &Segment) {
2544 X86ISelAddressMode AM;
2545
2546 // Save the DL and VT before calling matchAddress, it can invalidate N.
2547 SDLoc DL(N);
2548 MVT VT = N.getSimpleValueType();
2549
2550 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2551 // segments.
2552 SDValue Copy = AM.Segment;
2553 SDValue T = CurDAG->getRegister(0, MVT::i32);
2554 AM.Segment = T;
2555 if (matchAddress(N, AM))
2556 return false;
2557 assert(T == AM.Segment);
2558 AM.Segment = Copy;
2559
2560 unsigned Complexity = 0;
2561 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2562 Complexity = 1;
2563 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2564 Complexity = 4;
2565
2566 if (AM.IndexReg.getNode())
2567 Complexity++;
2568
2569 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2570 // a simple shift.
2571 if (AM.Scale > 1)
2572 Complexity++;
2573
2574 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2575 // to a LEA. This is determined with some experimentation but is by no means
2576 // optimal (especially for code size consideration). LEA is nice because of
2577 // its three-address nature. Tweak the cost function again when we can run
2578 // convertToThreeAddress() at register allocation time.
2579 if (AM.hasSymbolicDisplacement()) {
2580 // For X86-64, always use LEA to materialize RIP-relative addresses.
2581 if (Subtarget->is64Bit())
2582 Complexity = 4;
2583 else
2584 Complexity += 2;
2585 }
2586
2587 // Heuristic: try harder to form an LEA from ADD if the operands set flags.
2588 // Unlike ADD, LEA does not affect flags, so we will be less likely to require
2589 // duplicating flag-producing instructions later in the pipeline.
2590 if (N.getOpcode() == ISD::ADD) {
2591 auto isMathWithFlags = [](SDValue V) {
2592 switch (V.getOpcode()) {
2593 case X86ISD::ADD:
2594 case X86ISD::SUB:
2595 case X86ISD::ADC:
2596 case X86ISD::SBB:
2597 /* TODO: These opcodes can be added safely, but we may want to justify
2598 their inclusion for different reasons (better for reg-alloc).
2599 case X86ISD::SMUL:
2600 case X86ISD::UMUL:
2601 case X86ISD::OR:
2602 case X86ISD::XOR:
2603 case X86ISD::AND:
2604 */
2605 // Value 1 is the flag output of the node - verify it's not dead.
2606 return !SDValue(V.getNode(), 1).use_empty();
2607 default:
2608 return false;
2609 }
2610 };
2611 // TODO: This could be an 'or' rather than 'and' to make the transform more
2612 // likely to happen. We might want to factor in whether there's a
2613 // load folding opportunity for the math op that disappears with LEA.
2614 if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
2615 Complexity++;
2616 }
2617
2618 if (AM.Disp)
2619 Complexity++;
2620
2621 // If it isn't worth using an LEA, reject it.
2622 if (Complexity <= 2)
2623 return false;
2624
2625 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2626 return true;
2627}
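// Editor's note, an illustrative tally that is not part of the original
// file: with the scoring above, an address of the form base + index*4 + disp
// reaches Complexity 1 (base) + 1 (index) + 1 (scale > 1) + 1 (disp) == 4,
// which exceeds 2 and is emitted as a single LEA, whereas a lone base
// register (1) or base + disp (2) is left to ordinary ADD/MOV selection.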
2628
2629/// This is only run on TargetGlobalTLSAddress nodes.
2630bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2631 SDValue &Scale, SDValue &Index,
2632 SDValue &Disp, SDValue &Segment) {
2633 assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2634 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2635
2636 X86ISelAddressMode AM;
2637 AM.GV = GA->getGlobal();
2638 AM.Disp += GA->getOffset();
2639 AM.SymbolFlags = GA->getTargetFlags();
2640
2641 MVT VT = N.getSimpleValueType();
2642 if (VT == MVT::i32) {
2643 AM.Scale = 1;
2644 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2645 }
2646
2647 getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
2648 return true;
2649}
2650
2651bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2652 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2653 Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2654 N.getValueType());
2655 return true;
2656 }
2657
2658 // Keep track of the original value type and whether this value was
2659 // truncated. If we see a truncation from pointer type to VT that truncates
2660 // bits that are known to be zero, we can use a narrow reference.
2661 EVT VT = N.getValueType();
2662 bool WasTruncated = false;
2663 if (N.getOpcode() == ISD::TRUNCATE) {
2664 WasTruncated = true;
2665 N = N.getOperand(0);
2666 }
2667
2668 if (N.getOpcode() != X86ISD::Wrapper)
2669 return false;
2670
2671 // We can only use non-GlobalValues as immediates if they were not truncated,
2672 // as we do not have any range information. If we have a GlobalValue and the
2673 // address was not truncated, we can select it as an operand directly.
2674 unsigned Opc = N.getOperand(0)->getOpcode();
2675 if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2676 Op = N.getOperand(0);
2677 // We can only select the operand directly if we didn't have to look past a
2678 // truncate.
2679 return !WasTruncated;
2680 }
2681
2682 // Check that the global's range fits into VT.
2683 auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2684 Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2685 if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2686 return false;
2687
2688 // Okay, we can use a narrow reference.
2689 Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2690 GA->getOffset(), GA->getTargetFlags());
2691 return true;
2692}
2693
2694bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2695 SDValue &Base, SDValue &Scale,
2696 SDValue &Index, SDValue &Disp,
2697 SDValue &Segment) {
2698 assert(Root && P && "Unknown root/parent nodes");
4. '?' condition is true
2699 if (!ISD::isNON_EXTLoad(N.getNode()) ||
2700 !IsProfitableToFold(N, P, Root) ||
5. Calling 'X86DAGToDAGISel::IsProfitableToFold'
2701 !IsLegalToFold(N, P, Root, OptLevel))
2702 return false;
2703
2704 return selectAddr(N.getNode(),
2705 N.getOperand(1), Base, Scale, Index, Disp, Segment);
2706}
2707
2708bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
2709 SDValue &Base, SDValue &Scale,
2710 SDValue &Index, SDValue &Disp,
2711 SDValue &Segment) {
2712 assert(Root && P && "Unknown root/parent nodes");
2713 if (N->getOpcode() != X86ISD::VBROADCAST_LOAD ||
2714 !IsProfitableToFold(N, P, Root) ||
2715 !IsLegalToFold(N, P, Root, OptLevel))
2716 return false;
2717
2718 return selectAddr(N.getNode(),
2719 N.getOperand(1), Base, Scale, Index, Disp, Segment);
2720}
2721
2722/// Return an SDNode that returns the value of the global base register.
2723/// Output instructions required to initialize the global base register,
2724/// if necessary.
2725SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2726 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2727 auto &DL = MF->getDataLayout();
2728 return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2729}
2730
2731bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2732 if (N->getOpcode() == ISD::TRUNCATE)
2733 N = N->getOperand(0).getNode();
2734 if (N->getOpcode() != X86ISD::Wrapper)
2735 return false;
2736
2737 auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2738 if (!GA)
2739 return false;
2740
2741 Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2742 return CR && CR->getSignedMin().sge(-1ull << Width) &&
2743 CR->getSignedMax().slt(1ull << Width);
2744}
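// [Editor's note, not part of the original source] Reading the check above
// literally: for Width == 8 it requires the absolute symbol's signed range to
// lie within [-256, 256), since -1ull << 8 compares as -256 under sge() and
// 1ull << 8 is 256. A hypothetical symbol with range [-100, 100) would pass.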
2745
2746static X86::CondCode getCondFromNode(SDNode *N) {
2747 assert(N->isMachineOpcode() && "Unexpected node");
2748 X86::CondCode CC = X86::COND_INVALID;
2749 unsigned Opc = N->getMachineOpcode();
2750 if (Opc == X86::JCC_1)
2751 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
2752 else if (Opc == X86::SETCCr)
2753 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
2754 else if (Opc == X86::SETCCm)
2755 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
2756 else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
2757 Opc == X86::CMOV64rr)
2758 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
2759 else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
2760 Opc == X86::CMOV64rm)
2761 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
2762
2763 return CC;
2764}
2765
2766/// Test whether the given X86ISD::CMP node has any users that use a flag
2767/// other than ZF.
2768bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2769 // Examine each user of the node.
2770 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2771 UI != UE; ++UI) {
2772 // Only check things that use the flags.
2773 if (UI.getUse().getResNo() != Flags.getResNo())
2774 continue;
2775 // Only examine CopyToReg uses that copy to EFLAGS.
2776 if (UI->getOpcode() != ISD::CopyToReg ||
2777 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2778 return false;
2779 // Examine each user of the CopyToReg use.
2780 for (SDNode::use_iterator FlagUI = UI->use_begin(),
2781 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2782 // Only examine the Flag result.
2783 if (FlagUI.getUse().getResNo() != 1) continue;
2784 // Anything unusual: assume conservatively.
2785 if (!FlagUI->isMachineOpcode()) return false;
2786 // Examine the condition code of the user.
2787 X86::CondCode CC = getCondFromNode(*FlagUI);
2788
2789 switch (CC) {
2790 // Comparisons which only use the zero flag.
2791 case X86::COND_E: case X86::COND_NE:
2792 continue;
2793 // Anything else: assume conservatively.
2794 default:
2795 return false;
2796 }
2797 }
2798 }
2799 return true;
2800}
2801
2802/// Test whether the given X86ISD::CMP node has any uses which require the SF
2803/// flag to be accurate.
2804bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2805 // Examine each user of the node.
2806 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2807 UI != UE; ++UI) {
2808 // Only check things that use the flags.
2809 if (UI.getUse().getResNo() != Flags.getResNo())
2810 continue;
2811 // Only examine CopyToReg uses that copy to EFLAGS.
2812 if (UI->getOpcode() != ISD::CopyToReg ||
2813 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2814 return false;
2815 // Examine each user of the CopyToReg use.
2816 for (SDNode::use_iterator FlagUI = UI->use_begin(),
2817 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2818 // Only examine the Flag result.
2819 if (FlagUI.getUse().getResNo() != 1) continue;
2820 // Anything unusual: assume conservatively.
2821 if (!FlagUI->isMachineOpcode()) return false;
2822 // Examine the condition code of the user.
2823 X86::CondCode CC = getCondFromNode(*FlagUI);
2824
2825 switch (CC) {
2826 // Comparisons which don't examine the SF flag.
2827 case X86::COND_A: case X86::COND_AE:
2828 case X86::COND_B: case X86::COND_BE:
2829 case X86::COND_E: case X86::COND_NE:
2830 case X86::COND_O: case X86::COND_NO:
2831 case X86::COND_P: case X86::COND_NP:
2832 continue;
2833 // Anything else: assume conservatively.
2834 default:
2835 return false;
2836 }
2837 }
2838 }
2839 return true;
2840}
2841
2842static bool mayUseCarryFlag(X86::CondCode CC) {
2843 switch (CC) {
2844 // Comparisons which don't examine the CF flag.
2845 case X86::COND_O: case X86::COND_NO:
2846 case X86::COND_E: case X86::COND_NE:
2847 case X86::COND_S: case X86::COND_NS:
2848 case X86::COND_P: case X86::COND_NP:
2849 case X86::COND_L: case X86::COND_GE:
2850 case X86::COND_G: case X86::COND_LE:
2851 return false;
2852 // Anything else: assume conservatively.
2853 default:
2854 return true;
2855 }
2856}
2857
2858/// Test whether the given node which sets flags has any uses which require the
2859/// CF flag to be accurate.
2860 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2861 // Examine each user of the node.
2862 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2863 UI != UE; ++UI) {
2864 // Only check things that use the flags.
2865 if (UI.getUse().getResNo() != Flags.getResNo())
2866 continue;
2867
2868 unsigned UIOpc = UI->getOpcode();
2869
2870 if (UIOpc == ISD::CopyToReg) {
2871 // Only examine CopyToReg uses that copy to EFLAGS.
2872 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2873 return false;
2874 // Examine each user of the CopyToReg use.
2875 for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2876 FlagUI != FlagUE; ++FlagUI) {
2877 // Only examine the Flag result.
2878 if (FlagUI.getUse().getResNo() != 1)
2879 continue;
2880 // Anything unusual: assume conservatively.
2881 if (!FlagUI->isMachineOpcode())
2882 return false;
2883 // Examine the condition code of the user.
2884 X86::CondCode CC = getCondFromNode(*FlagUI);
2885
2886 if (mayUseCarryFlag(CC))
2887 return false;
2888 }
2889
2890 // This CopyToReg is ok. Move on to the next user.
2891 continue;
2892 }
2893
2894 // This might be an unselected node. So look for the pre-isel opcodes that
2895 // use flags.
2896 unsigned CCOpNo;
2897 switch (UIOpc) {
2898 default:
2899 // Something unusual. Be conservative.
2900 return false;
2901 case X86ISD::SETCC: CCOpNo = 0; break;
2902 case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2903 case X86ISD::CMOV: CCOpNo = 2; break;
2904 case X86ISD::BRCOND: CCOpNo = 2; break;
2905 }
2906
2907 X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2908 if (mayUseCarryFlag(CC))
2909 return false;
2910 }
2911 return true;
2912}
2913
2914/// Check whether or not the chain ending in StoreNode is suitable for doing
2915/// the {load; op; store} to modify transformation.
2916static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2917 SDValue StoredVal, SelectionDAG *CurDAG,
2918 unsigned LoadOpNo,
2919 LoadSDNode *&LoadNode,
2920 SDValue &InputChain) {
2921 // Is the stored value result 0 of the operation?
2922 if (StoredVal.getResNo() != 0) return false;
2923
2924 // Are there other uses of the operation other than the store?
2925 if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2926
2927 // Is the store non-extending and non-indexed?
2928 if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2929 return false;
2930
2931 SDValue Load = StoredVal->getOperand(LoadOpNo);
2932 // Is the stored value a non-extending and non-indexed load?
2933 if (!ISD::isNormalLoad(Load.getNode())) return false;
2934
2935 // Return LoadNode by reference.
2936 LoadNode = cast<LoadSDNode>(Load);
2937
2938 // Is store the only read of the loaded value?
2939 if (!Load.hasOneUse())
2940 return false;
2941
2942 // Is the address of the store the same as the load?
2943 if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2944 LoadNode->getOffset() != StoreNode->getOffset())
2945 return false;
2946
2947 bool FoundLoad = false;
2948 SmallVector<SDValue, 4> ChainOps;
2949 SmallVector<const SDNode *, 4> LoopWorklist;
2950 SmallPtrSet<const SDNode *, 16> Visited;
2951 const unsigned int Max = 1024;
2952
2953 // Visualization of Load-Op-Store fusion:
2954 // -------------------------
2955 // Legend:
2956 // *-lines = Chain operand dependencies.
2957 // |-lines = Normal operand dependencies.
2958 // Dependencies flow down and right. n-suffix references multiple nodes.
2959 //
2960 // C Xn C
2961 // * * *
2962 // * * *
2963 // Xn A-LD Yn TF Yn
2964 // * * \ | * |
2965 // * * \ | * |
2966 // * * \ | => A--LD_OP_ST
2967 // * * \| \
2968 // TF OP \
2969 // * | \ Zn
2970 // * | \
2971 // A-ST Zn
2972 //
2973
2974 // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2975 // #2: Yn -> LD
2976 // #3: ST -> Zn
2977
2978 // Ensure the transform is safe by checking for the dual
2979 // dependencies to make sure we do not induce a loop.
2980
2981 // As LD is a predecessor to both OP and ST we can do this by checking:
2982 // a). if LD is a predecessor to a member of Xn or Yn.
2983 // b). if a Zn is a predecessor to ST.
2984
2985 // However, (b) can only occur through being a chain predecessor to
2986 // ST, which is the same as Zn being a member or predecessor of Xn,
2987 // which is a subset of LD being a predecessor of Xn. So it's
2988 // subsumed by check (a).
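 // [Editor's note, not part of the original source] Concretely: if LD were a
 // predecessor of some chain operand Xn, then after the merge that Xn would
 // be both an operand (via the new TokenFactor) and a user of the fused
 // LD_OP_ST node, i.e. a cycle. The hasPredecessorHelper() walk below rejects
 // exactly that case before the TokenFactor is built.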
2989
2990 SDValue Chain = StoreNode->getChain();
2991
2992 // Gather X elements in ChainOps.
2993 if (Chain == Load.getValue(1)) {
2994 FoundLoad = true;
2995 ChainOps.push_back(Load.getOperand(0));
2996 } else if (Chain.getOpcode() == ISD::TokenFactor) {
2997 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2998 SDValue Op = Chain.getOperand(i);
2999 if (Op == Load.getValue(1)) {
3000 FoundLoad = true;
3001 // Drop Load, but keep its chain. No cycle check necessary.
3002 ChainOps.push_back(Load.getOperand(0));
3003 continue;
3004 }
3005 LoopWorklist.push_back(Op.getNode());
3006 ChainOps.push_back(Op);
3007 }
3008 }
3009
3010 if (!FoundLoad)
3011 return false;
3012
3013 // Worklist is currently Xn. Add Yn to worklist.
3014 for (SDValue Op : StoredVal->ops())
3015 if (Op.getNode() != LoadNode)
3016 LoopWorklist.push_back(Op.getNode());
3017
3018 // Check (a) if Load is a predecessor to Xn + Yn
3019 if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
3020 true))
3021 return false;
3022
3023 InputChain =
3024 CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
3025 return true;
3026}
3027
3028// Change a chain of {load; op; store} of the same value into a simple op
3029// through memory of that value, if the uses of the modified value and its
3030// address are suitable.
3031//
3032// The tablegen memory-operand pattern is currently not able to match
3033// the case where the EFLAGS on the original operation are used.
3034//
3035// To move this to tablegen, we'll need to improve tablegen to allow flags to
3036// be transferred from a node in the pattern to the result node, probably with
3037// a new keyword. For example, we have this
3038// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
3039// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
3040// (implicit EFLAGS)]>;
3041// but maybe need something like this
3042// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
3043// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
3044// (transferrable EFLAGS)]>;
3045//
3046// Until then, we manually fold these and instruction select the operation
3047// here.
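// [Editor's note, not part of the original source] An illustrative DAG for the
// transform performed below, using the DEC64m example from the comment:
//     t1: i64,ch = load t0, %ptr
//     t2: i64    = X86ISD::ADD t1, Constant:i64<-1>
//     ch         = store t2, %ptr
// is selected as a single read-modify-write instruction (DEC64m %ptr) instead
// of separate load, dec and store instructions, provided the address and
// chain checks in isFusableLoadOpStorePattern() succeed.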
3048bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
3049 StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
3050 SDValue StoredVal = StoreNode->getOperand(1);
3051 unsigned Opc = StoredVal->getOpcode();
3052
3053 // Before we try to select anything, make sure this is memory operand size
3054 // and opcode we can handle. Note that this must match the code below that
3055 // actually lowers the opcodes.
3056 EVT MemVT = StoreNode->getMemoryVT();
3057 if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
3058 MemVT != MVT::i8)
3059 return false;
3060
3061 bool IsCommutable = false;
3062 bool IsNegate = false;
3063 switch (Opc) {
3064 default:
3065 return false;
3066 case X86ISD::SUB:
3067 IsNegate = isNullConstant(StoredVal.getOperand(0));
3068 break;
3069 case X86ISD::SBB:
3070 break;
3071 case X86ISD::ADD:
3072 case X86ISD::ADC:
3073 case X86ISD::AND:
3074 case X86ISD::OR:
3075 case X86ISD::XOR:
3076 IsCommutable = true;
3077 break;
3078 }
3079
3080 unsigned LoadOpNo = IsNegate ? 1 : 0;
3081 LoadSDNode *LoadNode = nullptr;
3082 SDValue InputChain;
3083 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
3084 LoadNode, InputChain)) {
3085 if (!IsCommutable)
3086 return false;
3087
3088 // This operation is commutable, try the other operand.
3089 LoadOpNo = 1;
3090 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
3091 LoadNode, InputChain))
3092 return false;
3093 }
3094
3095 SDValue Base, Scale, Index, Disp, Segment;
3096 if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
3097 Segment))
3098 return false;
3099
3100 auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
3101 unsigned Opc8) {
3102 switch (MemVT.getSimpleVT().SimpleTy) {
3103 case MVT::i64:
3104 return Opc64;
3105 case MVT::i32:
3106 return Opc32;
3107 case MVT::i16:
3108 return Opc16;
3109 case MVT::i8:
3110 return Opc8;
3111 default:
3112 llvm_unreachable("Invalid size!")::llvm::llvm_unreachable_internal("Invalid size!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp"
, 3112)
;
3113 }
3114 };
3115
3116 MachineSDNode *Result;
3117 switch (Opc) {
3118 case X86ISD::SUB:
3119 // Handle negate.
3120 if (IsNegate) {
3121 unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
3122 X86::NEG8m);
3123 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3124 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3125 MVT::Other, Ops);
3126 break;
3127 }
3128 LLVM_FALLTHROUGH;
3129 case X86ISD::ADD:
3130 // Try to match inc/dec.
3131 if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
3132 bool IsOne = isOneConstant(StoredVal.getOperand(1));
3133 bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
3134 // ADD/SUB with 1/-1 can be selected as INC/DEC when the carry flag isn't used.
3135 if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
3136 unsigned NewOpc =
3137 ((Opc == X86ISD::ADD) == IsOne)
3138 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3139 : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
3140 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3141 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3142 MVT::Other, Ops);
3143 break;
3144 }
3145 }
3146 LLVM_FALLTHROUGH;
3147 case X86ISD::ADC:
3148 case X86ISD::SBB:
3149 case X86ISD::AND:
3150 case X86ISD::OR:
3151 case X86ISD::XOR: {
3152 auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
3153 switch (Opc) {
3154 case X86ISD::ADD:
3155 return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3156 X86::ADD8mr);
3157 case X86ISD::ADC:
3158 return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3159 X86::ADC8mr);
3160 case X86ISD::SUB:
3161 return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3162 X86::SUB8mr);
3163 case X86ISD::SBB:
3164 return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3165 X86::SBB8mr);
3166 case X86ISD::AND:
3167 return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3168 X86::AND8mr);
3169 case X86ISD::OR:
3170 return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3171 case X86ISD::XOR:
3172 return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3173 X86::XOR8mr);
3174 default:
3175 llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp"
, 3175)
;
3176 }
3177 };
3178 auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
3179 switch (Opc) {
3180 case X86ISD::ADD:
3181 return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
3182 case X86ISD::ADC:
3183 return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
3184 case X86ISD::SUB:
3185 return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
3186 case X86ISD::SBB:
3187 return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
3188 case X86ISD::AND:
3189 return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
3190 case X86ISD::OR:
3191 return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
3192 case X86ISD::XOR:
3193 return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
3194 default:
3195 llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp"
, 3195)
;
3196 }
3197 };
3198 auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3199 switch (Opc) {
3200 case X86ISD::ADD:
3201 return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3202 X86::ADD8mi);
3203 case X86ISD::ADC:
3204 return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3205 X86::ADC8mi);
3206 case X86ISD::SUB:
3207 return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3208 X86::SUB8mi);
3209 case X86ISD::SBB:
3210 return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3211 X86::SBB8mi);
3212 case X86ISD::AND:
3213 return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3214 X86::AND8mi);
3215 case X86ISD::OR:
3216 return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3217 X86::OR8mi);
3218 case X86ISD::XOR:
3219 return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3220 X86::XOR8mi);
3221 default:
3222 llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp"
, 3222)
;
3223 }
3224 };
3225
3226 unsigned NewOpc = SelectRegOpcode(Opc);
3227 SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3228
3229 // See if the operand is a constant that we can fold into an immediate
3230 // operand.
3231 if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3232 int64_t OperandV = OperandC->getSExtValue();
3233
3234 // Check if we can shrink the operand enough to fit in an immediate (or
3235 // fit into a smaller immediate) by negating it and switching the
3236 // operation.
3237 if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3238 ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3239 (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3240 isInt<32>(-OperandV))) &&
3241 hasNoCarryFlagUses(StoredVal.getValue(1))) {
3242 OperandV = -OperandV;
3243 Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3244 }
3245
3246 // First try to fit this into an Imm8 operand. If it doesn't fit, then try
3247 // the larger immediate operand.
3248 if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
3249 Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3250 NewOpc = SelectImm8Opcode(Opc);
3251 } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3252 Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3253 NewOpc = SelectImmOpcode(Opc);
3254 }
3255 }
3256
3257 if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3258 SDValue CopyTo =
3259 CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3260 StoredVal.getOperand(2), SDValue());
3261
3262 const SDValue Ops[] = {Base, Scale, Index, Disp,
3263 Segment, Operand, CopyTo, CopyTo.getValue(1)};
3264 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3265 Ops);
3266 } else {
3267 const SDValue Ops[] = {Base, Scale, Index, Disp,
3268 Segment, Operand, InputChain};
3269 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3270 Ops);
3271 }
3272 break;
3273 }
3274 default:
3275 llvm_unreachable("Invalid opcode!")::llvm::llvm_unreachable_internal("Invalid opcode!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp"
, 3275)
;
3276 }
3277
3278 MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3279 LoadNode->getMemOperand()};
3280 CurDAG->setNodeMemRefs(Result, MemOps);
3281
3282 // Update Load Chain uses as well.
3283 ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3284 ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3285 ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3286 CurDAG->RemoveDeadNode(Node);
3287 return true;
3288}
3289
3290// See if this is an X & Mask that we can match to BEXTR/BZHI.
3291// Where Mask is one of the following patterns:
3292// a) x & (1 << nbits) - 1
3293// b) x & ~(-1 << nbits)
3294// c) x & (-1 >> (32 - y))
3295// d) x << (32 - y) >> (32 - y)
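// [Editor's note, not part of the original source] For example, with
// nbits == y == 5 on an i32 value, all four patterns reduce to x & 0x1f,
// which can be selected as BZHI32rr (keep the 5 lowest bits) or as BEXTR with
// control 0x0500 (bit count 5, shift 0).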
3296bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3297 assert(
3298 (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
3299 "Should be either an and-mask, or right-shift after clearing high bits.");
3300
3301 // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
3302 if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3303 return false;
3304
3305 MVT NVT = Node->getSimpleValueType(0);
3306
3307 // Only supported for 32 and 64 bits.
3308 if (NVT != MVT::i32 && NVT != MVT::i64)
3309 return false;
3310
3311 SDValue NBits;
3312
3313 // If we have BMI2's BZHI, we are ok with multi-use patterns.
3314 // Else, if we only have BMI1's BEXTR, we require one-use.
3315 const bool CanHaveExtraUses = Subtarget->hasBMI2();
3316 auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
3317 return CanHaveExtraUses ||
3318 Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3319 };
3320 auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
3321 auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
3322
3323 auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3324 if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3325 assert(V.getSimpleValueType() == MVT::i32 &&
3326 V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3327 "Expected i64 -> i32 truncation");
3328 V = V.getOperand(0);
3329 }
3330 return V;
3331 };
3332
3333 // a) x & ((1 << nbits) + (-1))
3334 auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
3335 &NBits](SDValue Mask) -> bool {
3336 // Match `add`. Must only have one use!
3337 if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3338 return false;
3339 // We should be adding all-ones constant (i.e. subtracting one.)
3340 if (!isAllOnesConstant(Mask->getOperand(1)))
3341 return false;
3342 // Match `1 << nbits`. Might be truncated. Must only have one use!
3343 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3344 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3345 return false;
3346 if (!isOneConstant(M0->getOperand(0)))
3347 return false;
3348 NBits = M0->getOperand(1);
3349 return true;
3350 };
3351
3352 auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3353 V = peekThroughOneUseTruncation(V);
3354 return CurDAG->MaskedValueIsAllOnes(
3355 V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3356 NVT.getSizeInBits()));
3357 };
3358
3359 // b) x & ~(-1 << nbits)
3360 auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3361 &NBits](SDValue Mask) -> bool {
3362 // Match `~()`. Must only have one use!
3363 if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3364 return false;
3365 // The -1 only has to be all-ones for the final Node's NVT.
3366 if (!isAllOnes(Mask->getOperand(1)))
3367 return false;
3368 // Match `-1 << nbits`. Might be truncated. Must only have one use!
3369 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3370 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3371 return false;
3372 // The -1 only has to be all-ones for the final Node's NVT.
3373 if (!isAllOnes(M0->getOperand(0)))
3374 return false;
3375 NBits = M0->getOperand(1);
3376 return true;
3377 };
3378
3379 // Match potentially-truncated (bitwidth - y)
3380 auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
3381 unsigned Bitwidth) {
3382 // Skip over a truncate of the shift amount.
3383 if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
3384 ShiftAmt = ShiftAmt.getOperand(0);
3385 // The trunc should have been the only user of the real shift amount.
3386 if (!checkOneUse(ShiftAmt))
3387 return false;
3388 }
3389 // Match the shift amount as: (bitwidth - y). It should go away, too.
3390 if (ShiftAmt.getOpcode() != ISD::SUB)
3391 return false;
3392 auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
3393 if (!V0 || V0->getZExtValue() != Bitwidth)
3394 return false;
3395 NBits = ShiftAmt.getOperand(1);
3396 return true;
3397 };
3398
3399 // c) x & (-1 >> (32 - y))
3400 auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
3401 matchShiftAmt](SDValue Mask) -> bool {
3402 // The mask itself may be truncated.
3403 Mask = peekThroughOneUseTruncation(Mask);
3404 unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3405 // Match `l>>`. Must only have one use!
3406 if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3407 return false;
3408 // We should be shifting truly all-ones constant.
3409 if (!isAllOnesConstant(Mask.getOperand(0)))
3410 return false;
3411 SDValue M1 = Mask.getOperand(1);
3412 // The shift amount should not be used externally.
3413 if (!checkOneUse(M1))
3414 return false;
3415 return matchShiftAmt(M1, Bitwidth);
3416 };
3417
3418 SDValue X;
3419
3420 // d) x << (32 - y) >> (32 - y)
3421 auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
3422 &X](SDNode *Node) -> bool {
3423 if (Node->getOpcode() != ISD::SRL)
3424 return false;
3425 SDValue N0 = Node->getOperand(0);
3426 if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
3427 return false;
3428 unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3429 SDValue N1 = Node->getOperand(1);
3430 SDValue N01 = N0->getOperand(1);
3431 // Both of the shifts must be by the exact same value.
3432 // There should not be any uses of the shift amount outside of the pattern.
3433 if (N1 != N01 || !checkTwoUse(N1))
3434 return false;
3435 if (!matchShiftAmt(N1, Bitwidth))
3436 return false;
3437 X = N0->getOperand(0);
3438 return true;
3439 };
3440
3441 auto matchLowBitMask = [matchPatternA, matchPatternB,
3442 matchPatternC](SDValue Mask) -> bool {
3443 return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3444 };
3445
3446 if (Node->getOpcode() == ISD::AND) {
3447 X = Node->getOperand(0);
3448 SDValue Mask = Node->getOperand(1);
3449
3450 if (matchLowBitMask(Mask)) {
3451 // Great.
3452 } else {
3453 std::swap(X, Mask);
3454 if (!matchLowBitMask(Mask))
3455 return false;
3456 }
3457 } else if (!matchPatternD(Node))
3458 return false;
3459
3460 SDLoc DL(Node);
3461
3462 // Truncate the shift amount.
3463 NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3464 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3465
3466 // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
3467 // All the other bits are undefined, we do not care about them.
3468 SDValue ImplDef = SDValue(
3469 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3470 insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3471
3472 SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
3473 insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
3474 NBits = SDValue(
3475 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
3476 NBits, SRIdxVal), 0);
3477 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3478
3479 if (Subtarget->hasBMI2()) {
3480 // Great, just emit the BZHI.
3481 if (NVT != MVT::i32) {
3482 // But have to place the bit count into the wide-enough register first.
3483 NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
3484 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3485 }
3486
3487 SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
3488 ReplaceNode(Node, Extract.getNode());
3489 SelectCode(Extract.getNode());
3490 return true;
3491 }
3492
3493 // Else, if we do *NOT* have BMI2, let's find out if the 'X' is
3494 // *logically* shifted (potentially with a one-use trunc in between),
3495 // and the truncation was the only use of the shift,
3496 // and if so look past one-use truncation.
3497 {
3498 SDValue RealX = peekThroughOneUseTruncation(X);
3499 // FIXME: only if the shift is one-use?
3500 if (RealX != X && RealX.getOpcode() == ISD::SRL)
3501 X = RealX;
3502 }
3503
3504 MVT XVT = X.getSimpleValueType();
3505
3506 // Else, emitting BEXTR requires one more step.
3507 // The 'control' of BEXTR has the pattern of:
3508 // [15...8 bit][ 7...0 bit] location
3509 // [ bit count][ shift] name
3510 // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
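 // [Editor's note, not part of the original source] Another worked example of
 // the control encoding: to extract 3 bits starting at bit 4, shift = 4 and
 // bit count = 3, so control = (3 << 8) | 4 = 0x0304 and
 // BEXTR(x, 0x0304) == (x >> 4) & 0b111.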
3511
3512 // Shift NBits left by 8 bits, thus producing 'control'.
3513 // This makes the low 8 bits zero.
3514 SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3515 SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3516 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3517
3518 // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3519 // FIXME: only if the shift is one-use?
3520 if (X.getOpcode() == ISD::SRL) {
3521 SDValue ShiftAmt = X.getOperand(1);
3522 X = X.getOperand(0);
3523
3524 assert(ShiftAmt.getValueType() == MVT::i8 &&
3525 "Expected shift amount to be i8");
3526
3527 // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3528 // We could zext to i16 in some form, but we intentionally don't do that.
3529 SDValue OrigShiftAmt = ShiftAmt;
3530 ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3531 insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3532
3533 // And now 'or' these low 8 bits of shift amount into the 'control'.
3534 Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3535 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3536 }
3537
3538 // But have to place the 'control' into the wide-enough register first.
3539 if (XVT != MVT::i32) {
3540 Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
3541 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3542 }
3543
3544 // And finally, form the BEXTR itself.
3545 SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3546
3547 // The 'X' was originally truncated. Do that now.
3548 if (XVT != NVT) {
3549 insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
3550 Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3551 }
3552
3553 ReplaceNode(Node, Extract.getNode());
3554 SelectCode(Extract.getNode());
3555
3556 return true;
3557}
3558
3559// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
3560MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3561 MVT NVT = Node->getSimpleValueType(0);
3562 SDLoc dl(Node);
3563
3564 SDValue N0 = Node->getOperand(0);
3565 SDValue N1 = Node->getOperand(1);
3566
3567 // If we have TBM we can use an immediate for the control. If we have BMI
3568 // we should only do this if the BEXTR instruction is implemented well.
3569 // Otherwise moving the control into a register makes this more costly.
3570 // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3571 // hoisting the move immediate would make it worthwhile with a less optimal
3572 // BEXTR?
3573 bool PreferBEXTR =
3574 Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
3575 if (!PreferBEXTR && !Subtarget->hasBMI2())
3576 return nullptr;
3577
3578 // Must have a shift right.
3579 if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3580 return nullptr;
3581
3582 // Shift can't have additional users.
3583 if (!N0->hasOneUse())
3584 return nullptr;
3585
3586 // Only supported for 32 and 64 bits.
3587 if (NVT != MVT::i32 && NVT != MVT::i64)
3588 return nullptr;
3589
3590 // Shift amount and RHS of and must be constant.
3591 ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3592 ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3593 if (!MaskCst || !ShiftCst)
3594 return nullptr;
3595
3596 // And RHS must be a mask.
3597 uint64_t Mask = MaskCst->getZExtValue();
3598 if (!isMask_64(Mask))
3599 return nullptr;
3600
3601 uint64_t Shift = ShiftCst->getZExtValue();
3602 uint64_t MaskSize = countPopulation(Mask);
3603
3604 // Don't interfere with something that can be handled by extracting AH.
3605 // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3606 if (Shift == 8 && MaskSize == 8)
3607 return nullptr;
3608
3609 // Make sure we are only using bits that were in the original value, not
3610 // shifted in.
3611 if (Shift + MaskSize > NVT.getSizeInBits())
3612 return nullptr;
3613
3614 // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
3615 // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
3616 // does not fit into 32 bits. Load folding is not a sufficient reason.
3617 if (!PreferBEXTR && MaskSize <= 32)
3618 return nullptr;
3619
3620 SDValue Control;
3621 unsigned ROpc, MOpc;
3622
3623 if (!PreferBEXTR) {
3624 assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
3625 // If we can't make use of BEXTR then we can't fuse shift+mask stages.
3626 // Let's perform the mask first, and apply shift later. Note that we need to
3627 // widen the mask to account for the fact that we'll apply shift afterwards!
3628 Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
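 // [Editor's note, not part of the original source] Worked example of the
 // widening: for (x >> 4) & 0xff (Shift == 4, MaskSize == 8), BZHI with a
 // control of 12 keeps the low 12 bits of x, and the SHR by 4 emitted at the
 // end of this function then yields the same 8 bits that BEXTR would have
 // extracted directly.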
3629 ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
3630 MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
3631 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3632 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
3633 } else {
3634 // The 'control' of BEXTR has the pattern of:
3635 // [15...8 bit][ 7...0 bit] location
3636 // [ bit count][ shift] name
3637 // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
3638 Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3639 if (Subtarget->hasTBM()) {
3640 ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3641 MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3642 } else {
3643 assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
3644 // BMI requires the immediate to be placed in a register.
3645 ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3646 MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3647 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3648 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
3649 }
3650 }
3651
3652 MachineSDNode *NewNode;
3653 SDValue Input = N0->getOperand(0);
3654 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3655 if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3656 SDValue Ops[] = {
3657 Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
3658 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3659 NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3660 // Update the chain.
3661 ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
3662 // Record the mem-refs
3663 CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3664 } else {
3665 NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
3666 }
3667
3668 if (!PreferBEXTR) {
3669 // We still need to apply the shift.
3670 SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
3671 unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
3672 NewNode =
3673 CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
3674 }
3675
3676 return NewNode;
3677}
3678
3679// Emit a PCMPISTR(I/M) instruction.
3680MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3681 bool MayFoldLoad, const SDLoc &dl,
3682 MVT VT, SDNode *Node) {
3683 SDValue N0 = Node->getOperand(0);
3684 SDValue N1 = Node->getOperand(1);
3685 SDValue Imm = Node->getOperand(2);
3686 const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3687 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3688
3689 // Try to fold a load. No need to check alignment.
3690 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3691 if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3692 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3693 N1.getOperand(0) };
3694 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3695 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3696 // Update the chain.
3697 ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3698 // Record the mem-refs
3699 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3700 return CNode;
3701 }
3702
3703 SDValue Ops[] = { N0, N1, Imm };
3704 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3705 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3706 return CNode;
3707}
3708
3709// Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3710// to emit a second instruction after this one. This is needed since we have two
3711// copyToReg nodes glued before this and we need to continue that glue through.
3712MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3713 bool MayFoldLoad, const SDLoc &dl,
3714 MVT VT, SDNode *Node,
3715 SDValue &InFlag) {
3716 SDValue N0 = Node->getOperand(0);
3717 SDValue N2 = Node->getOperand(2);
3718 SDValue Imm = Node->getOperand(4);
3719 const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3720 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3721
3722 // Try to fold a load. No need to check alignment.
3723 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3724 if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
1. Assuming 'MayFoldLoad' is true
2. Calling 'X86DAGToDAGISel::tryFoldLoad'
3725 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3726 N2.getOperand(0), InFlag };
3727 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3728 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3729 InFlag = SDValue(CNode, 3);
3730 // Update the chain.
3731 ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3732 // Record the mem-refs
3733 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3734 return CNode;
3735 }
3736
3737 SDValue Ops[] = { N0, N2, Imm, InFlag };
3738 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3739 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3740 InFlag = SDValue(CNode, 2);
3741 return CNode;
3742}
3743
3744bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3745 EVT VT = N->getValueType(0);
3746
3747 // Only handle scalar shifts.
3748 if (VT.isVector())
3749 return false;
3750
3751 // Narrower shifts only mask to 5 bits in hardware.
3752 unsigned Size = VT == MVT::i64 ? 64 : 32;
3753
3754 SDValue OrigShiftAmt = N->getOperand(1);
3755 SDValue ShiftAmt = OrigShiftAmt;
3756 SDLoc DL(N);
3757
3758 // Skip over a truncate of the shift amount.
3759 if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3760 ShiftAmt = ShiftAmt->getOperand(0);
3761
3762 // This function is called after X86DAGToDAGISel::matchBitExtract(),
3763 // so we are not afraid that we might mess up BZHI/BEXTR pattern.
3764
3765 SDValue NewShiftAmt;
3766 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3767 SDValue Add0 = ShiftAmt->getOperand(0);
3768 SDValue Add1 = ShiftAmt->getOperand(1);
3769 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3770 // to avoid the ADD/SUB.
3771 if (isa<ConstantSDNode>(Add1) &&
3772 cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3773 NewShiftAmt = Add0;
3774 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3775 // generate a NEG instead of a SUB of a constant.
3776 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3777 isa<ConstantSDNode>(Add0) &&
3778 cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3779 cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3780 // Insert a negate op.
3781 // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3782 // that uses it that's not a shift.
3783 EVT SubVT = ShiftAmt.getValueType();
3784 SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3785 SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3786 NewShiftAmt = Neg;
3787
3788 // Insert these operands into a valid topological order so they can
3789 // get selected independently.
3790 insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3791 insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3792 } else
3793 return false;
3794 } else
3795 return false;
3796
3797 if (NewShiftAmt.getValueType() != MVT::i8) {
3798 // Need to truncate the shift amount.
3799 NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3800 // Add to a correct topological ordering.
3801 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3802 }
3803
3804 // Insert a new mask to keep the shift amount legal. This should be removed
3805 // by isel patterns.
3806 NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3807 CurDAG->getConstant(Size - 1, DL, MVT::i8));
3808 // Place in a correct topological ordering.
3809 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3810
3811 SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3812 NewShiftAmt);
3813 if (UpdatedNode != N) {
3814 // If we found an existing node, we should replace ourselves with that node
3815 // and wait for it to be selected after its other users.
3816 ReplaceNode(N, UpdatedNode);
3817 return true;
3818 }
3819
3820 // If the original shift amount is now dead, delete it so that we don't run
3821 // it through isel.
3822 if (OrigShiftAmt.getNode()->use_empty())
3823 CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3824
3825 // Now that we've optimized the shift amount, defer to normal isel to get
3826 // load folding and legacy vs BMI2 selection without repeating it here.
3827 SelectCode(N);
3828 return true;
3829}
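// [Editor's note, not part of the original source] Illustrative effect of the
// rewrite above for an i32 shift: 'x << (amt + 32)' is reduced to 'x << amt',
// and 'x << (32 - amt)' becomes 'x << ((0 - amt) & 31)', because hardware (and
// the AND with Size - 1 inserted above) only looks at the low 5 bits of the
// shift amount.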
3830
3831bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
3832 MVT NVT = N->getSimpleValueType(0);
3833 unsigned Opcode = N->getOpcode();
3834 SDLoc dl(N);
3835
3836 // For operations of the form (x << C1) op C2, check if we can use a smaller
3837 // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
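 // [Editor's note, not part of the original source] For example (illustrative):
 // (x << 8) | 0x1200 can be rewritten as (x | 0x12) << 8, replacing an
 // immediate that needs 32 bits of encoding with one that fits in 8 bits; the
 // check below ensures no set bits of C2 fall into the low C1 bits that the
 // shift would discard.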
3838 SDValue Shift = N->getOperand(0);
3839 SDValue N1 = N->getOperand(1);
3840
3841 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3842 if (!Cst)
3843 return false;
3844
3845 int64_t Val = Cst->getSExtValue();
3846
3847 // If we have an any_extend feeding the AND, look through it to see if there
3848 // is a shift behind it. But only if the AND doesn't use the extended bits.
3849 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
3850 bool FoundAnyExtend = false;
3851 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
3852 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
3853 isUInt<32>(Val)) {
3854 FoundAnyExtend = true;
3855 Shift = Shift.getOperand(0);
3856 }
3857
3858 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
3859 return false;
3860
3861 // i8 is unshrinkable, i16 should be promoted to i32.
3862 if (NVT != MVT::i32 && NVT != MVT::i64)
3863 return false;
3864
3865 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
3866 if (!ShlCst)
3867 return false;
3868
3869 uint64_t ShAmt = ShlCst->getZExtValue();
3870
3871 // Make sure that we don't change the operation by removing bits.
3872 // This only matters for OR and XOR, AND is unaffected.
3873 uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
3874 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3875 return false;
3876
3877 // Check the minimum bitwidth for the new constant.
3878 // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3879 auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
3880 if (Opcode == ISD::AND) {
3881 // AND32ri is the same as AND64ri32 with zext imm.
3882 // Try this before sign extended immediates below.
3883 ShiftedVal = (uint64_t)Val >> ShAmt;
3884 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3885 return true;
3886 // Also swap order when the AND can become MOVZX.
3887 if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
3888 return true;
3889 }
3890 ShiftedVal = Val >> ShAmt;
3891 if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
3892 (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
3893 return true;
3894 if (Opcode != ISD::AND) {
3895 // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
3896 ShiftedVal = (uint64_t)Val >> ShAmt;
3897 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3898 return true;
3899 }
3900 return false;
3901 };
3902
3903 int64_t ShiftedVal;
3904 if (!CanShrinkImmediate(ShiftedVal))
3905 return false;
3906
3907 // Ok, we can reorder to get a smaller immediate.
3908
3909 // But, it's possible the original immediate allowed an AND to become MOVZX.
3910 // This is done late so that the MaskedValueIsZero call is deferred as long
3911 // as possible.
3912 if (Opcode == ISD::AND) {
3913 // Find the smallest zext this could possibly be.
3914 unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
3915 ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
3916
3917 // Figure out which bits need to be zero to achieve that mask.
3918 APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
3919 ZExtWidth);
3920 NeededMask &= ~Cst->getAPIntValue();
3921
3922 if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
3923 return false;
3924 }
3925
3926 SDValue X = Shift.getOperand(0);
3927 if (FoundAnyExtend) {
3928 SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
3929 insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
3930 X = NewX;
3931 }
3932
3933 SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
3934 insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
3935 SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
3936 insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
3937 SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
3938 Shift.getOperand(1));
3939 ReplaceNode(N, NewSHL.getNode());
3940 SelectCode(NewSHL.getNode());
3941 return true;
3942}
3943
3944/// If the high bits of an 'and' operand are known zero, try setting the
3945/// high bits of an 'and' constant operand to produce a smaller encoding by
3946/// creating a small, sign-extended negative immediate rather than a large
3947/// positive one. This reverses a transform in SimplifyDemandedBits that
3948/// shrinks mask constants by clearing bits. There is also a possibility that
3949/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3950/// case, just replace the 'and'. Return 'true' if the node is replaced.
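/// [Editor's note, not part of the original source] Illustrative case: for an
/// i32 'and' with mask 0x000000F0 whose other operand is known to have its
/// upper 24 bits zero, the mask can be widened to 0xFFFFFFF0, i.e. the
/// sign-extended 8-bit immediate -16, which encodes smaller than the original
/// positive 32-bit immediate.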
3951bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3952 // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3953 // have immediate operands.
3954 MVT VT = And->getSimpleValueType(0);
3955 if (VT != MVT::i32 && VT != MVT::i64)
3956 return false;
3957
3958 auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3959 if (!And1C)
3960 return false;
3961
3962 // Bail out if the mask constant is already negative. It can't shrink any more.
3963 // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3964 // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3965 // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3966 // are negative too.
3967 APInt MaskVal = And1C->getAPIntValue();
3968 unsigned MaskLZ = MaskVal.countLeadingZeros();
3969 if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3970 return false;
3971
3972 // Don't extend into the upper 32 bits of a 64 bit mask.
3973 if (VT == MVT::i64 && MaskLZ >= 32) {
3974 MaskLZ -= 32;
3975 MaskVal = MaskVal.trunc(32);
3976 }
3977
3978 SDValue And0 = And->getOperand(0);
3979 APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3980 APInt NegMaskVal = MaskVal | HighZeros;
3981
3982 // If a negative constant would not allow a smaller encoding, there's no need
3983 // to continue. Only change the constant when we know it's a win.
3984 unsigned MinWidth = NegMaskVal.getMinSignedBits();
3985 if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3986 return false;
3987
3988 // Extend masks if we truncated above.
3989 if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3990 NegMaskVal = NegMaskVal.zext(64);
3991 HighZeros = HighZeros.zext(64);
3992 }
3993
3994 // The variable operand must be all zeros in the top bits to allow using the
3995 // new, negative constant as the mask.
3996 if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3997 return false;
3998
3999 // Check if the mask is -1. In that case, this is an unnecessary instruction
4000 // that escaped earlier analysis.
4001 if (NegMaskVal.isAllOnesValue()) {
4002 ReplaceNode(And, And0.getNode());
4003 return true;
4004 }
4005
4006 // A negative mask allows a smaller encoding. Create a new 'and' node.
4007 SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
4008 SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
4009 ReplaceNode(And, NewAnd.getNode());
4010 SelectCode(NewAnd.getNode());
4011 return true;
4012}
4013
4014static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
4015 bool FoldedBCast, bool Masked) {
4016 if (Masked) {
4017 if (FoldedLoad) {
4018 switch (TestVT.SimpleTy) {
4019 default: llvm_unreachable("Unexpected VT!");
4020 case MVT::v16i8:
4021 return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
4022 case MVT::v8i16:
4023 return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
4024 case MVT::v4i32:
4025 return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
4026 case MVT::v2i64:
4027 return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
4028 case MVT::v32i8:
4029 return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
4030 case MVT::v16i16:
4031 return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
4032 case MVT::v8i32:
4033 return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
4034 case MVT::v4i64:
4035 return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
4036 case MVT::v64i8:
4037 return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
4038 case MVT::v32i16:
4039 return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
4040 case MVT::v16i32:
4041 return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
4042 case MVT::v8i64:
4043 return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
4044 }
4045 }
4046
4047 if (FoldedBCast) {
4048 switch (TestVT.SimpleTy) {
4049 default: llvm_unreachable("Unexpected VT!");
4050 case MVT::v4i32:
4051 return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
4052 case MVT::v2i64:
4053 return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
4054 case MVT::v8i32:
4055 return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
4056 case MVT::v4i64:
4057 return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
4058 case MVT::v16i32:
4059 return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
4060 case MVT::v8i64:
4061 return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
4062 }
4063 }
4064
4065 switch (TestVT.SimpleTy) {
4066 default: llvm_unreachable("Unexpected VT!");
4067 case MVT::v16i8:
4068 return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
4069 case MVT::v8i16:
4070 return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
4071 case MVT::v4i32:
4072 return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
4073 case MVT::v2i64:
4074 return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
4075 case MVT::v32i8:
4076 return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
4077 case MVT::v16i16:
4078 return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
4079 case MVT::v8i32:
4080 return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
4081 case MVT::v4i64:
4082 return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
4083 case MVT::v64i8:
4084 return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
4085 case MVT::v32i16:
4086 return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
4087 case MVT::v16i32:
4088 return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
4089 case MVT::v8i64:
4090 return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
4091 }
4092 }
4093
4094 if (FoldedLoad) {
4095 switch (TestVT.SimpleTy) {
4096 default: llvm_unreachable("Unexpected VT!");
4097 case MVT::v16i8:
4098 return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
4099 case MVT::v8i16:
4100 return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
4101 case MVT::v4i32:
4102 return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
4103 case MVT::v2i64:
4104 return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
4105 case MVT::v32i8:
4106 return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
4107 case MVT::v16i16:
4108 return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
4109 case MVT::v8i32:
4110 return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
4111 case MVT::v4i64:
4112 return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
4113 case MVT::v64i8:
4114 return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
4115 case MVT::v32i16:
4116 return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
4117 case MVT::v16i32:
4118 return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
4119 case MVT::v8i64:
4120 return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
4121 }
4122 }
4123
4124 if (FoldedBCast) {
4125 switch (TestVT.SimpleTy) {
4126 default: llvm_unreachable("Unexpected VT!");
4127 case MVT::v4i32:
4128 return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
4129 case MVT::v2i64:
4130 return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
4131 case MVT::v8i32:
4132 return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
4133 case MVT::v4i64:
4134 return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
4135 case MVT::v16i32:
4136 return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
4137 case MVT::v8i64:
4138 return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
4139 }
4140 }
4141
4142 switch (TestVT.SimpleTy) {
4143 default: llvm_unreachable("Unexpected VT!");
4144 case MVT::v16i8:
4145 return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
4146 case MVT::v8i16:
4147 return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
4148 case MVT::v4i32:
4149 return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
4150 case MVT::v2i64:
4151 return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
4152 case MVT::v32i8:
4153 return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
4154 case MVT::v16i16:
4155 return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
4156 case MVT::v8i32:
4157 return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
4158 case MVT::v4i64:
4159 return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
4160 case MVT::v64i8:
4161 return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
4162 case MVT::v32i16:
4163 return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
4164 case MVT::v16i32:
4165 return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
4166 case MVT::v8i64:
4167 return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
4168 }
4169}
4170
4171// Try to create VPTESTM instruction. If InMask is not null, it will be used
4172// to form a masked operation.
4173bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
4174 SDValue InMask) {
4175 assert(Subtarget->hasAVX512() && "Expected AVX512!");
4176 assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
4177 "Unexpected VT!");
4178
4179 // Look for equal and not equal compares.
4180 ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4181 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4182 return false;
4183
4184 SDValue SetccOp0 = Setcc.getOperand(0);
4185 SDValue SetccOp1 = Setcc.getOperand(1);
4186
4187 // Canonicalize the all zero vector to the RHS.
4188 if (ISD::isBuildVectorAllZeros(SetccOp0.getNode()))
4189 std::swap(SetccOp0, SetccOp1);
4190
4191 // See if we're comparing against zero.
4192 if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode()))
4193 return false;
4194
4195 SDValue N0 = SetccOp0;
4196
4197 MVT CmpVT = N0.getSimpleValueType();
4198 MVT CmpSVT = CmpVT.getVectorElementType();
4199
4200 // Start with both operands the same. We'll try to refine this.
4201 SDValue Src0 = N0;
4202 SDValue Src1 = N0;
4203
4204 {
4205 // Look through single use bitcasts.
4206 SDValue N0Temp = N0;
4207 if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
4208 N0Temp = N0.getOperand(0);
4209
4210 // Look for single use AND.
4211 if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4212 Src0 = N0Temp.getOperand(0);
4213 Src1 = N0Temp.getOperand(1);
4214 }
4215 }
4216
4217 // Without VLX we need to widen the load.
4218 bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4219
4220 // We can only fold loads if the sources are unique.
4221 bool CanFoldLoads = Src0 != Src1;
4222
4223 // Try to fold loads unless we need to widen.
4224 bool FoldedLoad = false;
4225 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
4226 if (!Widen && CanFoldLoads) {
4227 Load = Src1;
4228 FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
4229 Tmp4);
4230 if (!FoldedLoad) {
4231 // AND is commutative.
4232 Load = Src0;
4233 FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
4234 Tmp3, Tmp4);
4235 if (FoldedLoad)
4236 std::swap(Src0, Src1);
4237 }
4238 }
4239
4240 auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
4241 // Look through single use bitcasts.
4242 if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) {
4243 Parent = Src.getNode();
4244 Src = Src.getOperand(0);
4245 }
4246
4247 if (Src.getOpcode() == X86ISD::VBROADCAST_LOAD && Src.hasOneUse()) {
4248 auto *MemIntr = cast<MemIntrinsicSDNode>(Src);
4249 if (MemIntr->getMemoryVT().getSizeInBits() == CmpSVT.getSizeInBits())
4250 return Src;
4251 }
4252
4253 return SDValue();
4254 };
4255
4256 // If we didn't fold a load, try to match broadcast. No widening limitation
4257 // for this. But only 32 and 64 bit types are supported.
4258 bool FoldedBCast = false;
4259 if (!FoldedLoad && CanFoldLoads &&
4260 (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
4261 SDNode *ParentNode = N0.getNode();
4262 if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
4263 FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
4264 Tmp1, Tmp2, Tmp3, Tmp4);
4265 }
4266
4267 // Try the other operand.
4268 if (!FoldedBCast) {
4269 SDNode *ParentNode = N0.getNode();
4270 if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
4271 FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
4272 Tmp1, Tmp2, Tmp3, Tmp4);
4273 if (FoldedBCast)
4274 std::swap(Src0, Src1);
4275 }
4276 }
4277 }
4278
4279 auto getMaskRC = [](MVT MaskVT) {
4280 switch (MaskVT.SimpleTy) {
4281 default: llvm_unreachable("Unexpected VT!");
4282 case MVT::v2i1: return X86::VK2RegClassID;
4283 case MVT::v4i1: return X86::VK4RegClassID;
4284 case MVT::v8i1: return X86::VK8RegClassID;
4285 case MVT::v16i1: return X86::VK16RegClassID;
4286 case MVT::v32i1: return X86::VK32RegClassID;
4287 case MVT::v64i1: return X86::VK64RegClassID;
4288 }
4289 };
4290
4291 bool IsMasked = InMask.getNode() != nullptr;
4292
4293 SDLoc dl(Root);
4294
4295 MVT ResVT = Setcc.getSimpleValueType();
4296 MVT MaskVT = ResVT;
4297 if (Widen) {
4298 // Widen the inputs using insert_subreg or copy_to_regclass.
4299 unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
4300 unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
4301 unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
4302 CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
4303 MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4304 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4305 CmpVT), 0);
4306 Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4307
4308 assert(!FoldedLoad && "Shouldn't have folded the load");
4309 if (!FoldedBCast)
4310 Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4311
4312 if (IsMasked) {
4313 // Widen the mask.
4314 unsigned RegClass = getMaskRC(MaskVT);
4315 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4316 InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4317 dl, MaskVT, InMask, RC), 0);
4318 }
4319 }
4320
4321 bool IsTestN = CC == ISD::SETEQ;
4322 unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4323 IsMasked);
4324
4325 MachineSDNode *CNode;
4326 if (FoldedLoad || FoldedBCast) {
4327 SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4328
4329 if (IsMasked) {
4330 SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4331 Load.getOperand(0) };
4332 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4333 } else {
4334 SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4335 Load.getOperand(0) };
4336 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4337 }
4338
4339 // Update the chain.
4340 ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
4341 // Record the mem-refs
4342 CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Load)->getMemOperand()});
4343 } else {
4344 if (IsMasked)
4345 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
4346 else
4347 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
4348 }
4349
4350 // If we widened, we need to shrink the mask VT.
4351 if (Widen) {
4352 unsigned RegClass = getMaskRC(ResVT);
4353 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4354 CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4355 dl, ResVT, SDValue(CNode, 0), RC);
4356 }
4357
4358 ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
4359 CurDAG->RemoveDeadNode(Root);
4360 return true;
4361}
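// Sketch of the pattern handled above (hypothetical types): a v16i32
// (setcc (and X, Y), all-zeros, setne) becomes a single VPTESTMDZrr that
// tests X against Y and writes the v16i1 result directly; with seteq the
// VPTESTNM form is selected instead.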
4362
4363// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
4364// into vpternlog.
4365bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
4366 assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
4367
4368 MVT NVT = N->getSimpleValueType(0);
4369
4370 // Make sure we support VPTERNLOG.
4371 if (!NVT.isVector() || !Subtarget->hasAVX512())
4372 return false;
4373
4374 // We need VLX for 128/256-bit.
4375 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
4376 return false;
4377
4378 SDValue N0 = N->getOperand(0);
4379 SDValue N1 = N->getOperand(1);
4380
4381 // Canonicalize AND to LHS.
4382 if (N1.getOpcode() == ISD::AND)
4383 std::swap(N0, N1);
4384
4385 if (N0.getOpcode() != ISD::AND ||
4386 N1.getOpcode() != X86ISD::ANDNP ||
4387 !N0.hasOneUse() || !N1.hasOneUse())
4388 return false;
4389
4390 // ANDN is not commutable, use it to pick down A and C.
4391 SDValue A = N1.getOperand(0);
4392 SDValue C = N1.getOperand(1);
4393
4394 // AND is commutable, if one operand matches A, the other operand is B.
4395 // Otherwise this isn't a match.
4396 SDValue B;
4397 if (N0.getOperand(0) == A)
4398 B = N0.getOperand(1);
4399 else if (N0.getOperand(1) == A)
4400 B = N0.getOperand(0);
4401 else
4402 return false;
4403
4404 SDLoc dl(N);
4405 SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
4406 SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
4407 ReplaceNode(N, Ternlog.getNode());
4408 SelectCode(Ternlog.getNode());
4409 return true;
4410}
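// Where the 0xCA immediate comes from: vpternlog evaluates a three-input
// truth table in which A contributes 0xF0, B contributes 0xCC and C
// contributes 0xAA, so (A & B) | (~A & C) = (0xF0 & 0xCC) | (0x0F & 0xAA)
// = 0xC0 | 0x0A = 0xCA.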
4411
4412void X86DAGToDAGISel::Select(SDNode *Node) {
4413 MVT NVT = Node->getSimpleValueType(0);
4414 unsigned Opcode = Node->getOpcode();
4415 SDLoc dl(Node);
4416
4417 if (Node->isMachineOpcode()) {
4418 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
4419 Node->setNodeId(-1);
4420 return; // Already selected.
4421 }
4422
4423 switch (Opcode) {
4424 default: break;
4425 case ISD::INTRINSIC_VOID: {
4426 unsigned IntNo = Node->getConstantOperandVal(1);
4427 switch (IntNo) {
4428 default: break;
4429 case Intrinsic::x86_sse3_monitor:
4430 case Intrinsic::x86_monitorx:
4431 case Intrinsic::x86_clzero: {
4432 bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
4433
4434 unsigned Opc = 0;
4435 switch (IntNo) {
4436 default: llvm_unreachable("Unexpected intrinsic!");
4437 case Intrinsic::x86_sse3_monitor:
4438 if (!Subtarget->hasSSE3())
4439 break;
4440 Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
4441 break;
4442 case Intrinsic::x86_monitorx:
4443 if (!Subtarget->hasMWAITX())
4444 break;
4445 Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
4446 break;
4447 case Intrinsic::x86_clzero:
4448 if (!Subtarget->hasCLZERO())
4449 break;
4450 Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
4451 break;
4452 }
4453
4454 if (Opc) {
4455 unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
4456 SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
4457 Node->getOperand(2), SDValue());
4458 SDValue InFlag = Chain.getValue(1);
4459
4460 if (IntNo == Intrinsic::x86_sse3_monitor ||
4461 IntNo == Intrinsic::x86_monitorx) {
4462 // Copy the other two operands to ECX and EDX.
4463 Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
4464 InFlag);
4465 InFlag = Chain.getValue(1);
4466 Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
4467 InFlag);
4468 InFlag = Chain.getValue(1);
4469 }
4470
4471 MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4472 { Chain, InFlag});
4473 ReplaceNode(Node, CNode);
4474 return;
4475 }
4476
4477 break;
4478 }
4479 }
4480
4481 break;
4482 }
4483 case ISD::BRIND: {
4484 if (Subtarget->isTargetNaCl())
4485 // NaCl has its own pass where jmp %r32 instructions are converted to jmp %r64. We
4486 // leave the instruction alone.
4487 break;
4488 if (Subtarget->isTarget64BitILP32()) {
4489 // Converts a 32-bit register to a 64-bit, zero-extended version of
4490 // it. This is needed because x86-64 can do many things, but jmp %r32
4491 // ain't one of them.
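// In other words (illustrative only): a pointer held in %eax cannot be used
// by an indirect branch in 64-bit mode, so it is zero-extended first and
// the branch uses %rax instead.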
4492 const SDValue &Target = Node->getOperand(1);
4493 assert(Target.getSimpleValueType() == llvm::MVT::i32);
4494 SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
4495 SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
4496 Node->getOperand(0), ZextTarget);
4497 ReplaceNode(Node, Brind.getNode());
4498 SelectCode(ZextTarget.getNode());
4499 SelectCode(Brind.getNode());
4500 return;
4501 }
4502 break;
4503 }
4504 case X86ISD::GlobalBaseReg:
4505 ReplaceNode(Node, getGlobalBaseReg());
4506 return;
4507
4508 case ISD::BITCAST:
4509 // Just drop all 128/256/512-bit bitcasts.
4510 if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
4511 NVT == MVT::f128) {
4512 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
4513 CurDAG->RemoveDeadNode(Node);
4514 return;
4515 }
4516 break;
4517
4518 case ISD::VSELECT: {
4519 // Replace VSELECT with non-mask conditions with BLENDV.
4520 if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4521 break;
4522
4523 assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
4524 SDValue Blendv = CurDAG->getNode(
4525 X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
4526 Node->getOperand(1), Node->getOperand(2));
4527 ReplaceNode(Node, Blendv.getNode());
4528 SelectCode(Blendv.getNode());
4529 // We already called ReplaceUses.
4530 return;
4531 }
4532
4533 case ISD::SRL:
4534 if (matchBitExtract(Node))
4535 return;
4536 LLVM_FALLTHROUGH;
4537 case ISD::SRA:
4538 case ISD::SHL:
4539 if (tryShiftAmountMod(Node))
4540 return;
4541 break;
4542
4543 case ISD::AND:
4544 if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
4545 // Try to form a masked VPTESTM. Operands can be in either order.
4546 SDValue N0 = Node->getOperand(0);
4547 SDValue N1 = Node->getOperand(1);
4548 if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
4549 tryVPTESTM(Node, N0, N1))
4550 return;
4551 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
4552 tryVPTESTM(Node, N1, N0))
4553 return;
4554 }
4555
4556 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
4557 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4558 CurDAG->RemoveDeadNode(Node);
4559 return;
4560 }
4561 if (matchBitExtract(Node))
4562 return;
4563 if (AndImmShrink && shrinkAndImmediate(Node))
4564 return;
4565
4566 LLVM_FALLTHROUGH;
4567 case ISD::OR:
4568 case ISD::XOR:
4569 if (tryShrinkShlLogicImm(Node))
4570 return;
4571
4572 if (Opcode == ISD::OR && tryMatchBitSelect(Node))
4573 return;
4574
4575 LLVM_FALLTHROUGH;
4576 case ISD::ADD:
4577 case ISD::SUB: {
4578 // Try to avoid folding immediates with multiple uses for optsize.
4579 // This code tries to select to register form directly to avoid going
4580 // through the isel table which might fold the immediate. We can't change
4581 // the add/sub/and/or/xor with-immediate patterns in the
4582 // tablegen files to check immediate use count without making the patterns
4583 // unavailable to the fast-isel table.
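// Hypothetical illustration: at -Os, if the same 32-bit constant is AND'ed
// into several different values, folding it as an immediate repeats the four
// immediate bytes at every use, whereas materializing it once in a register
// and using the rr form here pays for the constant only once.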
4584 if (!OptForSize)
4585 break;
4586
4587 // Only handle i8/i16/i32/i64.
4588 if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
4589 break;
4590
4591 SDValue N0 = Node->getOperand(0);
4592 SDValue N1 = Node->getOperand(1);
4593
4594 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
4595 if (!Cst)
4596 break;
4597
4598 int64_t Val = Cst->getSExtValue();
4599
4600 // Make sure it's an immediate that is considered foldable.
4601 // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
4602 if (!isInt<8>(Val) && !isInt<32>(Val))
4603 break;
4604
4605 // If this can match to INC/DEC, let it go.
4606 if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
4607 break;
4608
4609 // Check if we should avoid folding this immediate.
4610 if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
4611 break;
4612
4613 // We should not fold the immediate. So we need a register form instead.
4614 unsigned ROpc, MOpc;
4615 switch (NVT.SimpleTy) {
4616 default: llvm_unreachable("Unexpected VT!");
4617 case MVT::i8:
4618 switch (Opcode) {
4619 default: llvm_unreachable("Unexpected opcode!");
4620 case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
4621 case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
4622 case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
4623 case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
4624 case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
4625 }
4626 break;
4627 case MVT::i16:
4628 switch (Opcode) {
4629 default: llvm_unreachable("Unexpected opcode!");
4630 case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
4631 case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
4632 case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
4633 case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
4634 case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
4635 }
4636 break;
4637 case MVT::i32:
4638 switch (Opcode) {
4639 default: llvm_unreachable("Unexpected opcode!");
4640 case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
4641 case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
4642 case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
4643 case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
4644 case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
4645 }
4646 break;
4647 case MVT::i64:
4648 switch (Opcode) {
4649 default: llvm_unreachable("Unexpected opcode!");
4650 case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
4651 case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
4652 case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
4653 case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
4654 case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
4655 }
4656 break;
4657 }
4658
4659 // OK, this is an AND/OR/XOR/ADD/SUB with a constant.
4660
4661 // If this is not a subtract, we can still try to fold a load.
4662 if (Opcode != ISD::SUB) {
4663 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4664 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4665 SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4666 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4667 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4668 // Update the chain.
4669 ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
4670 // Record the mem-refs
4671 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
4672 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4673 CurDAG->RemoveDeadNode(Node);
4674 return;
4675 }
4676 }
4677
4678 CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
4679 return;
4680 }
4681
4682 case X86ISD::SMUL:
4683 // i16/i32/i64 are handled with isel patterns.
4684 if (NVT != MVT::i8)
4685 break;
4686 LLVM_FALLTHROUGH;
4687 case X86ISD::UMUL: {
4688 SDValue N0 = Node->getOperand(0);
4689 SDValue N1 = Node->getOperand(1);
4690
4691 unsigned LoReg, ROpc, MOpc;
4692 switch (NVT.SimpleTy) {
4693 default: llvm_unreachable("Unsupported VT!");
4694 case MVT::i8:
4695 LoReg = X86::AL;
4696 ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
4697 MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
4698 break;
4699 case MVT::i16:
4700 LoReg = X86::AX;
4701 ROpc = X86::MUL16r;
4702 MOpc = X86::MUL16m;
4703 break;
4704 case MVT::i32:
4705 LoReg = X86::EAX;
4706 ROpc = X86::MUL32r;
4707 MOpc = X86::MUL32m;
4708 break;
4709 case MVT::i64:
4710 LoReg = X86::RAX;
4711 ROpc = X86::MUL64r;
4712 MOpc = X86::MUL64m;
4713 break;
4714 }
4715
4716 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4717 bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4718 // Multiply is commutative.
4719 if (!FoldedLoad) {
4720 FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4721 if (FoldedLoad)
4722 std::swap(N0, N1);
4723 }
4724
4725 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4726 N0, SDValue()).getValue(1);
4727
4728 MachineSDNode *CNode;
4729 if (FoldedLoad) {
4730 // i16/i32/i64 use an instruction that produces a low and high result even
4731 // though only the low result is used.
4732 SDVTList VTs;
4733 if (NVT == MVT::i8)
4734 VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4735 else
4736 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
4737
4738 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4739 InFlag };
4740 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4741
4742 // Update the chain.
4743 ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
4744 // Record the mem-refs
4745 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4746 } else {
4747 // i16/i32/i64 use an instruction that produces a low and high result even
4748 // though only the low result is used.
4749 SDVTList VTs;
4750 if (NVT == MVT::i8)
4751 VTs = CurDAG->getVTList(NVT, MVT::i32);
4752 else
4753 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
4754
4755 CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
4756 }
4757
4758 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4759 ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
4760 CurDAG->RemoveDeadNode(Node);
4761 return;
4762 }
4763
4764 case ISD::SMUL_LOHI:
4765 case ISD::UMUL_LOHI: {
4766 SDValue N0 = Node->getOperand(0);
4767 SDValue N1 = Node->getOperand(1);
4768
4769 unsigned Opc, MOpc;
4770 unsigned LoReg, HiReg;
4771 bool IsSigned = Opcode == ISD::SMUL_LOHI;
4772 switch (NVT.SimpleTy) {
4773 default: llvm_unreachable("Unsupported VT!");
4774 case MVT::i32:
4775 Opc = IsSigned ? X86::IMUL32r : X86::MUL32r;
4776 MOpc = IsSigned ? X86::IMUL32m : X86::MUL32m;
4777 LoReg = X86::EAX; HiReg = X86::EDX;
4778 break;
4779 case MVT::i64:
4780 Opc = IsSigned ? X86::IMUL64r : X86::MUL64r;
4781 MOpc = IsSigned ? X86::IMUL64m : X86::MUL64m;
4782 LoReg = X86::RAX; HiReg = X86::RDX;
4783 break;
4784 }
4785
4786 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4787 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4788 // Multiply is commutative.
4789 if (!foldedLoad) {
4790 foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4791 if (foldedLoad)
4792 std::swap(N0, N1);
4793 }
4794
4795 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4796 N0, SDValue()).getValue(1);
4797 if (foldedLoad) {
4798 SDValue Chain;
4799 MachineSDNode *CNode = nullptr;
4800 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4801 InFlag };
4802 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
4803 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4804 Chain = SDValue(CNode, 0);
4805 InFlag = SDValue(CNode, 1);
4806
4807 // Update the chain.
4808 ReplaceUses(N1.getValue(1), Chain);
4809 // Record the mem-refs
4810 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4811 } else {
4812 SDValue Ops[] = { N1, InFlag };
4813 SDVTList VTs = CurDAG->getVTList(MVT::Glue);
4814 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4815 InFlag = SDValue(CNode, 0);
4816 }
4817
4818 // Copy the low half of the result, if it is needed.
4819 if (!SDValue(Node, 0).use_empty()) {
4820 assert(LoReg && "Register for low half is not defined!");
4821 SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
4822 NVT, InFlag);
4823 InFlag = ResLo.getValue(2);
4824 ReplaceUses(SDValue(Node, 0), ResLo);
4825 LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
4826 dbgs() << '\n');
4827 }
4828 // Copy the high half of the result, if it is needed.
4829 if (!SDValue(Node, 1).use_empty()) {
4830 assert(HiReg && "Register for high half is not defined!");
4831 SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
4832 NVT, InFlag);
4833 InFlag = ResHi.getValue(2);
4834 ReplaceUses(SDValue(Node, 1), ResHi);
4835 LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
4836 dbgs() << '\n');
4837 }
4838
4839 CurDAG->RemoveDeadNode(Node);
4840 return;
4841 }
4842
4843 case ISD::SDIVREM:
4844 case ISD::UDIVREM: {
4845 SDValue N0 = Node->getOperand(0);
4846 SDValue N1 = Node->getOperand(1);
4847
4848 unsigned ROpc, MOpc;
4849 bool isSigned = Opcode == ISD::SDIVREM;
4850 if (!isSigned) {
4851 switch (NVT.SimpleTy) {
4852 default: llvm_unreachable("Unsupported VT!");
4853 case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break;
4854 case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break;
4855 case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break;
4856 case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break;
4857 }
4858 } else {
4859 switch (NVT.SimpleTy) {
4860 default: llvm_unreachable("Unsupported VT!");
4861 case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
4862 case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
4863 case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
4864 case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
4865 }
4866 }
4867
4868 unsigned LoReg, HiReg, ClrReg;
4869 unsigned SExtOpcode;
4870 switch (NVT.SimpleTy) {
4871 default: llvm_unreachable("Unsupported VT!");
4872 case MVT::i8:
4873 LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4874 SExtOpcode = 0; // Not used.
4875 break;
4876 case MVT::i16:
4877 LoReg = X86::AX; HiReg = X86::DX;
4878 ClrReg = X86::DX;
4879 SExtOpcode = X86::CWD;
4880 break;
4881 case MVT::i32:
4882 LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
4883 SExtOpcode = X86::CDQ;
4884 break;
4885 case MVT::i64:
4886 LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
4887 SExtOpcode = X86::CQO;
4888 break;
4889 }
4890
4891 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4892 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4893 bool signBitIsZero = CurDAG->SignBitIsZero(N0);
4894
4895 SDValue InFlag;
4896 if (NVT == MVT::i8) {
4897 // Special case for div8, just use a move with zero extension to AX to
4898 // clear the upper 8 bits (AH).
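// Background: the 8-bit DIV/IDIV divide the 16-bit value in AX by their
// operand, leaving the quotient in AL and the remainder in AH, so the
// dividend is widened into AX with MOVZX (or MOVSX when the division is
// signed and the sign bit may be set).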
4899 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
4900 MachineSDNode *Move;
4901 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4902 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4903 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
4904 : X86::MOVZX16rm8;
4905 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
4906 Chain = SDValue(Move, 1);
4907 ReplaceUses(N0.getValue(1), Chain);
4908 // Record the mem-refs
4909 CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
4910 } else {
4911 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
4912 : X86::MOVZX16rr8;
4913 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
4914 Chain = CurDAG->getEntryNode();
4915 }
4916 Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
4917 SDValue());
4918 InFlag = Chain.getValue(1);
4919 } else {
4920 InFlag =
4921 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
4922 LoReg, N0, SDValue()).getValue(1);
4923 if (isSigned && !signBitIsZero) {
4924 // Sign extend the low part into the high part.
4925 InFlag =
4926 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
4927 } else {
4928 // Zero out the high part, effectively zero extending the input.
4929 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
4930 SDValue ClrNode =
4931 SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
4932 switch (NVT.SimpleTy) {
4933 case MVT::i16:
4934 ClrNode =
4935 SDValue(CurDAG->getMachineNode(
4936 TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
4937 CurDAG->getTargetConstant(X86::sub_16bit, dl,
4938 MVT::i32)),
4939 0);
4940 break;
4941 case MVT::i32:
4942 break;
4943 case MVT::i64:
4944 ClrNode =
4945 SDValue(CurDAG->getMachineNode(
4946 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
4947 CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
4948 CurDAG->getTargetConstant(X86::sub_32bit, dl,
4949 MVT::i32)),
4950 0);
4951 break;
4952 default:
4953 llvm_unreachable("Unexpected division source")::llvm::llvm_unreachable_internal("Unexpected division source"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp"
, 4953)
;
4954 }
4955
4956 InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
4957 ClrNode, InFlag).getValue(1);
4958 }
4959 }
4960
4961 if (foldedLoad) {
4962 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4963 InFlag };
4964 MachineSDNode *CNode =
4965 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
4966 InFlag = SDValue(CNode, 1);
4967 // Update the chain.
4968 ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
4969 // Record the mem-refs
4970 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4971 } else {
4972 InFlag =
4973 SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0);
4974 }
4975
4976 // Prevent use of AH in a REX instruction by explicitly copying it to
4977 // an ABCD_L register.
4978 //
4979 // The current assumption of the register allocator is that isel
4980 // won't generate explicit references to the GR8_ABCD_H registers. If
4981 // the allocator and/or the backend get enhanced to be more robust in
4982 // that regard, this can be, and should be, removed.
4983 if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
4984 SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
4985 unsigned AHExtOpcode =
4986 isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
4987
4988 SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
4989 MVT::Glue, AHCopy, InFlag);
4990 SDValue Result(RNode, 0);
4991 InFlag = SDValue(RNode, 1);
4992
4993 Result =
4994 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
4995
4996 ReplaceUses(SDValue(Node, 1), Result);
4997 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4998 dbgs() << '\n');
4999 }
5000 // Copy the division (low) result, if it is needed.
5001 if (!SDValue(Node, 0).use_empty()) {
5002 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
5003 LoReg, NVT, InFlag);
5004 InFlag = Result.getValue(2);
5005 ReplaceUses(SDValue(Node, 0), Result);
5006 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5007 dbgs() << '\n');
5008 }
5009 // Copy the remainder (high) result, if it is needed.
5010 if (!SDValue(Node, 1).use_empty()) {
5011 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
5012 HiReg, NVT, InFlag);
5013 InFlag = Result.getValue(2);
5014 ReplaceUses(SDValue(Node, 1), Result);
5015 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5016 dbgs() << '\n');
5017 }
5018 CurDAG->RemoveDeadNode(Node);
5019 return;
5020 }
5021
5022 case X86ISD::FCMP:
5023 case X86ISD::STRICT_FCMP:
5024 case X86ISD::STRICT_FCMPS: {
5025 bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
5026 Node->getOpcode() == X86ISD::STRICT_FCMPS;
5027 SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
5028 SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
5029
5030 // Save the original VT of the compare.
5031 MVT CmpVT = N0.getSimpleValueType();
5032
5033 // Floating point needs special handling if we don't have FCOMI.
5034 if (Subtarget->hasCMov())
5035 break;
5036
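// Rough shape of the fallback sequence built below: compare with UCOM/COM,
// copy the FPU status word out with FNSTSW, extract AH, and use SAHF to
// place C0/C2/C3 into EFLAGS where ordinary SETcc/Jcc users can read them.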
5037 bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
5038
5039 unsigned Opc;
5040 switch (CmpVT.SimpleTy) {
5041 default: llvm_unreachable("Unexpected type!");
5042 case MVT::f32:
5043 Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
5044 break;
5045 case MVT::f64:
5046 Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
5047 break;
5048 case MVT::f80:
5049 Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
5050 break;
5051 }
5052
5053 SDValue Cmp;
5054 SDValue Chain =
5055 IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
5056 if (IsStrictCmp) {
5057 SDVTList VTs = CurDAG->getVTList(MVT::i16, MVT::Other);
5058 Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
5059 Chain = Cmp.getValue(1);
5060 } else {
5061 Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i16, N0, N1), 0);
5062 }
5063
5064 // Move FPSW to AX.
5065 SDValue FPSW = CurDAG->getCopyToReg(Chain, dl, X86::FPSW, Cmp, SDValue());
5066 Chain = FPSW;
5067 SDValue FNSTSW =
5068 SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, FPSW,
5069 FPSW.getValue(1)),
5070 0);
5071
5072 // Extract upper 8-bits of AX.
5073 SDValue Extract =
5074 CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
5075
5076 // Move AH into flags.
5077 // Some 64-bit targets lack SAHF support, but they do support FCOMI.
5078 assert(Subtarget->hasLAHFSAHF() &&
5079 "Target doesn't support SAHF or FCOMI?");
5080 SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
5081 Chain = AH;
5082 SDValue SAHF = SDValue(
5083 CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
5084
5085 if (IsStrictCmp)
5086 ReplaceUses(SDValue(Node, 1), Chain);
5087
5088 ReplaceUses(SDValue(Node, 0), SAHF);
5089 CurDAG->RemoveDeadNode(Node);
5090 return;
5091 }
5092
5093 case X86ISD::CMP: {
5094 SDValue N0 = Node->getOperand(0);
5095 SDValue N1 = Node->getOperand(1);
5096
5097 // Optimizations for TEST compares.
5098 if (!isNullConstant(N1))
5099 break;
5100
5101 // Save the original VT of the compare.
5102 MVT CmpVT = N0.getSimpleValueType();
5103
5104 // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
5105 // by a test instruction. The test should be removed later by
5106 // analyzeCompare if we are using only the zero flag.
5107 // TODO: Should we check the users and use the BEXTR flags directly?
5108 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5109 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
5110 unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
5111 : X86::TEST32rr;
5112 SDValue BEXTR = SDValue(NewNode, 0);
5113 NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
5114 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
5115 CurDAG->RemoveDeadNode(Node);
5116 return;
5117 }
5118 }
5119
5120 // We can peek through truncates, but we need to be careful below.
5121 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
5122 N0 = N0.getOperand(0);
5123
5124 // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
5125 // use a smaller encoding.
5126 // Look past the truncate if CMP is the only use of it.
5127 if (N0.getOpcode() == ISD::AND &&
5128 N0.getNode()->hasOneUse() &&
5129 N0.getValueType() != MVT::i8) {
5130 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5131 if (!C) break;
5132 uint64_t Mask = C->getZExtValue();
5133
5134 // Check if we can replace AND+IMM64 with a shift. This is possible for
5135 // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
5136 // flag.
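// Hypothetical example: for (X86cmp (and X, 0xFFFFFFFF00000000), 0) where
// only ZF is consumed, the 64-bit immediate cannot be folded, but
// "shr $32, X" followed by "test X, X" checks the same bits; low-ones masks
// such as 0x00000000FFFFFFFF are handled symmetrically with a left shift.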
5137 if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
5138 onlyUsesZeroFlag(SDValue(Node, 0))) {
5139 if (isMask_64(~Mask)) {
5140 unsigned TrailingZeros = countTrailingZeros(Mask);
5141 SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
5142 SDValue Shift =
5143 SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
5144 N0.getOperand(0), Imm), 0);
5145 MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
5146 MVT::i32, Shift, Shift);
5147 ReplaceNode(Node, Test);
5148 return;
5149 }
5150 if (isMask_64(Mask)) {
5151 unsigned LeadingZeros = countLeadingZeros(Mask);
5152 SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
5153 SDValue Shift =
5154 SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
5155 N0.getOperand(0), Imm), 0);
5156 MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
5157 MVT::i32, Shift, Shift);
5158 ReplaceNode(Node, Test);
5159 return;
5160 }
5161 }
5162
5163 MVT VT;
5164 int SubRegOp;
5165 unsigned ROpc, MOpc;
5166
5167 // For each of these checks we need to be careful if the sign flag is
5168 // being used. It is only safe to use the sign flag in two conditions,
5169 // either the sign bit in the shrunken mask is zero or the final test
5170 // size is equal to the original compare size.
5171
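// For instance (hypothetical values): narrowing "testl $0x80, %eax" to
// "testb $0x80, %al" keeps ZF intact, but SF would then reflect bit 7
// rather than bit 31, so the smaller form is only used when the mask's
// sign bit is clear, the compare was already that width, or no user of
// the flags reads SF.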
5172 if (isUInt<8>(Mask) &&
5173 (!(Mask & 0x80) || CmpVT == MVT::i8 ||
5174 hasNoSignFlagUses(SDValue(Node, 0)))) {
5175 // For example, convert "testl %eax, $8" to "testb %al, $8"
5176 VT = MVT::i8;
5177 SubRegOp = X86::sub_8bit;
5178 ROpc = X86::TEST8ri;
5179 MOpc = X86::TEST8mi;
5180 } else if (OptForMinSize && isUInt<16>(Mask) &&
5181 (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
5182 hasNoSignFlagUses(SDValue(Node, 0)))) {
5183 // For example, "testl %eax, $32776" to "testw %ax, $32776".
5184 // NOTE: We only want to form TESTW instructions if optimizing for
5185 // min size. Otherwise we only save one byte and possibly get a length
5186 // changing prefix penalty in the decoders.
5187 VT = MVT::i16;
5188 SubRegOp = X86::sub_16bit;
5189 ROpc = X86::TEST16ri;
5190 MOpc = X86::TEST16mi;
5191 } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
5192 ((!(Mask & 0x80000000) &&
5193 // Without minsize 16-bit Cmps can get here so we need to
5194 // be sure we calculate the correct sign flag if needed.
5195 (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
5196 CmpVT == MVT::i32 ||
5197 hasNoSignFlagUses(SDValue(Node, 0)))) {
5198 // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
5199 // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
5200 // Otherwise, we find ourselves in a position where we have to do
5201 // promotion. If previous passes did not promote the and, we assume
5202 // they had a good reason not to and do not promote here.
5203 VT = MVT::i32;
5204 SubRegOp = X86::sub_32bit;
5205 ROpc = X86::TEST32ri;
5206 MOpc = X86::TEST32mi;
5207 } else {
5208 // No eligible transformation was found.
5209 break;
5210 }
5211
5212 SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
5213 SDValue Reg = N0.getOperand(0);
5214
5215 // Emit a testl or testw.
5216 MachineSDNode *NewNode;
5217 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5218 if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
5219 if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) {
5220 if (!LoadN->isSimple()) {
5221 unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits();
5222 if (MOpc == X86::TEST8mi && NumVolBits != 8)
5223 break;
5224 else if (MOpc == X86::TEST16mi && NumVolBits != 16)
5225 break;
5226 else if (MOpc == X86::TEST32mi && NumVolBits != 32)
5227 break;
5228 }
5229 }
5230 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
5231 Reg.getOperand(0) };
5232 NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
5233 // Update the chain.
5234 ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
5235 // Record the mem-refs
5236 CurDAG->setNodeMemRefs(NewNode,
5237 {cast<LoadSDNode>(Reg)->getMemOperand()});
5238 } else {
5239 // Extract the subregister if necessary.
5240 if (N0.getValueType() != VT)
5241 Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
5242
5243 NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
5244 }
5245 // Replace CMP with TEST.
5246 ReplaceNode(Node, NewNode);
5247 return;
5248 }
5249 break;
5250 }
5251 case X86ISD::PCMPISTR: {
5252 if (!Subtarget->hasSSE42())
5253 break;
5254
5255 bool NeedIndex = !SDValue(Node, 0).use_empty();
5256 bool NeedMask = !SDValue(Node, 1).use_empty();
5257 // We can't fold a load if we are going to make two instructions.
5258 bool MayFoldLoad = !NeedIndex || !NeedMask;
5259
5260 MachineSDNode *CNode;
5261 if (NeedMask) {
5262 unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
5263 unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
5264 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
5265 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
5266 }
5267 if (NeedIndex || !NeedMask) {
5268 unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
5269 unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
5270 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
5271 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5272 }
5273
5274 // Connect the flag usage to the last instruction created.
5275 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
5276 CurDAG->RemoveDeadNode(Node);
5277 return;
5278 }
5279 case X86ISD::PCMPESTR: {
5280 if (!Subtarget->hasSSE42())
5281 break;
5282
5283 // Copy the two implicit register inputs.
5284 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
5285 Node->getOperand(1),
5286 SDValue()).getValue(1);
5287 InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
5288 Node->getOperand(3), InFlag).getValue(1);
5289
5290 bool NeedIndex = !SDValue(Node, 0).use_empty();
5291 bool NeedMask = !SDValue(Node, 1).use_empty();
5292 // We can't fold a load if we are going to make two instructions.
5293 bool MayFoldLoad = !NeedIndex || !NeedMask;
5294
5295 MachineSDNode *CNode;
5296 if (NeedMask) {
5297 unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
5298 unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
5299 CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
5300 InFlag);
5301 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
5302 }
5303 if (NeedIndex || !NeedMask) {
5304 unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
5305 unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
5306 CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
5307 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5308 }
5309 // Connect the flag usage to the last instruction created.
5310 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
5311 CurDAG->RemoveDeadNode(Node);
5312 return;
5313 }
5314
5315 case ISD::SETCC: {
5316 if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
5317 return;
5318
5319 break;
5320 }
5321
5322 case ISD::STORE:
5323 if (foldLoadStoreIntoMemOperand(Node))
5324 return;
5325 break;
5326
5327 case X86ISD::SETCC_CARRY: {
5328 // We have to do this manually because tblgen will put the eflags copy in
5329 // the wrong place if we use an extract_subreg in the pattern.
5330 MVT VT = Node->getSimpleValueType(0);
5331
5332 // Copy flags to the EFLAGS register and glue it to next node.
5333 SDValue EFLAGS =
5334 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
5335 Node->getOperand(1), SDValue());
5336
5337 // Create a 64-bit instruction if the result is 64-bits otherwise use the
5338 // 32-bit version.
5339 unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
5340 MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
5341 SDValue Result = SDValue(
5342 CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
5343
5344 // For less than 32-bits we need to extract from the 32-bit node.
5345 if (VT == MVT::i8 || VT == MVT::i16) {
5346 int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
5347 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
5348 }
5349
5350 ReplaceUses(SDValue(Node, 0), Result);
5351 CurDAG->RemoveDeadNode(Node);
5352 return;
5353 }
5354 case X86ISD::SBB: {
5355 if (isNullConstant(Node->getOperand(0)) &&
5356 isNullConstant(Node->getOperand(1))) {
5357 MVT VT = Node->getSimpleValueType(0);
5358
5359 // Create zero.
5360 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
5361 SDValue Zero =
5362 SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
5363 if (VT == MVT::i64) {
5364 Zero = SDValue(
5365 CurDAG->getMachineNode(
5366 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
5367 CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
5368 CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
5369 0);
5370 }
5371
5372 // Copy flags to the EFLAGS register and glue it to next node.
5373 SDValue EFLAGS =
5374 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
5375 Node->getOperand(2), SDValue());
5376
5377 // Create a 64-bit instruction if the result is 64-bits otherwise use the
5378 // 32-bit version.
5379 unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
5380 MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
5381 VTs = CurDAG->getVTList(SBBVT, MVT::i32);
5382 SDValue Result =
5383 SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS,
5384 EFLAGS.getValue(1)}),
5385 0);
5386
5387 // Replace the flag use.
5388 ReplaceUses(SDValue(Node, 1), Result.getValue(1));
5389
5390 // Replace the result use.
5391 if (!SDValue(Node, 0).use_empty()) {
5392 // For less than 32-bits we need to extract from the 32-bit node.
5393 if (VT == MVT::i8 || VT == MVT::i16) {
5394 int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
5395 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
5396 }
5397 ReplaceUses(SDValue(Node, 0), Result);
5398 }
5399
5400 CurDAG->RemoveDeadNode(Node);
5401 return;
5402 }
5403 break;
5404 }
5405 case X86ISD::MGATHER: {
5406 auto *Mgt = cast<X86MaskedGatherSDNode>(Node);
5407 SDValue IndexOp = Mgt->getIndex();
5408 SDValue Mask = Mgt->getMask();
5409 MVT IndexVT = IndexOp.getSimpleValueType();
5410 MVT ValueVT = Node->getSimpleValueType(0);
5411 MVT MaskVT = Mask.getSimpleValueType();
5412
5413 // This is just to prevent crashes if the nodes are malformed somehow. We're
5414 // otherwise only doing loose type checking here, based on what a type
5415 // constraint would say, just like table-based isel.
5416 if (!ValueVT.isVector() || !MaskVT.isVector())
5417 break;
5418
5419 unsigned NumElts = ValueVT.getVectorNumElements();
5420 MVT ValueSVT = ValueVT.getVectorElementType();
5421
5422 bool IsFP = ValueSVT.isFloatingPoint();
5423 unsigned EltSize = ValueSVT.getSizeInBits();
5424
5425 unsigned Opc = 0;
5426 bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1;
5427 if (AVX512Gather) {
5428 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
5429 Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm;
5430 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
5431 Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm;
5432 else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
5433 Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm;
5434 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
5435 Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm;
5436 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
5437 Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm;
5438 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
5439 Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm;
5440 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
5441 Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm;
5442 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
5443 Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm;
5444 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
5445 Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm;
5446 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
5447 Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm;
5448 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
5449 Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm;
5450 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
5451 Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm;
5452 } else {
5453 assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&
5454 "Unexpected mask VT!");
5455 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
5456 Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm;
5457 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
5458 Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm;
5459 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
5460 Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm;
5461 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
5462 Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm;
5463 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
5464 Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm;
5465 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
5466 Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm;
5467 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
5468 Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm;
5469 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
5470 Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm;
5471 }
5472
5473 if (!Opc)
5474 break;
5475
5476 SDValue Base, Scale, Index, Disp, Segment;
5477 if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(),
5478 Base, Scale, Index, Disp, Segment))
5479 break;
5480
5481 SDValue PassThru = Mgt->getPassThru();
5482 SDValue Chain = Mgt->getChain();
5483 // Gather instructions have a mask output not in the ISD node.
5484 SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other);
5485
5486 MachineSDNode *NewNode;
5487 if (AVX512Gather) {
5488 SDValue Ops[] = {PassThru, Mask, Base, Scale,
5489 Index, Disp, Segment, Chain};
5490 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
5491 } else {
5492 SDValue Ops[] = {PassThru, Base, Scale, Index,
5493 Disp, Segment, Mask, Chain};
5494 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
5495 }
5496 CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
5497 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
5498 ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2));
5499 CurDAG->RemoveDeadNode(Node);
5500 return;
5501 }
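
The chain of if/else tests above simply keys the machine gather opcode off (IndexVT, NumElts, EltSize, IsFP); the instruction that gets selected performs a per-lane masked load. A rough reference loop for that behaviour (illustrative only; names are invented, the displacement is omitted, and the real instructions also update the mask as lanes complete):

// Per-element behaviour of a masked gather: active lanes load from
// Base + Index[i] * Scale, inactive lanes keep the pass-through value.
static void gatherRef(float *Dst, const char *Base, const int *Index,
                      const bool *Mask, const float *PassThru,
                      unsigned NumElts, unsigned Scale) {
  for (unsigned I = 0; I != NumElts; ++I)
    Dst[I] = Mask[I]
                 ? *reinterpret_cast<const float *>(Base + (long)Index[I] * Scale)
                 : PassThru[I];
}
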
5502 case X86ISD::MSCATTER: {
5503 auto *Sc = cast<X86MaskedScatterSDNode>(Node);
5504 SDValue Value = Sc->getValue();
5505 SDValue IndexOp = Sc->getIndex();
5506 MVT IndexVT = IndexOp.getSimpleValueType();
5507 MVT ValueVT = Value.getSimpleValueType();
5508
5509 // This is just to prevent crashes if the nodes are malformed somehow. We're
5510 // otherwise only doing loose type checking in here, based on what a type
5511 // constraint would say, just like table-based isel.
5512 if (!ValueVT.isVector())
5513 break;
5514
5515 unsigned NumElts = ValueVT.getVectorNumElements();
5516 MVT ValueSVT = ValueVT.getVectorElementType();
5517
5518 bool IsFP = ValueSVT.isFloatingPoint();
5519 unsigned EltSize = ValueSVT.getSizeInBits();
5520
5521 unsigned Opc;
5522 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
5523 Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr;
5524 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
5525 Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr;
5526 else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
5527 Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr;
5528 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
5529 Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr;
5530 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
5531 Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr;
5532 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
5533 Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr;
5534 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
5535 Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr;
5536 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
5537 Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr;
5538 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
5539 Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr;
5540 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
5541 Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr;
5542 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
5543 Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr;
5544 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
5545 Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr;
5546 else
5547 break;
5548
5549 SDValue Base, Scale, Index, Disp, Segment;
5550 if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(),
5551 Base, Scale, Index, Disp, Segment))
5552 break;
5553
5554 SDValue Mask = Sc->getMask();
5555 SDValue Chain = Sc->getChain();
5556 // Scatter instructions have a mask output not in the ISD node.
5557 SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other);
5558 SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain};
5559
5560 MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
5561 CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
5562 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1));
5563 CurDAG->RemoveDeadNode(Node);
5564 return;
5565 }
5566 }
5567
5568 SelectCode(Node);
5569}
5570
5571bool X86DAGToDAGISel::
5572SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5573 std::vector<SDValue> &OutOps) {
5574 SDValue Op0, Op1, Op2, Op3, Op4;
5575 switch (ConstraintID) {
5576 default:
5577 llvm_unreachable("Unexpected asm memory constraint");
5578 case InlineAsm::Constraint_o: // offsetable ??
5579 case InlineAsm::Constraint_v: // not offsetable ??
5580 case InlineAsm::Constraint_m: // memory
5581 case InlineAsm::Constraint_X:
5582 if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
5583 return true;
5584 break;
5585 }
5586
5587 OutOps.push_back(Op0);
5588 OutOps.push_back(Op1);
5589 OutOps.push_back(Op2);
5590 OutOps.push_back(Op3);
5591 OutOps.push_back(Op4);
5592 return false;
5593}
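
SelectInlineAsmMemoryOperand pushes five operands (Op0..Op4 = Base, Scale, Index, Disp, Segment), the usual X86 memory-operand tuple describing segment:[Base + Scale*Index + Disp]. A trivial sketch of the address arithmetic those operands encode (illustrative; the segment base is applied by the hardware):

#include <cstdint>

// Effective address described by the (Base, Scale, Index, Disp) part of the
// five-operand x86 memory form.
static uint64_t effectiveAddress(uint64_t Base, uint64_t Scale,
                                 uint64_t Index, int64_t Disp) {
  return Base + Scale * Index + static_cast<uint64_t>(Disp);
}
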
5594
5595/// This pass converts a legalized DAG into a X86-specific DAG,
5596/// ready for instruction scheduling.
5597FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
5598 CodeGenOpt::Level OptLevel) {
5599 return new X86DAGToDAGISel(TM, OptLevel);
5600}

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/Casting.h

1//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(),
10// and dyn_cast_or_null<X>() templates.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_CASTING_H
15#define LLVM_SUPPORT_CASTING_H
16
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/type_traits.h"
19#include <cassert>
20#include <memory>
21#include <type_traits>
22
23namespace llvm {
24
25//===----------------------------------------------------------------------===//
26// isa<x> Support Templates
27//===----------------------------------------------------------------------===//
28
29// Define a template that can be specialized by smart pointers to reflect the
30// fact that they are automatically dereferenced, and are not involved with the
31// template selection process... the default implementation is a noop.
32//
33template<typename From> struct simplify_type {
34 using SimpleType = From; // The real type this represents...
35
36 // An accessor to get the real value...
37 static SimpleType &getSimplifiedValue(From &Val) { return Val; }
38};
39
40template<typename From> struct simplify_type<const From> {
41 using NonConstSimpleType = typename simplify_type<From>::SimpleType;
42 using SimpleType =
43 typename add_const_past_pointer<NonConstSimpleType>::type;
44 using RetType =
45 typename add_lvalue_reference_if_not_pointer<SimpleType>::type;
46
47 static RetType getSimplifiedValue(const From& Val) {
48 return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
49 }
50};
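
The trace further down enters getSimplifiedValue because SelectionDAGNodes.h (the header named in the warning) specializes simplify_type for SDValue, so that isa<>/cast<> operate on the wrapped SDNode*. The specialization is roughly the following (quoted from memory, not from this listing; the relevant point is that nothing here checks the node pointer for null):

// Approximate shape of the SDValue specialization in SelectionDAGNodes.h:
// cast<ConstantSDNode>(SomeSDValue) is handed Val.getNode() directly, so a
// null node pointer flows straight through to the cast result.
template <> struct simplify_type<SDValue> {
  using SimpleType = SDNode *;
  static SimpleType getSimplifiedValue(SDValue &Val) { return Val.getNode(); }
};
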
51
52// The core of the implementation of isa<X> is here; To and From should be
53// the names of classes. This template can be specialized to customize the
54// implementation of isa<> without rewriting it from scratch.
55template <typename To, typename From, typename Enabler = void>
56struct isa_impl {
57 static inline bool doit(const From &Val) {
58 return To::classof(&Val);
59 }
60};
61
62/// Always allow upcasts, and perform no dynamic check for them.
63template <typename To, typename From>
64struct isa_impl<To, From, std::enable_if_t<std::is_base_of<To, From>::value>> {
65 static inline bool doit(const From &) { return true; }
66};
67
68template <typename To, typename From> struct isa_impl_cl {
69 static inline bool doit(const From &Val) {
70 return isa_impl<To, From>::doit(Val);
71 }
72};
73
74template <typename To, typename From> struct isa_impl_cl<To, const From> {
75 static inline bool doit(const From &Val) {
76 return isa_impl<To, From>::doit(Val);
77 }
78};
79
80template <typename To, typename From>
81struct isa_impl_cl<To, const std::unique_ptr<From>> {
82 static inline bool doit(const std::unique_ptr<From> &Val) {
83 assert(Val && "isa<> used on a null pointer");
84 return isa_impl_cl<To, From>::doit(*Val);
85 }
86};
87
88template <typename To, typename From> struct isa_impl_cl<To, From*> {
89 static inline bool doit(const From *Val) {
90 assert(Val && "isa<> used on a null pointer");
91 return isa_impl<To, From>::doit(*Val);
92 }
93};
94
95template <typename To, typename From> struct isa_impl_cl<To, From*const> {
96 static inline bool doit(const From *Val) {
97 assert(Val && "isa<> used on a null pointer");
98 return isa_impl<To, From>::doit(*Val);
99 }
100};
101
102template <typename To, typename From> struct isa_impl_cl<To, const From*> {
103 static inline bool doit(const From *Val) {
104 assert(Val && "isa<> used on a null pointer");
105 return isa_impl<To, From>::doit(*Val);
106 }
107};
108
109template <typename To, typename From> struct isa_impl_cl<To, const From*const> {
110 static inline bool doit(const From *Val) {
111 assert(Val && "isa<> used on a null pointer");
112 return isa_impl<To, From>::doit(*Val);
113 }
114};
115
116template<typename To, typename From, typename SimpleFrom>
117struct isa_impl_wrap {
118 // When From != SimplifiedType, we can simplify the type some more by using
119 // the simplify_type template.
120 static bool doit(const From &Val) {
121 return isa_impl_wrap<To, SimpleFrom,
122 typename simplify_type<SimpleFrom>::SimpleType>::doit(
123 simplify_type<const From>::getSimplifiedValue(Val));
124 }
125};
126
127template<typename To, typename FromTy>
128struct isa_impl_wrap<To, FromTy, FromTy> {
129 // When From == SimpleType, we are as simple as we are going to get.
130 static bool doit(const FromTy &Val) {
131 return isa_impl_cl<To,FromTy>::doit(Val);
132 }
133};
134
135// isa<X> - Return true if the parameter to the template is an instance of the
136// template type argument. Used like this:
137//
138// if (isa<Type>(myVal)) { ... }
139//
140 template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
141 return isa_impl_wrap<X, const Y,
142 typename simplify_type<const Y>::SimpleType>::doit(Val);
143}
144
145// isa_and_nonnull<X> - Functionally identical to isa, except that a null value
146// is accepted.
147//
148template <class X, class Y>
149 LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) {
150 if (!Val)
151 return false;
152 return isa<X>(Val);
153}
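
The distinction matters for null-pointer diagnostics like the one in this report: isa<> asserts when handed a null pointer, while isa_and_nonnull<> simply answers false. A small hypothetical usage contrast (Instruction and Value are the usual LLVM IR types):

#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"

// 'V' may legitimately be null here.
static void classify(const llvm::Value *V) {
  if (llvm::isa_and_nonnull<llvm::Instruction>(V)) {
    // safe even when V == nullptr
  }
  if (V && llvm::isa<llvm::Instruction>(V)) {
    // equivalent spelling with an explicit null check
  }
}
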
154
155//===----------------------------------------------------------------------===//
156// cast<x> Support Templates
157//===----------------------------------------------------------------------===//
158
159template<class To, class From> struct cast_retty;
160
161// Calculate what type the 'cast' function should return, based on a requested
162// type of To and a source type of From.
163template<class To, class From> struct cast_retty_impl {
164 using ret_type = To &; // Normal case, return Ty&
165};
166template<class To, class From> struct cast_retty_impl<To, const From> {
167 using ret_type = const To &; // Normal case, return Ty&
168};
169
170template<class To, class From> struct cast_retty_impl<To, From*> {
171 using ret_type = To *; // Pointer arg case, return Ty*
172};
173
174template<class To, class From> struct cast_retty_impl<To, const From*> {
175 using ret_type = const To *; // Constant pointer arg case, return const Ty*
176};
177
178template<class To, class From> struct cast_retty_impl<To, const From*const> {
179 using ret_type = const To *; // Constant pointer arg case, return const Ty*
180};
181
182template <class To, class From>
183struct cast_retty_impl<To, std::unique_ptr<From>> {
184private:
185 using PointerType = typename cast_retty_impl<To, From *>::ret_type;
186 using ResultType = std::remove_pointer_t<PointerType>;
187
188public:
189 using ret_type = std::unique_ptr<ResultType>;
190};
191
192template<class To, class From, class SimpleFrom>
193struct cast_retty_wrap {
194 // When the simplified type and the from type are not the same, use the type
195 // simplifier to reduce the type, then reuse cast_retty_impl to get the
196 // resultant type.
197 using ret_type = typename cast_retty<To, SimpleFrom>::ret_type;
198};
199
200template<class To, class FromTy>
201struct cast_retty_wrap<To, FromTy, FromTy> {
202 // When the simplified type is equal to the from type, use it directly.
203 using ret_type = typename cast_retty_impl<To,FromTy>::ret_type;
204};
205
206template<class To, class From>
207struct cast_retty {
208 using ret_type = typename cast_retty_wrap<
209 To, From, typename simplify_type<From>::SimpleType>::ret_type;
210};
211
212// Ensure the non-simple values are converted using the simplify_type template
213// that may be specialized by smart pointers...
214//
215template<class To, class From, class SimpleFrom> struct cast_convert_val {
216 // This is not a simple type, use the template to simplify it...
217 static typename cast_retty<To, From>::ret_type doit(From &Val) {
218 return cast_convert_val<To, SimpleFrom,
26. Returning without writing to 'Val.Node'
219 typename simplify_type<SimpleFrom>::SimpleType>::doit(
220 simplify_type<From>::getSimplifiedValue(Val));
23. Calling 'simplify_type::getSimplifiedValue'
25. Returning from 'simplify_type::getSimplifiedValue'
221 }
222};
223
224template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
225 // This _is_ a simple type, just cast it.
226 static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) {
227 typename cast_retty<To, FromTy>::ret_type Res2
228 = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val);
229 return Res2;
230 }
231};
232
233template <class X> struct is_simple_type {
234 static const bool value =
235 std::is_same<X, typename simplify_type<X>::SimpleType>::value;
236};
237
238// cast<X> - Return the argument parameter cast to the specified type. This
239// casting operator asserts that the type is correct, so it does not return null
240// on failure. It does not allow a null argument (use cast_or_null for that).
241// It is typically used like this:
242//
243// cast<Instruction>(myVal)->getParent()
244//
245template <class X, class Y>
246inline std::enable_if_t<!is_simple_type<Y>::value,
247 typename cast_retty<X, const Y>::ret_type>
248cast(const Y &Val) {
249 assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
250 return cast_convert_val<
251 X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
252}
253
254template <class X, class Y>
255inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
256 assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
20. 'Val' is a 'ConstantSDNode'
21. '?' condition is true
257 return cast_convert_val<X, Y,
22. Calling 'cast_convert_val::doit'
27. Returning from 'cast_convert_val::doit'
28. Returning without writing to 'Val.Node'
258 typename simplify_type<Y>::SimpleType>::doit(Val);
259}
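
The numbered notes above are the core of the report: along this path cast<ConstantSDNode> is applied to an SDValue whose Node field was never written, and cast<> itself performs no null check, so the later member call on the result (SelectionDAGNodes.h, line 1149) dereferences a null pointer. One way to guard such a use, sketched here rather than taken from the X86 code (the helper name is invented):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Guarded alternative: tolerate an SDValue whose node pointer may be null.
static bool getImmIfConstant(SDValue Op, uint64_t &Imm) {
  if (const auto *C = dyn_cast_or_null<ConstantSDNode>(Op.getNode())) {
    Imm = C->getZExtValue(); // C is known non-null and a ConstantSDNode here
    return true;
  }
  return false;
}
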
260
261template <class X, class Y>
262inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
263 assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
264 return cast_convert_val<X, Y*,
265 typename simplify_type<Y*>::SimpleType>::doit(Val);
266}
267
268template <class X, class Y>
269inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
270cast(std::unique_ptr<Y> &&Val) {
271 assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!");
272 using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type;
273 return ret_type(
274 cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit(
275 Val.release()));
276}
277
278// cast_or_null<X> - Functionally identical to cast, except that a null value is
279// accepted.
280//
281template <class X, class Y>
282 LLVM_NODISCARD inline std::enable_if_t<
283 !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>
284cast_or_null(const Y &Val) {
285 if (!Val)
286 return nullptr;
287 assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
288 return cast<X>(Val);
289}
290
291template <class X, class Y>
292 LLVM_NODISCARD inline std::enable_if_t<!is_simple_type<Y>::value,
293 typename cast_retty<X, Y>::ret_type>
294cast_or_null(Y &Val) {
295 if (!Val)
296 return nullptr;
297 assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
298 return cast<X>(Val);
299}
300
301template <class X, class Y>
302 LLVM_NODISCARD