Bug Summary

File: include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1163, column 10
Called C++ object pointer is null
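
This warning class fires when a member function is invoked through a pointer the analyzer can prove is null on at least one path. As a rough, self-contained sketch of the pattern and its usual guard (hypothetical names and simplified stand-in types; this is not the code at SelectionDAGNodes.h:1163):

// Hypothetical illustration only; not the flagged LLVM code.
#include <cassert>

struct SDNode {
  unsigned Opcode = 0;
  unsigned getOpcode() const { return Opcode; }
};

struct SDValue {
  SDNode *Node = nullptr;                 // may legitimately be null
  SDNode *getNode() const { return Node; }
};

unsigned opcodeOf(const SDValue &V) {
  // The analyzer flags the unguarded form, because on some path V holds no
  // node and the call dereferences a null object pointer:
  //   return V.getNode()->getOpcode();   // "Called C++ object pointer is null"
  // Guarded form:
  SDNode *N = V.getNode();
  assert(N && "expected a non-null node");
  return N ? N->getOpcode() : 0;
}

The concrete path the analyzer followed runs through the annotated source below.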

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelDAGToDAG.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-10~svn374877/lib/Target/X86 -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn374877/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn374877=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-10-15-233810-7101-1 -x c++ /build/llvm-toolchain-snapshot-10~svn374877/lib/Target/X86/X86ISelDAGToDAG.cpp
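
The line above is the exact -cc1 invocation recorded by scan-build. As a rough sketch of a shorter, driver-level reproduction (checker list abbreviated and -I paths assumed relative to the LLVM checkout; adjust as needed):

clang++ --analyze -std=c++14 -O2 \
  -Xclang -analyzer-checker=core,cplusplus,unix,deadcode \
  -I build-llvm/include -I include \
  -I build-llvm/lib/Target/X86 -I lib/Target/X86 \
  lib/Target/X86/X86ISelDAGToDAG.cpp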

/build/llvm-toolchain-snapshot-10~svn374877/lib/Target/X86/X86ISelDAGToDAG.cpp

1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DAG pattern matching instruction selector for X86,
10// converting from a legalized dag to a X86 dag.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86MachineFunctionInfo.h"
16#include "X86RegisterInfo.h"
17#include "X86Subtarget.h"
18#include "X86TargetMachine.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/IR/ConstantRange.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/Instructions.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/Type.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/KnownBits.h"
32#include "llvm/Support/MathExtras.h"
33#include "llvm/Support/raw_ostream.h"
34#include "llvm/Target/TargetMachine.h"
35#include "llvm/Target/TargetOptions.h"
36#include <stdint.h>
37using namespace llvm;
38
39#define DEBUG_TYPE "x86-isel"
40
41STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42
43static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
44 cl::desc("Enable setting constant bits to reduce size of mask immediates"),
45 cl::Hidden);
46
47//===----------------------------------------------------------------------===//
48// Pattern Matcher Implementation
49//===----------------------------------------------------------------------===//
50
51namespace {
52 /// This corresponds to X86AddressMode, but uses SDValue's instead of register
53 /// numbers for the leaves of the matched tree.
54 struct X86ISelAddressMode {
55 enum {
56 RegBase,
57 FrameIndexBase
58 } BaseType;
59
60 // This is really a union, discriminated by BaseType!
61 SDValue Base_Reg;
62 int Base_FrameIndex;
63
64 unsigned Scale;
65 SDValue IndexReg;
66 int32_t Disp;
67 SDValue Segment;
68 const GlobalValue *GV;
69 const Constant *CP;
70 const BlockAddress *BlockAddr;
71 const char *ES;
72 MCSymbol *MCSym;
73 int JT;
74 unsigned Align; // CP alignment.
75 unsigned char SymbolFlags; // X86II::MO_*
76 bool NegateIndex = false;
77
78 X86ISelAddressMode()
79 : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
80 Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
81 MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
82
83 bool hasSymbolicDisplacement() const {
84 return GV != nullptr || CP != nullptr || ES != nullptr ||
85 MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
86 }
87
88 bool hasBaseOrIndexReg() const {
89 return BaseType == FrameIndexBase ||
90 IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
91 }
92
93 /// Return true if this addressing mode is already RIP-relative.
94 bool isRIPRelative() const {
95 if (BaseType != RegBase) return false;
96 if (RegisterSDNode *RegNode =
97 dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
98 return RegNode->getReg() == X86::RIP;
99 return false;
100 }
101
102 void setBaseReg(SDValue Reg) {
103 BaseType = RegBase;
104 Base_Reg = Reg;
105 }
106
107#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108 void dump(SelectionDAG *DAG = nullptr) {
109 dbgs() << "X86ISelAddressMode " << this << '\n';
110 dbgs() << "Base_Reg ";
111 if (Base_Reg.getNode())
112 Base_Reg.getNode()->dump(DAG);
113 else
114 dbgs() << "nul\n";
115 if (BaseType == FrameIndexBase)
116 dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
117 dbgs() << " Scale " << Scale << '\n'
118 << "IndexReg ";
119 if (NegateIndex)
120 dbgs() << "negate ";
121 if (IndexReg.getNode())
122 IndexReg.getNode()->dump(DAG);
123 else
124 dbgs() << "nul\n";
125 dbgs() << " Disp " << Disp << '\n'
126 << "GV ";
127 if (GV)
128 GV->dump();
129 else
130 dbgs() << "nul";
131 dbgs() << " CP ";
132 if (CP)
133 CP->dump();
134 else
135 dbgs() << "nul";
136 dbgs() << '\n'
137 << "ES ";
138 if (ES)
139 dbgs() << ES;
140 else
141 dbgs() << "nul";
142 dbgs() << " MCSym ";
143 if (MCSym)
144 dbgs() << MCSym;
145 else
146 dbgs() << "nul";
147 dbgs() << " JT" << JT << " Align" << Align << '\n';
148 }
149#endif
150 };
151}
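
Aside: X86ISelAddressMode above is a scratch record for the full x86 memory-operand form segment:[base + index*scale + disp], optionally with a symbolic displacement (global, constant pool, jump table, etc.). A small stand-alone sketch (simplified field types, not the LLVM classes) of how those fields map back to an AT&T-style operand:

// Simplified stand-in for X86ISelAddressMode; illustration only.
#include <cstdint>
#include <sstream>
#include <string>

struct SimpleAddressMode {
  std::string Base;    // e.g. "%rax", empty if none
  std::string Index;   // e.g. "%rcx", empty if none
  unsigned Scale = 1;  // 1, 2, 4, or 8
  int32_t Disp = 0;    // 32-bit displacement
};

// Renders disp(base,index,scale), the operand form the matched fields encode.
std::string toATT(const SimpleAddressMode &AM) {
  std::ostringstream OS;
  if (AM.Disp) OS << AM.Disp;
  OS << '(' << AM.Base;
  if (!AM.Index.empty()) OS << ',' << AM.Index << ',' << AM.Scale;
  OS << ')';
  return OS.str();
}
// toATT({"%rax", "%rcx", 4, 16}) -> "16(%rax,%rcx,4)"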
152
153namespace {
154 //===--------------------------------------------------------------------===//
155 /// ISel - X86-specific code to select X86 machine instructions for
156 /// SelectionDAG operations.
157 ///
158 class X86DAGToDAGISel final : public SelectionDAGISel {
159 /// Keep a pointer to the X86Subtarget around so that we can
160 /// make the right decision when generating code for different targets.
161 const X86Subtarget *Subtarget;
162
163 /// If true, selector should try to optimize for code size instead of
164 /// performance.
165 bool OptForSize;
166
167 /// If true, selector should try to optimize for minimum code size.
168 bool OptForMinSize;
169
170 /// Disable direct TLS access through segment registers.
171 bool IndirectTlsSegRefs;
172
173 public:
174 explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
175 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
176 OptForMinSize(false), IndirectTlsSegRefs(false) {}
177
178 StringRef getPassName() const override {
179 return "X86 DAG->DAG Instruction Selection";
180 }
181
182 bool runOnMachineFunction(MachineFunction &MF) override {
183 // Reset the subtarget each time through.
184 Subtarget = &MF.getSubtarget<X86Subtarget>();
185 IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
186 "indirect-tls-seg-refs");
187
188 // OptFor[Min]Size are used in pattern predicates that isel is matching.
189 OptForSize = MF.getFunction().hasOptSize();
190 OptForMinSize = MF.getFunction().hasMinSize();
191 assert((!OptForMinSize || OptForSize) &&
192 "OptForMinSize implies OptForSize");
193
194 SelectionDAGISel::runOnMachineFunction(MF);
195 return true;
196 }
197
198 void EmitFunctionEntryCode() override;
199
200 bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
201
202 void PreprocessISelDAG() override;
203 void PostprocessISelDAG() override;
204
205// Include the pieces autogenerated from the target description.
206#include "X86GenDAGISel.inc"
207
208 private:
209 void Select(SDNode *N) override;
210
211 bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
212 bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
213 bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
214 bool matchAddress(SDValue N, X86ISelAddressMode &AM);
215 bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
216 bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
217 bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
218 unsigned Depth);
219 bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
220 bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
221 SDValue &Scale, SDValue &Index, SDValue &Disp,
222 SDValue &Segment);
223 bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
224 SDValue &Scale, SDValue &Index, SDValue &Disp,
225 SDValue &Segment);
226 bool selectMOV64Imm32(SDValue N, SDValue &Imm);
227 bool selectLEAAddr(SDValue N, SDValue &Base,
228 SDValue &Scale, SDValue &Index, SDValue &Disp,
229 SDValue &Segment);
230 bool selectLEA64_32Addr(SDValue N, SDValue &Base,
231 SDValue &Scale, SDValue &Index, SDValue &Disp,
232 SDValue &Segment);
233 bool selectTLSADDRAddr(SDValue N, SDValue &Base,
234 SDValue &Scale, SDValue &Index, SDValue &Disp,
235 SDValue &Segment);
236 bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
237 SDValue &Base, SDValue &Scale,
238 SDValue &Index, SDValue &Disp,
239 SDValue &Segment,
240 SDValue &NodeWithChain);
241 bool selectRelocImm(SDValue N, SDValue &Op);
242
243 bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
244 SDValue &Base, SDValue &Scale,
245 SDValue &Index, SDValue &Disp,
246 SDValue &Segment);
247
248 // Convenience method where P is also root.
249 bool tryFoldLoad(SDNode *P, SDValue N,
250 SDValue &Base, SDValue &Scale,
251 SDValue &Index, SDValue &Disp,
252 SDValue &Segment) {
253 return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
254 }
255
256 bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
257 SDValue &Base, SDValue &Scale,
258 SDValue &Index, SDValue &Disp,
259 SDValue &Segment);
260
261 /// Implement addressing mode selection for inline asm expressions.
262 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
263 unsigned ConstraintID,
264 std::vector<SDValue> &OutOps) override;
265
266 void emitSpecialCodeForMain();
267
268 inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
269 MVT VT, SDValue &Base, SDValue &Scale,
270 SDValue &Index, SDValue &Disp,
271 SDValue &Segment) {
272 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
273 Base = CurDAG->getTargetFrameIndex(
274 AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
275 else if (AM.Base_Reg.getNode())
276 Base = AM.Base_Reg;
277 else
278 Base = CurDAG->getRegister(0, VT);
279
280 Scale = getI8Imm(AM.Scale, DL);
281
282 // Negate the index if needed.
283 if (AM.NegateIndex) {
284 unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
285 SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
286 AM.IndexReg), 0);
287 AM.IndexReg = Neg;
288 }
289
290 if (AM.IndexReg.getNode())
291 Index = AM.IndexReg;
292 else
293 Index = CurDAG->getRegister(0, VT);
294
295 // These are 32-bit even in 64-bit mode since RIP-relative offset
296 // is 32-bit.
297 if (AM.GV)
298 Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
299 MVT::i32, AM.Disp,
300 AM.SymbolFlags);
301 else if (AM.CP)
302 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
303 AM.Align, AM.Disp, AM.SymbolFlags);
304 else if (AM.ES) {
305 assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
306 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
307 } else if (AM.MCSym) {
308 assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
309 assert(AM.SymbolFlags == 0 && "oo");
310 Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
311 } else if (AM.JT != -1) {
312 assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
313 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
314 } else if (AM.BlockAddr)
315 Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
316 AM.SymbolFlags);
317 else
318 Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
319
320 if (AM.Segment.getNode())
321 Segment = AM.Segment;
322 else
323 Segment = CurDAG->getRegister(0, MVT::i16);
324 }
325
326 // Utility function to determine whether we should avoid selecting
327 // immediate forms of instructions for better code size or not.
328 // At a high level, we'd like to avoid such instructions when
329 // we have similar constants used within the same basic block
330 // that can be kept in a register.
331 //
332 bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
333 uint32_t UseCount = 0;
334
335 // Do not want to hoist if we're not optimizing for size.
336 // TODO: We'd like to remove this restriction.
337 // See the comment in X86InstrInfo.td for more info.
338 if (!OptForSize)
339 return false;
340
341 // Walk all the users of the immediate.
342 for (SDNode::use_iterator UI = N->use_begin(),
343 UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
344
345 SDNode *User = *UI;
346
347 // This user is already selected. Count it as a legitimate use and
348 // move on.
349 if (User->isMachineOpcode()) {
350 UseCount++;
351 continue;
352 }
353
354 // We want to count stores of immediates as real uses.
355 if (User->getOpcode() == ISD::STORE &&
356 User->getOperand(1).getNode() == N) {
357 UseCount++;
358 continue;
359 }
360
361 // We don't currently match users that have > 2 operands (except
362 // for stores, which are handled above)
363 // Those instructions won't match in ISEL, for now, and would
364 // be counted incorrectly.
365 // This may change in the future as we add additional instruction
366 // types.
367 if (User->getNumOperands() != 2)
368 continue;
369
370 // If this can match to INC/DEC, don't count it as a use.
371 if (User->getOpcode() == ISD::ADD &&
372 (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0))))
373 continue;
374
375 // Immediates that are used for offsets as part of stack
376 // manipulation should be left alone. These are typically
377 // used to indicate SP offsets for argument passing and
378 // will get pulled into stores/pushes (implicitly).
379 if (User->getOpcode() == X86ISD::ADD ||
380 User->getOpcode() == ISD::ADD ||
381 User->getOpcode() == X86ISD::SUB ||
382 User->getOpcode() == ISD::SUB) {
383
384 // Find the other operand of the add/sub.
385 SDValue OtherOp = User->getOperand(0);
386 if (OtherOp.getNode() == N)
387 OtherOp = User->getOperand(1);
388
389 // Don't count if the other operand is SP.
390 RegisterSDNode *RegNode;
391 if (OtherOp->getOpcode() == ISD::CopyFromReg &&
392 (RegNode = dyn_cast_or_null<RegisterSDNode>(
393 OtherOp->getOperand(1).getNode())))
394 if ((RegNode->getReg() == X86::ESP) ||
395 (RegNode->getReg() == X86::RSP))
396 continue;
397 }
398
399 // ... otherwise, count this and move on.
400 UseCount++;
401 }
402
403 // If we have more than 1 use, then recommend for hoisting.
404 return (UseCount > 1);
405 }
406
407 /// Return a target constant with the specified value of type i8.
408 inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
409 return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
410 }
411
412 /// Return a target constant with the specified value, of type i32.
413 inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
414 return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
415 }
416
417 /// Return a target constant with the specified value, of type i64.
418 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
419 return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
420 }
421
422 SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
423 const SDLoc &DL) {
424 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
425 uint64_t Index = N->getConstantOperandVal(1);
426 MVT VecVT = N->getOperand(0).getSimpleValueType();
427 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
428 }
429
430 SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
431 const SDLoc &DL) {
432 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
433 uint64_t Index = N->getConstantOperandVal(2);
434 MVT VecVT = N->getSimpleValueType(0);
435 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
436 }
437
438 // Helper to detect unneeded and instructions on shift amounts. Called
439 // from PatFrags in tablegen.
440 bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
441 assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
442 const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
443
444 if (Val.countTrailingOnes() >= Width)
445 return true;
446
447 APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
448 return Mask.countTrailingOnes() >= Width;
449 }
450
451 /// Return an SDNode that returns the value of the global base register.
452 /// Output instructions required to initialize the global base register,
453 /// if necessary.
454 SDNode *getGlobalBaseReg();
455
456 /// Return a reference to the TargetMachine, casted to the target-specific
457 /// type.
458 const X86TargetMachine &getTargetMachine() const {
459 return static_cast<const X86TargetMachine &>(TM);
460 }
461
462 /// Return a reference to the TargetInstrInfo, casted to the target-specific
463 /// type.
464 const X86InstrInfo *getInstrInfo() const {
465 return Subtarget->getInstrInfo();
466 }
467
468 /// Address-mode matching performs shift-of-and to and-of-shift
469 /// reassociation in order to expose more scaled addressing
470 /// opportunities.
471 bool ComplexPatternFuncMutatesDAG() const override {
472 return true;
473 }
474
475 bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
476
477 /// Returns whether this is a relocatable immediate in the range
478 /// [-2^Width .. 2^Width-1].
479 template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
480 if (auto *CN = dyn_cast<ConstantSDNode>(N))
481 return isInt<Width>(CN->getSExtValue());
482 return isSExtAbsoluteSymbolRef(Width, N);
483 }
484
485 // Indicates we should prefer to use a non-temporal load for this load.
486 bool useNonTemporalLoad(LoadSDNode *N) const {
487 if (!N->isNonTemporal())
488 return false;
489
490 unsigned StoreSize = N->getMemoryVT().getStoreSize();
491
492 if (N->getAlignment() < StoreSize)
493 return false;
494
495 switch (StoreSize) {
496 default: llvm_unreachable("Unsupported store size");
497 case 4:
498 case 8:
499 return false;
500 case 16:
501 return Subtarget->hasSSE41();
502 case 32:
503 return Subtarget->hasAVX2();
504 case 64:
505 return Subtarget->hasAVX512();
506 }
507 }
508
509 bool foldLoadStoreIntoMemOperand(SDNode *Node);
510 MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
511 bool matchBitExtract(SDNode *Node);
512 bool shrinkAndImmediate(SDNode *N);
513 bool isMaskZeroExtended(SDNode *N) const;
514 bool tryShiftAmountMod(SDNode *N);
515 bool combineIncDecVector(SDNode *Node);
516 bool tryShrinkShlLogicImm(SDNode *N);
517 bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
518 bool tryMatchBitSelect(SDNode *N);
519
520 MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
521 const SDLoc &dl, MVT VT, SDNode *Node);
522 MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
523 const SDLoc &dl, MVT VT, SDNode *Node,
524 SDValue &InFlag);
525
526 bool tryOptimizeRem8Extend(SDNode *N);
527
528 bool onlyUsesZeroFlag(SDValue Flags) const;
529 bool hasNoSignFlagUses(SDValue Flags) const;
530 bool hasNoCarryFlagUses(SDValue Flags) const;
531 };
532}
533
534
535// Returns true if this masked compare can be implemented legally with this
536// type.
537static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
538 unsigned Opcode = N->getOpcode();
539 if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
540 Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
541 // We can get 256-bit 8 element types here without VLX being enabled. When
542 // this happens we will use 512-bit operations and the mask will not be
543 // zero extended.
544 EVT OpVT = N->getOperand(0).getValueType();
545 if (OpVT.is256BitVector() || OpVT.is128BitVector())
546 return Subtarget->hasVLX();
547
548 return true;
549 }
550 // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
551 if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
552 Opcode == X86ISD::FSETCCM_SAE)
553 return true;
554
555 return false;
556}
557
558// Returns true if we can assume the writer of the mask has zero extended it
559// for us.
560bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
561 // If this is an AND, check if we have a compare on either side. As long as
562 // one side guarantees the mask is zero extended, the AND will preserve those
563 // zeros.
564 if (N->getOpcode() == ISD::AND)
565 return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
566 isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
567
568 return isLegalMaskCompare(N, Subtarget);
569}
570
571bool
572X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
573 if (OptLevel == CodeGenOpt::None) return false;
574
575 if (!N.hasOneUse())
576 return false;
577
578 if (N.getOpcode() != ISD::LOAD)
579 return true;
580
581 // Don't fold non-temporal loads if we have an instruction for them.
582 if (useNonTemporalLoad(cast<LoadSDNode>(N)))
583 return false;
584
585 // If N is a load, do additional profitability checks.
586 if (U == Root) {
587 switch (U->getOpcode()) {
588 default: break;
589 case X86ISD::ADD:
590 case X86ISD::ADC:
591 case X86ISD::SUB:
592 case X86ISD::SBB:
593 case X86ISD::AND:
594 case X86ISD::XOR:
595 case X86ISD::OR:
596 case ISD::ADD:
597 case ISD::ADDCARRY:
598 case ISD::AND:
599 case ISD::OR:
600 case ISD::XOR: {
601 SDValue Op1 = U->getOperand(1);
602
603 // If the other operand is a 8-bit immediate we should fold the immediate
604 // instead. This reduces code size.
605 // e.g.
606 // movl 4(%esp), %eax
607 // addl $4, %eax
608 // vs.
609 // movl $4, %eax
610 // addl 4(%esp), %eax
611 // The former is 2 bytes shorter. In case where the increment is 1, then
612 // the saving can be 4 bytes (by using incl %eax).
613 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
614 if (Imm->getAPIntValue().isSignedIntN(8))
615 return false;
616
617 // If this is a 64-bit AND with an immediate that fits in 32-bits,
618 // prefer using the smaller and over folding the load. This is needed to
619 // make sure immediates created by shrinkAndImmediate are always folded.
620 // Ideally we would narrow the load during DAG combine and get the
621 // best of both worlds.
622 if (U->getOpcode() == ISD::AND &&
623 Imm->getAPIntValue().getBitWidth() == 64 &&
624 Imm->getAPIntValue().isIntN(32))
625 return false;
626
627 // If this is really a zext_inreg that can be represented with a movzx
628 // instruction, prefer that.
629 // TODO: We could shrink the load and fold if it is non-volatile.
630 if (U->getOpcode() == ISD::AND &&
631 (Imm->getAPIntValue() == UINT8_MAX ||
632 Imm->getAPIntValue() == UINT16_MAX ||
633 Imm->getAPIntValue() == UINT32_MAX))
634 return false;
635
636 // ADD/SUB can negate the immediate and use the opposite operation
637 // to fit 128 into a sign extended 8 bit immediate.
638 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
639 (-Imm->getAPIntValue()).isSignedIntN(8))
640 return false;
641 }
642
643 // If the other operand is a TLS address, we should fold it instead.
644 // This produces
645 // movl %gs:0, %eax
646 // leal i@NTPOFF(%eax), %eax
647 // instead of
648 // movl $i@NTPOFF, %eax
649 // addl %gs:0, %eax
650 // if the block also has an access to a second TLS address this will save
651 // a load.
652 // FIXME: This is probably also true for non-TLS addresses.
653 if (Op1.getOpcode() == X86ISD::Wrapper) {
654 SDValue Val = Op1.getOperand(0);
655 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
656 return false;
657 }
658
659 // Don't fold load if this matches the BTS/BTR/BTC patterns.
660 // BTS: (or X, (shl 1, n))
661 // BTR: (and X, (rotl -2, n))
662 // BTC: (xor X, (shl 1, n))
663 if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
664 if (U->getOperand(0).getOpcode() == ISD::SHL &&
665 isOneConstant(U->getOperand(0).getOperand(0)))
666 return false;
667
668 if (U->getOperand(1).getOpcode() == ISD::SHL &&
669 isOneConstant(U->getOperand(1).getOperand(0)))
670 return false;
671 }
672 if (U->getOpcode() == ISD::AND) {
673 SDValue U0 = U->getOperand(0);
674 SDValue U1 = U->getOperand(1);
675 if (U0.getOpcode() == ISD::ROTL) {
676 auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
677 if (C && C->getSExtValue() == -2)
678 return false;
679 }
680
681 if (U1.getOpcode() == ISD::ROTL) {
682 auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
683 if (C && C->getSExtValue() == -2)
684 return false;
685 }
686 }
687
688 break;
689 }
690 case ISD::SHL:
691 case ISD::SRA:
692 case ISD::SRL:
693 // Don't fold a load into a shift by immediate. The BMI2 instructions
694 // support folding a load, but not an immediate. The legacy instructions
695 // support folding an immediate, but can't fold a load. Folding an
696 // immediate is preferable to folding a load.
697 if (isa<ConstantSDNode>(U->getOperand(1)))
698 return false;
699
700 break;
701 }
702 }
703
704 // Prevent folding a load if this can be implemented with an insert_subreg or
705 // a move that implicitly zeroes.
706 if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
707 isNullConstant(Root->getOperand(2)) &&
708 (Root->getOperand(0).isUndef() ||
709 ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
710 return false;
711
712 return true;
713}
714
715/// Replace the original chain operand of the call with
716/// load's chain operand and move load below the call's chain operand.
717static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
718 SDValue Call, SDValue OrigChain) {
719 SmallVector<SDValue, 8> Ops;
720 SDValue Chain = OrigChain.getOperand(0);
721 if (Chain.getNode() == Load.getNode())
722 Ops.push_back(Load.getOperand(0));
723 else {
724 assert(Chain.getOpcode() == ISD::TokenFactor &&
725 "Unexpected chain operand");
726 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
727 if (Chain.getOperand(i).getNode() == Load.getNode())
728 Ops.push_back(Load.getOperand(0));
729 else
730 Ops.push_back(Chain.getOperand(i));
731 SDValue NewChain =
732 CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
733 Ops.clear();
734 Ops.push_back(NewChain);
735 }
736 Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
737 CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
738 CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
739 Load.getOperand(1), Load.getOperand(2));
740
741 Ops.clear();
742 Ops.push_back(SDValue(Load.getNode(), 1));
743 Ops.append(Call->op_begin() + 1, Call->op_end());
744 CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
745}
746
747/// Return true if call address is a load and it can be
748/// moved below CALLSEQ_START and the chains leading up to the call.
749/// Return the CALLSEQ_START by reference as a second output.
750/// In the case of a tail call, there isn't a callseq node between the call
751/// chain and the load.
752static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
753 // The transformation is somewhat dangerous if the call's chain was glued to
754 // the call. After MoveBelowOrigChain the load is moved between the call and
755 // the chain, this can create a cycle if the load is not folded. So it is
756 // *really* important that we are sure the load will be folded.
757 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
758 return false;
759 LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
760 if (!LD ||
761 !LD->isSimple() ||
762 LD->getAddressingMode() != ISD::UNINDEXED ||
763 LD->getExtensionType() != ISD::NON_EXTLOAD)
764 return false;
765
766 // Now let's find the callseq_start.
767 while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
768 if (!Chain.hasOneUse())
769 return false;
770 Chain = Chain.getOperand(0);
771 }
772
773 if (!Chain.getNumOperands())
774 return false;
775 // Since we are not checking for AA here, conservatively abort if the chain
776 // writes to memory. It's not safe to move the callee (a load) across a store.
777 if (isa<MemSDNode>(Chain.getNode()) &&
778 cast<MemSDNode>(Chain.getNode())->writeMem())
779 return false;
780 if (Chain.getOperand(0).getNode() == Callee.getNode())
781 return true;
782 if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
783 Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
784 Callee.getValue(1).hasOneUse())
785 return true;
786 return false;
787}
788
789void X86DAGToDAGISel::PreprocessISelDAG() {
790 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
791 E = CurDAG->allnodes_end(); I != E; ) {
792 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
793
794 // If this is a target specific AND node with no flag usages, turn it back
795 // into ISD::AND to enable test instruction matching.
796 if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
797 SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
798 N->getOperand(0), N->getOperand(1));
799 --I;
800 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
801 ++I;
802 CurDAG->DeleteNode(N);
803 continue;
804 }
805
806 switch (N->getOpcode()) {
807 case ISD::FP_TO_SINT:
808 case ISD::FP_TO_UINT: {
809 // Replace vector fp_to_s/uint with their X86 specific equivalent so we
810 // don't need 2 sets of patterns.
811 if (!N->getSimpleValueType(0).isVector())
812 break;
813
814 unsigned NewOpc;
815 switch (N->getOpcode()) {
816 default: llvm_unreachable("Unexpected opcode!");
817 case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
818 case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
819 }
820 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
821 N->getOperand(0));
822 --I;
823 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
824 ++I;
825 CurDAG->DeleteNode(N);
826 continue;
827 }
828 case ISD::SHL:
829 case ISD::SRA:
830 case ISD::SRL: {
831 // Replace vector shifts with their X86 specific equivalent so we don't
832 // need 2 sets of patterns.
833 if (!N->getValueType(0).isVector())
834 break;
835
836 unsigned NewOpc;
837 switch (N->getOpcode()) {
838 default: llvm_unreachable("Unexpected opcode!");
839 case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
840 case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
841 case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
842 }
843 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
844 N->getOperand(0), N->getOperand(1));
845 --I;
846 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
847 ++I;
848 CurDAG->DeleteNode(N);
849 continue;
850 }
851 case ISD::ANY_EXTEND:
852 case ISD::ANY_EXTEND_VECTOR_INREG: {
853 // Replace vector any extend with the zero extend equivalents so we don't
854 // need 2 sets of patterns. Ignore vXi1 extensions.
855 if (!N->getValueType(0).isVector() ||
856 N->getOperand(0).getScalarValueSizeInBits() == 1)
857 break;
858
859 unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND
860 ? ISD::ZERO_EXTEND
861 : ISD::ZERO_EXTEND_VECTOR_INREG;
862
863 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
864 N->getOperand(0));
865 --I;
866 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
867 ++I;
868 CurDAG->DeleteNode(N);
869 continue;
870 }
871 case ISD::FCEIL:
872 case ISD::FFLOOR:
873 case ISD::FTRUNC:
874 case ISD::FNEARBYINT:
875 case ISD::FRINT: {
876 // Replace fp rounding with their X86 specific equivalent so we don't
877 // need 2 sets of patterns.
878 unsigned Imm;
879 switch (N->getOpcode()) {
880 default: llvm_unreachable("Unexpected opcode!");
881 case ISD::FCEIL: Imm = 0xA; break;
882 case ISD::FFLOOR: Imm = 0x9; break;
883 case ISD::FTRUNC: Imm = 0xB; break;
884 case ISD::FNEARBYINT: Imm = 0xC; break;
885 case ISD::FRINT: Imm = 0x4; break;
886 }
887 SDLoc dl(N);
888 SDValue Res = CurDAG->getNode(
889 X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
890 CurDAG->getTargetConstant(Imm, dl, MVT::i8));
891 --I;
892 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
893 ++I;
894 CurDAG->DeleteNode(N);
895 continue;
896 }
897 case X86ISD::FANDN:
898 case X86ISD::FAND:
899 case X86ISD::FOR:
900 case X86ISD::FXOR: {
901 // Widen scalar fp logic ops to vector to reduce isel patterns.
902 // FIXME: Can we do this during lowering/combine.
903 MVT VT = N->getSimpleValueType(0);
904 if (VT.isVector() || VT == MVT::f128)
905 break;
906
907 MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
908 SDLoc dl(N);
909 SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
910 N->getOperand(0));
911 SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
912 N->getOperand(1));
913
914 SDValue Res;
915 if (Subtarget->hasSSE2()) {
916 EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
917 Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
918 Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
919 unsigned Opc;
920 switch (N->getOpcode()) {
921 default: llvm_unreachable("Unexpected opcode!");
922 case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
923 case X86ISD::FAND: Opc = ISD::AND; break;
924 case X86ISD::FOR: Opc = ISD::OR; break;
925 case X86ISD::FXOR: Opc = ISD::XOR; break;
926 }
927 Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
928 Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
929 } else {
930 Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
931 }
932 Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
933 CurDAG->getIntPtrConstant(0, dl));
934 --I;
935 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
936 ++I;
937 CurDAG->DeleteNode(N);
938 continue;
939 }
940 }
941
942 if (OptLevel != CodeGenOpt::None &&
943 // Only do this when the target can fold the load into the call or
944 // jmp.
945 !Subtarget->useRetpolineIndirectCalls() &&
946 ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
947 (N->getOpcode() == X86ISD::TC_RETURN &&
948 (Subtarget->is64Bit() ||
949 !getTargetMachine().isPositionIndependent())))) {
950 /// Also try moving call address load from outside callseq_start to just
951 /// before the call to allow it to be folded.
952 ///
953 /// [Load chain]
954 /// ^
955 /// |
956 /// [Load]
957 /// ^ ^
958 /// | |
959 /// / \--
960 /// / |
961 ///[CALLSEQ_START] |
962 /// ^ |
963 /// | |
964 /// [LOAD/C2Reg] |
965 /// | |
966 /// \ /
967 /// \ /
968 /// [CALL]
969 bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
970 SDValue Chain = N->getOperand(0);
971 SDValue Load = N->getOperand(1);
972 if (!isCalleeLoad(Load, Chain, HasCallSeq))
973 continue;
974 moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
975 ++NumLoadMoved;
976 continue;
977 }
978
979 // Lower fpround and fpextend nodes that target the FP stack to be store and
980 // load to the stack. This is a gross hack. We would like to simply mark
981 // these as being illegal, but when we do that, legalize produces these when
982 // it expands calls, then expands these in the same legalize pass. We would
983 // like dag combine to be able to hack on these between the call expansion
984 // and the node legalization. As such this pass basically does "really
985 // late" legalization of these inline with the X86 isel pass.
986 // FIXME: This should only happen when not compiled with -O0.
987 switch (N->getOpcode()) {
988 default: continue;
989 case ISD::FP_ROUND:
990 case ISD::FP_EXTEND:
991 {
992 MVT SrcVT = N->getOperand(0).getSimpleValueType();
993 MVT DstVT = N->getSimpleValueType(0);
994
995 // If any of the sources are vectors, no fp stack involved.
996 if (SrcVT.isVector() || DstVT.isVector())
997 continue;
998
999 // If the source and destination are SSE registers, then this is a legal
1000 // conversion that should not be lowered.
1001 const X86TargetLowering *X86Lowering =
1002 static_cast<const X86TargetLowering *>(TLI);
1003 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1004 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1005 if (SrcIsSSE && DstIsSSE)
1006 continue;
1007
1008 if (!SrcIsSSE && !DstIsSSE) {
1009 // If this is an FPStack extension, it is a noop.
1010 if (N->getOpcode() == ISD::FP_EXTEND)
1011 continue;
1012 // If this is a value-preserving FPStack truncation, it is a noop.
1013 if (N->getConstantOperandVal(1))
1014 continue;
1015 }
1016
1017 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1018 // FPStack has extload and truncstore. SSE can fold direct loads into other
1019 // operations. Based on this, decide what we want to do.
1020 MVT MemVT;
1021 if (N->getOpcode() == ISD::FP_ROUND)
1022 MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
1023 else
1024 MemVT = SrcIsSSE ? SrcVT : DstVT;
1025
1026 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1027 SDLoc dl(N);
1028
1029 // FIXME: optimize the case where the src/dest is a load or store?
1030
1031 SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
1032 MemTmp, MachinePointerInfo(), MemVT);
1033 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
1034 MachinePointerInfo(), MemVT);
1035
1036 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1037 // extload we created. This will cause general havok on the dag because
1038 // anything below the conversion could be folded into other existing nodes.
1039 // To avoid invalidating 'I', back it up to the convert node.
1040 --I;
1041 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1042 break;
1043 }
1044
1045 //The sequence of events for lowering STRICT_FP versions of these nodes requires
1046 //dealing with the chain differently, as there is already a preexisting chain.
1047 case ISD::STRICT_FP_ROUND:
1048 case ISD::STRICT_FP_EXTEND:
1049 {
1050 MVT SrcVT = N->getOperand(1).getSimpleValueType();
1051 MVT DstVT = N->getSimpleValueType(0);
1052
1053 // If any of the sources are vectors, no fp stack involved.
1054 if (SrcVT.isVector() || DstVT.isVector())
1055 continue;
1056
1057 // If the source and destination are SSE registers, then this is a legal
1058 // conversion that should not be lowered.
1059 const X86TargetLowering *X86Lowering =
1060 static_cast<const X86TargetLowering *>(TLI);
1061 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1062 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1063 if (SrcIsSSE && DstIsSSE)
1064 continue;
1065
1066 if (!SrcIsSSE && !DstIsSSE) {
1067 // If this is an FPStack extension, it is a noop.
1068 if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1069 continue;
1070 // If this is a value-preserving FPStack truncation, it is a noop.
1071 if (N->getConstantOperandVal(2))
1072 continue;
1073 }
1074
1075 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1076 // FPStack has extload and truncstore. SSE can fold direct loads into other
1077 // operations. Based on this, decide what we want to do.
1078 MVT MemVT;
1079 if (N->getOpcode() == ISD::STRICT_FP_ROUND)
1080 MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
1081 else
1082 MemVT = SrcIsSSE ? SrcVT : DstVT;
1083
1084 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1085 SDLoc dl(N);
1086
1087 // FIXME: optimize the case where the src/dest is a load or store?
1088
1089 //Since the operation is StrictFP, use the preexisting chain.
1090 SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
1091 MemTmp, MachinePointerInfo(), MemVT);
1092 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
1093 MachinePointerInfo(), MemVT);
1094
1095 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1096 // extload we created. This will cause general havok on the dag because
1097 // anything below the conversion could be folded into other existing nodes.
1098 // To avoid invalidating 'I', back it up to the convert node.
1099 --I;
1100 CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1101 break;
1102 }
1103 }
1104
1105
1106 // Now that we did that, the node is dead. Increment the iterator to the
1107 // next node to process, then delete N.
1108 ++I;
1109 CurDAG->DeleteNode(N);
1110 }
1111
1112 // The load+call transform above can leave some dead nodes in the graph. Make
1113 // sure we remove them. It's possible some of the other transforms do too, so
1114 // just remove dead nodes unconditionally.
1115 CurDAG->RemoveDeadNodes();
1116}
1117
1118// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1119bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
1120 unsigned Opc = N->getMachineOpcode();
1121 if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1122 Opc != X86::MOVSX64rr8)
1123 return false;
1124
1125 SDValue N0 = N->getOperand(0);
1126
1127 // We need to be extracting the lower bit of an extend.
1128 if (!N0.isMachineOpcode() ||
1129 N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
1130 N0.getConstantOperandVal(1) != X86::sub_8bit)
1131 return false;
1132
1133 // We're looking for either a movsx or movzx to match the original opcode.
1134 unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1135 : X86::MOVSX32rr8_NOREX;
1136 SDValue N00 = N0.getOperand(0);
1137 if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
1138 return false;
1139
1140 if (Opc == X86::MOVSX64rr8) {
1141 // If we had a sign extend from 8 to 64 bits. We still need to go from 32
1142 // to 64.
1143 MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1144 MVT::i64, N00);
1145 ReplaceUses(N, Extend);
1146 } else {
1147 // Ok we can drop this extend and just use the original extend.
1148 ReplaceUses(N, N00.getNode());
1149 }
1150
1151 return true;
1152}
1153
1154void X86DAGToDAGISel::PostprocessISelDAG() {
1155 // Skip peepholes at -O0.
1156 if (TM.getOptLevel() == CodeGenOpt::None)
1157 return;
1158
1159 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1160
1161 bool MadeChange = false;
1162 while (Position != CurDAG->allnodes_begin()) {
1163 SDNode *N = &*--Position;
1164 // Skip dead nodes and any non-machine opcodes.
1165 if (N->use_empty() || !N->isMachineOpcode())
1166 continue;
1167
1168 if (tryOptimizeRem8Extend(N)) {
1169 MadeChange = true;
1170 continue;
1171 }
1172
1173 // Look for a TESTrr+ANDrr pattern where both operands of the test are
1174 // the same. Rewrite to remove the AND.
1175 unsigned Opc = N->getMachineOpcode();
1176 if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
1177 Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
1178 N->getOperand(0) == N->getOperand(1) &&
1179 N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1180 N->getOperand(0).isMachineOpcode()) {
1181 SDValue And = N->getOperand(0);
1182 unsigned N0Opc = And.getMachineOpcode();
1183 if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
1184 N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
1185 MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
1186 MVT::i32,
1187 And.getOperand(0),
1188 And.getOperand(1));
1189 ReplaceUses(N, Test);
1190 MadeChange = true;
1191 continue;
1192 }
1193 if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
1194 N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
1195 unsigned NewOpc;
1196 switch (N0Opc) {
1197 case X86::AND8rm: NewOpc = X86::TEST8mr; break;
1198 case X86::AND16rm: NewOpc = X86::TEST16mr; break;
1199 case X86::AND32rm: NewOpc = X86::TEST32mr; break;
1200 case X86::AND64rm: NewOpc = X86::TEST64mr; break;
1201 }
1202
1203 // Need to swap the memory and register operand.
1204 SDValue Ops[] = { And.getOperand(1),
1205 And.getOperand(2),
1206 And.getOperand(3),
1207 And.getOperand(4),
1208 And.getOperand(5),
1209 And.getOperand(0),
1210 And.getOperand(6) /* Chain */ };
1211 MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1212 MVT::i32, MVT::Other, Ops);
1213 ReplaceUses(N, Test);
1214 MadeChange = true;
1215 continue;
1216 }
1217 }
1218
1219 // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1220 // used. We're doing this late so we can prefer to fold the AND into masked
1221 // comparisons. Doing that can be better for the live range of the mask
1222 // register.
1223 if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1224 Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1225 N->getOperand(0) == N->getOperand(1) &&
1226 N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1227 N->getOperand(0).isMachineOpcode() &&
1228 onlyUsesZeroFlag(SDValue(N, 0))) {
1229 SDValue And = N->getOperand(0);
1230 unsigned N0Opc = And.getMachineOpcode();
1231 // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1232 // KAND instructions and KTEST use the same ISA feature.
1233 if (N0Opc == X86::KANDBrr ||
1234 (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1235 N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1236 unsigned NewOpc;
1237 switch (Opc) {
1238 default: llvm_unreachable("Unexpected opcode!");
1239 case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
1240 case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
1241 case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
1242 case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
1243 }
1244 MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1245 MVT::i32,
1246 And.getOperand(0),
1247 And.getOperand(1));
1248 ReplaceUses(N, KTest);
1249 MadeChange = true;
1250 continue;
1251 }
1252 }
1253
1254 // Attempt to remove vectors moves that were inserted to zero upper bits.
1255 if (Opc != TargetOpcode::SUBREG_TO_REG)
1256 continue;
1257
1258 unsigned SubRegIdx = N->getConstantOperandVal(2);
1259 if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1260 continue;
1261
1262 SDValue Move = N->getOperand(1);
1263 if (!Move.isMachineOpcode())
1264 continue;
1265
1266 // Make sure it's one of the move opcodes we recognize.
1267 switch (Move.getMachineOpcode()) {
1268 default:
1269 continue;
1270 case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1271 case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1272 case X86::VMOVDQArr: case X86::VMOVDQUrr:
1273 case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1274 case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1275 case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1276 case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1277 case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1278 case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1279 case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1280 case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1281 case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1282 case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1283 case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1284 break;
1285 }
1286
1287 SDValue In = Move.getOperand(0);
1288 if (!In.isMachineOpcode() ||
1289 In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1290 continue;
1291
1292 // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
1293 // the SHA instructions which use a legacy encoding.
1294 uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1295 if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1296 (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1297 (TSFlags & X86II::EncodingMask) != X86II::XOP)
1298 continue;
1299
1300 // Producing instruction is another vector instruction. We can drop the
1301 // move.
1302 CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1303 MadeChange = true;
1304 }
1305
1306 if (MadeChange)
1307 CurDAG->RemoveDeadNodes();
1308}
1309
1310
1311/// Emit any code that needs to be executed only in the main function.
1312void X86DAGToDAGISel::emitSpecialCodeForMain() {
1313 if (Subtarget->isTargetCygMing()) {
1314 TargetLowering::ArgListTy Args;
1315 auto &DL = CurDAG->getDataLayout();
1316
1317 TargetLowering::CallLoweringInfo CLI(*CurDAG);
1318 CLI.setChain(CurDAG->getRoot())
1319 .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1320 CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1321 std::move(Args));
1322 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1323 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1324 CurDAG->setRoot(Result.second);
1325 }
1326}
1327
1328void X86DAGToDAGISel::EmitFunctionEntryCode() {
1329 // If this is main, emit special code for main.
1330 const Function &F = MF->getFunction();
1331 if (F.hasExternalLinkage() && F.getName() == "main")
1332 emitSpecialCodeForMain();
1333}
1334
1335static bool isDispSafeForFrameIndex(int64_t Val) {
1336 // On 64-bit platforms, we can run into an issue where a frame index
1337 // includes a displacement that, when added to the explicit displacement,
1338 // will overflow the displacement field. Assuming that the frame index
1339 // displacement fits into a 31-bit integer (which is only slightly more
1340 // aggressive than the current fundamental assumption that it fits into
1341 // a 32-bit integer), a 31-bit disp should always be safe.
1342 return isInt<31>(Val);
1343}
1344
1345bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1346 X86ISelAddressMode &AM) {
1347 // If there's no offset to fold, we don't need to do any work.
1348 if (Offset == 0)
1349 return false;
1350
1351 // Cannot combine ExternalSymbol displacements with integer offsets.
1352 if (AM.ES || AM.MCSym)
1353 return true;
1354
1355 int64_t Val = AM.Disp + Offset;
1356 CodeModel::Model M = TM.getCodeModel();
1357 if (Subtarget->is64Bit()) {
1358 if (!X86::isOffsetSuitableForCodeModel(Val, M,
1359 AM.hasSymbolicDisplacement()))
1360 return true;
1361 // In addition to the checks required for a register base, check that
1362 // we do not try to use an unsafe Disp with a frame index.
1363 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1364 !isDispSafeForFrameIndex(Val))
1365 return true;
1366 }
1367 AM.Disp = Val;
1368 return false;
1369
1370}
1371
1372bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1373 SDValue Address = N->getOperand(1);
1374
1375 // load gs:0 -> GS segment register.
1376 // load fs:0 -> FS segment register.
1377 //
1378 // This optimization is valid because the GNU TLS model defines that
1379 // gs:0 (or fs:0 on X86-64) contains its own address.
1380 // For more information see http://people.redhat.com/drepper/tls.pdf
1381 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1382 if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1383 !IndirectTlsSegRefs &&
1384 (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1385 Subtarget->isTargetFuchsia()))
1386 switch (N->getPointerInfo().getAddrSpace()) {
1387 case 256:
1388 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1389 return false;
1390 case 257:
1391 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1392 return false;
1393 // Address space 258 is not handled here, because it is not used to
1394 // address TLS areas.
1395 }
1396
1397 return true;
1398}
1399
1400/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1401/// mode. These wrap things that will resolve down into a symbol reference.
1402/// If no match is possible, this returns true, otherwise it returns false.
1403bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1404 // If the addressing mode already has a symbol as the displacement, we can
1405 // never match another symbol.
1406 if (AM.hasSymbolicDisplacement())
1407 return true;
1408
1409 bool IsRIPRelTLS = false;
1410 bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1411 if (IsRIPRel) {
1412 SDValue Val = N.getOperand(0);
1413 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1414 IsRIPRelTLS = true;
1415 }
1416
1417 // We can't use an addressing mode in the 64-bit large code model.
1418 // Global TLS addressing is an exception. In the medium code model,
1419  // we can use a mode when RIP wrappers are present.
1420 // That signifies access to globals that are known to be "near",
1421 // such as the GOT itself.
1422 CodeModel::Model M = TM.getCodeModel();
1423 if (Subtarget->is64Bit() &&
1424 ((M == CodeModel::Large && !IsRIPRelTLS) ||
1425 (M == CodeModel::Medium && !IsRIPRel)))
1426 return true;
1427
1428 // Base and index reg must be 0 in order to use %rip as base.
1429 if (IsRIPRel && AM.hasBaseOrIndexReg())
1430 return true;
1431
1432 // Make a local copy in case we can't do this fold.
1433 X86ISelAddressMode Backup = AM;
1434
1435 int64_t Offset = 0;
1436 SDValue N0 = N.getOperand(0);
1437 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1438 AM.GV = G->getGlobal();
1439 AM.SymbolFlags = G->getTargetFlags();
1440 Offset = G->getOffset();
1441 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1442 AM.CP = CP->getConstVal();
1443 AM.Align = CP->getAlignment();
1444 AM.SymbolFlags = CP->getTargetFlags();
1445 Offset = CP->getOffset();
1446 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1447 AM.ES = S->getSymbol();
1448 AM.SymbolFlags = S->getTargetFlags();
1449 } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1450 AM.MCSym = S->getMCSymbol();
1451 } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1452 AM.JT = J->getIndex();
1453 AM.SymbolFlags = J->getTargetFlags();
1454 } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1455 AM.BlockAddr = BA->getBlockAddress();
1456 AM.SymbolFlags = BA->getTargetFlags();
1457 Offset = BA->getOffset();
1458 } else
1459     llvm_unreachable("Unhandled symbol reference node.");
1460
1461 if (foldOffsetIntoAddress(Offset, AM)) {
1462 AM = Backup;
1463 return true;
1464 }
1465
1466 if (IsRIPRel)
1467 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1468
1469 // Commit the changes now that we know this fold is safe.
1470 return false;
1471}
1472
1473/// Add the specified node to the specified addressing mode, returning true if
1474/// it cannot be done. This just pattern matches for the addressing mode.
1475bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1476 if (matchAddressRecursively(N, AM, 0))
1477 return true;
1478
1479 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1480 // a smaller encoding and avoids a scaled-index.
1481 if (AM.Scale == 2 &&
1482 AM.BaseType == X86ISelAddressMode::RegBase &&
1483 AM.Base_Reg.getNode() == nullptr) {
1484 AM.Base_Reg = AM.IndexReg;
1485 AM.Scale = 1;
1486 }
1487
1488 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1489 // because it has a smaller encoding.
1490 // TODO: Which other code models can use this?
1491 switch (TM.getCodeModel()) {
1492 default: break;
1493 case CodeModel::Small:
1494 case CodeModel::Kernel:
1495 if (Subtarget->is64Bit() &&
1496 AM.Scale == 1 &&
1497 AM.BaseType == X86ISelAddressMode::RegBase &&
1498 AM.Base_Reg.getNode() == nullptr &&
1499 AM.IndexReg.getNode() == nullptr &&
1500 AM.SymbolFlags == X86II::MO_NO_FLAG &&
1501 AM.hasSymbolicDisplacement())
1502 AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1503 break;
1504 }
1505
1506 return false;
1507}
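// Illustrative sketch (not part of X86ISelDAGToDAG.cpp): the first
// post-processing step above relies on reg*2 == reg + reg, so an index-only
// mode with Scale == 2, lea (,%reg,2), can be re-expressed as the smaller
// lea (%reg,%reg). A standalone check with a hypothetical register value:
#include <cstdint>
#include <cassert>
static void checkScaleTwoRewrite(uint64_t Reg) {
  uint64_t ScaledIndexOnly = Reg * 2;    // lea (,%reg,2)
  uint64_t BasePlusIndex   = Reg + Reg;  // lea (%reg,%reg)
  assert(ScaledIndexOnly == BasePlusIndex);
}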
1508
1509bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1510 unsigned Depth) {
1511 // Add an artificial use to this node so that we can keep track of
1512 // it if it gets CSE'd with a different node.
1513 HandleSDNode Handle(N);
1514
1515 X86ISelAddressMode Backup = AM;
1516 if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1517 !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1518 return false;
1519 AM = Backup;
1520
1521 // Try again after commuting the operands.
1522 if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1523 !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1524 return false;
1525 AM = Backup;
1526
1527 // If we couldn't fold both operands into the address at the same time,
1528 // see if we can just put each operand into a register and fold at least
1529 // the add.
1530 if (AM.BaseType == X86ISelAddressMode::RegBase &&
1531 !AM.Base_Reg.getNode() &&
1532 !AM.IndexReg.getNode()) {
1533 N = Handle.getValue();
1534 AM.Base_Reg = N.getOperand(0);
1535 AM.IndexReg = N.getOperand(1);
1536 AM.Scale = 1;
1537 return false;
1538 }
1539 N = Handle.getValue();
1540 return true;
1541}
1542
1543// Insert a node into the DAG at least before the Pos node's position. This
1544// will reposition the node as needed, and will assign it a node ID that is <=
1545// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1546// IDs! The selection DAG must no longer depend on their uniqueness when this
1547// is used.
1548static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1549 if (N->getNodeId() == -1 ||
1550 (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1551 SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1552 DAG.RepositionNode(Pos->getIterator(), N.getNode());
1553 // Mark Node as invalid for pruning as after this it may be a successor to a
1554 // selected node but otherwise be in the same position of Pos.
1555 // Conservatively mark it with the same -abs(Id) to assure node id
1556 // invariant is preserved.
1557 N->setNodeId(Pos->getNodeId());
1558 SelectionDAGISel::InvalidateNodeId(N.getNode());
1559 }
1560}
1561
1562// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1563// safe. This allows us to convert the shift and and into an h-register
1564// extract and a scaled index. Returns false if the simplification is
1565// performed.
1566static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1567 uint64_t Mask,
1568 SDValue Shift, SDValue X,
1569 X86ISelAddressMode &AM) {
1570 if (Shift.getOpcode() != ISD::SRL ||
1571 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1572 !Shift.hasOneUse())
1573 return true;
1574
1575 int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1576 if (ScaleLog <= 0 || ScaleLog >= 4 ||
1577 Mask != (0xffu << ScaleLog))
1578 return true;
1579
1580 MVT VT = N.getSimpleValueType();
1581 SDLoc DL(N);
1582 SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1583 SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1584 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1585 SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1586 SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1587 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1588
1589 // Insert the new nodes into the topological ordering. We must do this in
1590 // a valid topological ordering as nothing is going to go back and re-sort
1591 // these nodes. We continually insert before 'N' in sequence as this is
1592 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1593 // hierarchy left to express.
1594 insertDAGNode(DAG, N, Eight);
1595 insertDAGNode(DAG, N, Srl);
1596 insertDAGNode(DAG, N, NewMask);
1597 insertDAGNode(DAG, N, And);
1598 insertDAGNode(DAG, N, ShlCount);
1599 insertDAGNode(DAG, N, Shl);
1600 DAG.ReplaceAllUsesWith(N, Shl);
1601 DAG.RemoveDeadNode(N.getNode());
1602 AM.IndexReg = And;
1603 AM.Scale = (1 << ScaleLog);
1604 return false;
1605}
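// Illustrative sketch (not part of X86ISelDAGToDAG.cpp): the rewrite above is
// based on the identity (X >> (8-C1)) & (0xff << C1) == ((X >> 8) & 0xff) << C1
// for C1 in [1,3]; both sides extract bits 8..15 of X and place them at bit C1.
// A standalone check under that assumption:
#include <cstdint>
static bool checkExtractIdentity(uint32_t X, unsigned C1) {
  uint32_t Original  = (X >> (8 - C1)) & (0xffu << C1);
  uint32_t Rewritten = ((X >> 8) & 0xffu) << C1;
  return Original == Rewritten;
}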
1606
1607// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1608// allows us to fold the shift into this addressing mode. Returns false if the
1609// transform succeeded.
1610static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1611 X86ISelAddressMode &AM) {
1612 SDValue Shift = N.getOperand(0);
1613
1614 // Use a signed mask so that shifting right will insert sign bits. These
1615 // bits will be removed when we shift the result left so it doesn't matter
1616 // what we use. This might allow a smaller immediate encoding.
1617 int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
1618
1619 // If we have an any_extend feeding the AND, look through it to see if there
1620 // is a shift behind it. But only if the AND doesn't use the extended bits.
1621 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
1622 bool FoundAnyExtend = false;
1623 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
1624 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
1625 isUInt<32>(Mask)) {
1626 FoundAnyExtend = true;
1627 Shift = Shift.getOperand(0);
1628 }
1629
1630 if (Shift.getOpcode() != ISD::SHL ||
1631 !isa<ConstantSDNode>(Shift.getOperand(1)))
1632 return true;
1633
1634 SDValue X = Shift.getOperand(0);
1635
1636 // Not likely to be profitable if either the AND or SHIFT node has more
1637 // than one use (unless all uses are for address computation). Besides,
1638 // isel mechanism requires their node ids to be reused.
1639 if (!N.hasOneUse() || !Shift.hasOneUse())
1640 return true;
1641
1642 // Verify that the shift amount is something we can fold.
1643 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1644 if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1645 return true;
1646
1647 MVT VT = N.getSimpleValueType();
1648 SDLoc DL(N);
1649 if (FoundAnyExtend) {
1650 SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
1651 insertDAGNode(DAG, N, NewX);
1652 X = NewX;
1653 }
1654
1655 SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1656 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1657 SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1658
1659 // Insert the new nodes into the topological ordering. We must do this in
1660 // a valid topological ordering as nothing is going to go back and re-sort
1661 // these nodes. We continually insert before 'N' in sequence as this is
1662 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1663 // hierarchy left to express.
1664 insertDAGNode(DAG, N, NewMask);
1665 insertDAGNode(DAG, N, NewAnd);
1666 insertDAGNode(DAG, N, NewShift);
1667 DAG.ReplaceAllUsesWith(N, NewShift);
1668 DAG.RemoveDeadNode(N.getNode());
1669
1670 AM.Scale = 1 << ShiftAmt;
1671 AM.IndexReg = NewAnd;
1672 return false;
1673}
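// Illustrative sketch (not part of X86ISelDAGToDAG.cpp): the swap above uses
// (X << C1) & C2 == (X & (C2 >> C1)) << C1, which holds because the low C1
// bits of (X << C1) are already zero, so dropping them from the mask changes
// nothing. A standalone check in 64-bit unsigned arithmetic:
#include <cstdint>
static bool checkScaledMaskIdentity(uint64_t X, uint64_t C2, unsigned C1) {
  uint64_t Original  = (X << C1) & C2;
  uint64_t Rewritten = (X & (C2 >> C1)) << C1;
  return Original == Rewritten;
}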
1674
1675// Implement some heroics to detect shifts of masked values where the mask can
1676// be replaced by extending the shift and undoing that in the addressing mode
1677// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1678// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1679// the addressing mode. This results in code such as:
1680//
1681// int f(short *y, int *lookup_table) {
1682// ...
1683// return *y + lookup_table[*y >> 11];
1684// }
1685//
1686// Turning into:
1687// movzwl (%rdi), %eax
1688// movl %eax, %ecx
1689// shrl $11, %ecx
1690// addl (%rsi,%rcx,4), %eax
1691//
1692// Instead of:
1693// movzwl (%rdi), %eax
1694// movl %eax, %ecx
1695// shrl $9, %ecx
1696// andl $124, %rcx
1697// addl (%rsi,%rcx), %eax
1698//
1699// Note that this function assumes the mask is provided as a mask *after* the
1700// value is shifted. The input chain may or may not match that, but computing
1701// such a mask is trivial.
1702static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1703 uint64_t Mask,
1704 SDValue Shift, SDValue X,
1705 X86ISelAddressMode &AM) {
1706 if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1707 !isa<ConstantSDNode>(Shift.getOperand(1)))
1708 return true;
1709
1710 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1711 unsigned MaskLZ = countLeadingZeros(Mask);
1712 unsigned MaskTZ = countTrailingZeros(Mask);
1713
1714 // The amount of shift we're trying to fit into the addressing mode is taken
1715 // from the trailing zeros of the mask.
1716 unsigned AMShiftAmt = MaskTZ;
1717
1718 // There is nothing we can do here unless the mask is removing some bits.
1719 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1720 if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1721
1722 // We also need to ensure that mask is a continuous run of bits.
1723 if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1724
1725 // Scale the leading zero count down based on the actual size of the value.
1726 // Also scale it down based on the size of the shift.
1727 unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1728 if (MaskLZ < ScaleDown)
1729 return true;
1730 MaskLZ -= ScaleDown;
1731
1732 // The final check is to ensure that any masked out high bits of X are
1733 // already known to be zero. Otherwise, the mask has a semantic impact
1734 // other than masking out a couple of low bits. Unfortunately, because of
1735 // the mask, zero extensions will be removed from operands in some cases.
1736 // This code works extra hard to look through extensions because we can
1737 // replace them with zero extensions cheaply if necessary.
1738 bool ReplacingAnyExtend = false;
1739 if (X.getOpcode() == ISD::ANY_EXTEND) {
1740 unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1741 X.getOperand(0).getSimpleValueType().getSizeInBits();
1742 // Assume that we'll replace the any-extend with a zero-extend, and
1743 // narrow the search to the extended value.
1744 X = X.getOperand(0);
1745 MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1746 ReplacingAnyExtend = true;
1747 }
1748 APInt MaskedHighBits =
1749 APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1750 KnownBits Known = DAG.computeKnownBits(X);
1751 if (MaskedHighBits != Known.Zero) return true;
1752
1753 // We've identified a pattern that can be transformed into a single shift
1754 // and an addressing mode. Make it so.
1755 MVT VT = N.getSimpleValueType();
1756 if (ReplacingAnyExtend) {
1757     assert(X.getValueType() != VT);
1758 // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1759 SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1760 insertDAGNode(DAG, N, NewX);
1761 X = NewX;
1762 }
1763 SDLoc DL(N);
1764 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1765 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1766 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1767 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1768
1769 // Insert the new nodes into the topological ordering. We must do this in
1770 // a valid topological ordering as nothing is going to go back and re-sort
1771 // these nodes. We continually insert before 'N' in sequence as this is
1772 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1773 // hierarchy left to express.
1774 insertDAGNode(DAG, N, NewSRLAmt);
1775 insertDAGNode(DAG, N, NewSRL);
1776 insertDAGNode(DAG, N, NewSHLAmt);
1777 insertDAGNode(DAG, N, NewSHL);
1778 DAG.ReplaceAllUsesWith(N, NewSHL);
1779 DAG.RemoveDeadNode(N.getNode());
1780
1781 AM.Scale = 1 << AMShiftAmt;
1782 AM.IndexReg = NewSRL;
1783 return false;
1784}
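// Illustrative sketch (not part of X86ISelDAGToDAG.cpp): with a contiguous
// Mask whose trailing-zero count is AMShiftAmt, and with the bits of X above
// the masked range already known zero, the rewrite above reduces to
//   (X >> ShiftAmt) & Mask == (X >> (ShiftAmt + AMShiftAmt)) << AMShiftAmt.
// A standalone check with hypothetical values satisfying those assumptions:
#include <cstdint>
#include <cassert>
static void checkMaskAndShiftToScale() {
  uint64_t X = 0x0ABC;      // no bits above bit 11, the top of the masked range
  unsigned ShiftAmt = 2;
  uint64_t Mask = 0x3fc;    // contiguous run covering bits 2..9, AMShiftAmt = 2
  unsigned AMShiftAmt = 2;
  uint64_t Original  = (X >> ShiftAmt) & Mask;
  uint64_t Rewritten = (X >> (ShiftAmt + AMShiftAmt)) << AMShiftAmt;
  assert(Original == Rewritten);   // both evaluate to 0x2AC
}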
1785
1786// Transform "(X >> SHIFT) & (MASK << C1)" to
1787// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1788// matched to a BEXTR later. Returns false if the simplification is performed.
1789static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1790 uint64_t Mask,
1791 SDValue Shift, SDValue X,
1792 X86ISelAddressMode &AM,
1793 const X86Subtarget &Subtarget) {
1794 if (Shift.getOpcode() != ISD::SRL ||
1795 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1796 !Shift.hasOneUse() || !N.hasOneUse())
1797 return true;
1798
1799 // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1800 if (!Subtarget.hasTBM() &&
1801 !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1802 return true;
1803
1804 // We need to ensure that mask is a continuous run of bits.
1805 if (!isShiftedMask_64(Mask)) return true;
1806
1807 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1808
1809 // The amount of shift we're trying to fit into the addressing mode is taken
1810 // from the trailing zeros of the mask.
1811 unsigned AMShiftAmt = countTrailingZeros(Mask);
1812
1813 // There is nothing we can do here unless the mask is removing some bits.
1814 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1815 if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1816
1817 MVT VT = N.getSimpleValueType();
1818 SDLoc DL(N);
1819 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1820 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1821 SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1822 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1823 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1824 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1825
1826 // Insert the new nodes into the topological ordering. We must do this in
1827 // a valid topological ordering as nothing is going to go back and re-sort
1828 // these nodes. We continually insert before 'N' in sequence as this is
1829 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1830 // hierarchy left to express.
1831 insertDAGNode(DAG, N, NewSRLAmt);
1832 insertDAGNode(DAG, N, NewSRL);
1833 insertDAGNode(DAG, N, NewMask);
1834 insertDAGNode(DAG, N, NewAnd);
1835 insertDAGNode(DAG, N, NewSHLAmt);
1836 insertDAGNode(DAG, N, NewSHL);
1837 DAG.ReplaceAllUsesWith(N, NewSHL);
1838 DAG.RemoveDeadNode(N.getNode());
1839
1840 AM.Scale = 1 << AMShiftAmt;
1841 AM.IndexReg = NewAnd;
1842 return false;
1843}
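// Illustrative sketch (not part of X86ISelDAGToDAG.cpp): the rewrite above
// relies on (X >> SHIFT) & (MASK << C1) == ((X >> (SHIFT + C1)) & MASK) << C1,
// which holds because MASK << C1 already has zero low bits. A standalone
// check, assuming SHIFT + C1 stays below the bit width:
#include <cstdint>
static bool checkBEXTRScaleIdentity(uint64_t X, uint64_t Mask,
                                    unsigned Shift, unsigned C1) {
  uint64_t Original  = (X >> Shift) & (Mask << C1);
  uint64_t Rewritten = ((X >> (Shift + C1)) & Mask) << C1;
  return Original == Rewritten;
}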
1844
1845bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1846 unsigned Depth) {
1847 SDLoc dl(N);
1848  LLVM_DEBUG({
1849    dbgs() << "MatchAddress: ";
1850    AM.dump(CurDAG);
1851  });
1852 // Limit recursion.
1853 if (Depth > 5)
1854 return matchAddressBase(N, AM);
1855
1856 // If this is already a %rip relative address, we can only merge immediates
1857 // into it. Instead of handling this in every case, we handle it here.
1858 // RIP relative addressing: %rip + 32-bit displacement!
1859 if (AM.isRIPRelative()) {
1860 // FIXME: JumpTable and ExternalSymbol address currently don't like
1861 // displacements. It isn't very important, but this should be fixed for
1862 // consistency.
1863 if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1864 return true;
1865
1866 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1867 if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1868 return false;
1869 return true;
1870 }
1871
1872 switch (N.getOpcode()) {
1873 default: break;
1874 case ISD::LOCAL_RECOVER: {
1875 if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1876 if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1877 // Use the symbol and don't prefix it.
1878 AM.MCSym = ESNode->getMCSymbol();
1879 return false;
1880 }
1881 break;
1882 }
1883 case ISD::Constant: {
1884 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1885 if (!foldOffsetIntoAddress(Val, AM))
1886 return false;
1887 break;
1888 }
1889
1890 case X86ISD::Wrapper:
1891 case X86ISD::WrapperRIP:
1892 if (!matchWrapper(N, AM))
1893 return false;
1894 break;
1895
1896 case ISD::LOAD:
1897 if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1898 return false;
1899 break;
1900
1901 case ISD::FrameIndex:
1902 if (AM.BaseType == X86ISelAddressMode::RegBase &&
1903 AM.Base_Reg.getNode() == nullptr &&
1904 (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1905 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1906 AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1907 return false;
1908 }
1909 break;
1910
1911 case ISD::SHL:
1912 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1913 break;
1914
1915 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1916 unsigned Val = CN->getZExtValue();
1917 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1918 // that the base operand remains free for further matching. If
1919 // the base doesn't end up getting used, a post-processing step
1920 // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1921 if (Val == 1 || Val == 2 || Val == 3) {
1922 AM.Scale = 1 << Val;
1923 SDValue ShVal = N.getOperand(0);
1924
1925 // Okay, we know that we have a scale by now. However, if the scaled
1926 // value is an add of something and a constant, we can fold the
1927 // constant into the disp field here.
1928 if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1929 AM.IndexReg = ShVal.getOperand(0);
1930 ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1931 uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1932 if (!foldOffsetIntoAddress(Disp, AM))
1933 return false;
1934 }
1935
1936 AM.IndexReg = ShVal;
1937 return false;
1938 }
1939 }
1940 break;
1941
1942 case ISD::SRL: {
1943 // Scale must not be used already.
1944 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1945
1946 // We only handle up to 64-bit values here as those are what matter for
1947 // addressing mode optimizations.
1948  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1949         "Unexpected value size!");
1950
1951 SDValue And = N.getOperand(0);
1952 if (And.getOpcode() != ISD::AND) break;
1953 SDValue X = And.getOperand(0);
1954
1955 // The mask used for the transform is expected to be post-shift, but we
1956 // found the shift first so just apply the shift to the mask before passing
1957 // it down.
1958 if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1959 !isa<ConstantSDNode>(And.getOperand(1)))
1960 break;
1961 uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1962
1963 // Try to fold the mask and shift into the scale, and return false if we
1964 // succeed.
1965 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1966 return false;
1967 break;
1968 }
1969
1970 case ISD::SMUL_LOHI:
1971 case ISD::UMUL_LOHI:
1972 // A mul_lohi where we need the low part can be folded as a plain multiply.
1973 if (N.getResNo() != 0) break;
1974    LLVM_FALLTHROUGH;
1975 case ISD::MUL:
1976 case X86ISD::MUL_IMM:
1977 // X*[3,5,9] -> X+X*[2,4,8]
1978 if (AM.BaseType == X86ISelAddressMode::RegBase &&
1979 AM.Base_Reg.getNode() == nullptr &&
1980 AM.IndexReg.getNode() == nullptr) {
1981 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1982 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1983 CN->getZExtValue() == 9) {
1984 AM.Scale = unsigned(CN->getZExtValue())-1;
1985
1986 SDValue MulVal = N.getOperand(0);
1987 SDValue Reg;
1988
1989 // Okay, we know that we have a scale by now. However, if the scaled
1990 // value is an add of something and a constant, we can fold the
1991 // constant into the disp field here.
1992 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1993 isa<ConstantSDNode>(MulVal.getOperand(1))) {
1994 Reg = MulVal.getOperand(0);
1995 ConstantSDNode *AddVal =
1996 cast<ConstantSDNode>(MulVal.getOperand(1));
1997 uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1998 if (foldOffsetIntoAddress(Disp, AM))
1999 Reg = N.getOperand(0);
2000 } else {
2001 Reg = N.getOperand(0);
2002 }
2003
2004 AM.IndexReg = AM.Base_Reg = Reg;
2005 return false;
2006 }
2007 }
2008 break;
2009
2010 case ISD::SUB: {
2011    // Given A-B, if A can be completely folded into the address (leaving the
2012    // index field unused), use -B as the index.
2013    // This is a win if A has multiple parts that can be folded into
2014    // the address. It also saves a mov if the base register has
2015    // other uses, since it avoids a two-address sub instruction; however,
2016    // it costs an additional mov if the index register has other uses.
2017
2018 // Add an artificial use to this node so that we can keep track of
2019 // it if it gets CSE'd with a different node.
2020 HandleSDNode Handle(N);
2021
2022 // Test if the LHS of the sub can be folded.
2023 X86ISelAddressMode Backup = AM;
2024 if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2025 N = Handle.getValue();
2026 AM = Backup;
2027 break;
2028 }
2029 N = Handle.getValue();
2030 // Test if the index field is free for use.
2031 if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2032 AM = Backup;
2033 break;
2034 }
2035
2036 int Cost = 0;
2037 SDValue RHS = N.getOperand(1);
2038 // If the RHS involves a register with multiple uses, this
2039 // transformation incurs an extra mov, due to the neg instruction
2040 // clobbering its operand.
2041 if (!RHS.getNode()->hasOneUse() ||
2042 RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2043 RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2044 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2045 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2046 RHS.getOperand(0).getValueType() == MVT::i32))
2047 ++Cost;
2048 // If the base is a register with multiple uses, this
2049 // transformation may save a mov.
2050 if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2051 !AM.Base_Reg.getNode()->hasOneUse()) ||
2052 AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2053 --Cost;
2054 // If the folded LHS was interesting, this transformation saves
2055 // address arithmetic.
2056 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2057 ((AM.Disp != 0) && (Backup.Disp == 0)) +
2058 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2059 --Cost;
2060 // If it doesn't look like it may be an overall win, don't do it.
2061 if (Cost >= 0) {
2062 AM = Backup;
2063 break;
2064 }
2065
2066 // Ok, the transformation is legal and appears profitable. Go for it.
2067 // Negation will be emitted later to avoid creating dangling nodes if this
2068 // was an unprofitable LEA.
2069 AM.IndexReg = RHS;
2070 AM.NegateIndex = true;
2071 AM.Scale = 1;
2072 return false;
2073 }
2074
2075 case ISD::ADD:
2076 if (!matchAdd(N, AM, Depth))
2077 return false;
2078 break;
2079
2080 case ISD::OR:
2081 // We want to look through a transform in InstCombine and DAGCombiner that
2082 // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
2083 // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
2084 // An 'lea' can then be used to match the shift (multiply) and add:
2085 // and $1, %esi
2086 // lea (%rsi, %rdi, 8), %rax
2087 if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
2088 !matchAdd(N, AM, Depth))
2089 return false;
2090 break;
2091
2092 case ISD::AND: {
2093 // Perform some heroic transforms on an and of a constant-count shift
2094 // with a constant to enable use of the scaled offset field.
2095
2096 // Scale must not be used already.
2097 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2098
2099 // We only handle up to 64-bit values here as those are what matter for
2100 // addressing mode optimizations.
2101    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2102           "Unexpected value size!");
2103
2104 if (!isa<ConstantSDNode>(N.getOperand(1)))
2105 break;
2106
2107 if (N.getOperand(0).getOpcode() == ISD::SRL) {
2108 SDValue Shift = N.getOperand(0);
2109 SDValue X = Shift.getOperand(0);
2110
2111 uint64_t Mask = N.getConstantOperandVal(1);
2112
2113 // Try to fold the mask and shift into an extract and scale.
2114 if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2115 return false;
2116
2117 // Try to fold the mask and shift directly into the scale.
2118 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2119 return false;
2120
2121 // Try to fold the mask and shift into BEXTR and scale.
2122 if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2123 return false;
2124 }
2125
2126 // Try to swap the mask and shift to place shifts which can be done as
2127 // a scale on the outside of the mask.
2128 if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2129 return false;
2130
2131 break;
2132 }
2133 case ISD::ZERO_EXTEND: {
2134 // Try to widen a zexted shift left to the same size as its use, so we can
2135 // match the shift as a scale factor.
2136 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2137 break;
2138 if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
2139 break;
2140
2141 // Give up if the shift is not a valid scale factor [1,2,3].
2142 SDValue Shl = N.getOperand(0);
2143 auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
2144 if (!ShAmtC || ShAmtC->getZExtValue() > 3)
2145 break;
2146
2147 // The narrow shift must only shift out zero bits (it must be 'nuw').
2148 // That makes it safe to widen to the destination type.
2149 APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
2150 ShAmtC->getZExtValue());
2151 if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
2152 break;
2153
2154 // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
2155 MVT VT = N.getSimpleValueType();
2156 SDLoc DL(N);
2157 SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
2158 SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
2159
2160 // Convert the shift to scale factor.
2161 AM.Scale = 1 << ShAmtC->getZExtValue();
2162 AM.IndexReg = Zext;
2163
2164 insertDAGNode(*CurDAG, N, Zext);
2165 insertDAGNode(*CurDAG, N, NewShl);
2166 CurDAG->ReplaceAllUsesWith(N, NewShl);
2167 CurDAG->RemoveDeadNode(N.getNode());
2168 return false;
2169 }
2170 }
2171
2172 return matchAddressBase(N, AM);
2173}
2174
2175/// Helper for MatchAddress. Add the specified node to the
2176/// specified addressing mode without any further recursion.
2177bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2178 // Is the base register already occupied?
2179 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2180 // If so, check to see if the scale index register is set.
2181 if (!AM.IndexReg.getNode()) {
2182 AM.IndexReg = N;
2183 AM.Scale = 1;
2184 return false;
2185 }
2186
2187 // Otherwise, we cannot select it.
2188 return true;
2189 }
2190
2191 // Default, generate it as a register.
2192 AM.BaseType = X86ISelAddressMode::RegBase;
2193 AM.Base_Reg = N;
2194 return false;
2195}
2196
2197/// Helper for selectVectorAddr. Handles things that can be folded into a
2198/// gather scatter address. The index register and scale should have already
2199/// been handled.
2200bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2201 // TODO: Support other operations.
2202 switch (N.getOpcode()) {
2203 case ISD::Constant: {
2204 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2205 if (!foldOffsetIntoAddress(Val, AM))
2206 return false;
2207 break;
2208 }
2209 case X86ISD::Wrapper:
2210 if (!matchWrapper(N, AM))
2211 return false;
2212 break;
2213 }
2214
2215 return matchAddressBase(N, AM);
2216}
2217
2218bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
2219 SDValue &Scale, SDValue &Index,
2220 SDValue &Disp, SDValue &Segment) {
2221 X86ISelAddressMode AM;
2222 auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
2223 AM.IndexReg = Mgs->getIndex();
2224 AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
2225
2226 unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2227 // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2228 if (AddrSpace == 256)
2229 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2230 if (AddrSpace == 257)
2231 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2232 if (AddrSpace == 258)
2233 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2234
2235 SDLoc DL(N);
2236 MVT VT = N.getSimpleValueType();
2237
2238 // Try to match into the base and displacement fields.
2239 if (matchVectorAddress(N, AM))
2240 return false;
2241
2242 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2243 return true;
2244}
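// Illustrative sketch (not part of X86ISelDAGToDAG.cpp): the address-space
// handling above follows the convention 256 -> %gs, 257 -> %fs, 258 -> %ss
// noted in the comments. A hypothetical standalone mapping of that convention:
enum class SegReg { None, GS, FS, SS };
static SegReg segmentForAddrSpace(unsigned AddrSpace) {
  switch (AddrSpace) {
  case 256: return SegReg::GS;
  case 257: return SegReg::FS;
  case 258: return SegReg::SS;
  default:  return SegReg::None;
  }
}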
2245
2246/// Returns true if it is able to pattern match an addressing mode.
2247/// It returns the operands which make up the maximal addressing mode it can
2248/// match by reference.
2249///
2250/// Parent is the parent node of the addr operand that is being matched. It
2251/// is always a load, store, atomic node, or null. It is only null when
2252/// checking memory operands for inline asm nodes.
2253bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2254 SDValue &Scale, SDValue &Index,
2255 SDValue &Disp, SDValue &Segment) {
2256 X86ISelAddressMode AM;
2257
2258 if (Parent &&
2259 // This list of opcodes are all the nodes that have an "addr:$ptr" operand
2260 // that are not a MemSDNode, and thus don't have proper addrspace info.
2261 Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2262 Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2263 Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2264 Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2265 Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2266 Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2267 Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2268 unsigned AddrSpace =
2269 cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2270 // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2271 if (AddrSpace == 256)
2272 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2273 if (AddrSpace == 257)
2274 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2275 if (AddrSpace == 258)
2276 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2277 }
2278
2279 // Save the DL and VT before calling matchAddress, it can invalidate N.
2280 SDLoc DL(N);
2281 MVT VT = N.getSimpleValueType();
2282
2283 if (matchAddress(N, AM))
2284 return false;
2285
2286 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2287 return true;
2288}
2289
2290// We can only fold a load if all nodes between it and the root node have a
2291// single use. If there are additional uses, we could end up duplicating the
2292// load.
2293static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
2294 while (User != Root) {
2295 if (!User->hasOneUse())
2296 return false;
2297 User = *User->use_begin();
2298 }
2299
2300 return true;
2301}
2302
2303/// Match a scalar SSE load. In particular, we want to match a load whose top
2304/// elements are either undef or zeros. The load flavor is derived from the
2305/// type of N, which is either v4f32 or v2f64.
2306///
2307/// We also return:
2308/// PatternChainNode: this is the matched node that has a chain input and
2309/// output.
2310bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
2311 SDValue N, SDValue &Base,
2312 SDValue &Scale, SDValue &Index,
2313 SDValue &Disp, SDValue &Segment,
2314 SDValue &PatternNodeWithChain) {
2315 if (!hasSingleUsesFromRoot(Root, Parent))
2316 return false;
2317
2318 // We can allow a full vector load here since narrowing a load is ok unless
2319 // it's volatile or atomic.
2320 if (ISD::isNON_EXTLoad(N.getNode())) {
2321 LoadSDNode *LD = cast<LoadSDNode>(N);
2322 if (LD->isSimple() &&
2323 IsProfitableToFold(N, LD, Root) &&
2324 IsLegalToFold(N, Parent, Root, OptLevel)) {
2325 PatternNodeWithChain = N;
2326 return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2327 Segment);
2328 }
2329 }
2330
2331 // We can also match the special zero extended load opcode.
2332 if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
2333 PatternNodeWithChain = N;
2334 if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2335 IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2336 auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2337 return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2338 Segment);
2339 }
2340 }
2341
2342 // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2343 // once. Otherwise the load might get duplicated and the chain output of the
2344 // duplicate load will not be observed by all dependencies.
2345 if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2346 PatternNodeWithChain = N.getOperand(0);
2347 if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2348 IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2349 IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2350 LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2351 return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2352 Segment);
2353 }
2354 }
2355
2356 return false;
2357}
2358
2359
2360bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2361 if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2362 uint64_t ImmVal = CN->getZExtValue();
2363 if (!isUInt<32>(ImmVal))
2364 return false;
2365
2366 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2367 return true;
2368 }
2369
2370 // In static codegen with small code model, we can get the address of a label
2371 // into a register with 'movl'
2372 if (N->getOpcode() != X86ISD::Wrapper)
2373 return false;
2374
2375 N = N.getOperand(0);
2376
2377 // At least GNU as does not accept 'movl' for TPOFF relocations.
2378 // FIXME: We could use 'movl' when we know we are targeting MC.
2379 if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2380 return false;
2381
2382 Imm = N;
2383 if (N->getOpcode() != ISD::TargetGlobalAddress)
2384 return TM.getCodeModel() == CodeModel::Small;
2385
2386 Optional<ConstantRange> CR =
2387 cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2388 if (!CR)
2389 return TM.getCodeModel() == CodeModel::Small;
2390
2391 return CR->getUnsignedMax().ult(1ull << 32);
2392}
2393
2394bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2395 SDValue &Scale, SDValue &Index,
2396 SDValue &Disp, SDValue &Segment) {
2397 // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2398 SDLoc DL(N);
2399
2400 if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2401 return false;
2402
2403 RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
2404 if (RN && RN->getReg() == 0)
2405 Base = CurDAG->getRegister(0, MVT::i64);
2406 else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
2407 // Base could already be %rip, particularly in the x32 ABI.
2408 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2409 MVT::i64), 0);
2410 Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2411 Base);
2412 }
2413
2414 RN = dyn_cast<RegisterSDNode>(Index);
2415 if (RN && RN->getReg() == 0)
2416 Index = CurDAG->getRegister(0, MVT::i64);
2417 else {
2418    assert(Index.getValueType() == MVT::i32 &&
2419           "Expect to be extending 32-bit registers for use in LEA");
2420 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2421 MVT::i64), 0);
2422 Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2423 Index);
2424 }
2425
2426 return true;
2427}
2428
2429/// Calls SelectAddr and determines if the maximal addressing
2430/// mode it matches can be cost effectively emitted as an LEA instruction.
2431bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2432 SDValue &Base, SDValue &Scale,
2433 SDValue &Index, SDValue &Disp,
2434 SDValue &Segment) {
2435 X86ISelAddressMode AM;
2436
2437 // Save the DL and VT before calling matchAddress, it can invalidate N.
2438 SDLoc DL(N);
2439 MVT VT = N.getSimpleValueType();
2440
2441 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2442 // segments.
2443 SDValue Copy = AM.Segment;
2444 SDValue T = CurDAG->getRegister(0, MVT::i32);
2445 AM.Segment = T;
2446 if (matchAddress(N, AM))
2447 return false;
2448  assert (T == AM.Segment);
2449 AM.Segment = Copy;
2450
2451 unsigned Complexity = 0;
2452 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2453 Complexity = 1;
2454 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2455 Complexity = 4;
2456
2457 if (AM.IndexReg.getNode())
2458 Complexity++;
2459
2460 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2461 // a simple shift.
2462 if (AM.Scale > 1)
2463 Complexity++;
2464
2465 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2466 // to a LEA. This is determined with some experimentation but is by no means
2467 // optimal (especially for code size consideration). LEA is nice because of
2468 // its three-address nature. Tweak the cost function again when we can run
2469 // convertToThreeAddress() at register allocation time.
2470 if (AM.hasSymbolicDisplacement()) {
2471 // For X86-64, always use LEA to materialize RIP-relative addresses.
2472 if (Subtarget->is64Bit())
2473 Complexity = 4;
2474 else
2475 Complexity += 2;
2476 }
2477
2478 // Heuristic: try harder to form an LEA from ADD if the operands set flags.
2479 // Unlike ADD, LEA does not affect flags, so we will be less likely to require
2480 // duplicating flag-producing instructions later in the pipeline.
2481 if (N.getOpcode() == ISD::ADD) {
2482 auto isMathWithFlags = [](SDValue V) {
2483 switch (V.getOpcode()) {
2484 case X86ISD::ADD:
2485 case X86ISD::SUB:
2486 case X86ISD::ADC:
2487 case X86ISD::SBB:
2488 /* TODO: These opcodes can be added safely, but we may want to justify
2489 their inclusion for different reasons (better for reg-alloc).
2490 case X86ISD::SMUL:
2491 case X86ISD::UMUL:
2492 case X86ISD::OR:
2493 case X86ISD::XOR:
2494 case X86ISD::AND:
2495 */
2496 // Value 1 is the flag output of the node - verify it's not dead.
2497 return !SDValue(V.getNode(), 1).use_empty();
2498 default:
2499 return false;
2500 }
2501 };
2502 // TODO: This could be an 'or' rather than 'and' to make the transform more
2503 // likely to happen. We might want to factor in whether there's a
2504 // load folding opportunity for the math op that disappears with LEA.
2505 if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
2506 Complexity++;
2507 }
2508
2509 if (AM.Disp)
2510 Complexity++;
2511
2512 // If it isn't worth using an LEA, reject it.
2513 if (Complexity <= 2)
2514 return false;
2515
2516 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2517 return true;
2518}
2519
2520/// This is only run on TargetGlobalTLSAddress nodes.
2521bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2522 SDValue &Scale, SDValue &Index,
2523 SDValue &Disp, SDValue &Segment) {
2524  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2525 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2526
2527 X86ISelAddressMode AM;
2528 AM.GV = GA->getGlobal();
2529 AM.Disp += GA->getOffset();
2530 AM.SymbolFlags = GA->getTargetFlags();
2531
2532 MVT VT = N.getSimpleValueType();
2533 if (VT == MVT::i32) {
2534 AM.Scale = 1;
2535 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2536 }
2537
2538 getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
2539 return true;
2540}
2541
2542bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2543 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1. Calling 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
16. Returning from 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
17. Assuming 'CN' is null
18. Taking false branch
2544 Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2545 N.getValueType());
2546 return true;
2547 }
2548
2549 // Keep track of the original value type and whether this value was
2550 // truncated. If we see a truncation from pointer type to VT that truncates
2551 // bits that are known to be zero, we can use a narrow reference.
2552 EVT VT = N.getValueType();
19. Calling 'SDValue::getValueType'
2553 bool WasTruncated = false;
2554 if (N.getOpcode() == ISD::TRUNCATE) {
2555 WasTruncated = true;
2556 N = N.getOperand(0);
2557 }
2558
2559 if (N.getOpcode() != X86ISD::Wrapper)
2560 return false;
2561
2562 // We can only use non-GlobalValues as immediates if they were not truncated,
2563 // as we do not have any range information. If we have a GlobalValue and the
2564 // address was not truncated, we can select it as an operand directly.
2565 unsigned Opc = N.getOperand(0)->getOpcode();
2566 if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2567 Op = N.getOperand(0);
2568 // We can only select the operand directly if we didn't have to look past a
2569 // truncate.
2570 return !WasTruncated;
2571 }
2572
2573 // Check that the global's range fits into VT.
2574 auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2575 Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2576 if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2577 return false;
2578
2579 // Okay, we can use a narrow reference.
2580 Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2581 GA->getOffset(), GA->getTargetFlags());
2582 return true;
2583}
2584
2585bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2586 SDValue &Base, SDValue &Scale,
2587 SDValue &Index, SDValue &Disp,
2588 SDValue &Segment) {
2589  assert(Root && P && "Unknown root/parent nodes");
2590 if (!ISD::isNON_EXTLoad(N.getNode()) ||
2591 !IsProfitableToFold(N, P, Root) ||
2592 !IsLegalToFold(N, P, Root, OptLevel))
2593 return false;
2594
2595 return selectAddr(N.getNode(),
2596 N.getOperand(1), Base, Scale, Index, Disp, Segment);
2597}
2598
2599bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
2600 SDValue &Base, SDValue &Scale,
2601 SDValue &Index, SDValue &Disp,
2602 SDValue &Segment) {
2603  assert(Root && P && "Unknown root/parent nodes");
2604 if (N->getOpcode() != X86ISD::VBROADCAST_LOAD ||
2605 !IsProfitableToFold(N, P, Root) ||
2606 !IsLegalToFold(N, P, Root, OptLevel))
2607 return false;
2608
2609 return selectAddr(N.getNode(),
2610 N.getOperand(1), Base, Scale, Index, Disp, Segment);
2611}
2612
2613/// Return an SDNode that returns the value of the global base register.
2614/// Output instructions required to initialize the global base register,
2615/// if necessary.
2616SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2617 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2618 auto &DL = MF->getDataLayout();
2619 return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2620}
2621
2622bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2623 if (N->getOpcode() == ISD::TRUNCATE)
2624 N = N->getOperand(0).getNode();
2625 if (N->getOpcode() != X86ISD::Wrapper)
2626 return false;
2627
2628 auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2629 if (!GA)
2630 return false;
2631
2632 Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2633 return CR && CR->getSignedMin().sge(-1ull << Width) &&
2634 CR->getSignedMax().slt(1ull << Width);
2635}
2636
2637static X86::CondCode getCondFromNode(SDNode *N) {
2638  assert(N->isMachineOpcode() && "Unexpected node");
2639 X86::CondCode CC = X86::COND_INVALID;
2640 unsigned Opc = N->getMachineOpcode();
2641 if (Opc == X86::JCC_1)
2642 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
2643 else if (Opc == X86::SETCCr)
2644 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
2645 else if (Opc == X86::SETCCm)
2646 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
2647 else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
2648 Opc == X86::CMOV64rr)
2649 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
2650 else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
2651 Opc == X86::CMOV64rm)
2652 CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
2653
2654 return CC;
2655}
2656
2657/// Test whether the given X86ISD::CMP node has any users that use a flag
2658/// other than ZF.
2659bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2660 // Examine each user of the node.
2661 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2662 UI != UE; ++UI) {
2663 // Only check things that use the flags.
2664 if (UI.getUse().getResNo() != Flags.getResNo())
2665 continue;
2666 // Only examine CopyToReg uses that copy to EFLAGS.
2667 if (UI->getOpcode() != ISD::CopyToReg ||
2668 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2669 return false;
2670 // Examine each user of the CopyToReg use.
2671 for (SDNode::use_iterator FlagUI = UI->use_begin(),
2672 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2673 // Only examine the Flag result.
2674 if (FlagUI.getUse().getResNo() != 1) continue;
2675 // Anything unusual: assume conservatively.
2676 if (!FlagUI->isMachineOpcode()) return false;
2677 // Examine the condition code of the user.
2678 X86::CondCode CC = getCondFromNode(*FlagUI);
2679
2680 switch (CC) {
2681 // Comparisons which only use the zero flag.
2682 case X86::COND_E: case X86::COND_NE:
2683 continue;
2684 // Anything else: assume conservatively.
2685 default:
2686 return false;
2687 }
2688 }
2689 }
2690 return true;
2691}
2692
2693/// Test whether the given X86ISD::CMP node has any uses which require the SF
2694/// flag to be accurate.
2695bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2696 // Examine each user of the node.
2697 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2698 UI != UE; ++UI) {
2699 // Only check things that use the flags.
2700 if (UI.getUse().getResNo() != Flags.getResNo())
2701 continue;
2702 // Only examine CopyToReg uses that copy to EFLAGS.
2703 if (UI->getOpcode() != ISD::CopyToReg ||
2704 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2705 return false;
2706 // Examine each user of the CopyToReg use.
2707 for (SDNode::use_iterator FlagUI = UI->use_begin(),
2708 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2709 // Only examine the Flag result.
2710 if (FlagUI.getUse().getResNo() != 1) continue;
2711 // Anything unusual: assume conservatively.
2712 if (!FlagUI->isMachineOpcode()) return false;
2713 // Examine the condition code of the user.
2714 X86::CondCode CC = getCondFromNode(*FlagUI);
2715
2716 switch (CC) {
2717 // Comparisons which don't examine the SF flag.
2718 case X86::COND_A: case X86::COND_AE:
2719 case X86::COND_B: case X86::COND_BE:
2720 case X86::COND_E: case X86::COND_NE:
2721 case X86::COND_O: case X86::COND_NO:
2722 case X86::COND_P: case X86::COND_NP:
2723 continue;
2724 // Anything else: assume conservatively.
2725 default:
2726 return false;
2727 }
2728 }
2729 }
2730 return true;
2731}
2732
2733static bool mayUseCarryFlag(X86::CondCode CC) {
2734 switch (CC) {
2735 // Comparisons which don't examine the CF flag.
2736 case X86::COND_O: case X86::COND_NO:
2737 case X86::COND_E: case X86::COND_NE:
2738 case X86::COND_S: case X86::COND_NS:
2739 case X86::COND_P: case X86::COND_NP:
2740 case X86::COND_L: case X86::COND_GE:
2741 case X86::COND_G: case X86::COND_LE:
2742 return false;
2743 // Anything else: assume conservatively.
2744 default:
2745 return true;
2746 }
2747}
2748
2749/// Test whether the given node which sets flags has any uses which require the
2750/// CF flag to be accurate.
2751 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2752 // Examine each user of the node.
2753 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2754 UI != UE; ++UI) {
2755 // Only check things that use the flags.
2756 if (UI.getUse().getResNo() != Flags.getResNo())
2757 continue;
2758
2759 unsigned UIOpc = UI->getOpcode();
2760
2761 if (UIOpc == ISD::CopyToReg) {
2762 // Only examine CopyToReg uses that copy to EFLAGS.
2763 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2764 return false;
2765 // Examine each user of the CopyToReg use.
2766 for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2767 FlagUI != FlagUE; ++FlagUI) {
2768 // Only examine the Flag result.
2769 if (FlagUI.getUse().getResNo() != 1)
2770 continue;
2771 // Anything unusual: assume conservatively.
2772 if (!FlagUI->isMachineOpcode())
2773 return false;
2774 // Examine the condition code of the user.
2775 X86::CondCode CC = getCondFromNode(*FlagUI);
2776
2777 if (mayUseCarryFlag(CC))
2778 return false;
2779 }
2780
2781 // This CopyToReg is ok. Move on to the next user.
2782 continue;
2783 }
2784
2785 // This might be an unselected node. So look for the pre-isel opcodes that
2786 // use flags.
2787 unsigned CCOpNo;
2788 switch (UIOpc) {
2789 default:
2790 // Something unusual. Be conservative.
2791 return false;
2792 case X86ISD::SETCC: CCOpNo = 0; break;
2793 case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2794 case X86ISD::CMOV: CCOpNo = 2; break;
2795 case X86ISD::BRCOND: CCOpNo = 2; break;
2796 }
2797
2798 X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2799 if (mayUseCarryFlag(CC))
2800 return false;
2801 }
2802 return true;
2803}
2804
2805/// Check whether or not the chain ending in StoreNode is suitable for doing
2806/// the {load; op; store} to modify transformation.
2807static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2808 SDValue StoredVal, SelectionDAG *CurDAG,
2809 unsigned LoadOpNo,
2810 LoadSDNode *&LoadNode,
2811 SDValue &InputChain) {
2812 // Is the stored value result 0 of the operation?
2813 if (StoredVal.getResNo() != 0) return false;
2814
2815 // Are there other uses of the operation other than the store?
2816 if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2817
2818 // Is the store non-extending and non-indexed?
2819 if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2820 return false;
2821
2822 SDValue Load = StoredVal->getOperand(LoadOpNo);
2823 // Is the stored value a non-extending and non-indexed load?
2824 if (!ISD::isNormalLoad(Load.getNode())) return false;
2825
2826 // Return LoadNode by reference.
2827 LoadNode = cast<LoadSDNode>(Load);
2828
2829 // Is the store the only read of the loaded value?
2830 if (!Load.hasOneUse())
2831 return false;
2832
2833 // Is the address of the store the same as the load?
2834 if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2835 LoadNode->getOffset() != StoreNode->getOffset())
2836 return false;
2837
2838 bool FoundLoad = false;
2839 SmallVector<SDValue, 4> ChainOps;
2840 SmallVector<const SDNode *, 4> LoopWorklist;
2841 SmallPtrSet<const SDNode *, 16> Visited;
2842 const unsigned int Max = 1024;
2843
2844 // Visualization of Load-Op-Store fusion:
2845 // -------------------------
2846 // Legend:
2847 // *-lines = Chain operand dependencies.
2848 // |-lines = Normal operand dependencies.
2849 // Dependencies flow down and right. n-suffix references multiple nodes.
2850 //
2851 // C Xn C
2852 // * * *
2853 // * * *
2854 // Xn A-LD Yn TF Yn
2855 // * * \ | * |
2856 // * * \ | * |
2857 // * * \ | => A--LD_OP_ST
2858 // * * \| \
2859 // TF OP \
2860 // * | \ Zn
2861 // * | \
2862 // A-ST Zn
2863 //
2864
2865 // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2866 // #2: Yn -> LD
2867 // #3: ST -> Zn
2868
2869 // Ensure the transform is safe by checking for the dual
2870 // dependencies to make sure we do not induce a loop.
2871
2872 // As LD is a predecessor to both OP and ST we can do this by checking:
2873 // a). if LD is a predecessor to a member of Xn or Yn.
2874 // b). if a Zn is a predecessor to ST.
2875
2876 // However, (b) can only occur through being a chain predecessor to
2877 // ST, which is the same as Zn being a member or predecessor of Xn,
2878 // which is a subset of LD being a predecessor of Xn. So it's
2879 // subsumed by check (a).
2880
2881 SDValue Chain = StoreNode->getChain();
2882
2883 // Gather X elements in ChainOps.
2884 if (Chain == Load.getValue(1)) {
2885 FoundLoad = true;
2886 ChainOps.push_back(Load.getOperand(0));
2887 } else if (Chain.getOpcode() == ISD::TokenFactor) {
2888 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2889 SDValue Op = Chain.getOperand(i);
2890 if (Op == Load.getValue(1)) {
2891 FoundLoad = true;
2892 // Drop Load, but keep its chain. No cycle check necessary.
2893 ChainOps.push_back(Load.getOperand(0));
2894 continue;
2895 }
2896 LoopWorklist.push_back(Op.getNode());
2897 ChainOps.push_back(Op);
2898 }
2899 }
2900
2901 if (!FoundLoad)
2902 return false;
2903
2904 // Worklist is currently Xn. Add Yn to worklist.
2905 for (SDValue Op : StoredVal->ops())
2906 if (Op.getNode() != LoadNode)
2907 LoopWorklist.push_back(Op.getNode());
2908
2909 // Check (a) if Load is a predecessor to Xn + Yn
2910 if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2911 true))
2912 return false;
2913
2914 InputChain =
2915 CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2916 return true;
2917}
2918
2919// Change a chain of {load; op; store} of the same value into a simple op
2920// through memory of that value, if the uses of the modified value and its
2921// address are suitable.
2922//
2923// The tablegen memory operand pattern is currently not able to match the case
2924// where the EFLAGS result of the original operation is used.
2925//
2926// To move this to tablegen, we'll need to improve tablegen to allow flags to
2927// be transferred from a node in the pattern to the result node, probably with
2928// a new keyword. For example, we have this
2929// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2930// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2931// (implicit EFLAGS)]>;
2932// but maybe need something like this
2933// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2934// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2935// (transferrable EFLAGS)]>;
2936//
2937// Until then, we manually fold these and instruction select the operation
2938// here.
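// Illustrative sketch (not part of the original source) of the kind of DAG
// this routine folds, assuming a simple i32 increment of a memory location:
//   t1 = load [addr]
//   t2 = X86ISD::ADD t1, 1      (also produces EFLAGS)
//   store t2, [addr]
// which can be selected as a single INC32m [addr], provided the addressing,
// flag-use, and inc/dec profitability checks below succeed.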
2939bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2940 StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2941 SDValue StoredVal = StoreNode->getOperand(1);
2942 unsigned Opc = StoredVal->getOpcode();
2943
2944 // Before we try to select anything, make sure this is memory operand size
2945 // and opcode we can handle. Note that this must match the code below that
2946 // actually lowers the opcodes.
2947 EVT MemVT = StoreNode->getMemoryVT();
2948 if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2949 MemVT != MVT::i8)
2950 return false;
2951
2952 bool IsCommutable = false;
2953 bool IsNegate = false;
2954 switch (Opc) {
2955 default:
2956 return false;
2957 case X86ISD::SUB:
2958 IsNegate = isNullConstant(StoredVal.getOperand(0));
2959 break;
2960 case X86ISD::SBB:
2961 break;
2962 case X86ISD::ADD:
2963 case X86ISD::ADC:
2964 case X86ISD::AND:
2965 case X86ISD::OR:
2966 case X86ISD::XOR:
2967 IsCommutable = true;
2968 break;
2969 }
2970
2971 unsigned LoadOpNo = IsNegate ? 1 : 0;
2972 LoadSDNode *LoadNode = nullptr;
2973 SDValue InputChain;
2974 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2975 LoadNode, InputChain)) {
2976 if (!IsCommutable)
2977 return false;
2978
2979 // This operation is commutable, try the other operand.
2980 LoadOpNo = 1;
2981 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2982 LoadNode, InputChain))
2983 return false;
2984 }
2985
2986 SDValue Base, Scale, Index, Disp, Segment;
2987 if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2988 Segment))
2989 return false;
2990
2991 auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2992 unsigned Opc8) {
2993 switch (MemVT.getSimpleVT().SimpleTy) {
2994 case MVT::i64:
2995 return Opc64;
2996 case MVT::i32:
2997 return Opc32;
2998 case MVT::i16:
2999 return Opc16;
3000 case MVT::i8:
3001 return Opc8;
3002 default:
3003 llvm_unreachable("Invalid size!");
3004 }
3005 };
3006
3007 MachineSDNode *Result;
3008 switch (Opc) {
3009 case X86ISD::SUB:
3010 // Handle negate.
3011 if (IsNegate) {
3012 unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
3013 X86::NEG8m);
3014 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3015 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3016 MVT::Other, Ops);
3017 break;
3018 }
3019 LLVM_FALLTHROUGH;
3020 case X86ISD::ADD:
3021 // Try to match inc/dec.
3022 if (!Subtarget->slowIncDec() || OptForSize) {
3023 bool IsOne = isOneConstant(StoredVal.getOperand(1));
3024 bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
3025 // An ADD/SUB with 1/-1 whose carry flag isn't used can become INC/DEC.
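// (INC/DEC leave CF unchanged, unlike ADD/SUB, so this rewrite is only legal
// when no user of the flag result reads the carry flag.)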
3026 if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
3027 unsigned NewOpc =
3028 ((Opc == X86ISD::ADD) == IsOne)
3029 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3030 : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
3031 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3032 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3033 MVT::Other, Ops);
3034 break;
3035 }
3036 }
3037 LLVM_FALLTHROUGH;
3038 case X86ISD::ADC:
3039 case X86ISD::SBB:
3040 case X86ISD::AND:
3041 case X86ISD::OR:
3042 case X86ISD::XOR: {
3043 auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
3044 switch (Opc) {
3045 case X86ISD::ADD:
3046 return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3047 X86::ADD8mr);
3048 case X86ISD::ADC:
3049 return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3050 X86::ADC8mr);
3051 case X86ISD::SUB:
3052 return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3053 X86::SUB8mr);
3054 case X86ISD::SBB:
3055 return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3056 X86::SBB8mr);
3057 case X86ISD::AND:
3058 return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3059 X86::AND8mr);
3060 case X86ISD::OR:
3061 return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3062 case X86ISD::XOR:
3063 return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3064 X86::XOR8mr);
3065 default:
3066 llvm_unreachable("Invalid opcode!");
3067 }
3068 };
3069 auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
3070 switch (Opc) {
3071 case X86ISD::ADD:
3072 return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
3073 case X86ISD::ADC:
3074 return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
3075 case X86ISD::SUB:
3076 return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
3077 case X86ISD::SBB:
3078 return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
3079 case X86ISD::AND:
3080 return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
3081 case X86ISD::OR:
3082 return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
3083 case X86ISD::XOR:
3084 return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
3085 default:
3086 llvm_unreachable("Invalid opcode!");
3087 }
3088 };
3089 auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3090 switch (Opc) {
3091 case X86ISD::ADD:
3092 return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3093 X86::ADD8mi);
3094 case X86ISD::ADC:
3095 return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3096 X86::ADC8mi);
3097 case X86ISD::SUB:
3098 return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3099 X86::SUB8mi);
3100 case X86ISD::SBB:
3101 return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3102 X86::SBB8mi);
3103 case X86ISD::AND:
3104 return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3105 X86::AND8mi);
3106 case X86ISD::OR:
3107 return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3108 X86::OR8mi);
3109 case X86ISD::XOR:
3110 return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3111 X86::XOR8mi);
3112 default:
3113 llvm_unreachable("Invalid opcode!");
3114 }
3115 };
3116
3117 unsigned NewOpc = SelectRegOpcode(Opc);
3118 SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3119
3120 // See if the operand is a constant that we can fold into an immediate
3121 // operand.
3122 if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3123 int64_t OperandV = OperandC->getSExtValue();
3124
3125 // Check if we can shrink the operand enough to fit in an immediate (or
3126 // fit into a smaller immediate) by negating it and switching the
3127 // operation.
3128 if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3129 ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3130 (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3131 isInt<32>(-OperandV))) &&
3132 hasNoCarryFlagUses(StoredVal.getValue(1))) {
3133 OperandV = -OperandV;
3134 Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3135 }
3136
3137 // First try to fit this into an Imm8 operand. If it doesn't fit, then try
3138 // the larger immediate operand.
3139 if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
3140 Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3141 NewOpc = SelectImm8Opcode(Opc);
3142 } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3143 Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3144 NewOpc = SelectImmOpcode(Opc);
3145 }
3146 }
3147
3148 if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3149 SDValue CopyTo =
3150 CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3151 StoredVal.getOperand(2), SDValue());
3152
3153 const SDValue Ops[] = {Base, Scale, Index, Disp,
3154 Segment, Operand, CopyTo, CopyTo.getValue(1)};
3155 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3156 Ops);
3157 } else {
3158 const SDValue Ops[] = {Base, Scale, Index, Disp,
3159 Segment, Operand, InputChain};
3160 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3161 Ops);
3162 }
3163 break;
3164 }
3165 default:
3166 llvm_unreachable("Invalid opcode!");
3167 }
3168
3169 MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3170 LoadNode->getMemOperand()};
3171 CurDAG->setNodeMemRefs(Result, MemOps);
3172
3173 // Update Load Chain uses as well.
3174 ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3175 ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3176 ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3177 CurDAG->RemoveDeadNode(Node);
3178 return true;
3179}
3180
3181// See if this is an X & Mask that we can match to BEXTR/BZHI.
3182// Where Mask is one of the following patterns:
3183// a) x & (1 << nbits) - 1
3184// b) x & ~(-1 << nbits)
3185// c) x & (-1 >> (32 - y))
3186// d) x << (32 - y) >> (32 - y)
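// Illustrative C-level sources of these masks (not from the original code),
// assuming a 32-bit unsigned x and a bit count y:
//   a/b) x & ((1u << y) - 1)          // keep the low y bits
//   c)   x & (~0u >> (32 - y))        // same result, different mask shape
//   d)   (x << (32 - y)) >> (32 - y)  // clear the high bits, shift back
// Each extracts the low y bits of x, which is what BZHI (or BEXTR with a
// zero start position) computes.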
3187bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3188 assert(
3189 (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
3190 "Should be either an and-mask, or right-shift after clearing high bits.");
3191
3192 // BEXTR is a BMI instruction and BZHI is a BMI2 instruction. We need at least one.
3193 if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3194 return false;
3195
3196 MVT NVT = Node->getSimpleValueType(0);
3197
3198 // Only supported for 32 and 64 bits.
3199 if (NVT != MVT::i32 && NVT != MVT::i64)
3200 return false;
3201
3202 SDValue NBits;
3203
3204 // If we have BMI2's BZHI, we are ok with multi-use patterns.
3205 // Else, if we only have BMI1's BEXTR, we require one-use.
3206 const bool CanHaveExtraUses = Subtarget->hasBMI2();
3207 auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
3208 return CanHaveExtraUses ||
3209 Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3210 };
3211 auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
3212 auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
3213
3214 auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3215 if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3216 assert(V.getSimpleValueType() == MVT::i32 &&
3217 V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3218 "Expected i64 -> i32 truncation");
3219 V = V.getOperand(0);
3220 }
3221 return V;
3222 };
3223
3224 // a) x & ((1 << nbits) + (-1))
3225 auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
3226 &NBits](SDValue Mask) -> bool {
3227 // Match `add`. Must only have one use!
3228 if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3229 return false;
3230 // We should be adding an all-ones constant (i.e. subtracting one).
3231 if (!isAllOnesConstant(Mask->getOperand(1)))
3232 return false;
3233 // Match `1 << nbits`. Might be truncated. Must only have one use!
3234 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3235 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3236 return false;
3237 if (!isOneConstant(M0->getOperand(0)))
3238 return false;
3239 NBits = M0->getOperand(1);
3240 return true;
3241 };
3242
3243 auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3244 V = peekThroughOneUseTruncation(V);
3245 return CurDAG->MaskedValueIsAllOnes(
3246 V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3247 NVT.getSizeInBits()));
3248 };
3249
3250 // b) x & ~(-1 << nbits)
3251 auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3252 &NBits](SDValue Mask) -> bool {
3253 // Match `~()`. Must only have one use!
3254 if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3255 return false;
3256 // The -1 only has to be all-ones for the final Node's NVT.
3257 if (!isAllOnes(Mask->getOperand(1)))
3258 return false;
3259 // Match `-1 << nbits`. Might be truncated. Must only have one use!
3260 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3261 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3262 return false;
3263 // The -1 only has to be all-ones for the final Node's NVT.
3264 if (!isAllOnes(M0->getOperand(0)))
3265 return false;
3266 NBits = M0->getOperand(1);
3267 return true;
3268 };
3269
3270 // Match potentially-truncated (bitwidth - y)
3271 auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
3272 unsigned Bitwidth) {
3273 // Skip over a truncate of the shift amount.
3274 if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
3275 ShiftAmt = ShiftAmt.getOperand(0);
3276 // The trunc should have been the only user of the real shift amount.
3277 if (!checkOneUse(ShiftAmt))
3278 return false;
3279 }
3280 // Match the shift amount as: (bitwidth - y). It should go away, too.
3281 if (ShiftAmt.getOpcode() != ISD::SUB)
3282 return false;
3283 auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
3284 if (!V0 || V0->getZExtValue() != Bitwidth)
3285 return false;
3286 NBits = ShiftAmt.getOperand(1);
3287 return true;
3288 };
3289
3290 // c) x & (-1 >> (32 - y))
3291 auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
3292 matchShiftAmt](SDValue Mask) -> bool {
3293 // The mask itself may be truncated.
3294 Mask = peekThroughOneUseTruncation(Mask);
3295 unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3296 // Match `l>>`. Must only have one use!
3297 if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3298 return false;
3299 // We should be shifting a truly all-ones constant.
3300 if (!isAllOnesConstant(Mask.getOperand(0)))
3301 return false;
3302 SDValue M1 = Mask.getOperand(1);
3303 // The shift amount should not be used externally.
3304 if (!checkOneUse(M1))
3305 return false;
3306 return matchShiftAmt(M1, Bitwidth);
3307 };
3308
3309 SDValue X;
3310
3311 // d) x << (32 - y) >> (32 - y)
3312 auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
3313 &X](SDNode *Node) -> bool {
3314 if (Node->getOpcode() != ISD::SRL)
3315 return false;
3316 SDValue N0 = Node->getOperand(0);
3317 if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
3318 return false;
3319 unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3320 SDValue N1 = Node->getOperand(1);
3321 SDValue N01 = N0->getOperand(1);
3322 // Both of the shifts must be by the exact same value.
3323 // There should not be any uses of the shift amount outside of the pattern.
3324 if (N1 != N01 || !checkTwoUse(N1))
3325 return false;
3326 if (!matchShiftAmt(N1, Bitwidth))
3327 return false;
3328 X = N0->getOperand(0);
3329 return true;
3330 };
3331
3332 auto matchLowBitMask = [matchPatternA, matchPatternB,
3333 matchPatternC](SDValue Mask) -> bool {
3334 return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3335 };
3336
3337 if (Node->getOpcode() == ISD::AND) {
3338 X = Node->getOperand(0);
3339 SDValue Mask = Node->getOperand(1);
3340
3341 if (matchLowBitMask(Mask)) {
3342 // Great.
3343 } else {
3344 std::swap(X, Mask);
3345 if (!matchLowBitMask(Mask))
3346 return false;
3347 }
3348 } else if (!matchPatternD(Node))
3349 return false;
3350
3351 SDLoc DL(Node);
3352
3353 // Truncate the shift amount.
3354 NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3355 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3356
3357 // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
3358 // All the other bits are undefined, we do not care about them.
3359 SDValue ImplDef = SDValue(
3360 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3361 insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3362
3363 SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
3364 insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
3365 NBits = SDValue(
3366 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
3367 NBits, SRIdxVal), 0);
3368 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3369
3370 if (Subtarget->hasBMI2()) {
3371 // Great, just emit the BZHI.
3372 if (NVT != MVT::i32) {
3373 // But have to place the bit count into the wide-enough register first.
3374 NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
3375 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3376 }
3377
3378 SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
3379 ReplaceNode(Node, Extract.getNode());
3380 SelectCode(Extract.getNode());
3381 return true;
3382 }
3383
3384 // Else, if we do *NOT* have BMI2, let's find out if 'X' is *logically*
3385 // shifted (potentially with a one-use trunc in between), and if the
3386 // truncation was the only use of the shift,
3387 // and if so, look past the one-use truncation.
3388 {
3389 SDValue RealX = peekThroughOneUseTruncation(X);
3390 // FIXME: only if the shift is one-use?
3391 if (RealX != X && RealX.getOpcode() == ISD::SRL)
3392 X = RealX;
3393 }
3394
3395 MVT XVT = X.getSimpleValueType();
3396
3397 // Else, emitting BEXTR requires one more step.
3398 // The 'control' of BEXTR has the pattern of:
3399 // [15...8 bit][ 7...0 bit] location
3400 // [ bit count][ shift] name
3401 // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
3402
3403 // Shift NBits left by 8 bits, thus producing 'control'.
3404 // This leaves the low 8 bits zero.
3405 SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3406 SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3407 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3408
3409 // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3410 // FIXME: only if the shift is one-use?
3411 if (X.getOpcode() == ISD::SRL) {
3412 SDValue ShiftAmt = X.getOperand(1);
3413 X = X.getOperand(0);
3414
3415 assert(ShiftAmt.getValueType() == MVT::i8 &&
3416 "Expected shift amount to be i8");
3417
3418 // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3419 // We could zext to i16 in some form, but we intentionally don't do that.
3420 SDValue OrigShiftAmt = ShiftAmt;
3421 ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3422 insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3423
3424 // And now 'or' these low 8 bits of shift amount into the 'control'.
3425 Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3426 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3427 }
3428
3429 // But have to place the 'control' into the wide-enough register first.
3430 if (XVT != MVT::i32) {
3431 Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
3432 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3433 }
3434
3435 // And finally, form the BEXTR itself.
3436 SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3437
3438 // If we looked past a truncation of 'X' above, truncate the result back now.
3439 if (XVT != NVT) {
3440 insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
3441 Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3442 }
3443
3444 ReplaceNode(Node, Extract.getNode());
3445 SelectCode(Extract.getNode());
3446
3447 return true;
3448}
3449
3450// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
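// For example (illustration only): (x >> 4) & 0xff can become BEXTR with a
// control value of 0x0804 (length 8 in bits 15..8, start 4 in bits 7..0),
// subject to the profitability checks below.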
3451MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3452 MVT NVT = Node->getSimpleValueType(0);
3453 SDLoc dl(Node);
3454
3455 SDValue N0 = Node->getOperand(0);
3456 SDValue N1 = Node->getOperand(1);
3457
3458 // If we have TBM we can use an immediate for the control. If we have BMI
3459 // we should only do this if the BEXTR instruction is implemented well.
3460 // Otherwise moving the control into a register makes this more costly.
3461 // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3462 // hoisting the move immediate would make it worthwhile with a less optimal
3463 // BEXTR?
3464 bool PreferBEXTR =
3465 Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
3466 if (!PreferBEXTR && !Subtarget->hasBMI2())
3467 return nullptr;
3468
3469 // Must have a shift right.
3470 if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3471 return nullptr;
3472
3473 // Shift can't have additional users.
3474 if (!N0->hasOneUse())
3475 return nullptr;
3476
3477 // Only supported for 32 and 64 bits.
3478 if (NVT != MVT::i32 && NVT != MVT::i64)
3479 return nullptr;
3480
3481 // Shift amount and RHS of and must be constant.
3482 ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3483 ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3484 if (!MaskCst || !ShiftCst)
3485 return nullptr;
3486
3487 // And RHS must be a mask.
3488 uint64_t Mask = MaskCst->getZExtValue();
3489 if (!isMask_64(Mask))
3490 return nullptr;
3491
3492 uint64_t Shift = ShiftCst->getZExtValue();
3493 uint64_t MaskSize = countPopulation(Mask);
3494
3495 // Don't interfere with something that can be handled by extracting AH.
3496 // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3497 if (Shift == 8 && MaskSize == 8)
3498 return nullptr;
3499
3500 // Make sure we are only using bits that were in the original value, not
3501 // shifted in.
3502 if (Shift + MaskSize > NVT.getSizeInBits())
3503 return nullptr;
3504
3505 // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
3506 // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
3507 // does not fit into 32 bits. Load folding is not a sufficient reason.
3508 if (!PreferBEXTR && MaskSize <= 32)
3509 return nullptr;
3510
3511 SDValue Control;
3512 unsigned ROpc, MOpc;
3513
3514 if (!PreferBEXTR) {
3515 assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
3516 // If we can't make use of BEXTR then we can't fuse shift+mask stages.
3517 // Let's perform the mask first, and apply shift later. Note that we need to
3518 // widen the mask to account for the fact that we'll apply shift afterwards!
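// For example (illustration only): for (x >> 16) & ((1ULL << 36) - 1) this
// emits BZHI with a bound of 16 + 36 = 52 to keep the low 52 bits of x, and
// the SHR by 16 is applied at the end of this function.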
3519 Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
3520 ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
3521 MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
3522 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3523 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
3524 } else {
3525 // The 'control' of BEXTR has the pattern of:
3526 // [15...8 bit][ 7...0 bit] location
3527 // [ bit count][ shift] name
3528 // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
3529 Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3530 if (Subtarget->hasTBM()) {
3531 ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3532 MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3533 } else {
3534 assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
3535 // BMI requires the immediate to be placed in a register.
3536 ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3537 MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3538 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3539 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
3540 }
3541 }
3542
3543 MachineSDNode *NewNode;
3544 SDValue Input = N0->getOperand(0);
3545 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3546 if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3547 SDValue Ops[] = {
3548 Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
3549 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3550 NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3551 // Update the chain.
3552 ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
3553 // Record the mem-refs
3554 CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3555 } else {
3556 NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
3557 }
3558
3559 if (!PreferBEXTR) {
3560 // We still need to apply the shift.
3561 SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
3562 unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
3563 NewNode =
3564 CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
3565 }
3566
3567 return NewNode;
3568}
3569
3570 // Emit a PCMPISTR(I/M) instruction.
3571MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3572 bool MayFoldLoad, const SDLoc &dl,
3573 MVT VT, SDNode *Node) {
3574 SDValue N0 = Node->getOperand(0);
3575 SDValue N1 = Node->getOperand(1);
3576 SDValue Imm = Node->getOperand(2);
3577 const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3578 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3579
3580 // Try to fold a load. No need to check alignment.
3581 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3582 if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3583 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3584 N1.getOperand(0) };
3585 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3586 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3587 // Update the chain.
3588 ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3589 // Record the mem-refs
3590 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3591 return CNode;
3592 }
3593
3594 SDValue Ops[] = { N0, N1, Imm };
3595 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3596 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3597 return CNode;
3598}
3599
3600 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3601 // to emit a second instruction after this one. This is needed since we have two
3602 // CopyToReg nodes glued before this and we need to continue that glue through.
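// (PCMPESTRI/PCMPESTRM read the explicit string lengths from EAX and EDX;
// those are the two glued CopyToReg nodes referred to above.)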
3603MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3604 bool MayFoldLoad, const SDLoc &dl,
3605 MVT VT, SDNode *Node,
3606 SDValue &InFlag) {
3607 SDValue N0 = Node->getOperand(0);
3608 SDValue N2 = Node->getOperand(2);
3609 SDValue Imm = Node->getOperand(4);
3610 const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3611 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3612
3613 // Try to fold a load. No need to check alignment.
3614 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3615 if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3616 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3617 N2.getOperand(0), InFlag };
3618 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3619 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3620 InFlag = SDValue(CNode, 3);
3621 // Update the chain.
3622 ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3623 // Record the mem-refs
3624 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3625 return CNode;
3626 }
3627
3628 SDValue Ops[] = { N0, N2, Imm, InFlag };
3629 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3630 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3631 InFlag = SDValue(CNode, 2);
3632 return CNode;
3633}
3634
3635bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3636 EVT VT = N->getValueType(0);
3637
3638 // Only handle scalar shifts.
3639 if (VT.isVector())
3640 return false;
3641
3642 // Narrower shifts only mask to 5 bits in hardware.
3643 unsigned Size = VT == MVT::i64 ? 64 : 32;
3644
3645 SDValue OrigShiftAmt = N->getOperand(1);
3646 SDValue ShiftAmt = OrigShiftAmt;
3647 SDLoc DL(N);
3648
3649 // Skip over a truncate of the shift amount.
3650 if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3651 ShiftAmt = ShiftAmt->getOperand(0);
3652
3653 // This function is called after X86DAGToDAGISel::matchBitExtract(),
3654 // so we are not afraid that we might mess up BZHI/BEXTR pattern.
3655
3656 SDValue NewShiftAmt;
3657 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3658 SDValue Add0 = ShiftAmt->getOperand(0);
3659 SDValue Add1 = ShiftAmt->getOperand(1);
3660 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3661 // to avoid the ADD/SUB.
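// For example (illustration only): an i32 shift only uses the low 5 bits of
// the amount, so x << (y + 32) behaves exactly like x << y and the ADD can
// be dropped.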
3662 if (isa<ConstantSDNode>(Add1) &&
3663 cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3664 NewShiftAmt = Add0;
3665 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3666 // generate a NEG instead of a SUB of a constant.
3667 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3668 isa<ConstantSDNode>(Add0) &&
3669 cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3670 cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3671 // Insert a negate op.
3672 // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3673 // that uses it that's not a shift.
3674 EVT SubVT = ShiftAmt.getValueType();
3675 SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3676 SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3677 NewShiftAmt = Neg;
3678
3679 // Insert these operands into a valid topological order so they can
3680 // get selected independently.
3681 insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3682 insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3683 } else
3684 return false;
3685 } else
3686 return false;
3687
3688 if (NewShiftAmt.getValueType() != MVT::i8) {
3689 // Need to truncate the shift amount.
3690 NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3691 // Add to a correct topological ordering.
3692 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3693 }
3694
3695 // Insert a new mask to keep the shift amount legal. This should be removed
3696 // by isel patterns.
3697 NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3698 CurDAG->getConstant(Size - 1, DL, MVT::i8));
3699 // Place in a correct topological ordering.
3700 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3701
3702 SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3703 NewShiftAmt);
3704 if (UpdatedNode != N) {
3705 // If we found an existing node, we should replace ourselves with that node
3706 // and wait for it to be selected after its other users.
3707 ReplaceNode(N, UpdatedNode);
3708 return true;
3709 }
3710
3711 // If the original shift amount is now dead, delete it so that we don't run
3712 // it through isel.
3713 if (OrigShiftAmt.getNode()->use_empty())
3714 CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3715
3716 // Now that we've optimized the shift amount, defer to normal isel to get
3717 // load folding and legacy vs BMI2 selection without repeating it here.
3718 SelectCode(N);
3719 return true;
3720}
3721
3722bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
3723 MVT NVT = N->getSimpleValueType(0);
3724 unsigned Opcode = N->getOpcode();
3725 SDLoc dl(N);
3726
3727 // For operations of the form (x << C1) op C2, check if we can use a smaller
3728 // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
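// Worked example (illustration only): (x << 8) | 0x1200 needs a 32-bit
// immediate for 0x1200, but rewriting it as (x | 0x12) << 8 lets the OR use
// a sign-extended 8-bit immediate. The checks below make sure the bits
// shifted out of the constant are zero so the result is unchanged.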
3729 SDValue Shift = N->getOperand(0);
3730 SDValue N1 = N->getOperand(1);
3731
3732 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3733 if (!Cst)
3734 return false;
3735
3736 int64_t Val = Cst->getSExtValue();
3737
3738 // If we have an any_extend feeding the AND, look through it to see if there
3739 // is a shift behind it. But only if the AND doesn't use the extended bits.
3740 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
3741 bool FoundAnyExtend = false;
3742 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
3743 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
3744 isUInt<32>(Val)) {
3745 FoundAnyExtend = true;
3746 Shift = Shift.getOperand(0);
3747 }
3748
3749 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
3750 return false;
3751
3752 // i8 is unshrinkable, i16 should be promoted to i32.
3753 if (NVT != MVT::i32 && NVT != MVT::i64)
3754 return false;
3755
3756 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
3757 if (!ShlCst)
3758 return false;
3759
3760 uint64_t ShAmt = ShlCst->getZExtValue();
3761
3762 // Make sure that we don't change the operation by removing bits.
3763 // This only matters for OR and XOR, AND is unaffected.
3764 uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
3765 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3766 return false;
3767
3768 // Check the minimum bitwidth for the new constant.
3769 // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3770 auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
3771 if (Opcode == ISD::AND) {
3772 // AND32ri is the same as AND64ri32 with zext imm.
3773 // Try this before sign extended immediates below.
3774 ShiftedVal = (uint64_t)Val >> ShAmt;
3775 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3776 return true;
3777 // Also swap order when the AND can become MOVZX.
3778 if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
3779 return true;
3780 }
3781 ShiftedVal = Val >> ShAmt;
3782 if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
3783 (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
3784 return true;
3785 if (Opcode != ISD::AND) {
3786 // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
3787 ShiftedVal = (uint64_t)Val >> ShAmt;
3788 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3789 return true;
3790 }
3791 return false;
3792 };
3793
3794 int64_t ShiftedVal;
3795 if (!CanShrinkImmediate(ShiftedVal))
3796 return false;
3797
3798 // Ok, we can reorder to get a smaller immediate.
3799
3800 // But, it's possible the original immediate allowed an AND to become MOVZX.
3801 // Do this check late so that the MaskedValueIsZero call happens as late as
3802 // possible.
3803 if (Opcode == ISD::AND) {
3804 // Find the smallest zext this could possibly be.
3805 unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
3806 ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
3807
3808 // Figure out which bits need to be zero to achieve that mask.
3809 APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
3810 ZExtWidth);
3811 NeededMask &= ~Cst->getAPIntValue();
3812
3813 if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
3814 return false;
3815 }
3816
3817 SDValue X = Shift.getOperand(0);
3818 if (FoundAnyExtend) {
3819 SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
3820 insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
3821 X = NewX;
3822 }
3823
3824 SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
3825 insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
3826 SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
3827 insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
3828 SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
3829 Shift.getOperand(1));
3830 ReplaceNode(N, NewSHL.getNode());
3831 SelectCode(NewSHL.getNode());
3832 return true;
3833}
3834
3835/// Convert vector increment or decrement to sub/add with an all-ones constant:
3836/// add X, <1, 1...> --> sub X, <-1, -1...>
3837/// sub X, <1, 1...> --> add X, <-1, -1...>
3838/// The all-ones vector constant can be materialized using a pcmpeq instruction
3839/// that is commonly recognized as an idiom (has no register dependency), so
3840/// that's better/smaller than loading a splat 1 constant.
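// Rough illustration (not part of the original source), for v4i32:
//   add <5,6,7,8>, <1,1,1,1>  ==  sub <5,6,7,8>, <-1,-1,-1,-1>  ==  <6,7,8,9>
// where the all-ones vector comes from a PCMPEQD of a register with itself
// rather than a constant-pool load of the splat-1 vector.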
3841bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) {
3842 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) &&
3843 "Unexpected opcode for increment/decrement transform");
3844
3845 EVT VT = Node->getValueType(0);
3846 assert(VT.isVector() && "Should only be called for vectors.");
3847
3848 SDValue X = Node->getOperand(0);
3849 SDValue OneVec = Node->getOperand(1);
3850
3851 APInt SplatVal;
3852 if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue())
3853 return false;
3854
3855 SDLoc DL(Node);
3856 SDValue OneConstant, AllOnesVec;
3857
3858 APInt Ones = APInt::getAllOnesValue(32);
3859 assert(VT.getSizeInBits() % 32 == 0 &&
3860 "Expected bit count to be a multiple of 32");
3861 OneConstant = CurDAG->getConstant(Ones, DL, MVT::i32);
3862 insertDAGNode(*CurDAG, X, OneConstant);
3863
3864 unsigned NumElts = VT.getSizeInBits() / 32;
3865 assert(NumElts > 0 && "Expected to get non-empty vector.");
3866 AllOnesVec = CurDAG->getSplatBuildVector(MVT::getVectorVT(MVT::i32, NumElts),
3867 DL, OneConstant);
3868 insertDAGNode(*CurDAG, X, AllOnesVec);
3869
3870 AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec);
3871 insertDAGNode(*CurDAG, X, AllOnesVec);
3872
3873 unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
3874 SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec);
3875
3876 ReplaceNode(Node, NewNode.getNode());
3877 SelectCode(NewNode.getNode());
3878 return true;
3879}
3880
3881/// If the high bits of an 'and' operand are known zero, try setting the
3882/// high bits of an 'and' constant operand to produce a smaller encoding by
3883/// creating a small, sign-extended negative immediate rather than a large
3884/// positive one. This reverses a transform in SimplifyDemandedBits that
3885/// shrinks mask constants by clearing bits. There is also a possibility that
3886/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3887/// case, just replace the 'and'. Return 'true' if the node is replaced.
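// Illustrative example (not part of the original source): on i32, if the
// top four bits of the other operand are known zero, then
//   and x, 0x0FFFFFF0   (needs a 32-bit immediate)
// can be encoded as
//   and x, 0xFFFFFFF0   (-16, a sign-extended 8-bit immediate)
// because the extra mask bits only cover positions that are already zero.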
3888bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3889 // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3890 // have immediate operands.
3891 MVT VT = And->getSimpleValueType(0);
3892 if (VT != MVT::i32 && VT != MVT::i64)
3893 return false;
3894
3895 auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3896 if (!And1C)
3897 return false;
3898
3899 // Bail out if the mask constant is already negative. It can't shrink any more.
3900 // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3901 // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3902 // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3903 // are negative too.
3904 APInt MaskVal = And1C->getAPIntValue();
3905 unsigned MaskLZ = MaskVal.countLeadingZeros();
3906 if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3907 return false;
3908
3909 // Don't extend into the upper 32 bits of a 64 bit mask.
3910 if (VT == MVT::i64 && MaskLZ >= 32) {
3911 MaskLZ -= 32;
3912 MaskVal = MaskVal.trunc(32);
3913 }
3914
3915 SDValue And0 = And->getOperand(0);
3916 APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3917 APInt NegMaskVal = MaskVal | HighZeros;
3918
3919 // If a negative constant would not allow a smaller encoding, there's no need
3920 // to continue. Only change the constant when we know it's a win.
3921 unsigned MinWidth = NegMaskVal.getMinSignedBits();
3922 if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3923 return false;
3924
3925 // Extend masks if we truncated above.
3926 if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3927 NegMaskVal = NegMaskVal.zext(64);
3928 HighZeros = HighZeros.zext(64);
3929 }
3930
3931 // The variable operand must be all zeros in the top bits to allow using the
3932 // new, negative constant as the mask.
3933 if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3934 return false;
3935
3936 // Check if the mask is -1. In that case, this is an unnecessary instruction
3937 // that escaped earlier analysis.
3938 if (NegMaskVal.isAllOnesValue()) {
3939 ReplaceNode(And, And0.getNode());
3940 return true;
3941 }
3942
3943 // A negative mask allows a smaller encoding. Create a new 'and' node.
3944 SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3945 SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3946 ReplaceNode(And, NewAnd.getNode());
3947 SelectCode(NewAnd.getNode());
3948 return true;
3949}
3950
3951static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
3952 bool FoldedBCast, bool Masked) {
3953 if (Masked) {
3954 if (FoldedLoad) {
3955 switch (TestVT.SimpleTy) {
3956 default: llvm_unreachable("Unexpected VT!");
3957 case MVT::v16i8:
3958 return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
3959 case MVT::v8i16:
3960 return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
3961 case MVT::v4i32:
3962 return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
3963 case MVT::v2i64:
3964 return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
3965 case MVT::v32i8:
3966 return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
3967 case MVT::v16i16:
3968 return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
3969 case MVT::v8i32:
3970 return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
3971 case MVT::v4i64:
3972 return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
3973 case MVT::v64i8:
3974 return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
3975 case MVT::v32i16:
3976 return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
3977 case MVT::v16i32:
3978 return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
3979 case MVT::v8i64:
3980 return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
3981 }
3982 }
3983
3984 if (FoldedBCast) {
3985 switch (TestVT.SimpleTy) {
3986 default: llvm_unreachable("Unexpected VT!");
3987 case MVT::v4i32:
3988 return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
3989 case MVT::v2i64:
3990 return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
3991 case MVT::v8i32:
3992 return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
3993 case MVT::v4i64:
3994 return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
3995 case MVT::v16i32:
3996 return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
3997 case MVT::v8i64:
3998 return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
3999 }
4000 }
4001
4002 switch (TestVT.SimpleTy) {
4003 default: llvm_unreachable("Unexpected VT!");
4004 case MVT::v16i8:
4005 return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
4006 case MVT::v8i16:
4007 return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
4008 case MVT::v4i32:
4009 return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
4010 case MVT::v2i64:
4011 return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
4012 case MVT::v32i8:
4013 return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
4014 case MVT::v16i16:
4015 return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
4016 case MVT::v8i32:
4017 return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
4018 case MVT::v4i64:
4019 return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
4020 case MVT::v64i8:
4021 return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
4022 case MVT::v32i16:
4023 return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
4024 case MVT::v16i32:
4025 return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
4026 case MVT::v8i64:
4027 return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
4028 }
4029 }
4030
4031 if (FoldedLoad) {
4032 switch (TestVT.SimpleTy) {
4033 default: llvm_unreachable("Unexpected VT!");
4034 case MVT::v16i8:
4035 return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
4036 case MVT::v8i16:
4037 return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
4038 case MVT::v4i32:
4039 return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
4040 case MVT::v2i64:
4041 return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
4042 case MVT::v32i8:
4043 return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
4044 case MVT::v16i16:
4045 return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
4046 case MVT::v8i32:
4047 return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
4048 case MVT::v4i64:
4049 return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
4050 case MVT::v64i8:
4051 return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
4052 case MVT::v32i16:
4053 return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
4054 case MVT::v16i32:
4055 return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
4056 case MVT::v8i64:
4057 return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
4058 }
4059 }
4060
4061 if (FoldedBCast) {
4062 switch (TestVT.SimpleTy) {
4063 default: llvm_unreachable("Unexpected VT!");
4064 case MVT::v4i32:
4065 return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
4066 case MVT::v2i64:
4067 return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
4068 case MVT::v8i32:
4069 return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
4070 case MVT::v4i64:
4071 return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
4072 case MVT::v16i32:
4073 return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
4074 case MVT::v8i64:
4075 return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
4076 }
4077 }
4078
4079 switch (TestVT.SimpleTy) {
4080 default: llvm_unreachable("Unexpected VT!");
4081 case MVT::v16i8:
4082 return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
4083 case MVT::v8i16:
4084 return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
4085 case MVT::v4i32:
4086 return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
4087 case MVT::v2i64:
4088 return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
4089 case MVT::v32i8:
4090 return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
4091 case MVT::v16i16:
4092 return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
4093 case MVT::v8i32:
4094 return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
4095 case MVT::v4i64:
4096 return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
4097 case MVT::v64i8:
4098 return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
4099 case MVT::v32i16:
4100 return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
4101 case MVT::v16i32:
4102 return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
4103 case MVT::v8i64:
4104 return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
4105 }
4106}
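// A rough guide to the opcode names above: the Z128/Z256/Z infixes select the
// 128/256/512-bit EVEX forms, "rr"/"rm"/"rmb" select register, memory and
// broadcast-memory operands, and a trailing "k" selects the mask-predicated
// variant.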
4107
4108// Try to create VPTESTM instruction. If InMask is not null, it will be used
4109// to form a masked operation.
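// Roughly: VPTESTM sets each result mask bit when the AND of the corresponding
// source elements is non-zero, and VPTESTNM sets it when the AND is zero, so an
// EQ/NE compare of an AND against an all-zeros vector maps onto one instruction.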
4110bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
4111 SDValue InMask) {
4112 assert(Subtarget->hasAVX512() && "Expected AVX512!");
4113 assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
4114        "Unexpected VT!");
4115
4116 // Look for equal and not equal compares.
4117 ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4118 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4119 return false;
4120
4121 SDValue SetccOp0 = Setcc.getOperand(0);
4122 SDValue SetccOp1 = Setcc.getOperand(1);
4123
4124 // Canonicalize the all zero vector to the RHS.
4125 if (ISD::isBuildVectorAllZeros(SetccOp0.getNode()))
4126 std::swap(SetccOp0, SetccOp1);
4127
4128 // See if we're comparing against zero.
4129 if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode()))
4130 return false;
4131
4132 SDValue N0 = SetccOp0;
4133
4134 MVT CmpVT = N0.getSimpleValueType();
4135 MVT CmpSVT = CmpVT.getVectorElementType();
4136
4137 // Start with both operands the same. We'll try to refine this.
4138 SDValue Src0 = N0;
4139 SDValue Src1 = N0;
4140
4141 {
4142 // Look through single use bitcasts.
4143 SDValue N0Temp = N0;
4144 if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
4145 N0Temp = N0.getOperand(0);
4146
4147 // Look for single use AND.
4148 if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4149 Src0 = N0Temp.getOperand(0);
4150 Src1 = N0Temp.getOperand(1);
4151 }
4152 }
4153
4154 // Without VLX we need to widen the compare to 512 bits.
4155 bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
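  // (Without AVX512VL only the 512-bit VPTESTM/VPTESTNM forms exist, so
  // 128/256-bit compares have to be widened to 512 bits further below.)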
4156
4157 // We can only fold loads if the sources are unique.
4158 bool CanFoldLoads = Src0 != Src1;
4159
4160 // Try to fold loads unless we need to widen.
4161 bool FoldedLoad = false;
4162 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
4163 if (!Widen && CanFoldLoads) {
4164 Load = Src1;
4165 FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
4166 Tmp4);
4167 if (!FoldedLoad) {
4168 // AND is commutative.
4169 Load = Src0;
4170 FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
4171 Tmp3, Tmp4);
4172 if (FoldedLoad)
4173 std::swap(Src0, Src1);
4174 }
4175 }
4176
4177 auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
4178 // Look through single use bitcasts.
4179 if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) {
4180 Parent = Src.getNode();
4181 Src = Src.getOperand(0);
4182 }
4183
4184 if (Src.getOpcode() == X86ISD::VBROADCAST_LOAD && Src.hasOneUse()) {
4185 auto *MemIntr = cast<MemIntrinsicSDNode>(Src);
4186 if (MemIntr->getMemoryVT().getSizeInBits() == CmpSVT.getSizeInBits())
4187 return Src;
4188 }
4189
4190 return SDValue();
4191 };
4192
4193 // If we didn't fold a load, try to match a broadcast. There is no widening
4194 // limitation for this, but only 32 and 64 bit element types are supported.
4195 bool FoldedBCast = false;
4196 if (!FoldedLoad && CanFoldLoads &&
4197 (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
4198 SDNode *ParentNode = N0.getNode();
4199 if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
4200 FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
4201 Tmp1, Tmp2, Tmp3, Tmp4);
4202 }
4203
4204 // Try the other operand.
4205 if (!FoldedBCast) {
4206 SDNode *ParentNode = N0.getNode();
4207 if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
4208 FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
4209 Tmp1, Tmp2, Tmp3, Tmp4);
4210 if (FoldedBCast)
4211 std::swap(Src0, Src1);
4212 }
4213 }
4214 }
4215
4216 auto getMaskRC = [](MVT MaskVT) {
4217 switch (MaskVT.SimpleTy) {
4218 default: llvm_unreachable("Unexpected VT!");
4219 case MVT::v2i1: return X86::VK2RegClassID;
4220 case MVT::v4i1: return X86::VK4RegClassID;
4221 case MVT::v8i1: return X86::VK8RegClassID;
4222 case MVT::v16i1: return X86::VK16RegClassID;
4223 case MVT::v32i1: return X86::VK32RegClassID;
4224 case MVT::v64i1: return X86::VK64RegClassID;
4225 }
4226 };
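  // The VK<N> register classes above describe the AVX-512 mask registers
  // (k0-k7) viewed as <N x i1> values; the class only fixes how many of the
  // mask bits are significant.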
4227
4228 bool IsMasked = InMask.getNode() != nullptr;
4229
4230 SDLoc dl(Root);
4231
4232 MVT ResVT = Setcc.getSimpleValueType();
4233 MVT MaskVT = ResVT;
4234 if (Widen) {
4235 // Widen the inputs using insert_subreg or copy_to_regclass.
4236 unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
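    // Widening always targets a 512-bit vector: scale by 4 from 128 bits or
    // by 2 from 256 bits.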
4237 unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
4238 unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
4239 CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
4240 MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4241 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4242 CmpVT), 0);
4243 Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4244
4245 assert(!FoldedLoad && "Shouldn't have folded the load");
4246 if (!FoldedBCast)
4247 Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4248
4249 if (IsMasked) {
4250 // Widen the mask.
4251 unsigned RegClass = getMaskRC(MaskVT);
4252 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4253 InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4254 dl, MaskVT, InMask, RC), 0);
4255 }
4256 }
4257
4258 bool IsTestN = CC == ISD::SETEQ;
4259 unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4260 IsMasked);
4261
4262 MachineSDNode *CNode;
4263 if (FoldedLoad || FoldedBCast) {
4264 SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4265
4266 if (IsMasked) {
4267 SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4268 Load.getOperand(0) };
4269 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4270 } else {
4271 SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4272 Load.getOperand(0) };
4273 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4274 }
4275
4276 // Update the chain.
4277 ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
4278 // Record the mem-refs
4279 CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Load)->getMemOperand()});
4280 } else {
4281 if (IsMasked)
4282 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
4283 else
4284 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
4285 }
4286
4287 // If we widened, we need to shrink the mask VT.
4288 if (Widen) {
4289 unsigned RegClass = getMaskRC(ResVT);
4290 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4291 CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4292 dl, ResVT, SDValue(CNode, 0), RC);
4293 }
4294
4295 ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
4296 CurDAG->RemoveDeadNode(Root);
4297 return true;
4298}
4299
4300// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
4301// into vpternlog.
4302bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
4303 assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
4304
4305 MVT NVT = N->getSimpleValueType(0);
4306
4307 // Make sure we support VPTERNLOG.
4308 if (!NVT.isVector() || !Subtarget->hasAVX512())
4309 return false;
4310
4311 // We need VLX for 128/256-bit.
4312 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
4313 return false;
4314
4315 SDValue N0 = N->getOperand(0);
4316 SDValue N1 = N->getOperand(1);
4317
4318 // Canonicalize AND to LHS.
4319 if (N1.getOpcode() == ISD::AND)
4320 std::swap(N0, N1);
4321
4322 if (N0.getOpcode() != ISD::AND ||
4323 N1.getOpcode() != X86ISD::ANDNP ||
4324 !N0.hasOneUse() || !N1.hasOneUse())
4325 return false;
4326
4327 // ANDN is not commutable, so use it to pin down A and C.
4328 SDValue A = N1.getOperand(0);
4329 SDValue C = N1.getOperand(1);
4330
4331 // AND is commutable, if one operand matches A, the other operand is B.
4332 // Otherwise this isn't a match.
4333 SDValue B;
4334 if (N0.getOperand(0) == A)
4335 B = N0.getOperand(1);
4336 else if (N0.getOperand(1) == A)
4337 B = N0.getOperand(0);
4338 else
4339 return false;
4340
4341 SDLoc dl(N);
4342 SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
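  // 0xCA is the VPTERNLOG truth table for "A ? B : C": the immediate bit at
  // index A*4 + B*2 + C is 1 exactly when (A & B) | (~A & C) is 1, and
  // 0b11001010 encodes that function.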
4343 SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
4344 ReplaceNode(N, Ternlog.getNode());
4345 SelectCode(Ternlog.getNode());
4346 return true;
4347}
4348
4349void X86DAGToDAGISel::Select(SDNode *Node) {
4350 MVT NVT = Node->getSimpleValueType(0);
4351 unsigned Opcode = Node->getOpcode();
4352 SDLoc dl(Node);
4353
4354 if (Node->isMachineOpcode()) {
4355 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
4356 Node->setNodeId(-1);
4357 return; // Already selected.
4358 }
4359
4360 switch (Opcode) {
4361 default: break;
4362 case ISD::INTRINSIC_VOID: {
4363 unsigned IntNo = Node->getConstantOperandVal(1);
4364 switch (IntNo) {
4365 default: break;
4366 case Intrinsic::x86_sse3_monitor:
4367 case Intrinsic::x86_monitorx:
4368 case Intrinsic::x86_clzero: {
4369 bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
4370
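      // MONITOR/MONITORX take the address in RAX/EAX and their hint operands
      // in ECX and EDX, and CLZERO takes just the cache-line address in
      // RAX/EAX, so the operands are copied into those fixed registers below.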
4371 unsigned Opc = 0;
4372 switch (IntNo) {
4373 default: llvm_unreachable("Unexpected intrinsic!");
4374 case Intrinsic::x86_sse3_monitor:
4375 if (!Subtarget->hasSSE3())
4376 break;
4377 Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
4378 break;
4379 case Intrinsic::x86_monitorx:
4380 if (!Subtarget->hasMWAITX())
4381 break;
4382 Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
4383 break;
4384 case Intrinsic::x86_clzero:
4385 if (!Subtarget->hasCLZERO())
4386 break;
4387 Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
4388 break;
4389 }
4390
4391 if (Opc) {
4392 unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
4393 SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
4394 Node->getOperand(2), SDValue());
4395 SDValue InFlag = Chain.getValue(1);
4396
4397 if (IntNo == Intrinsic::x86_sse3_monitor ||
4398 IntNo == Intrinsic::x86_monitorx) {
4399 // Copy the other two operands to ECX and EDX.
4400 Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
4401 InFlag);
4402 InFlag = Chain.getValue(1);
4403 Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
4404 InFlag);
4405 InFlag = Chain.getValue(1);
4406 }
4407
4408 MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4409 { Chain, InFlag});
4410 ReplaceNode(Node, CNode);
4411 return;
4412 }
4413 }
4414 }
4415
4416 break;
4417 }
4418 case ISD::BRIND: {
4419 if (Subtarget->isTargetNaCl())
4420 // NaCl has its own pass where jmp %r32 instructions are converted to
4421 // jmp %r64. We leave the instruction alone.
4422 break;
4423 if (Subtarget->isTarget64BitILP32()) {
4424 // Converts a 32-bit register to a 64-bit, zero-extended version of
4425 // it. This is needed because x86-64 can do many things, but jmp %r32
4426 // ain't one of them.
4427 const SDValue &Target = Node->getOperand(1);
4428 assert(Target.getSimpleValueType() == llvm::MVT::i32);
4429 SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
4430 SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
4431 Node->getOperand(0), ZextTarget);
4432 ReplaceNode(Node, Brind.getNode());
4433 SelectCode(ZextTarget.getNode());
4434 SelectCode(Brind.getNode());
4435 return;
4436 }
4437 break;
4438 }
4439 case X86ISD::GlobalBaseReg:
4440 ReplaceNode(Node, getGlobalBaseReg());
4441 return;
4442
4443 case ISD::BITCAST:
4444 // Just drop all 128/256/512-bit bitcasts.
4445 if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
4446 NVT == MVT::f128) {
4447 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
4448 CurDAG->RemoveDeadNode(Node);
4449 return;
4450 }
4451 break;
4452
4453 case ISD::VSELECT: {
4454 // Replace VSELECT with non-mask conditions with BLENDV.
4455 if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4456 break;
4457
4458 assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
4459 SDValue Blendv = CurDAG->getNode(
4460 X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
4461 Node->getOperand(1), Node->getOperand(2));
4462 ReplaceNode(Node, Blendv.getNode());
4463 SelectCode(Blendv.getNode());
4464 // We already called ReplaceUses.
4465 return;
4466 }
4467
4468 case ISD::SRL:
4469 if (matchBitExtract(Node))
4470 return;
4471 LLVM_FALLTHROUGH;
4472 case ISD::SRA:
4473 case ISD::SHL:
4474 if (tryShiftAmountMod(Node))
4475 return;
4476 break;
4477
4478 case ISD::AND:
4479 if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
4480 // Try to form a masked VPTESTM. Operands can be in either order.
4481 SDValue N0 = Node->getOperand(0);
4482 SDValue N1 = Node->getOperand(1);
4483 if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
4484 tryVPTESTM(Node, N0, N1))
4485 return;
4486 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
4487 tryVPTESTM(Node, N1, N0))
4488 return;
4489 }
4490
4491 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
4492 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4493 CurDAG->RemoveDeadNode(Node);
4494 return;
4495 }
4496 if (matchBitExtract(Node))
4497 return;
4498 if (AndImmShrink && shrinkAndImmediate(Node))
4499 return;
4500
4501 LLVM_FALLTHROUGH;
4502 case ISD::OR:
4503 case ISD::XOR:
4504 if (tryShrinkShlLogicImm(Node))
4505 return;
4506
4507 if (Opcode == ISD::OR && tryMatchBitSelect(Node))
4508 return;
4509
4510 LLVM_FALLTHROUGH;
4511 case ISD::ADD:
4512 case ISD::SUB: {
4513 if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() &&
4514 combineIncDecVector(Node))
4515 return;
4516
4517 // Try to avoid folding immediates with multiple uses for optsize.
4518 // This code tries to select to register form directly to avoid going
4519 // through the isel table which might fold the immediate. We can't change
4520 // the add/sub/and/or/xor with immediate patterns in the
4521 // tablegen files to check immediate use count without making the patterns
4522 // unavailable to the fast-isel table.
4523 if (!OptForSize)
4524 break;
4525
4526 // Only handle i8/i16/i32/i64.
4527 if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
4528 break;
4529
4530 SDValue N0 = Node->getOperand(0);
4531 SDValue N1 = Node->getOperand(1);
4532
4533 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
4534 if (!Cst)
4535 break;
4536
4537 int64_t Val = Cst->getSExtValue();
4538
4539 // Make sure it's an immediate that is considered foldable.
4540 // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
4541 if (!isInt<8>(Val) && !isInt<32>(Val))
4542 break;
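    // These ALU instructions only encode sign-extended 8-bit or 32-bit
    // immediates, so anything outside those ranges could not be folded anyway.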
4543
4544 // If this can match to INC/DEC, let it go.
4545 if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
4546 break;
4547
4548 // Check if we should avoid folding this immediate.
4549 if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
4550 break;
4551
4552 // We should not fold the immediate. So we need a register form instead.
4553 unsigned ROpc, MOpc;
4554 switch (NVT.SimpleTy) {
4555 default: llvm_unreachable("Unexpected VT!");
4556 case MVT::i8:
4557 switch (Opcode) {
4558 default: llvm_unreachable("Unexpected opcode!");
4559 case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
4560 case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
4561 case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
4562 case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
4563 case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
4564 }
4565 break;
4566 case MVT::i16:
4567 switch (Opcode) {
4568 default: llvm_unreachable("Unexpected opcode!");
4569 case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
4570 case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
4571 case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
4572 case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
4573 case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
4574 }
4575 break;
4576 case MVT::i32:
4577 switch (Opcode) {
4578 default: llvm_unreachable("Unexpected opcode!");
4579 case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
4580 case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
4581 case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
4582 case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
4583 case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
4584 }
4585 break;
4586 case MVT::i64:
4587 switch (Opcode) {
4588 default: llvm_unreachable("Unexpected opcode!");
4589 case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
4590 case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
4591 case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
4592 case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
4593 case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
4594 }
4595 break;
4596 }
4597
4598 // OK, this is an AND/OR/XOR/ADD/SUB with a constant.
4599
4600 // If this is not a subtract, we can still try to fold a load.
4601 if (Opcode != ISD::SUB) {
4602 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4603 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4604 SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4605 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4606 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4607 // Update the chain.
4608 ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
4609 // Record the mem-refs
4610 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
4611 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4612 CurDAG->RemoveDeadNode(Node);
4613 return;
4614 }
4615 }
4616
4617 CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
4618 return;
4619 }
4620
4621 case X86ISD::SMUL:
4622 // i16/i32/i64 are handled with isel patterns.
4623 if (NVT != MVT::i8)
4624 break;
4625 LLVM_FALLTHROUGH;
4626 case X86ISD::UMUL: {
4627 SDValue N0 = Node->getOperand(0);
4628 SDValue N1 = Node->getOperand(1);
4629
4630 unsigned LoReg, ROpc, MOpc;
4631 switch (NVT.SimpleTy) {
4632 default: llvm_unreachable("Unsupported VT!");
4633 case MVT::i8:
4634 LoReg = X86::AL;
4635 ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
4636 MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
4637 break;
4638 case MVT::i16:
4639 LoReg = X86::AX;
4640 ROpc = X86::MUL16r;
4641 MOpc = X86::MUL16m;
4642 break;
4643 case MVT::i32:
4644 LoReg = X86::EAX;
4645 ROpc = X86::MUL32r;
4646 MOpc = X86::MUL32m;
4647 break;
4648 case MVT::i64:
4649 LoReg = X86::RAX;
4650 ROpc = X86::MUL64r;
4651 MOpc = X86::MUL64m;
4652 break;
4653 }
4654
4655 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4656 bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4657 // Multiply is commutative.
4658 if (!FoldedLoad) {
4659 FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4660 if (FoldedLoad)
4661 std::swap(N0, N1);
4662 }
4663
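    // The one-operand MUL/IMUL forms implicitly read one factor from
    // AL/AX/EAX/RAX, so that operand is copied into the accumulator first.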
4664 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4665 N0, SDValue()).getValue(1);
4666
4667 MachineSDNode *CNode;
4668 if (FoldedLoad) {
4669 // i16/i32/i64 use an instruction that produces a low and high result even
4670 // though only the low result is used.
4671 SDVTList VTs;
4672 if (NVT == MVT::i8)
4673 VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4674 else
4675 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
4676
4677 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4678 InFlag };
4679 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4680
4681 // Update the chain.
4682 ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
4683 // Record the mem-refs
4684 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4685 } else {
4686 // i16/i32/i64 use an instruction that produces a low and high result even
4687 // though only the low result is used.
4688 SDVTList VTs;
4689 if (NVT == MVT::i8)
4690 VTs = CurDAG->getVTList(NVT, MVT::i32);
4691 else
4692 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
4693
4694 CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
4695 }
4696
4697 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4698 ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
4699 CurDAG->RemoveDeadNode(Node);
4700 return;
4701 }
4702
4703 case ISD::SMUL_LOHI:
4704 case ISD::UMUL_LOHI: {
4705 SDValue N0 = Node->getOperand(0);
4706 SDValue N1 = Node->getOperand(1);
4707
4708 unsigned Opc, MOpc;
4709 bool isSigned = Opcode == ISD::SMUL_LOHI;
4710 if (!isSigned) {
4711 switch (NVT.SimpleTy) {
4712 default: llvm_unreachable("Unsupported VT!");
4713 case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
4714 case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
4715 }
4716 } else {
4717 switch (NVT.SimpleTy) {
4718 default: llvm_unreachable("Unsupported VT!");
4719 case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
4720 case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
4721 }
4722 }
4723
4724 unsigned SrcReg, LoReg, HiReg;
4725 switch (Opc) {
4726 default: llvm_unreachable("Unknown MUL opcode!");
4727 case X86::IMUL32r:
4728 case X86::MUL32r:
4729 SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
4730 break;
4731 case X86::IMUL64r:
4732 case X86::MUL64r:
4733 SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
4734 break;
4735 }
4736
4737 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4738 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4739 // Multiply is commutative.
4740 if (!foldedLoad) {
4741 foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4742 if (foldedLoad)
4743 std::swap(N0, N1);
4744 }
4745
4746 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
4747 N0, SDValue()).getValue(1);
4748 if (foldedLoad) {
4749 SDValue Chain;
4750 MachineSDNode *CNode = nullptr;
4751 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4752 InFlag };
4753 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
4754 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4755 Chain = SDValue(CNode, 0);
4756 InFlag = SDValue(CNode, 1);
4757
4758 // Update the chain.
4759 ReplaceUses(N1.getValue(1), Chain);
4760 // Record the mem-refs
4761 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4762 } else {
4763 SDValue Ops[] = { N1, InFlag };
4764 SDVTList VTs = CurDAG->getVTList(MVT::Glue);
4765 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4766 InFlag = SDValue(CNode, 0);
4767 }
4768
4769 // Copy the low half of the result, if it is needed.
4770 if (!SDValue(Node, 0).use_empty()) {
4771 assert(LoReg && "Register for low half is not defined!");
4772 SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
4773 NVT, InFlag);
4774 InFlag = ResLo.getValue(2);
4775 ReplaceUses(SDValue(Node, 0), ResLo);
4776 LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
4777            dbgs() << '\n');
4778 }
4779 // Copy the high half of the result, if it is needed.
4780 if (!SDValue(Node, 1).use_empty()) {
4781 assert(HiReg && "Register for high half is not defined!");
4782 SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
4783 NVT, InFlag);
4784 InFlag = ResHi.getValue(2);
4785 ReplaceUses(SDValue(Node, 1), ResHi);
4786 LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
4787            dbgs() << '\n');
4788 }
4789
4790 CurDAG->RemoveDeadNode(Node);
4791 return;
4792 }
4793
4794 case ISD::SDIVREM:
4795 case ISD::UDIVREM: {
4796 SDValue N0 = Node->getOperand(0);
4797 SDValue N1 = Node->getOperand(1);
4798
4799 unsigned Opc, MOpc;
4800 bool isSigned = Opcode == ISD::SDIVREM;
4801 if (!isSigned) {
4802 switch (NVT.SimpleTy) {
4803 default: llvm_unreachable("Unsupported VT!");
4804 case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
4805 case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
4806 case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
4807 case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
4808 }
4809 } else {
4810 switch (NVT.SimpleTy) {
4811 default: llvm_unreachable("Unsupported VT!");
4812 case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
4813 case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
4814 case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
4815 case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
4816 }
4817 }
4818
4819 unsigned LoReg, HiReg, ClrReg;
4820 unsigned SExtOpcode;
4821 switch (NVT.SimpleTy) {
4822 default: llvm_unreachable("Unsupported VT!");
4823 case MVT::i8:
4824 LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4825 SExtOpcode = 0; // Not used.
4826 break;
4827 case MVT::i16:
4828 LoReg = X86::AX; HiReg = X86::DX;
4829 ClrReg = X86::DX;
4830 SExtOpcode = X86::CWD;
4831 break;
4832 case MVT::i32:
4833 LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
4834 SExtOpcode = X86::CDQ;
4835 break;
4836 case MVT::i64:
4837 LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
4838 SExtOpcode = X86::CQO;
4839 break;
4840 }
4841
4842 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4843 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4844 bool signBitIsZero = CurDAG->SignBitIsZero(N0);
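    // If the sign bit of the dividend is known zero, zero-extending the high
    // half is equivalent to sign-extending it, so the CWD/CDQ/CQO below can be
    // replaced by a cheaper zero idiom.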
4845
4846 SDValue InFlag;
4847 if (NVT == MVT::i8) {
4848 // Special case for div8, just use a move with zero extension to AX to
4849 // clear the upper 8 bits (AH).
4850 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
4851 MachineSDNode *Move;
4852 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4853 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4854 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
4855 : X86::MOVZX16rm8;
4856 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
4857 Chain = SDValue(Move, 1);
4858 ReplaceUses(N0.getValue(1), Chain);
4859 // Record the mem-refs
4860 CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
4861 } else {
4862 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
4863 : X86::MOVZX16rr8;
4864 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
4865 Chain = CurDAG->getEntryNode();
4866 }
4867 Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
4868 SDValue());
4869 InFlag = Chain.getValue(1);
4870 } else {
4871 InFlag =
4872 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
4873 LoReg, N0, SDValue()).getValue(1);
4874 if (isSigned && !signBitIsZero) {
4875 // Sign extend the low part into the high part.
4876 InFlag =
4877 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
4878 } else {
4879 // Zero out the high part, effectively zero extending the input.
4880 SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
4881 switch (NVT.SimpleTy) {
4882 case MVT::i16:
4883 ClrNode =
4884 SDValue(CurDAG->getMachineNode(
4885 TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
4886 CurDAG->getTargetConstant(X86::sub_16bit, dl,
4887 MVT::i32)),
4888 0);
4889 break;
4890 case MVT::i32:
4891 break;
4892 case MVT::i64:
4893 ClrNode =
4894 SDValue(CurDAG->getMachineNode(
4895 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
4896 CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
4897 CurDAG->getTargetConstant(X86::sub_32bit, dl,
4898 MVT::i32)),
4899 0);
4900 break;
4901 default:
4902 llvm_unreachable("Unexpected division source");
4903 }
4904
4905 InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
4906 ClrNode, InFlag).getValue(1);
4907 }
4908 }
4909
4910 if (foldedLoad) {
4911 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4912 InFlag };
4913 MachineSDNode *CNode =
4914 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
4915 InFlag = SDValue(CNode, 1);
4916 // Update the chain.
4917 ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
4918 // Record the mem-refs
4919 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4920 } else {
4921 InFlag =
4922 SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
4923 }
4924
4925 // Prevent use of AH in a REX instruction by explicitly copying it to
4926 // an ABCD_L register.
4927 //
4928 // The current assumption of the register allocator is that isel
4929 // won't generate explicit references to the GR8_ABCD_H registers. If
4930 // the allocator and/or the backend get enhanced to be more robust in
4931 // that regard, this can be, and should be, removed.
4932 if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
4933 SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
4934 unsigned AHExtOpcode =
4935 isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
4936
4937 SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
4938 MVT::Glue, AHCopy, InFlag);
4939 SDValue Result(RNode, 0);
4940 InFlag = SDValue(RNode, 1);
4941
4942 Result =
4943 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
4944
4945 ReplaceUses(SDValue(Node, 1), Result);
4946 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4947            dbgs() << '\n');
4948 }
4949 // Copy the division (low) result, if it is needed.
4950 if (!SDValue(Node, 0).use_empty()) {
4951 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4952 LoReg, NVT, InFlag);
4953 InFlag = Result.getValue(2);
4954 ReplaceUses(SDValue(Node, 0), Result);
4955 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4956            dbgs() << '\n');
4957 }
4958 // Copy the remainder (high) result, if it is needed.
4959 if (!SDValue(Node, 1).use_empty()) {
4960 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4961 HiReg, NVT, InFlag);
4962 InFlag = Result.getValue(2);
4963 ReplaceUses(SDValue(Node, 1), Result);
4964 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4965            dbgs() << '\n');
4966 }
4967 CurDAG->RemoveDeadNode(Node);
4968 return;
4969 }
4970
4971 case X86ISD::CMP: {
4972 SDValue N0 = Node->getOperand(0);
4973 SDValue N1 = Node->getOperand(1);
4974
4975 // Optimizations for TEST compares.
4976 if (!isNullConstant(N1))
4977 break;
4978
4979 // Save the original VT of the compare.
4980 MVT CmpVT = N0.getSimpleValueType();
4981
4982 // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
4983 // by a test instruction. The test should be removed later by
4984 // analyzeCompare if we are using only the zero flag.
4985 // TODO: Should we check the users and use the BEXTR flags directly?
4986 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4987 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
4988 unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
4989 : X86::TEST32rr;
4990 SDValue BEXTR = SDValue(NewNode, 0);
4991 NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
4992 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4993 CurDAG->RemoveDeadNode(Node);
4994 return;
4995 }
4996 }
4997
4998 // We can peek through truncates, but we need to be careful below.
4999 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
5000 N0 = N0.getOperand(0);
5001
5002 // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
5003 // use a smaller encoding.
5004 // Look past the truncate if CMP is the only use of it.
5005 if (N0.getOpcode() == ISD::AND &&
5006 N0.getNode()->hasOneUse() &&
5007 N0.getValueType() != MVT::i8) {
5008 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5009 if (!C) break;
5010 uint64_t Mask = C->getZExtValue();
5011
5012 // Check if we can replace AND+IMM64 with a shift. This is possible for
5013 // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
5014 // flag.
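    // For example, checking (X & 0xFFFFFFFF00000000) == 0 when only ZF is
    // needed can become "shrq $32, X; testq X, X" instead of materializing
    // the 64-bit mask in a register.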
5015 if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
5016 onlyUsesZeroFlag(SDValue(Node, 0))) {
5017 if (isMask_64(~Mask)) {
5018 unsigned TrailingZeros = countTrailingZeros(Mask);
5019 SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
5020 SDValue Shift =
5021 SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
5022 N0.getOperand(0), Imm), 0);
5023 MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
5024 MVT::i32, Shift, Shift);
5025 ReplaceNode(Node, Test);
5026 return;
5027 }
5028 if (isMask_64(Mask)) {
5029 unsigned LeadingZeros = countLeadingZeros(Mask);
5030 SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
5031 SDValue Shift =
5032 SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
5033 N0.getOperand(0), Imm), 0);
5034 MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
5035 MVT::i32, Shift, Shift);
5036 ReplaceNode(Node, Test);
5037 return;
5038 }
5039 }
5040
5041 MVT VT;
5042 int SubRegOp;
5043 unsigned ROpc, MOpc;
5044
5045 // For each of these checks we need to be careful if the sign flag is
5046 // being used. It is only safe to use the sign flag in two conditions,
5047 // either the sign bit in the shrunken mask is zero or the final test
5048 // size is equal to the original compare size.
5049
5050 if (isUInt<8>(Mask) &&
5051 (!(Mask & 0x80) || CmpVT == MVT::i8 ||
5052 hasNoSignFlagUses(SDValue(Node, 0)))) {
5053 // For example, convert "testl %eax, $8" to "testb %al, $8"
5054 VT = MVT::i8;
5055 SubRegOp = X86::sub_8bit;
5056 ROpc = X86::TEST8ri;
5057 MOpc = X86::TEST8mi;
5058 } else if (OptForMinSize && isUInt<16>(Mask) &&
5059 (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
5060 hasNoSignFlagUses(SDValue(Node, 0)))) {
5061 // For example, "testl %eax, $32776" to "testw %ax, $32776".
5062 // NOTE: We only want to form TESTW instructions if optimizing for
5063 // min size. Otherwise we only save one byte and possibly get a length
5064 // changing prefix penalty in the decoders.
5065 VT = MVT::i16;
5066 SubRegOp = X86::sub_16bit;
5067 ROpc = X86::TEST16ri;
5068 MOpc = X86::TEST16mi;
5069 } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
5070 ((!(Mask & 0x80000000) &&
5071 // Without minsize 16-bit Cmps can get here so we need to
5072 // be sure we calculate the correct sign flag if needed.
5073 (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
5074 CmpVT == MVT::i32 ||
5075 hasNoSignFlagUses(SDValue(Node, 0)))) {
5076 // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
5077 // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
5078 // Otherwise, we find ourselves in a position where we have to do
5079 // promotion. If previous passes did not promote the and, we assume
5080 // they had a good reason not to and do not promote here.
5081 VT = MVT::i32;
5082 SubRegOp = X86::sub_32bit;
5083 ROpc = X86::TEST32ri;
5084 MOpc = X86::TEST32mi;
5085 } else {
5086 // No eligible transformation was found.
5087 break;
5088 }
5089
5090 SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
5091 SDValue Reg = N0.getOperand(0);
5092
5093 // Emit a testl or testw.
5094 MachineSDNode *NewNode;
5095 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5096 if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
5097 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
5098 Reg.getOperand(0) };
5099 NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
5100 // Update the chain.
5101 ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
5102 // Record the mem-refs
5103 CurDAG->setNodeMemRefs(NewNode,
5104 {cast<LoadSDNode>(Reg)->getMemOperand()});
5105 } else {
5106 // Extract the subregister if necessary.
5107 if (N0.getValueType() != VT)
5108 Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
5109
5110 NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
5111 }
5112 // Replace CMP with TEST.
5113 ReplaceNode(Node, NewNode);
5114 return;
5115 }
5116 break;
5117 }
5118 case X86ISD::PCMPISTR: {
5119 if (!Subtarget->hasSSE42())
5120 break;
5121
5122 bool NeedIndex = !SDValue(Node, 0).use_empty();
5123 bool NeedMask = !SDValue(Node, 1).use_empty();
5124 // We can't fold a load if we are going to make two instructions.
5125 bool MayFoldLoad = !NeedIndex || !NeedMask;
5126
5127 MachineSDNode *CNode;
5128 if (NeedMask) {
5129 unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
5130 unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
5131 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
5132 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
5133 }
5134 if (NeedIndex || !NeedMask) {
5135 unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
5136 unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
5137 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
5138 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5139 }
5140
5141 // Connect the flag usage to the last instruction created.
5142 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
5143 CurDAG->RemoveDeadNode(Node);
5144 return;
5145 }
5146 case X86ISD::PCMPESTR: {
5147 if (!Subtarget->hasSSE42())
5148 break;
5149
5150 // Copy the two implicit register inputs.
5151 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
5152 Node->getOperand(1),
5153 SDValue()).getValue(1);
5154 InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
5155 Node->getOperand(3), InFlag).getValue(1);
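    // Unlike PCMPISTR, the PCMPESTR forms take explicit string lengths in EAX
    // and EDX, which is why those two copies are emitted first.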
5156
5157 bool NeedIndex = !SDValue(Node, 0).use_empty();
5158 bool NeedMask = !SDValue(Node, 1).use_empty();
5159 // We can't fold a load if we are going to make two instructions.
5160 bool MayFoldLoad = !NeedIndex || !NeedMask;
5161
5162 MachineSDNode *CNode;
5163 if (NeedMask) {
5164 unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
5165 unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
5166 CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
5167 InFlag);
5168 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
5169 }
5170 if (NeedIndex || !NeedMask) {
5171 unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
5172 unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
5173 CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
5174 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5175 }
5176 // Connect the flag usage to the last instruction created.
5177 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
5178 CurDAG->RemoveDeadNode(Node);
5179 return;
5180 }
5181
5182 case ISD::SETCC: {
5183 if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
5184 return;
5185
5186 break;
5187 }
5188
5189 case ISD::STORE:
5190 if (foldLoadStoreIntoMemOperand(Node))
5191 return;
5192 break;
5193 case ISD::FCEIL:
5194 case ISD::FFLOOR:
5195 case ISD::FTRUNC:
5196 case ISD::FNEARBYINT:
5197 case ISD::FRINT: {
5198 // Replace fp rounding nodes with their X86-specific equivalents so we don't
5199 // need 2 sets of patterns.
5200 // FIXME: This can only happen when the nodes started as STRICT_* and have
5201 // been mutated into their non-STRICT equivalents. Eventually this
5202 // mutation will be removed and we should switch the STRICT_ nodes to a
5203 // strict version of RNDSCALE in PreProcessISelDAG.
5204 unsigned Imm;
5205 switch (Node->getOpcode()) {
5206 default: llvm_unreachable("Unexpected opcode!");
5207 case ISD::FCEIL: Imm = 0xA; break;
5208 case ISD::FFLOOR: Imm = 0x9; break;
5209 case ISD::FTRUNC: Imm = 0xB; break;
5210 case ISD::FNEARBYINT: Imm = 0xC; break;
5211 case ISD::FRINT: Imm = 0x4; break;
5212 }
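    // In the ROUND/RNDSCALE immediate, bits 1:0 pick the rounding mode
    // (1 = toward -inf, 2 = toward +inf, 3 = truncate), bit 2 selects the
    // current MXCSR mode instead, and bit 3 suppresses the precision (inexact)
    // exception; that is why FRINT (0x4) raises inexact while FNEARBYINT
    // (0xC) does not.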
5213 SDLoc dl(Node);
5214 SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0),
5215 Node->getOperand(0),
5216 CurDAG->getTargetConstant(Imm, dl, MVT::i8));
5217 ReplaceNode(Node, Res.getNode());
5218 SelectCode(Res.getNode());
5219 return;
5220 }
5221 }
5222
5223 SelectCode(Node);
5224}
5225
5226bool X86DAGToDAGISel::
5227SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5228 std::vector<SDValue> &OutOps) {
5229 SDValue Op0, Op1, Op2, Op3, Op4;
5230 switch (ConstraintID) {
5231 default:
5232 llvm_unreachable("Unexpected asm memory constraint");
5233 case InlineAsm::Constraint_i:
5234 // FIXME: It seems strange that 'i' is needed here since it's supposed to
5235 // be an immediate and not a memory constraint.
5236 LLVM_FALLTHROUGH;
5237 case InlineAsm::Constraint_o: // offsetable ??
5238 case InlineAsm::Constraint_v: // not offsetable ??
5239 case InlineAsm::Constraint_m: // memory
5240 case InlineAsm::Constraint_X:
5241 if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
5242 return true;
5243 break;
5244 }
5245
5246 OutOps.push_back(Op0);
5247 OutOps.push_back(Op1);
5248 OutOps.push_back(Op2);
5249 OutOps.push_back(Op3);
5250 OutOps.push_back(Op4);
5251 return false;
5252}
5253
5254/// This pass converts a legalized DAG into a X86-specific DAG,
5255/// ready for instruction scheduling.
5256FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
5257 CodeGenOpt::Level OptLevel) {
5258 return new X86DAGToDAGISel(TM, OptLevel);
5259}

/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/Support/Casting.h

1//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(),
10// and dyn_cast_or_null<X>() templates.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_CASTING_H
15#define LLVM_SUPPORT_CASTING_H
16
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/type_traits.h"
19#include <cassert>
20#include <memory>
21#include <type_traits>
22
23namespace llvm {
24
25//===----------------------------------------------------------------------===//
26// isa<x> Support Templates
27//===----------------------------------------------------------------------===//
28
29// Define a template that can be specialized by smart pointers to reflect the
30// fact that they are automatically dereferenced, and are not involved with the
31// template selection process... the default implementation is a noop.
32//
33template<typename From> struct simplify_type {
34 using SimpleType = From; // The real type this represents...
35
36 // An accessor to get the real value...
37 static SimpleType &getSimplifiedValue(From &Val) { return Val; }
38};
39
40template<typename From> struct simplify_type<const From> {
41 using NonConstSimpleType = typename simplify_type<From>::SimpleType;
42 using SimpleType =
43 typename add_const_past_pointer<NonConstSimpleType>::type;
44 using RetType =
45 typename add_lvalue_reference_if_not_pointer<SimpleType>::type;
46
47 static RetType getSimplifiedValue(const From& Val) {
48 return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
49 }
50};
51
52// The core of the implementation of isa<X> is here; To and From should be
53// the names of classes. This template can be specialized to customize the
54// implementation of isa<> without rewriting it from scratch.
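// For example, a hypothetical client type (not part of LLVM) could answer
// isa<> queries through a kind discriminator instead of To::classof:
//
//   template <> struct isa_impl<MyDerived, MyBase> {
//     static inline bool doit(const MyBase &Val) {
//       return Val.getKind() == MyBase::Kind_MyDerived;
//     }
//   };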
55template <typename To, typename From, typename Enabler = void>
56struct isa_impl {
57 static inline bool doit(const From &Val) {
58 return To::classof(&Val);
59 }
60};
61
62/// Always allow upcasts, and perform no dynamic check for them.
63template <typename To, typename From>
64struct isa_impl<
65 To, From, typename std::enable_if<std::is_base_of<To, From>::value>::type> {
66 static inline bool doit(const From &) { return true; }
67};
68
69template <typename To, typename From> struct isa_impl_cl {
70 static inline bool doit(const From &Val) {
71 return isa_impl<To, From>::doit(Val);
72 }
73};
74
75template <typename To, typename From> struct isa_impl_cl<To, const From> {
76 static inline bool doit(const From &Val) {
77 return isa_impl<To, From>::doit(Val);
78 }
79};
80
81template <typename To, typename From>
82struct isa_impl_cl<To, const std::unique_ptr<From>> {
83 static inline bool doit(const std::unique_ptr<From> &Val) {
84 assert(Val && "isa<> used on a null pointer");
85 return isa_impl_cl<To, From>::doit(*Val);
86 }
87};
88
89template <typename To, typename From> struct isa_impl_cl<To, From*> {
90 static inline bool doit(const From *Val) {
91 assert(Val && "isa<> used on a null pointer");
92 return isa_impl<To, From>::doit(*Val);
93 }
94};
95
96template <typename To, typename From> struct isa_impl_cl<To, From*const> {
97 static inline bool doit(const From *Val) {
98 assert(Val && "isa<> used on a null pointer");
99 return isa_impl<To, From>::doit(*Val);
100 }
101};
102
103template <typename To, typename From> struct isa_impl_cl<To, const From*> {
104 static inline bool doit(const From *Val) {
105 assert(Val && "isa<> used on a null pointer");
106 return isa_impl<To, From>::doit(*Val);
107 }
108};
109
110template <typename To, typename From> struct isa_impl_cl<To, const From*const> {
111 static inline bool doit(const From *Val) {
112 assert(Val && "isa<> used on a null pointer");
113 return isa_impl<To, From>::doit(*Val);
114 }
115};
116
117template<typename To, typename From, typename SimpleFrom>
118struct isa_impl_wrap {
119 // When From != SimplifiedType, we can simplify the type some more by using
120 // the simplify_type template.
121 static bool doit(const From &Val) {
122 return isa_impl_wrap<To, SimpleFrom,
123 typename simplify_type<SimpleFrom>::SimpleType>::doit(
124 simplify_type<const From>::getSimplifiedValue(Val));
125 }
126};
127
128template<typename To, typename FromTy>
129struct isa_impl_wrap<To, FromTy, FromTy> {
130 // When From == SimpleType, we are as simple as we are going to get.
131 static bool doit(const FromTy &Val) {
132 return isa_impl_cl<To,FromTy>::doit(Val);
133 }
134};
135
136// isa<X> - Return true if the parameter to the template is an instance of the
137// template type argument. Used like this:
138//
139// if (isa<Type>(myVal)) { ... }
140//
141 template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
142 return isa_impl_wrap<X, const Y,
143 typename simplify_type<const Y>::SimpleType>::doit(Val);
144}
145
146// isa_and_nonnull<X> - Functionally identical to isa, except that a null value
147// is accepted.
148//
149template <class X, class Y>
150 LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) {
151 if (!Val)
152 return false;
153 return isa<X>(Val);
154}
155
156//===----------------------------------------------------------------------===//
157// cast<x> Support Templates
158//===----------------------------------------------------------------------===//
159
160template<class To, class From> struct cast_retty;
161
162// Calculate what type the 'cast' function should return, based on a requested
163// type of To and a source type of From.
164template<class To, class From> struct cast_retty_impl {
165 using ret_type = To &; // Normal case, return Ty&
166};
167template<class To, class From> struct cast_retty_impl<To, const From> {
168 using ret_type = const To &; // Normal case, return Ty&
169};
170
171template<class To, class From> struct cast_retty_impl<To, From*> {
172 using ret_type = To *; // Pointer arg case, return Ty*
173};
174
175template<class To, class From> struct cast_retty_impl<To, const From*> {
176 using ret_type = const To *; // Constant pointer arg case, return const Ty*
177};
178
179template<class To, class From> struct cast_retty_impl<To, const From*const> {
180 using ret_type = const To *; // Constant pointer arg case, return const Ty*
181};
182
183template <class To, class From>
184struct cast_retty_impl<To, std::unique_ptr<From>> {
185private:
186 using PointerType = typename cast_retty_impl<To, From *>::ret_type;
187 using ResultType = typename std::remove_pointer<PointerType>::type;
188
189public:
190 using ret_type = std::unique_ptr<ResultType>;
191};
192
193template<class To, class From, class SimpleFrom>
194struct cast_retty_wrap {
195 // When the simplified type and the from type are not the same, use the type
196 // simplifier to reduce the type, then reuse cast_retty_impl to get the
197 // resultant type.
198 using ret_type = typename cast_retty<To, SimpleFrom>::ret_type;
199};
200
201template<class To, class FromTy>
202struct cast_retty_wrap<To, FromTy, FromTy> {
203 // When the simplified type is equal to the from type, use it directly.
204 using ret_type = typename cast_retty_impl<To,FromTy>::ret_type;
205};
206
207template<class To, class From>
208struct cast_retty {
209 using ret_type = typename cast_retty_wrap<
210 To, From, typename simplify_type<From>::SimpleType>::ret_type;
211};
212
213// Ensure the non-simple values are converted using the simplify_type template
214// that may be specialized by smart pointers...
215//
216template<class To, class From, class SimpleFrom> struct cast_convert_val {
217 // This is not a simple type, use the template to simplify it...
218 static typename cast_retty<To, From>::ret_type doit(From &Val) {
219 return cast_convert_val<To, SimpleFrom,
Step 11: Returning without writing to 'Val.Node'
220 typename simplify_type<SimpleFrom>::SimpleType>::doit(
221 simplify_type<From>::getSimplifiedValue(Val));
Step 8: Calling 'simplify_type::getSimplifiedValue'
Step 10: Returning from 'simplify_type::getSimplifiedValue'
222 }
223};
224
225template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
226 // This _is_ a simple type, just cast it.
227 static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) {
228 typename cast_retty<To, FromTy>::ret_type Res2
229 = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val);
230 return Res2;
231 }
232};
233
234template <class X> struct is_simple_type {
235 static const bool value =
236 std::is_same<X, typename simplify_type<X>::SimpleType>::value;
237};
238
239// cast<X> - Return the argument parameter cast to the specified type. This
240// casting operator asserts that the type is correct, so it does not return null
241// on failure. It does not allow a null argument (use cast_or_null for that).
242// It is typically used like this:
243//
244// cast<Instruction>(myVal)->getParent()
245//
246template <class X, class Y>
247inline typename std::enable_if<!is_simple_type<Y>::value,
248 typename cast_retty<X, const Y>::ret_type>::type
249cast(const Y &Val) {
250   assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
251 return cast_convert_val<
252 X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
253}
254
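// Editor's note: an illustrative client-side sketch of the cast<> contract,
// not part of this header (the helper name is hypothetical, LLVM headers are
// assumed). cast<> asserts that the dynamic type matches and never returns
// null, so it is only safe once the type has been established, e.g. by a
// prior isa<> check.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static llvm::BasicBlock *parentBlockOf(llvm::Value *V) {
  // Precondition: V is known to be an Instruction; otherwise the assert
  // inside cast<> fires in +Asserts builds.
  return llvm::cast<llvm::Instruction>(V)->getParent();
}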
255template <class X, class Y>
256inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
257   assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
Step 5: 'Val' is a 'ConstantSDNode'
Step 6: '?' condition is true
258 return cast_convert_val<X, Y,
Step 7: Calling 'cast_convert_val::doit'
Step 12: Returning from 'cast_convert_val::doit'
Step 13: Returning without writing to 'Val.Node'
259 typename simplify_type<Y>::SimpleType>::doit(Val);
260}
261
262template <class X, class Y>
263inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
264   assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
265 return cast_convert_val<X, Y*,
266 typename simplify_type<Y*>::SimpleType>::doit(Val);
267}
268
269template <class X, class Y>
270inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
271cast(std::unique_ptr<Y> &&Val) {
272   assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!");
273 using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type;
274 return ret_type(
275 cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit(
276 Val.release()));
277}
278
279// cast_or_null<X> - Functionally identical to cast, except that a null value is
280// accepted.
281//
282template <class X, class Y>
283LLVM_NODISCARD inline
284 typename std::enable_if<!is_simple_type<Y>::value,
285 typename cast_retty<X, const Y>::ret_type>::type
286 cast_or_null(const Y &Val) {
287 if (!Val)
288 return nullptr;
289   assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
290 return cast<X>(Val);
291}
292
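// Editor's note: a brief client-side sketch of cast_or_null<> (hypothetical
// helper name). A null input is passed through as a null result; a non-null
// input must still be of the requested type, exactly as with cast<>.
#include "llvm/IR/Instruction.h"

static llvm::Instruction *asInstruction(llvm::Value *MaybeNull) {
  return llvm::cast_or_null<llvm::Instruction>(MaybeNull);
}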
293template <class X, class Y>
294LLVM_NODISCARD inline
295 typename std::enable_if<!is_simple_type<Y>::value,
296 typename cast_retty<X, Y>::ret_type>::type
297 cast_or_null(Y &Val) {
298 if (!Val)
299 return nullptr;
300   assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
301 return cast<X>(Val);
302}
303
304template <class X, class Y>
305LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
306cast_or_null(Y *Val) {
307 if (!Val) return nullptr;
308   assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
309 return cast<X>(Val);
310}
311
312template <class X, class Y>
313inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
314cast_or_null(std::unique_ptr<Y> &&Val) {
315 if (!Val)
316 return nullptr;
317 return cast<X>(std::move(Val));
318}
319
320// dyn_cast<X> - Return the argument parameter cast to the specified type. This
321// casting operator returns null if the argument is of the wrong type, so it can
322// be used to test for a type as well as cast if successful. This should be
323// used in the context of an if statement like this:
324//
325// if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... }
326//
327
328template <class X, class Y>
329LLVM_NODISCARD inline
330 typename std::enable_if<!is_simple_type<Y>::value,
331 typename cast_retty<X, const Y>::ret_type>::type
332 dyn_cast(const Y &Val) {
333 return isa<X>(Val) ? cast<X>(Val) : nullptr;
334}
335
336template <class X, class Y>
337LLVM_NODISCARD inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) {
338 return isa<X>(Val) ? cast<X>(Val) : nullptr;
Step 2: Assuming 'Val' is a 'ConstantSDNode'
Step 3: '?' condition is true
Step 4: Calling 'cast<llvm::ConstantSDNode, llvm::SDValue>'
Step 14: Returning from 'cast<llvm::ConstantSDNode, llvm::SDValue>'
Step 15: Returning without writing to 'Val.Node'
339}
340
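// Editor's note: a client-side sketch of the dyn_cast<> idiom the analyzer
// path above is walking through; X86ISelDAGToDAG.cpp reaches this overload by
// calling dyn_cast<ConstantSDNode> on an SDValue. The helper name below is
// mine, not the report's.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isConstantZero(llvm::SDValue Op) {
  if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(Op))
    return C->isNullValue(); // dereferenced only after the cast succeeded
  return false;              // wrong node kind: dyn_cast<> returned null
}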
341template <class X, class Y>
342LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) {
343 return isa<X>(Val) ? cast<X>(Val) : nullptr;
344}
345
346// dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null
347// value is accepted.
348//
349template <class X, class Y>
350LLVM_NODISCARD inline
351 typename std::enable_if<!is_simple_type<Y>::value,
352 typename cast_retty<X, const Y>::ret_type>::type
353 dyn_cast_or_null(const Y &Val) {
354 return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
355}
356
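// Editor's note: a small client-side sketch (hypothetical helper) showing that
// dyn_cast_or_null<> folds the null check into the type test, so a null input
// and a wrong-typed input both yield nullptr.
#include "llvm/IR/Instruction.h"

static llvm::Instruction *tryAsInstruction(llvm::Value *MaybeNull) {
  return llvm::dyn_cast_or_null<llvm::Instruction>(MaybeNull);
}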
357template <class X, class Y>
358LLVM_NODISCARD inline
359 typename std::enable_if<!is_simple_type<Y>::value,
360 typename cast_retty<X, Y>::ret_type>::type
361 dyn_cast_or_null(Y &Val) {
362 return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
363}
364
365template <class X, class Y>
366LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
367dyn_cast_or_null(Y *Val) {
368 return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
369}
370
371// unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>,
372// taking ownership of the input pointer iff isa<X>(Val) is true. If the
373// cast is successful, Val refers to nullptr on exit and the casted value
374// is returned. If the cast is unsuccessful, the function returns nullptr
375// and Val is unchanged.
376template <class X, class Y>
377LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &Val)
378 -> decltype(cast<X>(Val)) {
379 if (!isa<X>(Val))
380 return nullptr;
381 return cast<X>(std::move(Val));
382}
383
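// Editor's note: a self-contained client-side sketch of the ownership-transfer
// semantics described above, using a hypothetical Base/Derived pair with the
// usual LLVM-style classof() hook; none of these names exist in LLVM itself.
#include "llvm/Support/Casting.h"
#include <cassert>
#include <memory>

namespace sketch {
struct Base {
  enum Kind { K_Base, K_Derived };
  Kind TheKind;
  explicit Base(Kind K = K_Base) : TheKind(K) {}
  virtual ~Base() = default;
};
struct Derived : Base {
  Derived() : Base(K_Derived) {}
  static bool classof(const Base *B) { return B->TheKind == K_Derived; }
};

inline void demo() {
  std::unique_ptr<Base> B = std::make_unique<Derived>();
  // On success the source unique_ptr is emptied and the result owns the
  // object; on failure the source is left untouched and nullptr is returned.
  std::unique_ptr<Derived> D = llvm::unique_dyn_cast<Derived>(B);
  assert(D && !B);
}
} // namespace sketch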
384template <class X, class Y>
385LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val)
386 -> decltype(cast<X>(Val)) {
387 return unique_dyn_cast<X, Y>(Val);
388}
389
390// unique_dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast,
391// except that a null value is accepted.
392template <class X, class Y>
393LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &Val)
394 -> decltype(cast<X>(Val)) {
395 if (!Val)
396 return nullptr;
397 return unique_dyn_cast<X, Y>(Val);
398}
399
400template <class X, class Y>
401LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val)
402 -> decltype(cast<X>(Val)) {
403 return unique_dyn_cast_or_null<X, Y>(Val);
404}
405
406} // end namespace llvm
407
408#endif // LLVM_SUPPORT_CASTING_H

/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/Metadata.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/Support/AlignOf.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/Casting.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/MachineValueType.h"
45#include <algorithm>
46#include <cassert>
47#include <climits>
48#include <cstddef>
49#include <cstdint>
50#include <cstring>
51#include <iterator>
52#include <string>
53#include <tuple>
54
55namespace llvm {
56
57class APInt;
58class Constant;
59template <typename T> struct DenseMapInfo;
60class GlobalValue;
61class MachineBasicBlock;
62class MachineConstantPoolValue;
63class MCSymbol;
64class raw_ostream;
65class SDNode;
66class SelectionDAG;
67class Type;
68class Value;
69
70void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
71 bool force = false);
72
73/// This represents a list of ValueTypes that has been interned by
74/// a SelectionDAG. Instances of this simple value class are returned by
75/// SelectionDAG::getVTList(...).
76///
77struct SDVTList {
78 const EVT *VTs;
79 unsigned int NumVTs;
80};
81
82namespace ISD {
83
84 /// Node predicates
85
86 /// If N is a BUILD_VECTOR node whose elements are all the same constant or
87 /// undefined, return true and return the constant value in \p SplatValue.
88 bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
89
90 /// Return true if the specified node is a BUILD_VECTOR where all of the
91 /// elements are ~0 or undef.
92 bool isBuildVectorAllOnes(const SDNode *N);
93
94 /// Return true if the specified node is a BUILD_VECTOR where all of the
95 /// elements are 0 or undef.
96 bool isBuildVectorAllZeros(const SDNode *N);
97
98 /// Return true if the specified node is a BUILD_VECTOR node of all
99 /// ConstantSDNode or undef.
100 bool isBuildVectorOfConstantSDNodes(const SDNode *N);
101
102 /// Return true if the specified node is a BUILD_VECTOR node of all
103 /// ConstantFPSDNode or undef.
104 bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
105
106 /// Return true if the node has at least one operand and all operands of the
107 /// specified node are ISD::UNDEF.
108 bool allOperandsUndef(const SDNode *N);
109
110} // end namespace ISD
111
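// Editor's note: a hedged client-side sketch of how these predicates are
// typically consumed; the wrapper name is mine and N is assumed to come from
// an existing DAG.
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool getSplatConstant(const llvm::SDNode *N, llvm::APInt &Splat) {
  // Fills Splat only when N is a BUILD_VECTOR whose elements are all the same
  // constant (or undef); otherwise returns false.
  return llvm::ISD::isConstantSplatVector(N, Splat);
}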
112//===----------------------------------------------------------------------===//
113/// Unlike LLVM values, Selection DAG nodes may return multiple
114/// values as the result of a computation. Many nodes return multiple values,
115/// from loads (which define a token and a return value) to ADDC (which returns
116/// a result and a carry value), to calls (which may return an arbitrary number
117/// of values).
118///
119/// As such, each use of a SelectionDAG computation must indicate the node that
120/// computes it as well as which return value to use from that node. This pair
121/// of information is represented with the SDValue value type.
122///
123class SDValue {
124 friend struct DenseMapInfo<SDValue>;
125
126 SDNode *Node = nullptr; // The node defining the value we are using.
127 unsigned ResNo = 0; // Which return value of the node we are using.
128
129public:
130 SDValue() = default;
131 SDValue(SDNode *node, unsigned resno);
132
133 /// get the index which selects a specific result in the SDNode
134 unsigned getResNo() const { return ResNo; }
135
136 /// get the SDNode which holds the desired result
137 SDNode *getNode() const { return Node; }
138
139 /// set the SDNode
140 void setNode(SDNode *N) { Node = N; }
141
142 inline SDNode *operator->() const { return Node; }
143
144 bool operator==(const SDValue &O) const {
145 return Node == O.Node && ResNo == O.ResNo;
146 }
147 bool operator!=(const SDValue &O) const {
148 return !operator==(O);
149 }
150 bool operator<(const SDValue &O) const {
151 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
152 }
153 explicit operator bool() const {
154 return Node != nullptr;
155 }
156
157 SDValue getValue(unsigned R) const {
158 return SDValue(Node, R);
159 }
160
161 /// Return true if this node is an operand of N.
162 bool isOperandOf(const SDNode *N) const;
163
164 /// Return the ValueType of the referenced return value.
165 inline EVT getValueType() const;
166
167 /// Return the simple ValueType of the referenced return value.
168 MVT getSimpleValueType() const {
169 return getValueType().getSimpleVT();
170 }
171
172 /// Returns the size of the value in bits.
173 unsigned getValueSizeInBits() const {
174 return getValueType().getSizeInBits();
175 }
176
177 unsigned getScalarValueSizeInBits() const {
178 return getValueType().getScalarType().getSizeInBits();
179 }
180
181 // Forwarding methods - These forward to the corresponding methods in SDNode.
182 inline unsigned getOpcode() const;
183 inline unsigned getNumOperands() const;
184 inline const SDValue &getOperand(unsigned i) const;
185 inline uint64_t getConstantOperandVal(unsigned i) const;
186 inline const APInt &getConstantOperandAPInt(unsigned i) const;
187 inline bool isTargetMemoryOpcode() const;
188 inline bool isTargetOpcode() const;
189 inline bool isMachineOpcode() const;
190 inline bool isUndef() const;
191 inline unsigned getMachineOpcode() const;
192 inline const DebugLoc &getDebugLoc() const;
193 inline void dump() const;
194 inline void dump(const SelectionDAG *G) const;
195 inline void dumpr() const;
196 inline void dumpr(const SelectionDAG *G) const;
197
198 /// Return true if this operand (which must be a chain) reaches the
199 /// specified operand without crossing any side-effecting instructions.
200 /// In practice, this looks through token factors and non-volatile loads.
201 /// In order to remain efficient, this only
202 /// looks a couple of nodes in; it does not do an exhaustive search.
203 bool reachesChainWithoutSideEffects(SDValue Dest,
204 unsigned Depth = 2) const;
205
206 /// Return true if there are no nodes using value ResNo of Node.
207 inline bool use_empty() const;
208
209 /// Return true if there is exactly one node using value ResNo of Node.
210 inline bool hasOneUse() const;
211};
212
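// Editor's note: a short sketch, written as client code rather than as part of
// this header, of what the (Node, ResNo) pair means in practice. It assumes
// `Load` already refers to result 0 of an ISD::LOAD node built elsewhere; the
// helper name is mine.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static llvm::SDValue chainOf(llvm::SDValue Load) {
  // A load defines two results on the same SDNode: the loaded value (ResNo 0)
  // and the chain token (ResNo 1). getValue(1) reuses the node and only
  // changes the result index.
  return Load.getValue(1);
}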
213template<> struct DenseMapInfo<SDValue> {
214 static inline SDValue getEmptyKey() {
215 SDValue V;
216 V.ResNo = -1U;
217 return V;
218 }
219
220 static inline SDValue getTombstoneKey() {
221 SDValue V;
222 V.ResNo = -2U;
223 return V;
224 }
225
226 static unsigned getHashValue(const SDValue &Val) {
227 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
228 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
229 }
230
231 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
232 return LHS == RHS;
233 }
234};
235
236/// Allow casting operators to work directly on
237/// SDValues as if they were SDNode*'s.
238template<> struct simplify_type<SDValue> {
239 using SimpleType = SDNode *;
240
241 static SimpleType getSimplifiedValue(SDValue &Val) {
242 return Val.getNode();
Step 9: Returning without writing to 'Val.Node'
243 }
244};
245template<> struct simplify_type<const SDValue> {
246 using SimpleType = /*const*/ SDNode *;
247
248 static SimpleType getSimplifiedValue(const SDValue &Val) {
249 return Val.getNode();
250 }
251};
252
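// Editor's note: these specializations are what let isa<>/cast<>/dyn_cast<>
// accept an SDValue directly; getSimplifiedValue() merely unwraps the SDNode*.
// That is also why the analyzer's path notes track 'Val.Node': a
// default-constructed SDValue carries a null Node, and handing it to the cast
// machinery trips the "isa<> used on a null pointer" assert. A hedged
// client-side sketch (hypothetical helper name) of guarding that unwrap:
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isConstantNode(llvm::SDValue V) {
  // Check the underlying node explicitly before letting isa<> dereference it.
  return V.getNode() && llvm::isa<llvm::ConstantSDNode>(V);
}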
253/// Represents a use of a SDNode. This class holds an SDValue,
254/// which records the SDNode being used and the result number, a
255/// pointer to the SDNode using the value, and Next and Prev pointers,
256/// which link together all the uses of an SDNode.
257///
258class SDUse {
259 /// Val - The value being used.
260 SDValue Val;
261 /// User - The user of this value.
262 SDNode *User = nullptr;
263 /// Prev, Next - Pointers to the uses list of the SDNode referred by
264 /// this operand.
265 SDUse **Prev = nullptr;
266 SDUse *Next = nullptr;