1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DAG pattern matching instruction selector for X86,
10// converting from a legalized dag to a X86 dag.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelDAGToDAG.h"
15#include "X86.h"
16#include "X86MachineFunctionInfo.h"
17#include "X86RegisterInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/CodeGen/MachineModuleInfo.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/IR/ConstantRange.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/Instructions.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsX86.h"
29#include "llvm/IR/Module.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/KnownBits.h"
34#include "llvm/Support/MathExtras.h"
35#include <cstdint>
36
37using namespace llvm;
38
39#define DEBUG_TYPE "x86-isel"
40#define PASS_NAME "X86 DAG->DAG Instruction Selection"
41
42STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
43
44static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
45 cl::desc("Enable setting constant bits to reduce size of mask immediates"),
46 cl::Hidden);
47
48 static cl::opt<bool> PromoteAnyextLoad(
49 "x86-promote-anyext-load", cl::init(true),
50 cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);
51
52 extern cl::opt<bool> IndirectBranchTracking;
53
54//===----------------------------------------------------------------------===//
55// Pattern Matcher Implementation
56//===----------------------------------------------------------------------===//
57
58namespace {
59 /// This corresponds to X86AddressMode, but uses SDValue's instead of register
60 /// numbers for the leaves of the matched tree.
61 struct X86ISelAddressMode {
62 enum {
63 RegBase,
64 FrameIndexBase
65 } BaseType = RegBase;
66
67 // This is really a union, discriminated by BaseType!
68 SDValue Base_Reg;
69 int Base_FrameIndex = 0;
70
71 unsigned Scale = 1;
72 SDValue IndexReg;
73 int32_t Disp = 0;
74 SDValue Segment;
75 const GlobalValue *GV = nullptr;
76 const Constant *CP = nullptr;
77 const BlockAddress *BlockAddr = nullptr;
78 const char *ES = nullptr;
79 MCSymbol *MCSym = nullptr;
80 int JT = -1;
81 Align Alignment; // CP alignment.
82 unsigned char SymbolFlags = X86II::MO_NO_FLAG; // X86II::MO_*
83 bool NegateIndex = false;
84
85 X86ISelAddressMode() = default;
86
87 bool hasSymbolicDisplacement() const {
88 return GV != nullptr || CP != nullptr || ES != nullptr ||
89 MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
90 }
91
92 bool hasBaseOrIndexReg() const {
93 return BaseType == FrameIndexBase ||
94 IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
95 }
96
97 /// Return true if this addressing mode is already RIP-relative.
98 bool isRIPRelative() const {
99 if (BaseType != RegBase) return false;
100 if (RegisterSDNode *RegNode =
101 dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
102 return RegNode->getReg() == X86::RIP;
103 return false;
104 }
105
106 void setBaseReg(SDValue Reg) {
107 BaseType = RegBase;
108 Base_Reg = Reg;
109 }
110
111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
112 void dump(SelectionDAG *DAG = nullptr) {
113 dbgs() << "X86ISelAddressMode " << this << '\n';
114 dbgs() << "Base_Reg ";
115 if (Base_Reg.getNode())
116 Base_Reg.getNode()->dump(DAG);
117 else
118 dbgs() << "nul\n";
119 if (BaseType == FrameIndexBase)
120 dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
121 dbgs() << " Scale " << Scale << '\n'
122 << "IndexReg ";
123 if (NegateIndex)
124 dbgs() << "negate ";
125 if (IndexReg.getNode())
126 IndexReg.getNode()->dump(DAG);
127 else
128 dbgs() << "nul\n";
129 dbgs() << " Disp " << Disp << '\n'
130 << "GV ";
131 if (GV)
132 GV->dump();
133 else
134 dbgs() << "nul";
135 dbgs() << " CP ";
136 if (CP)
137 CP->dump();
138 else
139 dbgs() << "nul";
140 dbgs() << '\n'
141 << "ES ";
142 if (ES)
143 dbgs() << ES;
144 else
145 dbgs() << "nul";
146 dbgs() << " MCSym ";
147 if (MCSym)
148 dbgs() << MCSym;
149 else
150 dbgs() << "nul";
151 dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n';
152 }
153#endif
154 };
155}
156
157namespace {
158 //===--------------------------------------------------------------------===//
159 /// ISel - X86-specific code to select X86 machine instructions for
160 /// SelectionDAG operations.
161 ///
162 class X86DAGToDAGISel final : public SelectionDAGISel {
163 /// Keep a pointer to the X86Subtarget around so that we can
164 /// make the right decision when generating code for different targets.
165 const X86Subtarget *Subtarget;
166
167 /// If true, selector should try to optimize for minimum code size.
168 bool OptForMinSize;
169
170 /// Disable direct TLS access through segment registers.
171 bool IndirectTlsSegRefs;
172
173 public:
174 X86DAGToDAGISel() = delete;
175
176 explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOptLevel OptLevel)
177 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
178 OptForMinSize(false), IndirectTlsSegRefs(false) {}
179
180 bool runOnMachineFunction(MachineFunction &MF) override {
181 // Reset the subtarget each time through.
182 Subtarget = &MF.getSubtarget<X86Subtarget>();
183 IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
184 "indirect-tls-seg-refs");
185
186 // OptFor[Min]Size are used in pattern predicates that isel is matching.
187 OptForMinSize = MF.getFunction().hasMinSize();
188 assert((!OptForMinSize || MF.getFunction().hasOptSize()) &&
189 "OptForMinSize implies OptForSize");
190 return SelectionDAGISel::runOnMachineFunction(MF);
191 }
192
193 void emitFunctionEntryCode() override;
194
195 bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
196
197 void PreprocessISelDAG() override;
198 void PostprocessISelDAG() override;
199
200// Include the pieces autogenerated from the target description.
201#include "X86GenDAGISel.inc"
202
203 private:
204 void Select(SDNode *N) override;
205
206 bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
207 bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
208 bool AllowSegmentRegForX32 = false);
209 bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
210 bool matchAddress(SDValue N, X86ISelAddressMode &AM);
211 bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
212 bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
213 SDValue matchIndexRecursively(SDValue N, X86ISelAddressMode &AM,
214 unsigned Depth);
215 bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
216 unsigned Depth);
217 bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
218 unsigned Depth);
219 bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
220 bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
221 SDValue &Scale, SDValue &Index, SDValue &Disp,
222 SDValue &Segment);
223 bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp,
224 SDValue ScaleOp, SDValue &Base, SDValue &Scale,
225 SDValue &Index, SDValue &Disp, SDValue &Segment);
226 bool selectMOV64Imm32(SDValue N, SDValue &Imm);
227 bool selectLEAAddr(SDValue N, SDValue &Base,
228 SDValue &Scale, SDValue &Index, SDValue &Disp,
229 SDValue &Segment);
230 bool selectLEA64_32Addr(SDValue N, SDValue &Base,
231 SDValue &Scale, SDValue &Index, SDValue &Disp,
232 SDValue &Segment);
233 bool selectTLSADDRAddr(SDValue N, SDValue &Base,
234 SDValue &Scale, SDValue &Index, SDValue &Disp,
235 SDValue &Segment);
236 bool selectRelocImm(SDValue N, SDValue &Op);
237
238 bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
239 SDValue &Base, SDValue &Scale,
240 SDValue &Index, SDValue &Disp,
241 SDValue &Segment);
242
243 // Convenience method where P is also root.
244 bool tryFoldLoad(SDNode *P, SDValue N,
245 SDValue &Base, SDValue &Scale,
246 SDValue &Index, SDValue &Disp,
247 SDValue &Segment) {
248 return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
249 }
250
251 bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
252 SDValue &Base, SDValue &Scale,
253 SDValue &Index, SDValue &Disp,
254 SDValue &Segment);
255
256 bool isProfitableToFormMaskedOp(SDNode *N) const;
257
258 /// Implement addressing mode selection for inline asm expressions.
259 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
260 InlineAsm::ConstraintCode ConstraintID,
261 std::vector<SDValue> &OutOps) override;
262
263 void emitSpecialCodeForMain();
264
265 inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
266 MVT VT, SDValue &Base, SDValue &Scale,
267 SDValue &Index, SDValue &Disp,
268 SDValue &Segment) {
269 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
270 Base = CurDAG->getTargetFrameIndex(
271 AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
272 else if (AM.Base_Reg.getNode())
273 Base = AM.Base_Reg;
274 else
275 Base = CurDAG->getRegister(0, VT);
276
277 Scale = getI8Imm(AM.Scale, DL);
278
279#define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC)
280 // Negate the index if needed.
281 if (AM.NegateIndex) {
282 unsigned NegOpc = VT == MVT::i64 ? GET_ND_IF_ENABLED(X86::NEG64r)
283 : GET_ND_IF_ENABLED(X86::NEG32r);
284 SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
285 AM.IndexReg), 0);
286 AM.IndexReg = Neg;
287 }
288
289 if (AM.IndexReg.getNode())
290 Index = AM.IndexReg;
291 else
292 Index = CurDAG->getRegister(0, VT);
293
294 // These are 32-bit even in 64-bit mode since RIP-relative offset
295 // is 32-bit.
296 if (AM.GV)
297 Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
298 MVT::i32, AM.Disp,
299 AM.SymbolFlags);
300 else if (AM.CP)
301 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
302 AM.Disp, AM.SymbolFlags);
303 else if (AM.ES) {
304 assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
305 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
306 } else if (AM.MCSym) {
307 assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
308 assert(AM.SymbolFlags == 0 && "oo");
309 Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
310 } else if (AM.JT != -1) {
311 assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
312 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
313 } else if (AM.BlockAddr)
314 Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
315 AM.SymbolFlags);
316 else
317 Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
318
319 if (AM.Segment.getNode())
320 Segment = AM.Segment;
321 else
322 Segment = CurDAG->getRegister(0, MVT::i16);
323 }
324
325 // Utility function to determine whether we should avoid selecting
326 // immediate forms of instructions for better code size or not.
327 // At a high level, we'd like to avoid such instructions when
328 // we have similar constants used within the same basic block
329 // that can be kept in a register.
330 //
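 // Illustrative example (made-up values, not from the original comments):
 // under minsize, materializing a shared 32-bit constant once,
 //   movl $0x12345678, %ecx
 //   addl %ecx, (%rdi)
 //   addl %ecx, (%rsi)
 // is smaller than re-encoding the 4-byte immediate in each user:
 //   addl $0x12345678, (%rdi)
 //   addl $0x12345678, (%rsi)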
331 bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
332 uint32_t UseCount = 0;
333
334 // Do not want to hoist if we're not optimizing for size.
335 // TODO: We'd like to remove this restriction.
336 // See the comment in X86InstrInfo.td for more info.
337 if (!CurDAG->shouldOptForSize())
338 return false;
339
340 // Walk all the users of the immediate.
341 for (const SDNode *User : N->uses()) {
342 if (UseCount >= 2)
343 break;
344
345 // This user is already selected. Count it as a legitimate use and
346 // move on.
347 if (User->isMachineOpcode()) {
348 UseCount++;
349 continue;
350 }
351
352 // We want to count stores of immediates as real uses.
353 if (User->getOpcode() == ISD::STORE &&
354 User->getOperand(1).getNode() == N) {
355 UseCount++;
356 continue;
357 }
358
359 // We don't currently match users that have > 2 operands (except
360 // for stores, which are handled above)
361 // Those instructions won't match in ISel, for now, and would
362 // be counted incorrectly.
363 // This may change in the future as we add additional instruction
364 // types.
365 if (User->getNumOperands() != 2)
366 continue;
367
368 // If this is a sign-extended 8-bit integer immediate used in an ALU
369 // instruction, there is probably an opcode encoding to save space.
370 auto *C = dyn_cast<ConstantSDNode>(N);
371 if (C && isInt<8>(C->getSExtValue()))
372 continue;
373
374 // Immediates that are used for offsets as part of stack
375 // manipulation should be left alone. These are typically
376 // used to indicate SP offsets for argument passing and
377 // will get pulled into stores/pushes (implicitly).
378 if (User->getOpcode() == X86ISD::ADD ||
379 User->getOpcode() == ISD::ADD ||
380 User->getOpcode() == X86ISD::SUB ||
381 User->getOpcode() == ISD::SUB) {
382
383 // Find the other operand of the add/sub.
384 SDValue OtherOp = User->getOperand(0);
385 if (OtherOp.getNode() == N)
386 OtherOp = User->getOperand(1);
387
388 // Don't count if the other operand is SP.
389 RegisterSDNode *RegNode;
390 if (OtherOp->getOpcode() == ISD::CopyFromReg &&
391 (RegNode = dyn_cast_or_null<RegisterSDNode>(
392 OtherOp->getOperand(1).getNode())))
393 if ((RegNode->getReg() == X86::ESP) ||
394 (RegNode->getReg() == X86::RSP))
395 continue;
396 }
397
398 // ... otherwise, count this and move on.
399 UseCount++;
400 }
401
402 // If we have more than 1 use, then recommend for hoisting.
403 return (UseCount > 1);
404 }
405
406 /// Return a target constant with the specified value of type i8.
407 inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
408 return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
409 }
410
411 /// Return a target constant with the specified value, of type i32.
412 inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
413 return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
414 }
415
416 /// Return a target constant with the specified value, of type i64.
417 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
418 return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
419 }
420
421 SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
422 const SDLoc &DL) {
423 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
424 uint64_t Index = N->getConstantOperandVal(1);
425 MVT VecVT = N->getOperand(0).getSimpleValueType();
426 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
427 }
428
429 SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
430 const SDLoc &DL) {
431 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
432 uint64_t Index = N->getConstantOperandVal(2);
433 MVT VecVT = N->getSimpleValueType(0);
434 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
435 }
436
437 SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth,
438 const SDLoc &DL) {
439 assert(VecWidth == 128 && "Unexpected vector width");
440 uint64_t Index = N->getConstantOperandVal(2);
441 MVT VecVT = N->getSimpleValueType(0);
442 uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth;
443 assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index");
444 // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub)
445 // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub)
446 return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);
447 }
448
449 SDValue getSBBZero(SDNode *N) {
450 SDLoc dl(N);
451 MVT VT = N->getSimpleValueType(0);
452
453 // Create zero.
454 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
455 SDValue Zero = SDValue(
456 CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0);
457 if (VT == MVT::i64) {
458 Zero = SDValue(
459 CurDAG->getMachineNode(
460 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
461 CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
462 CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
463 0);
464 }
465
466 // Copy flags to the EFLAGS register and glue it to next node.
467 unsigned Opcode = N->getOpcode();
468 assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) &&
469 "Unexpected opcode for SBB materialization");
470 unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1;
471 SDValue EFLAGS =
472 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
473 N->getOperand(FlagOpIndex), SDValue());
474
475 // Create a 64-bit instruction if the result is 64-bits otherwise use the
476 // 32-bit version.
477 unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
478 MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
479 VTs = CurDAG->getVTList(SBBVT, MVT::i32);
480 return SDValue(
481 CurDAG->getMachineNode(Opc, dl, VTs,
482 {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}),
483 0);
484 }
485
486 // Helper to detect unneeded AND instructions on shift amounts. Called
487 // from PatFrags in tablegen.
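 // Illustrative example: a 32-bit shift only uses the low 5 bits of its
 // amount, so in (x << (amt & 31)) the AND is redundant; the mask 31 has at
 // least 5 trailing ones, and a query with Width == 5 returns true.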
488 bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
489 assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
490 const APInt &Val = N->getConstantOperandAPInt(1);
491
492 if (Val.countr_one() >= Width)
493 return true;
494
495 APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
496 return Mask.countr_one() >= Width;
497 }
498
499 /// Return an SDNode that returns the value of the global base register.
500 /// Output instructions required to initialize the global base register,
501 /// if necessary.
502 SDNode *getGlobalBaseReg();
503
504 /// Return a reference to the TargetMachine, casted to the target-specific
505 /// type.
506 const X86TargetMachine &getTargetMachine() const {
507 return static_cast<const X86TargetMachine &>(TM);
508 }
509
510 /// Return a reference to the TargetInstrInfo, casted to the target-specific
511 /// type.
512 const X86InstrInfo *getInstrInfo() const {
513 return Subtarget->getInstrInfo();
514 }
515
516 /// Return a condition code of the given SDNode
517 X86::CondCode getCondFromNode(SDNode *N) const;
518
519 /// Address-mode matching performs shift-of-and to and-of-shift
520 /// reassociation in order to expose more scaled addressing
521 /// opportunities.
522 bool ComplexPatternFuncMutatesDAG() const override {
523 return true;
524 }
525
526 bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
527
528 // Indicates we should prefer to use a non-temporal load for this load.
529 bool useNonTemporalLoad(LoadSDNode *N) const {
530 if (!N->isNonTemporal())
531 return false;
532
533 unsigned StoreSize = N->getMemoryVT().getStoreSize();
534
535 if (N->getAlign().value() < StoreSize)
536 return false;
537
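 // Non-temporal vector loads come from MOVNTDQA (SSE4.1, 16 bytes),
 // VMOVNTDQA (AVX2, 32 bytes) and its EVEX form (AVX-512, 64 bytes); there is
 // no non-temporal scalar load, hence the 4/8 byte cases below.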
538 switch (StoreSize) {
539 default: llvm_unreachable("Unsupported store size");
540 case 4:
541 case 8:
542 return false;
543 case 16:
544 return Subtarget->hasSSE41();
545 case 32:
546 return Subtarget->hasAVX2();
547 case 64:
548 return Subtarget->hasAVX512();
549 }
550 }
551
552 bool foldLoadStoreIntoMemOperand(SDNode *Node);
553 MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
554 bool matchBitExtract(SDNode *Node);
555 bool shrinkAndImmediate(SDNode *N);
556 bool isMaskZeroExtended(SDNode *N) const;
557 bool tryShiftAmountMod(SDNode *N);
558 bool tryShrinkShlLogicImm(SDNode *N);
559 bool tryVPTERNLOG(SDNode *N);
560 bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB,
561 SDNode *ParentC, SDValue A, SDValue B, SDValue C,
562 uint8_t Imm);
563 bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
564 bool tryMatchBitSelect(SDNode *N);
565
566 MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
567 const SDLoc &dl, MVT VT, SDNode *Node);
568 MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
569 const SDLoc &dl, MVT VT, SDNode *Node,
570 SDValue &InGlue);
571
572 bool tryOptimizeRem8Extend(SDNode *N);
573
574 bool onlyUsesZeroFlag(SDValue Flags) const;
575 bool hasNoSignFlagUses(SDValue Flags) const;
576 bool hasNoCarryFlagUses(SDValue Flags) const;
577 };
578
579 class X86DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
580 public:
581 static char ID;
582 explicit X86DAGToDAGISelLegacy(X86TargetMachine &tm,
583 CodeGenOptLevel OptLevel)
584 : SelectionDAGISelLegacy(
585 ID, std::make_unique<X86DAGToDAGISel>(tm, OptLevel)) {}
586 };
587}
588
589char X86DAGToDAGISelLegacy::ID = 0;
590
591INITIALIZE_PASS(X86DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
592
593// Returns true if this masked compare can be implemented legally with this
594// type.
595static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
596 unsigned Opcode = N->getOpcode();
597 if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM ||
598 Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC ||
599 Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) {
600 // We can get 256-bit 8 element types here without VLX being enabled. When
601 // this happens we will use 512-bit operations and the mask will not be
602 // zero extended.
603 EVT OpVT = N->getOperand(0).getValueType();
604 // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the
605 // second operand.
606 if (Opcode == X86ISD::STRICT_CMPM)
607 OpVT = N->getOperand(1).getValueType();
608 if (OpVT.is256BitVector() || OpVT.is128BitVector())
609 return Subtarget->hasVLX();
610
611 return true;
612 }
613 // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
614 if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
615 Opcode == X86ISD::FSETCCM_SAE)
616 return true;
617
618 return false;
619}
620
621// Returns true if we can assume the writer of the mask has zero extended it
622// for us.
623bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
624 // If this is an AND, check if we have a compare on either side. As long as
625 // one side guarantees the mask is zero extended, the AND will preserve those
626 // zeros.
627 if (N->getOpcode() == ISD::AND)
628 return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
629 isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
630
631 return isLegalMaskCompare(N, Subtarget);
632}
633
634bool
635X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
636 if (OptLevel == CodeGenOptLevel::None)
637 return false;
638
639 if (!N.hasOneUse())
640 return false;
641
642 if (N.getOpcode() != ISD::LOAD)
643 return true;
644
645 // Don't fold non-temporal loads if we have an instruction for them.
646 if (useNonTemporalLoad(cast<LoadSDNode>(N)))
647 return false;
648
649 // If N is a load, do additional profitability checks.
650 if (U == Root) {
651 switch (U->getOpcode()) {
652 default: break;
653 case X86ISD::ADD:
654 case X86ISD::ADC:
655 case X86ISD::SUB:
656 case X86ISD::SBB:
657 case X86ISD::AND:
658 case X86ISD::XOR:
659 case X86ISD::OR:
660 case ISD::ADD:
661 case ISD::UADDO_CARRY:
662 case ISD::AND:
663 case ISD::OR:
664 case ISD::XOR: {
665 SDValue Op1 = U->getOperand(1);
666
667 // If the other operand is an 8-bit immediate we should fold the immediate
668 // instead. This reduces code size.
669 // e.g.
670 // movl 4(%esp), %eax
671 // addl $4, %eax
672 // vs.
673 // movl $4, %eax
674 // addl 4(%esp), %eax
675 // The former is 2 bytes shorter. In the case where the increment is 1,
676 // the saving can be 4 bytes (by using incl %eax).
677 if (auto *Imm = dyn_cast<ConstantSDNode>(Op1)) {
678 if (Imm->getAPIntValue().isSignedIntN(8))
679 return false;
680
681 // If this is a 64-bit AND with an immediate that fits in 32-bits,
682 // prefer using the smaller and over folding the load. This is needed to
683 // make sure immediates created by shrinkAndImmediate are always folded.
684 // Ideally we would narrow the load during DAG combine and get the
685 // best of both worlds.
686 if (U->getOpcode() == ISD::AND &&
687 Imm->getAPIntValue().getBitWidth() == 64 &&
688 Imm->getAPIntValue().isIntN(32))
689 return false;
690
691 // If this is really a zext_inreg that can be represented with a movzx
692 // instruction, prefer that.
693 // TODO: We could shrink the load and fold if it is non-volatile.
694 if (U->getOpcode() == ISD::AND &&
695 (Imm->getAPIntValue() == UINT8_MAX ||
696 Imm->getAPIntValue() == UINT16_MAX ||
697 Imm->getAPIntValue() == UINT32_MAX))
698 return false;
699
700 // ADD/SUB can negate the immediate and use the opposite operation
701 // to fit 128 into a sign extended 8 bit immediate.
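 // e.g. (illustrative): add $128 does not fit a sign-extended imm8, but the
 // equivalent sub $-128 does, so prefer keeping the immediate over folding
 // the load.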
702 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
703 (-Imm->getAPIntValue()).isSignedIntN(8))
704 return false;
705
706 if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) &&
707 (-Imm->getAPIntValue()).isSignedIntN(8) &&
708 hasNoCarryFlagUses(SDValue(U, 1)))
709 return false;
710 }
711
712 // If the other operand is a TLS address, we should fold it instead.
713 // This produces
714 // movl %gs:0, %eax
715 // leal i@NTPOFF(%eax), %eax
716 // instead of
717 // movl $i@NTPOFF, %eax
718 // addl %gs:0, %eax
719 // if the block also has an access to a second TLS address this will save
720 // a load.
721 // FIXME: This is probably also true for non-TLS addresses.
722 if (Op1.getOpcode() == X86ISD::Wrapper) {
723 SDValue Val = Op1.getOperand(0);
724 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
725 return false;
726 }
727
728 // Don't fold load if this matches the BTS/BTR/BTC patterns.
729 // BTS: (or X, (shl 1, n))
730 // BTR: (and X, (rotl -2, n))
731 // BTC: (xor X, (shl 1, n))
732 if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
733 if (U->getOperand(0).getOpcode() == ISD::SHL &&
734 isOneConstant(U->getOperand(0).getOperand(0)))
735 return false;
736
737 if (U->getOperand(1).getOpcode() == ISD::SHL &&
738 isOneConstant(U->getOperand(1).getOperand(0)))
739 return false;
740 }
741 if (U->getOpcode() == ISD::AND) {
742 SDValue U0 = U->getOperand(0);
743 SDValue U1 = U->getOperand(1);
744 if (U0.getOpcode() == ISD::ROTL) {
745 auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
746 if (C && C->getSExtValue() == -2)
747 return false;
748 }
749
750 if (U1.getOpcode() == ISD::ROTL) {
751 auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
752 if (C && C->getSExtValue() == -2)
753 return false;
754 }
755 }
756
757 break;
758 }
759 case ISD::SHL:
760 case ISD::SRA:
761 case ISD::SRL:
762 // Don't fold a load into a shift by immediate. The BMI2 instructions
763 // support folding a load, but not an immediate. The legacy instructions
764 // support folding an immediate, but can't fold a load. Folding an
765 // immediate is preferable to folding a load.
766 if (isa<ConstantSDNode>(U->getOperand(1)))
767 return false;
768
769 break;
770 }
771 }
772
773 // Prevent folding a load if this can be implemented with an insert_subreg or
774 // a move that implicitly zeroes.
775 if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
776 isNullConstant(Root->getOperand(2)) &&
777 (Root->getOperand(0).isUndef() ||
778 ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
779 return false;
780
781 return true;
782}
783
784 // Indicates it is profitable to form an AVX512 masked operation. Returning
785 // false will favor a register-to-register masked move or vblendm and the
786 // operation will be selected separately.
787bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const {
788 assert(
789 (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) &&
790 "Unexpected opcode!");
791
792 // If the operation has additional users, the operation will be duplicated.
793 // Check the use count to prevent that.
794 // FIXME: Are there cheap opcodes we might want to duplicate?
795 return N->getOperand(1).hasOneUse();
796}
797
798/// Replace the original chain operand of the call with
799/// load's chain operand and move load below the call's chain operand.
800static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
801 SDValue Call, SDValue OrigChain) {
802 SmallVector<SDValue, 8> Ops;
803 SDValue Chain = OrigChain.getOperand(0);
804 if (Chain.getNode() == Load.getNode())
805 Ops.push_back(Load.getOperand(0));
806 else {
807 assert(Chain.getOpcode() == ISD::TokenFactor &&
808 "Unexpected chain operand");
809 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
810 if (Chain.getOperand(i).getNode() == Load.getNode())
811 Ops.push_back(Load.getOperand(0));
812 else
813 Ops.push_back(Chain.getOperand(i));
814 SDValue NewChain =
815 CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
816 Ops.clear();
817 Ops.push_back(NewChain);
818 }
819 Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
820 CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
821 CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
822 Load.getOperand(1), Load.getOperand(2));
823
824 Ops.clear();
825 Ops.push_back(SDValue(Load.getNode(), 1));
826 Ops.append(Call->op_begin() + 1, Call->op_end());
827 CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
828}
829
830/// Return true if call address is a load and it can be
831/// moved below CALLSEQ_START and the chains leading up to the call.
832/// Return the CALLSEQ_START by reference as a second output.
833/// In the case of a tail call, there isn't a callseq node between the call
834/// chain and the load.
835static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
836 // The transformation is somewhat dangerous if the call's chain was glued to
837 // the call. After MoveBelowOrigChain the load is moved between the call and
838 // the chain, this can create a cycle if the load is not folded. So it is
839 // *really* important that we are sure the load will be folded.
840 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
841 return false;
842 auto *LD = dyn_cast<LoadSDNode>(Callee.getNode());
843 if (!LD ||
844 !LD->isSimple() ||
845 LD->getAddressingMode() != ISD::UNINDEXED ||
846 LD->getExtensionType() != ISD::NON_EXTLOAD)
847 return false;
848
849 // Now let's find the callseq_start.
850 while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
851 if (!Chain.hasOneUse())
852 return false;
853 Chain = Chain.getOperand(0);
854 }
855
856 if (!Chain.getNumOperands())
857 return false;
858 // Since we are not checking for AA here, conservatively abort if the chain
859 // writes to memory. It's not safe to move the callee (a load) across a store.
860 if (isa<MemSDNode>(Chain.getNode()) &&
861 cast<MemSDNode>(Chain.getNode())->writeMem())
862 return false;
863 if (Chain.getOperand(0).getNode() == Callee.getNode())
864 return true;
865 if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
866 Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
867 Callee.getValue(1).hasOneUse())
868 return true;
869 return false;
870}
871
872static bool isEndbrImm64(uint64_t Imm) {
873// There may be some other prefix bytes between 0xF3 and 0x0F1EFA.
874// e.g.: 0xF3660F1EFA, 0xF3670F1EFA
875 if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
876 return false;
877
878 uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
879 0x65, 0x66, 0x67, 0xf0, 0xf2};
880 int i = 24; // 24bit 0x0F1EFA has matched
881 while (i < 64) {
882 uint8_t Byte = (Imm >> i) & 0xFF;
883 if (Byte == 0xF3)
884 return true;
885 if (!llvm::is_contained(OptionalPrefixBytes, Byte))
886 return false;
887 i += 8;
888 }
889
890 return false;
891}
892
893static bool needBWI(MVT VT) {
894 return (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v64i8);
895}
896
897void X86DAGToDAGISel::PreprocessISelDAG() {
898 bool MadeChange = false;
899 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
900 E = CurDAG->allnodes_end(); I != E; ) {
901 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
902
903 // This is for CET enhancement.
904 //
905 // ENDBR32 and ENDBR64 have specific opcodes:
906 // ENDBR32: F3 0F 1E FB
907 // ENDBR64: F3 0F 1E FA
908 // We want to prevent attackers from finding unintended ENDBR32/64
909 // opcode matches in the binary.
910 // Here’s an example:
911 // If the compiler had to generate asm for the following code:
912 // a = 0xF30F1EFA
913 // it could, for example, generate:
914 // mov 0xF30F1EFA, dword ptr[a]
915 // In such a case, the binary would include a gadget that starts
916 // with a fake ENDBR64 opcode. Therefore, we split such generation
917 // into multiple operations so that the pattern does not show up in the binary.
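 // Illustrative sketch of the rewrite (registers chosen arbitrarily):
 //   movl $0xF30F1EFA, (%rdi)   ; immediate contains a literal ENDBR64 pattern
 // becomes, roughly,
 //   movl $0x0CF0E105, %eax     ; ~0xF30F1EFA
 //   notl %eax
 //   movl %eax, (%rdi)
 // so the ENDBR byte sequence never appears verbatim in the encoding.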
918 if (N->getOpcode() == ISD::Constant) {
919 MVT VT = N->getSimpleValueType(0);
920 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
921 int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
922 if (Imm == EndbrImm || isEndbrImm64(Imm)) {
923 // Check that the cf-protection-branch is enabled.
924 Metadata *CFProtectionBranch =
925 MF->getMMI().getModule()->getModuleFlag("cf-protection-branch");
926 if (CFProtectionBranch || IndirectBranchTracking) {
927 SDLoc dl(N);
928 SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
929 Complement = CurDAG->getNOT(dl, Complement, VT);
930 --I;
931 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement);
932 ++I;
933 MadeChange = true;
934 continue;
935 }
936 }
937 }
938
939 // If this is a target specific AND node with no flag usages, turn it back
940 // into ISD::AND to enable test instruction matching.
941 if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
942 SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
943 N->getOperand(0), N->getOperand(1));
944 --I;
945 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
946 ++I;
947 MadeChange = true;
948 continue;
949 }
950
951 // Convert vector increment or decrement to sub/add with an all-ones
952 // constant:
953 // add X, <1, 1...> --> sub X, <-1, -1...>
954 // sub X, <1, 1...> --> add X, <-1, -1...>
955 // The all-ones vector constant can be materialized using a pcmpeq
956 // instruction that is commonly recognized as an idiom (has no register
957 // dependency), so that's better/smaller than loading a splat 1 constant.
958 //
959 // But don't do this if it would inhibit a potentially profitable load
960 // folding opportunity for the other operand. That only occurs with the
961 // intersection of:
962 // (1) The other operand (op0) is load foldable.
963 // (2) The op is an add (otherwise, we are *creating* an add and can still
964 // load fold the other op).
965 // (3) The target has AVX (otherwise, we have a destructive add and can't
966 // load fold the other op without killing the constant op).
967 // (4) The constant 1 vector has multiple uses (so it is profitable to load
968 // into a register anyway).
969 auto mayPreventLoadFold = [&]() {
970 return X86::mayFoldLoad(N->getOperand(0), *Subtarget) &&
971 N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
972 !N->getOperand(1).hasOneUse();
973 };
974 if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
975 N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
976 APInt SplatVal;
977 if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
978 SplatVal.isOne()) {
979 SDLoc DL(N);
980
981 MVT VT = N->getSimpleValueType(0);
982 unsigned NumElts = VT.getSizeInBits() / 32;
983 SDValue AllOnes =
984 CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts));
985 AllOnes = CurDAG->getBitcast(VT, AllOnes);
986
987 unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
988 SDValue Res =
989 CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
990 --I;
991 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
992 ++I;
993 MadeChange = true;
994 continue;
995 }
996 }
997
998 switch (N->getOpcode()) {
999 case X86ISD::VBROADCAST: {
1000 MVT VT = N->getSimpleValueType(0);
1001 // Emulate v32i16/v64i8 broadcast without BWI.
1002 if (!Subtarget->hasBWI() && needBWI(VT)) {
1003 MVT NarrowVT = VT.getHalfNumVectorElementsVT();
1004 SDLoc dl(N);
1005 SDValue NarrowBCast =
1006 CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0));
1007 SDValue Res =
1008 CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
1009 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1010 unsigned Index = NarrowVT.getVectorMinNumElements();
1011 Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
1012 CurDAG->getIntPtrConstant(Index, dl));
1013
1014 --I;
1015 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1016 ++I;
1017 MadeChange = true;
1018 continue;
1019 }
1020
1021 break;
1022 }
1023 case X86ISD::VBROADCAST_LOAD: {
1024 MVT VT = N->getSimpleValueType(0);
1025 // Emulate v32i16/v64i8 broadcast without BWI.
1026 if (!Subtarget->hasBWI() && needBWI(VT)) {
1027 MVT NarrowVT = VT.getHalfNumVectorElementsVT();
1028 auto *MemNode = cast<MemSDNode>(N);
1029 SDLoc dl(N);
1030 SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
1031 SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
1032 SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
1033 X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(),
1034 MemNode->getMemOperand());
1035 SDValue Res =
1036 CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
1037 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1038 unsigned Index = NarrowVT.getVectorMinNumElements();
1039 Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
1040 CurDAG->getIntPtrConstant(Index, dl));
1041
1042 --I;
1043 SDValue To[] = {Res, NarrowBCast.getValue(1)};
1044 CurDAG->ReplaceAllUsesWith(N, To);
1045 ++I;
1046 MadeChange = true;
1047 continue;
1048 }
1049
1050 break;
1051 }
1052 case ISD::LOAD: {
1053 // If this is an XMM/YMM load of the same lower bits as another YMM/ZMM
1054 // load, then just extract the lower subvector and avoid the second load.
1055 auto *Ld = cast<LoadSDNode>(N);
1056 MVT VT = N->getSimpleValueType(0);
1057 if (!ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
1058 !(VT.is128BitVector() || VT.is256BitVector()))
1059 break;
1060
1061 MVT MaxVT = VT;
1062 SDNode *MaxLd = nullptr;
1063 SDValue Ptr = Ld->getBasePtr();
1064 SDValue Chain = Ld->getChain();
1065 for (SDNode *User : Ptr->uses()) {
1066 auto *UserLd = dyn_cast<LoadSDNode>(User);
1067 MVT UserVT = User->getSimpleValueType(0);
1068 if (User != N && UserLd && ISD::isNormalLoad(User) &&
1069 UserLd->getBasePtr() == Ptr && UserLd->getChain() == Chain &&
1070 !User->hasAnyUseOfValue(1) &&
1071 (UserVT.is256BitVector() || UserVT.is512BitVector()) &&
1072 UserVT.getSizeInBits() > VT.getSizeInBits() &&
1073 (!MaxLd || UserVT.getSizeInBits() > MaxVT.getSizeInBits())) {
1074 MaxLd = User;
1075 MaxVT = UserVT;
1076 }
1077 }
1078 if (MaxLd) {
1079 SDLoc dl(N);
1080 unsigned NumSubElts = VT.getSizeInBits() / MaxVT.getScalarSizeInBits();
1081 MVT SubVT = MVT::getVectorVT(MaxVT.getScalarType(), NumSubElts);
1082 SDValue Extract = CurDAG->getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT,
1083 SDValue(MaxLd, 0),
1084 CurDAG->getIntPtrConstant(0, dl));
1085 SDValue Res = CurDAG->getBitcast(VT, Extract);
1086
1087 --I;
1088 SDValue To[] = {Res, SDValue(MaxLd, 1)};
1089 CurDAG->ReplaceAllUsesWith(N, To);
1090 ++I;
1091 MadeChange = true;
1092 continue;
1093 }
1094 break;
1095 }
1096 case ISD::VSELECT: {
1097 // Replace VSELECT nodes with non-mask conditions with BLENDV/VPTERNLOG.
1098 EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
1099 if (EleVT == MVT::i1)
1100 break;
1101
1102 assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
1103 assert(N->getValueType(0).getVectorElementType() != MVT::i16 &&
1104 "We can't replace VSELECT with BLENDV in vXi16!");
1105 SDValue R;
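 // Note: 0xCA is the VPTERNLOG truth table for (A & B) | (~A & C), i.e. a
 // per-bit select of operand 1 vs. operand 2 using the all-ones/all-zeros
 // condition vector as A.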
1106 if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) ==
1107 EleVT.getSizeInBits()) {
1108 R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0),
1109 N->getOperand(0), N->getOperand(1), N->getOperand(2),
1110 CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8));
1111 } else {
1112 R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
1113 N->getOperand(0), N->getOperand(1),
1114 N->getOperand(2));
1115 }
1116 --I;
1117 CurDAG->ReplaceAllUsesWith(N, R.getNode());
1118 ++I;
1119 MadeChange = true;
1120 continue;
1121 }
1122 case ISD::FP_ROUND:
1123 case ISD::STRICT_FP_ROUND:
1124 case ISD::FP_TO_SINT:
1125 case ISD::FP_TO_UINT:
1126 case ISD::STRICT_FP_TO_SINT:
1127 case ISD::STRICT_FP_TO_UINT: {
1128 // Replace vector fp_to_s/uint with their X86 specific equivalent so we
1129 // don't need 2 sets of patterns.
1130 if (!N->getSimpleValueType(0).isVector())
1131 break;
1132
1133 unsigned NewOpc;
1134 switch (N->getOpcode()) {
1135 default: llvm_unreachable("Unexpected opcode!");
1136 case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break;
1137 case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break;
1138 case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break;
1139 case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
1140 case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break;
1141 case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
1142 }
1143 SDValue Res;
1144 if (N->isStrictFPOpcode())
1145 Res =
1146 CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
1147 {N->getOperand(0), N->getOperand(1)});
1148 else
1149 Res =
1150 CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1151 N->getOperand(0));
1152 --I;
1153 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1154 ++I;
1155 MadeChange = true;
1156 continue;
1157 }
1158 case ISD::SHL:
1159 case ISD::SRA:
1160 case ISD::SRL: {
1161 // Replace vector shifts with their X86 specific equivalent so we don't
1162 // need 2 sets of patterns.
1163 if (!N->getValueType(0).isVector())
1164 break;
1165
1166 unsigned NewOpc;
1167 switch (N->getOpcode()) {
1168 default: llvm_unreachable("Unexpected opcode!");
1169 case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
1170 case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
1171 case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
1172 }
1173 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1174 N->getOperand(0), N->getOperand(1));
1175 --I;
1176 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1177 ++I;
1178 MadeChange = true;
1179 continue;
1180 }
1181 case ISD::ANY_EXTEND:
1182 case ISD::ANY_EXTEND_VECTOR_INREG: {
1183 // Replace vector any extend with the zero extend equivalents so we don't
1184 // need 2 sets of patterns. Ignore vXi1 extensions.
1185 if (!N->getValueType(0).isVector())
1186 break;
1187
1188 unsigned NewOpc;
1189 if (N->getOperand(0).getScalarValueSizeInBits() == 1) {
1190 assert(N->getOpcode() == ISD::ANY_EXTEND &&
1191 "Unexpected opcode for mask vector!");
1192 NewOpc = ISD::SIGN_EXTEND;
1193 } else {
1194 NewOpc = N->getOpcode() == ISD::ANY_EXTEND
1195 ? ISD::ZERO_EXTEND
1196 : ISD::ZERO_EXTEND_VECTOR_INREG;
1197 }
1198
1199 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1200 N->getOperand(0));
1201 --I;
1202 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1203 ++I;
1204 MadeChange = true;
1205 continue;
1206 }
1207 case ISD::FCEIL:
1208 case ISD::STRICT_FCEIL:
1209 case ISD::FFLOOR:
1210 case ISD::STRICT_FFLOOR:
1211 case ISD::FTRUNC:
1212 case ISD::STRICT_FTRUNC:
1213 case ISD::FROUNDEVEN:
1214 case ISD::STRICT_FROUNDEVEN:
1215 case ISD::FNEARBYINT:
1216 case ISD::STRICT_FNEARBYINT:
1217 case ISD::FRINT:
1218 case ISD::STRICT_FRINT: {
1219 // Replace fp rounding with their X86 specific equivalent so we don't
1220 // need 2 sets of patterns.
1221 unsigned Imm;
1222 switch (N->getOpcode()) {
1223 default: llvm_unreachable("Unexpected opcode!");
1224 case ISD::STRICT_FCEIL:
1225 case ISD::FCEIL: Imm = 0xA; break;
1226 case ISD::STRICT_FFLOOR:
1227 case ISD::FFLOOR: Imm = 0x9; break;
1228 case ISD::STRICT_FTRUNC:
1229 case ISD::FTRUNC: Imm = 0xB; break;
1230 case ISD::STRICT_FROUNDEVEN:
1231 case ISD::FROUNDEVEN: Imm = 0x8; break;
1232 case ISD::STRICT_FNEARBYINT:
1233 case ISD::FNEARBYINT: Imm = 0xC; break;
1234 case ISD::STRICT_FRINT:
1235 case ISD::FRINT: Imm = 0x4; break;
1236 }
1237 SDLoc dl(N);
1238 bool IsStrict = N->isStrictFPOpcode();
1239 SDValue Res;
1240 if (IsStrict)
1241 Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
1242 {N->getValueType(0), MVT::Other},
1243 {N->getOperand(0), N->getOperand(1),
1244 CurDAG->getTargetConstant(Imm, dl, MVT::i32)});
1245 else
1246 Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
1247 N->getOperand(0),
1248 CurDAG->getTargetConstant(Imm, dl, MVT::i32));
1249 --I;
1250 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1251 ++I;
1252 MadeChange = true;
1253 continue;
1254 }
1255 case X86ISD::FANDN:
1256 case X86ISD::FAND:
1257 case X86ISD::FOR:
1258 case X86ISD::FXOR: {
1259 // Widen scalar fp logic ops to vector to reduce isel patterns.
1260 // FIXME: Can we do this during lowering/combine.
1261 MVT VT = N->getSimpleValueType(0);
1262 if (VT.isVector() || VT == MVT::f128)
1263 break;
1264
1265 MVT VecVT = VT == MVT::f64 ? MVT::v2f64
1266 : VT == MVT::f32 ? MVT::v4f32
1267 : MVT::v8f16;
1268
1269 SDLoc dl(N);
1270 SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
1271 N->getOperand(0));
1272 SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
1273 N->getOperand(1));
1274
1275 SDValue Res;
1276 if (Subtarget->hasSSE2()) {
1277 EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
1278 Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
1279 Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
1280 unsigned Opc;
1281 switch (N->getOpcode()) {
1282 default: llvm_unreachable("Unexpected opcode!");
1283 case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
1284 case X86ISD::FAND: Opc = ISD::AND; break;
1285 case X86ISD::FOR: Opc = ISD::OR; break;
1286 case X86ISD::FXOR: Opc = ISD::XOR; break;
1287 }
1288 Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
1289 Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
1290 } else {
1291 Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
1292 }
1293 Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
1294 CurDAG->getIntPtrConstant(0, dl));
1295 --I;
1296 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1297 ++I;
1298 MadeChange = true;
1299 continue;
1300 }
1301 }
1302
1303 if (OptLevel != CodeGenOptLevel::None &&
1304 // Only do this when the target can fold the load into the call or
1305 // jmp.
1306 !Subtarget->useIndirectThunkCalls() &&
1307 ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
1308 (N->getOpcode() == X86ISD::TC_RETURN &&
1309 (Subtarget->is64Bit() ||
1310 !getTargetMachine().isPositionIndependent())))) {
1311 /// Also try moving call address load from outside callseq_start to just
1312 /// before the call to allow it to be folded.
1313 ///
1314 /// [Load chain]
1315 /// ^
1316 /// |
1317 /// [Load]
1318 /// ^ ^
1319 /// | |
1320 /// / \--
1321 /// / |
1322 ///[CALLSEQ_START] |
1323 /// ^ |
1324 /// | |
1325 /// [LOAD/C2Reg] |
1326 /// | |
1327 /// \ /
1328 /// \ /
1329 /// [CALL]
1330 bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
1331 SDValue Chain = N->getOperand(0);
1332 SDValue Load = N->getOperand(1);
1333 if (!isCalleeLoad(Load, Chain, HasCallSeq))
1334 continue;
1335 moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
1336 ++NumLoadMoved;
1337 MadeChange = true;
1338 continue;
1339 }
1340
1341 // Lower fpround and fpextend nodes that target the FP stack to be a store
1342 // and a load to/from the stack. This is a gross hack. We would like to simply mark
1343 // these as being illegal, but when we do that, legalize produces these when
1344 // it expands calls, then expands these in the same legalize pass. We would
1345 // like dag combine to be able to hack on these between the call expansion
1346 // and the node legalization. As such this pass basically does "really
1347 // late" legalization of these inline with the X86 isel pass.
1348 // FIXME: This should only happen when not compiled with -O0.
1349 switch (N->getOpcode()) {
1350 default: continue;
1351 case ISD::FP_ROUND:
1352 case ISD::FP_EXTEND:
1353 {
1354 MVT SrcVT = N->getOperand(0).getSimpleValueType();
1355 MVT DstVT = N->getSimpleValueType(0);
1356
1357 // If any of the sources are vectors, no fp stack involved.
1358 if (SrcVT.isVector() || DstVT.isVector())
1359 continue;
1360
1361 // If the source and destination are SSE registers, then this is a legal
1362 // conversion that should not be lowered.
1363 const X86TargetLowering *X86Lowering =
1364 static_cast<const X86TargetLowering *>(TLI);
1365 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1366 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1367 if (SrcIsSSE && DstIsSSE)
1368 continue;
1369
1370 if (!SrcIsSSE && !DstIsSSE) {
1371 // If this is an FPStack extension, it is a noop.
1372 if (N->getOpcode() == ISD::FP_EXTEND)
1373 continue;
1374 // If this is a value-preserving FPStack truncation, it is a noop.
1375 if (N->getConstantOperandVal(1))
1376 continue;
1377 }
1378
1379 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1380 // FPStack has extload and truncstore. SSE can fold direct loads into other
1381 // operations. Based on this, decide what we want to do.
1382 MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT;
1383 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1384 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1385 MachinePointerInfo MPI =
1386 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1387 SDLoc dl(N);
1388
1389 // FIXME: optimize the case where the src/dest is a load or store?
1390
1391 SDValue Store = CurDAG->getTruncStore(
1392 CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);
1393 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store,
1394 MemTmp, MPI, MemVT);
1395
1396 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1397 // extload we created. This will cause general havoc on the DAG because
1398 // anything below the conversion could be folded into other existing nodes.
1399 // To avoid invalidating 'I', back it up to the convert node.
1400 --I;
1401 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1402 break;
1403 }
1404
1405 // The sequence of events for lowering STRICT_FP versions of these nodes requires
1406 // dealing with the chain differently, as there is already a preexisting chain.
1407 case ISD::STRICT_FP_ROUND:
1408 case ISD::STRICT_FP_EXTEND:
1409 {
1410 MVT SrcVT = N->getOperand(1).getSimpleValueType();
1411 MVT DstVT = N->getSimpleValueType(0);
1412
1413 // If any of the sources are vectors, no fp stack involved.
1414 if (SrcVT.isVector() || DstVT.isVector())
1415 continue;
1416
1417 // If the source and destination are SSE registers, then this is a legal
1418 // conversion that should not be lowered.
1419 const X86TargetLowering *X86Lowering =
1420 static_cast<const X86TargetLowering *>(TLI);
1421 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1422 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1423 if (SrcIsSSE && DstIsSSE)
1424 continue;
1425
1426 if (!SrcIsSSE && !DstIsSSE) {
1427 // If this is an FPStack extension, it is a noop.
1428 if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1429 continue;
1430 // If this is a value-preserving FPStack truncation, it is a noop.
1431 if (N->getConstantOperandVal(2))
1432 continue;
1433 }
1434
1435 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1436 // FPStack has extload and truncstore. SSE can fold direct loads into other
1437 // operations. Based on this, decide what we want to do.
1438 MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT;
1439 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1440 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1441 MachinePointerInfo MPI =
1442 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1443 SDLoc dl(N);
1444
1445 // FIXME: optimize the case where the src/dest is a load or store?
1446
1447 // Since the operation is StrictFP, use the preexisting chain.
1448 SDValue Store, Result;
1449 if (!SrcIsSSE) {
1450 SDVTList VTs = CurDAG->getVTList(MVT::Other);
1451 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
1452 Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT,
1453 MPI, /*Align*/ std::nullopt,
1454 MachineMemOperand::MOStore);
1455 if (N->getFlags().hasNoFPExcept()) {
1456 SDNodeFlags Flags = Store->getFlags();
1457 Flags.setNoFPExcept(true);
1458 Store->setFlags(Flags);
1459 }
1460 } else {
1461 assert(SrcVT == MemVT && "Unexpected VT!");
1462 Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
1463 MPI);
1464 }
1465
1466 if (!DstIsSSE) {
1467 SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
1468 SDValue Ops[] = {Store, MemTmp};
1469 Result = CurDAG->getMemIntrinsicNode(
1470 X86ISD::FLD, dl, VTs, Ops, MemVT, MPI,
1471 /*Align*/ std::nullopt, MachineMemOperand::MOLoad);
1472 if (N->getFlags().hasNoFPExcept()) {
1473 SDNodeFlags Flags = Result->getFlags();
1474 Flags.setNoFPExcept(true);
1475 Result->setFlags(Flags);
1476 }
1477 } else {
1478 assert(DstVT == MemVT && "Unexpected VT!");
1479 Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);
1480 }
1481
1482 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1483 // extload we created. This will cause general havoc on the DAG because
1484 // anything below the conversion could be folded into other existing nodes.
1485 // To avoid invalidating 'I', back it up to the convert node.
1486 --I;
1487 CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1488 break;
1489 }
1490 }
1491
1492
1493 // Now that we did that, the node is dead. Increment the iterator to the
1494 // next node to process, then delete N.
1495 ++I;
1496 MadeChange = true;
1497 }
1498
1499 // Remove any dead nodes that may have been left behind.
1500 if (MadeChange)
1501 CurDAG->RemoveDeadNodes();
1502}
1503
1504// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1505bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
1506 unsigned Opc = N->getMachineOpcode();
1507 if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1508 Opc != X86::MOVSX64rr8)
1509 return false;
1510
1511 SDValue N0 = N->getOperand(0);
1512
1513 // We need to be extracting the lower bit of an extend.
1514 if (!N0.isMachineOpcode() ||
1515 N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
1516 N0.getConstantOperandVal(1) != X86::sub_8bit)
1517 return false;
1518
1519 // We're looking for either a movsx or movzx to match the original opcode.
1520 unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1521 : X86::MOVSX32rr8_NOREX;
1522 SDValue N00 = N0.getOperand(0);
1523 if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
1524 return false;
1525
1526 if (Opc == X86::MOVSX64rr8) {
1527 // If we had a sign extend from 8 to 64 bits. We still need to go from 32
1528 // to 64.
1529 MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1530 MVT::i64, N00);
1531 ReplaceUses(N, Extend);
1532 } else {
1533 // Ok we can drop this extend and just use the original extend.
1534 ReplaceUses(N, N00.getNode());
1535 }
1536
1537 return true;
1538}
1539
1540void X86DAGToDAGISel::PostprocessISelDAG() {
1541 // Skip peepholes at -O0.
1542 if (TM.getOptLevel() == CodeGenOptLevel::None)
1543 return;
1544
1545 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1546
1547 bool MadeChange = false;
1548 while (Position != CurDAG->allnodes_begin()) {
1549 SDNode *N = &*--Position;
1550 // Skip dead nodes and any non-machine opcodes.
1551 if (N->use_empty() || !N->isMachineOpcode())
1552 continue;
1553
1554 if (tryOptimizeRem8Extend(N)) {
1555 MadeChange = true;
1556 continue;
1557 }
1558
1559 unsigned Opc = N->getMachineOpcode();
1560 switch (Opc) {
1561 default:
1562 continue;
1563 // ANDrr/rm + TESTrr -> TESTrr/TESTmr
1564 case X86::TEST8rr:
1565 case X86::TEST16rr:
1566 case X86::TEST32rr:
1567 case X86::TEST64rr:
1568 // ANDrr/rm + CTESTrr -> CTESTrr/CTESTmr
1569 case X86::CTEST8rr:
1570 case X86::CTEST16rr:
1571 case X86::CTEST32rr:
1572 case X86::CTEST64rr: {
1573 auto &Op0 = N->getOperand(0);
1574 if (Op0 != N->getOperand(1) || !Op0->hasNUsesOfValue(2, Op0.getResNo()) ||
1575 !Op0.isMachineOpcode())
1576 continue;
1577 SDValue And = N->getOperand(0);
1578#define CASE_ND(OP) \
1579 case X86::OP: \
1580 case X86::OP##_ND:
1581 switch (And.getMachineOpcode()) {
1582 default:
1583 continue;
1584 CASE_ND(AND8rr)
1585 CASE_ND(AND16rr)
1586 CASE_ND(AND32rr)
1587 CASE_ND(AND64rr) {
1588 if (And->hasAnyUseOfValue(1))
1589 continue;
1590 SmallVector<SDValue> Ops(N->op_values());
1591 Ops[0] = And.getOperand(0);
1592 Ops[1] = And.getOperand(1);
1593 MachineSDNode *Test =
1594 CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, Ops);
1595 ReplaceUses(N, Test);
1596 MadeChange = true;
1597 continue;
1598 }
1599 CASE_ND(AND8rm)
1600 CASE_ND(AND16rm)
1601 CASE_ND(AND32rm)
1602 CASE_ND(AND64rm) {
1603 if (And->hasAnyUseOfValue(1))
1604 continue;
1605 unsigned NewOpc;
1606 bool IsCTESTCC = X86::isCTESTCC(Opc);
1607#define FROM_TO(A, B) \
1608 CASE_ND(A) NewOpc = IsCTESTCC ? X86::C##B : X86::B; \
1609 break;
1610 switch (And.getMachineOpcode()) {
1611 FROM_TO(AND8rm, TEST8mr);
1612 FROM_TO(AND16rm, TEST16mr);
1613 FROM_TO(AND32rm, TEST32mr);
1614 FROM_TO(AND64rm, TEST64mr);
1615 }
1616#undef FROM_TO
1617#undef CASE_ND
1618 // Need to swap the memory and register operand.
1619 SmallVector<SDValue> Ops = {And.getOperand(1), And.getOperand(2),
1620 And.getOperand(3), And.getOperand(4),
1621 And.getOperand(5), And.getOperand(0)};
1622 // CC, Cflags.
1623 if (IsCTESTCC) {
1624 Ops.push_back(N->getOperand(2));
1625 Ops.push_back(N->getOperand(3));
1626 }
1627 // Chain of memory load
1628 Ops.push_back(And.getOperand(6));
1629 // Glue
1630 if (IsCTESTCC)
1631 Ops.push_back(N->getOperand(4));
1632
1633 MachineSDNode *Test = CurDAG->getMachineNode(
1634 NewOpc, SDLoc(N), MVT::i32, MVT::Other, Ops);
1635 CurDAG->setNodeMemRefs(
1636 Test, cast<MachineSDNode>(And.getNode())->memoperands());
1637 ReplaceUses(And.getValue(2), SDValue(Test, 1));
1638 ReplaceUses(SDValue(N, 0), SDValue(Test, 0));
1639 MadeChange = true;
1640 continue;
1641 }
1642 }
1643 }
1644 // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1645 // used. We're doing this late so we can prefer to fold the AND into masked
1646 // comparisons. Doing that can be better for the live range of the mask
1647 // register.
1648 case X86::KORTESTBrr:
1649 case X86::KORTESTWrr:
1650 case X86::KORTESTDrr:
1651 case X86::KORTESTQrr: {
1652 SDValue Op0 = N->getOperand(0);
1653 if (Op0 != N->getOperand(1) || !N->isOnlyUserOf(Op0.getNode()) ||
1654 !Op0.isMachineOpcode() || !onlyUsesZeroFlag(SDValue(N, 0)))
1655 continue;
1656#define CASE(A) \
1657 case X86::A: \
1658 break;
1659 switch (Op0.getMachineOpcode()) {
1660 default:
1661 continue;
1662 CASE(KANDBrr)
1663 CASE(KANDWrr)
1664 CASE(KANDDrr)
1665 CASE(KANDQrr)
1666 }
1667 unsigned NewOpc;
1668#define FROM_TO(A, B) \
1669 case X86::A: \
1670 NewOpc = X86::B; \
1671 break;
1672 switch (Opc) {
1673 FROM_TO(KORTESTBrr, KTESTBrr)
1674 FROM_TO(KORTESTWrr, KTESTWrr)
1675 FROM_TO(KORTESTDrr, KTESTDrr)
1676 FROM_TO(KORTESTQrr, KTESTQrr)
1677 }
1678 // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1679 // KAND instructions and KTEST use the same ISA feature.
1680 if (NewOpc == X86::KTESTWrr && !Subtarget->hasDQI())
1681 continue;
1682#undef FROM_TO
1683 MachineSDNode *KTest = CurDAG->getMachineNode(
1684 NewOpc, SDLoc(N), MVT::i32, Op0.getOperand(0), Op0.getOperand(1));
1685 ReplaceUses(N, KTest);
1686 MadeChange = true;
1687 continue;
1688 }
1689 // Attempt to remove vector moves that were inserted to zero upper bits.
1690 case TargetOpcode::SUBREG_TO_REG: {
1691 unsigned SubRegIdx = N->getConstantOperandVal(2);
1692 if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1693 continue;
1694
1695 SDValue Move = N->getOperand(1);
1696 if (!Move.isMachineOpcode())
1697 continue;
1698
1699 // Make sure it's one of the move opcodes we recognize.
1700 switch (Move.getMachineOpcode()) {
1701 default:
1702 continue;
1703 CASE(VMOVAPDrr) CASE(VMOVUPDrr)
1704 CASE(VMOVAPSrr) CASE(VMOVUPSrr)
1705 CASE(VMOVDQArr) CASE(VMOVDQUrr)
1706 CASE(VMOVAPDYrr) CASE(VMOVUPDYrr)
1707 CASE(VMOVAPSYrr) CASE(VMOVUPSYrr)
1708 CASE(VMOVDQAYrr) CASE(VMOVDQUYrr)
1709 CASE(VMOVAPDZ128rr) CASE(VMOVUPDZ128rr)
1710 CASE(VMOVAPSZ128rr) CASE(VMOVUPSZ128rr)
1711 CASE(VMOVDQA32Z128rr) CASE(VMOVDQU32Z128rr)
1712 CASE(VMOVDQA64Z128rr) CASE(VMOVDQU64Z128rr)
1713 CASE(VMOVAPDZ256rr) CASE(VMOVUPDZ256rr)
1714 CASE(VMOVAPSZ256rr) CASE(VMOVUPSZ256rr)
1715 CASE(VMOVDQA32Z256rr) CASE(VMOVDQU32Z256rr)
1716 CASE(VMOVDQA64Z256rr) CASE(VMOVDQU64Z256rr)
1717 }
1718#undef CASE
1719
1720 SDValue In = Move.getOperand(0);
1721 if (!In.isMachineOpcode() ||
1722 In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1723 continue;
1724
1725 // Make sure the instruction has a VEX, XOP, or EVEX prefix. This excludes
1726 // the SHA instructions, which use a legacy encoding.
1727 uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1728 if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1729 (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1730 (TSFlags & X86II::EncodingMask) != X86II::XOP)
1731 continue;
1732
1733 // Producing instruction is another vector instruction. We can drop the
1734 // move.
1735 CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1736 MadeChange = true;
1737 }
1738 }
1739 }
1740
1741 if (MadeChange)
1742 CurDAG->RemoveDeadNodes();
1743}
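// For illustration, the AND+TEST fold above rewrites a flags-only test of an
// otherwise dead AND result (register choices are arbitrary):
//   andl %esi, %edi       # %edi only feeds the TEST below
//   testl %edi, %edi
// into
//   testl %esi, %edi
// and, when the AND read its second operand from memory, into the
// corresponding TESTmr form with the memory operand folded in.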
1744
1745
1746/// Emit any code that needs to be executed only in the main function.
1747void X86DAGToDAGISel::emitSpecialCodeForMain() {
1748 if (Subtarget->isTargetCygMing()) {
1749 TargetLowering::ArgListTy Args;
1750 auto &DL = CurDAG->getDataLayout();
1751
1752 TargetLowering::CallLoweringInfo CLI(*CurDAG);
1753 CLI.setChain(CurDAG->getRoot())
1754 .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1755 CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1756 std::move(Args));
1757 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1758 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1759 CurDAG->setRoot(Result.second);
1760 }
1761}
1762
1763void X86DAGToDAGISel::emitFunctionEntryCode() {
1764 // If this is main, emit special code for main.
1765 const Function &F = MF->getFunction();
1766 if (F.hasExternalLinkage() && F.getName() == "main")
1767 emitSpecialCodeForMain();
1768}
1769
1770static bool isDispSafeForFrameIndex(int64_t Val) {
1771 // On 64-bit platforms, we can run into an issue where a frame index
1772 // includes a displacement that, when added to the explicit displacement,
1773 // will overflow the displacement field. Assuming that the frame index
1774 // displacement fits into a 31-bit integer (which is only slightly more
1775 // aggressive than the current fundamental assumption that it fits into
1776 // a 32-bit integer), a 31-bit disp should always be safe.
1777 return isInt<31>(Val);
1778}
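// A rough sketch of the bound this enforces (values are illustrative):
//   isDispSafeForFrameIndex(0x3fffffff);  // true: fits in a signed 31-bit int
//   isDispSafeForFrameIndex(0x40000000);  // false: adding a frame-index
//                                         // offset could overflow disp32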
1779
1780bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1781 X86ISelAddressMode &AM) {
1782 // We may have already matched a displacement and the caller just added the
1783 // symbolic displacement. So we still need to do the checks even if Offset
1784 // is zero.
1785
1786 int64_t Val = AM.Disp + Offset;
1787
1788 // Cannot combine ExternalSymbol displacements with integer offsets.
1789 if (Val != 0 && (AM.ES || AM.MCSym))
1790 return true;
1791
1792 CodeModel::Model M = TM.getCodeModel();
1793 if (Subtarget->is64Bit()) {
1794 if (Val != 0 &&
1795 !X86::isOffsetSuitableForCodeModel(Val, M,
1796 AM.hasSymbolicDisplacement()))
1797 return true;
1798 // In addition to the checks required for a register base, check that
1799 // we do not try to use an unsafe Disp with a frame index.
1800 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1801 !isDispSafeForFrameIndex(Val))
1802 return true;
1803 // In ILP32 (x32) mode, pointers are 32 bits and need to be zero-extended to
1804 // 64 bits. Instructions with 32-bit register addresses perform this zero
1805 // extension for us and we can safely ignore the high bits of Offset.
1806 // Instructions with only a 32-bit immediate address do not, though: they
1807 // sign extend instead. This means only the low 2GB of address space is
1808 // directly addressable; we need indirect addressing for the high 2GB of
1809 // address space.
1810 // TODO: Some of the earlier checks may be relaxed for ILP32 mode as the
1811 // implicit zero extension of instructions would cover up any problem.
1812 // However, we have asserts elsewhere that get triggered if we do, so keep
1813 // the checks for now.
1814 // TODO: We would actually be able to accept these, as well as the same
1815 // addresses in LP64 mode, by adding the EIZ pseudo-register as an operand
1816 // to get an address size override to be emitted. However, this
1817 // pseudo-register is not part of any register class and therefore causes
1818 // MIR verification to fail.
1819 if (Subtarget->isTarget64BitILP32() && !isUInt<31>(Val) &&
1820 !AM.hasBaseOrIndexReg())
1821 return true;
1822 }
1823 AM.Disp = Val;
1824 return false;
1825}
1826
1827bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
1828 bool AllowSegmentRegForX32) {
1829 SDValue Address = N->getOperand(1);
1830
1831 // load gs:0 -> GS segment register.
1832 // load fs:0 -> FS segment register.
1833 //
1834 // This optimization is generally valid because the GNU TLS model defines that
1835 // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
1836 // with 32-bit registers, as we get in ILP32 mode, those registers are first
1837 // zero-extended to 64 bits and then added to the base address, which gives
1838 // unwanted results when the register holds a negative value.
1839 // For more information see http://people.redhat.com/drepper/tls.pdf
1840 if (isNullConstant(Address) && AM.Segment.getNode() == nullptr &&
1841 !IndirectTlsSegRefs &&
1842 (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1843 Subtarget->isTargetFuchsia())) {
1844 if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
1845 return true;
1846 switch (N->getPointerInfo().getAddrSpace()) {
1847 case X86AS::GS:
1848 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1849 return false;
1850 case X86AS::FS:
1851 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1852 return false;
1853 // Address space X86AS::SS is not handled here, because it is not used to
1854 // address TLS areas.
1855 }
1856 }
1857
1858 return true;
1859}
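// As an illustrative example, on an x86-64 glibc target a TLS-base load such
// as
//   %p = load i64, ptr addrspace(257) null   ; addrspace(257) == X86AS::FS
// can be selected with the segment folded into the address, i.e.
//   movq %fs:0, %rax
// The 32-bit equivalent uses addrspace(256) (X86AS::GS) and %gs:0.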
1860
1861/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1862/// mode. These wrap things that will resolve down into a symbol reference.
1863/// If no match is possible, this returns true, otherwise it returns false.
1864bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1865 // If the addressing mode already has a symbol as the displacement, we can
1866 // never match another symbol.
1867 if (AM.hasSymbolicDisplacement())
1868 return true;
1869
1870 bool IsRIPRelTLS = false;
1871 bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1872 if (IsRIPRel) {
1873 SDValue Val = N.getOperand(0);
1874 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1875 IsRIPRelTLS = true;
1876 }
1877
1878 // We can't use an addressing mode in the 64-bit large code model.
1879 // Global TLS addressing is an exception. In the medium code model,
1880 // we can use a mode when RIP wrappers are present.
1881 // That signifies access to globals that are known to be "near",
1882 // such as the GOT itself.
1883 CodeModel::Model M = TM.getCodeModel();
1884 if (Subtarget->is64Bit() && M == CodeModel::Large && !IsRIPRelTLS)
1885 return true;
1886
1887 // Base and index reg must be 0 in order to use %rip as base.
1888 if (IsRIPRel && AM.hasBaseOrIndexReg())
1889 return true;
1890
1891 // Make a local copy in case we can't do this fold.
1892 X86ISelAddressMode Backup = AM;
1893
1894 int64_t Offset = 0;
1895 SDValue N0 = N.getOperand(0);
1896 if (auto *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1897 AM.GV = G->getGlobal();
1898 AM.SymbolFlags = G->getTargetFlags();
1899 Offset = G->getOffset();
1900 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1901 AM.CP = CP->getConstVal();
1902 AM.Alignment = CP->getAlign();
1903 AM.SymbolFlags = CP->getTargetFlags();
1904 Offset = CP->getOffset();
1905 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1906 AM.ES = S->getSymbol();
1907 AM.SymbolFlags = S->getTargetFlags();
1908 } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1909 AM.MCSym = S->getMCSymbol();
1910 } else if (auto *J = dyn_cast<JumpTableSDNode>(N0)) {
1911 AM.JT = J->getIndex();
1912 AM.SymbolFlags = J->getTargetFlags();
1913 } else if (auto *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1914 AM.BlockAddr = BA->getBlockAddress();
1915 AM.SymbolFlags = BA->getTargetFlags();
1916 Offset = BA->getOffset();
1917 } else
1918 llvm_unreachable("Unhandled symbol reference node.");
1919
1920 // Can't use an addressing mode with large globals.
1921 if (Subtarget->is64Bit() && !IsRIPRel && AM.GV &&
1922 TM.isLargeGlobalValue(AM.GV)) {
1923 AM = Backup;
1924 return true;
1925 }
1926
1927 if (foldOffsetIntoAddress(Offset, AM)) {
1928 AM = Backup;
1929 return true;
1930 }
1931
1932 if (IsRIPRel)
1933 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1934
1935 // Commit the changes now that we know this fold is safe.
1936 return false;
1937}
1938
1939/// Add the specified node to the specified addressing mode, returning true if
1940/// it cannot be done. This just pattern matches for the addressing mode.
1941bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1942 if (matchAddressRecursively(N, AM, 0))
1943 return true;
1944
1945 // Post-processing: Make a second attempt to fold a load, if we now know
1946 // that there will not be any other register. This is only performed for
1947 // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
1948 // any foldable load the first time.
1949 if (Subtarget->isTarget64BitILP32() &&
1950 AM.BaseType == X86ISelAddressMode::RegBase &&
1951 AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
1952 SDValue Save_Base_Reg = AM.Base_Reg;
1953 if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
1954 AM.Base_Reg = SDValue();
1955 if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
1956 AM.Base_Reg = Save_Base_Reg;
1957 }
1958 }
1959
1960 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1961 // a smaller encoding and avoids a scaled-index.
1962 if (AM.Scale == 2 &&
1963 AM.BaseType == X86ISelAddressMode::RegBase &&
1964 AM.Base_Reg.getNode() == nullptr) {
1965 AM.Base_Reg = AM.IndexReg;
1966 AM.Scale = 1;
1967 }
1968
1969 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1970 // because it has a smaller encoding.
1971 if (TM.getCodeModel() != CodeModel::Large &&
1972 (!AM.GV || !TM.isLargeGlobalValue(AM.GV)) && Subtarget->is64Bit() &&
1973 AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase &&
1974 AM.Base_Reg.getNode() == nullptr && AM.IndexReg.getNode() == nullptr &&
1975 AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) {
1976 AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1977 }
1978
1979 return false;
1980}
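// Two illustrative effects of the post-processing above:
//   lea(,%reg,2) is rewritten as lea(%reg,%reg), freeing the scaled index and
//   shrinking the encoding.
//   A lone symbolic displacement on x86-64 gets %rip as its base, so "foo" is
//   addressed as "foo(%rip)".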
1981
1982bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1983 unsigned Depth) {
1984 // Add an artificial use to this node so that we can keep track of
1985 // it if it gets CSE'd with a different node.
1986 HandleSDNode Handle(N);
1987
1988 X86ISelAddressMode Backup = AM;
1989 if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1990 !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1991 return false;
1992 AM = Backup;
1993
1994 // Try again after commuting the operands.
1995 if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
1996 Depth + 1) &&
1997 !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
1998 return false;
1999 AM = Backup;
2000
2001 // If we couldn't fold both operands into the address at the same time,
2002 // see if we can just put each operand into a register and fold at least
2003 // the add.
2004 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2005 !AM.Base_Reg.getNode() &&
2006 !AM.IndexReg.getNode()) {
2007 N = Handle.getValue();
2008 AM.Base_Reg = N.getOperand(0);
2009 AM.IndexReg = N.getOperand(1);
2010 AM.Scale = 1;
2011 return false;
2012 }
2013 N = Handle.getValue();
2014 return true;
2015}
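// If neither operand folds any further, the fallback above still uses both
// address registers; e.g. a load from (add %rdi, %rsi) can be matched as
// base = %rdi, index = %rsi, scale = 1:
//   movl (%rdi,%rsi), %eax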
2016
2017// Insert a node into the DAG at least before the Pos node's position. This
2018// will reposition the node as needed, and will assign it a node ID that is <=
2019// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
2020// IDs! The selection DAG must no longer depend on their uniqueness when this
2021// is used.
2022static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
2023 if (N->getNodeId() == -1 ||
2024 (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
2025 SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
2026 DAG.RepositionNode(Pos->getIterator(), N.getNode());
2027 // Mark Node as invalid for pruning as after this it may be a successor to a
2028 // selected node but otherwise be in the same position of Pos.
2029 // Conservatively mark it with the same -abs(Id) to assure node id
2030 // invariant is preserved.
2031 N->setNodeId(Pos->getNodeId());
2032 SelectionDAGISel::InvalidateNodeId(N.getNode());
2033 }
2034}
2035
2036// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
2037// safe. This allows us to convert the shift and and into an h-register
2038// extract and a scaled index. Returns false if the simplification is
2039// performed.
2040static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
2041 uint64_t Mask,
2042 SDValue Shift, SDValue X,
2043 X86ISelAddressMode &AM) {
2044 if (Shift.getOpcode() != ISD::SRL ||
2045 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
2046 !Shift.hasOneUse())
2047 return true;
2048
2049 int ScaleLog = 8 - Shift.getConstantOperandVal(1);
2050 if (ScaleLog <= 0 || ScaleLog >= 4 ||
2051 Mask != (0xffu << ScaleLog))
2052 return true;
2053
2054 MVT XVT = X.getSimpleValueType();
2055 MVT VT = N.getSimpleValueType();
2056 SDLoc DL(N);
2057 SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
2058 SDValue NewMask = DAG.getConstant(0xff, DL, XVT);
2059 SDValue Srl = DAG.getNode(ISD::SRL, DL, XVT, X, Eight);
2060 SDValue And = DAG.getNode(ISD::AND, DL, XVT, Srl, NewMask);
2061 SDValue Ext = DAG.getZExtOrTrunc(And, DL, VT);
2062 SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
2063 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Ext, ShlCount);
2064
2065 // Insert the new nodes into the topological ordering. We must do this in
2066 // a valid topological ordering as nothing is going to go back and re-sort
2067 // these nodes. We continually insert before 'N' in sequence as this is
2068 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2069 // hierarchy left to express.
2070 insertDAGNode(DAG, N, Eight);
2071 insertDAGNode(DAG, N, NewMask);
2072 insertDAGNode(DAG, N, Srl);
2073 insertDAGNode(DAG, N, And);
2074 insertDAGNode(DAG, N, Ext);
2075 insertDAGNode(DAG, N, ShlCount);
2076 insertDAGNode(DAG, N, Shl);
2077 DAG.ReplaceAllUsesWith(N, Shl);
2078 DAG.RemoveDeadNode(N.getNode());
2079 AM.IndexReg = Ext;
2080 AM.Scale = (1 << ScaleLog);
2081 return false;
2082}
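// A worked instance of the transform above with C1 == 2 (shift amount
// 8 - 2 == 6, mask 0xff << 2 == 0x3fc):
//   (and (srl X, 6), 0x3fc)  ->  index = zext((srl X, 8) & 0xff), scale = 4
// The byte extract can then use an h-register and the << 2 becomes the
// addressing-mode scale.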
2083
2084// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
2085// allows us to fold the shift into this addressing mode. Returns false if the
2086// transform succeeded.
2087static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
2088 X86ISelAddressMode &AM) {
2089 SDValue Shift = N.getOperand(0);
2090
2091 // Use a signed mask so that shifting right will insert sign bits. These
2092 // bits will be removed when we shift the result left so it doesn't matter
2093 // what we use. This might allow a smaller immediate encoding.
2094 int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
2095
2096 // If we have an any_extend feeding the AND, look through it to see if there
2097 // is a shift behind it. But only if the AND doesn't use the extended bits.
2098 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
2099 bool FoundAnyExtend = false;
2100 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
2101 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
2102 isUInt<32>(Mask)) {
2103 FoundAnyExtend = true;
2104 Shift = Shift.getOperand(0);
2105 }
2106
2107 if (Shift.getOpcode() != ISD::SHL ||
2108 !isa<ConstantSDNode>(Shift.getOperand(1)))
2109 return true;
2110
2111 SDValue X = Shift.getOperand(0);
2112
2113 // Not likely to be profitable if either the AND or SHIFT node has more
2114 // than one use (unless all uses are for address computation). Besides,
2115 // isel mechanism requires their node ids to be reused.
2116 if (!N.hasOneUse() || !Shift.hasOneUse())
2117 return true;
2118
2119 // Verify that the shift amount is something we can fold.
2120 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2121 if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
2122 return true;
2123
2124 MVT VT = N.getSimpleValueType();
2125 SDLoc DL(N);
2126 if (FoundAnyExtend) {
2127 SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
2128 insertDAGNode(DAG, N, NewX);
2129 X = NewX;
2130 }
2131
2132 SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
2133 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
2134 SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
2135
2136 // Insert the new nodes into the topological ordering. We must do this in
2137 // a valid topological ordering as nothing is going to go back and re-sort
2138 // these nodes. We continually insert before 'N' in sequence as this is
2139 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2140 // hierarchy left to express.
2141 insertDAGNode(DAG, N, NewMask);
2142 insertDAGNode(DAG, N, NewAnd);
2143 insertDAGNode(DAG, N, NewShift);
2144 DAG.ReplaceAllUsesWith(N, NewShift);
2145 DAG.RemoveDeadNode(N.getNode());
2146
2147 AM.Scale = 1 << ShiftAmt;
2148 AM.IndexReg = NewAnd;
2149 return false;
2150}
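// A worked instance of the swap above with C1 == 2 and C2 == 0x3fc:
//   (and (shl X, 2), 0x3fc)  ->  index = (and X, 0xff), scale = 4
// The shift moves outside the mask and is absorbed into the scale.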
2151
2152// Implement some heroics to detect shifts of masked values where the mask can
2153// be replaced by extending the shift and undoing that in the addressing mode
2154// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
2155// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
2156// the addressing mode. This results in code such as:
2157//
2158// int f(short *y, int *lookup_table) {
2159// ...
2160// return *y + lookup_table[*y >> 11];
2161// }
2162//
2163// Turning into:
2164// movzwl (%rdi), %eax
2165// movl %eax, %ecx
2166// shrl $11, %ecx
2167// addl (%rsi,%rcx,4), %eax
2168//
2169// Instead of:
2170// movzwl (%rdi), %eax
2171// movl %eax, %ecx
2172// shrl $9, %ecx
2173// andl $124, %rcx
2174// addl (%rsi,%rcx), %eax
2175//
2176// Note that this function assumes the mask is provided as a mask *after* the
2177// value is shifted. The input chain may or may not match that, but computing
2178// such a mask is trivial.
2179static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
2180 uint64_t Mask,
2181 SDValue Shift, SDValue X,
2182 X86ISelAddressMode &AM) {
2183 if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
2184 !isa<ConstantSDNode>(Shift.getOperand(1)))
2185 return true;
2186
2187 // We need to ensure that the mask is a contiguous run of bits.
2188 unsigned MaskIdx, MaskLen;
2189 if (!isShiftedMask_64(Mask, MaskIdx, MaskLen))
2190 return true;
2191 unsigned MaskLZ = 64 - (MaskIdx + MaskLen);
2192
2193 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2194
2195 // The amount of shift we're trying to fit into the addressing mode is taken
2196 // from the shifted mask index (number of trailing zeros of the mask).
2197 unsigned AMShiftAmt = MaskIdx;
2198
2199 // There is nothing we can do here unless the mask is removing some bits.
2200 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
2201 if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
2202
2203 // Scale the leading zero count down based on the actual size of the value.
2204 // Also scale it down based on the size of the shift.
2205 unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
2206 if (MaskLZ < ScaleDown)
2207 return true;
2208 MaskLZ -= ScaleDown;
2209
2210 // The final check is to ensure that any masked out high bits of X are
2211 // already known to be zero. Otherwise, the mask has a semantic impact
2212 // other than masking out a couple of low bits. Unfortunately, because of
2213 // the mask, zero extensions will be removed from operands in some cases.
2214 // This code works extra hard to look through extensions because we can
2215 // replace them with zero extensions cheaply if necessary.
2216 bool ReplacingAnyExtend = false;
2217 if (X.getOpcode() == ISD::ANY_EXTEND) {
2218 unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
2219 X.getOperand(0).getSimpleValueType().getSizeInBits();
2220 // Assume that we'll replace the any-extend with a zero-extend, and
2221 // narrow the search to the extended value.
2222 X = X.getOperand(0);
2223 MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
2224 ReplacingAnyExtend = true;
2225 }
2226 APInt MaskedHighBits =
2227 APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
2228 if (!DAG.MaskedValueIsZero(X, MaskedHighBits))
2229 return true;
2230
2231 // We've identified a pattern that can be transformed into a single shift
2232 // and an addressing mode. Make it so.
2233 MVT VT = N.getSimpleValueType();
2234 if (ReplacingAnyExtend) {
2235 assert(X.getValueType() != VT);
2236 // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
2237 SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
2238 insertDAGNode(DAG, N, NewX);
2239 X = NewX;
2240 }
2241
2242 MVT XVT = X.getSimpleValueType();
2243 SDLoc DL(N);
2244 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
2245 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
2246 SDValue NewExt = DAG.getZExtOrTrunc(NewSRL, DL, VT);
2247 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
2248 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
2249
2250 // Insert the new nodes into the topological ordering. We must do this in
2251 // a valid topological ordering as nothing is going to go back and re-sort
2252 // these nodes. We continually insert before 'N' in sequence as this is
2253 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2254 // hierarchy left to express.
2255 insertDAGNode(DAG, N, NewSRLAmt);
2256 insertDAGNode(DAG, N, NewSRL);
2257 insertDAGNode(DAG, N, NewExt);
2258 insertDAGNode(DAG, N, NewSHLAmt);
2259 insertDAGNode(DAG, N, NewSHL);
2260 DAG.ReplaceAllUsesWith(N, NewSHL);
2261 DAG.RemoveDeadNode(N.getNode());
2262
2263 AM.Scale = 1 << AMShiftAmt;
2264 AM.IndexReg = NewExt;
2265 return false;
2266}
2267
2268// Transform "(X >> SHIFT) & (MASK << C1)" to
2269// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
2270// matched to a BEXTR later. Returns false if the simplification is performed.
2271static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
2272 uint64_t Mask,
2273 SDValue Shift, SDValue X,
2274 X86ISelAddressMode &AM,
2275 const X86Subtarget &Subtarget) {
2276 if (Shift.getOpcode() != ISD::SRL ||
2277 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
2278 !Shift.hasOneUse() || !N.hasOneUse())
2279 return true;
2280
2281 // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
2282 if (!Subtarget.hasTBM() &&
2283 !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
2284 return true;
2285
2286 // We need to ensure that the mask is a contiguous run of bits.
2287 unsigned MaskIdx, MaskLen;
2288 if (!isShiftedMask_64(Mask, MaskIdx, MaskLen))
2289 return true;
2290
2291 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2292
2293 // The amount of shift we're trying to fit into the addressing mode is taken
2294 // from the shifted mask index (number of trailing zeros of the mask).
2295 unsigned AMShiftAmt = MaskIdx;
2296
2297 // There is nothing we can do here unless the mask is removing some bits.
2298 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
2299 if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
2300
2301 MVT XVT = X.getSimpleValueType();
2302 MVT VT = N.getSimpleValueType();
2303 SDLoc DL(N);
2304 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
2305 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
2306 SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, XVT);
2307 SDValue NewAnd = DAG.getNode(ISD::AND, DL, XVT, NewSRL, NewMask);
2308 SDValue NewExt = DAG.getZExtOrTrunc(NewAnd, DL, VT);
2309 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
2310 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
2311
2312 // Insert the new nodes into the topological ordering. We must do this in
2313 // a valid topological ordering as nothing is going to go back and re-sort
2314 // these nodes. We continually insert before 'N' in sequence as this is
2315 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2316 // hierarchy left to express.
2317 insertDAGNode(DAG, N, NewSRLAmt);
2318 insertDAGNode(DAG, N, NewSRL);
2319 insertDAGNode(DAG, N, NewMask);
2320 insertDAGNode(DAG, N, NewAnd);
2321 insertDAGNode(DAG, N, NewExt);
2322 insertDAGNode(DAG, N, NewSHLAmt);
2323 insertDAGNode(DAG, N, NewSHL);
2324 DAG.ReplaceAllUsesWith(N, NewSHL);
2325 DAG.RemoveDeadNode(N.getNode());
2326
2327 AM.Scale = 1 << AMShiftAmt;
2328 AM.IndexReg = NewExt;
2329 return false;
2330}
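// A worked instance of the rewrite above, assuming a target with TBM or with
// BMI plus fast BEXTR; here SHIFT == 4, MASK == 0xfff and C1 == 2:
//   (and (srl X, 4), 0xfff << 2)  ->  index = ((X >> 6) & 0xfff), scale = 4
// The remaining shift-and-mask is expected to match as BEXTR later.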
2331
2332// Attempt to peek further into a scaled index register, collecting additional
2333 // extensions / offsets / etc. Returns \p N if we can't peek any further.
2334SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
2335 X86ISelAddressMode &AM,
2336 unsigned Depth) {
2337 assert(AM.IndexReg.getNode() == nullptr && "IndexReg already matched");
2338 assert((AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8) &&
2339 "Illegal index scale");
2340
2341 // Limit recursion.
2342 if (Depth >= SelectionDAG::MaxRecursionDepth)
2343 return N;
2344
2345 EVT VT = N.getValueType();
2346 unsigned Opc = N.getOpcode();
2347
2348 // index: add(x,c) -> index: x, disp + c
2349 if (CurDAG->isBaseWithConstantOffset(N)) {
2350 auto *AddVal = cast<ConstantSDNode>(N.getOperand(1));
2351 uint64_t Offset = (uint64_t)AddVal->getSExtValue() * AM.Scale;
2352 if (!foldOffsetIntoAddress(Offset, AM))
2353 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2354 }
2355
2356 // index: add(x,x) -> index: x, scale * 2
2357 if (Opc == ISD::ADD && N.getOperand(0) == N.getOperand(1)) {
2358 if (AM.Scale <= 4) {
2359 AM.Scale *= 2;
2360 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2361 }
2362 }
2363
2364 // index: shl(x,i) -> index: x, scale * (1 << i)
2365 if (Opc == X86ISD::VSHLI) {
2366 uint64_t ShiftAmt = N.getConstantOperandVal(1);
2367 uint64_t ScaleAmt = 1ULL << ShiftAmt;
2368 if ((AM.Scale * ScaleAmt) <= 8) {
2369 AM.Scale *= ScaleAmt;
2370 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2371 }
2372 }
2373
2374 // index: sext(add_nsw(x,c)) -> index: sext(x), disp + sext(c)
2375 // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext?
2376 if (Opc == ISD::SIGN_EXTEND && !VT.isVector() && N.hasOneUse()) {
2377 SDValue Src = N.getOperand(0);
2378 if (Src.getOpcode() == ISD::ADD && Src->getFlags().hasNoSignedWrap() &&
2379 Src.hasOneUse()) {
2380 if (CurDAG->isBaseWithConstantOffset(Src)) {
2381 SDValue AddSrc = Src.getOperand(0);
2382 auto *AddVal = cast<ConstantSDNode>(Src.getOperand(1));
2383 uint64_t Offset = (uint64_t)AddVal->getSExtValue();
2384 if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
2385 SDLoc DL(N);
2386 SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
2387 SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
2388 SDValue ExtAdd = CurDAG->getNode(ISD::ADD, DL, VT, ExtSrc, ExtVal);
2389 insertDAGNode(*CurDAG, N, ExtSrc);
2390 insertDAGNode(*CurDAG, N, ExtVal);
2391 insertDAGNode(*CurDAG, N, ExtAdd);
2392 CurDAG->ReplaceAllUsesWith(N, ExtAdd);
2393 CurDAG->RemoveDeadNode(N.getNode());
2394 return ExtSrc;
2395 }
2396 }
2397 }
2398 }
2399
2400 // index: zext(add_nuw(x,c)) -> index: zext(x), disp + zext(c)
2401 // index: zext(addlike(x,c)) -> index: zext(x), disp + zext(c)
2402 // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext?
2403 if (Opc == ISD::ZERO_EXTEND && !VT.isVector() && N.hasOneUse()) {
2404 SDValue Src = N.getOperand(0);
2405 unsigned SrcOpc = Src.getOpcode();
2406 if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
2407 CurDAG->isADDLike(Src, /*NoWrap=*/true)) &&
2408 Src.hasOneUse()) {
2409 if (CurDAG->isBaseWithConstantOffset(Src)) {
2410 SDValue AddSrc = Src.getOperand(0);
2411 uint64_t Offset = Src.getConstantOperandVal(1);
2412 if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
2413 SDLoc DL(N);
2414 SDValue Res;
2415 // If we're also scaling, see if we can use that as well.
2416 if (AddSrc.getOpcode() == ISD::SHL &&
2417 isa<ConstantSDNode>(AddSrc.getOperand(1))) {
2418 SDValue ShVal = AddSrc.getOperand(0);
2419 uint64_t ShAmt = AddSrc.getConstantOperandVal(1);
2420 APInt HiBits =
2421 APInt::getHighBitsSet(AddSrc.getScalarValueSizeInBits(), ShAmt);
2422 uint64_t ScaleAmt = 1ULL << ShAmt;
2423 if ((AM.Scale * ScaleAmt) <= 8 &&
2424 (AddSrc->getFlags().hasNoUnsignedWrap() ||
2425 CurDAG->MaskedValueIsZero(ShVal, HiBits))) {
2426 AM.Scale *= ScaleAmt;
2427 SDValue ExtShVal = CurDAG->getNode(Opc, DL, VT, ShVal);
2428 SDValue ExtShift = CurDAG->getNode(ISD::SHL, DL, VT, ExtShVal,
2429 AddSrc.getOperand(1));
2430 insertDAGNode(*CurDAG, N, ExtShVal);
2431 insertDAGNode(*CurDAG, N, ExtShift);
2432 AddSrc = ExtShift;
2433 Res = ExtShVal;
2434 }
2435 }
2436 SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
2437 SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
2438 SDValue ExtAdd = CurDAG->getNode(SrcOpc, DL, VT, ExtSrc, ExtVal);
2439 insertDAGNode(*CurDAG, N, ExtSrc);
2440 insertDAGNode(*CurDAG, N, ExtVal);
2441 insertDAGNode(*CurDAG, N, ExtAdd);
2442 CurDAG->ReplaceAllUsesWith(N, ExtAdd);
2443 CurDAG->RemoveDeadNode(N.getNode());
2444 return Res ? Res : ExtSrc;
2445 }
2446 }
2447 }
2448 }
2449
2450 // TODO: Handle extensions, shifted masks etc.
2451 return N;
2452}
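// For example, with a scale of 4 already established, an index of (add %x, 3)
// peels the constant into the displacement:
//   index = %x, scale = 4, disp += 3 * 4
// while (add %x, %x) instead doubles the scale to 8 with index = %x.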
2453
2454bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
2455 unsigned Depth) {
2456 SDLoc dl(N);
2457 LLVM_DEBUG({
2458 dbgs() << "MatchAddress: ";
2459 AM.dump(CurDAG);
2460 });
2461 // Limit recursion.
2462 if (Depth >= SelectionDAG::MaxRecursionDepth)
2463 return matchAddressBase(N, AM);
2464
2465 // If this is already a %rip relative address, we can only merge immediates
2466 // into it. Instead of handling this in every case, we handle it here.
2467 // RIP relative addressing: %rip + 32-bit displacement!
2468 if (AM.isRIPRelative()) {
2469 // FIXME: JumpTable and ExternalSymbol addresses currently don't like
2470 // displacements. It isn't very important, but this should be fixed for
2471 // consistency.
2472 if (!(AM.ES || AM.MCSym) && AM.JT != -1)
2473 return true;
2474
2475 if (auto *Cst = dyn_cast<ConstantSDNode>(N))
2476 if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
2477 return false;
2478 return true;
2479 }
2480
2481 switch (N.getOpcode()) {
2482 default: break;
2483 case ISD::LOCAL_RECOVER: {
2484 if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
2485 if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
2486 // Use the symbol and don't prefix it.
2487 AM.MCSym = ESNode->getMCSymbol();
2488 return false;
2489 }
2490 break;
2491 }
2492 case ISD::Constant: {
2493 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2494 if (!foldOffsetIntoAddress(Val, AM))
2495 return false;
2496 break;
2497 }
2498
2499 case X86ISD::Wrapper:
2500 case X86ISD::WrapperRIP:
2501 if (!matchWrapper(N, AM))
2502 return false;
2503 break;
2504
2505 case ISD::LOAD:
2506 if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
2507 return false;
2508 break;
2509
2510 case ISD::FrameIndex:
2511 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2512 AM.Base_Reg.getNode() == nullptr &&
2513 (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
2514 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
2515 AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
2516 return false;
2517 }
2518 break;
2519
2520 case ISD::SHL:
2521 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2522 break;
2523
2524 if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
2525 unsigned Val = CN->getZExtValue();
2526 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
2527 // that the base operand remains free for further matching. If
2528 // the base doesn't end up getting used, a post-processing step
2529 // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
2530 if (Val == 1 || Val == 2 || Val == 3) {
2531 SDValue ShVal = N.getOperand(0);
2532 AM.Scale = 1 << Val;
2533 AM.IndexReg = matchIndexRecursively(ShVal, AM, Depth + 1);
2534 return false;
2535 }
2536 }
2537 break;
2538
2539 case ISD::SRL: {
2540 // Scale must not be used already.
2541 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2542
2543 // We only handle up to 64-bit values here as those are what matter for
2544 // addressing mode optimizations.
2545 assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2546 "Unexpected value size!");
2547
2548 SDValue And = N.getOperand(0);
2549 if (And.getOpcode() != ISD::AND) break;
2550 SDValue X = And.getOperand(0);
2551
2552 // The mask used for the transform is expected to be post-shift, but we
2553 // found the shift first so just apply the shift to the mask before passing
2554 // it down.
2555 if (!isa<ConstantSDNode>(N.getOperand(1)) ||
2556 !isa<ConstantSDNode>(And.getOperand(1)))
2557 break;
2558 uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
2559
2560 // Try to fold the mask and shift into the scale, and return false if we
2561 // succeed.
2562 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
2563 return false;
2564 break;
2565 }
2566
2567 case ISD::SMUL_LOHI:
2568 case ISD::UMUL_LOHI:
2569 // A mul_lohi where we need the low part can be folded as a plain multiply.
2570 if (N.getResNo() != 0) break;
2571 [[fallthrough]];
2572 case ISD::MUL:
2573 case X86ISD::MUL_IMM:
2574 // X*[3,5,9] -> X+X*[2,4,8]
2575 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2576 AM.Base_Reg.getNode() == nullptr &&
2577 AM.IndexReg.getNode() == nullptr) {
2578 if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
2579 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
2580 CN->getZExtValue() == 9) {
2581 AM.Scale = unsigned(CN->getZExtValue())-1;
2582
2583 SDValue MulVal = N.getOperand(0);
2584 SDValue Reg;
2585
2586 // Okay, we know that we have a scale by now. However, if the scaled
2587 // value is an add of something and a constant, we can fold the
2588 // constant into the disp field here.
2589 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
2590 isa<ConstantSDNode>(MulVal.getOperand(1))) {
2591 Reg = MulVal.getOperand(0);
2592 auto *AddVal = cast<ConstantSDNode>(MulVal.getOperand(1));
2593 uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
2594 if (foldOffsetIntoAddress(Disp, AM))
2595 Reg = N.getOperand(0);
2596 } else {
2597 Reg = N.getOperand(0);
2598 }
2599
2600 AM.IndexReg = AM.Base_Reg = Reg;
2601 return false;
2602 }
2603 }
2604 break;
2605
2606 case ISD::SUB: {
2607 // Given A-B, if A can be completely folded into the address, with the
2608 // index field left unused, use -B as the index.
2609 // This is a win if A has multiple parts that can be folded into
2610 // the address. Also, this saves a mov if the base register has
2611 // other uses, since it avoids a two-address sub instruction, however
2612 // it costs an additional mov if the index register has other uses.
2613
2614 // Add an artificial use to this node so that we can keep track of
2615 // it if it gets CSE'd with a different node.
2616 HandleSDNode Handle(N);
2617
2618 // Test if the LHS of the sub can be folded.
2619 X86ISelAddressMode Backup = AM;
2620 if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2621 N = Handle.getValue();
2622 AM = Backup;
2623 break;
2624 }
2625 N = Handle.getValue();
2626 // Test if the index field is free for use.
2627 if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2628 AM = Backup;
2629 break;
2630 }
2631
2632 int Cost = 0;
2633 SDValue RHS = N.getOperand(1);
2634 // If the RHS involves a register with multiple uses, this
2635 // transformation incurs an extra mov, due to the neg instruction
2636 // clobbering its operand.
2637 if (!RHS.getNode()->hasOneUse() ||
2638 RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2639 RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2640 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2641 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2642 RHS.getOperand(0).getValueType() == MVT::i32))
2643 ++Cost;
2644 // If the base is a register with multiple uses, this
2645 // transformation may save a mov.
2646 if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2647 !AM.Base_Reg.getNode()->hasOneUse()) ||
2648 AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2649 --Cost;
2650 // If the folded LHS was interesting, this transformation saves
2651 // address arithmetic.
2652 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2653 ((AM.Disp != 0) && (Backup.Disp == 0)) +
2654 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2655 --Cost;
2656 // If it doesn't look like it may be an overall win, don't do it.
2657 if (Cost >= 0) {
2658 AM = Backup;
2659 break;
2660 }
2661
2662 // Ok, the transformation is legal and appears profitable. Go for it.
2663 // Negation will be emitted later to avoid creating dangling nodes if this
2664 // was an unprofitable LEA.
2665 AM.IndexReg = RHS;
2666 AM.NegateIndex = true;
2667 AM.Scale = 1;
2668 return false;
2669 }
2670
2671 case ISD::OR:
2672 case ISD::XOR:
2673 // See if we can treat the OR/XOR node as an ADD node.
2674 if (!CurDAG->isADDLike(N))
2675 break;
2676 [[fallthrough]];
2677 case ISD::ADD:
2678 if (!matchAdd(N, AM, Depth))
2679 return false;
2680 break;
2681
2682 case ISD::AND: {
2683 // Perform some heroic transforms on an and of a constant-count shift
2684 // with a constant to enable use of the scaled offset field.
2685
2686 // Scale must not be used already.
2687 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2688
2689 // We only handle up to 64-bit values here as those are what matter for
2690 // addressing mode optimizations.
2691 assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2692 "Unexpected value size!");
2693
2694 if (!isa<ConstantSDNode>(N.getOperand(1)))
2695 break;
2696
2697 if (N.getOperand(0).getOpcode() == ISD::SRL) {
2698 SDValue Shift = N.getOperand(0);
2699 SDValue X = Shift.getOperand(0);
2700
2701 uint64_t Mask = N.getConstantOperandVal(1);
2702
2703 // Try to fold the mask and shift into an extract and scale.
2704 if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2705 return false;
2706
2707 // Try to fold the mask and shift directly into the scale.
2708 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2709 return false;
2710
2711 // Try to fold the mask and shift into BEXTR and scale.
2712 if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2713 return false;
2714 }
2715
2716 // Try to swap the mask and shift to place shifts which can be done as
2717 // a scale on the outside of the mask.
2718 if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2719 return false;
2720
2721 break;
2722 }
2723 case ISD::ZERO_EXTEND: {
2724 // Try to widen a zexted shift left to the same size as its use, so we can
2725 // match the shift as a scale factor.
2726 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2727 break;
2728
2729 SDValue Src = N.getOperand(0);
2730
2731 // See if we can match a zext(addlike(x,c)).
2732 // TODO: Move more ZERO_EXTEND patterns into matchIndexRecursively.
2733 if (Src.getOpcode() == ISD::ADD || Src.getOpcode() == ISD::OR)
2734 if (SDValue Index = matchIndexRecursively(N, AM, Depth + 1))
2735 if (Index != N) {
2736 AM.IndexReg = Index;
2737 return false;
2738 }
2739
2740 // Peek through mask: zext(and(shl(x,c1),c2))
2741 APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits());
2742 if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
2743 if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
2744 Mask = MaskC->getAPIntValue();
2745 Src = Src.getOperand(0);
2746 }
2747
2748 if (Src.getOpcode() == ISD::SHL && Src.hasOneUse() && N->hasOneUse()) {
2749 // Give up if the shift is not a valid scale factor [1,2,3].
2750 SDValue ShlSrc = Src.getOperand(0);
2751 SDValue ShlAmt = Src.getOperand(1);
2752 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);
2753 if (!ShAmtC)
2754 break;
2755 unsigned ShAmtV = ShAmtC->getZExtValue();
2756 if (ShAmtV > 3)
2757 break;
2758
2759 // The narrow shift must only shift out zero bits (it must be 'nuw').
2760 // That makes it safe to widen to the destination type.
2761 APInt HighZeros =
2762 APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV);
2763 if (!Src->getFlags().hasNoUnsignedWrap() &&
2764 !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
2765 break;
2766
2767 // zext (shl nuw i8 %x, C1) to i32
2768 // --> shl (zext i8 %x to i32), (zext C1)
2769 // zext (and (shl nuw i8 %x, C1), C2) to i32
2770 // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1)
2771 MVT SrcVT = ShlSrc.getSimpleValueType();
2772 MVT VT = N.getSimpleValueType();
2773 SDLoc DL(N);
2774
2775 SDValue Res = ShlSrc;
2776 if (!Mask.isAllOnes()) {
2777 Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);
2778 insertDAGNode(*CurDAG, N, Res);
2779 Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);
2780 insertDAGNode(*CurDAG, N, Res);
2781 }
2782 SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res);
2783 insertDAGNode(*CurDAG, N, Zext);
2784 SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt);
2785 insertDAGNode(*CurDAG, N, NewShl);
2786 CurDAG->ReplaceAllUsesWith(N, NewShl);
2787 CurDAG->RemoveDeadNode(N.getNode());
2788
2789 // Convert the shift to scale factor.
2790 AM.Scale = 1 << ShAmtV;
2791 // If matchIndexRecursively is not called here, Zext may be replaced by
2792 // other nodes but still be used later when calling a builder
2793 // method.
2794 AM.IndexReg = matchIndexRecursively(Zext, AM, Depth + 1);
2795 return false;
2796 }
2797
2798 if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes()) {
2799 // Try to fold the mask and shift into an extract and scale.
2800 if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src,
2801 Src.getOperand(0), AM))
2802 return false;
2803
2804 // Try to fold the mask and shift directly into the scale.
2805 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask.getZExtValue(), Src,
2806 Src.getOperand(0), AM))
2807 return false;
2808
2809 // Try to fold the mask and shift into BEXTR and scale.
2810 if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask.getZExtValue(), Src,
2811 Src.getOperand(0), AM, *Subtarget))
2812 return false;
2813 }
2814
2815 break;
2816 }
2817 }
2818
2819 return matchAddressBase(N, AM);
2820}
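// Sketches of two of the cases above (register choices are arbitrary):
//   A multiply by 9 used as an address becomes base = X, index = X, scale = 8,
//   e.g. "leaq (%rdi,%rdi,8), %rax".
//   For A-B, when A folds completely and the index slot is free, B becomes the
//   index with NegateIndex set, so the negation is only materialized if the
//   LEA form is actually used.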
2821
2822/// Helper for MatchAddress. Add the specified node to the
2823/// specified addressing mode without any further recursion.
2824bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2825 // Is the base register already occupied?
2826 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2827 // If so, check to see if the scale index register is set.
2828 if (!AM.IndexReg.getNode()) {
2829 AM.IndexReg = N;
2830 AM.Scale = 1;
2831 return false;
2832 }
2833
2834 // Otherwise, we cannot select it.
2835 return true;
2836 }
2837
2838 // Default, generate it as a register.
2839 AM.BaseType = X86ISelAddressMode::RegBase;
2840 AM.Base_Reg = N;
2841 return false;
2842}
2843
2844bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
2845 X86ISelAddressMode &AM,
2846 unsigned Depth) {
2847 SDLoc dl(N);
2848 LLVM_DEBUG({
2849 dbgs() << "MatchVectorAddress: ";
2850 AM.dump(CurDAG);
2851 });
2852 // Limit recursion.
2853 if (Depth >= SelectionDAG::MaxRecursionDepth)
2854 return matchAddressBase(N, AM);
2855
2856 // TODO: Support other operations.
2857 switch (N.getOpcode()) {
2858 case ISD::Constant: {
2859 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2860 if (!foldOffsetIntoAddress(Val, AM))
2861 return false;
2862 break;
2863 }
2864 case X86ISD::Wrapper:
2865 if (!matchWrapper(N, AM))
2866 return false;
2867 break;
2868 case ISD::ADD: {
2869 // Add an artificial use to this node so that we can keep track of
2870 // it if it gets CSE'd with a different node.
2871 HandleSDNode Handle(N);
2872
2873 X86ISelAddressMode Backup = AM;
2874 if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
2875 !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2876 Depth + 1))
2877 return false;
2878 AM = Backup;
2879
2880 // Try again after commuting the operands.
2881 if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2882 Depth + 1) &&
2883 !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
2884 Depth + 1))
2885 return false;
2886 AM = Backup;
2887
2888 N = Handle.getValue();
2889 break;
2890 }
2891 }
2892
2893 return matchAddressBase(N, AM);
2894}
2895
2896/// Helper for selectVectorAddr. Handles things that can be folded into a
2897/// gather/scatter address. The index register and scale should have already
2898/// been handled.
2899bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2900 return matchVectorAddressRecursively(N, AM, 0);
2901}
2902
2903bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
2904 SDValue IndexOp, SDValue ScaleOp,
2905 SDValue &Base, SDValue &Scale,
2906 SDValue &Index, SDValue &Disp,
2907 SDValue &Segment) {
2908 X86ISelAddressMode AM;
2909 AM.Scale = ScaleOp->getAsZExtVal();
2910
2911 // Attempt to match index patterns, as long as we're not relying on implicit
2912 // sign-extension, which is performed BEFORE scale.
2913 if (IndexOp.getScalarValueSizeInBits() == BasePtr.getScalarValueSizeInBits())
2914 AM.IndexReg = matchIndexRecursively(IndexOp, AM, 0);
2915 else
2916 AM.IndexReg = IndexOp;
2917
2918 unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
2919 if (AddrSpace == X86AS::GS)
2920 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2921 if (AddrSpace == X86AS::FS)
2922 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2923 if (AddrSpace == X86AS::SS)
2924 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2925
2926 SDLoc DL(BasePtr);
2927 MVT VT = BasePtr.getSimpleValueType();
2928
2929 // Try to match into the base and displacement fields.
2930 if (matchVectorAddress(BasePtr, AM))
2931 return false;
2932
2933 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2934 return true;
2935}
2936
2937/// Returns true if it is able to pattern match an addressing mode.
2938/// It returns the operands which make up the maximal addressing mode it can
2939/// match by reference.
2940///
2941/// Parent is the parent node of the addr operand that is being matched. It
2942/// is always a load, store, atomic node, or null. It is only null when
2943/// checking memory operands for inline asm nodes.
2944bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2945 SDValue &Scale, SDValue &Index,
2946 SDValue &Disp, SDValue &Segment) {
2947 X86ISelAddressMode AM;
2948
2949 if (Parent &&
2950 // This list of opcodes are all the nodes that have an "addr:$ptr" operand
2951 // that are not a MemSDNode, and thus don't have proper addrspace info.
2952 Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2953 Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2954 Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2955 Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2956 Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2957 Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2958 Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2959 unsigned AddrSpace =
2960 cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2961 if (AddrSpace == X86AS::GS)
2962 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2963 if (AddrSpace == X86AS::FS)
2964 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2965 if (AddrSpace == X86AS::SS)
2966 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2967 }
2968
2969 // Save the DL and VT before calling matchAddress, it can invalidate N.
2970 SDLoc DL(N);
2971 MVT VT = N.getSimpleValueType();
2972
2973 if (matchAddress(N, AM))
2974 return false;
2975
2976 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2977 return true;
2978}
2979
2980bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2981 // Cannot use 32 bit constants to reference objects in kernel/large code
2982 // model.
2983 if (TM.getCodeModel() == CodeModel::Kernel ||
2984 TM.getCodeModel() == CodeModel::Large)
2985 return false;
2986
2987 // In static codegen with small code model, we can get the address of a label
2988 // into a register with 'movl'.
2989 if (N->getOpcode() != X86ISD::Wrapper)
2990 return false;
2991
2992 N = N.getOperand(0);
2993
2994 // At least GNU as does not accept 'movl' for TPOFF relocations.
2995 // FIXME: We could use 'movl' when we know we are targeting MC.
2996 if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2997 return false;
2998
2999 Imm = N;
3000 // Small/medium code model can reference non-TargetGlobalAddress objects with
3001 // 32 bit constants.
3002 if (N->getOpcode() != ISD::TargetGlobalAddress) {
3003 return TM.getCodeModel() == CodeModel::Small ||
3004 TM.getCodeModel() == CodeModel::Medium;
3005 }
3006
3007 const GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
3008 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
3009 return CR->getUnsignedMax().ult(1ull << 32);
3010
3011 return !TM.isLargeGlobalValue(GV);
3012}
3013
3014bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
3015 SDValue &Scale, SDValue &Index,
3016 SDValue &Disp, SDValue &Segment) {
3017 // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
3018 SDLoc DL(N);
3019
3020 if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
3021 return false;
3022
3023 auto *RN = dyn_cast<RegisterSDNode>(Base);
3024 if (RN && RN->getReg() == 0)
3025 Base = CurDAG->getRegister(0, MVT::i64);
3026 else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
3027 // Base could already be %rip, particularly in the x32 ABI.
3028 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
3029 MVT::i64), 0);
3030 Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
3031 Base);
3032 }
3033
3034 RN = dyn_cast<RegisterSDNode>(Index);
3035 if (RN && RN->getReg() == 0)
3036 Index = CurDAG->getRegister(0, MVT::i64);
3037 else {
3038 assert(Index.getValueType() == MVT::i32 &&
3039 "Expect to be extending 32-bit registers for use in LEA");
3040 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
3041 MVT::i64), 0);
3042 Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
3043 Index);
3044 }
3045
3046 return true;
3047}
3048
3049/// Calls SelectAddr and determines if the maximal addressing
3050/// mode it matches can be cost effectively emitted as an LEA instruction.
3051bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
3052 SDValue &Base, SDValue &Scale,
3053 SDValue &Index, SDValue &Disp,
3054 SDValue &Segment) {
3055 X86ISelAddressMode AM;
3056
3057 // Save the DL and VT before calling matchAddress, it can invalidate N.
3058 SDLoc DL(N);
3059 MVT VT = N.getSimpleValueType();
3060
3061 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
3062 // segments.
3063 SDValue Copy = AM.Segment;
3064 SDValue T = CurDAG->getRegister(0, MVT::i32);
3065 AM.Segment = T;
3066 if (matchAddress(N, AM))
3067 return false;
3068 assert (T == AM.Segment);
3069 AM.Segment = Copy;
3070
3071 unsigned Complexity = 0;
3072 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
3073 Complexity = 1;
3074 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
3075 Complexity = 4;
3076
3077 if (AM.IndexReg.getNode())
3078 Complexity++;
3079
3080 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
3081 // a simple shift.
3082 if (AM.Scale > 1)
3083 Complexity++;
3084
3085 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
3086 // to a LEA. This is determined with some experimentation but is by no means
3087 // optimal (especially for code size consideration). LEA is nice because of
3088 // its three-address nature. Tweak the cost function again when we can run
3089 // convertToThreeAddress() at register allocation time.
3090 if (AM.hasSymbolicDisplacement()) {
3091 // For X86-64, always use LEA to materialize RIP-relative addresses.
3092 if (Subtarget->is64Bit())
3093 Complexity = 4;
3094 else
3095 Complexity += 2;
3096 }
3097
3098 // Heuristic: try harder to form an LEA from ADD if the operands set flags.
3099 // Unlike ADD, LEA does not affect flags, so we will be less likely to require
3100 // duplicating flag-producing instructions later in the pipeline.
3101 if (N.getOpcode() == ISD::ADD) {
3102 auto isMathWithFlags = [](SDValue V) {
3103 switch (V.getOpcode()) {
3104 case X86ISD::ADD:
3105 case X86ISD::SUB:
3106 case X86ISD::ADC:
3107 case X86ISD::SBB:
3108 case X86ISD::SMUL:
3109 case X86ISD::UMUL:
3110 /* TODO: These opcodes can be added safely, but we may want to justify
3111 their inclusion for different reasons (better for reg-alloc).
3112 case X86ISD::OR:
3113 case X86ISD::XOR:
3114 case X86ISD::AND:
3115 */
3116 // Value 1 is the flag output of the node - verify it's not dead.
3117 return !SDValue(V.getNode(), 1).use_empty();
3118 default:
3119 return false;
3120 }
3121 };
3122 // TODO: We might want to factor in whether there's a load folding
3123 // opportunity for the math op that disappears with LEA.
3124 if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1)))
3125 Complexity++;
3126 }
3127
3128 if (AM.Disp)
3129 Complexity++;
3130
3131 // If it isn't worth using an LEA, reject it.
3132 if (Complexity <= 2)
3133 return false;
3134
3135 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
3136 return true;
3137}
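// The complexity heuristic above roughly asks for at least two address parts.
// For illustration: base + displacement alone scores 2 and is rejected (an add
// with an immediate is cheaper), while base + index + displacement scores 3
// and is accepted, e.g. "leal 4(%rdi,%rsi), %eax".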
3138
3139/// This is only run on TargetGlobalTLSAddress nodes.
3140bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
3141 SDValue &Scale, SDValue &Index,
3142 SDValue &Disp, SDValue &Segment) {
3143 assert(N.getOpcode() == ISD::TargetGlobalTLSAddress ||
3144 N.getOpcode() == ISD::TargetExternalSymbol);
3145
3146 X86ISelAddressMode AM;
3147 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) {
3148 AM.GV = GA->getGlobal();
3149 AM.Disp += GA->getOffset();
3150 AM.SymbolFlags = GA->getTargetFlags();
3151 } else {
3152 auto *SA = cast<ExternalSymbolSDNode>(N);
3153 AM.ES = SA->getSymbol();
3154 AM.SymbolFlags = SA->getTargetFlags();
3155 }
3156
3157 if (Subtarget->is32Bit()) {
3158 AM.Scale = 1;
3159 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
3160 }
3161
3162 MVT VT = N.getSimpleValueType();
3163 getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
3164 return true;
3165}
3166
3167bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
3168 // Keep track of the original value type and whether this value was
3169 // truncated. If we see a truncation from pointer type to VT that truncates
3170 // bits that are known to be zero, we can use a narrow reference.
3171 EVT VT = N.getValueType();
3172 bool WasTruncated = false;
3173 if (N.getOpcode() == ISD::TRUNCATE) {
3174 WasTruncated = true;
3175 N = N.getOperand(0);
3176 }
3177
3178 if (N.getOpcode() != X86ISD::Wrapper)
3179 return false;
3180
3181 // We can only use non-GlobalValues as immediates if they were not truncated,
3182 // as we do not have any range information. If we have a GlobalValue and the
3183 // address was not truncated, we can select it as an operand directly.
3184 unsigned Opc = N.getOperand(0)->getOpcode();
3185 if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
3186 Op = N.getOperand(0);
3187 // We can only select the operand directly if we didn't have to look past a
3188 // truncate.
3189 return !WasTruncated;
3190 }
3191
3192 // Check that the global's range fits into VT.
3193 auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
3194 std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
3195 if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
3196 return false;
3197
3198 // Okay, we can use a narrow reference.
3199 Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
3200 GA->getOffset(), GA->getTargetFlags());
3201 return true;
3202}
3203
3204bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
3205 SDValue &Base, SDValue &Scale,
3206 SDValue &Index, SDValue &Disp,
3207 SDValue &Segment) {
3208 assert(Root && P && "Unknown root/parent nodes");
3209 if (!ISD::isNON_EXTLoad(N.getNode()) ||
3210 !IsProfitableToFold(N, P, Root) ||
3211 !IsLegalToFold(N, P, Root, OptLevel))
3212 return false;
3213
3214 return selectAddr(N.getNode(),
3215 N.getOperand(1), Base, Scale, Index, Disp, Segment);
3216}
3217
3218bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
3219 SDValue &Base, SDValue &Scale,
3220 SDValue &Index, SDValue &Disp,
3221 SDValue &Segment) {
3222 assert(Root && P && "Unknown root/parent nodes");
3223 if (N->getOpcode() != X86ISD::VBROADCAST_LOAD ||
3224 !IsProfitableToFold(N, P, Root) ||
3225 !IsLegalToFold(N, P, Root, OptLevel))
3226 return false;
3227
3228 return selectAddr(N.getNode(),
3229 N.getOperand(1), Base, Scale, Index, Disp, Segment);
3230}
3231
3232/// Return an SDNode that returns the value of the global base register.
3233/// Output instructions required to initialize the global base register,
3234/// if necessary.
3235SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
3236 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
3237 auto &DL = MF->getDataLayout();
3238 return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
3239}
3240
3241bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
3242 if (N->getOpcode() == ISD::TRUNCATE)
3243 N = N->getOperand(0).getNode();
3244 if (N->getOpcode() != X86ISD::Wrapper)
3245 return false;
3246
3247 auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
3248 if (!GA)
3249 return false;
3250
3251 auto *GV = GA->getGlobal();
3252 std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange();
3253 if (CR)
3254 return CR->getSignedMin().sge(-1ull << Width) &&
3255 CR->getSignedMax().slt(1ull << Width);
3256 // In the kernel code model, globals are in the negative 2GB of the address
3257 // space, so globals can be a sign extended 32-bit immediate.
3258 // In other code models, small globals are in the low 2GB of the address
3259 // space, so sign extending them is equivalent to zero extending them.
3260 return Width == 32 && !TM.isLargeGlobalValue(GV);
3261}
3262
3263X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const {
3264 assert(N->isMachineOpcode() && "Unexpected node");
3265 unsigned Opc = N->getMachineOpcode();
3266 const MCInstrDesc &MCID = getInstrInfo()->get(Opc);
3267 int CondNo = X86::getCondSrcNoFromDesc(MCID);
3268 if (CondNo < 0)
3269 return X86::COND_INVALID;
3270
3271 return static_cast<X86::CondCode>(N->getConstantOperandVal(CondNo));
3272}
3273
3274/// Test whether the given X86ISD::CMP node has any users that use a flag
3275/// other than ZF.
3276bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
3277 // Examine each user of the node.
3278 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3279 UI != UE; ++UI) {
3280 // Only check things that use the flags.
3281 if (UI.getUse().getResNo() != Flags.getResNo())
3282 continue;
3283 // Only examine CopyToReg uses that copy to EFLAGS.
3284 if (UI->getOpcode() != ISD::CopyToReg ||
3285 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3286 return false;
3287 // Examine each user of the CopyToReg use.
3288 for (SDNode::use_iterator FlagUI = UI->use_begin(),
3289 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
3290 // Only examine the Flag result.
3291 if (FlagUI.getUse().getResNo() != 1) continue;
3292 // Anything unusual: assume conservatively.
3293 if (!FlagUI->isMachineOpcode()) return false;
3294 // Examine the condition code of the user.
3295 X86::CondCode CC = getCondFromNode(*FlagUI);
3296
3297 switch (CC) {
3298 // Comparisons which only use the zero flag.
3299 case X86::COND_E: case X86::COND_NE:
3300 continue;
3301 // Anything else: assume conservatively.
3302 default:
3303 return false;
3304 }
3305 }
3306 }
3307 return true;
3308}
3309
3310/// Test whether the given X86ISD::CMP node has any uses which require the SF
3311/// flag to be accurate.
3312bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
3313 // Examine each user of the node.
3314 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3315 UI != UE; ++UI) {
3316 // Only check things that use the flags.
3317 if (UI.getUse().getResNo() != Flags.getResNo())
3318 continue;
3319 // Only examine CopyToReg uses that copy to EFLAGS.
3320 if (UI->getOpcode() != ISD::CopyToReg ||
3321 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3322 return false;
3323 // Examine each user of the CopyToReg use.
3324 for (SDNode::use_iterator FlagUI = UI->use_begin(),
3325 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
3326 // Only examine the Flag result.
3327 if (FlagUI.getUse().getResNo() != 1) continue;
3328 // Anything unusual: assume conservatively.
3329 if (!FlagUI->isMachineOpcode()) return false;
3330 // Examine the condition code of the user.
3331 X86::CondCode CC = getCondFromNode(*FlagUI);
3332
3333 switch (CC) {
3334 // Comparisons which don't examine the SF flag.
3335 case X86::COND_A: case X86::COND_AE:
3336 case X86::COND_B: case X86::COND_BE:
3337 case X86::COND_E: case X86::COND_NE:
3338 case X86::COND_O: case X86::COND_NO:
3339 case X86::COND_P: case X86::COND_NP:
3340 continue;
3341 // Anything else: assume conservatively.
3342 default:
3343 return false;
3344 }
3345 }
3346 }
3347 return true;
3348}
3349
3350static bool mayUseCarryFlag(X86::CondCode CC) {
3351 switch (CC) {
3352 // Comparisons which don't examine the CF flag.
3353 case X86::COND_O: case X86::COND_NO:
3354 case X86::COND_E: case X86::COND_NE:
3355 case X86::COND_S: case X86::COND_NS:
3356 case X86::COND_P: case X86::COND_NP:
3357 case X86::COND_L: case X86::COND_GE:
3358 case X86::COND_G: case X86::COND_LE:
3359 return false;
3360 // Anything else: assume conservatively.
3361 default:
3362 return true;
3363 }
3364}
3365
3366/// Test whether the given node which sets flags has any uses which require the
3367/// CF flag to be accurate.
3368bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
3369 // Examine each user of the node.
3370 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3371 UI != UE; ++UI) {
3372 // Only check things that use the flags.
3373 if (UI.getUse().getResNo() != Flags.getResNo())
3374 continue;
3375
3376 unsigned UIOpc = UI->getOpcode();
3377
3378 if (UIOpc == ISD::CopyToReg) {
3379 // Only examine CopyToReg uses that copy to EFLAGS.
3380 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3381 return false;
3382 // Examine each user of the CopyToReg use.
3383 for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
3384 FlagUI != FlagUE; ++FlagUI) {
3385 // Only examine the Flag result.
3386 if (FlagUI.getUse().getResNo() != 1)
3387 continue;
3388 // Anything unusual: assume conservatively.
3389 if (!FlagUI->isMachineOpcode())
3390 return false;
3391 // Examine the condition code of the user.
3392 X86::CondCode CC = getCondFromNode(*FlagUI);
3393
3394 if (mayUseCarryFlag(CC))
3395 return false;
3396 }
3397
3398 // This CopyToReg is ok. Move on to the next user.
3399 continue;
3400 }
3401
3402 // This might be an unselected node. So look for the pre-isel opcodes that
3403 // use flags.
3404 unsigned CCOpNo;
3405 switch (UIOpc) {
3406 default:
3407 // Something unusual. Be conservative.
3408 return false;
3409 case X86ISD::SETCC: CCOpNo = 0; break;
3410 case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
3411 case X86ISD::CMOV: CCOpNo = 2; break;
3412 case X86ISD::BRCOND: CCOpNo = 2; break;
3413 }
3414
3415 X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
3416 if (mayUseCarryFlag(CC))
3417 return false;
3418 }
3419 return true;
3420}
3421
3422/// Check whether or not the chain ending in StoreNode is suitable for doing
3423/// the {load; op; store} to modify transformation.
3424static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
3425 SDValue StoredVal, SelectionDAG *CurDAG,
3426 unsigned LoadOpNo,
3427 LoadSDNode *&LoadNode,
3428 SDValue &InputChain) {
3429 // Is the stored value result 0 of the operation?
3430 if (StoredVal.getResNo() != 0) return false;
3431
3432 // Are there any uses of the operation other than the store?
3433 if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
3434
3435 // Is the store non-extending and non-indexed?
3436 if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
3437 return false;
3438
3439 SDValue Load = StoredVal->getOperand(LoadOpNo);
3440 // Is the stored value a non-extending and non-indexed load?
3441 if (!ISD::isNormalLoad(Load.getNode())) return false;
3442
3443 // Return LoadNode by reference.
3444 LoadNode = cast<LoadSDNode>(Load);
3445
3446 // Is store the only read of the loaded value?
3447 if (!Load.hasOneUse())
3448 return false;
3449
3450 // Is the address of the store the same as the load?
3451 if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
3452 LoadNode->getOffset() != StoreNode->getOffset())
3453 return false;
3454
3455 bool FoundLoad = false;
3456 SmallVector<SDValue, 4> ChainOps;
3457 SmallVector<const SDNode *, 4> LoopWorklist;
3458 SmallPtrSet<const SDNode *, 16> Visited;
3459 const unsigned int Max = 1024;
3460
3461 // Visualization of Load-Op-Store fusion:
3462 // -------------------------
3463 // Legend:
3464 // *-lines = Chain operand dependencies.
3465 // |-lines = Normal operand dependencies.
3466 // Dependencies flow down and right. n-suffix references multiple nodes.
3467 //
3468 // C Xn C
3469 // * * *
3470 // * * *
3471 // Xn A-LD Yn TF Yn
3472 // * * \ | * |
3473 // * * \ | * |
3474 // * * \ | => A--LD_OP_ST
3475 // * * \| \
3476 // TF OP \
3477 // * | \ Zn
3478 // * | \
3479 // A-ST Zn
3480 //
3481
3482 // This merge induced dependences from: #1: Xn -> LD, OP, Zn
3483 // #2: Yn -> LD
3484 // #3: ST -> Zn
3485
3486 // Ensure the transform is safe by checking for the dual
3487 // dependencies to make sure we do not induce a loop.
3488
3489 // As LD is a predecessor to both OP and ST we can do this by checking:
3490 // a). if LD is a predecessor to a member of Xn or Yn.
3491 // b). if a Zn is a predecessor to ST.
3492
3493 // However, (b) can only occur through being a chain predecessor to
3494 // ST, which is the same as Zn being a member or predecessor of Xn,
3495 // which is a subset of LD being a predecessor of Xn. So it's
3496 // subsumed by check (a).
3497
3498 SDValue Chain = StoreNode->getChain();
3499
3500 // Gather X elements in ChainOps.
3501 if (Chain == Load.getValue(1)) {
3502 FoundLoad = true;
3503 ChainOps.push_back(Load.getOperand(0));
3504 } else if (Chain.getOpcode() == ISD::TokenFactor) {
3505 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
3506 SDValue Op = Chain.getOperand(i);
3507 if (Op == Load.getValue(1)) {
3508 FoundLoad = true;
3509 // Drop Load, but keep its chain. No cycle check necessary.
3510 ChainOps.push_back(Load.getOperand(0));
3511 continue;
3512 }
3513 LoopWorklist.push_back(Op.getNode());
3514 ChainOps.push_back(Op);
3515 }
3516 }
3517
3518 if (!FoundLoad)
3519 return false;
3520
3521 // Worklist is currently Xn. Add Yn to worklist.
3522 for (SDValue Op : StoredVal->ops())
3523 if (Op.getNode() != LoadNode)
3524 LoopWorklist.push_back(Op.getNode());
3525
3526 // Check (a) if Load is a predecessor to Xn + Yn
3527 if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
3528 true))
3529 return false;
3530
3531 InputChain =
3532 CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
3533 return true;
3534}
3535
3536// Change a chain of {load; op; store} of the same value into a simple op
3537// through memory of that value, if the uses of the modified value and its
3538// address are suitable.
3539//
3540 // The tablegen memory-operand pattern is currently not able to match the
3541 // case where the EFLAGS produced by the original operation are used.
3542//
3543// To move this to tablegen, we'll need to improve tablegen to allow flags to
3544// be transferred from a node in the pattern to the result node, probably with
3545// a new keyword. For example, we have this
3546// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
3547// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
3548// (implicit EFLAGS)]>;
3549// but maybe need something like this
3550// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
3551// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
3552// (transferrable EFLAGS)]>;
3553//
3554// Until then, we manually fold these and instruction select the operation
3555// here.
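// Roughly, this turns a load/op/store chain over one address such as
//   movl (%rdi), %eax
//   addl %esi, %eax
//   movl %eax, (%rdi)
// into the single read-modify-write instruction
//   addl %esi, (%rdi)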
3556bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
3557 auto *StoreNode = cast<StoreSDNode>(Node);
3558 SDValue StoredVal = StoreNode->getOperand(1);
3559 unsigned Opc = StoredVal->getOpcode();
3560
3561 // Before we try to select anything, make sure this is memory operand size
3562 // and opcode we can handle. Note that this must match the code below that
3563 // actually lowers the opcodes.
3564 EVT MemVT = StoreNode->getMemoryVT();
3565 if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
3566 MemVT != MVT::i8)
3567 return false;
3568
3569 bool IsCommutable = false;
3570 bool IsNegate = false;
3571 switch (Opc) {
3572 default:
3573 return false;
3574 case X86ISD::SUB:
3575 IsNegate = isNullConstant(StoredVal.getOperand(0));
3576 break;
3577 case X86ISD::SBB:
3578 break;
3579 case X86ISD::ADD:
3580 case X86ISD::ADC:
3581 case X86ISD::AND:
3582 case X86ISD::OR:
3583 case X86ISD::XOR:
3584 IsCommutable = true;
3585 break;
3586 }
3587
3588 unsigned LoadOpNo = IsNegate ? 1 : 0;
3589 LoadSDNode *LoadNode = nullptr;
3590 SDValue InputChain;
3591 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
3592 LoadNode, InputChain)) {
3593 if (!IsCommutable)
3594 return false;
3595
3596 // This operation is commutable, try the other operand.
3597 LoadOpNo = 1;
3598 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
3599 LoadNode, InputChain))
3600 return false;
3601 }
3602
3603 SDValue Base, Scale, Index, Disp, Segment;
3604 if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
3605 Segment))
3606 return false;
3607
3608 auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
3609 unsigned Opc8) {
3610 switch (MemVT.getSimpleVT().SimpleTy) {
3611 case MVT::i64:
3612 return Opc64;
3613 case MVT::i32:
3614 return Opc32;
3615 case MVT::i16:
3616 return Opc16;
3617 case MVT::i8:
3618 return Opc8;
3619 default:
3620 llvm_unreachable("Invalid size!");
3621 }
3622 };
3623
3625 switch (Opc) {
3626 case X86ISD::SUB:
3627 // Handle negate.
3628 if (IsNegate) {
3629 unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
3630 X86::NEG8m);
3631 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3632 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3633 MVT::Other, Ops);
3634 break;
3635 }
3636 [[fallthrough]];
3637 case X86ISD::ADD:
3638 // Try to match inc/dec.
3639 if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
3640 bool IsOne = isOneConstant(StoredVal.getOperand(1));
3641 bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
3642 // ADD/SUB by 1/-1 can use INC/DEC when the carry flag isn't used.
3643 if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
3644 unsigned NewOpc =
3645 ((Opc == X86ISD::ADD) == IsOne)
3646 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3647 : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
3648 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3649 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3650 MVT::Other, Ops);
3651 break;
3652 }
3653 }
3654 [[fallthrough]];
3655 case X86ISD::ADC:
3656 case X86ISD::SBB:
3657 case X86ISD::AND:
3658 case X86ISD::OR:
3659 case X86ISD::XOR: {
3660 auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
3661 switch (Opc) {
3662 case X86ISD::ADD:
3663 return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3664 X86::ADD8mr);
3665 case X86ISD::ADC:
3666 return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3667 X86::ADC8mr);
3668 case X86ISD::SUB:
3669 return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3670 X86::SUB8mr);
3671 case X86ISD::SBB:
3672 return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3673 X86::SBB8mr);
3674 case X86ISD::AND:
3675 return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3676 X86::AND8mr);
3677 case X86ISD::OR:
3678 return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3679 case X86ISD::XOR:
3680 return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3681 X86::XOR8mr);
3682 default:
3683 llvm_unreachable("Invalid opcode!");
3684 }
3685 };
3686 auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3687 switch (Opc) {
3688 case X86ISD::ADD:
3689 return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3690 X86::ADD8mi);
3691 case X86ISD::ADC:
3692 return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3693 X86::ADC8mi);
3694 case X86ISD::SUB:
3695 return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3696 X86::SUB8mi);
3697 case X86ISD::SBB:
3698 return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3699 X86::SBB8mi);
3700 case X86ISD::AND:
3701 return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3702 X86::AND8mi);
3703 case X86ISD::OR:
3704 return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3705 X86::OR8mi);
3706 case X86ISD::XOR:
3707 return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3708 X86::XOR8mi);
3709 default:
3710 llvm_unreachable("Invalid opcode!");
3711 }
3712 };
3713
3714 unsigned NewOpc = SelectRegOpcode(Opc);
3715 SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3716
3717 // See if the operand is a constant that we can fold into an immediate
3718 // operand.
3719 if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3720 int64_t OperandV = OperandC->getSExtValue();
3721
3722 // Check if we can shrink the operand enough to fit in an immediate (or
3723 // fit into a smaller immediate) by negating it and switching the
3724 // operation.
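      // For example, `add $128` needs a wider immediate, but the equivalent
      // `sub $-128` fits in a sign-extended 8-bit immediate (valid only while
      // the carry flag is unused, as checked below).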
3725 if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3726 ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3727 (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3728 isInt<32>(-OperandV))) &&
3729 hasNoCarryFlagUses(StoredVal.getValue(1))) {
3730 OperandV = -OperandV;
3731 Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3732 }
3733
3734 if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3735 Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3736 NewOpc = SelectImmOpcode(Opc);
3737 }
3738 }
3739
3740 if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3741 SDValue CopyTo =
3742 CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3743 StoredVal.getOperand(2), SDValue());
3744
3745 const SDValue Ops[] = {Base, Scale, Index, Disp,
3746 Segment, Operand, CopyTo, CopyTo.getValue(1)};
3747 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3748 Ops);
3749 } else {
3750 const SDValue Ops[] = {Base, Scale, Index, Disp,
3751 Segment, Operand, InputChain};
3752 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3753 Ops);
3754 }
3755 break;
3756 }
3757 default:
3758 llvm_unreachable("Invalid opcode!");
3759 }
3760
3761 MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3762 LoadNode->getMemOperand()};
3763 CurDAG->setNodeMemRefs(Result, MemOps);
3764
3765 // Update Load Chain uses as well.
3766 ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3767 ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3768 ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3769 CurDAG->RemoveDeadNode(Node);
3770 return true;
3771}
3772
3773// See if this is an X & Mask that we can match to BEXTR/BZHI.
3774// Where Mask is one of the following patterns:
3775// a) x & (1 << nbits) - 1
3776// b) x & ~(-1 << nbits)
3777// c) x & (-1 >> (32 - y))
3778// d) x << (32 - y) >> (32 - y)
3779// e) (1 << nbits) - 1
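// For example, with BMI2 pattern a) `x & ((1 << nbits) - 1)` is selected as
// BZHI x, nbits; with only BMI1 the same mask becomes BEXTR with a control
// value of (nbits << 8), i.e. shift 0 and length nbits.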
3780bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3781 assert(
3782 (Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND ||
3783 Node->getOpcode() == ISD::SRL) &&
3784 "Should be either an and-mask, or right-shift after clearing high bits.");
3785
3786 // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
3787 if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3788 return false;
3789
3790 MVT NVT = Node->getSimpleValueType(0);
3791
3792 // Only supported for 32 and 64 bits.
3793 if (NVT != MVT::i32 && NVT != MVT::i64)
3794 return false;
3795
3796 SDValue NBits;
3797 bool NegateNBits;
3798
3799 // If we have BMI2's BZHI, we are ok with multi-use patterns.
3800 // Else, if we only have BMI1's BEXTR, we require one-use.
3801 const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
3802 auto checkUses = [AllowExtraUsesByDefault](
3803 SDValue Op, unsigned NUses,
3804 std::optional<bool> AllowExtraUses) {
3805 return AllowExtraUses.value_or(AllowExtraUsesByDefault) ||
3806 Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3807 };
3808 auto checkOneUse = [checkUses](SDValue Op,
3809 std::optional<bool> AllowExtraUses =
3810 std::nullopt) {
3811 return checkUses(Op, 1, AllowExtraUses);
3812 };
3813 auto checkTwoUse = [checkUses](SDValue Op,
3814 std::optional<bool> AllowExtraUses =
3815 std::nullopt) {
3816 return checkUses(Op, 2, AllowExtraUses);
3817 };
3818
3819 auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3820 if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3821 assert(V.getSimpleValueType() == MVT::i32 &&
3822 V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3823 "Expected i64 -> i32 truncation");
3824 V = V.getOperand(0);
3825 }
3826 return V;
3827 };
3828
3829 // a) x & ((1 << nbits) + (-1))
3830 auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
3831 &NegateNBits](SDValue Mask) -> bool {
3832 // Match `add`. Must only have one use!
3833 if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3834 return false;
3835 // We should be adding all-ones constant (i.e. subtracting one.)
3836 if (!isAllOnesConstant(Mask->getOperand(1)))
3837 return false;
3838 // Match `1 << nbits`. Might be truncated. Must only have one use!
3839 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3840 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3841 return false;
3842 if (!isOneConstant(M0->getOperand(0)))
3843 return false;
3844 NBits = M0->getOperand(1);
3845 NegateNBits = false;
3846 return true;
3847 };
3848
3849 auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3850 V = peekThroughOneUseTruncation(V);
3851 return CurDAG->MaskedValueIsAllOnes(
3852 V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3853 NVT.getSizeInBits()));
3854 };
3855
3856 // b) x & ~(-1 << nbits)
3857 auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3858 &NBits, &NegateNBits](SDValue Mask) -> bool {
3859 // Match `~()`. Must only have one use!
3860 if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3861 return false;
3862 // The -1 only has to be all-ones for the final Node's NVT.
3863 if (!isAllOnes(Mask->getOperand(1)))
3864 return false;
3865 // Match `-1 << nbits`. Might be truncated. Must only have one use!
3866 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3867 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3868 return false;
3869 // The -1 only has to be all-ones for the final Node's NVT.
3870 if (!isAllOnes(M0->getOperand(0)))
3871 return false;
3872 NBits = M0->getOperand(1);
3873 NegateNBits = false;
3874 return true;
3875 };
3876
3877 // Try to match potentially-truncated shift amount as `(bitwidth - y)`,
3878 // or leave the shift amount as-is, but then we'll have to negate it.
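  // For example, with a 32-bit pattern a shift amount of (32 - y) is
  // canonicalized to just y with NegateNBits = false, while a plain amount z
  // is kept with NegateNBits = true so that (32 - z) is materialized later.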
3879 auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt,
3880 unsigned Bitwidth) {
3881 NBits = ShiftAmt;
3882 NegateNBits = true;
3883 // Skip over a truncate of the shift amount, if any.
3884 if (NBits.getOpcode() == ISD::TRUNCATE)
3885 NBits = NBits.getOperand(0);
3886 // Try to match the shift amount as (bitwidth - y). It should go away, too.
3887 // If it doesn't match, that's fine, we'll just negate it ourselves.
3888 if (NBits.getOpcode() != ISD::SUB)
3889 return;
3890 auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
3891 if (!V0 || V0->getZExtValue() != Bitwidth)
3892 return;
3893 NBits = NBits.getOperand(1);
3894 NegateNBits = false;
3895 };
3896
3897 // c) x & (-1 >> z) but then we'll have to subtract z from bitwidth
3898 // or
3899 // c) x & (-1 >> (32 - y))
3900 auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
3901 canonicalizeShiftAmt](SDValue Mask) -> bool {
3902 // The mask itself may be truncated.
3903 Mask = peekThroughOneUseTruncation(Mask);
3904 unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3905 // Match `l>>`. Must only have one use!
3906 if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3907 return false;
3908 // We should be shifting truly all-ones constant.
3909 if (!isAllOnesConstant(Mask.getOperand(0)))
3910 return false;
3911 SDValue M1 = Mask.getOperand(1);
3912 // The shift amount should not be used externally.
3913 if (!checkOneUse(M1))
3914 return false;
3915 canonicalizeShiftAmt(M1, Bitwidth);
3916 // Pattern c. is non-canonical, and is expanded into pattern d. iff there
3917 // is no extra use of the mask. Clearly, there was one since we are here.
3918 // But at the same time, if we need to negate the shift amount,
3919 // then we don't want the mask to stick around, else it's unprofitable.
3920 return !NegateNBits;
3921 };
3922
3923 SDValue X;
3924
3925 // d) x << z >> z but then we'll have to subtract z from bitwidth
3926 // or
3927 // d) x << (32 - y) >> (32 - y)
3928 auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
3929 AllowExtraUsesByDefault, &NegateNBits,
3930 &X](SDNode *Node) -> bool {
3931 if (Node->getOpcode() != ISD::SRL)
3932 return false;
3933 SDValue N0 = Node->getOperand(0);
3934 if (N0->getOpcode() != ISD::SHL)
3935 return false;
3936 unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3937 SDValue N1 = Node->getOperand(1);
3938 SDValue N01 = N0->getOperand(1);
3939 // Both of the shifts must be by the exact same value.
3940 if (N1 != N01)
3941 return false;
3942 canonicalizeShiftAmt(N1, Bitwidth);
3943 // There should not be any external uses of the inner shift / shift amount.
3944 // Note that while we are generally okay with external uses given BMI2,
3945 // iff we need to negate the shift amount, we are not okay with extra uses.
3946 const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
3947 if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
3948 return false;
3949 X = N0->getOperand(0);
3950 return true;
3951 };
3952
3953 auto matchLowBitMask = [matchPatternA, matchPatternB,
3954 matchPatternC](SDValue Mask) -> bool {
3955 return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3956 };
3957
3958 if (Node->getOpcode() == ISD::AND) {
3959 X = Node->getOperand(0);
3960 SDValue Mask = Node->getOperand(1);
3961
3962 if (matchLowBitMask(Mask)) {
3963 // Great.
3964 } else {
3965 std::swap(X, Mask);
3966 if (!matchLowBitMask(Mask))
3967 return false;
3968 }
3969 } else if (matchLowBitMask(SDValue(Node, 0))) {
3970 X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT);
3971 } else if (!matchPatternD(Node))
3972 return false;
3973
3974 // If we need to negate the shift amount, require BMI2 BZHI support.
3975 // It's just too unprofitable for BMI1 BEXTR.
3976 if (NegateNBits && !Subtarget->hasBMI2())
3977 return false;
3978
3979 SDLoc DL(Node);
3980
3981 // Truncate the shift amount.
3982 NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3983 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3984
3985 // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
3986 // All the other bits are undefined, we do not care about them.
3987 SDValue ImplDef = SDValue(
3988 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3989 insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3990
3991 SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
3992 insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
3993 NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
3994 MVT::i32, ImplDef, NBits, SRIdxVal),
3995 0);
3996 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3997
3998 // We might have matched the amount of high bits to be cleared,
3999 // but we want the amount of low bits to be kept, so negate it then.
4000 if (NegateNBits) {
4001 SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32);
4002 insertDAGNode(*CurDAG, SDValue(Node, 0), BitWidthC);
4003
4004 NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits);
4005 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4006 }
4007
4008 if (Subtarget->hasBMI2()) {
4009 // Great, just emit the BZHI..
4010 if (NVT != MVT::i32) {
4011 // But have to place the bit count into the wide-enough register first.
4012 NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
4013 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4014 }
4015
4016 SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
4017 ReplaceNode(Node, Extract.getNode());
4018 SelectCode(Extract.getNode());
4019 return true;
4020 }
4021
4022 // Else, if we do *NOT* have BMI2, let's find out if 'X' is *logically*
4023 // shifted (potentially with a one-use trunc in between) and the
4024 // truncation was the only use of the shift,
4025 // and if so look past the one-use truncation.
4026 {
4027 SDValue RealX = peekThroughOneUseTruncation(X);
4028 // FIXME: only if the shift is one-use?
4029 if (RealX != X && RealX.getOpcode() == ISD::SRL)
4030 X = RealX;
4031 }
4032
4033 MVT XVT = X.getSimpleValueType();
4034
4035 // Else, emitting BEXTR requires one more step.
4036 // The 'control' of BEXTR has the pattern of:
4037 // [15...8 bit][ 7...0 bit] location
4038 // [ bit count][ shift] name
4039 // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
4040
4041 // Shift NBits left by 8 bits, thus producing 'control'.
4042 // This makes the low 8 bits to be zero.
4043 SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
4044 insertDAGNode(*CurDAG, SDValue(Node, 0), C8);
4045 SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
4046 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4047
4048 // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
4049 // FIXME: only if the shift is one-use?
4050 if (X.getOpcode() == ISD::SRL) {
4051 SDValue ShiftAmt = X.getOperand(1);
4052 X = X.getOperand(0);
4053
4054 assert(ShiftAmt.getValueType() == MVT::i8 &&
4055 "Expected shift amount to be i8");
4056
4057 // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
4058 // We could zext to i16 in some form, but we intentionally don't do that.
4059 SDValue OrigShiftAmt = ShiftAmt;
4060 ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
4061 insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
4062
4063 // And now 'or' these low 8 bits of shift amount into the 'control'.
4064 Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
4065 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4066 }
4067
4068 // But have to place the 'control' into the wide-enough register first.
4069 if (XVT != MVT::i32) {
4070 Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
4071 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4072 }
4073
4074 // And finally, form the BEXTR itself.
4075 SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
4076
4077 // The 'X' was originally truncated. Do that now.
4078 if (XVT != NVT) {
4079 insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
4080 Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
4081 }
4082
4083 ReplaceNode(Node, Extract.getNode());
4084 SelectCode(Extract.getNode());
4085
4086 return true;
4087}
4088
4089// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
4090MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
4091 MVT NVT = Node->getSimpleValueType(0);
4092 SDLoc dl(Node);
4093
4094 SDValue N0 = Node->getOperand(0);
4095 SDValue N1 = Node->getOperand(1);
4096
4097 // If we have TBM we can use an immediate for the control. If we have BMI
4098 // we should only do this if the BEXTR instruction is implemented well.
4099 // Otherwise moving the control into a register makes this more costly.
4100 // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
4101 // hoisting the move immediate would make it worthwhile with a less optimal
4102 // BEXTR?
4103 bool PreferBEXTR =
4104 Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
4105 if (!PreferBEXTR && !Subtarget->hasBMI2())
4106 return nullptr;
4107
4108 // Must have a shift right.
4109 if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
4110 return nullptr;
4111
4112 // Shift can't have additional users.
4113 if (!N0->hasOneUse())
4114 return nullptr;
4115
4116 // Only supported for 32 and 64 bits.
4117 if (NVT != MVT::i32 && NVT != MVT::i64)
4118 return nullptr;
4119
4120 // Shift amount and RHS of and must be constant.
4121 auto *MaskCst = dyn_cast<ConstantSDNode>(N1);
4122 auto *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4123 if (!MaskCst || !ShiftCst)
4124 return nullptr;
4125
4126 // And RHS must be a mask.
4127 uint64_t Mask = MaskCst->getZExtValue();
4128 if (!isMask_64(Mask))
4129 return nullptr;
4130
4131 uint64_t Shift = ShiftCst->getZExtValue();
4132 uint64_t MaskSize = llvm::popcount(Mask);
4133
4134 // Don't interfere with something that can be handled by extracting AH.
4135 // TODO: If we are able to fold a load, BEXTR might still be better than AH.
4136 if (Shift == 8 && MaskSize == 8)
4137 return nullptr;
4138
4139 // Make sure we are only using bits that were in the original value, not
4140 // shifted in.
4141 if (Shift + MaskSize > NVT.getSizeInBits())
4142 return nullptr;
4143
4144 // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
4145 // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
4146 // does not fit into 32 bits. Load folding is not a sufficient reason.
4147 if (!PreferBEXTR && MaskSize <= 32)
4148 return nullptr;
4149
4150 SDValue Control;
4151 unsigned ROpc, MOpc;
4152
4153#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
4154 if (!PreferBEXTR) {
4155 assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
4156 // If we can't make use of BEXTR then we can't fuse shift+mask stages.
4157 // Let's perform the mask first, and apply shift later. Note that we need to
4158 // widen the mask to account for the fact that we'll apply shift afterwards!
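    // For example, for i64 `(x >> 2) & ((1 << 40) - 1)` this emits BZHI with
    // control 42 (= 2 + 40) on the unshifted x, then shifts the result right
    // by 2.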
4159 Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
4160 ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
4161 : GET_EGPR_IF_ENABLED(X86::BZHI32rr);
4162 MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
4163 : GET_EGPR_IF_ENABLED(X86::BZHI32rm);
4164 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
4165 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4166 } else {
4167 // The 'control' of BEXTR has the pattern of:
4168 // [15...8 bit][ 7...0 bit] location
4169 // [ bit count][ shift] name
4170 // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
4171 Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
4172 if (Subtarget->hasTBM()) {
4173 ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
4174 MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
4175 } else {
4176 assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
4177 // BMI requires the immediate to be placed in a register.
4178 ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
4179 : GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
4180 MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
4181 : GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
4182 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
4183 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4184 }
4185 }
4186
4187 MachineSDNode *NewNode;
4188 SDValue Input = N0->getOperand(0);
4189 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4190 if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4191 SDValue Ops[] = {
4192 Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
4193 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4194 NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4195 // Update the chain.
4196 ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
4197 // Record the mem-refs
4198 CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
4199 } else {
4200 NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
4201 }
4202
4203 if (!PreferBEXTR) {
4204 // We still need to apply the shift.
4205 SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
4206 unsigned NewOpc = NVT == MVT::i64 ? GET_ND_IF_ENABLED(X86::SHR64ri)
4207 : GET_ND_IF_ENABLED(X86::SHR32ri);
4208 NewNode =
4209 CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
4210 }
4211
4212 return NewNode;
4213}
4214
4215// Emit a PCMISTR(I/M) instruction.
4216MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
4217 bool MayFoldLoad, const SDLoc &dl,
4218 MVT VT, SDNode *Node) {
4219 SDValue N0 = Node->getOperand(0);
4220 SDValue N1 = Node->getOperand(1);
4221 SDValue Imm = Node->getOperand(2);
4222 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4223 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
4224
4225 // Try to fold a load. No need to check alignment.
4226 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4227 if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4228 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4229 N1.getOperand(0) };
4230 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
4231 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4232 // Update the chain.
4233 ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
4234 // Record the mem-refs
4235 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4236 return CNode;
4237 }
4238
4239 SDValue Ops[] = { N0, N1, Imm };
4240 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
4241 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
4242 return CNode;
4243}
4244
4245// Emit a PCMESTR(I/M) instruction. Also return the Glue result in case we need
4246// to emit a second instruction after this one. This is needed since we have two
4247// copyToReg nodes glued before this and we need to continue that glue through.
4248MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
4249 bool MayFoldLoad, const SDLoc &dl,
4250 MVT VT, SDNode *Node,
4251 SDValue &InGlue) {
4252 SDValue N0 = Node->getOperand(0);
4253 SDValue N2 = Node->getOperand(2);
4254 SDValue Imm = Node->getOperand(4);
4255 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4256 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
4257
4258 // Try to fold a load. No need to check alignment.
4259 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4260 if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4261 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4262 N2.getOperand(0), InGlue };
4263 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
4264 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4265 InGlue = SDValue(CNode, 3);
4266 // Update the chain.
4267 ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
4268 // Record the mem-refs
4269 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
4270 return CNode;
4271 }
4272
4273 SDValue Ops[] = { N0, N2, Imm, InGlue };
4274 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
4275 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
4276 InGlue = SDValue(CNode, 2);
4277 return CNode;
4278}
4279
4280bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4281 EVT VT = N->getValueType(0);
4282
4283 // Only handle scalar shifts.
4284 if (VT.isVector())
4285 return false;
4286
4287 // Narrower shifts only mask to 5 bits in hardware.
4288 unsigned Size = VT == MVT::i64 ? 64 : 32;
4289
4290 SDValue OrigShiftAmt = N->getOperand(1);
4291 SDValue ShiftAmt = OrigShiftAmt;
4292 SDLoc DL(N);
4293
4294 // Skip over a truncate of the shift amount.
4295 if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
4296 ShiftAmt = ShiftAmt->getOperand(0);
4297
4298 // This function is called after X86DAGToDAGISel::matchBitExtract(),
4299 // so we are not afraid that we might mess up BZHI/BEXTR pattern.
4300
4301 SDValue NewShiftAmt;
4302 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB ||
4303 ShiftAmt->getOpcode() == ISD::XOR) {
4304 SDValue Add0 = ShiftAmt->getOperand(0);
4305 SDValue Add1 = ShiftAmt->getOperand(1);
4306 auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
4307 auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
4308 // If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X
4309 // to avoid the ADD/SUB/XOR.
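    // For example, a 32-bit shift by (y + 32) becomes a shift by just y,
    // since the hardware masks the amount to the low 5 bits anyway.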
4310 if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
4311 NewShiftAmt = Add0;
4312
4313 } else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() &&
4314 ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) ||
4315 (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) {
4316 // If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X
4317 // we can replace it with a NOT. In the XOR case it may save some code
4318 // size, in the SUB case it also may save a move.
4319 assert(Add0C == nullptr || Add1C == nullptr);
4320
4321 // We can only do N-X, not X-N
4322 if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr)
4323 return false;
4324
4325 EVT OpVT = ShiftAmt.getValueType();
4326
4327 SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT);
4328 NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
4329 Add0C == nullptr ? Add0 : Add1, AllOnes);
4330 insertDAGNode(*CurDAG, OrigShiftAmt, AllOnes);
4331 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4332 // If we are shifting by N-X where N == 0 mod Size, then just shift by
4333 // -X to generate a NEG instead of a SUB of a constant.
4334 } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
4335 Add0C->getZExtValue() != 0) {
4336 EVT SubVT = ShiftAmt.getValueType();
4337 SDValue X;
4338 if (Add0C->getZExtValue() % Size == 0)
4339 X = Add1;
4340 else if (ShiftAmt.hasOneUse() && Size == 64 &&
4341 Add0C->getZExtValue() % 32 == 0) {
4342 // We have a 64-bit shift by (n*32-x), turn it into -(x+n*32).
4343 // This is mainly beneficial if we already compute (x+n*32).
4344 if (Add1.getOpcode() == ISD::TRUNCATE) {
4345 Add1 = Add1.getOperand(0);
4346 SubVT = Add1.getValueType();
4347 }
4348 if (Add0.getValueType() != SubVT) {
4349 Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT);
4350 insertDAGNode(*CurDAG, OrigShiftAmt, Add0);
4351 }
4352
4353 X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0);
4354 insertDAGNode(*CurDAG, OrigShiftAmt, X);
4355 } else
4356 return false;
4357 // Insert a negate op.
4358 // TODO: This isn't guaranteed to replace the sub if there is a logic cone
4359 // that uses it that's not a shift.
4360 SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
4361 SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
4362 NewShiftAmt = Neg;
4363
4364 // Insert these operands into a valid topological order so they can
4365 // get selected independently.
4366 insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
4367 insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
4368 } else
4369 return false;
4370 } else
4371 return false;
4372
4373 if (NewShiftAmt.getValueType() != MVT::i8) {
4374 // Need to truncate the shift amount.
4375 NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
4376 // Add to a correct topological ordering.
4377 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4378 }
4379
4380 // Insert a new mask to keep the shift amount legal. This should be removed
4381 // by isel patterns.
4382 NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
4383 CurDAG->getConstant(Size - 1, DL, MVT::i8));
4384 // Place in a correct topological ordering.
4385 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4386
4387 SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
4388 NewShiftAmt);
4389 if (UpdatedNode != N) {
4390 // If we found an existing node, we should replace ourselves with that node
4391 // and wait for it to be selected after its other users.
4392 ReplaceNode(N, UpdatedNode);
4393 return true;
4394 }
4395
4396 // If the original shift amount is now dead, delete it so that we don't run
4397 // it through isel.
4398 if (OrigShiftAmt.getNode()->use_empty())
4399 CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
4400
4401 // Now that we've optimized the shift amount, defer to normal isel to get
4402 // load folding and legacy vs BMI2 selection without repeating it here.
4403 SelectCode(N);
4404 return true;
4405}
4406
4407bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
4408 MVT NVT = N->getSimpleValueType(0);
4409 unsigned Opcode = N->getOpcode();
4410 SDLoc dl(N);
4411
4412 // For operations of the form (x << C1) op C2, check if we can use a smaller
4413 // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
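  // For example, (or (shl x, 8), 0x7F00) becomes (shl (or x, 0x7F), 8),
  // shrinking the OR immediate from 32 bits to a sign-extended 8 bits.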
4414 SDValue Shift = N->getOperand(0);
4415 SDValue N1 = N->getOperand(1);
4416
4417 auto *Cst = dyn_cast<ConstantSDNode>(N1);
4418 if (!Cst)
4419 return false;
4420
4421 int64_t Val = Cst->getSExtValue();
4422
4423 // If we have an any_extend feeding the AND, look through it to see if there
4424 // is a shift behind it. But only if the AND doesn't use the extended bits.
4425 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
4426 bool FoundAnyExtend = false;
4427 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
4428 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
4429 isUInt<32>(Val)) {
4430 FoundAnyExtend = true;
4431 Shift = Shift.getOperand(0);
4432 }
4433
4434 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
4435 return false;
4436
4437 // i8 is unshrinkable, i16 should be promoted to i32.
4438 if (NVT != MVT::i32 && NVT != MVT::i64)
4439 return false;
4440
4441 auto *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
4442 if (!ShlCst)
4443 return false;
4444
4445 uint64_t ShAmt = ShlCst->getZExtValue();
4446
4447 // Make sure that we don't change the operation by removing bits.
4448 // This only matters for OR and XOR, AND is unaffected.
4449 uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
4450 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
4451 return false;
4452
4453 // Check the minimum bitwidth for the new constant.
4454 // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
4455 auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
4456 if (Opcode == ISD::AND) {
4457 // AND32ri is the same as AND64ri32 with zext imm.
4458 // Try this before sign extended immediates below.
4459 ShiftedVal = (uint64_t)Val >> ShAmt;
4460 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4461 return true;
4462 // Also swap order when the AND can become MOVZX.
4463 if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
4464 return true;
4465 }
4466 ShiftedVal = Val >> ShAmt;
4467 if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
4468 (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
4469 return true;
4470 if (Opcode != ISD::AND) {
4471 // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
4472 ShiftedVal = (uint64_t)Val >> ShAmt;
4473 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4474 return true;
4475 }
4476 return false;
4477 };
4478
4479 int64_t ShiftedVal;
4480 if (!CanShrinkImmediate(ShiftedVal))
4481 return false;
4482
4483 // Ok, we can reorder to get a smaller immediate.
4484
4485 // But, it's possible the original immediate allowed an AND to become MOVZX.
4486 // Doing this late to delay the MaskedValueIsZero call as long as
4487 // possible.
4488 if (Opcode == ISD::AND) {
4489 // Find the smallest zext this could possibly be.
4490 unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
4491 ZExtWidth = llvm::bit_ceil(std::max(ZExtWidth, 8U));
4492
4493 // Figure out which bits need to be zero to achieve that mask.
4494 APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
4495 ZExtWidth);
4496 NeededMask &= ~Cst->getAPIntValue();
4497
4498 if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
4499 return false;
4500 }
4501
4502 SDValue X = Shift.getOperand(0);
4503 if (FoundAnyExtend) {
4504 SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
4505 insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
4506 X = NewX;
4507 }
4508
4509 SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
4510 insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
4511 SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
4512 insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
4513 SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
4514 Shift.getOperand(1));
4515 ReplaceNode(N, NewSHL.getNode());
4516 SelectCode(NewSHL.getNode());
4517 return true;
4518}
4519
4520bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
4521 SDNode *ParentB, SDNode *ParentC,
4522 SDValue A, SDValue B, SDValue C,
4523 uint8_t Imm) {
4524 assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) &&
4525 C.isOperandOf(ParentC) && "Incorrect parent node");
4526
4527 auto tryFoldLoadOrBCast =
4528 [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
4529 SDValue &Index, SDValue &Disp, SDValue &Segment) {
4530 if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
4531 return true;
4532
4533 // Not a load, check for broadcast which may be behind a bitcast.
4534 if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
4535 P = L.getNode();
4536 L = L.getOperand(0);
4537 }
4538
4539 if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
4540 return false;
4541
4542 // Only 32 and 64 bit broadcasts are supported.
4543 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
4544 unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
4545 if (Size != 32 && Size != 64)
4546 return false;
4547
4548 return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
4549 };
4550
4551 bool FoldedLoad = false;
4552 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4553 if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4554 FoldedLoad = true;
4555 } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
4556 Tmp4)) {
4557 FoldedLoad = true;
4558 std::swap(A, C);
4559 // Swap bits 1/4 and 3/6.
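      // (The ternlog immediate is indexed by (A << 2) | (B << 1) | C, so
      // exchanging A and C swaps truth-table entries 1<->4 and 3<->6 while
      // entries 0, 2, 5 and 7 stay put, hence the 0xa5 mask below.)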
4560 uint8_t OldImm = Imm;
4561 Imm = OldImm & 0xa5;
4562 if (OldImm & 0x02) Imm |= 0x10;
4563 if (OldImm & 0x10) Imm |= 0x02;
4564 if (OldImm & 0x08) Imm |= 0x40;
4565 if (OldImm & 0x40) Imm |= 0x08;
4566 } else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3,
4567 Tmp4)) {
4568 FoldedLoad = true;
4569 std::swap(B, C);
4570 // Swap bits 1/2 and 5/6.
4571 uint8_t OldImm = Imm;
4572 Imm = OldImm & 0x99;
4573 if (OldImm & 0x02) Imm |= 0x04;
4574 if (OldImm & 0x04) Imm |= 0x02;
4575 if (OldImm & 0x20) Imm |= 0x40;
4576 if (OldImm & 0x40) Imm |= 0x20;
4577 }
4578
4579 SDLoc DL(Root);
4580
4581 SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);
4582
4583 MVT NVT = Root->getSimpleValueType(0);
4584
4585 MachineSDNode *MNode;
4586 if (FoldedLoad) {
4587 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
4588
4589 unsigned Opc;
4590 if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) {
4591 auto *MemIntr = cast<MemIntrinsicSDNode>(C);
4592 unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
4593 assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!");
4594
4595 bool UseD = EltSize == 32;
4596 if (NVT.is128BitVector())
4597 Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
4598 else if (NVT.is256BitVector())
4599 Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
4600 else if (NVT.is512BitVector())
4601 Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
4602 else
4603 llvm_unreachable("Unexpected vector size!");
4604 } else {
4605 bool UseD = NVT.getVectorElementType() == MVT::i32;
4606 if (NVT.is128BitVector())
4607 Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
4608 else if (NVT.is256BitVector())
4609 Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
4610 else if (NVT.is512BitVector())
4611 Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
4612 else
4613 llvm_unreachable("Unexpected vector size!");
4614 }
4615
4616 SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)};
4617 MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
4618
4619 // Update the chain.
4620 ReplaceUses(C.getValue(1), SDValue(MNode, 1));
4621 // Record the mem-refs
4622 CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()});
4623 } else {
4624 bool UseD = NVT.getVectorElementType() == MVT::i32;
4625 unsigned Opc;
4626 if (NVT.is128BitVector())
4627 Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
4628 else if (NVT.is256BitVector())
4629 Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
4630 else if (NVT.is512BitVector())
4631 Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
4632 else
4633 llvm_unreachable("Unexpected vector size!");
4634
4635 MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
4636 }
4637
4638 ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0));
4639 CurDAG->RemoveDeadNode(Root);
4640 return true;
4641}
4642
4643// Try to match two logic ops to a VPTERNLOG.
4644// FIXME: Handle more complex patterns that use an operand more than once?
4645bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
4646 MVT NVT = N->getSimpleValueType(0);
4647
4648 // Make sure we support VPTERNLOG.
4649 if (!NVT.isVector() || !Subtarget->hasAVX512() ||
4650 NVT.getVectorElementType() == MVT::i1)
4651 return false;
4652
4653 // We need VLX for 128/256-bit.
4654 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
4655 return false;
4656
4657 SDValue N0 = N->getOperand(0);
4658 SDValue N1 = N->getOperand(1);
4659
4660 auto getFoldableLogicOp = [](SDValue Op) {
4661 // Peek through single use bitcast.
4662 if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
4663 Op = Op.getOperand(0);
4664
4665 if (!Op.hasOneUse())
4666 return SDValue();
4667
4668 unsigned Opc = Op.getOpcode();
4669 if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
4670 Opc == X86ISD::ANDNP)
4671 return Op;
4672
4673 return SDValue();
4674 };
4675
4676 SDValue A, FoldableOp;
4677 if ((FoldableOp = getFoldableLogicOp(N1))) {
4678 A = N0;
4679 } else if ((FoldableOp = getFoldableLogicOp(N0))) {
4680 A = N1;
4681 } else
4682 return false;
4683
4684 SDValue B = FoldableOp.getOperand(0);
4685 SDValue C = FoldableOp.getOperand(1);
4686 SDNode *ParentA = N;
4687 SDNode *ParentB = FoldableOp.getNode();
4688 SDNode *ParentC = FoldableOp.getNode();
4689
4690 // We can build the appropriate control immediate by performing the logic
4691 // operation we're matching using these constants for A, B, and C.
4692 uint8_t TernlogMagicA = 0xf0;
4693 uint8_t TernlogMagicB = 0xcc;
4694 uint8_t TernlogMagicC = 0xaa;
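// Added note (illustrative): with the table index taken as (A<<2)|(B<<1)|C,
// these magic bytes are the truth-table columns of A, B and C themselves, so
// evaluating the matched logic expression on them yields the VPTERNLOG
// immediate directly. For example, (or A, (and B, C)) would give
// 0xf0 | (0xcc & 0xaa) = 0xf8.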
4695
4696 // Some of the inputs may be inverted, peek through them and invert the
4697 // magic values accordingly.
4698 // TODO: There may be a bitcast before the xor that we should peek through.
4699 auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) {
4700 if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() &&
4701 ISD::isBuildVectorAllOnes(Op.getOperand(1).getNode())) {
4702 Magic = ~Magic;
4703 Parent = Op.getNode();
4704 Op = Op.getOperand(0);
4705 }
4706 };
4707
4708 PeekThroughNot(A, ParentA, TernlogMagicA);
4709 PeekThroughNot(B, ParentB, TernlogMagicB);
4710 PeekThroughNot(C, ParentC, TernlogMagicC);
4711
4712 uint8_t Imm;
4713 switch (FoldableOp.getOpcode()) {
4714 default: llvm_unreachable("Unexpected opcode!");
4715 case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
4716 case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break;
4717 case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;
4718 case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
4719 }
4720
4721 switch (N->getOpcode()) {
4722 default: llvm_unreachable("Unexpected opcode!");
4723 case X86ISD::ANDNP:
4724 if (A == N0)
4725 Imm &= ~TernlogMagicA;
4726 else
4727 Imm = ~(Imm) & TernlogMagicA;
4728 break;
4729 case ISD::AND: Imm &= TernlogMagicA; break;
4730 case ISD::OR: Imm |= TernlogMagicA; break;
4731 case ISD::XOR: Imm ^= TernlogMagicA; break;
4732 }
4733
4734 return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
4735}
4736
4737/// If the high bits of an 'and' operand are known zero, try setting the
4738/// high bits of an 'and' constant operand to produce a smaller encoding by
4739/// creating a small, sign-extended negative immediate rather than a large
4740/// positive one. This reverses a transform in SimplifyDemandedBits that
4741/// shrinks mask constants by clearing bits. There is also a possibility that
4742/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
4743/// case, just replace the 'and'. Return 'true' if the node is replaced.
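/// For example (illustrative, not part of the original comment): if the
/// other operand of an i32 'and' is known to have its top 16 bits zero, a
/// mask of 0x0000fff0 (which needs an imm32 encoding) can be replaced by
/// 0xfffffff0, i.e. -16, which fits in a sign-extended imm8.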
4744bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
4745 // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
4746 // have immediate operands.
4747 MVT VT = And->getSimpleValueType(0);
4748 if (VT != MVT::i32 && VT != MVT::i64)
4749 return false;
4750
4751 auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
4752 if (!And1C)
4753 return false;
4754
4755 // Bail out if the mask constant is already negative. It can't shrink any more.
4756 // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
4757 // patterns to use a 32-bit and instead of a 64-bit and by relying on the
4758 // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
4759 // are negative too.
4760 APInt MaskVal = And1C->getAPIntValue();
4761 unsigned MaskLZ = MaskVal.countl_zero();
4762 if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
4763 return false;
4764
4765 // Don't extend into the upper 32 bits of a 64 bit mask.
4766 if (VT == MVT::i64 && MaskLZ >= 32) {
4767 MaskLZ -= 32;
4768 MaskVal = MaskVal.trunc(32);
4769 }
4770
4771 SDValue And0 = And->getOperand(0);
4772 APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
4773 APInt NegMaskVal = MaskVal | HighZeros;
4774
4775 // If a negative constant would not allow a smaller encoding, there's no need
4776 // to continue. Only change the constant when we know it's a win.
4777 unsigned MinWidth = NegMaskVal.getSignificantBits();
4778 if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getSignificantBits() <= 32))
4779 return false;
4780
4781 // Extend masks if we truncated above.
4782 if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
4783 NegMaskVal = NegMaskVal.zext(64);
4784 HighZeros = HighZeros.zext(64);
4785 }
4786
4787 // The variable operand must be all zeros in the top bits to allow using the
4788 // new, negative constant as the mask.
4789 if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
4790 return false;
4791
4792 // Check if the mask is -1. In that case, this is an unnecessary instruction
4793 // that escaped earlier analysis.
4794 if (NegMaskVal.isAllOnes()) {
4795 ReplaceNode(And, And0.getNode());
4796 return true;
4797 }
4798
4799 // A negative mask allows a smaller encoding. Create a new 'and' node.
4800 SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
4801 insertDAGNode(*CurDAG, SDValue(And, 0), NewMask);
4802 SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
4803 ReplaceNode(And, NewAnd.getNode());
4804 SelectCode(NewAnd.getNode());
4805 return true;
4806}
4807
4808static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
4809 bool FoldedBCast, bool Masked) {
4810#define VPTESTM_CASE(VT, SUFFIX) \
4811case MVT::VT: \
4812 if (Masked) \
4813 return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \
4814 return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;
4815
4816
4817#define VPTESTM_BROADCAST_CASES(SUFFIX) \
4818default: llvm_unreachable("Unexpected VT!"); \
4819VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
4820VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
4821VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
4822VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
4823VPTESTM_CASE(v16i32, DZ##SUFFIX) \
4824VPTESTM_CASE(v8i64, QZ##SUFFIX)
4825
4826#define VPTESTM_FULL_CASES(SUFFIX) \
4827VPTESTM_BROADCAST_CASES(SUFFIX) \
4828VPTESTM_CASE(v16i8, BZ128##SUFFIX) \
4829VPTESTM_CASE(v8i16, WZ128##SUFFIX) \
4830VPTESTM_CASE(v32i8, BZ256##SUFFIX) \
4831VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
4832VPTESTM_CASE(v64i8, BZ##SUFFIX) \
4833VPTESTM_CASE(v32i16, WZ##SUFFIX)
4834
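// Added note (illustrative): with SUFFIX = rr, VPTESTM_FULL_CASES(rr)
// instantiates VPTESTM_CASE(v4i32, DZ128rr), which expands to:
//   case MVT::v4i32:
//     if (Masked)
//       return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
//     return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;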
4835 if (FoldedBCast) {
4836 switch (TestVT.SimpleTy) {
4837 VPTESTM_BROADCAST_CASES(rmb)
4838 }
4839 }
4840
4841 if (FoldedLoad) {
4842 switch (TestVT.SimpleTy) {
4843 VPTESTM_FULL_CASES(rm)
4844 }
4845 }
4846
4847 switch (TestVT.SimpleTy) {
4848 VPTESTM_FULL_CASES(rr)
4849 }
4850
4851#undef VPTESTM_FULL_CASES
4852#undef VPTESTM_BROADCAST_CASES
4853#undef VPTESTM_CASE
4854}
4855
4856// Try to create VPTESTM instruction. If InMask is not null, it will be used
4857// to form a masked operation.
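// Added example (illustrative): a pattern such as
//   (setcc (and X, (load p)), 0, setne)
// with an i1-vector result can be selected here to a single vptestm that
// performs the AND and produces the mask in one instruction, e.g. a
// VPTESTMDZrm-style opcode for v16i32 with the load folded.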
4858bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
4859 SDValue InMask) {
4860 assert(Subtarget->hasAVX512() && "Expected AVX512!");
4861 assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
4862 "Unexpected VT!");
4863
4864 // Look for equal and not equal compares.
4865 ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4866 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4867 return false;
4868
4869 SDValue SetccOp0 = Setcc.getOperand(0);
4870 SDValue SetccOp1 = Setcc.getOperand(1);
4871
4872 // Canonicalize the all zero vector to the RHS.
4873 if (ISD::isBuildVectorAllZeros(SetccOp0.getNode()))
4874 std::swap(SetccOp0, SetccOp1);
4875
4876 // See if we're comparing against zero.
4877 if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode()))
4878 return false;
4879
4880 SDValue N0 = SetccOp0;
4881
4882 MVT CmpVT = N0.getSimpleValueType();
4883 MVT CmpSVT = CmpVT.getVectorElementType();
4884
4885 // Start with both operands the same. We'll try to refine this.
4886 SDValue Src0 = N0;
4887 SDValue Src1 = N0;
4888
4889 {
4890 // Look through single use bitcasts.
4891 SDValue N0Temp = N0;
4892 if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
4893 N0Temp = N0.getOperand(0);
4894
4895 // Look for single use AND.
4896 if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4897 Src0 = N0Temp.getOperand(0);
4898 Src1 = N0Temp.getOperand(1);
4899 }
4900 }
4901
4902 // Without VLX we need to widen the operation.
4903 bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4904
4905 auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L,
4906 SDValue &Base, SDValue &Scale, SDValue &Index,
4907 SDValue &Disp, SDValue &Segment) {
4908 // If we need to widen, we can't fold the load.
4909 if (!Widen)
4910 if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
4911 return true;
4912
4913 // If we didn't fold a load, try to match a broadcast instead; no widening
4914 // limitation applies, but only 32- and 64-bit element types are supported.
4915 if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64)
4916 return false;
4917
4918 // Look through single use bitcasts.
4919 if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
4920 P = L.getNode();
4921 L = L.getOperand(0);
4922 }
4923
4924 if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
4925 return false;
4926
4927 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
4928 if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits())
4929 return false;
4930
4931 return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
4932 };
4933
4934 // We can only fold loads if the sources are unique.
4935 bool CanFoldLoads = Src0 != Src1;
4936
4937 bool FoldedLoad = false;
4938 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4939 if (CanFoldLoads) {
4940 FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2,
4941 Tmp3, Tmp4);
4942 if (!FoldedLoad) {
4943 // And is commutative.
4944 FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1,
4945 Tmp2, Tmp3, Tmp4);
4946 if (FoldedLoad)
4947 std::swap(Src0, Src1);
4948 }
4949 }
4950
4951 bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD;
4952
4953 bool IsMasked = InMask.getNode() != nullptr;
4954
4955 SDLoc dl(Root);
4956
4957 MVT ResVT = Setcc.getSimpleValueType();
4958 MVT MaskVT = ResVT;
4959 if (Widen) {
4960 // Widen the inputs using insert_subreg or copy_to_regclass.
4961 unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
4962 unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
4963 unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
4964 CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
4965 MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4966 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4967 CmpVT), 0);
4968 Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4969
4970 if (!FoldedBCast)
4971 Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4972
4973 if (IsMasked) {
4974 // Widen the mask.
4975 unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID();
4976 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4977 InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4978 dl, MaskVT, InMask, RC), 0);
4979 }
4980 }
4981
4982 bool IsTestN = CC == ISD::SETEQ;
4983 unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4984 IsMasked);
4985
4986 MachineSDNode *CNode;
4987 if (FoldedLoad) {
4988 SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4989
4990 if (IsMasked) {
4991 SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4992 Src1.getOperand(0) };
4993 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4994 } else {
4995 SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4996 Src1.getOperand(0) };
4997 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4998 }
4999
5000 // Update the chain.
5001 ReplaceUses(Src1.getValue(1), SDValue(CNode, 1));
5002 // Record the mem-refs
5003 CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
5004 } else {
5005 if (IsMasked)
5006 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
5007 else
5008 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
5009 }
5010
5011 // If we widened, we need to shrink the mask VT.
5012 if (Widen) {
5013 unsigned RegClass = TLI->getRegClassFor(ResVT)->getID();
5014 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
5015 CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
5016 dl, ResVT, SDValue(CNode, 0), RC);
5017 }
5018
5019 ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
5020 CurDAG->RemoveDeadNode(Root);
5021 return true;
5022}
5023
5024// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
5025// into vpternlog.
5026bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
5027 assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
5028
5029 MVT NVT = N->getSimpleValueType(0);
5030
5031 // Make sure we support VPTERNLOG.
5032 if (!NVT.isVector() || !Subtarget->hasAVX512())
5033 return false;
5034
5035 // We need VLX for 128/256-bit.
5036 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
5037 return false;
5038
5039 SDValue N0 = N->getOperand(0);
5040 SDValue N1 = N->getOperand(1);
5041
5042 // Canonicalize AND to LHS.
5043 if (N1.getOpcode() == ISD::AND)
5044 std::swap(N0, N1);
5045
5046 if (N0.getOpcode() != ISD::AND ||
5047 N1.getOpcode() != X86ISD::ANDNP ||
5048 !N0.hasOneUse() || !N1.hasOneUse())
5049 return false;
5050
5051 // ANDN is not commutable, so use it to pin down A and C.
5052 SDValue A = N1.getOperand(0);
5053 SDValue C = N1.getOperand(1);
5054
5055 // AND is commutable, if one operand matches A, the other operand is B.
5056 // Otherwise this isn't a match.
5057 SDValue B;
5058 if (N0.getOperand(0) == A)
5059 B = N0.getOperand(1);
5060 else if (N0.getOperand(1) == A)
5061 B = N0.getOperand(0);
5062 else
5063 return false;
5064
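 // Added note (illustrative): 0xCA below is the bit-select truth table
 // derived from the usual magic columns A=0xf0, B=0xcc, C=0xaa:
 // (0xf0 & 0xcc) | (~0xf0 & 0xaa) = 0xc0 | 0x0a = 0xca, i.e. take B where A
 // is set and C where A is clear.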
5065 SDLoc dl(N);
5066 SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
5067 SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
5068 ReplaceNode(N, Ternlog.getNode());
5069
5070 return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
5071 Ternlog.getNode(), A, B, C, 0xCA);
5072}
5073
5074void X86DAGToDAGISel::Select(SDNode *Node) {
5075 MVT NVT = Node->getSimpleValueType(0);
5076 unsigned Opcode = Node->getOpcode();
5077 SDLoc dl(Node);
5078
5079 if (Node->isMachineOpcode()) {
5080 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
5081 Node->setNodeId(-1);
5082 return; // Already selected.
5083 }
5084
5085 switch (Opcode) {
5086 default: break;
5087 case ISD::INTRINSIC_W_CHAIN: {
5088 unsigned IntNo = Node->getConstantOperandVal(1);
5089 switch (IntNo) {
5090 default: break;
5091 case Intrinsic::x86_encodekey128:
5092 case Intrinsic::x86_encodekey256: {
5093 if (!Subtarget->hasKL())
5094 break;
5095
5096 unsigned Opcode;
5097 switch (IntNo) {
5098 default: llvm_unreachable("Impossible intrinsic");
5099 case Intrinsic::x86_encodekey128:
5100 Opcode = X86::ENCODEKEY128;
5101 break;
5102 case Intrinsic::x86_encodekey256:
5103 Opcode = X86::ENCODEKEY256;
5104 break;
5105 }
5106
5107 SDValue Chain = Node->getOperand(0);
5108 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
5109 SDValue());
5110 if (Opcode == X86::ENCODEKEY256)
5111 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
5112 Chain.getValue(1));
5113
5114 MachineSDNode *Res = CurDAG->getMachineNode(
5115 Opcode, dl, Node->getVTList(),
5116 {Node->getOperand(2), Chain, Chain.getValue(1)});
5117 ReplaceNode(Node, Res);
5118 return;
5119 }
5120 case Intrinsic::x86_tileloadd64_internal:
5121 case Intrinsic::x86_tileloaddt164_internal: {
5122 if (!Subtarget->hasAMXTILE())
5123 break;
5124 auto *MFI =
5125 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5126 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
5127 unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
5128 ? X86::PTILELOADDV
5129 : X86::PTILELOADDT1V;
5130 // _tile_loadd_internal(row, col, buf, STRIDE)
5131 SDValue Base = Node->getOperand(4);
5132 SDValue Scale = getI8Imm(1, dl);
5133 SDValue Index = Node->getOperand(5);
5134 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5135 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5136 SDValue Chain = Node->getOperand(0);
5137 MachineSDNode *CNode;
5138 SDValue Ops[] = {Node->getOperand(2),
5139 Node->getOperand(3),
5140 Base,
5141 Scale,
5142 Index,
5143 Disp,
5144 Segment,
5145 Chain};
5146 CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
5147 ReplaceNode(Node, CNode);
5148 return;
5149 }
5150 }
5151 break;
5152 }
5153 case ISD::INTRINSIC_VOID: {
5154 unsigned IntNo = Node->getConstantOperandVal(1);
5155 switch (IntNo) {
5156 default: break;
5157 case Intrinsic::x86_sse3_monitor:
5158 case Intrinsic::x86_monitorx:
5159 case Intrinsic::x86_clzero: {
5160 bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
5161
5162 unsigned Opc = 0;
5163 switch (IntNo) {
5164 default: llvm_unreachable("Unexpected intrinsic!");
5165 case Intrinsic::x86_sse3_monitor:
5166 if (!Subtarget->hasSSE3())
5167 break;
5168 Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
5169 break;
5170 case Intrinsic::x86_monitorx:
5171 if (!Subtarget->hasMWAITX())
5172 break;
5173 Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
5174 break;
5175 case Intrinsic::x86_clzero:
5176 if (!Subtarget->hasCLZERO())
5177 break;
5178 Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
5179 break;
5180 }
5181
5182 if (Opc) {
5183 unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
5184 SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
5185 Node->getOperand(2), SDValue());
5186 SDValue InGlue = Chain.getValue(1);
5187
5188 if (IntNo == Intrinsic::x86_sse3_monitor ||
5189 IntNo == Intrinsic::x86_monitorx) {
5190 // Copy the other two operands to ECX and EDX.
5191 Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
5192 InGlue);
5193 InGlue = Chain.getValue(1);
5194 Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
5195 InGlue);
5196 InGlue = Chain.getValue(1);
5197 }
5198
5199 MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
5200 { Chain, InGlue});
5201 ReplaceNode(Node, CNode);
5202 return;
5203 }
5204
5205 break;
5206 }
5207 case Intrinsic::x86_tilestored64_internal: {
5208 auto *MFI =
5209 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5210 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
5211 unsigned Opc = X86::PTILESTOREDV;
5212 // _tile_stored_internal(row, col, buf, STRIDE, c)
5213 SDValue Base = Node->getOperand(4);
5214 SDValue Scale = getI8Imm(1, dl);
5215 SDValue Index = Node->getOperand(5);
5216 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5217 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5218 SDValue Chain = Node->getOperand(0);
5219 MachineSDNode *CNode;
5220 SDValue Ops[] = {Node->getOperand(2),
5221 Node->getOperand(3),
5222 Base,
5223 Scale,
5224 Index,
5225 Disp,
5226 Segment,
5227 Node->getOperand(6),
5228 Chain};
5229 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5230 ReplaceNode(Node, CNode);
5231 return;
5232 }
5233 case Intrinsic::x86_tileloadd64:
5234 case Intrinsic::x86_tileloaddt164:
5235 case Intrinsic::x86_tilestored64: {
5236 if (!Subtarget->hasAMXTILE())
5237 break;
5238 auto *MFI =
5239 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5240 MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
5241 unsigned Opc;
5242 switch (IntNo) {
5243 default: llvm_unreachable("Unexpected intrinsic!");
5244 case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break;
5245 case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break;
5246 case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break;
5247 }
5248 // FIXME: Match displacement and scale.
5249 unsigned TIndex = Node->getConstantOperandVal(2);
5250 SDValue TReg = getI8Imm(TIndex, dl);
5251 SDValue Base = Node->getOperand(3);
5252 SDValue Scale = getI8Imm(1, dl);
5253 SDValue Index = Node->getOperand(4);
5254 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5255 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5256 SDValue Chain = Node->getOperand(0);
5257 MachineSDNode *CNode;
5258 if (Opc == X86::PTILESTORED) {
5259 SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain };
5260 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5261 } else {
5262 SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain };
5263 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5264 }
5265 ReplaceNode(Node, CNode);
5266 return;
5267 }
5268 }
5269 break;
5270 }
5271 case ISD::BRIND:
5272 case X86ISD::NT_BRIND: {
5273 if (Subtarget->isTargetNaCl())
5274 // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We
5275 // leave the instruction alone.
5276 break;
5277 if (Subtarget->isTarget64BitILP32()) {
5278 // Converts a 32-bit register to a 64-bit, zero-extended version of
5279 // it. This is needed because x86-64 can do many things, but jmp %r32
5280 // ain't one of them.
5281 SDValue Target = Node->getOperand(1);
5282 assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
5283 SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64);
5284 SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other,
5285 Node->getOperand(0), ZextTarget);
5286 ReplaceNode(Node, Brind.getNode());
5287 SelectCode(ZextTarget.getNode());
5288 SelectCode(Brind.getNode());
5289 return;
5290 }
5291 break;
5292 }
5293 case X86ISD::GlobalBaseReg:
5294 ReplaceNode(Node, getGlobalBaseReg());
5295 return;
5296
5297 case ISD::BITCAST:
5298 // Just drop all 128/256/512-bit bitcasts.
5299 if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
5300 NVT == MVT::f128) {
5301 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
5302 CurDAG->RemoveDeadNode(Node);
5303 return;
5304 }
5305 break;
5306
5307 case ISD::SRL:
5308 if (matchBitExtract(Node))
5309 return;
5310 [[fallthrough]];
5311 case ISD::SRA:
5312 case ISD::SHL:
5313 if (tryShiftAmountMod(Node))
5314 return;
5315 break;
5316
5317 case X86ISD::VPTERNLOG: {
5318 uint8_t Imm = Node->getConstantOperandVal(3);
5319 if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
5320 Node->getOperand(1), Node->getOperand(2), Imm))
5321 return;
5322 break;
5323 }
5324
5325 case X86ISD::ANDNP:
5326 if (tryVPTERNLOG(Node))
5327 return;
5328 break;
5329
5330 case ISD::AND:
5331 if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
5332 // Try to form a masked VPTESTM. Operands can be in either order.
5333 SDValue N0 = Node->getOperand(0);
5334 SDValue N1 = Node->getOperand(1);
5335 if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
5336 tryVPTESTM(Node, N0, N1))
5337 return;
5338 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
5339 tryVPTESTM(Node, N1, N0))
5340 return;
5341 }
5342
5343 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
5344 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
5345 CurDAG->RemoveDeadNode(Node);
5346 return;
5347 }
5348 if (matchBitExtract(Node))
5349 return;
5350 if (AndImmShrink && shrinkAndImmediate(Node))
5351 return;
5352
5353 [[fallthrough]];
5354 case ISD::OR:
5355 case ISD::XOR:
5356 if (tryShrinkShlLogicImm(Node))
5357 return;
5358 if (Opcode == ISD::OR && tryMatchBitSelect(Node))
5359 return;
5360 if (tryVPTERNLOG(Node))
5361 return;
5362
5363 [[fallthrough]];
5364 case ISD::ADD:
5365 if (Opcode == ISD::ADD && matchBitExtract(Node))
5366 return;
5367 [[fallthrough]];
5368 case ISD::SUB: {
5369 // Try to avoid folding immediates with multiple uses for optsize.
5370 // This code tries to select to register form directly to avoid going
5371 // through the isel table which might fold the immediate. We can't change
5372 // the patterns on the add/sub/and/or/xor with immediate operands in the
5373 // tablegen files to check immediate use count without making the patterns
5374 // unavailable to the fast-isel table.
5375 if (!CurDAG->shouldOptForSize())
5376 break;
5377
5378 // Only handle i8/i16/i32/i64.
5379 if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
5380 break;
5381
5382 SDValue N0 = Node->getOperand(0);
5383 SDValue N1 = Node->getOperand(1);
5384
5385 auto *Cst = dyn_cast<ConstantSDNode>(N1);
5386 if (!Cst)
5387 break;
5388
5389 int64_t Val = Cst->getSExtValue();
5390
5391 // Make sure it's an immediate that is considered foldable.
5392 // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
5393 if (!isInt<8>(Val) && !isInt<32>(Val))
5394 break;
5395
5396 // If this can match to INC/DEC, let it go.
5397 if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
5398 break;
5399
5400 // Check if we should avoid folding this immediate.
5401 if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
5402 break;
5403
5404 // We should not fold the immediate. So we need a register form instead.
5405 unsigned ROpc, MOpc;
5406 switch (NVT.SimpleTy) {
5407 default: llvm_unreachable("Unexpected VT!");
5408 case MVT::i8:
5409 switch (Opcode) {
5410 default: llvm_unreachable("Unexpected opcode!");
5411 case ISD::ADD:
5412 ROpc = GET_ND_IF_ENABLED(X86::ADD8rr);
5413 MOpc = GET_ND_IF_ENABLED(X86::ADD8rm);
5414 break;
5415 case ISD::SUB:
5416 ROpc = GET_ND_IF_ENABLED(X86::SUB8rr);
5417 MOpc = GET_ND_IF_ENABLED(X86::SUB8rm);
5418 break;
5419 case ISD::AND:
5420 ROpc = GET_ND_IF_ENABLED(X86::AND8rr);
5421 MOpc = GET_ND_IF_ENABLED(X86::AND8rm);
5422 break;
5423 case ISD::OR:
5424 ROpc = GET_ND_IF_ENABLED(X86::OR8rr);
5425 MOpc = GET_ND_IF_ENABLED(X86::OR8rm);
5426 break;
5427 case ISD::XOR:
5428 ROpc = GET_ND_IF_ENABLED(X86::XOR8rr);
5429 MOpc = GET_ND_IF_ENABLED(X86::XOR8rm);
5430 break;
5431 }
5432 break;
5433 case MVT::i16:
5434 switch (Opcode) {
5435 default: llvm_unreachable("Unexpected opcode!");
5436 case ISD::ADD:
5437 ROpc = GET_ND_IF_ENABLED(X86::ADD16rr);
5438 MOpc = GET_ND_IF_ENABLED(X86::ADD16rm);
5439 break;
5440 case ISD::SUB:
5441 ROpc = GET_ND_IF_ENABLED(X86::SUB16rr);
5442 MOpc = GET_ND_IF_ENABLED(X86::SUB16rm);
5443 break;
5444 case ISD::AND:
5445 ROpc = GET_ND_IF_ENABLED(X86::AND16rr);
5446 MOpc = GET_ND_IF_ENABLED(X86::AND16rm);
5447 break;
5448 case ISD::OR:
5449 ROpc = GET_ND_IF_ENABLED(X86::OR16rr);
5450 MOpc = GET_ND_IF_ENABLED(X86::OR16rm);
5451 break;
5452 case ISD::XOR:
5453 ROpc = GET_ND_IF_ENABLED(X86::XOR16rr);
5454 MOpc = GET_ND_IF_ENABLED(X86::XOR16rm);
5455 break;
5456 }
5457 break;
5458 case MVT::i32:
5459 switch (Opcode) {
5460 default: llvm_unreachable("Unexpected opcode!");
5461 case ISD::ADD:
5462 ROpc = GET_ND_IF_ENABLED(X86::ADD32rr);
5463 MOpc = GET_ND_IF_ENABLED(X86::ADD32rm);
5464 break;
5465 case ISD::SUB:
5466 ROpc = GET_ND_IF_ENABLED(X86::SUB32rr);
5467 MOpc = GET_ND_IF_ENABLED(X86::SUB32rm);
5468 break;
5469 case ISD::AND:
5470 ROpc = GET_ND_IF_ENABLED(X86::AND32rr);
5471 MOpc = GET_ND_IF_ENABLED(X86::AND32rm);
5472 break;
5473 case ISD::OR:
5474 ROpc = GET_ND_IF_ENABLED(X86::OR32rr);
5475 MOpc = GET_ND_IF_ENABLED(X86::OR32rm);
5476 break;
5477 case ISD::XOR:
5478 ROpc = GET_ND_IF_ENABLED(X86::XOR32rr);
5479 MOpc = GET_ND_IF_ENABLED(X86::XOR32rm);
5480 break;
5481 }
5482 break;
5483 case MVT::i64:
5484 switch (Opcode) {
5485 default: llvm_unreachable("Unexpected opcode!");
5486 case ISD::ADD:
5487 ROpc = GET_ND_IF_ENABLED(X86::ADD64rr);
5488 MOpc = GET_ND_IF_ENABLED(X86::ADD64rm);
5489 break;
5490 case ISD::SUB:
5491 ROpc = GET_ND_IF_ENABLED(X86::SUB64rr);
5492 MOpc = GET_ND_IF_ENABLED(X86::SUB64rm);
5493 break;
5494 case ISD::AND:
5495 ROpc = GET_ND_IF_ENABLED(X86::AND64rr);
5496 MOpc = GET_ND_IF_ENABLED(X86::AND64rm);
5497 break;
5498 case ISD::OR:
5499 ROpc = GET_ND_IF_ENABLED(X86::OR64rr);
5500 MOpc = GET_ND_IF_ENABLED(X86::OR64rm);
5501 break;
5502 case ISD::XOR:
5503 ROpc = GET_ND_IF_ENABLED(X86::XOR64rr);
5504 MOpc = GET_ND_IF_ENABLED(X86::XOR64rm);
5505 break;
5506 }
5507 break;
5508 }
5509
5510 // Ok, this is an AND/OR/XOR/ADD/SUB with a constant.
5511
5512 // If this is not a subtract, we can still try to fold a load.
5513 if (Opcode != ISD::SUB) {
5514 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5515 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
5516 SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
5517 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
5518 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5519 // Update the chain.
5520 ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
5521 // Record the mem-refs
5522 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
5523 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5524 CurDAG->RemoveDeadNode(Node);
5525 return;
5526 }
5527 }
5528
5529 CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
5530 return;
5531 }
5532
5533 case X86ISD::SMUL:
5534 // i16/i32/i64 are handled with isel patterns.
5535 if (NVT != MVT::i8)
5536 break;
5537 [[fallthrough]];
5538 case X86ISD::UMUL: {
5539 SDValue N0 = Node->getOperand(0);
5540 SDValue N1 = Node->getOperand(1);
5541
5542 unsigned LoReg, ROpc, MOpc;
5543 switch (NVT.SimpleTy) {
5544 default: llvm_unreachable("Unsupported VT!");
5545 case MVT::i8:
5546 LoReg = X86::AL;
5547 ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
5548 MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
5549 break;
5550 case MVT::i16:
5551 LoReg = X86::AX;
5552 ROpc = X86::MUL16r;
5553 MOpc = X86::MUL16m;
5554 break;
5555 case MVT::i32:
5556 LoReg = X86::EAX;
5557 ROpc = X86::MUL32r;
5558 MOpc = X86::MUL32m;
5559 break;
5560 case MVT::i64:
5561 LoReg = X86::RAX;
5562 ROpc = X86::MUL64r;
5563 MOpc = X86::MUL64m;
5564 break;
5565 }
5566
5567 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5568 bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5569 // Multiply is commutative.
5570 if (!FoldedLoad) {
5571 FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5572 if (FoldedLoad)
5573 std::swap(N0, N1);
5574 }
5575
5576 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
5577 N0, SDValue()).getValue(1);
5578
5579 MachineSDNode *CNode;
5580 if (FoldedLoad) {
5581 // i16/i32/i64 use an instruction that produces a low and high result even
5582 // though only the low result is used.
5583 SDVTList VTs;
5584 if (NVT == MVT::i8)
5585 VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
5586 else
5587 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
5588
5589 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5590 InGlue };
5591 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5592
5593 // Update the chain.
5594 ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
5595 // Record the mem-refs
5596 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5597 } else {
5598 // i16/i32/i64 use an instruction that produces a low and high result even
5599 // though only the low result is used.
5600 SDVTList VTs;
5601 if (NVT == MVT::i8)
5602 VTs = CurDAG->getVTList(NVT, MVT::i32);
5603 else
5604 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
5605
5606 CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InGlue});
5607 }
5608
5609 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5610 ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
5611 CurDAG->RemoveDeadNode(Node);
5612 return;
5613 }
5614
5615 case ISD::SMUL_LOHI:
5616 case ISD::UMUL_LOHI: {
5617 SDValue N0 = Node->getOperand(0);
5618 SDValue N1 = Node->getOperand(1);
5619
5620 unsigned Opc, MOpc;
5621 unsigned LoReg, HiReg;
5622 bool IsSigned = Opcode == ISD::SMUL_LOHI;
5623 bool UseMULX = !IsSigned && Subtarget->hasBMI2();
5624 bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
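 // Added note: BMI2 MULX reads one multiplicand implicitly from EDX/RDX,
 // writes the high (and optionally low) half to explicit destinations, and
 // leaves EFLAGS untouched; that is why LoReg is EDX/RDX when MULX is used.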
5625 switch (NVT.SimpleTy) {
5626 default: llvm_unreachable("Unsupported VT!");
5627 case MVT::i32:
5628 Opc = UseMULXHi ? X86::MULX32Hrr
5629 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
5630 : IsSigned ? X86::IMUL32r
5631 : X86::MUL32r;
5632 MOpc = UseMULXHi ? X86::MULX32Hrm
5633 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
5634 : IsSigned ? X86::IMUL32m
5635 : X86::MUL32m;
5636 LoReg = UseMULX ? X86::EDX : X86::EAX;
5637 HiReg = X86::EDX;
5638 break;
5639 case MVT::i64:
5640 Opc = UseMULXHi ? X86::MULX64Hrr
5641 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
5642 : IsSigned ? X86::IMUL64r
5643 : X86::MUL64r;
5644 MOpc = UseMULXHi ? X86::MULX64Hrm
5645 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
5646 : IsSigned ? X86::IMUL64m
5647 : X86::MUL64m;
5648 LoReg = UseMULX ? X86::RDX : X86::RAX;
5649 HiReg = X86::RDX;
5650 break;
5651 }
5652
5653 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5654 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5655 // Multiply is commutative.
5656 if (!foldedLoad) {
5657 foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5658 if (foldedLoad)
5659 std::swap(N0, N1);
5660 }
5661
5662 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
5663 N0, SDValue()).getValue(1);
5664 SDValue ResHi, ResLo;
5665 if (foldedLoad) {
5666 SDValue Chain;
5667 MachineSDNode *CNode = nullptr;
5668 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5669 InGlue };
5670 if (UseMULXHi) {
5671 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
5672 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5673 ResHi = SDValue(CNode, 0);
5674 Chain = SDValue(CNode, 1);
5675 } else if (UseMULX) {
5676 SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
5677 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5678 ResHi = SDValue(CNode, 0);
5679 ResLo = SDValue(CNode, 1);
5680 Chain = SDValue(CNode, 2);
5681 } else {
5682 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
5683 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5684 Chain = SDValue(CNode, 0);
5685 InGlue = SDValue(CNode, 1);
5686 }
5687
5688 // Update the chain.
5689 ReplaceUses(N1.getValue(1), Chain);
5690 // Record the mem-refs
5691 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5692 } else {
5693 SDValue Ops[] = { N1, InGlue };
5694 if (UseMULXHi) {
5695 SDVTList VTs = CurDAG->getVTList(NVT);
5696 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5697 ResHi = SDValue(CNode, 0);
5698 } else if (UseMULX) {
5699 SDVTList VTs = CurDAG->getVTList(NVT, NVT);
5700 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5701 ResHi = SDValue(CNode, 0);
5702 ResLo = SDValue(CNode, 1);
5703 } else {
5704 SDVTList VTs = CurDAG->getVTList(MVT::Glue);
5705 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5706 InGlue = SDValue(CNode, 0);
5707 }
5708 }
5709
5710 // Copy the low half of the result, if it is needed.
5711 if (!SDValue(Node, 0).use_empty()) {
5712 if (!ResLo) {
5713 assert(LoReg && "Register for low half is not defined!");
5714 ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
5715 NVT, InGlue);
5716 InGlue = ResLo.getValue(2);
5717 }
5718 ReplaceUses(SDValue(Node, 0), ResLo);
5719 LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
5720 dbgs() << '\n');
5721 }
5722 // Copy the high half of the result, if it is needed.
5723 if (!SDValue(Node, 1).use_empty()) {
5724 if (!ResHi) {
5725 assert(HiReg && "Register for high half is not defined!");
5726 ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
5727 NVT, InGlue);
5728 InGlue = ResHi.getValue(2);
5729 }
5730 ReplaceUses(SDValue(Node, 1), ResHi);
5731 LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
5732 dbgs() << '\n');
5733 }
5734
5735 CurDAG->RemoveDeadNode(Node);
5736 return;
5737 }
5738
5739 case ISD::SDIVREM:
5740 case ISD::UDIVREM: {
5741 SDValue N0 = Node->getOperand(0);
5742 SDValue N1 = Node->getOperand(1);
5743
5744 unsigned ROpc, MOpc;
5745 bool isSigned = Opcode == ISD::SDIVREM;
5746 if (!isSigned) {
5747 switch (NVT.SimpleTy) {
5748 default: llvm_unreachable("Unsupported VT!");
5749 case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break;
5750 case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break;
5751 case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break;
5752 case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break;
5753 }
5754 } else {
5755 switch (NVT.SimpleTy) {
5756 default: llvm_unreachable("Unsupported VT!");
5757 case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
5758 case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
5759 case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
5760 case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
5761 }
5762 }
5763
5764 unsigned LoReg, HiReg, ClrReg;
5765 unsigned SExtOpcode;
5766 switch (NVT.SimpleTy) {
5767 default: llvm_unreachable("Unsupported VT!");
5768 case MVT::i8:
5769 LoReg = X86::AL; ClrReg = HiReg = X86::AH;
5770 SExtOpcode = 0; // Not used.
5771 break;
5772 case MVT::i16:
5773 LoReg = X86::AX; HiReg = X86::DX;
5774 ClrReg = X86::DX;
5775 SExtOpcode = X86::CWD;
5776 break;
5777 case MVT::i32:
5778 LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
5779 SExtOpcode = X86::CDQ;
5780 break;
5781 case MVT::i64:
5782 LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
5783 SExtOpcode = X86::CQO;
5784 break;
5785 }
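 // Added note: CWD/CDQ/CQO sign-extend AX/EAX/RAX into DX/EDX/RDX, which is
 // what the signed path below relies on to set up the high half of the
 // dividend.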
5786
5787 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5788 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5789 bool signBitIsZero = CurDAG->SignBitIsZero(N0);
5790
5791 SDValue InGlue;
5792 if (NVT == MVT::i8) {
5793 // Special case for div8: extend the dividend into AX with a zero- (or, for
5794 // signed division, sign-) extending move so the upper 8 bits (AH) are set up.
5795 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
5796 MachineSDNode *Move;
5797 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
5798 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
5799 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
5800 : X86::MOVZX16rm8;
5801 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
5802 Chain = SDValue(Move, 1);
5803 ReplaceUses(N0.getValue(1), Chain);
5804 // Record the mem-refs
5805 CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
5806 } else {
5807 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
5808 : X86::MOVZX16rr8;
5809 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
5810 Chain = CurDAG->getEntryNode();
5811 }
5812 Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
5813 SDValue());
5814 InGlue = Chain.getValue(1);
5815 } else {
5816 InGlue =
5817 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
5818 LoReg, N0, SDValue()).getValue(1);
5819 if (isSigned && !signBitIsZero) {
5820 // Sign extend the low part into the high part.
5821 InGlue =
5822 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InGlue),0);
5823 } else {
5824 // Zero out the high part, effectively zero extending the input.
5825 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
5826 SDValue ClrNode = SDValue(
5827 CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0);
5828 switch (NVT.SimpleTy) {
5829 case MVT::i16:
5830 ClrNode =
5831 SDValue(CurDAG->getMachineNode(
5832 TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
5833 CurDAG->getTargetConstant(X86::sub_16bit, dl,
5834 MVT::i32)),
5835 0);
5836 break;
5837 case MVT::i32:
5838 break;
5839 case MVT::i64:
5840 ClrNode =
5841 SDValue(CurDAG->getMachineNode(
5842 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
5843 CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
5844 CurDAG->getTargetConstant(X86::sub_32bit, dl,
5845 MVT::i32)),
5846 0);
5847 break;
5848 default:
5849 llvm_unreachable("Unexpected division source");
5850 }
5851
5852 InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
5853 ClrNode, InGlue).getValue(1);
5854 }
5855 }
5856
5857 if (foldedLoad) {
5858 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5859 InGlue };
5860 MachineSDNode *CNode =
5861 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
5862 InGlue = SDValue(CNode, 1);
5863 // Update the chain.
5864 ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
5865 // Record the mem-refs
5866 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5867 } else {
5868 InGlue =
5869 SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InGlue), 0);
5870 }
5871
5872 // Prevent use of AH in a REX instruction by explicitly copying it to
5873 // an ABCD_L register.
5874 //
5875 // The current assumption of the register allocator is that isel
5876 // won't generate explicit references to the GR8_ABCD_H registers. If
5877 // the allocator and/or the backend get enhanced to be more robust in
5878 // that regard, this can be, and should be, removed.
5879 if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
5880 SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
5881 unsigned AHExtOpcode =
5882 isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
5883
5884 SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
5885 MVT::Glue, AHCopy, InGlue);
5886 SDValue Result(RNode, 0);
5887 InGlue = SDValue(RNode, 1);
5888
5889 Result =
5890 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
5891
5892 ReplaceUses(SDValue(Node, 1), Result);
5893 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5894 dbgs() << '\n');
5895 }
5896 // Copy the division (low) result, if it is needed.
5897 if (!SDValue(Node, 0).use_empty()) {
5898 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
5899 LoReg, NVT, InGlue);
5900 InGlue = Result.getValue(2);
5901 ReplaceUses(SDValue(Node, 0), Result);
5902 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5903 dbgs() << '\n');
5904 }
5905 // Copy the remainder (high) result, if it is needed.
5906 if (!SDValue(Node, 1).use_empty()) {
5907 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
5908 HiReg, NVT, InGlue);
5909 InGlue = Result.getValue(2);
5910 ReplaceUses(SDValue(Node, 1), Result);
5911 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5912 dbgs() << '\n');
5913 }
5914 CurDAG->RemoveDeadNode(Node);
5915 return;
5916 }
5917
5918 case X86ISD::FCMP:
5919 case X86ISD::STRICT_FCMP:
5920 case X86ISD::STRICT_FCMPS: {
5921 bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
5922 Node->getOpcode() == X86ISD::STRICT_FCMPS;
5923 SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
5924 SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
5925
5926 // Save the original VT of the compare.
5927 MVT CmpVT = N0.getSimpleValueType();
5928
5929 // Floating point needs special handling if we don't have FCOMI.
5930 if (Subtarget->canUseCMOV())
5931 break;
5932
5933 bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
5934
5935 unsigned Opc;
5936 switch (CmpVT.SimpleTy) {
5937 default: llvm_unreachable("Unexpected type!");
5938 case MVT::f32:
5939 Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
5940 break;
5941 case MVT::f64:
5942 Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
5943 break;
5944 case MVT::f80:
5945 Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
5946 break;
5947 }
5948
5949 SDValue Chain =
5950 IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
5951 SDValue Glue;
5952 if (IsStrictCmp) {
5953 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
5954 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
5955 Glue = Chain.getValue(1);
5956 } else {
5957 Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0);
5958 }
5959
5960 // Move FPSW to AX.
5961 SDValue FNSTSW =
5962 SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0);
5963
5964 // Extract upper 8-bits of AX.
5965 SDValue Extract =
5966 CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
5967
5968 // Move AH into flags.
5969 // Some 64-bit targets lack SAHF support, but they do support FCOMI.
5970 assert(Subtarget->canUseLAHFSAHF() &&
5971 "Target doesn't support SAHF or FCOMI?");
5972 SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
5973 Chain = AH;
5974 SDValue SAHF = SDValue(
5975 CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
5976
5977 if (IsStrictCmp)
5978 ReplaceUses(SDValue(Node, 1), Chain);
5979
5980 ReplaceUses(SDValue(Node, 0), SAHF);
5981 CurDAG->RemoveDeadNode(Node);
5982 return;
5983 }
5984
5985 case X86ISD::CMP: {
5986 SDValue N0 = Node->getOperand(0);
5987 SDValue N1 = Node->getOperand(1);
5988
5989 // Optimizations for TEST compares.
5990 if (!isNullConstant(N1))
5991 break;
5992
5993 // Save the original VT of the compare.
5994 MVT CmpVT = N0.getSimpleValueType();
5995
5996 // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
5997 // by a test instruction. The test should be removed later by
5998 // analyzeCompare if we are using only the zero flag.
5999 // TODO: Should we check the users and use the BEXTR flags directly?
6000 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
6001 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
6002 unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
6003 : X86::TEST32rr;
6004 SDValue BEXTR = SDValue(NewNode, 0);
6005 NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
6006 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
6007 CurDAG->RemoveDeadNode(Node);
6008 return;
6009 }
6010 }
6011
6012 // We can peek through truncates, but we need to be careful below.
6013 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
6014 N0 = N0.getOperand(0);
6015
6016 // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
6017 // use a smaller encoding.
6018 // Look past the truncate if CMP is the only use of it.
6019 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6020 N0.getValueType() != MVT::i8) {
6021 auto *MaskC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6022 if (!MaskC)
6023 break;
6024
6025 // We may have looked through a truncate so mask off any bits that
6026 // shouldn't be part of the compare.
6027 uint64_t Mask = MaskC->getZExtValue();
6028 Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
6029
6030 // Check if we can replace AND+IMM{32,64} with a shift. This is possible
6031 // for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the
6032 // zero flag.
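 // Added example (illustrative): for a mask of 0x0000ff0000000000 there
 // are 16 leading and 40 trailing zero bits, so the shifted mask is 8 bits
 // wide; the AND+TEST can become a SHR64ri by 40 followed by a TEST8rr on
 // the sub_8bit subregister of the shifted value.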
6033 if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) &&
6034 onlyUsesZeroFlag(SDValue(Node, 0))) {
6035 unsigned ShiftOpcode = ISD::DELETED_NODE;
6036 unsigned ShiftAmt;
6037 unsigned SubRegIdx;
6038 MVT SubRegVT;
6039 unsigned TestOpcode;
6040 unsigned LeadingZeros = llvm::countl_zero(Mask);
6041 unsigned TrailingZeros = llvm::countr_zero(Mask);
6042
6043 // With leading/trailing zeros, the transform is profitable if we can
6044 // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without
6045 // incurring any extra register moves.
6046 bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse();
6047 if (LeadingZeros == 0 && SavesBytes) {
6048 // If the mask covers the most significant bit, then we can replace
6049 // TEST+AND with a SHR and check eflags.
6050 // This emits a redundant TEST which is subsequently eliminated.
6051 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6052 ShiftAmt = TrailingZeros;
6053 SubRegIdx = 0;
6054 TestOpcode = X86::TEST64rr;
6055 } else if (TrailingZeros == 0 && SavesBytes) {
6056 // If the mask covers the least significant bit, then we can replace
6057 // TEST+AND with a SHL and check eflags.
6058 // This emits a redundant TEST which is subsequently eliminated.
6059 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHL64ri);
6060 ShiftAmt = LeadingZeros;
6061 SubRegIdx = 0;
6062 TestOpcode = X86::TEST64rr;
6063 } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) {
6064 // If the shifted mask extends into the high half and is 8/16/32 bits
6065 // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
6066 unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
6067 if (PopCount == 8) {
6068 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6069 ShiftAmt = TrailingZeros;
6070 SubRegIdx = X86::sub_8bit;
6071 SubRegVT = MVT::i8;
6072 TestOpcode = X86::TEST8rr;
6073 } else if (PopCount == 16) {
6074 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6075 ShiftAmt = TrailingZeros;
6076 SubRegIdx = X86::sub_16bit;
6077 SubRegVT = MVT::i16;
6078 TestOpcode = X86::TEST16rr;
6079 } else if (PopCount == 32) {
6080 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6081 ShiftAmt = TrailingZeros;
6082 SubRegIdx = X86::sub_32bit;
6083 SubRegVT = MVT::i32;
6084 TestOpcode = X86::TEST32rr;
6085 }
6086 }
6087 if (ShiftOpcode != ISD::DELETED_NODE) {
6088 SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64);
6089 SDValue Shift = SDValue(
6090 CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32,
6091 N0.getOperand(0), ShiftC),
6092 0);
6093 if (SubRegIdx != 0) {
6094 Shift =
6095 CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift);
6096 }
6097 MachineSDNode *Test =
6098 CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift);
6099 ReplaceNode(Node, Test);
6100 return;
6101 }
6102 }
6103
6104 MVT VT;
6105 int SubRegOp;
6106 unsigned ROpc, MOpc;
6107
6108 // For each of these checks we need to be careful if the sign flag is
6109 // being used. It is only safe to use the sign flag under two conditions:
6110 // either the sign bit in the shrunken mask is zero, or the final test
6111 // size is equal to the original compare size.
6112
6113 if (isUInt<8>(Mask) &&
6114 (!(Mask & 0x80) || CmpVT == MVT::i8 ||
6115 hasNoSignFlagUses(SDValue(Node, 0)))) {
6116 // For example, convert "testl %eax, $8" to "testb %al, $8"
6117 VT = MVT::i8;
6118 SubRegOp = X86::sub_8bit;
6119 ROpc = X86::TEST8ri;
6120 MOpc = X86::TEST8mi;
6121 } else if (OptForMinSize && isUInt<16>(Mask) &&
6122 (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
6123 hasNoSignFlagUses(SDValue(Node, 0)))) {
6124 // For example, "testl %eax, $32776" to "testw %ax, $32776".
6125 // NOTE: We only want to form TESTW instructions if optimizing for
6126 // min size. Otherwise we only save one byte and possibly get a length
6127 // changing prefix penalty in the decoders.
6128 VT = MVT::i16;
6129 SubRegOp = X86::sub_16bit;
6130 ROpc = X86::TEST16ri;
6131 MOpc = X86::TEST16mi;
6132 } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
6133 ((!(Mask & 0x80000000) &&
6134 // Without minsize 16-bit Cmps can get here so we need to
6135 // be sure we calculate the correct sign flag if needed.
6136 (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
6137 CmpVT == MVT::i32 ||
6138 hasNoSignFlagUses(SDValue(Node, 0)))) {
6139 // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
6140 // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
6141 // Otherwise, we find ourselves in a position where we have to do
6142 // promotion. If previous passes did not promote the and, we assume
6143 // they had a good reason not to and do not promote here.
6144 VT = MVT::i32;
6145 SubRegOp = X86::sub_32bit;
6146 ROpc = X86::TEST32ri;
6147 MOpc = X86::TEST32mi;
6148 } else {
6149 // No eligible transformation was found.
6150 break;
6151 }
6152
6153 SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
6154 SDValue Reg = N0.getOperand(0);
6155
6156 // Emit a testl or testw.
6157 MachineSDNode *NewNode;
6158 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
6159 if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
6160 if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) {
6161 if (!LoadN->isSimple()) {
6162 unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits();
6163 if ((MOpc == X86::TEST8mi && NumVolBits != 8) ||
6164 (MOpc == X86::TEST16mi && NumVolBits != 16) ||
6165 (MOpc == X86::TEST32mi && NumVolBits != 32))
6166 break;
6167 }
6168 }
6169 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
6170 Reg.getOperand(0) };
6171 NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
6172 // Update the chain.
6173 ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
6174 // Record the mem-refs
6175 CurDAG->setNodeMemRefs(NewNode,
6176 {cast<LoadSDNode>(Reg)->getMemOperand()});
6177 } else {
6178 // Extract the subregister if necessary.
6179 if (N0.getValueType() != VT)
6180 Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
6181
6182 NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
6183 }
6184 // Replace CMP with TEST.
6185 ReplaceNode(Node, NewNode);
6186 return;
6187 }
6188 break;
6189 }
6190 case X86ISD::PCMPISTR: {
6191 if (!Subtarget->hasSSE42())
6192 break;
6193
6194 bool NeedIndex = !SDValue(Node, 0).use_empty();
6195 bool NeedMask = !SDValue(Node, 1).use_empty();
6196 // We can't fold a load if we are going to make two instructions.
6197 bool MayFoldLoad = !NeedIndex || !NeedMask;
6198
6199 MachineSDNode *CNode;
6200 if (NeedMask) {
6201 unsigned ROpc =
6202 Subtarget->hasAVX() ? X86::VPCMPISTRMrri : X86::PCMPISTRMrri;
6203 unsigned MOpc =
6204 Subtarget->hasAVX() ? X86::VPCMPISTRMrmi : X86::PCMPISTRMrmi;
6205 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
6206 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
6207 }
6208 if (NeedIndex || !NeedMask) {
6209 unsigned ROpc =
6210 Subtarget->hasAVX() ? X86::VPCMPISTRIrri : X86::PCMPISTRIrri;
6211 unsigned MOpc =
6212 Subtarget->hasAVX() ? X86::VPCMPISTRIrmi : X86::PCMPISTRIrmi;
6213 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
6214 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
6215 }
6216
6217 // Connect the flag usage to the last instruction created.
6218 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
6219 CurDAG->RemoveDeadNode(Node);
6220 return;
6221 }
6222 case X86ISD::PCMPESTR: {
6223 if (!Subtarget->hasSSE42())
6224 break;
6225
6226 // Copy the two implicit register inputs.
6227 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
6228 Node->getOperand(1),
6229 SDValue()).getValue(1);
6230 InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
6231 Node->getOperand(3), InGlue).getValue(1);
6232
6233 bool NeedIndex = !SDValue(Node, 0).use_empty();
6234 bool NeedMask = !SDValue(Node, 1).use_empty();
6235 // We can't fold a load if we are going to make two instructions.
6236 bool MayFoldLoad = !NeedIndex || !NeedMask;
6237
6238 MachineSDNode *CNode;
6239 if (NeedMask) {
6240 unsigned ROpc =
6241 Subtarget->hasAVX() ? X86::VPCMPESTRMrri : X86::PCMPESTRMrri;
6242 unsigned MOpc =
6243 Subtarget->hasAVX() ? X86::VPCMPESTRMrmi : X86::PCMPESTRMrmi;
6244 CNode =
6245 emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node, InGlue);
6246 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
6247 }
6248 if (NeedIndex || !NeedMask) {
6249 unsigned ROpc =
6250 Subtarget->hasAVX() ? X86::VPCMPESTRIrri : X86::PCMPESTRIrri;
6251 unsigned MOpc =
6252 Subtarget->hasAVX() ? X86::VPCMPESTRIrmi : X86::PCMPESTRIrmi;
6253 CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InGlue);
6254 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
6255 }
6256 // Connect the flag usage to the last instruction created.
6257 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
6258 CurDAG->RemoveDeadNode(Node);
6259 return;
6260 }
6261
6262 case ISD::SETCC: {
6263 if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
6264 return;
6265
6266 break;
6267 }
6268
6269 case ISD::STORE:
6270 if (foldLoadStoreIntoMemOperand(Node))
6271 return;
6272 break;
6273
6274 case X86ISD::SETCC_CARRY: {
6275 MVT VT = Node->getSimpleValueType(0);
6276 SDValue Result;
6277 if (Subtarget->hasSBBDepBreaking()) {
6278 // We have to do this manually because tblgen will put the eflags copy in
6279 // the wrong place if we use an extract_subreg in the pattern.
6280 // Copy flags to the EFLAGS register and glue it to next node.
6281 SDValue EFLAGS =
6282 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
6283 Node->getOperand(1), SDValue());
6284
6285 // Create a 64-bit instruction if the result is 64-bits otherwise use the
6286 // 32-bit version.
6287 unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
6288 MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
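 // Added note: SETB_C32r/SETB_C64r are pseudos that materialize 0 or
 // all-ones from the carry flag (an sbb of a register with itself), so the
 // EFLAGS copy above is glued directly into them.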
6289 Result = SDValue(
6290 CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)),
6291 0);
6292 } else {
6293 // The target does not recognize sbb with the same reg operand as a
6294 // no-source idiom, so we explicitly zero the input values.
6295 Result = getSBBZero(Node);
6296 }
6297
6298 // For less than 32-bits we need to extract from the 32-bit node.
6299 if (VT == MVT::i8 || VT == MVT::i16) {
6300 int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
6301 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
6302 }
6303
6304 ReplaceUses(SDValue(Node, 0), Result);
6305 CurDAG->RemoveDeadNode(Node);
6306 return;
6307 }
6308 case X86ISD::SBB: {
6309 if (isNullConstant(Node->getOperand(0)) &&
6310 isNullConstant(Node->getOperand(1))) {
6311 SDValue Result = getSBBZero(Node);
6312
6313 // Replace the flag use.
6314 ReplaceUses(SDValue(Node, 1), Result.getValue(1));
6315
6316 // Replace the result use.
6317 if (!SDValue(Node, 0).use_empty()) {
6318 // For results narrower than 32 bits we need to extract from the 32-bit node.
6319 MVT VT = Node->getSimpleValueType(0);
6320 if (VT == MVT::i8 || VT == MVT::i16) {
6321 int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
6322 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
6323 }
6324 ReplaceUses(SDValue(Node, 0), Result);
6325 }
6326
6327 CurDAG->RemoveDeadNode(Node);
6328 return;
6329 }
6330 break;
6331 }
6332 case X86ISD::MGATHER: {
6333 auto *Mgt = cast<X86MaskedGatherSDNode>(Node);
6334 SDValue IndexOp = Mgt->getIndex();
6335 SDValue Mask = Mgt->getMask();
6336 MVT IndexVT = IndexOp.getSimpleValueType();
6337 MVT ValueVT = Node->getSimpleValueType(0);
6338 MVT MaskVT = Mask.getSimpleValueType();
6339
6340 // This is just to prevent crashes if the nodes are malformed somehow. We're
6341 // otherwise only doing loose type checking in here, based on what a type
6342 // constraint would say, just like table-based isel.
6343 if (!ValueVT.isVector() || !MaskVT.isVector())
6344 break;
6345
6346 unsigned NumElts = ValueVT.getVectorNumElements();
6347 MVT ValueSVT = ValueVT.getVectorElementType();
6348
6349 bool IsFP = ValueSVT.isFloatingPoint();
6350 unsigned EltSize = ValueSVT.getSizeInBits();
6351
6352 unsigned Opc = 0;
6353 bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1;
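// The opcode names encode the index width (D = 32-bit, Q = 64-bit indices), the element
// type (PS/PD for FP, DD/DQ/QD/QQ for integer), and the vector width (Z128/Z256/Z for EVEX, plain/Y for VEX).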
6354 if (AVX512Gather) {
6355 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6356 Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm;
6357 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6358 Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm;
6359 else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
6360 Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm;
6361 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6362 Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm;
6363 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6364 Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm;
6365 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
6366 Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm;
6367 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6368 Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm;
6369 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6370 Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm;
6371 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
6372 Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm;
6373 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6374 Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm;
6375 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6376 Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm;
6377 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
6378 Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm;
6379 } else {
6380 assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&
6381 "Unexpected mask VT!");
6382 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6383 Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm;
6384 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6385 Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm;
6386 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6387 Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm;
6388 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6389 Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm;
6390 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6391 Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm;
6392 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6393 Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm;
6394 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6395 Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm;
6396 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6397 Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm;
6398 }
6399
6400 if (!Opc)
6401 break;
6402
6403 SDValue Base, Scale, Index, Disp, Segment;
6404 if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(),
6405 Base, Scale, Index, Disp, Segment))
6406 break;
6407
6408 SDValue PassThru = Mgt->getPassThru();
6409 SDValue Chain = Mgt->getChain();
6410 // Gather instructions have a mask output that is not present in the ISD node.
6411 SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other);
6412
6413 MachineSDNode *NewNode;
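// AVX512 gathers take the k-register mask right after the pass-thru operand, while
// VEX-encoded gathers take the vector mask after the memory operands.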
6414 if (AVX512Gather) {
6415 SDValue Ops[] = {PassThru, Mask, Base, Scale,
6416 Index, Disp, Segment, Chain};
6417 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6418 } else {
6419 SDValue Ops[] = {PassThru, Base, Scale, Index,
6420 Disp, Segment, Mask, Chain};
6421 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6422 }
6423 CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
6424 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
6425 ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2));
6426 CurDAG->RemoveDeadNode(Node);
6427 return;
6428 }
6429 case X86ISD::MSCATTER: {
6430 auto *Sc = cast<X86MaskedScatterSDNode>(Node);
6431 SDValue Value = Sc->getValue();
6432 SDValue IndexOp = Sc->getIndex();
6433 MVT IndexVT = IndexOp.getSimpleValueType();
6434 MVT ValueVT = Value.getSimpleValueType();
6435
6436 // This is just to prevent crashes if the nodes are malformed somehow. We're
6437 // otherwise only doing loose type checking in here, based on what a type
6438 // constraint would say, just like table-based isel.
6439 if (!ValueVT.isVector())
6440 break;
6441
6442 unsigned NumElts = ValueVT.getVectorNumElements();
6443 MVT ValueSVT = ValueVT.getVectorElementType();
6444
6445 bool IsFP = ValueSVT.isFloatingPoint();
6446 unsigned EltSize = ValueSVT.getSizeInBits();
6447
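// Scatter instructions exist only in EVEX form, so a single opcode table suffices.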
6448 unsigned Opc;
6449 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6450 Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr;
6451 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6452 Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr;
6453 else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
6454 Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr;
6455 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6456 Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr;
6457 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6458 Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr;
6459 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
6460 Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr;
6461 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6462 Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr;
6463 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6464 Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr;
6465 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
6466 Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr;
6467 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6468 Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr;
6469 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6470 Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr;
6471 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
6472 Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr;
6473 else
6474 break;
6475
6476 SDValue Base, Scale, Index, Disp, Segment;
6477 if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(),
6478 Base, Scale, Index, Disp, Segment))
6479 break;
6480
6481 SDValue Mask = Sc->getMask();
6482 SDValue Chain = Sc->getChain();
6483 // Scatter instructions have a mask output that is not present in the ISD node.
6484 SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other);
6485 SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain};
6486
6487 MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6488 CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
6489 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1));
6490 CurDAG->RemoveDeadNode(Node);
6491 return;
6492 }
6493 case ISD::PREALLOCATED_SETUP: {
6494 auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
6495 auto CallId = MFI->getPreallocatedIdForCallSite(
6496 cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
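// Map the call-site token to its preallocated id and pass it to the target opcode as an i32 constant.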
6497 SDValue Chain = Node->getOperand(0);
6498 SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
6499 MachineSDNode *New = CurDAG->getMachineNode(
6500 TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
6501 ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
6502 CurDAG->RemoveDeadNode(Node);
6503 return;
6504 }
6505 case ISD::PREALLOCATED_ARG: {
6506 auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
6507 auto CallId = MFI->getPreallocatedIdForCallSite(
6508 cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
6509 SDValue Chain = Node->getOperand(0);
6510 SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
6511 SDValue ArgIndex = Node->getOperand(2);
6512 SDValue Ops[3];
6513 Ops[0] = CallIdValue;
6514 Ops[1] = ArgIndex;
6515 Ops[2] = Chain;
6516 MachineSDNode *New = CurDAG->getMachineNode(
6517 TargetOpcode::PREALLOCATED_ARG, dl,
6518 CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
6519 MVT::Other),
6520 Ops);
6521 ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
6522 ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
6523 CurDAG->RemoveDeadNode(Node);
6524 return;
6525 }
6526 case X86ISD::AESENCWIDE128KL:
6527 case X86ISD::AESDECWIDE128KL:
6528 case X86ISD::AESENCWIDE256KL:
6529 case X86ISD::AESDECWIDE256KL: {
6530 if (!Subtarget->hasWIDEKL())
6531 break;
6532
6533 unsigned Opcode;
6534 switch (Node->getOpcode()) {
6535 default:
6536 llvm_unreachable("Unexpected opcode!");
6537 case X86ISD::AESENCWIDE128KL:
6538 Opcode = X86::AESENCWIDE128KL;
6539 break;
6540 case X86ISD::AESDECWIDE128KL:
6541 Opcode = X86::AESDECWIDE128KL;
6542 break;
6543 case X86ISD::AESENCWIDE256KL:
6544 Opcode = X86::AESENCWIDE256KL;
6545 break;
6546 case X86ISD::AESDECWIDE256KL:
6547 Opcode = X86::AESDECWIDE256KL;
6548 break;
6549 }
6550
6551 SDValue Chain = Node->getOperand(0);
6552 SDValue Addr = Node->getOperand(1);
6553
6554 SDValue Base, Scale, Index, Disp, Segment;
6555 if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment))
6556 break;
6557
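// The WIDE Key Locker forms process eight blocks at once, passed in XMM0 through XMM7.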
6558 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2),
6559 SDValue());
6560 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3),
6561 Chain.getValue(1));
6562 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4),
6563 Chain.getValue(1));
6564 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5),
6565 Chain.getValue(1));
6566 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6),
6567 Chain.getValue(1));
6568 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7),
6569 Chain.getValue(1));
6570 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8),
6571 Chain.getValue(1));
6572 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9),
6573 Chain.getValue(1));
6574
6575 MachineSDNode *Res = CurDAG->getMachineNode(
6576 Opcode, dl, Node->getVTList(),
6577 {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)});
6578 CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand());
6579 ReplaceNode(Node, Res);
6580 return;
6581 }
6582 }
6583
6584 SelectCode(Node);
6585}
6586
6587bool X86DAGToDAGISel::SelectInlineAsmMemoryOperand(
6588 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
6589 std::vector<SDValue> &OutOps) {
6590 SDValue Op0, Op1, Op2, Op3, Op4;
6591 switch (ConstraintID) {
6592 default:
6593 llvm_unreachable("Unexpected asm memory constraint");
6594 case InlineAsm::ConstraintCode::o: // offsetable ??
6595 case InlineAsm::ConstraintCode::v: // not offsetable ??
6596 case InlineAsm::ConstraintCode::m: // memory
6597 case InlineAsm::ConstraintCode::X:
6598 case InlineAsm::ConstraintCode::p: // address
6599 if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
6600 return true;
6601 break;
6602 }
6603
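// All of these constraints are lowered to the standard five x86 memory operands:
// base, scale, index, displacement and segment.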
6604 OutOps.push_back(Op0);
6605 OutOps.push_back(Op1);
6606 OutOps.push_back(Op2);
6607 OutOps.push_back(Op3);
6608 OutOps.push_back(Op4);
6609 return false;
6610}
6611
6612 X86ISelDAGToDAGPass::X86ISelDAGToDAGPass(X86TargetMachine &TM)
6613 : SelectionDAGISelPass(
6614 std::make_unique<X86DAGToDAGISel>(TM, TM.getOptLevel())) {}
6615
6616/// This pass converts a legalized DAG into a X86-specific DAG,
6617/// ready for instruction scheduling.
6618 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
6619 CodeGenOptLevel OptLevel) {
6620 return new X86DAGToDAGISelLegacy(TM, OptLevel);
6621}