LLVM 20.0.0git
X86ISelDAGToDAG.cpp
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DAG pattern matching instruction selector for X86,
10// converting from a legalized DAG to an X86 DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelDAGToDAG.h"
15#include "X86.h"
17#include "X86RegisterInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
23#include "llvm/Config/llvm-config.h"
25#include "llvm/IR/Function.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsX86.h"
29#include "llvm/IR/Module.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/Debug.h"
35#include <cstdint>
36
37using namespace llvm;
38
39#define DEBUG_TYPE "x86-isel"
40#define PASS_NAME "X86 DAG->DAG Instruction Selection"
41
42STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
43
44static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
45 cl::desc("Enable setting constant bits to reduce size of mask immediates"),
46 cl::Hidden);
47
48 static cl::opt<bool> EnablePromoteAnyextLoad(
49 "x86-promote-anyext-load", cl::init(true),
50 cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);
51
52 extern cl::opt<bool> IndirectBranchTracking;
53
54//===----------------------------------------------------------------------===//
55// Pattern Matcher Implementation
56//===----------------------------------------------------------------------===//
57
58namespace {
59 /// This corresponds to X86AddressMode, but uses SDValue's instead of register
60 /// numbers for the leaves of the matched tree.
61 struct X86ISelAddressMode {
62 enum {
63 RegBase,
64 FrameIndexBase
65 } BaseType = RegBase;
66
67 // This is really a union, discriminated by BaseType!
68 SDValue Base_Reg;
69 int Base_FrameIndex = 0;
70
71 unsigned Scale = 1;
72 SDValue IndexReg;
73 int32_t Disp = 0;
74 SDValue Segment;
75 const GlobalValue *GV = nullptr;
76 const Constant *CP = nullptr;
77 const BlockAddress *BlockAddr = nullptr;
78 const char *ES = nullptr;
79 MCSymbol *MCSym = nullptr;
80 int JT = -1;
81 Align Alignment; // CP alignment.
82 unsigned char SymbolFlags = X86II::MO_NO_FLAG; // X86II::MO_*
83 bool NegateIndex = false;
84
85 X86ISelAddressMode() = default;
86
87 bool hasSymbolicDisplacement() const {
88 return GV != nullptr || CP != nullptr || ES != nullptr ||
89 MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
90 }
91
92 bool hasBaseOrIndexReg() const {
93 return BaseType == FrameIndexBase ||
94 IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
95 }
96
97 /// Return true if this addressing mode is already RIP-relative.
98 bool isRIPRelative() const {
99 if (BaseType != RegBase) return false;
100 if (RegisterSDNode *RegNode =
101 dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
102 return RegNode->getReg() == X86::RIP;
103 return false;
104 }
105
106 void setBaseReg(SDValue Reg) {
107 BaseType = RegBase;
108 Base_Reg = Reg;
109 }
110
111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
112 void dump(SelectionDAG *DAG = nullptr) {
113 dbgs() << "X86ISelAddressMode " << this << '\n';
114 dbgs() << "Base_Reg ";
115 if (Base_Reg.getNode())
116 Base_Reg.getNode()->dump(DAG);
117 else
118 dbgs() << "nul\n";
119 if (BaseType == FrameIndexBase)
120 dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
121 dbgs() << " Scale " << Scale << '\n'
122 << "IndexReg ";
123 if (NegateIndex)
124 dbgs() << "negate ";
125 if (IndexReg.getNode())
126 IndexReg.getNode()->dump(DAG);
127 else
128 dbgs() << "nul\n";
129 dbgs() << " Disp " << Disp << '\n'
130 << "GV ";
131 if (GV)
132 GV->dump();
133 else
134 dbgs() << "nul";
135 dbgs() << " CP ";
136 if (CP)
137 CP->dump();
138 else
139 dbgs() << "nul";
140 dbgs() << '\n'
141 << "ES ";
142 if (ES)
143 dbgs() << ES;
144 else
145 dbgs() << "nul";
146 dbgs() << " MCSym ";
147 if (MCSym)
148 dbgs() << MCSym;
149 else
150 dbgs() << "nul";
151 dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n';
152 }
153#endif
154 };
155}
156
157namespace {
158 //===--------------------------------------------------------------------===//
159 /// ISel - X86-specific code to select X86 machine instructions for
160 /// SelectionDAG operations.
161 ///
162 class X86DAGToDAGISel final : public SelectionDAGISel {
163 /// Keep a pointer to the X86Subtarget around so that we can
164 /// make the right decision when generating code for different targets.
165 const X86Subtarget *Subtarget;
166
167 /// If true, selector should try to optimize for minimum code size.
168 bool OptForMinSize;
169
170 /// Disable direct TLS access through segment registers.
171 bool IndirectTlsSegRefs;
172
173 public:
174 X86DAGToDAGISel() = delete;
175
176 explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOptLevel OptLevel)
177 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
178 OptForMinSize(false), IndirectTlsSegRefs(false) {}
179
180 bool runOnMachineFunction(MachineFunction &MF) override {
181 // Reset the subtarget each time through.
182 Subtarget = &MF.getSubtarget<X86Subtarget>();
183 IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
184 "indirect-tls-seg-refs");
185
186 // OptFor[Min]Size are used in pattern predicates that isel is matching.
187 OptForMinSize = MF.getFunction().hasMinSize();
188 assert((!OptForMinSize || MF.getFunction().hasOptSize()) &&
189 "OptForMinSize implies OptForSize");
190 return SelectionDAGISel::runOnMachineFunction(MF);
191 }
192
193 void emitFunctionEntryCode() override;
194
195 bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
196
197 void PreprocessISelDAG() override;
198 void PostprocessISelDAG() override;
199
200// Include the pieces autogenerated from the target description.
201#include "X86GenDAGISel.inc"
202
203 private:
204 void Select(SDNode *N) override;
205
206 bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
207 bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
208 bool AllowSegmentRegForX32 = false);
209 bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
210 bool matchAddress(SDValue N, X86ISelAddressMode &AM);
211 bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
212 bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
213 SDValue matchIndexRecursively(SDValue N, X86ISelAddressMode &AM,
214 unsigned Depth);
215 bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
216 unsigned Depth);
217 bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
218 unsigned Depth);
219 bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
220 bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
221 SDValue &Scale, SDValue &Index, SDValue &Disp,
222 SDValue &Segment);
223 bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp,
224 SDValue ScaleOp, SDValue &Base, SDValue &Scale,
225 SDValue &Index, SDValue &Disp, SDValue &Segment);
226 bool selectMOV64Imm32(SDValue N, SDValue &Imm);
227 bool selectLEAAddr(SDValue N, SDValue &Base,
228 SDValue &Scale, SDValue &Index, SDValue &Disp,
229 SDValue &Segment);
230 bool selectLEA64_32Addr(SDValue N, SDValue &Base,
231 SDValue &Scale, SDValue &Index, SDValue &Disp,
232 SDValue &Segment);
233 bool selectTLSADDRAddr(SDValue N, SDValue &Base,
234 SDValue &Scale, SDValue &Index, SDValue &Disp,
235 SDValue &Segment);
236 bool selectRelocImm(SDValue N, SDValue &Op);
237
238 bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
239 SDValue &Base, SDValue &Scale,
240 SDValue &Index, SDValue &Disp,
241 SDValue &Segment);
242
243 // Convenience method where P is also root.
244 bool tryFoldLoad(SDNode *P, SDValue N,
245 SDValue &Base, SDValue &Scale,
246 SDValue &Index, SDValue &Disp,
247 SDValue &Segment) {
248 return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
249 }
250
251 bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
252 SDValue &Base, SDValue &Scale,
253 SDValue &Index, SDValue &Disp,
254 SDValue &Segment);
255
256 bool isProfitableToFormMaskedOp(SDNode *N) const;
257
258 /// Implement addressing mode selection for inline asm expressions.
259 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
260 InlineAsm::ConstraintCode ConstraintID,
261 std::vector<SDValue> &OutOps) override;
262
263 void emitSpecialCodeForMain();
264
265 inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
266 MVT VT, SDValue &Base, SDValue &Scale,
267 SDValue &Index, SDValue &Disp,
268 SDValue &Segment) {
269 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
270 Base = CurDAG->getTargetFrameIndex(
271 AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
272 else if (AM.Base_Reg.getNode())
273 Base = AM.Base_Reg;
274 else
275 Base = CurDAG->getRegister(0, VT);
276
277 Scale = getI8Imm(AM.Scale, DL);
278
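// If the subtarget supports APX NDD (new data destination) instructions, use
// the _ND variant of the opcode; otherwise fall back to the legacy form.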
279#define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC)
280 // Negate the index if needed.
281 if (AM.NegateIndex) {
282 unsigned NegOpc = VT == MVT::i64 ? GET_ND_IF_ENABLED(X86::NEG64r)
283 : GET_ND_IF_ENABLED(X86::NEG32r);
284 SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
285 AM.IndexReg), 0);
286 AM.IndexReg = Neg;
287 }
288
289 if (AM.IndexReg.getNode())
290 Index = AM.IndexReg;
291 else
292 Index = CurDAG->getRegister(0, VT);
293
294 // These are 32-bit even in 64-bit mode since RIP-relative offset
295 // is 32-bit.
296 if (AM.GV)
297 Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
298 MVT::i32, AM.Disp,
299 AM.SymbolFlags);
300 else if (AM.CP)
301 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
302 AM.Disp, AM.SymbolFlags);
303 else if (AM.ES) {
304 assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
305 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
306 } else if (AM.MCSym) {
307 assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
308 assert(AM.SymbolFlags == 0 && "oo");
309 Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
310 } else if (AM.JT != -1) {
311 assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
312 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
313 } else if (AM.BlockAddr)
314 Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
315 AM.SymbolFlags);
316 else
317 Disp =
318 CurDAG->getSignedConstant(AM.Disp, DL, MVT::i32, /*isTarget=*/true);
319
320 if (AM.Segment.getNode())
321 Segment = AM.Segment;
322 else
323 Segment = CurDAG->getRegister(0, MVT::i16);
324 }
325
326 // Utility function to determine whether we should avoid selecting
327 // immediate forms of instructions for better code size or not.
328 // At a high level, we'd like to avoid such instructions when
329 // we have similar constants used within the same basic block
330 // that can be kept in a register.
331 //
332 bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
333 uint32_t UseCount = 0;
334
335 // Do not want to hoist if we're not optimizing for size.
336 // TODO: We'd like to remove this restriction.
337 // See the comment in X86InstrInfo.td for more info.
338 if (!CurDAG->shouldOptForSize())
339 return false;
340
341 // Walk all the users of the immediate.
342 for (const SDNode *User : N->uses()) {
343 if (UseCount >= 2)
344 break;
345
346 // This user is already selected. Count it as a legitimate use and
347 // move on.
348 if (User->isMachineOpcode()) {
349 UseCount++;
350 continue;
351 }
352
353 // We want to count stores of immediates as real uses.
354 if (User->getOpcode() == ISD::STORE &&
355 User->getOperand(1).getNode() == N) {
356 UseCount++;
357 continue;
358 }
359
360 // We don't currently match users that have > 2 operands (except
361 // for stores, which are handled above).
362 // Those instructions won't match in ISel, for now, and would
363 // be counted incorrectly.
364 // This may change in the future as we add additional instruction
365 // types.
366 if (User->getNumOperands() != 2)
367 continue;
368
369 // If this is a sign-extended 8-bit integer immediate used in an ALU
370 // instruction, there is probably an opcode encoding to save space.
371 auto *C = dyn_cast<ConstantSDNode>(N);
372 if (C && isInt<8>(C->getSExtValue()))
373 continue;
374
375 // Immediates that are used for offsets as part of stack
376 // manipulation should be left alone. These are typically
377 // used to indicate SP offsets for argument passing and
378 // will get pulled into stores/pushes (implicitly).
379 if (User->getOpcode() == X86ISD::ADD ||
380 User->getOpcode() == ISD::ADD ||
381 User->getOpcode() == X86ISD::SUB ||
382 User->getOpcode() == ISD::SUB) {
383
384 // Find the other operand of the add/sub.
385 SDValue OtherOp = User->getOperand(0);
386 if (OtherOp.getNode() == N)
387 OtherOp = User->getOperand(1);
388
389 // Don't count if the other operand is SP.
390 RegisterSDNode *RegNode;
391 if (OtherOp->getOpcode() == ISD::CopyFromReg &&
392 (RegNode = dyn_cast_or_null<RegisterSDNode>(
393 OtherOp->getOperand(1).getNode())))
394 if ((RegNode->getReg() == X86::ESP) ||
395 (RegNode->getReg() == X86::RSP))
396 continue;
397 }
398
399 // ... otherwise, count this and move on.
400 UseCount++;
401 }
402
403 // If we have more than 1 use, then recommend for hoisting.
404 return (UseCount > 1);
405 }
406
407 /// Return a target constant with the specified value of type i8.
408 inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
409 return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
410 }
411
412 /// Return a target constant with the specified value, of type i32.
413 inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
414 return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
415 }
416
417 /// Return a target constant with the specified value, of type i64.
418 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
419 return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
420 }
421
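// Convert an extract_subvector element index into the 128-/256-bit subvector
// index expected by the VEXTRACT* immediate.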
422 SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
423 const SDLoc &DL) {
424 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
425 uint64_t Index = N->getConstantOperandVal(1);
426 MVT VecVT = N->getOperand(0).getSimpleValueType();
427 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
428 }
429
430 SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
431 const SDLoc &DL) {
432 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
433 uint64_t Index = N->getConstantOperandVal(2);
434 MVT VecVT = N->getSimpleValueType(0);
435 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
436 }
437
438 SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth,
439 const SDLoc &DL) {
440 assert(VecWidth == 128 && "Unexpected vector width");
441 uint64_t Index = N->getConstantOperandVal(2);
442 MVT VecVT = N->getSimpleValueType(0);
443 uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth;
444 assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index");
445 // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub)
446 // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub)
447 return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);
448 }
449
450 SDValue getSBBZero(SDNode *N) {
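// Materialize the carry flag into a register: SBB of zero with itself computes
// 0 - 0 - CF, i.e. all ones when CF is set and zero otherwise.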
451 SDLoc dl(N);
452 MVT VT = N->getSimpleValueType(0);
453
454 // Create zero.
455 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
456 SDValue Zero = SDValue(
457 CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0);
458 if (VT == MVT::i64) {
459 Zero = SDValue(
460 CurDAG->getMachineNode(
461 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
462 CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
463 CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
464 0);
465 }
466
467 // Copy flags to the EFLAGS register and glue it to next node.
468 unsigned Opcode = N->getOpcode();
469 assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) &&
470 "Unexpected opcode for SBB materialization");
471 unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1;
472 SDValue EFLAGS =
473 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
474 N->getOperand(FlagOpIndex), SDValue());
475
476 // Create a 64-bit instruction if the result is 64-bits otherwise use the
477 // 32-bit version.
478 unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
479 MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
480 VTs = CurDAG->getVTList(SBBVT, MVT::i32);
481 return SDValue(
482 CurDAG->getMachineNode(Opc, dl, VTs,
483 {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}),
484 0);
485 }
486
487 // Helper to detect unneeded AND instructions on shift amounts. Called
488 // from PatFrags in tablegen.
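// The AND can be dropped when each of the low Width bits is either preserved
// by the mask or already known to be zero in the shift amount.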
489 bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
490 assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
491 const APInt &Val = N->getConstantOperandAPInt(1);
492
493 if (Val.countr_one() >= Width)
494 return true;
495
496 APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
497 return Mask.countr_one() >= Width;
498 }
499
500 /// Return an SDNode that returns the value of the global base register.
501 /// Output instructions required to initialize the global base register,
502 /// if necessary.
503 SDNode *getGlobalBaseReg();
504
505 /// Return a reference to the TargetMachine, casted to the target-specific
506 /// type.
507 const X86TargetMachine &getTargetMachine() const {
508 return static_cast<const X86TargetMachine &>(TM);
509 }
510
511 /// Return a reference to the TargetInstrInfo, casted to the target-specific
512 /// type.
513 const X86InstrInfo *getInstrInfo() const {
514 return Subtarget->getInstrInfo();
515 }
516
517 /// Return a condition code of the given SDNode
518 X86::CondCode getCondFromNode(SDNode *N) const;
519
520 /// Address-mode matching performs shift-of-and to and-of-shift
521 /// reassociation in order to expose more scaled addressing
522 /// opportunities.
523 bool ComplexPatternFuncMutatesDAG() const override {
524 return true;
525 }
526
527 bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
528
529 // Indicates we should prefer to use a non-temporal load for this load.
530 bool useNonTemporalLoad(LoadSDNode *N) const {
531 if (!N->isNonTemporal())
532 return false;
533
534 unsigned StoreSize = N->getMemoryVT().getStoreSize();
535
536 if (N->getAlign().value() < StoreSize)
537 return false;
538
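// Non-temporal vector loads map to (V)MOVNTDQA, which requires SSE4.1 for
// 16-byte, AVX2 for 32-byte, and AVX-512 for 64-byte accesses.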
539 switch (StoreSize) {
540 default: llvm_unreachable("Unsupported store size");
541 case 4:
542 case 8:
543 return false;
544 case 16:
545 return Subtarget->hasSSE41();
546 case 32:
547 return Subtarget->hasAVX2();
548 case 64:
549 return Subtarget->hasAVX512();
550 }
551 }
552
553 bool foldLoadStoreIntoMemOperand(SDNode *Node);
554 MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
555 bool matchBitExtract(SDNode *Node);
556 bool shrinkAndImmediate(SDNode *N);
557 bool isMaskZeroExtended(SDNode *N) const;
558 bool tryShiftAmountMod(SDNode *N);
559 bool tryShrinkShlLogicImm(SDNode *N);
560 bool tryVPTERNLOG(SDNode *N);
561 bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB,
562 SDNode *ParentC, SDValue A, SDValue B, SDValue C,
563 uint8_t Imm);
564 bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
565 bool tryMatchBitSelect(SDNode *N);
566
567 MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
568 const SDLoc &dl, MVT VT, SDNode *Node);
569 MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
570 const SDLoc &dl, MVT VT, SDNode *Node,
571 SDValue &InGlue);
572
573 bool tryOptimizeRem8Extend(SDNode *N);
574
575 bool onlyUsesZeroFlag(SDValue Flags) const;
576 bool hasNoSignFlagUses(SDValue Flags) const;
577 bool hasNoCarryFlagUses(SDValue Flags) const;
578 };
579
580 class X86DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
581 public:
582 static char ID;
583 explicit X86DAGToDAGISelLegacy(X86TargetMachine &tm,
584 CodeGenOptLevel OptLevel)
585 : SelectionDAGISelLegacy(
586 ID, std::make_unique<X86DAGToDAGISel>(tm, OptLevel)) {}
587 };
588}
589
590char X86DAGToDAGISelLegacy::ID = 0;
591
592INITIALIZE_PASS(X86DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
593
594// Returns true if this masked compare can be implemented legally with this
595// type.
596static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
597 unsigned Opcode = N->getOpcode();
598 if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM ||
599 Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC ||
600 Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) {
601 // We can get 256-bit 8 element types here without VLX being enabled. When
602 // this happens we will use 512-bit operations and the mask will not be
603 // zero extended.
604 EVT OpVT = N->getOperand(0).getValueType();
605 // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the
606 // second operand.
607 if (Opcode == X86ISD::STRICT_CMPM)
608 OpVT = N->getOperand(1).getValueType();
609 if (OpVT.is256BitVector() || OpVT.is128BitVector())
610 return Subtarget->hasVLX();
611
612 return true;
613 }
614 // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
615 if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
616 Opcode == X86ISD::FSETCCM_SAE)
617 return true;
618
619 return false;
620}
621
622// Returns true if we can assume the writer of the mask has zero extended it
623// for us.
624bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
625 // If this is an AND, check if we have a compare on either side. As long as
626 // one side guarantees the mask is zero extended, the AND will preserve those
627 // zeros.
628 if (N->getOpcode() == ISD::AND)
629 return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
630 isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
631
632 return isLegalMaskCompare(N, Subtarget);
633}
634
635bool
636X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
637 if (OptLevel == CodeGenOptLevel::None)
638 return false;
639
640 if (!N.hasOneUse())
641 return false;
642
643 if (N.getOpcode() != ISD::LOAD)
644 return true;
645
646 // Don't fold non-temporal loads if we have an instruction for them.
647 if (useNonTemporalLoad(cast<LoadSDNode>(N)))
648 return false;
649
650 // If N is a load, do additional profitability checks.
651 if (U == Root) {
652 switch (U->getOpcode()) {
653 default: break;
654 case X86ISD::ADD:
655 case X86ISD::ADC:
656 case X86ISD::SUB:
657 case X86ISD::SBB:
658 case X86ISD::AND:
659 case X86ISD::XOR:
660 case X86ISD::OR:
661 case ISD::ADD:
662 case ISD::UADDO_CARRY:
663 case ISD::AND:
664 case ISD::OR:
665 case ISD::XOR: {
666 SDValue Op1 = U->getOperand(1);
667
668 // If the other operand is an 8-bit immediate we should fold the immediate
669 // instead. This reduces code size.
670 // e.g.
671 // movl 4(%esp), %eax
672 // addl $4, %eax
673 // vs.
674 // movl $4, %eax
675 // addl 4(%esp), %eax
676 // The former is 2 bytes shorter. In the case where the increment is 1,
677 // the saving can be 4 bytes (by using incl %eax).
678 if (auto *Imm = dyn_cast<ConstantSDNode>(Op1)) {
679 if (Imm->getAPIntValue().isSignedIntN(8))
680 return false;
681
682 // If this is a 64-bit AND with an immediate that fits in 32-bits,
683 // prefer using the smaller and over folding the load. This is needed to
684 // make sure immediates created by shrinkAndImmediate are always folded.
685 // Ideally we would narrow the load during DAG combine and get the
686 // best of both worlds.
687 if (U->getOpcode() == ISD::AND &&
688 Imm->getAPIntValue().getBitWidth() == 64 &&
689 Imm->getAPIntValue().isIntN(32))
690 return false;
691
692 // If this is really a zext_inreg that can be represented with a movzx
693 // instruction, prefer that.
694 // TODO: We could shrink the load and fold if it is non-volatile.
695 if (U->getOpcode() == ISD::AND &&
696 (Imm->getAPIntValue() == UINT8_MAX ||
697 Imm->getAPIntValue() == UINT16_MAX ||
698 Imm->getAPIntValue() == UINT32_MAX))
699 return false;
700
701 // ADD/SUB can negate the immediate and use the opposite operation
702 // to fit 128 into a sign-extended 8-bit immediate.
703 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
704 (-Imm->getAPIntValue()).isSignedIntN(8))
705 return false;
706
707 if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) &&
708 (-Imm->getAPIntValue()).isSignedIntN(8) &&
709 hasNoCarryFlagUses(SDValue(U, 1)))
710 return false;
711 }
712
713 // If the other operand is a TLS address, we should fold it instead.
714 // This produces
715 // movl %gs:0, %eax
716 // leal i@NTPOFF(%eax), %eax
717 // instead of
718 // movl $i@NTPOFF, %eax
719 // addl %gs:0, %eax
720 // if the block also has an access to a second TLS address this will save
721 // a load.
722 // FIXME: This is probably also true for non-TLS addresses.
723 if (Op1.getOpcode() == X86ISD::Wrapper) {
724 SDValue Val = Op1.getOperand(0);
725 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
726 return false;
727 }
728
729 // Don't fold load if this matches the BTS/BTR/BTC patterns.
730 // BTS: (or X, (shl 1, n))
731 // BTR: (and X, (rotl -2, n))
732 // BTC: (xor X, (shl 1, n))
733 if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
734 if (U->getOperand(0).getOpcode() == ISD::SHL &&
735 isOneConstant(U->getOperand(0).getOperand(0)))
736 return false;
737
738 if (U->getOperand(1).getOpcode() == ISD::SHL &&
739 isOneConstant(U->getOperand(1).getOperand(0)))
740 return false;
741 }
742 if (U->getOpcode() == ISD::AND) {
743 SDValue U0 = U->getOperand(0);
744 SDValue U1 = U->getOperand(1);
745 if (U0.getOpcode() == ISD::ROTL) {
746 auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
747 if (C && C->getSExtValue() == -2)
748 return false;
749 }
750
751 if (U1.getOpcode() == ISD::ROTL) {
752 auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
753 if (C && C->getSExtValue() == -2)
754 return false;
755 }
756 }
757
758 break;
759 }
760 case ISD::SHL:
761 case ISD::SRA:
762 case ISD::SRL:
763 // Don't fold a load into a shift by immediate. The BMI2 instructions
764 // support folding a load, but not an immediate. The legacy instructions
765 // support folding an immediate, but can't fold a load. Folding an
766 // immediate is preferable to folding a load.
767 if (isa<ConstantSDNode>(U->getOperand(1)))
768 return false;
769
770 break;
771 }
772 }
773
774 // Prevent folding a load if this can be implemented with an insert_subreg or
775 // a move that implicitly zeroes.
776 if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
777 isNullConstant(Root->getOperand(2)) &&
778 (Root->getOperand(0).isUndef() ||
779 ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
780 return false;
781
782 return true;
783}
784
785// Indicates it is profitable to form an AVX512 masked operation. Returning
786 // false will favor a register-register masked move or vblendm and the
787// operation will be selected separately.
788bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const {
789 assert(
790 (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) &&
791 "Unexpected opcode!");
792
793 // If the operation has additional users, the operation will be duplicated.
794 // Check the use count to prevent that.
795 // FIXME: Are there cheap opcodes we might want to duplicate?
796 return N->getOperand(1).hasOneUse();
797}
798
799/// Replace the original chain operand of the call with
800/// load's chain operand and move load below the call's chain operand.
801static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
802 SDValue Call, SDValue OrigChain) {
803 SmallVector<SDValue, 8> Ops;
804 SDValue Chain = OrigChain.getOperand(0);
805 if (Chain.getNode() == Load.getNode())
806 Ops.push_back(Load.getOperand(0));
807 else {
808 assert(Chain.getOpcode() == ISD::TokenFactor &&
809 "Unexpected chain operand");
810 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
811 if (Chain.getOperand(i).getNode() == Load.getNode())
812 Ops.push_back(Load.getOperand(0));
813 else
814 Ops.push_back(Chain.getOperand(i));
815 SDValue NewChain =
816 CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
817 Ops.clear();
818 Ops.push_back(NewChain);
819 }
820 Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
821 CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
822 CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
823 Load.getOperand(1), Load.getOperand(2));
824
825 Ops.clear();
826 Ops.push_back(SDValue(Load.getNode(), 1));
827 Ops.append(Call->op_begin() + 1, Call->op_end());
828 CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
829}
830
831/// Return true if the call address is a load and it can be
832/// moved below CALLSEQ_START and the chains leading up to the call.
833/// Return the CALLSEQ_START by reference as a second output.
834/// In the case of a tail call, there isn't a callseq node between the call
835/// chain and the load.
836static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
837 // The transformation is somewhat dangerous if the call's chain was glued to
838 // the call. After MoveBelowOrigChain the load is moved between the call and
839 // the chain, this can create a cycle if the load is not folded. So it is
840 // *really* important that we are sure the load will be folded.
841 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
842 return false;
843 auto *LD = dyn_cast<LoadSDNode>(Callee.getNode());
844 if (!LD ||
845 !LD->isSimple() ||
846 LD->getAddressingMode() != ISD::UNINDEXED ||
847 LD->getExtensionType() != ISD::NON_EXTLOAD)
848 return false;
849
850 // Now let's find the callseq_start.
851 while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
852 if (!Chain.hasOneUse())
853 return false;
854 Chain = Chain.getOperand(0);
855 }
856
857 if (!Chain.getNumOperands())
858 return false;
859 // Since we are not checking for AA here, conservatively abort if the chain
860 // writes to memory. It's not safe to move the callee (a load) across a store.
861 if (isa<MemSDNode>(Chain.getNode()) &&
862 cast<MemSDNode>(Chain.getNode())->writeMem())
863 return false;
864 if (Chain.getOperand(0).getNode() == Callee.getNode())
865 return true;
866 if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
867 Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
868 Callee.getValue(1).hasOneUse())
869 return true;
870 return false;
871}
872
873static bool isEndbrImm64(uint64_t Imm) {
874// There may be some other prefix bytes between 0xF3 and 0x0F1EFA.
875// e.g. 0xF3660F1EFA, 0xF3670F1EFA
876 if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
877 return false;
878
879 uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
880 0x65, 0x66, 0x67, 0xf0, 0xf2};
881 int i = 24; // the low 24 bits (0x0F1EFA) have already been matched
882 while (i < 64) {
883 uint8_t Byte = (Imm >> i) & 0xFF;
884 if (Byte == 0xF3)
885 return true;
886 if (!llvm::is_contained(OptionalPrefixBytes, Byte))
887 return false;
888 i += 8;
889 }
890
891 return false;
892}
893
894static bool needBWI(MVT VT) {
895 return (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v64i8);
896}
897
898void X86DAGToDAGISel::PreprocessISelDAG() {
899 bool MadeChange = false;
900 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
901 E = CurDAG->allnodes_end(); I != E; ) {
902 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
903
904 // This is for CET enhancement.
905 //
906 // ENDBR32 and ENDBR64 have specific opcodes:
907 // ENDBR32: F3 0F 1E FB
908 // ENDBR64: F3 0F 1E FA
909 // We want to ensure that attackers cannot find unintended ENDBR32/64
910 // opcode matches in the binary.
911 // Here’s an example:
912 // If the compiler had to generate asm for the following code:
913 // a = 0xF30F1EFA
914 // it could, for example, generate:
915 // mov 0xF30F1EFA, dword ptr[a]
916 // In such a case, the binary would include a gadget that starts
917 // with a fake ENDBR64 opcode. Therefore, we split such generation
918 // into multiple operations so that it does not show up in the binary.
919 if (N->getOpcode() == ISD::Constant) {
920 MVT VT = N->getSimpleValueType(0);
921 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
922 int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
923 if (Imm == EndbrImm || isEndbrImm64(Imm)) {
924 // Check that the cf-protection-branch is enabled.
925 Metadata *CFProtectionBranch =
927 "cf-protection-branch");
928 if (CFProtectionBranch || IndirectBranchTracking) {
929 SDLoc dl(N);
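// Materialize the complemented immediate and re-invert it with a NOT, so the
// raw ENDBR byte pattern never appears as an immediate in the generated code.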
930 SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
931 Complement = CurDAG->getNOT(dl, Complement, VT);
932 --I;
933 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement);
934 ++I;
935 MadeChange = true;
936 continue;
937 }
938 }
939 }
940
941 // If this is a target specific AND node with no flag usages, turn it back
942 // into ISD::AND to enable test instruction matching.
943 if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
944 SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
945 N->getOperand(0), N->getOperand(1));
946 --I;
947 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
948 ++I;
949 MadeChange = true;
950 continue;
951 }
952
953 // Convert vector increment or decrement to sub/add with an all-ones
954 // constant:
955 // add X, <1, 1...> --> sub X, <-1, -1...>
956 // sub X, <1, 1...> --> add X, <-1, -1...>
957 // The all-ones vector constant can be materialized using a pcmpeq
958 // instruction that is commonly recognized as an idiom (has no register
959 // dependency), so that's better/smaller than loading a splat 1 constant.
960 //
961 // But don't do this if it would inhibit a potentially profitable load
962 // folding opportunity for the other operand. That only occurs with the
963 // intersection of:
964 // (1) The other operand (op0) is load foldable.
965 // (2) The op is an add (otherwise, we are *creating* an add and can still
966 // load fold the other op).
967 // (3) The target has AVX (otherwise, we have a destructive add and can't
968 // load fold the other op without killing the constant op).
969 // (4) The constant 1 vector has multiple uses (so it is profitable to load
970 // into a register anyway).
971 auto mayPreventLoadFold = [&]() {
972 return X86::mayFoldLoad(N->getOperand(0), *Subtarget) &&
973 N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
974 !N->getOperand(1).hasOneUse();
975 };
976 if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
977 N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
978 APInt SplatVal;
979 if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
980 SplatVal.isOne()) {
981 SDLoc DL(N);
982
983 MVT VT = N->getSimpleValueType(0);
984 unsigned NumElts = VT.getSizeInBits() / 32;
985 SDValue AllOnes =
986 CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts));
987 AllOnes = CurDAG->getBitcast(VT, AllOnes);
988
989 unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
990 SDValue Res =
991 CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
992 --I;
993 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
994 ++I;
995 MadeChange = true;
996 continue;
997 }
998 }
999
1000 switch (N->getOpcode()) {
1001 case X86ISD::VBROADCAST: {
1002 MVT VT = N->getSimpleValueType(0);
1003 // Emulate v32i16/v64i8 broadcast without BWI.
1004 if (!Subtarget->hasBWI() && needBWI(VT)) {
1005 MVT NarrowVT = VT.getHalfNumVectorElementsVT();
1006 SDLoc dl(N);
1007 SDValue NarrowBCast =
1008 CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0));
1009 SDValue Res =
1010 CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
1011 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1012 unsigned Index = NarrowVT.getVectorMinNumElements();
1013 Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
1014 CurDAG->getIntPtrConstant(Index, dl));
1015
1016 --I;
1017 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1018 ++I;
1019 MadeChange = true;
1020 continue;
1021 }
1022
1023 break;
1024 }
1025 case X86ISD::VBROADCAST_LOAD: {
1026 MVT VT = N->getSimpleValueType(0);
1027 // Emulate v32i16/v64i8 broadcast without BWI.
1028 if (!Subtarget->hasBWI() && needBWI(VT)) {
1029 MVT NarrowVT = VT.getHalfNumVectorElementsVT();
1030 auto *MemNode = cast<MemSDNode>(N);
1031 SDLoc dl(N);
1032 SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
1033 SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
1034 SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
1035 X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(),
1036 MemNode->getMemOperand());
1037 SDValue Res =
1038 CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
1039 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1040 unsigned Index = NarrowVT.getVectorMinNumElements();
1041 Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
1042 CurDAG->getIntPtrConstant(Index, dl));
1043
1044 --I;
1045 SDValue To[] = {Res, NarrowBCast.getValue(1)};
1046 CurDAG->ReplaceAllUsesWith(N, To);
1047 ++I;
1048 MadeChange = true;
1049 continue;
1050 }
1051
1052 break;
1053 }
1054 case ISD::LOAD: {
1055 // If this is a XMM/YMM load of the same lower bits as another YMM/ZMM
1056 // load, then just extract the lower subvector and avoid the second load.
1057 auto *Ld = cast<LoadSDNode>(N);
1058 MVT VT = N->getSimpleValueType(0);
1059 if (!ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
1060 !(VT.is128BitVector() || VT.is256BitVector()))
1061 break;
1062
1063 MVT MaxVT = VT;
1064 SDNode *MaxLd = nullptr;
1065 SDValue Ptr = Ld->getBasePtr();
1066 SDValue Chain = Ld->getChain();
1067 for (SDNode *User : Ptr->uses()) {
1068 auto *UserLd = dyn_cast<LoadSDNode>(User);
1069 MVT UserVT = User->getSimpleValueType(0);
1070 if (User != N && UserLd && ISD::isNormalLoad(User) &&
1071 UserLd->getBasePtr() == Ptr && UserLd->getChain() == Chain &&
1072 !User->hasAnyUseOfValue(1) &&
1073 (UserVT.is256BitVector() || UserVT.is512BitVector()) &&
1074 UserVT.getSizeInBits() > VT.getSizeInBits() &&
1075 (!MaxLd || UserVT.getSizeInBits() > MaxVT.getSizeInBits())) {
1076 MaxLd = User;
1077 MaxVT = UserVT;
1078 }
1079 }
1080 if (MaxLd) {
1081 SDLoc dl(N);
1082 unsigned NumSubElts = VT.getSizeInBits() / MaxVT.getScalarSizeInBits();
1083 MVT SubVT = MVT::getVectorVT(MaxVT.getScalarType(), NumSubElts);
1084 SDValue Extract = CurDAG->getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT,
1085 SDValue(MaxLd, 0),
1086 CurDAG->getIntPtrConstant(0, dl));
1087 SDValue Res = CurDAG->getBitcast(VT, Extract);
1088
1089 --I;
1090 SDValue To[] = {Res, SDValue(MaxLd, 1)};
1091 CurDAG->ReplaceAllUsesWith(N, To);
1092 ++I;
1093 MadeChange = true;
1094 continue;
1095 }
1096 break;
1097 }
1098 case ISD::VSELECT: {
1099 // Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG.
1100 EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
1101 if (EleVT == MVT::i1)
1102 break;
1103
1104 assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
1105 assert(N->getValueType(0).getVectorElementType() != MVT::i16 &&
1106 "We can't replace VSELECT with BLENDV in vXi16!");
1107 SDValue R;
1108 if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) ==
1109 EleVT.getSizeInBits()) {
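// VPTERNLOG immediate 0xCA computes A ? B : C, i.e. a bitwise select keyed on
// the all-ones/all-zeros condition operand.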
1110 R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0),
1111 N->getOperand(0), N->getOperand(1), N->getOperand(2),
1112 CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8));
1113 } else {
1114 R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
1115 N->getOperand(0), N->getOperand(1),
1116 N->getOperand(2));
1117 }
1118 --I;
1119 CurDAG->ReplaceAllUsesWith(N, R.getNode());
1120 ++I;
1121 MadeChange = true;
1122 continue;
1123 }
1124 case ISD::FP_ROUND:
1125 case ISD::STRICT_FP_ROUND:
1126 case ISD::FP_TO_SINT:
1127 case ISD::FP_TO_UINT:
1128 case ISD::STRICT_FP_TO_SINT:
1129 case ISD::STRICT_FP_TO_UINT: {
1130 // Replace vector fp_to_s/uint with their X86 specific equivalent so we
1131 // don't need 2 sets of patterns.
1132 if (!N->getSimpleValueType(0).isVector())
1133 break;
1134
1135 unsigned NewOpc;
1136 switch (N->getOpcode()) {
1137 default: llvm_unreachable("Unexpected opcode!");
1138 case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break;
1139 case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break;
1140 case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break;
1141 case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
1142 case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break;
1143 case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
1144 }
1145 SDValue Res;
1146 if (N->isStrictFPOpcode())
1147 Res =
1148 CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
1149 {N->getOperand(0), N->getOperand(1)});
1150 else
1151 Res =
1152 CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1153 N->getOperand(0));
1154 --I;
1155 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1156 ++I;
1157 MadeChange = true;
1158 continue;
1159 }
1160 case ISD::SHL:
1161 case ISD::SRA:
1162 case ISD::SRL: {
1163 // Replace vector shifts with their X86 specific equivalent so we don't
1164 // need 2 sets of patterns.
1165 if (!N->getValueType(0).isVector())
1166 break;
1167
1168 unsigned NewOpc;
1169 switch (N->getOpcode()) {
1170 default: llvm_unreachable("Unexpected opcode!");
1171 case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
1172 case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
1173 case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
1174 }
1175 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1176 N->getOperand(0), N->getOperand(1));
1177 --I;
1178 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1179 ++I;
1180 MadeChange = true;
1181 continue;
1182 }
1183 case ISD::ANY_EXTEND:
1184 case ISD::ANY_EXTEND_VECTOR_INREG: {
1185 // Replace vector any extend with the zero extend equivalents so we don't
1186 // need 2 sets of patterns. Ignore vXi1 extensions.
1187 if (!N->getValueType(0).isVector())
1188 break;
1189
1190 unsigned NewOpc;
1191 if (N->getOperand(0).getScalarValueSizeInBits() == 1) {
1192 assert(N->getOpcode() == ISD::ANY_EXTEND &&
1193 "Unexpected opcode for mask vector!");
1194 NewOpc = ISD::SIGN_EXTEND;
1195 } else {
1196 NewOpc = N->getOpcode() == ISD::ANY_EXTEND
1197 ? ISD::ZERO_EXTEND
1198 : ISD::ZERO_EXTEND_VECTOR_INREG;
1199 }
1200
1201 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1202 N->getOperand(0));
1203 --I;
1204 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1205 ++I;
1206 MadeChange = true;
1207 continue;
1208 }
1209 case ISD::FCEIL:
1210 case ISD::STRICT_FCEIL:
1211 case ISD::FFLOOR:
1212 case ISD::STRICT_FFLOOR:
1213 case ISD::FTRUNC:
1214 case ISD::STRICT_FTRUNC:
1215 case ISD::FROUNDEVEN:
1216 case ISD::STRICT_FROUNDEVEN:
1217 case ISD::FNEARBYINT:
1218 case ISD::STRICT_FNEARBYINT:
1219 case ISD::FRINT:
1220 case ISD::STRICT_FRINT: {
1221 // Replace fp rounding with their X86 specific equivalent so we don't
1222 // need 2 sets of patterns.
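// The immediate is the (V)ROUND/VRNDSCALE control byte: bits [1:0] pick the
// rounding mode (0=nearest, 1=floor, 2=ceil, 3=trunc), bit 2 selects the MXCSR
// rounding mode instead, and bit 3 suppresses precision (inexact) exceptions.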
1223 unsigned Imm;
1224 switch (N->getOpcode()) {
1225 default: llvm_unreachable("Unexpected opcode!");
1226 case ISD::STRICT_FCEIL:
1227 case ISD::FCEIL: Imm = 0xA; break;
1228 case ISD::STRICT_FFLOOR:
1229 case ISD::FFLOOR: Imm = 0x9; break;
1230 case ISD::STRICT_FTRUNC:
1231 case ISD::FTRUNC: Imm = 0xB; break;
1232 case ISD::STRICT_FROUNDEVEN:
1233 case ISD::FROUNDEVEN: Imm = 0x8; break;
1234 case ISD::STRICT_FNEARBYINT:
1235 case ISD::FNEARBYINT: Imm = 0xC; break;
1236 case ISD::STRICT_FRINT:
1237 case ISD::FRINT: Imm = 0x4; break;
1238 }
1239 SDLoc dl(N);
1240 bool IsStrict = N->isStrictFPOpcode();
1241 SDValue Res;
1242 if (IsStrict)
1243 Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
1244 {N->getValueType(0), MVT::Other},
1245 {N->getOperand(0), N->getOperand(1),
1246 CurDAG->getTargetConstant(Imm, dl, MVT::i32)});
1247 else
1248 Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
1249 N->getOperand(0),
1250 CurDAG->getTargetConstant(Imm, dl, MVT::i32));
1251 --I;
1252 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1253 ++I;
1254 MadeChange = true;
1255 continue;
1256 }
1257 case X86ISD::FANDN:
1258 case X86ISD::FAND:
1259 case X86ISD::FOR:
1260 case X86ISD::FXOR: {
1261 // Widen scalar fp logic ops to vector to reduce isel patterns.
1262 // FIXME: Can we do this during lowering/combine.
1263 MVT VT = N->getSimpleValueType(0);
1264 if (VT.isVector() || VT == MVT::f128)
1265 break;
1266
1267 MVT VecVT = VT == MVT::f64 ? MVT::v2f64
1268 : VT == MVT::f32 ? MVT::v4f32
1269 : MVT::v8f16;
1270
1271 SDLoc dl(N);
1272 SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
1273 N->getOperand(0));
1274 SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
1275 N->getOperand(1));
1276
1277 SDValue Res;
1278 if (Subtarget->hasSSE2()) {
1279 EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
1280 Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
1281 Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
1282 unsigned Opc;
1283 switch (N->getOpcode()) {
1284 default: llvm_unreachable("Unexpected opcode!");
1285 case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
1286 case X86ISD::FAND: Opc = ISD::AND; break;
1287 case X86ISD::FOR: Opc = ISD::OR; break;
1288 case X86ISD::FXOR: Opc = ISD::XOR; break;
1289 }
1290 Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
1291 Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
1292 } else {
1293 Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
1294 }
1295 Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
1296 CurDAG->getIntPtrConstant(0, dl));
1297 --I;
1298 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1299 ++I;
1300 MadeChange = true;
1301 continue;
1302 }
1303 }
1304
1305 if (OptLevel != CodeGenOptLevel::None &&
1306 // Only do this when the target can fold the load into the call or
1307 // jmp.
1308 !Subtarget->useIndirectThunkCalls() &&
1309 ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
1310 (N->getOpcode() == X86ISD::TC_RETURN &&
1311 (Subtarget->is64Bit() ||
1312 !getTargetMachine().isPositionIndependent())))) {
1313 /// Also try moving call address load from outside callseq_start to just
1314 /// before the call to allow it to be folded.
1315 ///
1316 /// [Load chain]
1317 /// ^
1318 /// |
1319 /// [Load]
1320 /// ^ ^
1321 /// | |
1322 /// / \--
1323 /// / |
1324 ///[CALLSEQ_START] |
1325 /// ^ |
1326 /// | |
1327 /// [LOAD/C2Reg] |
1328 /// | |
1329 /// \ /
1330 /// \ /
1331 /// [CALL]
1332 bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
1333 SDValue Chain = N->getOperand(0);
1334 SDValue Load = N->getOperand(1);
1335 if (!isCalleeLoad(Load, Chain, HasCallSeq))
1336 continue;
1337 moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
1338 ++NumLoadMoved;
1339 MadeChange = true;
1340 continue;
1341 }
1342
1343 // Lower fpround and fpextend nodes that target the FP stack to be store and
1344 // load to the stack. This is a gross hack. We would like to simply mark
1345 // these as being illegal, but when we do that, legalize produces these when
1346 // it expands calls, then expands these in the same legalize pass. We would
1347 // like dag combine to be able to hack on these between the call expansion
1348 // and the node legalization. As such this pass basically does "really
1349 // late" legalization of these inline with the X86 isel pass.
1350 // FIXME: This should only happen when not compiled with -O0.
1351 switch (N->getOpcode()) {
1352 default: continue;
1353 case ISD::FP_ROUND:
1354 case ISD::FP_EXTEND:
1355 {
1356 MVT SrcVT = N->getOperand(0).getSimpleValueType();
1357 MVT DstVT = N->getSimpleValueType(0);
1358
1359 // If any of the sources are vectors, no fp stack involved.
1360 if (SrcVT.isVector() || DstVT.isVector())
1361 continue;
1362
1363 // If the source and destination are SSE registers, then this is a legal
1364 // conversion that should not be lowered.
1365 const X86TargetLowering *X86Lowering =
1366 static_cast<const X86TargetLowering *>(TLI);
1367 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1368 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1369 if (SrcIsSSE && DstIsSSE)
1370 continue;
1371
1372 if (!SrcIsSSE && !DstIsSSE) {
1373 // If this is an FPStack extension, it is a noop.
1374 if (N->getOpcode() == ISD::FP_EXTEND)
1375 continue;
1376 // If this is a value-preserving FPStack truncation, it is a noop.
1377 if (N->getConstantOperandVal(1))
1378 continue;
1379 }
1380
1381 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1382 // FPStack has extload and truncstore. SSE can fold direct loads into other
1383 // operations. Based on this, decide what we want to do.
1384 MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT;
1385 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1386 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1387 MachinePointerInfo MPI =
1388 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1389 SDLoc dl(N);
1390
1391 // FIXME: optimize the case where the src/dest is a load or store?
1392
1393 SDValue Store = CurDAG->getTruncStore(
1394 CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);
1395 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store,
1396 MemTmp, MPI, MemVT);
1397
1398 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1399 // extload we created. This will cause general havok on the dag because
1400 // anything below the conversion could be folded into other existing nodes.
1401 // To avoid invalidating 'I', back it up to the convert node.
1402 --I;
1403 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1404 break;
1405 }
1406
1407 // The sequence of events for lowering STRICT_FP versions of these nodes requires
1408 // dealing with the chain differently, as there is already a preexisting chain.
1409 case ISD::STRICT_FP_ROUND:
1410 case ISD::STRICT_FP_EXTEND:
1411 {
1412 MVT SrcVT = N->getOperand(1).getSimpleValueType();
1413 MVT DstVT = N->getSimpleValueType(0);
1414
1415 // If any of the sources are vectors, no fp stack involved.
1416 if (SrcVT.isVector() || DstVT.isVector())
1417 continue;
1418
1419 // If the source and destination are SSE registers, then this is a legal
1420 // conversion that should not be lowered.
1421 const X86TargetLowering *X86Lowering =
1422 static_cast<const X86TargetLowering *>(TLI);
1423 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1424 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1425 if (SrcIsSSE && DstIsSSE)
1426 continue;
1427
1428 if (!SrcIsSSE && !DstIsSSE) {
1429 // If this is an FPStack extension, it is a noop.
1430 if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1431 continue;
1432 // If this is a value-preserving FPStack truncation, it is a noop.
1433 if (N->getConstantOperandVal(2))
1434 continue;
1435 }
1436
1437 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1438 // FPStack has extload and truncstore. SSE can fold direct loads into other
1439 // operations. Based on this, decide what we want to do.
1440 MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT;
1441 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1442 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1443 MachinePointerInfo MPI =
1444 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1445 SDLoc dl(N);
1446
1447 // FIXME: optimize the case where the src/dest is a load or store?
1448
1449 // Since the operation is StrictFP, use the preexisting chain.
1450 SDValue Store, Result;
1451 if (!SrcIsSSE) {
1452 SDVTList VTs = CurDAG->getVTList(MVT::Other);
1453 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
1454 Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT,
1455 MPI, /*Align*/ std::nullopt,
1456 MachineMemOperand::MOStore);
1457 if (N->getFlags().hasNoFPExcept()) {
1458 SDNodeFlags Flags = Store->getFlags();
1459 Flags.setNoFPExcept(true);
1460 Store->setFlags(Flags);
1461 }
1462 } else {
1463 assert(SrcVT == MemVT && "Unexpected VT!");
1464 Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
1465 MPI);
1466 }
1467
1468 if (!DstIsSSE) {
1469 SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
1470 SDValue Ops[] = {Store, MemTmp};
1471 Result = CurDAG->getMemIntrinsicNode(
1472 X86ISD::FLD, dl, VTs, Ops, MemVT, MPI,
1473 /*Align*/ std::nullopt, MachineMemOperand::MOLoad);
1474 if (N->getFlags().hasNoFPExcept()) {
1475 SDNodeFlags Flags = Result->getFlags();
1476 Flags.setNoFPExcept(true);
1477 Result->setFlags(Flags);
1478 }
1479 } else {
1480 assert(DstVT == MemVT && "Unexpected VT!");
1481 Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);
1482 }
1483
1484 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1485 // extload we created. This will cause general havoc on the DAG because
1486 // anything below the conversion could be folded into other existing nodes.
1487 // To avoid invalidating 'I', back it up to the convert node.
1488 --I;
1489 CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1490 break;
1491 }
1492 }
1493
1494
1495 // Now that we did that, the node is dead. Increment the iterator to the
1496 // next node to process, then delete N.
1497 ++I;
1498 MadeChange = true;
1499 }
1500
1501 // Remove any dead nodes that may have been left behind.
1502 if (MadeChange)
1503 CurDAG->RemoveDeadNodes();
1504}
1505
1506// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1507bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
1508 unsigned Opc = N->getMachineOpcode();
1509 if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1510 Opc != X86::MOVSX64rr8)
1511 return false;
1512
1513 SDValue N0 = N->getOperand(0);
1514
1515 // We need to be extracting the low byte of an extend.
1516 if (!N0.isMachineOpcode() ||
1517 N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
1518 N0.getConstantOperandVal(1) != X86::sub_8bit)
1519 return false;
1520
1521 // We're looking for either a movsx or movzx to match the original opcode.
1522 unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1523 : X86::MOVSX32rr8_NOREX;
1524 SDValue N00 = N0.getOperand(0);
1525 if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
1526 return false;
1527
1528 if (Opc == X86::MOVSX64rr8) {
1529 // If we had a sign extend from 8 to 64 bits, we still need to go from 32
1530 // to 64.
1531 MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1532 MVT::i64, N00);
1533 ReplaceUses(N, Extend);
1534 } else {
1535 // Ok we can drop this extend and just use the original extend.
1536 ReplaceUses(N, N00.getNode());
1537 }
1538
1539 return true;
1540}
1541
1542void X86DAGToDAGISel::PostprocessISelDAG() {
1543 // Skip peepholes at -O0.
1544 if (TM.getOptLevel() == CodeGenOptLevel::None)
1545 return;
1546
1547 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1548
1549 bool MadeChange = false;
1550 while (Position != CurDAG->allnodes_begin()) {
1551 SDNode *N = &*--Position;
1552 // Skip dead nodes and any non-machine opcodes.
1553 if (N->use_empty() || !N->isMachineOpcode())
1554 continue;
1555
1556 if (tryOptimizeRem8Extend(N)) {
1557 MadeChange = true;
1558 continue;
1559 }
1560
1561 unsigned Opc = N->getMachineOpcode();
1562 switch (Opc) {
1563 default:
1564 continue;
1565 // ANDrr/rm + TESTrr -> TESTrr/TESTmr
1566 case X86::TEST8rr:
1567 case X86::TEST16rr:
1568 case X86::TEST32rr:
1569 case X86::TEST64rr:
1570 // ANDrr/rm + CTESTrr -> CTESTrr/CTESTmr
1571 case X86::CTEST8rr:
1572 case X86::CTEST16rr:
1573 case X86::CTEST32rr:
1574 case X86::CTEST64rr: {
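// TEST computes the AND of its operands and only produces EFLAGS, so an AND
// that feeds both TEST operands and has no other users can be folded into the
// TEST itself.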
1575 auto &Op0 = N->getOperand(0);
1576 if (Op0 != N->getOperand(1) || !Op0->hasNUsesOfValue(2, Op0.getResNo()) ||
1577 !Op0.isMachineOpcode())
1578 continue;
1579 SDValue And = N->getOperand(0);
1580#define CASE_ND(OP) \
1581 case X86::OP: \
1582 case X86::OP##_ND:
1583 switch (And.getMachineOpcode()) {
1584 default:
1585 continue;
1586 CASE_ND(AND8rr)
1587 CASE_ND(AND16rr)
1588 CASE_ND(AND32rr)
1589 CASE_ND(AND64rr) {
1590 if (And->hasAnyUseOfValue(1))
1591 continue;
1592 SmallVector<SDValue> Ops(N->op_values());
1593 Ops[0] = And.getOperand(0);
1594 Ops[1] = And.getOperand(1);
1595 MachineSDNode *Test =
1596 CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, Ops);
1597 ReplaceUses(N, Test);
1598 MadeChange = true;
1599 continue;
1600 }
1601 CASE_ND(AND8rm)
1602 CASE_ND(AND16rm)
1603 CASE_ND(AND32rm)
1604 CASE_ND(AND64rm) {
1605 if (And->hasAnyUseOfValue(1))
1606 continue;
1607 unsigned NewOpc;
1608 bool IsCTESTCC = X86::isCTESTCC(Opc);
1609#define FROM_TO(A, B) \
1610 CASE_ND(A) NewOpc = IsCTESTCC ? X86::C##B : X86::B; \
1611 break;
1612 switch (And.getMachineOpcode()) {
1613 FROM_TO(AND8rm, TEST8mr);
1614 FROM_TO(AND16rm, TEST16mr);
1615 FROM_TO(AND32rm, TEST32mr);
1616 FROM_TO(AND64rm, TEST64mr);
1617 }
1618#undef FROM_TO
1619#undef CASE_ND
1620 // Need to swap the memory and register operand.
1621 SmallVector<SDValue> Ops = {And.getOperand(1), And.getOperand(2),
1622 And.getOperand(3), And.getOperand(4),
1623 And.getOperand(5), And.getOperand(0)};
1624 // CC, Cflags.
1625 if (IsCTESTCC) {
1626 Ops.push_back(N->getOperand(2));
1627 Ops.push_back(N->getOperand(3));
1628 }
1629 // Chain of memory load
1630 Ops.push_back(And.getOperand(6));
1631 // Glue
1632 if (IsCTESTCC)
1633 Ops.push_back(N->getOperand(4));
1634
1635 MachineSDNode *Test = CurDAG->getMachineNode(
1636 NewOpc, SDLoc(N), MVT::i32, MVT::Other, Ops);
1637 CurDAG->setNodeMemRefs(
1638 Test, cast<MachineSDNode>(And.getNode())->memoperands());
1639 ReplaceUses(And.getValue(2), SDValue(Test, 1));
1640 ReplaceUses(SDValue(N, 0), SDValue(Test, 0));
1641 MadeChange = true;
1642 continue;
1643 }
1644 }
1645 }
1646 // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1647 // used. We're doing this late so we can prefer to fold the AND into masked
1648 // comparisons. Doing that can be better for the live range of the mask
1649 // register.
1650 case X86::KORTESTBrr:
1651 case X86::KORTESTWrr:
1652 case X86::KORTESTDrr:
1653 case X86::KORTESTQrr: {
1654 SDValue Op0 = N->getOperand(0);
1655 if (Op0 != N->getOperand(1) || !N->isOnlyUserOf(Op0.getNode()) ||
1656 !Op0.isMachineOpcode() || !onlyUsesZeroFlag(SDValue(N, 0)))
1657 continue;
1658#define CASE(A) \
1659 case X86::A: \
1660 break;
1661 switch (Op0.getMachineOpcode()) {
1662 default:
1663 continue;
1664 CASE(KANDBrr)
1665 CASE(KANDWrr)
1666 CASE(KANDDrr)
1667 CASE(KANDQrr)
1668 }
1669 unsigned NewOpc;
1670#define FROM_TO(A, B) \
1671 case X86::A: \
1672 NewOpc = X86::B; \
1673 break;
1674 switch (Opc) {
1675 FROM_TO(KORTESTBrr, KTESTBrr)
1676 FROM_TO(KORTESTWrr, KTESTWrr)
1677 FROM_TO(KORTESTDrr, KTESTDrr)
1678 FROM_TO(KORTESTQrr, KTESTQrr)
1679 }
1680 // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1681 // KAND instructions and KTEST use the same ISA feature.
1682 if (NewOpc == X86::KTESTWrr && !Subtarget->hasDQI())
1683 continue;
1684#undef FROM_TO
1685 MachineSDNode *KTest = CurDAG->getMachineNode(
1686 NewOpc, SDLoc(N), MVT::i32, Op0.getOperand(0), Op0.getOperand(1));
1687 ReplaceUses(N, KTest);
1688 MadeChange = true;
1689 continue;
1690 }
1691 // Attempt to remove vector moves that were inserted to zero upper bits.
1692 case TargetOpcode::SUBREG_TO_REG: {
1693 unsigned SubRegIdx = N->getConstantOperandVal(2);
1694 if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1695 continue;
1696
1697 SDValue Move = N->getOperand(1);
1698 if (!Move.isMachineOpcode())
1699 continue;
1700
1701 // Make sure it's one of the move opcodes we recognize.
1702 switch (Move.getMachineOpcode()) {
1703 default:
1704 continue;
1705 CASE(VMOVAPDrr) CASE(VMOVUPDrr)
1706 CASE(VMOVAPSrr) CASE(VMOVUPSrr)
1707 CASE(VMOVDQArr) CASE(VMOVDQUrr)
1708 CASE(VMOVAPDYrr) CASE(VMOVUPDYrr)
1709 CASE(VMOVAPSYrr) CASE(VMOVUPSYrr)
1710 CASE(VMOVDQAYrr) CASE(VMOVDQUYrr)
1711 CASE(VMOVAPDZ128rr) CASE(VMOVUPDZ128rr)
1712 CASE(VMOVAPSZ128rr) CASE(VMOVUPSZ128rr)
1713 CASE(VMOVDQA32Z128rr) CASE(VMOVDQU32Z128rr)
1714 CASE(VMOVDQA64Z128rr) CASE(VMOVDQU64Z128rr)
1715 CASE(VMOVAPDZ256rr) CASE(VMOVUPDZ256rr)
1716 CASE(VMOVAPSZ256rr) CASE(VMOVUPSZ256rr)
1717 CASE(VMOVDQA32Z256rr) CASE(VMOVDQU32Z256rr)
1718 CASE(VMOVDQA64Z256rr) CASE(VMOVDQU64Z256rr)
1719 }
1720#undef CASE
1721
1722 SDValue In = Move.getOperand(0);
1723 if (!In.isMachineOpcode() ||
1724 In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1725 continue;
1726
1727 // Make sure the instruction has a VEX, XOP, or EVEX prefix. This excludes
1728 // legacy-encoded instructions such as the SHA instructions.
1729 uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1730 if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1731 (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1732 (TSFlags & X86II::EncodingMask) != X86II::XOP)
1733 continue;
1734
1735 // The producing instruction is another vector instruction, so we can drop
1736 // the move.
1737 CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1738 MadeChange = true;
1739 }
1740 }
1741 }
1742
1743 if (MadeChange)
1744 CurDAG->RemoveDeadNodes();
1745}
1746
1747
1748/// Emit any code that needs to be executed only in the main function.
1749void X86DAGToDAGISel::emitSpecialCodeForMain() {
1750 if (Subtarget->isTargetCygMing()) {
1751 TargetLowering::ArgListTy Args;
1752 auto &DL = CurDAG->getDataLayout();
1753
1754 TargetLowering::CallLoweringInfo CLI(*CurDAG);
1755 CLI.setChain(CurDAG->getRoot())
1756 .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1757 CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1758 std::move(Args));
1759 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1760 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1761 CurDAG->setRoot(Result.second);
1762 }
1763}
1764
1765void X86DAGToDAGISel::emitFunctionEntryCode() {
1766 // If this is main, emit special code for main.
1767 const Function &F = MF->getFunction();
1768 if (F.hasExternalLinkage() && F.getName() == "main")
1769 emitSpecialCodeForMain();
1770}
1771
1772static bool isDispSafeForFrameIndex(int64_t Val) {
1773 // On 64-bit platforms, we can run into an issue where a frame index
1774 // includes a displacement that, when added to the explicit displacement,
1775 // will overflow the displacement field. Assuming that the frame index
1776 // displacement fits into a 31-bit integer (which is only slightly more
1777 // aggressive than the current fundamental assumption that it fits into
1778 // a 32-bit integer), a 31-bit disp should always be safe.
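// (Illustration: two displacements that each fit in 31 signed bits sum to a
// value that still fits in 32 signed bits, so the combined displacement
// cannot overflow the disp32 field.)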
1779 return isInt<31>(Val);
1780}
1781
1782bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1783 X86ISelAddressMode &AM) {
1784 // We may have already matched a displacement and the caller just added the
1785 // symbolic displacement. So we still need to do the checks even if Offset
1786 // is zero.
1787
1788 int64_t Val = AM.Disp + Offset;
1789
1790 // Cannot combine ExternalSymbol displacements with integer offsets.
1791 if (Val != 0 && (AM.ES || AM.MCSym))
1792 return true;
1793
1794 CodeModel::Model M = TM.getCodeModel();
1795 if (Subtarget->is64Bit()) {
1796 if (Val != 0 &&
1797 !X86::isOffsetSuitableForCodeModel(Val, M,
1798 AM.hasSymbolicDisplacement()))
1799 return true;
1800 // In addition to the checks required for a register base, check that
1801 // we do not try to use an unsafe Disp with a frame index.
1802 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1803 !isDispSafeForFrameIndex(Val))
1804 return true;
1805 // In ILP32 (x32) mode, pointers are 32 bits and need to be zero-extended to
1806 // 64 bits. Instructions with 32-bit register addresses perform this zero
1807 // extension for us and we can safely ignore the high bits of Offset.
1808 // Instructions with only a 32-bit immediate address do not, though: they
1809 // sign extend instead. This means only the low 2GB of the address space is
1810 // directly addressable; we need indirect addressing for the high 2GB of
1811 // address space.
1812 // TODO: Some of the earlier checks may be relaxed for ILP32 mode as the
1813 // implicit zero extension of instructions would cover up any problem.
1814 // However, we have asserts elsewhere that get triggered if we do, so keep
1815 // the checks for now.
1816 // TODO: We would actually be able to accept these, as well as the same
1817 // addresses in LP64 mode, by adding the EIZ pseudo-register as an operand
1818 // to get an address size override to be emitted. However, this
1819 // pseudo-register is not part of any register class and therefore causes
1820 // MIR verification to fail.
1821 if (Subtarget->isTarget64BitILP32() && !isUInt<31>(Val) &&
1822 !AM.hasBaseOrIndexReg())
1823 return true;
1824 }
1825 AM.Disp = Val;
1826 return false;
1827}
1828
1829bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
1830 bool AllowSegmentRegForX32) {
1831 SDValue Address = N->getOperand(1);
1832
1833 // load gs:0 -> GS segment register.
1834 // load fs:0 -> FS segment register.
1835 //
1836 // This optimization is generally valid because the GNU TLS model defines that
1837 // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
1838 // with 32-bit registers, as we get in ILP32 mode, those registers are first
1839 // zero-extended to 64 bits and then added to the base address, which gives
1840 // unwanted results when the register holds a negative value.
1841 // For more information see http://people.redhat.com/drepper/tls.pdf
1842 if (isNullConstant(Address) && AM.Segment.getNode() == nullptr &&
1843 !IndirectTlsSegRefs &&
1844 (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1845 Subtarget->isTargetFuchsia())) {
1846 if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
1847 return true;
1848 switch (N->getPointerInfo().getAddrSpace()) {
1849 case X86AS::GS:
1850 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1851 return false;
1852 case X86AS::FS:
1853 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1854 return false;
1855 // Address space X86AS::SS is not handled here, because it is not used to
1856 // address TLS areas.
1857 }
1858 }
1859
1860 return true;
1861}
1862
1863/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1864/// mode. These wrap things that will resolve down into a symbol reference.
1865/// If no match is possible, this returns true, otherwise it returns false.
1866bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1867 // If the addressing mode already has a symbol as the displacement, we can
1868 // never match another symbol.
1869 if (AM.hasSymbolicDisplacement())
1870 return true;
1871
1872 bool IsRIPRelTLS = false;
1873 bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1874 if (IsRIPRel) {
1875 SDValue Val = N.getOperand(0);
1876 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1877 IsRIPRelTLS = true;
1878 }
1879
1880 // We can't use an addressing mode in the 64-bit large code model.
1881 // Global TLS addressing is an exception. In the medium code model,
1882 // we can use such a mode when RIP wrappers are present.
1883 // That signifies access to globals that are known to be "near",
1884 // such as the GOT itself.
1885 CodeModel::Model M = TM.getCodeModel();
1886 if (Subtarget->is64Bit() && M == CodeModel::Large && !IsRIPRelTLS)
1887 return true;
1888
1889 // Base and index reg must be 0 in order to use %rip as base.
1890 if (IsRIPRel && AM.hasBaseOrIndexReg())
1891 return true;
1892
1893 // Make a local copy in case we can't do this fold.
1894 X86ISelAddressMode Backup = AM;
1895
1896 int64_t Offset = 0;
1897 SDValue N0 = N.getOperand(0);
1898 if (auto *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1899 AM.GV = G->getGlobal();
1900 AM.SymbolFlags = G->getTargetFlags();
1901 Offset = G->getOffset();
1902 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1903 AM.CP = CP->getConstVal();
1904 AM.Alignment = CP->getAlign();
1905 AM.SymbolFlags = CP->getTargetFlags();
1906 Offset = CP->getOffset();
1907 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1908 AM.ES = S->getSymbol();
1909 AM.SymbolFlags = S->getTargetFlags();
1910 } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1911 AM.MCSym = S->getMCSymbol();
1912 } else if (auto *J = dyn_cast<JumpTableSDNode>(N0)) {
1913 AM.JT = J->getIndex();
1914 AM.SymbolFlags = J->getTargetFlags();
1915 } else if (auto *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1916 AM.BlockAddr = BA->getBlockAddress();
1917 AM.SymbolFlags = BA->getTargetFlags();
1918 Offset = BA->getOffset();
1919 } else
1920 llvm_unreachable("Unhandled symbol reference node.");
1921
1922 // Can't use an addressing mode with large globals.
1923 if (Subtarget->is64Bit() && !IsRIPRel && AM.GV &&
1924 TM.isLargeGlobalValue(AM.GV)) {
1925 AM = Backup;
1926 return true;
1927 }
1928
1929 if (foldOffsetIntoAddress(Offset, AM)) {
1930 AM = Backup;
1931 return true;
1932 }
1933
1934 if (IsRIPRel)
1935 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1936
1937 // Commit the changes now that we know this fold is safe.
1938 return false;
1939}
1940
1941/// Add the specified node to the specified addressing mode, returning true if
1942/// it cannot be done. This just pattern matches for the addressing mode.
1943bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1944 if (matchAddressRecursively(N, AM, 0))
1945 return true;
1946
1947 // Post-processing: Make a second attempt to fold a load, if we now know
1948 // that there will not be any other register. This is only performed for
1949 // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
1950 // any foldable load the first time.
1951 if (Subtarget->isTarget64BitILP32() &&
1952 AM.BaseType == X86ISelAddressMode::RegBase &&
1953 AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
1954 SDValue Save_Base_Reg = AM.Base_Reg;
1955 if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
1956 AM.Base_Reg = SDValue();
1957 if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
1958 AM.Base_Reg = Save_Base_Reg;
1959 }
1960 }
1961
1962 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1963 // a smaller encoding and avoids a scaled-index.
1964 if (AM.Scale == 2 &&
1965 AM.BaseType == X86ISelAddressMode::RegBase &&
1966 AM.Base_Reg.getNode() == nullptr) {
1967 AM.Base_Reg = AM.IndexReg;
1968 AM.Scale = 1;
1969 }
1970
1971 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1972 // because it has a smaller encoding.
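// (Illustration, assuming standard x86-64 encodings: an absolute [disp32]
// operand needs a SIB byte in 64-bit mode, while a RIP-relative disp32 does
// not, so the foo(%rip) form is one byte shorter.)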
1973 if (TM.getCodeModel() != CodeModel::Large &&
1974 (!AM.GV || !TM.isLargeGlobalValue(AM.GV)) && Subtarget->is64Bit() &&
1975 AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase &&
1976 AM.Base_Reg.getNode() == nullptr && AM.IndexReg.getNode() == nullptr &&
1977 AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) {
1978 AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1979 }
1980
1981 return false;
1982}
1983
1984bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1985 unsigned Depth) {
1986 // Add an artificial use to this node so that we can keep track of
1987 // it if it gets CSE'd with a different node.
1988 HandleSDNode Handle(N);
1989
1990 X86ISelAddressMode Backup = AM;
1991 if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1992 !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1993 return false;
1994 AM = Backup;
1995
1996 // Try again after commuting the operands.
1997 if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
1998 Depth + 1) &&
1999 !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
2000 return false;
2001 AM = Backup;
2002
2003 // If we couldn't fold both operands into the address at the same time,
2004 // see if we can just put each operand into a register and fold at least
2005 // the add.
2006 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2007 !AM.Base_Reg.getNode() &&
2008 !AM.IndexReg.getNode()) {
2009 N = Handle.getValue();
2010 AM.Base_Reg = N.getOperand(0);
2011 AM.IndexReg = N.getOperand(1);
2012 AM.Scale = 1;
2013 return false;
2014 }
2015 N = Handle.getValue();
2016 return true;
2017}
2018
2019// Insert a node into the DAG at least before the Pos node's position. This
2020// will reposition the node as needed, and will assign it a node ID that is <=
2021// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
2022// IDs! The selection DAG must no longer depend on their uniqueness when this
2023// is used.
2024static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
2025 if (N->getNodeId() == -1 ||
2026 (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
2027 SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
2028 DAG.RepositionNode(Pos->getIterator(), N.getNode());
2029 // Mark Node as invalid for pruning as after this it may be a successor to a
2030 // selected node but otherwise be in the same position of Pos.
2031 // Conservatively mark it with the same -abs(Id) to assure node id
2032 // invariant is preserved.
2033 N->setNodeId(Pos->getNodeId());
2034 SelectionDAGISel::InvalidateNodeId(N.getNode());
2035 }
2036}
2037
2038// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
2039// safe. This allows us to convert the shift and and into an h-register
2040// extract and a scaled index. Returns false if the simplification is
2041// performed.
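// Illustration: with C1 == 2, "(X >> 6) & 0x3fc" becomes
// "((X >> 8) & 0xff) << 2", i.e. a byte extract whose result is used as an
// index with scale 4.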
2042 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
2043 uint64_t Mask,
2044 SDValue Shift, SDValue X,
2045 X86ISelAddressMode &AM) {
2046 if (Shift.getOpcode() != ISD::SRL ||
2047 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
2048 !Shift.hasOneUse())
2049 return true;
2050
2051 int ScaleLog = 8 - Shift.getConstantOperandVal(1);
2052 if (ScaleLog <= 0 || ScaleLog >= 4 ||
2053 Mask != (0xffu << ScaleLog))
2054 return true;
2055
2056 MVT XVT = X.getSimpleValueType();
2057 MVT VT = N.getSimpleValueType();
2058 SDLoc DL(N);
2059 SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
2060 SDValue NewMask = DAG.getConstant(0xff, DL, XVT);
2061 SDValue Srl = DAG.getNode(ISD::SRL, DL, XVT, X, Eight);
2062 SDValue And = DAG.getNode(ISD::AND, DL, XVT, Srl, NewMask);
2063 SDValue Ext = DAG.getZExtOrTrunc(And, DL, VT);
2064 SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
2065 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Ext, ShlCount);
2066
2067 // Insert the new nodes into the topological ordering. We must do this in
2068 // a valid topological ordering as nothing is going to go back and re-sort
2069 // these nodes. We continually insert before 'N' in sequence as this is
2070 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2071 // hierarchy left to express.
2072 insertDAGNode(DAG, N, Eight);
2073 insertDAGNode(DAG, N, NewMask);
2074 insertDAGNode(DAG, N, Srl);
2075 insertDAGNode(DAG, N, And);
2076 insertDAGNode(DAG, N, Ext);
2077 insertDAGNode(DAG, N, ShlCount);
2078 insertDAGNode(DAG, N, Shl);
2079 DAG.ReplaceAllUsesWith(N, Shl);
2080 DAG.RemoveDeadNode(N.getNode());
2081 AM.IndexReg = Ext;
2082 AM.Scale = (1 << ScaleLog);
2083 return false;
2084}
2085
2086// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
2087// allows us to fold the shift into this addressing mode. Returns false if the
2088// transform succeeded.
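// Illustration: "(X << 2) & 0x3c" becomes "(X & 0xf) << 2", and the
// resulting shift-by-2 can then be absorbed as an index scale of 4.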
2089 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
2090 X86ISelAddressMode &AM) {
2091 SDValue Shift = N.getOperand(0);
2092
2093 // Use a signed mask so that shifting right will insert sign bits. These
2094 // bits will be removed when we shift the result left so it doesn't matter
2095 // what we use. This might allow a smaller immediate encoding.
2096 int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
2097
2098 // If we have an any_extend feeding the AND, look through it to see if there
2099 // is a shift behind it. But only if the AND doesn't use the extended bits.
2100 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
2101 bool FoundAnyExtend = false;
2102 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
2103 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
2104 isUInt<32>(Mask)) {
2105 FoundAnyExtend = true;
2106 Shift = Shift.getOperand(0);
2107 }
2108
2109 if (Shift.getOpcode() != ISD::SHL ||
2110 !isa<ConstantSDNode>(Shift.getOperand(1)))
2111 return true;
2112
2113 SDValue X = Shift.getOperand(0);
2114
2115 // Not likely to be profitable if either the AND or SHIFT node has more
2116 // than one use (unless all uses are for address computation). Besides,
2117 // isel mechanism requires their node ids to be reused.
2118 if (!N.hasOneUse() || !Shift.hasOneUse())
2119 return true;
2120
2121 // Verify that the shift amount is something we can fold.
2122 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2123 if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
2124 return true;
2125
2126 MVT VT = N.getSimpleValueType();
2127 SDLoc DL(N);
2128 if (FoundAnyExtend) {
2129 SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
2130 insertDAGNode(DAG, N, NewX);
2131 X = NewX;
2132 }
2133
2134 SDValue NewMask = DAG.getSignedConstant(Mask >> ShiftAmt, DL, VT);
2135 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
2136 SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
2137
2138 // Insert the new nodes into the topological ordering. We must do this in
2139 // a valid topological ordering as nothing is going to go back and re-sort
2140 // these nodes. We continually insert before 'N' in sequence as this is
2141 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2142 // hierarchy left to express.
2143 insertDAGNode(DAG, N, NewMask);
2144 insertDAGNode(DAG, N, NewAnd);
2145 insertDAGNode(DAG, N, NewShift);
2146 DAG.ReplaceAllUsesWith(N, NewShift);
2147 DAG.RemoveDeadNode(N.getNode());
2148
2149 AM.Scale = 1 << ShiftAmt;
2150 AM.IndexReg = NewAnd;
2151 return false;
2152}
2153
2154// Implement some heroics to detect shifts of masked values where the mask can
2155// be replaced by extending the shift and undoing that in the addressing mode
2156// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
2157// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
2158// the addressing mode. This results in code such as:
2159//
2160// int f(short *y, int *lookup_table) {
2161// ...
2162// return *y + lookup_table[*y >> 11];
2163// }
2164//
2165// Turning into:
2166// movzwl (%rdi), %eax
2167// movl %eax, %ecx
2168// shrl $11, %ecx
2169// addl (%rsi,%rcx,4), %eax
2170//
2171// Instead of:
2172// movzwl (%rdi), %eax
2173// movl %eax, %ecx
2174// shrl $9, %ecx
2175// andl $124, %rcx
2176// addl (%rsi,%rcx), %eax
2177//
2178// Note that this function assumes the mask is provided as a mask *after* the
2179// value is shifted. The input chain may or may not match that, but computing
2180// such a mask is trivial.
2181 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
2182 uint64_t Mask,
2183 SDValue Shift, SDValue X,
2184 X86ISelAddressMode &AM) {
2185 if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
2186 !isa<ConstantSDNode>(Shift.getOperand(1)))
2187 return true;
2188
2189 // We need to ensure that mask is a continuous run of bits.
2190 unsigned MaskIdx, MaskLen;
2191 if (!isShiftedMask_64(Mask, MaskIdx, MaskLen))
2192 return true;
2193 unsigned MaskLZ = 64 - (MaskIdx + MaskLen);
2194
2195 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2196
2197 // The amount of shift we're trying to fit into the addressing mode is taken
2198 // from the shifted mask index (number of trailing zeros of the mask).
2199 unsigned AMShiftAmt = MaskIdx;
2200
2201 // There is nothing we can do here unless the mask is removing some bits.
2202 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
2203 if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
2204
2205 // Scale the leading zero count down based on the actual size of the value.
2206 // Also scale it down based on the size of the shift.
2207 unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
2208 if (MaskLZ < ScaleDown)
2209 return true;
2210 MaskLZ -= ScaleDown;
2211
2212 // The final check is to ensure that any masked out high bits of X are
2213 // already known to be zero. Otherwise, the mask has a semantic impact
2214 // other than masking out a couple of low bits. Unfortunately, because of
2215 // the mask, zero extensions will be removed from operands in some cases.
2216 // This code works extra hard to look through extensions because we can
2217 // replace them with zero extensions cheaply if necessary.
2218 bool ReplacingAnyExtend = false;
2219 if (X.getOpcode() == ISD::ANY_EXTEND) {
2220 unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
2221 X.getOperand(0).getSimpleValueType().getSizeInBits();
2222 // Assume that we'll replace the any-extend with a zero-extend, and
2223 // narrow the search to the extended value.
2224 X = X.getOperand(0);
2225 MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
2226 ReplacingAnyExtend = true;
2227 }
2228 APInt MaskedHighBits =
2229 APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
2230 if (!DAG.MaskedValueIsZero(X, MaskedHighBits))
2231 return true;
2232
2233 // We've identified a pattern that can be transformed into a single shift
2234 // and an addressing mode. Make it so.
2235 MVT VT = N.getSimpleValueType();
2236 if (ReplacingAnyExtend) {
2237 assert(X.getValueType() != VT);
2238 // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
2239 SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
2240 insertDAGNode(DAG, N, NewX);
2241 X = NewX;
2242 }
2243
2244 MVT XVT = X.getSimpleValueType();
2245 SDLoc DL(N);
2246 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
2247 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
2248 SDValue NewExt = DAG.getZExtOrTrunc(NewSRL, DL, VT);
2249 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
2250 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
2251
2252 // Insert the new nodes into the topological ordering. We must do this in
2253 // a valid topological ordering as nothing is going to go back and re-sort
2254 // these nodes. We continually insert before 'N' in sequence as this is
2255 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2256 // hierarchy left to express.
2257 insertDAGNode(DAG, N, NewSRLAmt);
2258 insertDAGNode(DAG, N, NewSRL);
2259 insertDAGNode(DAG, N, NewExt);
2260 insertDAGNode(DAG, N, NewSHLAmt);
2261 insertDAGNode(DAG, N, NewSHL);
2262 DAG.ReplaceAllUsesWith(N, NewSHL);
2263 DAG.RemoveDeadNode(N.getNode());
2264
2265 AM.Scale = 1 << AMShiftAmt;
2266 AM.IndexReg = NewExt;
2267 return false;
2268}
2269
2270// Transform "(X >> SHIFT) & (MASK << C1)" to
2271// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
2272// matched to a BEXTR later. Returns false if the simplification is performed.
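// Illustration: with SHIFT == 4, C1 == 2 and MASK == 0xff,
// "(X >> 4) & (0xff << 2)" becomes "((X >> 6) & 0xff) << 2"; the inner part
// can later match BEXTR and the trailing shift becomes an index scale of 4.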
2273 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
2274 uint64_t Mask,
2275 SDValue Shift, SDValue X,
2276 X86ISelAddressMode &AM,
2277 const X86Subtarget &Subtarget) {
2278 if (Shift.getOpcode() != ISD::SRL ||
2279 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
2280 !Shift.hasOneUse() || !N.hasOneUse())
2281 return true;
2282
2283 // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
2284 if (!Subtarget.hasTBM() &&
2285 !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
2286 return true;
2287
2288 // We need to ensure that mask is a continuous run of bits.
2289 unsigned MaskIdx, MaskLen;
2290 if (!isShiftedMask_64(Mask, MaskIdx, MaskLen))
2291 return true;
2292
2293 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2294
2295 // The amount of shift we're trying to fit into the addressing mode is taken
2296 // from the shifted mask index (number of trailing zeros of the mask).
2297 unsigned AMShiftAmt = MaskIdx;
2298
2299 // There is nothing we can do here unless the mask is removing some bits.
2300 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
2301 if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
2302
2303 MVT XVT = X.getSimpleValueType();
2304 MVT VT = N.getSimpleValueType();
2305 SDLoc DL(N);
2306 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
2307 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
2308 SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, XVT);
2309 SDValue NewAnd = DAG.getNode(ISD::AND, DL, XVT, NewSRL, NewMask);
2310 SDValue NewExt = DAG.getZExtOrTrunc(NewAnd, DL, VT);
2311 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
2312 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
2313
2314 // Insert the new nodes into the topological ordering. We must do this in
2315 // a valid topological ordering as nothing is going to go back and re-sort
2316 // these nodes. We continually insert before 'N' in sequence as this is
2317 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2318 // hierarchy left to express.
2319 insertDAGNode(DAG, N, NewSRLAmt);
2320 insertDAGNode(DAG, N, NewSRL);
2321 insertDAGNode(DAG, N, NewMask);
2322 insertDAGNode(DAG, N, NewAnd);
2323 insertDAGNode(DAG, N, NewExt);
2324 insertDAGNode(DAG, N, NewSHLAmt);
2325 insertDAGNode(DAG, N, NewSHL);
2326 DAG.ReplaceAllUsesWith(N, NewSHL);
2327 DAG.RemoveDeadNode(N.getNode());
2328
2329 AM.Scale = 1 << AMShiftAmt;
2330 AM.IndexReg = NewExt;
2331 return false;
2332}
2333
2334// Attempt to peek further into a scaled index register, collecting additional
2335 // extensions / offsets / etc. Returns \p N if we can't peek any further.
2336SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
2337 X86ISelAddressMode &AM,
2338 unsigned Depth) {
2339 assert(AM.IndexReg.getNode() == nullptr && "IndexReg already matched");
2340 assert((AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8) &&
2341 "Illegal index scale");
2342
2343 // Limit recursion.
2344 if (Depth >= SelectionDAG::MaxRecursionDepth)
2345 return N;
2346
2347 EVT VT = N.getValueType();
2348 unsigned Opc = N.getOpcode();
2349
2350 // index: add(x,c) -> index: x, disp + c
2351 if (CurDAG->isBaseWithConstantOffset(N)) {
2352 auto *AddVal = cast<ConstantSDNode>(N.getOperand(1));
2353 uint64_t Offset = (uint64_t)AddVal->getSExtValue() * AM.Scale;
2354 if (!foldOffsetIntoAddress(Offset, AM))
2355 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2356 }
2357
2358 // index: add(x,x) -> index: x, scale * 2
2359 if (Opc == ISD::ADD && N.getOperand(0) == N.getOperand(1)) {
2360 if (AM.Scale <= 4) {
2361 AM.Scale *= 2;
2362 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2363 }
2364 }
2365
2366 // index: shl(x,i) -> index: x, scale * (1 << i)
2367 if (Opc == X86ISD::VSHLI) {
2368 uint64_t ShiftAmt = N.getConstantOperandVal(1);
2369 uint64_t ScaleAmt = 1ULL << ShiftAmt;
2370 if ((AM.Scale * ScaleAmt) <= 8) {
2371 AM.Scale *= ScaleAmt;
2372 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2373 }
2374 }
2375
2376 // index: sext(add_nsw(x,c)) -> index: sext(x), disp + sext(c)
2377 // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext?
2378 if (Opc == ISD::SIGN_EXTEND && !VT.isVector() && N.hasOneUse()) {
2379 SDValue Src = N.getOperand(0);
2380 if (Src.getOpcode() == ISD::ADD && Src->getFlags().hasNoSignedWrap() &&
2381 Src.hasOneUse()) {
2382 if (CurDAG->isBaseWithConstantOffset(Src)) {
2383 SDValue AddSrc = Src.getOperand(0);
2384 auto *AddVal = cast<ConstantSDNode>(Src.getOperand(1));
2385 uint64_t Offset = (uint64_t)AddVal->getSExtValue();
2386 if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
2387 SDLoc DL(N);
2388 SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
2389 SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
2390 SDValue ExtAdd = CurDAG->getNode(ISD::ADD, DL, VT, ExtSrc, ExtVal);
2391 insertDAGNode(*CurDAG, N, ExtSrc);
2392 insertDAGNode(*CurDAG, N, ExtVal);
2393 insertDAGNode(*CurDAG, N, ExtAdd);
2394 CurDAG->ReplaceAllUsesWith(N, ExtAdd);
2395 CurDAG->RemoveDeadNode(N.getNode());
2396 return ExtSrc;
2397 }
2398 }
2399 }
2400 }
2401
2402 // index: zext(add_nuw(x,c)) -> index: zext(x), disp + zext(c)
2403 // index: zext(addlike(x,c)) -> index: zext(x), disp + zext(c)
2404 // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext?
2405 if (Opc == ISD::ZERO_EXTEND && !VT.isVector() && N.hasOneUse()) {
2406 SDValue Src = N.getOperand(0);
2407 unsigned SrcOpc = Src.getOpcode();
2408 if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
2409 CurDAG->isADDLike(Src, /*NoWrap=*/true)) &&
2410 Src.hasOneUse()) {
2411 if (CurDAG->isBaseWithConstantOffset(Src)) {
2412 SDValue AddSrc = Src.getOperand(0);
2413 uint64_t Offset = Src.getConstantOperandVal(1);
2414 if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
2415 SDLoc DL(N);
2416 SDValue Res;
2417 // If we're also scaling, see if we can use that as well.
2418 if (AddSrc.getOpcode() == ISD::SHL &&
2419 isa<ConstantSDNode>(AddSrc.getOperand(1))) {
2420 SDValue ShVal = AddSrc.getOperand(0);
2421 uint64_t ShAmt = AddSrc.getConstantOperandVal(1);
2422 APInt HiBits =
2423 APInt::getHighBitsSet(AddSrc.getScalarValueSizeInBits(), ShAmt);
2424 uint64_t ScaleAmt = 1ULL << ShAmt;
2425 if ((AM.Scale * ScaleAmt) <= 8 &&
2426 (AddSrc->getFlags().hasNoUnsignedWrap() ||
2427 CurDAG->MaskedValueIsZero(ShVal, HiBits))) {
2428 AM.Scale *= ScaleAmt;
2429 SDValue ExtShVal = CurDAG->getNode(Opc, DL, VT, ShVal);
2430 SDValue ExtShift = CurDAG->getNode(ISD::SHL, DL, VT, ExtShVal,
2431 AddSrc.getOperand(1));
2432 insertDAGNode(*CurDAG, N, ExtShVal);
2433 insertDAGNode(*CurDAG, N, ExtShift);
2434 AddSrc = ExtShift;
2435 Res = ExtShVal;
2436 }
2437 }
2438 SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
2439 SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
2440 SDValue ExtAdd = CurDAG->getNode(SrcOpc, DL, VT, ExtSrc, ExtVal);
2441 insertDAGNode(*CurDAG, N, ExtSrc);
2442 insertDAGNode(*CurDAG, N, ExtVal);
2443 insertDAGNode(*CurDAG, N, ExtAdd);
2444 CurDAG->ReplaceAllUsesWith(N, ExtAdd);
2445 CurDAG->RemoveDeadNode(N.getNode());
2446 return Res ? Res : ExtSrc;
2447 }
2448 }
2449 }
2450 }
2451
2452 // TODO: Handle extensions, shifted masks etc.
2453 return N;
2454}
2455
2456bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
2457 unsigned Depth) {
2458 SDLoc dl(N);
2459 LLVM_DEBUG({
2460 dbgs() << "MatchAddress: ";
2461 AM.dump(CurDAG);
2462 });
2463 // Limit recursion.
2464 if (Depth >= SelectionDAG::MaxRecursionDepth)
2465 return matchAddressBase(N, AM);
2466
2467 // If this is already a %rip relative address, we can only merge immediates
2468 // into it. Instead of handling this in every case, we handle it here.
2469 // RIP relative addressing: %rip + 32-bit displacement!
2470 if (AM.isRIPRelative()) {
2471 // FIXME: JumpTable and ExternalSymbol address currently don't like
2472 // displacements. It isn't very important, but this should be fixed for
2473 // consistency.
2474 if (!(AM.ES || AM.MCSym) && AM.JT != -1)
2475 return true;
2476
2477 if (auto *Cst = dyn_cast<ConstantSDNode>(N))
2478 if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
2479 return false;
2480 return true;
2481 }
2482
2483 switch (N.getOpcode()) {
2484 default: break;
2485 case ISD::LOCAL_RECOVER: {
2486 if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
2487 if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
2488 // Use the symbol and don't prefix it.
2489 AM.MCSym = ESNode->getMCSymbol();
2490 return false;
2491 }
2492 break;
2493 }
2494 case ISD::Constant: {
2495 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2496 if (!foldOffsetIntoAddress(Val, AM))
2497 return false;
2498 break;
2499 }
2500
2501 case X86ISD::Wrapper:
2502 case X86ISD::WrapperRIP:
2503 if (!matchWrapper(N, AM))
2504 return false;
2505 break;
2506
2507 case ISD::LOAD:
2508 if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
2509 return false;
2510 break;
2511
2512 case ISD::FrameIndex:
2513 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2514 AM.Base_Reg.getNode() == nullptr &&
2515 (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
2516 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
2517 AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
2518 return false;
2519 }
2520 break;
2521
2522 case ISD::SHL:
2523 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2524 break;
2525
2526 if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
2527 unsigned Val = CN->getZExtValue();
2528 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
2529 // that the base operand remains free for further matching. If
2530 // the base doesn't end up getting used, a post-processing step
2531 // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
2532 if (Val == 1 || Val == 2 || Val == 3) {
2533 SDValue ShVal = N.getOperand(0);
2534 AM.Scale = 1 << Val;
2535 AM.IndexReg = matchIndexRecursively(ShVal, AM, Depth + 1);
2536 return false;
2537 }
2538 }
2539 break;
2540
2541 case ISD::SRL: {
2542 // Scale must not be used already.
2543 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2544
2545 // We only handle up to 64-bit values here as those are what matter for
2546 // addressing mode optimizations.
2547 assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2548 "Unexpected value size!");
2549
2550 SDValue And = N.getOperand(0);
2551 if (And.getOpcode() != ISD::AND) break;
2552 SDValue X = And.getOperand(0);
2553
2554 // The mask used for the transform is expected to be post-shift, but we
2555 // found the shift first so just apply the shift to the mask before passing
2556 // it down.
2557 if (!isa<ConstantSDNode>(N.getOperand(1)) ||
2558 !isa<ConstantSDNode>(And.getOperand(1)))
2559 break;
2560 uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
2561
2562 // Try to fold the mask and shift into the scale, and return false if we
2563 // succeed.
2564 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
2565 return false;
2566 break;
2567 }
2568
2569 case ISD::SMUL_LOHI:
2570 case ISD::UMUL_LOHI:
2571 // A mul_lohi where we need the low part can be folded as a plain multiply.
2572 if (N.getResNo() != 0) break;
2573 [[fallthrough]];
2574 case ISD::MUL:
2575 case X86ISD::MUL_IMM:
2576 // X*[3,5,9] -> X+X*[2,4,8]
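// e.g. X*9 is matched with Base = X, Index = X, Scale = 8, i.e. lea (X,X,8).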
2577 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2578 AM.Base_Reg.getNode() == nullptr &&
2579 AM.IndexReg.getNode() == nullptr) {
2580 if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
2581 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
2582 CN->getZExtValue() == 9) {
2583 AM.Scale = unsigned(CN->getZExtValue())-1;
2584
2585 SDValue MulVal = N.getOperand(0);
2586 SDValue Reg;
2587
2588 // Okay, we know that we have a scale by now. However, if the scaled
2589 // value is an add of something and a constant, we can fold the
2590 // constant into the disp field here.
2591 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
2592 isa<ConstantSDNode>(MulVal.getOperand(1))) {
2593 Reg = MulVal.getOperand(0);
2594 auto *AddVal = cast<ConstantSDNode>(MulVal.getOperand(1));
2595 uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
2596 if (foldOffsetIntoAddress(Disp, AM))
2597 Reg = N.getOperand(0);
2598 } else {
2599 Reg = N.getOperand(0);
2600 }
2601
2602 AM.IndexReg = AM.Base_Reg = Reg;
2603 return false;
2604 }
2605 }
2606 break;
2607
2608 case ISD::SUB: {
2609 // Given A-B, if A can be completely folded into the address and
2610 // the index field with the index field unused, use -B as the index.
2611 // This is a win if A has multiple parts that can be folded into
2612 // the address. Also, this saves a mov if the base register has
2613 // other uses, since it avoids a two-address sub instruction, however
2614 // it costs an additional mov if the index register has other uses.
2615
2616 // Add an artificial use to this node so that we can keep track of
2617 // it if it gets CSE'd with a different node.
2618 HandleSDNode Handle(N);
2619
2620 // Test if the LHS of the sub can be folded.
2621 X86ISelAddressMode Backup = AM;
2622 if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2623 N = Handle.getValue();
2624 AM = Backup;
2625 break;
2626 }
2627 N = Handle.getValue();
2628 // Test if the index field is free for use.
2629 if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2630 AM = Backup;
2631 break;
2632 }
2633
2634 int Cost = 0;
2635 SDValue RHS = N.getOperand(1);
2636 // If the RHS involves a register with multiple uses, this
2637 // transformation incurs an extra mov, due to the neg instruction
2638 // clobbering its operand.
2639 if (!RHS.getNode()->hasOneUse() ||
2640 RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2641 RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2642 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2643 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2644 RHS.getOperand(0).getValueType() == MVT::i32))
2645 ++Cost;
2646 // If the base is a register with multiple uses, this
2647 // transformation may save a mov.
2648 if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2649 !AM.Base_Reg.getNode()->hasOneUse()) ||
2650 AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2651 --Cost;
2652 // If the folded LHS was interesting, this transformation saves
2653 // address arithmetic.
2654 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2655 ((AM.Disp != 0) && (Backup.Disp == 0)) +
2656 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2657 --Cost;
2658 // If it doesn't look like it may be an overall win, don't do it.
2659 if (Cost >= 0) {
2660 AM = Backup;
2661 break;
2662 }
2663
2664 // Ok, the transformation is legal and appears profitable. Go for it.
2665 // Negation will be emitted later to avoid creating dangling nodes if this
2666 // was an unprofitable LEA.
2667 AM.IndexReg = RHS;
2668 AM.NegateIndex = true;
2669 AM.Scale = 1;
2670 return false;
2671 }
2672
2673 case ISD::OR:
2674 case ISD::XOR:
2675 // See if we can treat the OR/XOR node as an ADD node.
2676 if (!CurDAG->isADDLike(N))
2677 break;
2678 [[fallthrough]];
2679 case ISD::ADD:
2680 if (!matchAdd(N, AM, Depth))
2681 return false;
2682 break;
2683
2684 case ISD::AND: {
2685 // Perform some heroic transforms on an and of a constant-count shift
2686 // with a constant to enable use of the scaled offset field.
2687
2688 // Scale must not be used already.
2689 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2690
2691 // We only handle up to 64-bit values here as those are what matter for
2692 // addressing mode optimizations.
2693 assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2694 "Unexpected value size!");
2695
2696 if (!isa<ConstantSDNode>(N.getOperand(1)))
2697 break;
2698
2699 if (N.getOperand(0).getOpcode() == ISD::SRL) {
2700 SDValue Shift = N.getOperand(0);
2701 SDValue X = Shift.getOperand(0);
2702
2703 uint64_t Mask = N.getConstantOperandVal(1);
2704
2705 // Try to fold the mask and shift into an extract and scale.
2706 if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2707 return false;
2708
2709 // Try to fold the mask and shift directly into the scale.
2710 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2711 return false;
2712
2713 // Try to fold the mask and shift into BEXTR and scale.
2714 if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2715 return false;
2716 }
2717
2718 // Try to swap the mask and shift to place shifts which can be done as
2719 // a scale on the outside of the mask.
2720 if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2721 return false;
2722
2723 break;
2724 }
2725 case ISD::ZERO_EXTEND: {
2726 // Try to widen a zexted shift left to the same size as its use, so we can
2727 // match the shift as a scale factor.
2728 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2729 break;
2730
2731 SDValue Src = N.getOperand(0);
2732
2733 // See if we can match a zext(addlike(x,c)).
2734 // TODO: Move more ZERO_EXTEND patterns into matchIndexRecursively.
2735 if (Src.getOpcode() == ISD::ADD || Src.getOpcode() == ISD::OR)
2736 if (SDValue Index = matchIndexRecursively(N, AM, Depth + 1))
2737 if (Index != N) {
2738 AM.IndexReg = Index;
2739 return false;
2740 }
2741
2742 // Peek through mask: zext(and(shl(x,c1),c2))
2743 APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits());
2744 if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
2745 if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
2746 Mask = MaskC->getAPIntValue();
2747 Src = Src.getOperand(0);
2748 }
2749
2750 if (Src.getOpcode() == ISD::SHL && Src.hasOneUse() && N->hasOneUse()) {
2751 // Give up if the shift is not a valid scale factor [1,2,3].
2752 SDValue ShlSrc = Src.getOperand(0);
2753 SDValue ShlAmt = Src.getOperand(1);
2754 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);
2755 if (!ShAmtC)
2756 break;
2757 unsigned ShAmtV = ShAmtC->getZExtValue();
2758 if (ShAmtV > 3)
2759 break;
2760
2761 // The narrow shift must only shift out zero bits (it must be 'nuw').
2762 // That makes it safe to widen to the destination type.
2763 APInt HighZeros =
2764 APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV);
2765 if (!Src->getFlags().hasNoUnsignedWrap() &&
2766 !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
2767 break;
2768
2769 // zext (shl nuw i8 %x, C1) to i32
2770 // --> shl (zext i8 %x to i32), (zext C1)
2771 // zext (and (shl nuw i8 %x, C1), C2) to i32
2772 // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1)
2773 MVT SrcVT = ShlSrc.getSimpleValueType();
2774 MVT VT = N.getSimpleValueType();
2775 SDLoc DL(N);
2776
2777 SDValue Res = ShlSrc;
2778 if (!Mask.isAllOnes()) {
2779 Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);
2780 insertDAGNode(*CurDAG, N, Res);
2781 Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);
2782 insertDAGNode(*CurDAG, N, Res);
2783 }
2784 SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res);
2785 insertDAGNode(*CurDAG, N, Zext);
2786 SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt);
2787 insertDAGNode(*CurDAG, N, NewShl);
2788 CurDAG->ReplaceAllUsesWith(N, NewShl);
2789 CurDAG->RemoveDeadNode(N.getNode());
2790
2791 // Convert the shift to scale factor.
2792 AM.Scale = 1 << ShAmtV;
2793 // If matchIndexRecursively is not called here, Zext may be replaced by
2794 // other nodes but later still be used to call a builder
2795 // method.
2796 AM.IndexReg = matchIndexRecursively(Zext, AM, Depth + 1);
2797 return false;
2798 }
2799
2800 if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes()) {
2801 // Try to fold the mask and shift into an extract and scale.
2802 if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src,
2803 Src.getOperand(0), AM))
2804 return false;
2805
2806 // Try to fold the mask and shift directly into the scale.
2807 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask.getZExtValue(), Src,
2808 Src.getOperand(0), AM))
2809 return false;
2810
2811 // Try to fold the mask and shift into BEXTR and scale.
2812 if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask.getZExtValue(), Src,
2813 Src.getOperand(0), AM, *Subtarget))
2814 return false;
2815 }
2816
2817 break;
2818 }
2819 }
2820
2821 return matchAddressBase(N, AM);
2822}
2823
2824/// Helper for MatchAddress. Add the specified node to the
2825/// specified addressing mode without any further recursion.
2826bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2827 // Is the base register already occupied?
2828 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2829 // If so, check to see if the scale index register is set.
2830 if (!AM.IndexReg.getNode()) {
2831 AM.IndexReg = N;
2832 AM.Scale = 1;
2833 return false;
2834 }
2835
2836 // Otherwise, we cannot select it.
2837 return true;
2838 }
2839
2840 // Default, generate it as a register.
2841 AM.BaseType = X86ISelAddressMode::RegBase;
2842 AM.Base_Reg = N;
2843 return false;
2844}
2845
2846bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
2847 X86ISelAddressMode &AM,
2848 unsigned Depth) {
2849 SDLoc dl(N);
2850 LLVM_DEBUG({
2851 dbgs() << "MatchVectorAddress: ";
2852 AM.dump(CurDAG);
2853 });
2854 // Limit recursion.
2855 if (Depth >= SelectionDAG::MaxRecursionDepth)
2856 return matchAddressBase(N, AM);
2857
2858 // TODO: Support other operations.
2859 switch (N.getOpcode()) {
2860 case ISD::Constant: {
2861 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2862 if (!foldOffsetIntoAddress(Val, AM))
2863 return false;
2864 break;
2865 }
2866 case X86ISD::Wrapper:
2867 if (!matchWrapper(N, AM))
2868 return false;
2869 break;
2870 case ISD::ADD: {
2871 // Add an artificial use to this node so that we can keep track of
2872 // it if it gets CSE'd with a different node.
2873 HandleSDNode Handle(N);
2874
2875 X86ISelAddressMode Backup = AM;
2876 if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
2877 !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2878 Depth + 1))
2879 return false;
2880 AM = Backup;
2881
2882 // Try again after commuting the operands.
2883 if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2884 Depth + 1) &&
2885 !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
2886 Depth + 1))
2887 return false;
2888 AM = Backup;
2889
2890 N = Handle.getValue();
2891 break;
2892 }
2893 }
2894
2895 return matchAddressBase(N, AM);
2896}
2897
2898/// Helper for selectVectorAddr. Handles things that can be folded into a
2899/// gather/scatter address. The index register and scale should have already
2900/// been handled.
2901bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2902 return matchVectorAddressRecursively(N, AM, 0);
2903}
2904
2905bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
2906 SDValue IndexOp, SDValue ScaleOp,
2907 SDValue &Base, SDValue &Scale,
2908 SDValue &Index, SDValue &Disp,
2909 SDValue &Segment) {
2910 X86ISelAddressMode AM;
2911 AM.Scale = ScaleOp->getAsZExtVal();
2912
2913 // Attempt to match index patterns, as long as we're not relying on implicit
2914 // sign-extension, which is performed BEFORE scale.
2915 if (IndexOp.getScalarValueSizeInBits() == BasePtr.getScalarValueSizeInBits())
2916 AM.IndexReg = matchIndexRecursively(IndexOp, AM, 0);
2917 else
2918 AM.IndexReg = IndexOp;
2919
2920 unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
2921 if (AddrSpace == X86AS::GS)
2922 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2923 if (AddrSpace == X86AS::FS)
2924 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2925 if (AddrSpace == X86AS::SS)
2926 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2927
2928 SDLoc DL(BasePtr);
2929 MVT VT = BasePtr.getSimpleValueType();
2930
2931 // Try to match into the base and displacement fields.
2932 if (matchVectorAddress(BasePtr, AM))
2933 return false;
2934
2935 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2936 return true;
2937}
2938
2939/// Returns true if it is able to pattern match an addressing mode.
2940/// It returns the operands which make up the maximal addressing mode it can
2941/// match by reference.
2942///
2943/// Parent is the parent node of the addr operand that is being matched. It
2944/// is always a load, store, atomic node, or null. It is only null when
2945/// checking memory operands for inline asm nodes.
2946bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2947 SDValue &Scale, SDValue &Index,
2948 SDValue &Disp, SDValue &Segment) {
2949 X86ISelAddressMode AM;
2950
2951 if (Parent &&
2952 // This list of opcodes covers all the nodes that have an "addr:$ptr" operand
2953 // that are not a MemSDNode, and thus don't have proper addrspace info.
2954 Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2955 Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2956 Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2957 Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2958 Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2959 Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2960 Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2961 unsigned AddrSpace =
2962 cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2963 if (AddrSpace == X86AS::GS)
2964 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2965 if (AddrSpace == X86AS::FS)
2966 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2967 if (AddrSpace == X86AS::SS)
2968 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2969 }
2970
2971 // Save the DL and VT before calling matchAddress, it can invalidate N.
2972 SDLoc DL(N);
2973 MVT VT = N.getSimpleValueType();
2974
2975 if (matchAddress(N, AM))
2976 return false;
2977
2978 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2979 return true;
2980}
2981
2982bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2983 // Cannot use 32 bit constants to reference objects in kernel/large code
2984 // model.
2985 if (TM.getCodeModel() == CodeModel::Kernel ||
2986 TM.getCodeModel() == CodeModel::Large)
2987 return false;
2988
2989 // In static codegen with small code model, we can get the address of a label
2990 // into a register with 'movl'
2991 if (N->getOpcode() != X86ISD::Wrapper)
2992 return false;
2993
2994 N = N.getOperand(0);
2995
2996 // At least GNU as does not accept 'movl' for TPOFF relocations.
2997 // FIXME: We could use 'movl' when we know we are targeting MC.
2998 if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2999 return false;
3000
3001 Imm = N;
3002 // Small/medium code model can reference non-TargetGlobalAddress objects with
3003 // 32 bit constants.
3004 if (N->getOpcode() != ISD::TargetGlobalAddress) {
3005 return TM.getCodeModel() == CodeModel::Small ||
3006 TM.getCodeModel() == CodeModel::Medium;
3007 }
3008
3009 const GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
3010 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
3011 return CR->getUnsignedMax().ult(1ull << 32);
3012
3013 return !TM.isLargeGlobalValue(GV);
3014}
3015
3016bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
3017 SDValue &Scale, SDValue &Index,
3018 SDValue &Disp, SDValue &Segment) {
3019 // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
3020 SDLoc DL(N);
3021
3022 if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
3023 return false;
3024
3025 auto *RN = dyn_cast<RegisterSDNode>(Base);
3026 if (RN && RN->getReg() == 0)
3027 Base = CurDAG->getRegister(0, MVT::i64);
3028 else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
3029 // Base could already be %rip, particularly in the x32 ABI.
3030 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
3031 MVT::i64), 0);
3032 Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
3033 Base);
3034 }
3035
3036 RN = dyn_cast<RegisterSDNode>(Index);
3037 if (RN && RN->getReg() == 0)
3038 Index = CurDAG->getRegister(0, MVT::i64);
3039 else {
3040 assert(Index.getValueType() == MVT::i32 &&
3041 "Expect to be extending 32-bit registers for use in LEA");
3042 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
3043 MVT::i64), 0);
3044 Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
3045 Index);
3046 }
3047
3048 return true;
3049}
3050
3051/// Calls SelectAddr and determines if the maximal addressing
3052/// mode it matches can be cost effectively emitted as an LEA instruction.
3053bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
3054 SDValue &Base, SDValue &Scale,
3055 SDValue &Index, SDValue &Disp,
3056 SDValue &Segment) {
3057 X86ISelAddressMode AM;
3058
3059 // Save the DL and VT before calling matchAddress, it can invalidate N.
3060 SDLoc DL(N);
3061 MVT VT = N.getSimpleValueType();
3062
3063 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
3064 // segments.
3065 SDValue Copy = AM.Segment;
3066 SDValue T = CurDAG->getRegister(0, MVT::i32);
3067 AM.Segment = T;
3068 if (matchAddress(N, AM))
3069 return false;
3070 assert (T == AM.Segment);
3071 AM.Segment = Copy;
3072
3073 unsigned Complexity = 0;
3074 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
3075 Complexity = 1;
3076 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
3077 Complexity = 4;
3078
3079 if (AM.IndexReg.getNode())
3080 Complexity++;
3081
3082 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
3083 // a simple shift.
3084 if (AM.Scale > 1)
3085 Complexity++;
3086
3087 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
3088 // to a LEA. This is determined with some experimentation but is by no means
3089 // optimal (especially for code size consideration). LEA is nice because of
3090 // its three-address nature. Tweak the cost function again when we can run
3091 // convertToThreeAddress() at register allocation time.
3092 if (AM.hasSymbolicDisplacement()) {
3093 // For X86-64, always use LEA to materialize RIP-relative addresses.
3094 if (Subtarget->is64Bit())
3095 Complexity = 4;
3096 else
3097 Complexity += 2;
3098 }
3099
3100 // Heuristic: try harder to form an LEA from ADD if the operands set flags.
3101 // Unlike ADD, LEA does not affect flags, so we will be less likely to require
3102 // duplicating flag-producing instructions later in the pipeline.
3103 if (N.getOpcode() == ISD::ADD) {
3104 auto isMathWithFlags = [](SDValue V) {
3105 switch (V.getOpcode()) {
3106 case X86ISD::ADD:
3107 case X86ISD::SUB:
3108 case X86ISD::ADC:
3109 case X86ISD::SBB:
3110 case X86ISD::SMUL:
3111 case X86ISD::UMUL:
3112 /* TODO: These opcodes can be added safely, but we may want to justify
3113 their inclusion for different reasons (better for reg-alloc).
3114 case X86ISD::OR:
3115 case X86ISD::XOR:
3116 case X86ISD::AND:
3117 */
3118 // Value 1 is the flag output of the node - verify it's not dead.
3119 return !SDValue(V.getNode(), 1).use_empty();
3120 default:
3121 return false;
3122 }
3123 };
3124 // TODO: We might want to factor in whether there's a load folding
3125 // opportunity for the math op that disappears with LEA.
3126 if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1)))
3127 Complexity++;
3128 }
3129
3130 if (AM.Disp)
3131 Complexity++;
3132
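// For illustration: an address like 8(%rbx,%rcx,4) scores 1 (base) +
// 1 (index) + 1 (scale > 1) + 1 (disp) = 4 and passes the check below, while
// a bare (%rbx) scores 1 and is rejected.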
3133 // If it isn't worth using an LEA, reject it.
3134 if (Complexity <= 2)
3135 return false;
3136
3137 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
3138 return true;
3139}
3140
3141 /// This is only run on TargetGlobalTLSAddress or TargetExternalSymbol nodes.
3142bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
3143 SDValue &Scale, SDValue &Index,
3144 SDValue &Disp, SDValue &Segment) {
3145 assert(N.getOpcode() == ISD::TargetGlobalTLSAddress ||
3146 N.getOpcode() == ISD::TargetExternalSymbol);
3147
3148 X86ISelAddressMode AM;
3149 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) {
3150 AM.GV = GA->getGlobal();
3151 AM.Disp += GA->getOffset();
3152 AM.SymbolFlags = GA->getTargetFlags();
3153 } else {
3154 auto *SA = cast<ExternalSymbolSDNode>(N);
3155 AM.ES = SA->getSymbol();
3156 AM.SymbolFlags = SA->getTargetFlags();
3157 }
3158
3159 if (Subtarget->is32Bit()) {
3160 AM.Scale = 1;
3161 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
3162 }
3163
3164 MVT VT = N.getSimpleValueType();
3165 getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
3166 return true;
3167}
3168
3169bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
3170 // Keep track of the original value type and whether this value was
3171 // truncated. If we see a truncation from pointer type to VT that truncates
3172 // bits that are known to be zero, we can use a narrow reference.
3173 EVT VT = N.getValueType();
3174 bool WasTruncated = false;
3175 if (N.getOpcode() == ISD::TRUNCATE) {
3176 WasTruncated = true;
3177 N = N.getOperand(0);
3178 }
3179
3180 if (N.getOpcode() != X86ISD::Wrapper)
3181 return false;
3182
3183 // We can only use non-GlobalValues as immediates if they were not truncated,
3184 // as we do not have any range information. If we have a GlobalValue and the
3185 // address was not truncated, we can select it as an operand directly.
3186 unsigned Opc = N.getOperand(0)->getOpcode();
3187 if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
3188 Op = N.getOperand(0);
3189 // We can only select the operand directly if we didn't have to look past a
3190 // truncate.
3191 return !WasTruncated;
3192 }
3193
3194 // Check that the global's range fits into VT.
3195 auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
3196 std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
3197 if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
3198 return false;
3199
3200 // Okay, we can use a narrow reference.
3201 Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
3202 GA->getOffset(), GA->getTargetFlags());
3203 return true;
3204}
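// Illustrative example (not part of the original source): if a global is
// declared with !absolute_symbol metadata covering [0, 0x10000), then a value
// computed as trunc(ptrtoint(@sym)) to i16 discards only bits known to be
// zero, so the truncated reference is selected as a narrow 16-bit immediate.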
3205
3206bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
3207 SDValue &Base, SDValue &Scale,
3208 SDValue &Index, SDValue &Disp,
3209 SDValue &Segment) {
3210 assert(Root && P && "Unknown root/parent nodes");
3211 if (!ISD::isNON_EXTLoad(N.getNode()) ||
3212 !IsProfitableToFold(N, P, Root) ||
3213 !IsLegalToFold(N, P, Root, OptLevel))
3214 return false;
3215
3216 return selectAddr(N.getNode(),
3217 N.getOperand(1), Base, Scale, Index, Disp, Segment);
3218}
3219
3220bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
3221 SDValue &Base, SDValue &Scale,
3222 SDValue &Index, SDValue &Disp,
3223 SDValue &Segment) {
3224 assert(Root && P && "Unknown root/parent nodes");
3225 if (N->getOpcode() != X86ISD::VBROADCAST_LOAD ||
3226 !IsProfitableToFold(N, P, Root) ||
3227 !IsLegalToFold(N, P, Root, OptLevel))
3228 return false;
3229
3230 return selectAddr(N.getNode(),
3231 N.getOperand(1), Base, Scale, Index, Disp, Segment);
3232}
3233
3234/// Return an SDNode that returns the value of the global base register.
3235/// Output instructions required to initialize the global base register,
3236/// if necessary.
3237SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
3238 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
3239 auto &DL = MF->getDataLayout();
3240 return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
3241}
3242
3243bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
3244 if (N->getOpcode() == ISD::TRUNCATE)
3245 N = N->getOperand(0).getNode();
3246 if (N->getOpcode() != X86ISD::Wrapper)
3247 return false;
3248
3249 auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
3250 if (!GA)
3251 return false;
3252
3253 auto *GV = GA->getGlobal();
3254 std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange();
3255 if (CR)
3256 return CR->getSignedMin().sge(-1ull << Width) &&
3257 CR->getSignedMax().slt(1ull << Width);
3258 // In the kernel code model, globals are in the negative 2GB of the address
3259 // space, so globals can be a sign extended 32-bit immediate.
3260 // In other code models, small globals are in the low 2GB of the address
3261 // space, so sign extending them is equivalent to zero extending them.
3262 return Width == 32 && !TM.isLargeGlobalValue(GV);
3263}
3264
3265X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const {
3266 assert(N->isMachineOpcode() && "Unexpected node");
3267 unsigned Opc = N->getMachineOpcode();
3268 const MCInstrDesc &MCID = getInstrInfo()->get(Opc);
3269 int CondNo = X86::getCondSrcNoFromDesc(MCID);
3270 if (CondNo < 0)
3271 return X86::COND_INVALID;
3272
3273 return static_cast<X86::CondCode>(N->getConstantOperandVal(CondNo));
3274}
3275
3276/// Test whether the given X86ISD::CMP node has any users that use a flag
3277/// other than ZF.
3278bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
3279 // Examine each user of the node.
3280 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3281 UI != UE; ++UI) {
3282 // Only check things that use the flags.
3283 if (UI.getUse().getResNo() != Flags.getResNo())
3284 continue;
3285 // Only examine CopyToReg uses that copy to EFLAGS.
3286 if (UI->getOpcode() != ISD::CopyToReg ||
3287 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3288 return false;
3289 // Examine each user of the CopyToReg use.
3290 for (SDNode::use_iterator FlagUI = UI->use_begin(),
3291 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
3292 // Only examine the Flag result.
3293 if (FlagUI.getUse().getResNo() != 1) continue;
3294 // Anything unusual: assume conservatively.
3295 if (!FlagUI->isMachineOpcode()) return false;
3296 // Examine the condition code of the user.
3297 X86::CondCode CC = getCondFromNode(*FlagUI);
3298
3299 switch (CC) {
3300 // Comparisons which only use the zero flag.
3301 case X86::COND_E: case X86::COND_NE:
3302 continue;
3303 // Anything else: assume conservatively.
3304 default:
3305 return false;
3306 }
3307 }
3308 }
3309 return true;
3310}
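// Illustrative example (not part of the original source): a CMP whose EFLAGS
// result is copied to EFLAGS and consumed only by JE/JNE-style users
// (COND_E/COND_NE) reads nothing but ZF, so this returns true; a single JA
// user (COND_A, which also reads CF) would make it return false.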
3311
3312/// Test whether the given X86ISD::CMP node has any uses which require the SF
3313/// flag to be accurate.
3314bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
3315 // Examine each user of the node.
3316 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3317 UI != UE; ++UI) {
3318 // Only check things that use the flags.
3319 if (UI.getUse().getResNo() != Flags.getResNo())
3320 continue;
3321 // Only examine CopyToReg uses that copy to EFLAGS.
3322 if (UI->getOpcode() != ISD::CopyToReg ||
3323 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3324 return false;
3325 // Examine each user of the CopyToReg use.
3326 for (SDNode::use_iterator FlagUI = UI->use_begin(),
3327 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
3328 // Only examine the Flag result.
3329 if (FlagUI.getUse().getResNo() != 1) continue;
3330 // Anything unusual: assume conservatively.
3331 if (!FlagUI->isMachineOpcode()) return false;
3332 // Examine the condition code of the user.
3333 X86::CondCode CC = getCondFromNode(*FlagUI);
3334
3335 switch (CC) {
3336 // Comparisons which don't examine the SF flag.
3337 case X86::COND_A: case X86::COND_AE:
3338 case X86::COND_B: case X86::COND_BE:
3339 case X86::COND_E: case X86::COND_NE:
3340 case X86::COND_O: case X86::COND_NO:
3341 case X86::COND_P: case X86::COND_NP:
3342 continue;
3343 // Anything else: assume conservatively.
3344 default:
3345 return false;
3346 }
3347 }
3348 }
3349 return true;
3350}
3351
3352static bool mayUseCarryFlag(X86::CondCode CC) {
3353 switch (CC) {
3354 // Comparisons which don't examine the CF flag.
3355 case X86::COND_O: case X86::COND_NO:
3356 case X86::COND_E: case X86::COND_NE:
3357 case X86::COND_S: case X86::COND_NS:
3358 case X86::COND_P: case X86::COND_NP:
3359 case X86::COND_L: case X86::COND_GE:
3360 case X86::COND_G: case X86::COND_LE:
3361 return false;
3362 // Anything else: assume conservatively.
3363 default:
3364 return true;
3365 }
3366}
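// Illustrative example (not part of the original source): COND_B/COND_AE
// (unsigned below / above-or-equal) test CF directly, so mayUseCarryFlag()
// conservatively returns true for them, while COND_E/COND_NE read only ZF
// and return false.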
3367
3368/// Test whether the given node which sets flags has any uses which require the
3369/// CF flag to be accurate.
3370 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
3371 // Examine each user of the node.
3372 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3373 UI != UE; ++UI) {
3374 // Only check things that use the flags.
3375 if (UI.getUse().getResNo() != Flags.getResNo())
3376 continue;
3377
3378 unsigned UIOpc = UI->getOpcode();
3379
3380 if (UIOpc == ISD::CopyToReg) {
3381 // Only examine CopyToReg uses that copy to EFLAGS.
3382 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3383 return false;
3384 // Examine each user of the CopyToReg use.
3385 for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
3386 FlagUI != FlagUE; ++FlagUI) {
3387 // Only examine the Flag result.
3388 if (FlagUI.getUse().getResNo() != 1)
3389 continue;
3390 // Anything unusual: assume conservatively.
3391 if (!FlagUI->isMachineOpcode())
3392 return false;
3393 // Examine the condition code of the user.
3394 X86::CondCode CC = getCondFromNode(*FlagUI);
3395
3396 if (mayUseCarryFlag(CC))
3397 return false;
3398 }
3399
3400 // This CopyToReg is ok. Move on to the next user.
3401 continue;
3402 }
3403
3404 // This might be an unselected node. So look for the pre-isel opcodes that
3405 // use flags.
3406 unsigned CCOpNo;
3407 switch (UIOpc) {
3408 default:
3409 // Something unusual. Be conservative.
3410 return false;
3411 case X86ISD::SETCC: CCOpNo = 0; break;
3412 case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
3413 case X86ISD::CMOV: CCOpNo = 2; break;
3414 case X86ISD::BRCOND: CCOpNo = 2; break;
3415 }
3416
3417 X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
3418 if (mayUseCarryFlag(CC))
3419 return false;
3420 }
3421 return true;
3422}
3423
3424/// Check whether or not the chain ending in StoreNode is suitable for doing
3425/// the {load; op; store} to modify transformation.
3426static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
3427 SDValue StoredVal, SelectionDAG *CurDAG,
3428 unsigned LoadOpNo,
3429 LoadSDNode *&LoadNode,
3430 SDValue &InputChain) {
3431 // Is the stored value result 0 of the operation?
3432 if (StoredVal.getResNo() != 0) return false;
3433
3434 // Are there other uses of the operation other than the store?
3435 if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
3436
3437 // Is the store non-extending and non-indexed?
3438 if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
3439 return false;
3440
3441 SDValue Load = StoredVal->getOperand(LoadOpNo);
3442 // Is the stored value a non-extending and non-indexed load?
3443 if (!ISD::isNormalLoad(Load.getNode())) return false;
3444
3445 // Return LoadNode by reference.
3446 LoadNode = cast<LoadSDNode>(Load);
3447
3448 // Is store the only read of the loaded value?
3449 if (!Load.hasOneUse())
3450 return false;
3451
3452 // Is the address of the store the same as the load?
3453 if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
3454 LoadNode->getOffset() != StoreNode->getOffset())
3455 return false;
3456
3457 bool FoundLoad = false;
3458 SmallVector<SDValue, 4> ChainOps;
3459 SmallVector<const SDNode *, 4> LoopWorklist;
3460 SmallPtrSet<const SDNode *, 16> Visited;
3461 const unsigned int Max = 1024;
3462
3463 // Visualization of Load-Op-Store fusion:
3464 // -------------------------
3465 // Legend:
3466 // *-lines = Chain operand dependencies.
3467 // |-lines = Normal operand dependencies.
3468 // Dependencies flow down and right. n-suffix references multiple nodes.
3469 //
3470 // C Xn C
3471 // * * *
3472 // * * *
3473 // Xn A-LD Yn TF Yn
3474 // * * \ | * |
3475 // * * \ | * |
3476 // * * \ | => A--LD_OP_ST
3477 // * * \| \
3478 // TF OP \
3479 // * | \ Zn
3480 // * | \
3481 // A-ST Zn
3482 //
3483
3484 // This merge induced dependences from: #1: Xn -> LD, OP, Zn
3485 // #2: Yn -> LD
3486 // #3: ST -> Zn
3487
3488 // Ensure the transform is safe by checking for the dual
3489 // dependencies to make sure we do not induce a loop.
3490
3491 // As LD is a predecessor to both OP and ST we can do this by checking:
3492 // a). if LD is a predecessor to a member of Xn or Yn.
3493 // b). if a Zn is a predecessor to ST.
3494
3495 // However, (b) can only occur through being a chain predecessor to
3496 // ST, which is the same as Zn being a member or predecessor of Xn,
3497 // which is a subset of LD being a predecessor of Xn. So it's
3498 // subsumed by check (a).
3499
3500 SDValue Chain = StoreNode->getChain();
3501
3502 // Gather X elements in ChainOps.
3503 if (Chain == Load.getValue(1)) {
3504 FoundLoad = true;
3505 ChainOps.push_back(Load.getOperand(0));
3506 } else if (Chain.getOpcode() == ISD::TokenFactor) {
3507 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
3508 SDValue Op = Chain.getOperand(i);
3509 if (Op == Load.getValue(1)) {
3510 FoundLoad = true;
3511 // Drop Load, but keep its chain. No cycle check necessary.
3512 ChainOps.push_back(Load.getOperand(0));
3513 continue;
3514 }
3515 LoopWorklist.push_back(Op.getNode());
3516 ChainOps.push_back(Op);
3517 }
3518 }
3519
3520 if (!FoundLoad)
3521 return false;
3522
3523 // Worklist is currently Xn. Add Yn to worklist.
3524 for (SDValue Op : StoredVal->ops())
3525 if (Op.getNode() != LoadNode)
3526 LoopWorklist.push_back(Op.getNode());
3527
3528 // Check (a) if Load is a predecessor to Xn + Yn
3529 if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
3530 true))
3531 return false;
3532
3533 InputChain =
3534 CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
3535 return true;
3536}
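// Illustrative example (not part of the original source): if the store's
// chain is TokenFactor(Load.getValue(1), X1, X2), the load is peeled off and
// InputChain becomes TokenFactor(load's input chain, X1, X2), so the fused
// load-op-store node can take over the chain duties of both the load and the
// store.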
3537
3538// Change a chain of {load; op; store} of the same value into a simple op
3539// through memory of that value, if the uses of the modified value and its
3540// address are suitable.
3541//
3542// The tablegen memory operand pattern is currently not able to match
3543// the case where the EFLAGS on the original operation are used.
3544//
3545// To move this to tablegen, we'll need to improve tablegen to allow flags to
3546// be transferred from a node in the pattern to the result node, probably with
3547// a new keyword. For example, we have this
3548// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
3549// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
3550// (implicit EFLAGS)]>;
3551// but maybe need something like this
3552// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
3553// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
3554// (transferrable EFLAGS)]>;
3555//
3556// Until then, we manually fold these and instruction select the operation
3557// here.
3558bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
3559 auto *StoreNode = cast<StoreSDNode>(Node);
3560 SDValue StoredVal = StoreNode->getOperand(1);
3561 unsigned Opc = StoredVal->getOpcode();
3562
3563 // Before we try to select anything, make sure this is memory operand size
3564 // and opcode we can handle. Note that this must match the code below that
3565 // actually lowers the opcodes.
3566 EVT MemVT = StoreNode->getMemoryVT();
3567 if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
3568 MemVT != MVT::i8)
3569 return false;
3570
3571 bool IsCommutable = false;
3572 bool IsNegate = false;
3573 switch (Opc) {
3574 default:
3575 return false;
3576 case X86ISD::SUB:
3577 IsNegate = isNullConstant(StoredVal.getOperand(0));
3578 break;
3579 case X86ISD::SBB:
3580 break;
3581 case X86ISD::ADD:
3582 case X86ISD::ADC:
3583 case X86ISD::AND:
3584 case X86ISD::OR:
3585 case X86ISD::XOR:
3586 IsCommutable = true;
3587 break;
3588 }
3589
3590 unsigned LoadOpNo = IsNegate ? 1 : 0;
3591 LoadSDNode *LoadNode = nullptr;
3592 SDValue InputChain;
3593 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
3594 LoadNode, InputChain)) {
3595 if (!IsCommutable)
3596 return false;
3597
3598 // This operation is commutable, try the other operand.
3599 LoadOpNo = 1;
3600 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
3601 LoadNode, InputChain))
3602 return false;
3603 }
3604
3605 SDValue Base, Scale, Index, Disp, Segment;
3606 if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
3607 Segment))
3608 return false;
3609
3610 auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
3611 unsigned Opc8) {
3612 switch (MemVT.getSimpleVT().SimpleTy) {
3613 case MVT::i64:
3614 return Opc64;
3615 case MVT::i32:
3616 return Opc32;
3617 case MVT::i16:
3618 return Opc16;
3619 case MVT::i8:
3620 return Opc8;
3621 default:
3622 llvm_unreachable("Invalid size!");
3623 }
3624 };
3625
3626 MachineSDNode *Result;
3627 switch (Opc) {
3628 case X86ISD::SUB:
3629 // Handle negate.
3630 if (IsNegate) {
3631 unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
3632 X86::NEG8m);
3633 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3634 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3635 MVT::Other, Ops);
3636 break;
3637 }
3638 [[fallthrough]];
3639 case X86ISD::ADD:
3640 // Try to match inc/dec.
3641 if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
3642 bool IsOne = isOneConstant(StoredVal.getOperand(1));
3643 bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
3644 // ADD/SUB with 1/-1 can use inc/dec when the carry flag isn't used.
3645 if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
3646 unsigned NewOpc =
3647 ((Opc == X86ISD::ADD) == IsOne)
3648 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3649 : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
3650 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3651 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3652 MVT::Other, Ops);
3653 break;
3654 }
3655 }
3656 [[fallthrough]];
3657 case X86ISD::ADC:
3658 case X86ISD::SBB:
3659 case X86ISD::AND:
3660 case X86ISD::OR:
3661 case X86ISD::XOR: {
3662 auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
3663 switch (Opc) {
3664 case X86ISD::ADD:
3665 return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3666 X86::ADD8mr);
3667 case X86ISD::ADC:
3668 return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3669 X86::ADC8mr);
3670 case X86ISD::SUB:
3671 return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3672 X86::SUB8mr);
3673 case X86ISD::SBB:
3674 return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3675 X86::SBB8mr);
3676 case X86ISD::AND:
3677 return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3678 X86::AND8mr);
3679 case X86ISD::OR:
3680 return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3681 case X86ISD::XOR:
3682 return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3683 X86::XOR8mr);
3684 default:
3685 llvm_unreachable("Invalid opcode!");
3686 }
3687 };
3688 auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3689 switch (Opc) {
3690 case X86ISD::ADD:
3691 return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3692 X86::ADD8mi);
3693 case X86ISD::ADC:
3694 return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3695 X86::ADC8mi);
3696 case X86ISD::SUB:
3697 return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3698 X86::SUB8mi);
3699 case X86ISD::SBB:
3700 return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3701 X86::SBB8mi);
3702 case X86ISD::AND:
3703 return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3704 X86::AND8mi);
3705 case X86ISD::OR:
3706 return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3707 X86::OR8mi);
3708 case X86ISD::XOR:
3709 return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3710 X86::XOR8mi);
3711 default:
3712 llvm_unreachable("Invalid opcode!");
3713 }
3714 };
3715
3716 unsigned NewOpc = SelectRegOpcode(Opc);
3717 SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3718
3719 // See if the operand is a constant that we can fold into an immediate
3720 // operand.
3721 if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3722 int64_t OperandV = OperandC->getSExtValue();
3723
3724 // Check if we can shrink the operand enough to fit in an immediate (or
3725 // fit into a smaller immediate) by negating it and switching the
3726 // operation.
3727 if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3728 ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3729 (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3730 isInt<32>(-OperandV))) &&
3731 hasNoCarryFlagUses(StoredVal.getValue(1))) {
3732 OperandV = -OperandV;
3733 Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3734 }
3735
3736 if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3737 Operand = CurDAG->getSignedConstant(OperandV, SDLoc(Node), MemVT,
3738 /*isTarget=*/true);
3739 NewOpc = SelectImmOpcode(Opc);
3740 }
3741 }
3742
3743 if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3744 SDValue CopyTo =
3745 CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3746 StoredVal.getOperand(2), SDValue());
3747
3748 const SDValue Ops[] = {Base, Scale, Index, Disp,
3749 Segment, Operand, CopyTo, CopyTo.getValue(1)};
3750 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3751 Ops);
3752 } else {
3753 const SDValue Ops[] = {Base, Scale, Index, Disp,
3754 Segment, Operand, InputChain};
3755 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3756 Ops);
3757 }
3758 break;
3759 }
3760 default:
3761 llvm_unreachable("Invalid opcode!");
3762 }
3763
3764 MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3765 LoadNode->getMemOperand()};
3766 CurDAG->setNodeMemRefs(Result, MemOps);
3767
3768 // Update Load Chain uses as well.
3769 ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3770 ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3771 ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3772 CurDAG->RemoveDeadNode(Node);
3773 return true;
3774}
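// Illustrative example (not part of the original source): for IR along the
// lines of
//   %v = load i32, ptr %p
//   %a = add i32 %v, 1
//   store i32 %a, ptr %p
// this folds the three nodes into one read-modify-write instruction, e.g.
// `incl (%rdi)` (or `addl $1, (%rdi)` on targets where inc/dec are slow),
// provided the carry flag of the add is unused.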
3775
3776// See if this is an X & Mask that we can match to BEXTR/BZHI.
3777// Where Mask is one of the following patterns:
3778// a) x & (1 << nbits) - 1
3779// b) x & ~(-1 << nbits)
3780// c) x & (-1 >> (32 - y))
3781// d) x << (32 - y) >> (32 - y)
3782// e) (1 << nbits) - 1
3783bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3784 assert(
3785 (Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND ||
3786 Node->getOpcode() == ISD::SRL) &&
3787 "Should be either an and-mask, or right-shift after clearing high bits.");
3788
3789 // BEXTR is a BMI instruction, BZHI is a BMI2 instruction. We need at least one.
3790 if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3791 return false;
3792
3793 MVT NVT = Node->getSimpleValueType(0);
3794
3795 // Only supported for 32 and 64 bits.
3796 if (NVT != MVT::i32 && NVT != MVT::i64)
3797 return false;
3798
3799 SDValue NBits;
3800 bool NegateNBits;
3801
3802 // If we have BMI2's BZHI, we are ok with multi-use patterns.
3803 // Else, if we only have BMI1's BEXTR, we require one-use.
3804 const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
3805 auto checkUses = [AllowExtraUsesByDefault](
3806 SDValue Op, unsigned NUses,
3807 std::optional<bool> AllowExtraUses) {
3808 return AllowExtraUses.value_or(AllowExtraUsesByDefault) ||
3809 Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3810 };
3811 auto checkOneUse = [checkUses](SDValue Op,
3812 std::optional<bool> AllowExtraUses =
3813 std::nullopt) {
3814 return checkUses(Op, 1, AllowExtraUses);
3815 };
3816 auto checkTwoUse = [checkUses](SDValue Op,
3817 std::optional<bool> AllowExtraUses =
3818 std::nullopt) {
3819 return checkUses(Op, 2, AllowExtraUses);
3820 };
3821
3822 auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3823 if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3824 assert(V.getSimpleValueType() == MVT::i32 &&
3825 V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3826 "Expected i64 -> i32 truncation");
3827 V = V.getOperand(0);
3828 }
3829 return V;
3830 };
3831
3832 // a) x & ((1 << nbits) + (-1))
3833 auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
3834 &NegateNBits](SDValue Mask) -> bool {
3835 // Match `add`. Must only have one use!
3836 if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3837 return false;
3838 // We should be adding an all-ones constant (i.e. subtracting one).
3839 if (!isAllOnesConstant(Mask->getOperand(1)))
3840 return false;
3841 // Match `1 << nbits`. Might be truncated. Must only have one use!
3842 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3843 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3844 return false;
3845 if (!isOneConstant(M0->getOperand(0)))
3846 return false;
3847 NBits = M0->getOperand(1);
3848 NegateNBits = false;
3849 return true;
3850 };
3851
3852 auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3853 V = peekThroughOneUseTruncation(V);
3854 return CurDAG->MaskedValueIsAllOnes(
3855 V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3856 NVT.getSizeInBits()));
3857 };
3858
3859 // b) x & ~(-1 << nbits)
3860 auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3861 &NBits, &NegateNBits](SDValue Mask) -> bool {
3862 // Match `~()`. Must only have one use!
3863 if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3864 return false;
3865 // The -1 only has to be all-ones for the final Node's NVT.
3866 if (!isAllOnes(Mask->getOperand(1)))
3867 return false;
3868 // Match `-1 << nbits`. Might be truncated. Must only have one use!
3869 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3870 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3871 return false;
3872 // The -1 only has to be all-ones for the final Node's NVT.
3873 if (!isAllOnes(M0->getOperand(0)))
3874 return false;
3875 NBits = M0->getOperand(1);
3876 NegateNBits = false;
3877 return true;
3878 };
3879
3880 // Try to match potentially-truncated shift amount as `(bitwidth - y)`,
3881 // or leave the shift amount as-is, but then we'll have to negate it.
3882 auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt,
3883 unsigned Bitwidth) {
3884 NBits = ShiftAmt;
3885 NegateNBits = true;
3886 // Skip over a truncate of the shift amount, if any.
3887 if (NBits.getOpcode() == ISD::TRUNCATE)
3888 NBits = NBits.getOperand(0);
3889 // Try to match the shift amount as (bitwidth - y). It should go away, too.
3890 // If it doesn't match, that's fine, we'll just negate it ourselves.
3891 if (NBits.getOpcode() != ISD::SUB)
3892 return;
3893 auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
3894 if (!V0 || V0->getZExtValue() != Bitwidth)
3895 return;
3896 NBits = NBits.getOperand(1);
3897 NegateNBits = false;
3898 };
3899
3900 // c) x & (-1 >> z) but then we'll have to subtract z from bitwidth
3901 // or
3902 // c) x & (-1 >> (32 - y))
3903 auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
3904 canonicalizeShiftAmt](SDValue Mask) -> bool {
3905 // The mask itself may be truncated.
3906 Mask = peekThroughOneUseTruncation(Mask);
3907 unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3908 // Match `l>>`. Must only have one use!
3909 if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3910 return false;
3911 // We should be shifting truly all-ones constant.
3912 if (!isAllOnesConstant(Mask.getOperand(0)))
3913 return false;
3914 SDValue M1 = Mask.getOperand(1);
3915 // The shift amount should not be used externally.
3916 if (!checkOneUse(M1))
3917 return false;
3918 canonicalizeShiftAmt(M1, Bitwidth);
3919 // Pattern c. is non-canonical, and is expanded into pattern d. iff there
3920 // is no extra use of the mask. Clearly, there was one since we are here.
3921 // But at the same time, if we need to negate the shift amount,
3922 // then we don't want the mask to stick around, else it's unprofitable.
3923 return !NegateNBits;
3924 };
3925
3926 SDValue X;
3927
3928 // d) x << z >> z but then we'll have to subtract z from bitwidth
3929 // or
3930 // d) x << (32 - y) >> (32 - y)
3931 auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
3932 AllowExtraUsesByDefault, &NegateNBits,
3933 &X](SDNode *Node) -> bool {
3934 if (Node->getOpcode() != ISD::SRL)
3935 return false;
3936 SDValue N0 = Node->getOperand(0);
3937 if (N0->getOpcode() != ISD::SHL)
3938 return false;
3939 unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3940 SDValue N1 = Node->getOperand(1);
3941 SDValue N01 = N0->getOperand(1);
3942 // Both of the shifts must be by the exact same value.
3943 if (N1 != N01)
3944 return false;
3945 canonicalizeShiftAmt(N1, Bitwidth);
3946 // There should not be any external uses of the inner shift / shift amount.
3947 // Note that while we are generally okay with external uses given BMI2,
3948 // iff we need to negate the shift amount, we are not okay with extra uses.
3949 const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
3950 if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
3951 return false;
3952 X = N0->getOperand(0);
3953 return true;
3954 };
3955
3956 auto matchLowBitMask = [matchPatternA, matchPatternB,
3957 matchPatternC](SDValue Mask) -> bool {
3958 return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3959 };
3960
3961 if (Node->getOpcode() == ISD::AND) {
3962 X = Node->getOperand(0);
3963 SDValue Mask = Node->getOperand(1);
3964
3965 if (matchLowBitMask(Mask)) {
3966 // Great.
3967 } else {
3968 std::swap(X, Mask);
3969 if (!matchLowBitMask(Mask))
3970 return false;
3971 }
3972 } else if (matchLowBitMask(SDValue(Node, 0))) {
3973 X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT);
3974 } else if (!matchPatternD(Node))
3975 return false;
3976
3977 // If we need to negate the shift amount, require BMI2 BZHI support.
3978 // It's just too unprofitable for BMI1 BEXTR.
3979 if (NegateNBits && !Subtarget->hasBMI2())
3980 return false;
3981
3982 SDLoc DL(Node);
3983
3984 // Truncate the shift amount.
3985 NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3986 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3987
3988 // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
3989 // All the other bits are undefined, we do not care about them.
3990 SDValue ImplDef = SDValue(
3991 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3992 insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3993
3994 SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
3995 insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
3996 NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
3997 MVT::i32, ImplDef, NBits, SRIdxVal),
3998 0);
3999 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4000
4001 // We might have matched the amount of high bits to be cleared,
4002 // but we want the amount of low bits to be kept, so negate it then.
4003 if (NegateNBits) {
4004 SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32);
4005 insertDAGNode(*CurDAG, SDValue(Node, 0), BitWidthC);
4006
4007 NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits);
4008 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4009 }
4010
4011 if (Subtarget->hasBMI2()) {
4012 // Great, just emit the BZHI..
4013 if (NVT != MVT::i32) {
4014 // But have to place the bit count into the wide-enough register first.
4015 NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
4016 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4017 }
4018
4019 SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
4020 ReplaceNode(Node, Extract.getNode());
4021 SelectCode(Extract.getNode());
4022 return true;
4023 }
4024
4025 // Else, if we do *NOT* have BMI2, let's find out if the 'X' is
4026 // *logically* shifted (potentially with a one-use trunc in between),
4027 // and the truncation was the only use of the shift,
4028 // and if so look past one-use truncation.
4029 {
4030 SDValue RealX = peekThroughOneUseTruncation(X);
4031 // FIXME: only if the shift is one-use?
4032 if (RealX != X && RealX.getOpcode() == ISD::SRL)
4033 X = RealX;
4034 }
4035
4036 MVT XVT = X.getSimpleValueType();
4037
4038 // Else, emitting BEXTR requires one more step.
4039 // The 'control' of BEXTR has the pattern of:
4040 // [15...8 bit][ 7...0 bit] location
4041 // [ bit count][ shift] name
4042 // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
4043
4044 // Shift NBits left by 8 bits, thus producing 'control'.
4045 // This makes the low 8 bits zero.
4046 SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
4047 insertDAGNode(*CurDAG, SDValue(Node, 0), C8);
4048 SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
4049 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4050
4051 // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
4052 // FIXME: only if the shift is one-use?
4053 if (X.getOpcode() == ISD::SRL) {
4054 SDValue ShiftAmt = X.getOperand(1);
4055 X = X.getOperand(0);
4056
4057 assert(ShiftAmt.getValueType() == MVT::i8 &&
4058 "Expected shift amount to be i8");
4059
4060 // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
4061 // We could zext to i16 in some form, but we intentionally don't do that.
4062 SDValue OrigShiftAmt = ShiftAmt;
4063 ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
4064 insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
4065
4066 // And now 'or' these low 8 bits of shift amount into the 'control'.
4067 Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
4068 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4069 }
4070
4071 // But have to place the 'control' into the wide-enough register first.
4072 if (XVT != MVT::i32) {
4073 Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
4074 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4075 }
4076
4077 // And finally, form the BEXTR itself.
4078 SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
4079
4080 // The 'X' was originally truncated. Do that now.
4081 if (XVT != NVT) {
4082 insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
4083 Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
4084 }
4085
4086 ReplaceNode(Node, Extract.getNode());
4087 SelectCode(Extract.getNode());
4088
4089 return true;
4090}
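// Illustrative example (not part of the original source): pattern a) with
//   %mask = add i32 (shl i32 1, %n), -1
//   %res  = and i32 %x, %mask
// keeps the low %n bits of %x. With BMI2 this becomes a single BZHI
// (e.g. `bzhil %ecx, %edi, %eax` with the count in %ecx), and with only BMI1
// it is emitted as BEXTR whose control is the bit count shifted into bits
// 15:8, i.e. %n << 8 when X itself is not shifted.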
4091
4092// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
4093MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
4094 MVT NVT = Node->getSimpleValueType(0);
4095 SDLoc dl(Node);
4096
4097 SDValue N0 = Node->getOperand(0);
4098 SDValue N1 = Node->getOperand(1);
4099
4100 // If we have TBM we can use an immediate for the control. If we have BMI
4101 // we should only do this if the BEXTR instruction is implemented well.
4102 // Otherwise moving the control into a register makes this more costly.
4103 // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
4104 // hoisting the move immediate would make it worthwhile with a less optimal
4105 // BEXTR?
4106 bool PreferBEXTR =
4107 Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
4108 if (!PreferBEXTR && !Subtarget->hasBMI2())
4109 return nullptr;
4110
4111 // Must have a shift right.
4112 if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
4113 return nullptr;
4114
4115 // Shift can't have additional users.
4116 if (!N0->hasOneUse())
4117 return nullptr;
4118
4119 // Only supported for 32 and 64 bits.
4120 if (NVT != MVT::i32 && NVT != MVT::i64)
4121 return nullptr;
4122
4123 // Shift amount and RHS of and must be constant.
4124 auto *MaskCst = dyn_cast<ConstantSDNode>(N1);
4125 auto *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4126 if (!MaskCst || !ShiftCst)
4127 return nullptr;
4128
4129 // And RHS must be a mask.
4130 uint64_t Mask = MaskCst->getZExtValue();
4131 if (!isMask_64(Mask))
4132 return nullptr;
4133
4134 uint64_t Shift = ShiftCst->getZExtValue();
4135 uint64_t MaskSize = llvm::popcount(Mask);
4136
4137 // Don't interfere with something that can be handled by extracting AH.
4138 // TODO: If we are able to fold a load, BEXTR might still be better than AH.
4139 if (Shift == 8 && MaskSize == 8)
4140 return nullptr;
4141
4142 // Make sure we are only using bits that were in the original value, not
4143 // shifted in.
4144 if (Shift + MaskSize > NVT.getSizeInBits())
4145 return nullptr;
4146
4147 // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
4148 // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
4149 // does not fit into 32 bits. Load folding is not a sufficient reason.
4150 if (!PreferBEXTR && MaskSize <= 32)
4151 return nullptr;
4152
4153 SDValue Control;
4154 unsigned ROpc, MOpc;
4155
4156#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
4157 if (!PreferBEXTR) {
4158 assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
4159 // If we can't make use of BEXTR then we can't fuse shift+mask stages.
4160 // Let's perform the mask first, and apply shift later. Note that we need to
4161 // widen the mask to account for the fact that we'll apply shift afterwards!
4162 Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
4163 ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
4164 : GET_EGPR_IF_ENABLED(X86::BZHI32rr);
4165 MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
4166 : GET_EGPR_IF_ENABLED(X86::BZHI32rm);
4167 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
4168 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4169 } else {
4170 // The 'control' of BEXTR has the pattern of:
4171 // [15...8 bit][ 7...0 bit] location
4172 // [ bit count][ shift] name
4173 // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
4174 Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
4175 if (Subtarget->hasTBM()) {
4176 ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
4177 MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
4178 } else {
4179 assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
4180 // BMI requires the immediate to be placed in a register.
4181 ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
4182 : GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
4183 MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
4184 : GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
4185 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
4186 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4187 }
4188 }
4189
4190 MachineSDNode *NewNode;
4191 SDValue Input = N0->getOperand(0);
4192 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4193 if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4194 SDValue Ops[] = {
4195 Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
4196 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4197 NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4198 // Update the chain.
4199 ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
4200 // Record the mem-refs
4201 CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
4202 } else {
4203 NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
4204 }
4205
4206 if (!PreferBEXTR) {
4207 // We still need to apply the shift.
4208 SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
4209 unsigned NewOpc = NVT == MVT::i64 ? GET_ND_IF_ENABLED(X86::SHR64ri)
4210 : GET_ND_IF_ENABLED(X86::SHR32ri);
4211 NewNode =
4212 CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
4213 }
4214
4215 return NewNode;
4216}
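// Illustrative example (not part of the original source): (x >> 4) & 0xfff
// has Shift = 4 and MaskSize = 12, so with TBM it selects to
// `bextr $0xc04, %edi, %eax` (control = 4 | (12 << 8)), while the BZHI-only
// path instead masks to Shift + MaskSize = 16 bits first and applies the
// shift right by 4 afterwards.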
4217
4218// Emit a PCMPISTR(I/M) instruction.
4219MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
4220 bool MayFoldLoad, const SDLoc &dl,
4221 MVT VT, SDNode *Node) {
4222 SDValue N0 = Node->getOperand(0);
4223 SDValue N1 = Node->getOperand(1);
4224 SDValue Imm = Node->getOperand(2);
4225 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4226 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
4227
4228 // Try to fold a load. No need to check alignment.
4229 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4230 if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4231 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4232 N1.getOperand(0) };
4233 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
4234 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4235 // Update the chain.
4236 ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
4237 // Record the mem-refs
4238 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4239 return CNode;
4240 }
4241
4242 SDValue Ops[] = { N0, N1, Imm };
4243 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
4244 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
4245 return CNode;
4246}
4247
4248// Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
4249// to emit a second instruction after this one. This is needed since we have two
4250// copyToReg nodes glued before this and we need to continue that glue through.
4251MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
4252 bool MayFoldLoad, const SDLoc &dl,
4253 MVT VT, SDNode *Node,
4254 SDValue &InGlue) {
4255 SDValue N0 = Node->getOperand(0);
4256 SDValue N2 = Node->getOperand(2);
4257 SDValue Imm = Node->getOperand(4);
4258 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4259 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
4260
4261 // Try to fold a load. No need to check alignment.
4262 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4263 if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4264 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4265 N2.getOperand(0), InGlue };
4266 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
4267 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4268 InGlue = SDValue(CNode, 3);
4269 // Update the chain.
4270 ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
4271 // Record the mem-refs
4272 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
4273 return CNode;
4274 }
4275
4276 SDValue Ops[] = { N0, N2, Imm, InGlue };
4277 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
4278 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
4279 InGlue = SDValue(CNode, 2);
4280 return CNode;
4281}
4282
4283bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4284 EVT VT = N->getValueType(0);
4285
4286 // Only handle scalar shifts.
4287 if (VT.isVector())
4288 return false;
4289
4290 // Narrower shifts only mask to 5 bits in hardware.
4291 unsigned Size = VT == MVT::i64 ? 64 : 32;
4292
4293 SDValue OrigShiftAmt = N->getOperand(1);
4294 SDValue ShiftAmt = OrigShiftAmt;
4295 SDLoc DL(N);
4296
4297 // Skip over a truncate of the shift amount.
4298 if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
4299 ShiftAmt = ShiftAmt->getOperand(0);
4300
4301 // This function is called after X86DAGToDAGISel::matchBitExtract(),
4302 // so we are not afraid that we might mess up BZHI/BEXTR pattern.
4303
4304 SDValue NewShiftAmt;
4305 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB ||
4306 ShiftAmt->getOpcode() == ISD::XOR) {
4307 SDValue Add0 = ShiftAmt->getOperand(0);
4308 SDValue Add1 = ShiftAmt->getOperand(1);
4309 auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
4310 auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
4311 // If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X
4312 // to avoid the ADD/SUB/XOR.
4313 if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
4314 NewShiftAmt = Add0;
4315
4316 } else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() &&
4317 ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) ||
4318 (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) {
4319 // If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X
4320 // we can replace it with a NOT. In the XOR case it may save some code
4321 // size, in the SUB case it also may save a move.
4322 assert(Add0C == nullptr || Add1C == nullptr);
4323
4324 // We can only do N-X, not X-N
4325 if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr)
4326 return false;
4327
4328 EVT OpVT = ShiftAmt.getValueType();
4329
4330 SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT);
4331 NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
4332 Add0C == nullptr ? Add0 : Add1, AllOnes);
4333 insertDAGNode(*CurDAG, OrigShiftAmt, AllOnes);
4334 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4335 // If we are shifting by N-X where N == 0 mod Size, then just shift by
4336 // -X to generate a NEG instead of a SUB of a constant.
4337 } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
4338 Add0C->getZExtValue() != 0) {
4339 EVT SubVT = ShiftAmt.getValueType();
4340 SDValue X;
4341 if (Add0C->getZExtValue() % Size == 0)
4342 X = Add1;
4343 else if (ShiftAmt.hasOneUse() && Size == 64 &&
4344 Add0C->getZExtValue() % 32 == 0) {
4345 // We have a 64-bit shift by (n*32-x), turn it into -(x+n*32).
4346 // This is mainly beneficial if we already compute (x+n*32).
4347 if (Add1.getOpcode() == ISD::TRUNCATE) {
4348 Add1 = Add1.getOperand(0);
4349 SubVT = Add1.getValueType();
4350 }
4351 if (Add0.getValueType() != SubVT) {
4352 Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT);
4353 insertDAGNode(*CurDAG, OrigShiftAmt, Add0);
4354 }
4355
4356 X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0);
4357 insertDAGNode(*CurDAG, OrigShiftAmt, X);
4358 } else
4359 return false;
4360 // Insert a negate op.
4361 // TODO: This isn't guaranteed to replace the sub if there is a logic cone
4362 // that uses it that's not a shift.
4363 SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
4364 SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
4365 NewShiftAmt = Neg;
4366
4367 // Insert these operands into a valid topological order so they can
4368 // get selected independently.
4369 insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
4370 insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
4371 } else
4372 return false;
4373 } else
4374 return false;
4375
4376 if (NewShiftAmt.getValueType() != MVT::i8) {
4377 // Need to truncate the shift amount.
4378 NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
4379 // Add to a correct topological ordering.
4380 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4381 }
4382
4383 // Insert a new mask to keep the shift amount legal. This should be removed
4384 // by isel patterns.
4385 NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
4386 CurDAG->getConstant(Size - 1, DL, MVT::i8));
4387 // Place in a correct topological ordering.
4388 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4389
4390 SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
4391 NewShiftAmt);
4392 if (UpdatedNode != N) {
4393 // If we found an existing node, we should replace ourselves with that node
4394 // and wait for it to be selected after its other users.
4395 ReplaceNode(N, UpdatedNode);
4396 return true;
4397 }
4398
4399 // If the original shift amount is now dead, delete it so that we don't run
4400 // it through isel.
4401 if (OrigShiftAmt.getNode()->use_empty())
4402 CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
4403
4404 // Now that we've optimized the shift amount, defer to normal isel to get
4405 // load folding and legacy vs BMI2 selection without repeating it here.
4406 SelectCode(N);
4407 return true;
4408}
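// Illustrative example (not part of the original source): for a 32-bit shift
// whose amount is (32 - %n), the explicit subtraction is replaced by shifting
// by -%n, since the hardware masks the count to 5 bits anyway; this typically
// selects to a NEG of the count followed by `shll %cl, %eax` instead of
// materializing the constant 32 and subtracting from it.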
4409
4410bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
4411 MVT NVT = N->getSimpleValueType(0);
4412 unsigned Opcode = N->getOpcode();
4413 SDLoc dl(N);
4414
4415 // For operations of the form (x << C1) op C2, check if we can use a smaller
4416 // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
4417 SDValue Shift = N->getOperand(0);
4418 SDValue N1 = N->getOperand(1);
4419
4420 auto *Cst = dyn_cast<ConstantSDNode>(N1);
4421 if (!Cst)
4422 return false;
4423
4424 int64_t Val = Cst->getSExtValue();
4425
4426 // If we have an any_extend feeding the AND, look through it to see if there
4427 // is a shift behind it. But only if the AND doesn't use the extended bits.
4428 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
4429 bool FoundAnyExtend = false;
4430 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
4431 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
4432 isUInt<32>(Val)) {
4433 FoundAnyExtend = true;
4434 Shift = Shift.getOperand(0);
4435 }
4436
4437 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
4438 return false;
4439
4440 // i8 is unshrinkable, i16 should be promoted to i32.
4441 if (NVT != MVT::i32 && NVT != MVT::i64)
4442 return false;
4443
4444 auto *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
4445 if (!ShlCst)
4446 return false;
4447
4448 uint64_t ShAmt = ShlCst->getZExtValue();
4449
4450 // Make sure that we don't change the operation by removing bits.
4451 // This only matters for OR and XOR, AND is unaffected.
4452 uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
4453 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
4454 return false;
4455
4456 // Check the minimum bitwidth for the new constant.
4457 // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
4458 auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
4459 if (Opcode == ISD::AND) {
4460 // AND32ri is the same as AND64ri32 with zext imm.
4461 // Try this before sign extended immediates below.
4462 ShiftedVal = (uint64_t)Val >> ShAmt;
4463 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4464 return true;
4465 // Also swap order when the AND can become MOVZX.
4466 if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
4467 return true;
4468 }
4469 ShiftedVal = Val >> ShAmt;
4470 if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
4471 (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
4472 return true;
4473 if (Opcode != ISD::AND) {
4474 // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
4475 ShiftedVal = (uint64_t)Val >> ShAmt;
4476 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4477 return true;
4478 }
4479 return false;
4480 };
4481
4482 int64_t ShiftedVal;
4483 if (!CanShrinkImmediate(ShiftedVal))
4484 return false;
4485
4486 // Ok, we can reorder to get a smaller immediate.
4487
4488 // But, it's possible the original immediate allowed an AND to become MOVZX.
4489 // We do this check late to keep the MaskedValueIsZero call as late as
4490 // possible.
4491 if (Opcode == ISD::AND) {
4492 // Find the smallest zext this could possibly be.
4493 unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
4494 ZExtWidth = llvm::bit_ceil(std::max(ZExtWidth, 8U));
4495
4496 // Figure out which bits need to be zero to achieve that mask.
4497 APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
4498 ZExtWidth);
4499 NeededMask &= ~Cst->getAPIntValue();
4500
4501 if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
4502 return false;
4503 }
4504
4505 SDValue X = Shift.getOperand(0);
4506 if (FoundAnyExtend) {
4507 SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
4508 insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
4509 X = NewX;
4510 }
4511
4512 SDValue NewCst = CurDAG->getSignedConstant(ShiftedVal, dl, NVT);
4513 insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
4514 SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
4515 insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
4516 SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
4517 Shift.getOperand(1));
4518 ReplaceNode(N, NewSHL.getNode());
4519 SelectCode(NewSHL.getNode());
4520 return true;
4521}
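// Illustrative example (not part of the original source): on i64,
//   (x << 32) | 0x500000000
// is rewritten as ((x | 5) << 32). The original constant would need a movabsq
// to materialize, while the reordered form uses an 8-bit immediate; the
// reordering is safe because the low 32 bits of the constant are zero, so no
// set bits are lost by applying the OR before the shift.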
4522
4523bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
4524 SDNode *ParentB, SDNode *ParentC,
4525 SDValue A, SDValue B, SDValue C,
4526 uint8_t Imm) {
4527 assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) &&
4528 C.isOperandOf(ParentC) && "Incorrect parent node");
4529
4530 auto tryFoldLoadOrBCast =
4531 [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
4532 SDValue &Index, SDValue &Disp, SDValue &Segment) {
4533 if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
4534 return true;
4535
4536 // Not a load, check for broadcast which may be behind a bitcast.
4537 if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
4538 P = L.getNode();
4539 L = L.getOperand(0);
4540 }
4541
4542 if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
4543 return false;
4544
4545 // Only 32 and 64 bit broadcasts are supported.
4546 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
4547 unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
4548 if (Size != 32 && Size != 64)
4549 return false;
4550
4551 return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
4552 };
4553
4554 bool FoldedLoad = false;
4555 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4556 if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4557 FoldedLoad = true;
4558 } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
4559 Tmp4)) {
4560 FoldedLoad = true;
4561 std::swap(A, C);
4562 // Swap bits 1/4 and 3/6.
4563 uint8_t OldImm = Imm;
4564 Imm = OldImm & 0xa5;
4565 if (OldImm & 0x02) Imm |= 0x10;
4566 if (OldImm & 0x10) Imm |= 0x02;
4567 if (OldImm & 0x08) Imm |= 0x40;
4568 if (OldImm & 0x40) Imm |= 0x08;
4569 } else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3,
4570 Tmp4)) {
4571 FoldedLoad = true;
4572 std::swap(B, C);
4573 // Swap bits 1/2 and 5/6.
4574 uint8_t OldImm = Imm;
4575 Imm = OldImm & 0x99;
4576 if (OldImm & 0x02) Imm |= 0x04;
4577 if (OldImm & 0x04) Imm |= 0x02;
4578 if (OldImm & 0x20) Imm |= 0x40;
4579 if (OldImm & 0x40) Imm |= 0x20;
4580 }
4581
4582 SDLoc DL(Root);
4583
4584 SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);
4585
4586 MVT NVT = Root->getSimpleValueType(0);
4587
4588 MachineSDNode *MNode;
4589 if (FoldedLoad) {
4590 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
4591
4592 unsigned Opc;
4593 if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) {
4594 auto *MemIntr = cast<MemIntrinsicSDNode>(C);
4595 unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
4596 assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!");
4597
4598 bool UseD = EltSize == 32;
4599 if (NVT.is128BitVector())
4600 Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
4601 else if (NVT.is256BitVector())
4602 Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
4603 else if (NVT.is512BitVector())
4604 Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
4605 else
4606 llvm_unreachable("Unexpected vector size!");
4607 } else {
4608 bool UseD = NVT.getVectorElementType() == MVT::i32;
4609 if (NVT.is128BitVector())
4610 Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
4611 else if (NVT.is256BitVector())
4612 Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
4613 else if (NVT.is512BitVector())
4614 Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
4615 else
4616 llvm_unreachable("Unexpected vector size!");
4617 }
4618
4619 SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)};
4620 MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
4621
4622 // Update the chain.
4623 ReplaceUses(C.getValue(1), SDValue(MNode, 1));
4624 // Record the mem-refs
4625 CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()});
4626 } else {
4627 bool UseD = NVT.getVectorElementType() == MVT::i32;
4628 unsigned Opc;
4629 if (NVT.is128BitVector())
4630 Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
4631 else if (NVT.is256BitVector())
4632 Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
4633 else if (NVT.is512BitVector())
4634 Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
4635 else
4636 llvm_unreachable("Unexpected vector size!");
4637
4638 MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
4639 }
4640
4641 ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0));
4642 CurDAG->RemoveDeadNode(Root);
4643 return true;
4644}
4645
4646// Try to match two logic ops to a VPTERNLOG.
4647// FIXME: Handle more complex patterns that use an operand more than once?
4648bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
4649 MVT NVT = N->getSimpleValueType(0);
4650
4651 // Make sure we support VPTERNLOG.
4652 if (!NVT.isVector() || !Subtarget->hasAVX512() ||
4653 NVT.getVectorElementType() == MVT::i1)
4654 return false;
4655
4656 // We need VLX for 128/256-bit.
4657 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
4658 return false;
4659
4660 SDValue N0 = N->getOperand(0);
4661 SDValue N1 = N->getOperand(1);
4662
4663 auto getFoldableLogicOp = [](SDValue Op) {
4664 // Peek through single use bitcast.
4665 if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
4666 Op = Op.getOperand(0);
4667
4668 if (!Op.hasOneUse())
4669 return SDValue();
4670
4671 unsigned Opc = Op.getOpcode();
4672 if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
4673 Opc == X86ISD::ANDNP)
4674 return Op;
4675
4676 return SDValue();
4677 };
4678
4679 SDValue A, FoldableOp;
4680 if ((FoldableOp = getFoldableLogicOp(N1))) {
4681 A = N0;
4682 } else if ((FoldableOp = getFoldableLogicOp(N0))) {
4683 A = N1;
4684 } else
4685 return false;
4686
4687 SDValue B = FoldableOp.getOperand(0);
4688 SDValue C = FoldableOp.getOperand(1);
4689 SDNode *ParentA = N;
4690 SDNode *ParentB = FoldableOp.getNode();
4691 SDNode *ParentC = FoldableOp.getNode();
4692
4693 // We can build the appropriate control immediate by performing the logic
4694 // operation we're matching using these constants for A, B, and C.
4695 uint8_t TernlogMagicA = 0xf0;
4696 uint8_t TernlogMagicB = 0xcc;
4697 uint8_t TernlogMagicC = 0xaa;
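 // As a worked example of how these compose: matching (or A, (and B, C))
 // first computes Imm = TernlogMagicB & TernlogMagicC = 0xcc & 0xaa = 0x88
 // for the inner AND in the switches below, then ORs in TernlogMagicA to get
 // 0xf8, the usual ternlog immediate for A | (B & C).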
4698
4699 // Some of the inputs may be inverted, peek through them and invert the
4700 // magic values accordingly.
4701 // TODO: There may be a bitcast before the xor that we should peek through.
4702 auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) {
4703 if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() &&
4704 ISD::isBuildVectorAllOnes(Op.getOperand(1).getNode())) {
4705 Magic = ~Magic;
4706 Parent = Op.getNode();
4707 Op = Op.getOperand(0);
4708 }
4709 };
4710
4711 PeekThroughNot(A, ParentA, TernlogMagicA);
4712 PeekThroughNot(B, ParentB, TernlogMagicB);
4713 PeekThroughNot(C, ParentC, TernlogMagicC);
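 // For example, if C was wrapped in an xor with all-ones and was peeked
 // through here, TernlogMagicC is now ~0xaa = 0x55, so an inner AND below
 // computes 0xcc & 0x55 = 0x44, the immediate for (B & ~C) in terms of the
 // original inputs.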
4714
4715 uint8_t Imm;
4716 switch (FoldableOp.getOpcode()) {
4717 default: llvm_unreachable("Unexpected opcode!");
4718 case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
4719 case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break;
4720 case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;
4721 case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
4722 }
4723
4724 switch (N->getOpcode()) {
4725 default: llvm_unreachable("Unexpected opcode!");
4726 case X86ISD::ANDNP:
4727 if (A == N0)
4728 Imm &= ~TernlogMagicA;
4729 else
4730 Imm = ~(Imm) & TernlogMagicA;
4731 break;
4732 case ISD::AND: Imm &= TernlogMagicA; break;
4733 case ISD::OR: Imm |= TernlogMagicA; break;
4734 case ISD::XOR: Imm ^= TernlogMagicA; break;
4735 }
4736
4737 return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
4738}
4739
4740/// If the high bits of an 'and' operand are known zero, try setting the
4741/// high bits of an 'and' constant operand to produce a smaller encoding by
4742/// creating a small, sign-extended negative immediate rather than a large
4743/// positive one. This reverses a transform in SimplifyDemandedBits that
4744/// shrinks mask constants by clearing bits. There is also a possibility that
4745/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
4746/// case, just replace the 'and'. Return 'true' if the node is replaced.
4747bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
4748 // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
4749 // have immediate operands.
4750 MVT VT = And->getSimpleValueType(0);
4751 if (VT != MVT::i32 && VT != MVT::i64)
4752 return false;
4753
4754 auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
4755 if (!And1C)
4756 return false;
4757
4758 // Bail out if the mask constant is already negative. It can't shrink any more.
4759 // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
4760 // patterns to use a 32-bit and instead of a 64-bit and by relying on the
4761 // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
4762 // are negative too.
4763 APInt MaskVal = And1C->getAPIntValue();
4764 unsigned MaskLZ = MaskVal.countl_zero();
4765 if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
4766 return false;
4767
4768 // Don't extend into the upper 32 bits of a 64 bit mask.
4769 if (VT == MVT::i64 && MaskLZ >= 32) {
4770 MaskLZ -= 32;
4771 MaskVal = MaskVal.trunc(32);
4772 }
4773
4774 SDValue And0 = And->getOperand(0);
4775 APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
4776 APInt NegMaskVal = MaskVal | HighZeros;
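 // For example (i32): MaskVal = 0x0ffffff0 needs a 4-byte immediate, but if
 // the top 4 bits of the other operand are known to be zero, NegMaskVal is
 // 0xfffffff0 (-16), which sign-extends from 8 bits and can use the shorter
 // imm8 encoding.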
4777
4778 // If a negative constant would not allow a smaller encoding, there's no need
4779 // to continue. Only change the constant when we know it's a win.
4780 unsigned MinWidth = NegMaskVal.getSignificantBits();
4781 if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getSignificantBits() <= 32))
4782 return false;
4783
4784 // Extend masks if we truncated above.
4785 if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
4786 NegMaskVal = NegMaskVal.zext(64);
4787 HighZeros = HighZeros.zext(64);
4788 }
4789
4790 // The variable operand must be all zeros in the top bits to allow using the
4791 // new, negative constant as the mask.
4792 if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
4793 return false;
4794
4795 // Check if the mask is -1. In that case, this is an unnecessary instruction
4796 // that escaped earlier analysis.
4797 if (NegMaskVal.isAllOnes()) {
4798 ReplaceNode(And, And0.getNode());
4799 return true;
4800 }
4801
4802 // A negative mask allows a smaller encoding. Create a new 'and' node.
4803 SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
4804 insertDAGNode(*CurDAG, SDValue(And, 0), NewMask);
4805 SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
4806 ReplaceNode(And, NewAnd.getNode());
4807 SelectCode(NewAnd.getNode());
4808 return true;
4809}
4810
4811static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
4812 bool FoldedBCast, bool Masked) {
4813#define VPTESTM_CASE(VT, SUFFIX) \
4814case MVT::VT: \
4815 if (Masked) \
4816 return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \
4817 return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;
4818
4819
4820#define VPTESTM_BROADCAST_CASES(SUFFIX) \
4821default: llvm_unreachable("Unexpected VT!"); \
4822VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
4823VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
4824VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
4825VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
4826VPTESTM_CASE(v16i32, DZ##SUFFIX) \
4827VPTESTM_CASE(v8i64, QZ##SUFFIX)
4828
4829#define VPTESTM_FULL_CASES(SUFFIX) \
4830VPTESTM_BROADCAST_CASES(SUFFIX) \
4831VPTESTM_CASE(v16i8, BZ128##SUFFIX) \
4832VPTESTM_CASE(v8i16, WZ128##SUFFIX) \
4833VPTESTM_CASE(v32i8, BZ256##SUFFIX) \
4834VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
4835VPTESTM_CASE(v64i8, BZ##SUFFIX) \
4836VPTESTM_CASE(v32i16, WZ##SUFFIX)
4837
4838 if (FoldedBCast) {
4839 switch (TestVT.SimpleTy) {
4840 VPTESTM_BROADCAST_CASES(rmb)
4841 }
4842 }
4843
4844 if (FoldedLoad) {
4845 switch (TestVT.SimpleTy) {
4846 VPTESTM_FULL_CASES(rm)
4847 }
4848 }
4849
4850 switch (TestVT.SimpleTy) {
4851 VPTESTM_FULL_CASES(rr)
4852 }
4853
4854#undef VPTESTM_FULL_CASES
4855#undef VPTESTM_BROADCAST_CASES
4856#undef VPTESTM_CASE
4857}
4858
4859// Try to create VPTESTM instruction. If InMask is not null, it will be used
4860// to form a masked operation.
4861bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
4862 SDValue InMask) {
4863 assert(Subtarget->hasAVX512() && "Expected AVX512!");
4864 assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
4865 "Unexpected VT!");
4866
4867 // Look for equal and not equal compares.
4868 ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4869 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4870 return false;
4871
4872 SDValue SetccOp0 = Setcc.getOperand(0);
4873 SDValue SetccOp1 = Setcc.getOperand(1);
4874
4875 // Canonicalize the all zero vector to the RHS.
4876 if (ISD::isBuildVectorAllZeros(SetccOp0.getNode()))
4877 std::swap(SetccOp0, SetccOp1);
4878
4879 // See if we're comparing against zero.
4880 if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode()))
4881 return false;
4882
4883 SDValue N0 = SetccOp0;
4884
4885 MVT CmpVT = N0.getSimpleValueType();
4886 MVT CmpSVT = CmpVT.getVectorElementType();
4887
4888 // Start with both operands the same. We'll try to refine this.
4889 SDValue Src0 = N0;
4890 SDValue Src1 = N0;
4891
4892 {
4893 // Look through single use bitcasts.
4894 SDValue N0Temp = N0;
4895 if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
4896 N0Temp = N0.getOperand(0);
4897
4898 // Look for single use AND.
4899 if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4900 Src0 = N0Temp.getOperand(0);
4901 Src1 = N0Temp.getOperand(1);
4902 }
4903 }
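 // At this point the pattern being matched is, roughly:
 //   (vXi1 setcc (and Src0, Src1), 0, setne) -> VPTESTM Src0, Src1
 //   (vXi1 setcc (and Src0, Src1), 0, seteq) -> VPTESTNM Src0, Src1
 // with Src0 == Src1 when there was no AND to split apart.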
4904
4905 // Without VLX we need to widen the operation.
4906 bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4907
4908 auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L,
4909 SDValue &Base, SDValue &Scale, SDValue &Index,
4910 SDValue &Disp, SDValue &Segment) {
4911 // If we need to widen, we can't fold the load.
4912 if (!Widen)
4913 if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
4914 return true;
4915
4916 // If we didn't fold a load, try to match a broadcast. There is no widening
4917 // limitation for this, but only 32 and 64 bit element types are supported.
4918 if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64)
4919 return false;
4920
4921 // Look through single use bitcasts.
4922 if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
4923 P = L.getNode();
4924 L = L.getOperand(0);
4925 }
4926
4927 if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
4928 return false;
4929
4930 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
4931 if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits())
4932 return false;
4933
4934 return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
4935 };
4936
4937 // We can only fold loads if the sources are unique.
4938 bool CanFoldLoads = Src0 != Src1;
4939
4940 bool FoldedLoad = false;
4941 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4942 if (CanFoldLoads) {
4943 FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2,
4944 Tmp3, Tmp4);
4945 if (!FoldedLoad) {
4946 // And is commutative.
4947 FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1,
4948 Tmp2, Tmp3, Tmp4);
4949 if (FoldedLoad)
4950 std::swap(Src0, Src1);
4951 }
4952 }
4953
4954 bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD;
4955
4956 bool IsMasked = InMask.getNode() != nullptr;
4957
4958 SDLoc dl(Root);
4959
4960 MVT ResVT = Setcc.getSimpleValueType();
4961 MVT MaskVT = ResVT;
4962 if (Widen) {
4963 // Widen the inputs using insert_subreg or copy_to_regclass.
4964 unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
4965 unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
4966 unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
4967 CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
4968 MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4969 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4970 CmpVT), 0);
4971 Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4972
4973 if (!FoldedBCast)
4974 Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4975
4976 if (IsMasked) {
4977 // Widen the mask.
4978 unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID();
4979 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4980 InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4981 dl, MaskVT, InMask, RC), 0);
4982 }
4983 }
4984
4985 bool IsTestN = CC == ISD::SETEQ;
4986 unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4987 IsMasked);
4988
4989 MachineSDNode *CNode;
4990 if (FoldedLoad) {
4991 SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4992
4993 if (IsMasked) {
4994 SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4995 Src1.getOperand(0) };
4996 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4997 } else {
4998 SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4999 Src1.getOperand(0) };
5000 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5001 }
5002
5003 // Update the chain.
5004 ReplaceUses(Src1.getValue(1), SDValue(CNode, 1));
5005 // Record the mem-refs
5006 CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
5007 } else {
5008 if (IsMasked)
5009 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
5010 else
5011 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
5012 }
5013
5014 // If we widened, we need to shrink the mask VT.
5015 if (Widen) {
5016 unsigned RegClass = TLI->getRegClassFor(ResVT)->getID();
5017 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
5018 CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
5019 dl, ResVT, SDValue(CNode, 0), RC);
5020 }
5021
5022 ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
5023 CurDAG->RemoveDeadNode(Root);
5024 return true;
5025}
5026
5027// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
5028// into vpternlog.
5029bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
5030 assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
5031
5032 MVT NVT = N->getSimpleValueType(0);
5033
5034 // Make sure we support VPTERNLOG.
5035 if (!NVT.isVector() || !Subtarget->hasAVX512())
5036 return false;
5037
5038 // We need VLX for 128/256-bit.
5039 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
5040 return false;
5041
5042 SDValue N0 = N->getOperand(0);
5043 SDValue N1 = N->getOperand(1);
5044
5045 // Canonicalize AND to LHS.
5046 if (N1.getOpcode() == ISD::AND)
5047 std::swap(N0, N1);
5048
5049 if (N0.getOpcode() != ISD::AND ||
5050 N1.getOpcode() != X86ISD::ANDNP ||
5051 !N0.hasOneUse() || !N1.hasOneUse())
5052 return false;
5053
5054 // ANDN is not commutable, use it to pin down A and C.
5055 SDValue A = N1.getOperand(0);
5056 SDValue C = N1.getOperand(1);
5057
5058 // AND is commutable, if one operand matches A, the other operand is B.
5059 // Otherwise this isn't a match.
5060 SDValue B;
5061 if (N0.getOperand(0) == A)
5062 B = N0.getOperand(1);
5063 else if (N0.getOperand(1) == A)
5064 B = N0.getOperand(0);
5065 else
5066 return false;
5067
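 // 0xCA is the ternlog truth table for the bit-select (A & B) | (~A & C):
 // using the magic constants from tryVPTERNLOG above (A = 0xf0, B = 0xcc,
 // C = 0xaa), (0xf0 & 0xcc) | (~0xf0 & 0xaa) = 0xc0 | 0x0a = 0xca.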
5068 SDLoc dl(N);
5069 SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
5070 SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
5071 ReplaceNode(N, Ternlog.getNode());
5072
5073 return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
5074 Ternlog.getNode(), A, B, C, 0xCA);
5075}
5076
5077void X86DAGToDAGISel::Select(SDNode *Node) {
5078 MVT NVT = Node->getSimpleValueType(0);
5079 unsigned Opcode = Node->getOpcode();
5080 SDLoc dl(Node);
5081
5082 if (Node->isMachineOpcode()) {
5083 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
5084 Node->setNodeId(-1);
5085 return; // Already selected.
5086 }
5087
5088 switch (Opcode) {
5089 default: break;
5090 case ISD::INTRINSIC_W_CHAIN: {
5091 unsigned IntNo = Node->getConstantOperandVal(1);
5092 switch (IntNo) {
5093 default: break;
5094 case Intrinsic::x86_encodekey128:
5095 case Intrinsic::x86_encodekey256: {
5096 if (!Subtarget->hasKL())
5097 break;
5098
5099 unsigned Opcode;
5100 switch (IntNo) {
5101 default: llvm_unreachable("Impossible intrinsic");
5102 case Intrinsic::x86_encodekey128:
5103 Opcode = X86::ENCODEKEY128;
5104 break;
5105 case Intrinsic::x86_encodekey256:
5106 Opcode = X86::ENCODEKEY256;
5107 break;
5108 }
5109
5110 SDValue Chain = Node->getOperand(0);
5111 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
5112 SDValue());
5113 if (Opcode == X86::ENCODEKEY256)
5114 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
5115 Chain.getValue(1));
5116
5117 MachineSDNode *Res = CurDAG->getMachineNode(
5118 Opcode, dl, Node->getVTList(),
5119 {Node->getOperand(2), Chain, Chain.getValue(1)});
5120 ReplaceNode(Node, Res);
5121 return;
5122 }
5123 case Intrinsic::x86_tileloadd64_internal:
5124 case Intrinsic::x86_tileloaddt164_internal: {
5125 if (!Subtarget->hasAMXTILE())
5126 break;
5127 auto *MFI =
5128 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5129 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
5130 unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
5131 ? X86::PTILELOADDV
5132 : X86::PTILELOADDT1V;
5133 // _tile_loadd_internal(row, col, buf, STRIDE)
5134 SDValue Base = Node->getOperand(4);
5135 SDValue Scale = getI8Imm(1, dl);
5136 SDValue Index = Node->getOperand(5);
5137 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5138 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5139 SDValue Chain = Node->getOperand(0);
5140 MachineSDNode *CNode;
5141 SDValue Ops[] = {Node->getOperand(2),
5142 Node->getOperand(3),
5143 Base,
5144 Scale,
5145 Index,
5146 Disp,
5147 Segment,
5148 Chain};
5149 CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
5150 ReplaceNode(Node, CNode);
5151 return;
5152 }
5153 }
5154 break;
5155 }
5156 case ISD::INTRINSIC_VOID: {
5157 unsigned IntNo = Node->getConstantOperandVal(1);
5158 switch (IntNo) {
5159 default: break;
5160 case Intrinsic::x86_sse3_monitor:
5161 case Intrinsic::x86_monitorx:
5162 case Intrinsic::x86_clzero: {
5163 bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
5164
5165 unsigned Opc = 0;
5166 switch (IntNo) {
5167 default: llvm_unreachable("Unexpected intrinsic!");
5168 case Intrinsic::x86_sse3_monitor:
5169 if (!Subtarget->hasSSE3())
5170 break;
5171 Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
5172 break;
5173 case Intrinsic::x86_monitorx:
5174 if (!Subtarget->hasMWAITX())
5175 break;
5176 Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
5177 break;
5178 case Intrinsic::x86_clzero:
5179 if (!Subtarget->hasCLZERO())
5180 break;
5181 Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
5182 break;
5183 }
5184
5185 if (Opc) {
5186 unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
5187 SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
5188 Node->getOperand(2), SDValue());
5189 SDValue InGlue = Chain.getValue(1);
5190
5191 if (IntNo == Intrinsic::x86_sse3_monitor ||
5192 IntNo == Intrinsic::x86_monitorx) {
5193 // Copy the other two operands to ECX and EDX.
5194 Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
5195 InGlue);
5196 InGlue = Chain.getValue(1);
5197 Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
5198 InGlue);
5199 InGlue = Chain.getValue(1);
5200 }
5201
5202 MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
5203 { Chain, InGlue});
5204 ReplaceNode(Node, CNode);
5205 return;
5206 }
5207
5208 break;
5209 }
5210 case Intrinsic::x86_tilestored64_internal: {
5211 auto *MFI =
5212 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5213 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
5214 unsigned Opc = X86::PTILESTOREDV;
5215 // _tile_stored_internal(row, col, buf, STRIDE, c)
5216 SDValue Base = Node->getOperand(4);
5217 SDValue Scale = getI8Imm(1, dl);
5218 SDValue Index = Node->getOperand(5);
5219 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5220 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5221 SDValue Chain = Node->getOperand(0);
5222 MachineSDNode *CNode;
5223 SDValue Ops[] = {Node->getOperand(2),
5224 Node->getOperand(3),
5225 Base,
5226 Scale,
5227 Index,
5228 Disp,
5229 Segment,
5230 Node->getOperand(6),
5231 Chain};
5232 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5233 ReplaceNode(Node, CNode);
5234 return;
5235 }
5236 case Intrinsic::x86_tileloadd64:
5237 case Intrinsic::x86_tileloaddt164:
5238 case Intrinsic::x86_tilestored64: {
5239 if (!Subtarget->hasAMXTILE())
5240 break;
5241 auto *MFI =
5242 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5243 MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
5244 unsigned Opc;
5245 switch (IntNo) {
5246 default: llvm_unreachable("Unexpected intrinsic!");
5247 case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break;
5248 case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break;
5249 case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break;
5250 }
5251 // FIXME: Match displacement and scale.
5252 unsigned TIndex = Node->getConstantOperandVal(2);
5253 SDValue TReg = getI8Imm(TIndex, dl);
5254 SDValue Base = Node->getOperand(3);
5255 SDValue Scale = getI8Imm(1, dl);
5256 SDValue Index = Node->getOperand(4);
5257 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5258 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5259 SDValue Chain = Node->getOperand(0);
5260 MachineSDNode *CNode;
5261 if (Opc == X86::PTILESTORED) {
5262 SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain };
5263 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5264 } else {
5265 SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain };
5266 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5267 }
5268 ReplaceNode(Node, CNode);
5269 return;
5270 }
5271 }
5272 break;
5273 }
5274 case ISD::BRIND:
5275 case X86ISD::NT_BRIND: {
5276 if (Subtarget->isTargetNaCl())
5277 // NaCl has its own pass where jmp %r32 instructions are converted to jmp %r64. We
5278 // leave the instruction alone.
5279 break;
5280 if (Subtarget->isTarget64BitILP32()) {
5281 // Converts a 32-bit register to a 64-bit, zero-extended version of
5282 // it. This is needed because x86-64 can do many things, but jmp %r32
5283 // ain't one of them.
5284 SDValue Target = Node->getOperand(1);
5285 assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
5286 SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64);
5287 SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other,
5288 Node->getOperand(0), ZextTarget);
5289 ReplaceNode(Node, Brind.getNode());
5290 SelectCode(ZextTarget.getNode());
5291 SelectCode(Brind.getNode());
5292 return;
5293 }
5294 break;
5295 }
5296 case X86ISD::GlobalBaseReg:
5297 ReplaceNode(Node, getGlobalBaseReg());
5298 return;
5299
5300 case ISD::BITCAST:
5301 // Just drop all 128/256/512-bit bitcasts.
5302 if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
5303 NVT == MVT::f128) {
5304 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
5305 CurDAG->RemoveDeadNode(Node);
5306 return;
5307 }
5308 break;
5309
5310 case ISD::SRL:
5311 if (matchBitExtract(Node))
5312 return;
5313 [[fallthrough]];
5314 case ISD::SRA:
5315 case ISD::SHL:
5316 if (tryShiftAmountMod(Node))
5317 return;
5318 break;
5319
5320 case X86ISD::VPTERNLOG: {
5321 uint8_t Imm = Node->getConstantOperandVal(3);
5322 if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
5323 Node->getOperand(1), Node->getOperand(2), Imm))
5324 return;
5325 break;
5326 }
5327
5328 case X86ISD::ANDNP:
5329 if (tryVPTERNLOG(Node))
5330 return;
5331 break;
5332
5333 case ISD::AND:
5334 if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
5335 // Try to form a masked VPTESTM. Operands can be in either order.
5336 SDValue N0 = Node->getOperand(0);
5337 SDValue N1 = Node->getOperand(1);
5338 if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
5339 tryVPTESTM(Node, N0, N1))
5340 return;
5341 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
5342 tryVPTESTM(Node, N1, N0))
5343 return;
5344 }
5345
5346 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
5347 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
5348 CurDAG->RemoveDeadNode(Node);
5349 return;
5350 }
5351 if (matchBitExtract(Node))
5352 return;
5353 if (AndImmShrink && shrinkAndImmediate(Node))
5354 return;
5355
5356 [[fallthrough]];
5357 case ISD::OR:
5358 case ISD::XOR:
5359 if (tryShrinkShlLogicImm(Node))
5360 return;
5361 if (Opcode == ISD::OR && tryMatchBitSelect(Node))
5362 return;
5363 if (tryVPTERNLOG(Node))
5364 return;
5365
5366 [[fallthrough]];
5367 case ISD::ADD:
5368 if (Opcode == ISD::ADD && matchBitExtract(Node))
5369 return;
5370 [[fallthrough]];
5371 case ISD::SUB: {
5372 // Try to avoid folding immediates with multiple uses for optsize.
5373 // This code tries to select to register form directly to avoid going
5374 // through the isel table which might fold the immediate. We can't change
5375 // the add/sub/and/or/xor with immediate patterns in the
5376 // tablegen files to check immediate use count without making the patterns
5377 // unavailable to the fast-isel table.
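 // Rough size comparison (illustrative) for two users of the same 32-bit
 // immediate:
 //   addl $0x12345678, %ebx   ; 6 bytes
 //   addl $0x12345678, %ecx   ; 6 bytes
 // versus materializing the constant once and using register forms:
 //   movl $0x12345678, %edx   ; 5 bytes
 //   addl %edx, %ebx          ; 2 bytes
 //   addl %edx, %ecx          ; 2 bytes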
5378 if (!CurDAG->shouldOptForSize())
5379 break;
5380
5381 // Only handle i8/i16/i32/i64.
5382 if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
5383 break;
5384
5385 SDValue N0 = Node->getOperand(0);
5386 SDValue N1 = Node->getOperand(1);
5387
5388 auto *Cst = dyn_cast<ConstantSDNode>(N1);
5389 if (!Cst)
5390 break;
5391
5392 int64_t Val = Cst->getSExtValue();
5393
5394 // Make sure it's an immediate that is considered foldable.
5395 // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
5396 if (!isInt<8>(Val) && !isInt<32>(Val))
5397 break;
5398
5399 // If this can match to INC/DEC, let it go.
5400 if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
5401 break;
5402
5403 // Check if we should avoid folding this immediate.
5404 if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
5405 break;
5406
5407 // We should not fold the immediate. So we need a register form instead.
5408 unsigned ROpc, MOpc;
5409 switch (NVT.SimpleTy) {
5410 default: llvm_unreachable("Unexpected VT!");
5411 case MVT::i8:
5412 switch (Opcode) {
5413 default: llvm_unreachable("Unexpected opcode!");
5414 case ISD::ADD:
5415 ROpc = GET_ND_IF_ENABLED(X86::ADD8rr);
5416 MOpc = GET_ND_IF_ENABLED(X86::ADD8rm);
5417 break;
5418 case ISD::SUB:
5419 ROpc = GET_ND_IF_ENABLED(X86::SUB8rr);
5420 MOpc = GET_ND_IF_ENABLED(X86::SUB8rm);
5421 break;
5422 case ISD::AND:
5423 ROpc = GET_ND_IF_ENABLED(X86::AND8rr);
5424 MOpc = GET_ND_IF_ENABLED(X86::AND8rm);
5425 break;
5426 case ISD::OR:
5427 ROpc = GET_ND_IF_ENABLED(X86::OR8rr);
5428 MOpc = GET_ND_IF_ENABLED(X86::OR8rm);
5429 break;
5430 case ISD::XOR:
5431 ROpc = GET_ND_IF_ENABLED(X86::XOR8rr);
5432 MOpc = GET_ND_IF_ENABLED(X86::XOR8rm);
5433 break;
5434 }
5435 break;
5436 case MVT::i16:
5437 switch (Opcode) {
5438 default: llvm_unreachable("Unexpected opcode!");
5439 case ISD::ADD:
5440 ROpc = GET_ND_IF_ENABLED(X86::ADD16rr);
5441 MOpc = GET_ND_IF_ENABLED(X86::ADD16rm);
5442 break;
5443 case ISD::SUB:
5444 ROpc = GET_ND_IF_ENABLED(X86::SUB16rr);
5445 MOpc = GET_ND_IF_ENABLED(X86::SUB16rm);
5446 break;
5447 case ISD::AND:
5448 ROpc = GET_ND_IF_ENABLED(X86::AND16rr);
5449 MOpc = GET_ND_IF_ENABLED(X86::AND16rm);
5450 break;
5451 case ISD::OR:
5452 ROpc = GET_ND_IF_ENABLED(X86::OR16rr);
5453 MOpc = GET_ND_IF_ENABLED(X86::OR16rm);
5454 break;
5455 case ISD::XOR:
5456 ROpc = GET_ND_IF_ENABLED(X86::XOR16rr);
5457 MOpc = GET_ND_IF_ENABLED(X86::XOR16rm);
5458 break;
5459 }
5460 break;
5461 case MVT::i32:
5462 switch (Opcode) {
5463 default: llvm_unreachable("Unexpected opcode!");
5464 case ISD::ADD:
5465 ROpc = GET_ND_IF_ENABLED(X86::ADD32rr);
5466 MOpc = GET_ND_IF_ENABLED(X86::ADD32rm);
5467 break;
5468 case ISD::SUB:
5469 ROpc = GET_ND_IF_ENABLED(X86::SUB32rr);
5470 MOpc = GET_ND_IF_ENABLED(X86::SUB32rm);
5471 break;
5472 case ISD::AND:
5473 ROpc = GET_ND_IF_ENABLED(X86::AND32rr);
5474 MOpc = GET_ND_IF_ENABLED(X86::AND32rm);
5475 break;
5476 case ISD::OR:
5477 ROpc = GET_ND_IF_ENABLED(X86::OR32rr);
5478 MOpc = GET_ND_IF_ENABLED(X86::OR32rm);
5479 break;
5480 case ISD::XOR:
5481 ROpc = GET_ND_IF_ENABLED(X86::XOR32rr);
5482 MOpc = GET_ND_IF_ENABLED(X86::XOR32rm);
5483 break;
5484 }
5485 break;
5486 case MVT::i64:
5487 switch (Opcode) {
5488 default: llvm_unreachable("Unexpected opcode!");
5489 case ISD::ADD:
5490 ROpc = GET_ND_IF_ENABLED(X86::ADD64rr);
5491 MOpc = GET_ND_IF_ENABLED(X86::ADD64rm);
5492 break;
5493 case ISD::SUB:
5494 ROpc = GET_ND_IF_ENABLED(X86::SUB64rr);
5495 MOpc = GET_ND_IF_ENABLED(X86::SUB64rm);
5496 break;
5497 case ISD::AND:
5498 ROpc = GET_ND_IF_ENABLED(X86::AND64rr);
5499 MOpc = GET_ND_IF_ENABLED(X86::AND64rm);
5500 break;
5501 case ISD::OR:
5502 ROpc = GET_ND_IF_ENABLED(X86::OR64rr);
5503 MOpc = GET_ND_IF_ENABLED(X86::OR64rm);
5504 break;
5505 case ISD::XOR:
5506 ROpc = GET_ND_IF_ENABLED(X86::XOR64rr);
5507 MOpc = GET_ND_IF_ENABLED(X86::XOR64rm);
5508 break;
5509 }
5510 break;
5511 }
5512
5513 // Ok, this is an AND/OR/XOR/ADD/SUB with a constant.
5514
5515 // If this is not a subtract, we can still try to fold a load.
5516 if (Opcode != ISD::SUB) {
5517 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5518 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
5519 SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
5520 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
5521 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5522 // Update the chain.
5523 ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
5524 // Record the mem-refs
5525 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
5526 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5527 CurDAG->RemoveDeadNode(Node);
5528 return;
5529 }
5530 }
5531
5532 CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
5533 return;
5534 }
5535
5536 case X86ISD::SMUL:
5537 // i16/i32/i64 are handled with isel patterns.
5538 if (NVT != MVT::i8)
5539 break;
5540 [[fallthrough]];
5541 case X86ISD::UMUL: {
5542 SDValue N0 = Node->getOperand(0);
5543 SDValue N1 = Node->getOperand(1);
5544
5545 unsigned LoReg, ROpc, MOpc;
5546 switch (NVT.SimpleTy) {
5547 default: llvm_unreachable("Unsupported VT!");
5548 case MVT::i8:
5549 LoReg = X86::AL;
5550 ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
5551 MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
5552 break;
5553 case MVT::i16:
5554 LoReg = X86::AX;
5555 ROpc = X86::MUL16r;
5556 MOpc = X86::MUL16m;
5557 break;
5558 case MVT::i32:
5559 LoReg = X86::EAX;
5560 ROpc = X86::MUL32r;
5561 MOpc = X86::MUL32m;
5562 break;
5563 case MVT::i64:
5564 LoReg = X86::RAX;
5565 ROpc = X86::MUL64r;
5566 MOpc = X86::MUL64m;
5567 break;
5568 }
5569
5570 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5571 bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5572 // Multiply is commutative.
5573 if (!FoldedLoad) {
5574 FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5575 if (FoldedLoad)
5576 std::swap(N0, N1);
5577 }
5578
5579 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
5580 N0, SDValue()).getValue(1);
5581
5582 MachineSDNode *CNode;
5583 if (FoldedLoad) {
5584 // i16/i32/i64 use an instruction that produces a low and high result even
5585 // though only the low result is used.
5586 SDVTList VTs;
5587 if (NVT == MVT::i8)
5588 VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
5589 else
5590 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
5591
5592 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5593 InGlue };
5594 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5595
5596 // Update the chain.
5597 ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
5598 // Record the mem-refs
5599 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5600 } else {
5601 // i16/i32/i64 use an instruction that produces a low and high result even
5602 // though only the low result is used.
5603 SDVTList VTs;
5604 if (NVT == MVT::i8)
5605 VTs = CurDAG->getVTList(NVT, MVT::i32);
5606 else
5607 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
5608
5609 CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InGlue});
5610 }
5611
5612 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5613 ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
5614 CurDAG->RemoveDeadNode(Node);
5615 return;
5616 }
5617
5618 case ISD::SMUL_LOHI:
5619 case ISD::UMUL_LOHI: {
5620 SDValue N0 = Node->getOperand(0);
5621 SDValue N1 = Node->getOperand(1);
5622
5623 unsigned Opc, MOpc;
5624 unsigned LoReg, HiReg;
5625 bool IsSigned = Opcode == ISD::SMUL_LOHI;
5626 bool UseMULX = !IsSigned && Subtarget->hasBMI2();
5627 bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
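 // MULX (BMI2) reads one multiplicand implicitly from EDX/RDX, writes both
 // halves of the product to explicit destinations and leaves EFLAGS alone;
 // when the low half has no users, the MULX*Hrr/Hrm pseudo forms below only
 // define the high half.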
5628 switch (NVT.SimpleTy) {
5629 default: llvm_unreachable("Unsupported VT!");
5630 case MVT::i32:
5631 Opc = UseMULXHi ? X86::MULX32Hrr
5632 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
5633 : IsSigned ? X86::IMUL32r
5634 : X86::MUL32r;
5635 MOpc = UseMULXHi ? X86::MULX32Hrm
5636 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
5637 : IsSigned ? X86::IMUL32m
5638 : X86::MUL32m;
5639 LoReg = UseMULX ? X86::EDX : X86::EAX;
5640 HiReg = X86::EDX;
5641 break;
5642 case MVT::i64:
5643 Opc = UseMULXHi ? X86::MULX64Hrr
5644 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
5645 : IsSigned ? X86::IMUL64r
5646 : X86::MUL64r;
5647 MOpc = UseMULXHi ? X86::MULX64Hrm
5648 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
5649 : IsSigned ? X86::IMUL64m
5650 : X86::MUL64m;
5651 LoReg = UseMULX ? X86::RDX : X86::RAX;
5652 HiReg = X86::RDX;
5653 break;
5654 }
5655
5656 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5657 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5658 // Multiply is commutative.
5659 if (!foldedLoad) {
5660 foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5661 if (foldedLoad)
5662 std::swap(N0, N1);
5663 }
5664
5665 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
5666 N0, SDValue()).getValue(1);
5667 SDValue ResHi, ResLo;
5668 if (foldedLoad) {
5669 SDValue Chain;
5670 MachineSDNode *CNode = nullptr;
5671 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5672 InGlue };
5673 if (UseMULXHi) {
5674 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
5675 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5676 ResHi = SDValue(CNode, 0);
5677 Chain = SDValue(CNode, 1);
5678 } else if (UseMULX) {
5679 SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
5680 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5681 ResHi = SDValue(CNode, 0);
5682 ResLo = SDValue(CNode, 1);
5683 Chain = SDValue(CNode, 2);
5684 } else {
5685 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
5686 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5687 Chain = SDValue(CNode, 0);
5688 InGlue = SDValue(CNode, 1);
5689 }
5690
5691 // Update the chain.
5692 ReplaceUses(N1.getValue(1), Chain);
5693 // Record the mem-refs
5694 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5695 } else {
5696 SDValue Ops[] = { N1, InGlue };
5697 if (UseMULXHi) {
5698 SDVTList VTs = CurDAG->getVTList(NVT);
5699 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5700 ResHi = SDValue(CNode, 0);
5701 } else if (UseMULX) {
5702 SDVTList VTs = CurDAG->getVTList(NVT, NVT);
5703 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5704 ResHi = SDValue(CNode, 0);
5705 ResLo = SDValue(CNode, 1);
5706 } else {
5707 SDVTList VTs = CurDAG->getVTList(MVT::Glue);
5708 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5709 InGlue = SDValue(CNode, 0);
5710 }
5711 }
5712
5713 // Copy the low half of the result, if it is needed.
5714 if (!SDValue(Node, 0).use_empty()) {
5715 if (!ResLo) {
5716 assert(LoReg && "Register for low half is not defined!");
5717 ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
5718 NVT, InGlue);
5719 InGlue = ResLo.getValue(2);
5720 }
5721 ReplaceUses(SDValue(Node, 0), ResLo);
5722 LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
5723 dbgs() << '\n');
5724 }
5725 // Copy the high half of the result, if it is needed.
5726 if (!SDValue(Node, 1).use_empty()) {
5727 if (!ResHi) {
5728 assert(HiReg && "Register for high half is not defined!");
5729 ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
5730 NVT, InGlue);
5731 InGlue = ResHi.getValue(2);
5732 }
5733 ReplaceUses(SDValue(Node, 1), ResHi);
5734 LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
5735 dbgs() << '\n');
5736 }
5737
5738 CurDAG->RemoveDeadNode(Node);
5739 return;
5740 }
5741
5742 case ISD::SDIVREM:
5743 case ISD::UDIVREM: {
5744 SDValue N0 = Node->getOperand(0);
5745 SDValue N1 = Node->getOperand(1);
5746
5747 unsigned ROpc, MOpc;
5748 bool isSigned = Opcode == ISD::SDIVREM;
5749 if (!isSigned) {
5750 switch (NVT.SimpleTy) {
5751 default: llvm_unreachable("Unsupported VT!");
5752 case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break;
5753 case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break;
5754 case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break;
5755 case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break;
5756 }
5757 } else {
5758 switch (NVT.SimpleTy) {
5759 default: llvm_unreachable("Unsupported VT!");
5760 case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
5761 case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
5762 case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
5763 case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
5764 }
5765 }
5766
5767 unsigned LoReg, HiReg, ClrReg;
5768 unsigned SExtOpcode;
5769 switch (NVT.SimpleTy) {
5770 default: llvm_unreachable("Unsupported VT!");
5771 case MVT::i8:
5772 LoReg = X86::AL; ClrReg = HiReg = X86::AH;
5773 SExtOpcode = 0; // Not used.
5774 break;
5775 case MVT::i16:
5776 LoReg = X86::AX; HiReg = X86::DX;
5777 ClrReg = X86::DX;
5778 SExtOpcode = X86::CWD;
5779 break;
5780 case MVT::i32:
5781 LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
5782 SExtOpcode = X86::CDQ;
5783 break;
5784 case MVT::i64:
5785 LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
5786 SExtOpcode = X86::CQO;
5787 break;
5788 }
5789
5790 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5791 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5792 bool signBitIsZero = CurDAG->SignBitIsZero(N0);
5793
5794 SDValue InGlue;
5795 if (NVT == MVT::i8) {
5796 // Special case for div8: just use a move with zero (or sign) extension to
5797 // AX to set up the upper 8 bits (AH).
5798 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
5799 MachineSDNode *Move;
5800 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
5801 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
5802 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
5803 : X86::MOVZX16rm8;
5804 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
5805 Chain = SDValue(Move, 1);
5806 ReplaceUses(N0.getValue(1), Chain);
5807 // Record the mem-refs
5808 CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
5809 } else {
5810 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
5811 : X86::MOVZX16rr8;
5812 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
5813 Chain = CurDAG->getEntryNode();
5814 }
5815 Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
5816 SDValue());
5817 InGlue = Chain.getValue(1);
5818 } else {
5819 InGlue =
5820 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
5821 LoReg, N0, SDValue()).getValue(1);
5822 if (isSigned && !signBitIsZero) {
5823 // Sign extend the low part into the high part.
5824 InGlue =
5825 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InGlue),0);
5826 } else {
5827 // Zero out the high part, effectively zero extending the input.
5828 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
5829 SDValue ClrNode = SDValue(
5830 CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0);
5831 switch (NVT.SimpleTy) {
5832 case MVT::i16:
5833 ClrNode =
5834 SDValue(CurDAG->getMachineNode(
5835 TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
5836 CurDAG->getTargetConstant(X86::sub_16bit, dl,
5837 MVT::i32)),
5838 0);
5839 break;
5840 case MVT::i32:
5841 break;
5842 case MVT::i64:
5843 ClrNode =
5844 SDValue(CurDAG->getMachineNode(
5845 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
5846 CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
5847 CurDAG->getTargetConstant(X86::sub_32bit, dl,
5848 MVT::i32)),
5849 0);
5850 break;
5851 default:
5852 llvm_unreachable("Unexpected division source");
5853 }
5854
5855 InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
5856 ClrNode, InGlue).getValue(1);
5857 }
5858 }
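 // Concretely, for a 32-bit divide the setup above amounts to either
 //   cdq             ; sign-extend EAX into EDX:EAX (signed divide)
 // or
 //   xorl %edx, %edx ; zero EDX (unsigned divide, or sign bit known zero)
 // before the idivl/divl emitted below.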
5859
5860 if (foldedLoad) {
5861 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5862 InGlue };
5863 MachineSDNode *CNode =
5864 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
5865 InGlue = SDValue(CNode, 1);
5866 // Update the chain.
5867 ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
5868 // Record the mem-refs
5869 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5870 } else {
5871 InGlue =
5872 SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InGlue), 0);
5873 }
5874
5875 // Prevent use of AH in a REX instruction by explicitly copying it to
5876 // an ABCD_L register.
5877 //
5878 // The current assumption of the register allocator is that isel
5879 // won't generate explicit references to the GR8_ABCD_H registers. If
5880 // the allocator and/or the backend get enhanced to be more robust in
5881 // that regard, this can be, and should be, removed.
5882 if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
5883 SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
5884 unsigned AHExtOpcode =
5885 isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
5886
5887 SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
5888 MVT::Glue, AHCopy, InGlue);
5889 SDValue Result(RNode, 0);
5890 InGlue = SDValue(RNode, 1);
5891
5892 Result =
5893 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
5894
5895 ReplaceUses(SDValue(Node, 1), Result);
5896 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5897 dbgs() << '\n');
5898 }
5899 // Copy the division (low) result, if it is needed.
5900 if (!SDValue(Node, 0).use_empty()) {
5901 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
5902 LoReg, NVT, InGlue);
5903 InGlue = Result.getValue(2);
5904 ReplaceUses(SDValue(Node, 0), Result);
5905 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5906 dbgs() << '\n');
5907 }
5908 // Copy the remainder (high) result, if it is needed.
5909 if (!SDValue(Node, 1).use_empty()) {
5910 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
5911 HiReg, NVT, InGlue);
5912 InGlue = Result.getValue(2);
5913 ReplaceUses(SDValue(Node, 1), Result);
5914 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5915 dbgs() << '\n');
5916 }
5917 CurDAG->RemoveDeadNode(Node);
5918 return;
5919 }
5920
5921 case X86ISD::FCMP:
5922 case X86ISD::STRICT_FCMP:
5923 case X86ISD::STRICT_FCMPS: {
5924 bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
5925 Node->getOpcode() == X86ISD::STRICT_FCMPS;
5926 SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
5927 SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
5928
5929 // Save the original VT of the compare.
5930 MVT CmpVT = N0.getSimpleValueType();
5931
5932 // Floating point needs special handling if we don't have FCOMI.
5933 if (Subtarget->canUseCMOV())
5934 break;
5935
5936 bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
5937
5938 unsigned Opc;
5939 switch (CmpVT.SimpleTy) {
5940 default: llvm_unreachable("Unexpected type!");
5941 case MVT::f32:
5942 Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
5943 break;
5944 case MVT::f64:
5945 Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
5946 break;
5947 case MVT::f80:
5948 Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
5949 break;
5950 }
5951
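 // The rest of this block emits, in effect:
 //   fucom/fcom     ; x87 compare, sets C0/C2/C3 in the FPU status word
 //   fnstsw %ax     ; copy the status word into AX
 //   sahf           ; AH -> EFLAGS, so C0/C2/C3 land in CF/PF/ZF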
5952 SDValue Chain =
5953 IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
5954 SDValue Glue;
5955 if (IsStrictCmp) {
5956 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
5957 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
5958 Glue = Chain.getValue(1);
5959 } else {
5960 Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0);
5961 }
5962
5963 // Move FPSW to AX.
5964 SDValue FNSTSW =
5965 SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0);
5966
5967 // Extract upper 8-bits of AX.
5968 SDValue Extract =
5969 CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
5970
5971 // Move AH into flags.
5972 // Some 64-bit targets lack SAHF support, but they do support FCOMI.
5973 assert(Subtarget->canUseLAHFSAHF() &&
5974 "Target doesn't support SAHF or FCOMI?");
5975 SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
5976 Chain = AH;
5977 SDValue SAHF = SDValue(
5978 CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
5979
5980 if (IsStrictCmp)
5981 ReplaceUses(SDValue(Node, 1), Chain);
5982
5983 ReplaceUses(SDValue(Node, 0), SAHF);
5984 CurDAG->RemoveDeadNode(Node);
5985 return;
5986 }
5987
5988 case X86ISD::CMP: {
5989 SDValue N0 = Node->getOperand(0);
5990 SDValue N1 = Node->getOperand(1);
5991
5992 // Optimizations for TEST compares.
5993 if (!isNullConstant(N1))
5994 break;
5995
5996 // Save the original VT of the compare.
5997 MVT CmpVT = N0.getSimpleValueType();
5998
5999 // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
6000 // by a test instruction. The test should be removed later by
6001 // analyzeCompare if we are using only the zero flag.
6002 // TODO: Should we check the users and use the BEXTR flags directly?
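 // E.g. ((X >> 4) & 0xfff) == 0 can become a BEXTR that extracts the 12-bit
 // field plus a TEST of its result; BEXTR already sets ZF, so analyzeCompare
 // can usually drop the extra TEST afterwards.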
6003 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
6004 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
6005 unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
6006 : X86::TEST32rr;
6007 SDValue BEXTR = SDValue(NewNode, 0);
6008 NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
6009 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
6010 CurDAG->RemoveDeadNode(Node);
6011 return;
6012 }
6013 }
6014
6015 // We can peek through truncates, but we need to be careful below.
6016 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
6017 N0 = N0.getOperand(0);
6018
6019 // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
6020 // use a smaller encoding.
6021 // Look past the truncate if CMP is the only use of it.
6022 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6023 N0.getValueType() != MVT::i8) {
6024 auto *MaskC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6025 if (!MaskC)
6026 break;
6027
6028 // We may have looked through a truncate so mask off any bits that
6029 // shouldn't be part of the compare.
6030 uint64_t Mask = MaskC->getZExtValue();
6031 Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
6032
6033 // Check if we can replace AND+IMM{32,64} with a shift. This is possible
6034 // for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the
6035 // zero flag.
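 // For example, testing X against the mask 0x0000ffff00000000 would otherwise
 // need a movabsq of the constant; when the value has no other users it can
 // instead be tested, roughly, as
 //   shrq $32, %rax
 //   testw %ax, %ax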
6036 if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) &&
6037 onlyUsesZeroFlag(SDValue(Node, 0))) {
6038 unsigned ShiftOpcode = ISD::DELETED_NODE;
6039 unsigned ShiftAmt;
6040 unsigned SubRegIdx;
6041 MVT SubRegVT;
6042 unsigned TestOpcode;
6043 unsigned LeadingZeros = llvm::countl_zero(Mask);
6044 unsigned TrailingZeros = llvm::countr_zero(Mask);
6045
6046 // With leading/trailing zeros, the transform is profitable if we can
6047 // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without
6048 // incurring any extra register moves.
6049 bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse();
6050 if (LeadingZeros == 0 && SavesBytes) {
6051 // If the mask covers the most significant bit, then we can replace
6052 // TEST+AND with a SHR and check eflags.
6053 // This emits a redundant TEST which is subsequently eliminated.
6054 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6055 ShiftAmt = TrailingZeros;
6056 SubRegIdx = 0;
6057 TestOpcode = X86::TEST64rr;
6058 } else if (TrailingZeros == 0 && SavesBytes) {
6059 // If the mask covers the least significant bit, then we can replace
6060 // TEST+AND with a SHL and check eflags.
6061 // This emits a redundant TEST which is subsequently eliminated.
6062 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHL64ri);
6063 ShiftAmt = LeadingZeros;
6064 SubRegIdx = 0;
6065 TestOpcode = X86::TEST64rr;
6066 } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) {
6067 // If the shifted mask extends into the high half and is 8/16/32 bits
6068 // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
6069 unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
6070 if (PopCount == 8) {
6071 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6072 ShiftAmt = TrailingZeros;
6073 SubRegIdx = X86::sub_8bit;
6074 SubRegVT = MVT::i8;
6075 TestOpcode = X86::TEST8rr;
6076 } else if (PopCount == 16) {
6077 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6078 ShiftAmt = TrailingZeros;
6079 SubRegIdx = X86::sub_16bit;
6080 SubRegVT = MVT::i16;
6081 TestOpcode = X86::TEST16rr;
6082 } else if (PopCount == 32) {
6083 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6084 ShiftAmt = TrailingZeros;
6085 SubRegIdx = X86::sub_32bit;
6086 SubRegVT = MVT::i32;
6087 TestOpcode = X86::TEST32rr;
6088 }
6089 }
6090 if (ShiftOpcode != ISD::DELETED_NODE) {
6091 SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64);
6092 SDValue Shift = SDValue(
6093 CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32,
6094 N0.getOperand(0), ShiftC),
6095 0);
6096 if (SubRegIdx != 0) {
6097 Shift =
6098 CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift);
6099 }
6100 MachineSDNode *Test =
6101 CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift);
6102 ReplaceNode(Node, Test);
6103 return;
6104 }
6105 }
6106
6107 MVT VT;
6108 int SubRegOp;
6109 unsigned ROpc, MOpc;
6110
6111 // For each of these checks we need to be careful if the sign flag is
6112 // being used. It is only safe to use the sign flag in two conditions,
6113 // either the sign bit in the shrunken mask is zero or the final test
6114 // size is equal to the original compare size.
6115
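 // For example, narrowing "testl $0x80, %eax" to "testb $0x80, %al" moves the
 // constant's set bit into the sign-bit position of the narrower test, so SF
 // would then reflect bit 7 of the value instead of bit 31; that is why the
 // narrowing is gated on the shrunken mask's sign bit being clear, the test
 // width being unchanged, or SF having no users.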
6116 if (isUInt<8>(Mask) &&
6117 (!(Mask & 0x80) || CmpVT == MVT::i8 ||
6118 hasNoSignFlagUses(SDValue(Node, 0)))) {
6119 // For example, convert "testl %eax, $8" to "testb %al, $8"
6120 VT = MVT::i8;
6121 SubRegOp = X86::sub_8bit;
6122 ROpc = X86::TEST8ri;
6123 MOpc = X86::TEST8mi;
6124 } else if (OptForMinSize && isUInt<16>(Mask) &&
6125 (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
6126 hasNoSignFlagUses(SDValue(Node, 0)))) {
6127 // For example, "testl %eax, $32776" to "testw %ax, $32776".
6128 // NOTE: We only want to form TESTW instructions if optimizing for
6129 // min size. Otherwise we only save one byte and possibly get a length
6130 // changing prefix penalty in the decoders.
6131 VT = MVT::i16;
6132 SubRegOp = X86::sub_16bit;
6133 ROpc = X86::TEST16ri;
6134 MOpc = X86::TEST16mi;
6135 } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
6136 ((!(Mask & 0x80000000) &&
6137 // Without minsize 16-bit Cmps can get here so we need to
6138 // be sure we calculate the correct sign flag if needed.
6139 (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
6140 CmpVT == MVT::i32 ||
6141 hasNoSignFlagUses(SDValue(Node, 0)))) {
6142 // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
6143 // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
6144 // Otherwise, we find ourselves in a position where we have to do
6145 // promotion. If previous passes did not promote the and, we assume
6146 // they had a good reason not to and do not promote here.
6147 VT = MVT::i32;
6148 SubRegOp = X86::sub_32bit;
6149 ROpc = X86::TEST32ri;
6150 MOpc = X86::TEST32mi;
6151 } else {
6152 // No eligible transformation was found.
6153 break;
6154 }
6155
6156 SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
6157 SDValue Reg = N0.getOperand(0);
6158
6159 // Emit a testb, testw, or testl.
6160 MachineSDNode *NewNode;
6161 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
6162 if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
6163 if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) {
6164 if (!LoadN->isSimple()) {
6165 unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits();
6166 if ((MOpc == X86::TEST8mi && NumVolBits != 8) ||
6167 (MOpc == X86::TEST16mi && NumVolBits != 16) ||
6168 (MOpc == X86::TEST32mi && NumVolBits != 32))
6169 break;
6170 }
6171 }
6172 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
6173 Reg.getOperand(0) };
6174 NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
6175 // Update the chain.
6176 ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
6177 // Record the mem-refs
6178 CurDAG->setNodeMemRefs(NewNode,
6179 {cast<LoadSDNode>(Reg)->getMemOperand()});
6180 } else {
6181 // Extract the subregister if necessary.
6182 if (N0.getValueType() != VT)
6183 Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
6184
6185 NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
6186 }
6187 // Replace CMP with TEST.
6188 ReplaceNode(Node, NewNode);
6189 return;
6190 }
6191 break;
6192 }
6193 case X86ISD::PCMPISTR: {
6194 if (!Subtarget->hasSSE42())
6195 break;
6196
6197 bool NeedIndex = !SDValue(Node, 0).use_empty();
6198 bool NeedMask = !SDValue(Node, 1).use_empty();
6199 // We can't fold a load if we are going to make two instructions.
6200 bool MayFoldLoad = !NeedIndex || !NeedMask;
6201
6202 MachineSDNode *CNode;
6203 if (NeedMask) {
6204 unsigned ROpc =
6205 Subtarget->hasAVX() ? X86::VPCMPISTRMrri : X86::PCMPISTRMrri;
6206 unsigned MOpc =
6207 Subtarget->hasAVX() ? X86::VPCMPISTRMrmi : X86::PCMPISTRMrmi;
6208 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
6209 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
6210 }
6211 if (NeedIndex || !NeedMask) {
6212 unsigned ROpc =
6213 Subtarget->hasAVX() ? X86::VPCMPISTRIrri : X86::PCMPISTRIrri;
6214 unsigned MOpc =
6215 Subtarget->hasAVX() ? X86::VPCMPISTRIrmi : X86::PCMPISTRIrmi;
6216 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
6217 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
6218 }
6219
6220 // Connect the flag usage to the last instruction created.
6221 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
6222 CurDAG->RemoveDeadNode(Node);
6223 return;
6224 }
6225 case X86ISD::PCMPESTR: {
6226 if (!Subtarget->hasSSE42())
6227 break;
6228
6229 // Copy the two implicit register inputs.
6230 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
6231 Node->getOperand(1),
6232 SDValue()).getValue(1);
6233 InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
6234 Node->getOperand(3), InGlue).getValue(1);
6235
6236 bool NeedIndex = !SDValue(Node, 0).use_empty();
6237 bool NeedMask = !SDValue(Node, 1).use_empty();
6238 // We can't fold a load if we are going to make two instructions.
6239 bool MayFoldLoad = !NeedIndex || !NeedMask;
6240
6241 MachineSDNode *CNode;
6242 if (NeedMask) {
6243 unsigned ROpc =
6244 Subtarget->hasAVX() ? X86::VPCMPESTRMrri : X86::PCMPESTRMrri;
6245 unsigned MOpc =
6246 Subtarget->hasAVX() ? X86::VPCMPESTRMrmi : X86::PCMPESTRMrmi;
6247 CNode =
6248 emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node, InGlue);
6249 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
6250 }
6251 if (NeedIndex || !NeedMask) {
6252 unsigned ROpc =
6253 Subtarget->hasAVX() ? X86::VPCMPESTRIrri : X86::PCMPESTRIrri;
6254 unsigned MOpc =
6255 Subtarget->hasAVX() ? X86::VPCMPESTRIrmi : X86::PCMPESTRIrmi;
6256 CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InGlue);
6257 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
6258 }
6259 // Connect the flag usage to the last instruction created.
6260 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
6261 CurDAG->RemoveDeadNode(Node);
6262 return;
6263 }
6264
6265 case ISD::SETCC: {
6266 if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
6267 return;
6268
6269 break;
6270 }
6271
6272 case ISD::STORE:
6273 if (foldLoadStoreIntoMemOperand(Node))
6274 return;
6275 break;
6276
6277 case X86ISD::SETCC_CARRY: {
6278 MVT VT = Node->getSimpleValueType(0);
6279 SDValue Result;
6280 if (Subtarget->hasSBBDepBreaking()) {
6281 // We have to do this manually because tblgen will put the eflags copy in
6282 // the wrong place if we use an extract_subreg in the pattern.
6283 // Copy flags to the EFLAGS register and glue it to the next node.
6284 SDValue EFLAGS =
6285 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
6286 Node->getOperand(1), SDValue());
6287
6289 // Create a 64-bit instruction if the result is 64 bits, otherwise use the
6289 // 32-bit version.
6290 unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
6291 MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
6292 Result = SDValue(
6293 CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)),
6294 0);
6295 } else {
6296 // The target does not recognize sbb with the same reg operand as a
6297 // no-source idiom, so we explicitly zero the input values.
6298 Result = getSBBZero(Node);
6299 }
6300
6301 // For less than 32-bits we need to extract from the 32-bit node.
6302 if (VT == MVT::i8 || VT == MVT::i16) {
6303 int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
6304 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
6305 }
6306
6307 ReplaceUses(SDValue(Node, 0), Result);
6308 CurDAG->RemoveDeadNode(Node);
6309 return;
6310 }
6311 case X86ISD::SBB: {
6312 if (isNullConstant(Node->getOperand(0)) &&
6313 isNullConstant(Node->getOperand(1))) {
6314 SDValue Result = getSBBZero(Node);
6315
6316 // Replace the flag use.
6317 ReplaceUses(SDValue(Node, 1), Result.getValue(1));
6318
6319 // Replace the result use.
6320 if (!SDValue(Node, 0).use_empty()) {
6321 // For less than 32-bits we need to extract from the 32-bit node.
6322 MVT VT = Node->getSimpleValueType(0);
6323 if (VT == MVT::i8 || VT == MVT::i16) {
6324 int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
6325 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
6326 }
6327 ReplaceUses(SDValue(Node, 0), Result);
6328 }
6329
6330 CurDAG->RemoveDeadNode(Node);
6331 return;
6332 }
6333 break;
6334 }
6335 case X86ISD::MGATHER: {
6336 auto *Mgt = cast<X86MaskedGatherSDNode>(Node);
6337 SDValue IndexOp = Mgt->getIndex();
6338 SDValue Mask = Mgt->getMask();
6339 MVT IndexVT = IndexOp.getSimpleValueType();
6340 MVT ValueVT = Node->getSimpleValueType(0);
6341 MVT MaskVT = Mask.getSimpleValueType();
6342
6343 // This is just to prevent crashes if the nodes are malformed somehow. We're
6344 // otherwise only doing loose type checking in here based on what a type
6345 // constraint would say, just like table based isel.
6346 if (!ValueVT.isVector() || !MaskVT.isVector())
6347 break;
6348
6349 unsigned NumElts = ValueVT.getVectorNumElements();
6350 MVT ValueSVT = ValueVT.getVectorElementType();
6351
6352 bool IsFP = ValueSVT.isFloatingPoint();
6353 unsigned EltSize = ValueSVT.getSizeInBits();
6354
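// Pick the gather opcode from the index width (D = 32-bit, Q = 64-bit
// indices), the element type and size (PS/PD for FP, D/Q for 32/64-bit
// integers), and the vector width; the Z128/Z256/Z forms are the EVEX
// (AVX-512) encodings and the rest are the VEX (AVX2) encodings.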
6355 unsigned Opc = 0;
6356 bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1;
6357 if (AVX512Gather) {
6358 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6359 Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm;
6360 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6361 Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm;
6362 else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
6363 Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm;
6364 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6365 Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm;
6366 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6367 Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm;
6368 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
6369 Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm;
6370 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6371 Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm;
6372 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6373 Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm;
6374 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
6375 Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm;
6376 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6377 Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm;
6378 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6379 Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm;
6380 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
6381 Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm;
6382 } else {
6383 assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&
6384 "Unexpected mask VT!");
6385 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6386 Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm;
6387 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6388 Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm;
6389 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6390 Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm;
6391 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6392 Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm;
6393 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6394 Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm;
6395 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6396 Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm;
6397 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6398 Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm;
6399 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6400 Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm;
6401 }
6402
6403 if (!Opc)
6404 break;
6405
6406 SDValue Base, Scale, Index, Disp, Segment;
6407 if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(),
6408 Base, Scale, Index, Disp, Segment))
6409 break;
6410
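// selectVectorAddr matches the scalar base pointer together with the vector
// index into the usual base/scale/index/disp/segment memory operands.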
6411 SDValue PassThru = Mgt->getPassThru();
6412 SDValue Chain = Mgt->getChain();
6413 // Gather instructions have a mask output not in the ISD node.
6414 SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other);
6415
6416 MachineSDNode *NewNode;
6417 if (AVX512Gather) {
6418 SDValue Ops[] = {PassThru, Mask, Base, Scale,
6419 Index, Disp, Segment, Chain};
6420 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6421 } else {
6422 SDValue Ops[] = {PassThru, Base, Scale, Index,
6423 Disp, Segment, Mask, Chain};
6424 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6425 }
6426 CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
6427 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
6428 ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2));
6429 CurDAG->RemoveDeadNode(Node);
6430 return;
6431 }
6432 case X86ISD::MSCATTER: {
6433 auto *Sc = cast<X86MaskedScatterSDNode>(Node);
6434 SDValue Value = Sc->getValue();
6435 SDValue IndexOp = Sc->getIndex();
6436 MVT IndexVT = IndexOp.getSimpleValueType();
6437 MVT ValueVT = Value.getSimpleValueType();
6438
6439 // This is just to prevent crashes if the nodes are malformed somehow. We're
6440 // otherwise only doing loose type checking in here based on what a type
6441 // constraint would say, just like table based isel.
6442 if (!ValueVT.isVector())
6443 break;
6444
6445 unsigned NumElts = ValueVT.getVectorNumElements();
6446 MVT ValueSVT = ValueVT.getVectorElementType();
6447
6448 bool IsFP = ValueSVT.isFloatingPoint();
6449 unsigned EltSize = ValueSVT.getSizeInBits();
6450
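// Scatter is AVX-512 only, so every opcode here is an EVEX Z128/Z256/Z form;
// the naming follows the same index-width/element-type scheme as the gathers
// above.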
6451 unsigned Opc;
6452 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6453 Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr;
6454 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6455 Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr;
6456 else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
6457 Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr;
6458 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6459 Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr;
6460 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6461 Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr;
6462 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
6463 Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr;
6464 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6465 Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr;
6466 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6467 Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr;
6468 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
6469 Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr;
6470 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6471 Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr;
6472 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6473 Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr;
6474 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
6475 Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr;
6476 else
6477 break;
6478
6479 SDValue Base, Scale, Index, Disp, Segment;
6480 if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(),
6481 Base, Scale, Index, Disp, Segment))
6482 break;
6483
6484 SDValue Mask = Sc->getMask();
6485 SDValue Chain = Sc->getChain();
6486 // Scatter instructions have a mask output not in the ISD node.
6487 SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other);
6488 SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain};
6489
6490 MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6491 CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
6492 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1));
6493 CurDAG->RemoveDeadNode(Node);
6494 return;
6495 }
6496 case ISD::PREALLOCATED_SETUP: {
6497 auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
6498 auto CallId = MFI->getPreallocatedIdForCallSite(
6499 cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
6500 SDValue Chain = Node->getOperand(0);
6501 SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
6502 MachineSDNode *New = CurDAG->getMachineNode(
6503 TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
6504 ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
6505 CurDAG->RemoveDeadNode(Node);
6506 return;
6507 }
6508 case ISD::PREALLOCATED_ARG: {
6509 auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
6510 auto CallId = MFI->getPreallocatedIdForCallSite(
6511 cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
6512 SDValue Chain = Node->getOperand(0);
6513 SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
6514 SDValue ArgIndex = Node->getOperand(2);
6515 SDValue Ops[3];
6516 Ops[0] = CallIdValue;
6517 Ops[1] = ArgIndex;
6518 Ops[2] = Chain;
6519 MachineSDNode *New = CurDAG->getMachineNode(
6520 TargetOpcode::PREALLOCATED_ARG, dl,
6521 CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
6522 MVT::Other),
6523 Ops);
6524 ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
6525 ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
6526 CurDAG->RemoveDeadNode(Node);
6527 return;
6528 }
6529 case X86ISD::AESENCWIDE128KL:
6530 case X86ISD::AESDECWIDE128KL:
6531 case X86ISD::AESENCWIDE256KL:
6532 case X86ISD::AESDECWIDE256KL: {
6533 if (!Subtarget->hasWIDEKL())
6534 break;
6535
6536 unsigned Opcode;
6537 switch (Node->getOpcode()) {
6538 default:
6539 llvm_unreachable("Unexpected opcode!");
6540 case X86ISD::AESENCWIDE128KL:
6541 Opcode = X86::AESENCWIDE128KL;
6542 break;
6543 case X86ISD::AESDECWIDE128KL:
6544 Opcode = X86::AESDECWIDE128KL;
6545 break;
6546 case X86ISD::AESENCWIDE256KL:
6547 Opcode = X86::AESENCWIDE256KL;
6548 break;
6549 case X86ISD::AESDECWIDE256KL:
6550 Opcode = X86::AESDECWIDE256KL;
6551 break;
6552 }
6553
6554 SDValue Chain = Node->getOperand(0);
6555 SDValue Addr = Node->getOperand(1);
6556
6557 SDValue Base, Scale, Index, Disp, Segment;
6558 if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment))
6559 break;
6560
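// The AES*WIDE*KL instructions implicitly read and write XMM0-XMM7, so the
// eight data operands are moved into those registers, chained through glue,
// before the instruction is emitted.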
6561 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2),
6562 SDValue());
6563 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3),
6564 Chain.getValue(1));
6565 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4),
6566 Chain.getValue(1));
6567 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5),
6568 Chain.getValue(1));
6569 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6),
6570 Chain.getValue(1));
6571 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7),
6572 Chain.getValue(1));
6573 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8),
6574 Chain.getValue(1));
6575 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9),
6576 Chain.getValue(1));
6577
6578 MachineSDNode *Res = CurDAG->getMachineNode(
6579 Opcode, dl, Node->getVTList(),
6580 {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)});
6581 CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand());
6582 ReplaceNode(Node, Res);
6583 return;
6584 }
6585 }
6586
6587 SelectCode(Node);
6588}
6589
6590bool X86DAGToDAGISel::SelectInlineAsmMemoryOperand(
6591 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
6592 std::vector<SDValue> &OutOps) {
6593 SDValue Op0, Op1, Op2, Op3, Op4;
6594 switch (ConstraintID) {
6595 default:
6596 llvm_unreachable("Unexpected asm memory constraint");
6597 case InlineAsm::ConstraintCode::o: // offsetable ??
6598 case InlineAsm::ConstraintCode::v: // not offsetable ??
6599 case InlineAsm::ConstraintCode::m: // memory
6600 case InlineAsm::ConstraintCode::X:
6601 case InlineAsm::ConstraintCode::p: // address
6602 if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
6603 return true;
6604 break;
6605 }
6606
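// Every memory constraint is lowered to the standard five x86 address
// operands: base, scale, index, displacement and segment.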
6607 OutOps.push_back(Op0);
6608 OutOps.push_back(Op1);
6609 OutOps.push_back(Op2);
6610 OutOps.push_back(Op3);
6611 OutOps.push_back(Op4);
6612 return false;
6613}
6614
6615 X86ISelDAGToDAGPass::X86ISelDAGToDAGPass(X86TargetMachine &TM)
6616 : SelectionDAGISelPass(
6617 std::make_unique<X86DAGToDAGISel>(TM, TM.getOptLevel())) {}
6618
6619/// This pass converts a legalized DAG into a X86-specific DAG,
6620/// ready for instruction scheduling.
6621 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
6622 CodeGenOptLevel OptLevel) {
6623 return new X86DAGToDAGISelLegacy(TM, OptLevel);
6624}