1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a DAG pattern matching instruction selector for X86,
10// converting from a legalized dag to a X86 dag.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelDAGToDAG.h"
15#include "X86.h"
17#include "X86Subtarget.h"
18#include "X86TargetMachine.h"
19#include "llvm/ADT/Statistic.h"
22#include "llvm/Config/llvm-config.h"
24#include "llvm/IR/Function.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsX86.h"
28#include "llvm/IR/Module.h"
29#include "llvm/IR/Type.h"
30#include "llvm/Support/Debug.h"
34#include <cstdint>
35
36using namespace llvm;
37
38#define DEBUG_TYPE "x86-isel"
39#define PASS_NAME "X86 DAG->DAG Instruction Selection"
40
41STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42
43static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
44 cl::desc("Enable setting constant bits to reduce size of mask immediates"),
45 cl::Hidden);
46
48 "x86-promote-anyext-load", cl::init(true),
49 cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);
50
51extern cl::opt<bool> IndirectBranchTracking;
52
53//===----------------------------------------------------------------------===//
54// Pattern Matcher Implementation
55//===----------------------------------------------------------------------===//
56
57namespace {
58 /// This corresponds to X86AddressMode, but uses SDValue's instead of register
59 /// numbers for the leaves of the matched tree.
60 struct X86ISelAddressMode {
61 enum {
62 RegBase,
63 FrameIndexBase
64 } BaseType = RegBase;
65
66 // This is really a union, discriminated by BaseType!
67 SDValue Base_Reg;
68 int Base_FrameIndex = 0;
69
70 unsigned Scale = 1;
71 SDValue IndexReg;
72 int32_t Disp = 0;
73 SDValue Segment;
74 const GlobalValue *GV = nullptr;
75 const Constant *CP = nullptr;
76 const BlockAddress *BlockAddr = nullptr;
77 const char *ES = nullptr;
78 MCSymbol *MCSym = nullptr;
79 int JT = -1;
80 Align Alignment; // CP alignment.
81 unsigned char SymbolFlags = X86II::MO_NO_FLAG; // X86II::MO_*
82 bool NegateIndex = false;
83
84 X86ISelAddressMode() = default;
85
86 bool hasSymbolicDisplacement() const {
87 return GV != nullptr || CP != nullptr || ES != nullptr ||
88 MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
89 }
90
91 bool hasBaseOrIndexReg() const {
92 return BaseType == FrameIndexBase ||
93 IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
94 }
95
96 /// Return true if this addressing mode is already RIP-relative.
97 bool isRIPRelative() const {
98 if (BaseType != RegBase) return false;
99 if (RegisterSDNode *RegNode =
100 dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
101 return RegNode->getReg() == X86::RIP;
102 return false;
103 }
104
105 void setBaseReg(SDValue Reg) {
106 BaseType = RegBase;
107 Base_Reg = Reg;
108 }
109
110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
111 void dump(SelectionDAG *DAG = nullptr) {
112 dbgs() << "X86ISelAddressMode " << this << '\n';
113 dbgs() << "Base_Reg ";
114 if (Base_Reg.getNode())
115 Base_Reg.getNode()->dump(DAG);
116 else
117 dbgs() << "nul\n";
118 if (BaseType == FrameIndexBase)
119 dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
120 dbgs() << " Scale " << Scale << '\n'
121 << "IndexReg ";
122 if (NegateIndex)
123 dbgs() << "negate ";
124 if (IndexReg.getNode())
125 IndexReg.getNode()->dump(DAG);
126 else
127 dbgs() << "nul\n";
128 dbgs() << " Disp " << Disp << '\n'
129 << "GV ";
130 if (GV)
131 GV->dump();
132 else
133 dbgs() << "nul";
134 dbgs() << " CP ";
135 if (CP)
136 CP->dump();
137 else
138 dbgs() << "nul";
139 dbgs() << '\n'
140 << "ES ";
141 if (ES)
142 dbgs() << ES;
143 else
144 dbgs() << "nul";
145 dbgs() << " MCSym ";
146 if (MCSym)
147 dbgs() << MCSym;
148 else
149 dbgs() << "nul";
150 dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n';
151 }
152#endif
153 };
154}
155
156namespace {
157 //===--------------------------------------------------------------------===//
158 /// ISel - X86-specific code to select X86 machine instructions for
159 /// SelectionDAG operations.
160 ///
161 class X86DAGToDAGISel final : public SelectionDAGISel {
162 /// Keep a pointer to the X86Subtarget around so that we can
163 /// make the right decision when generating code for different targets.
164 const X86Subtarget *Subtarget;
165
166 /// If true, selector should try to optimize for minimum code size.
167 bool OptForMinSize;
168
169 /// Disable direct TLS access through segment registers.
170 bool IndirectTlsSegRefs;
171
172 public:
173 X86DAGToDAGISel() = delete;
174
175 explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOptLevel OptLevel)
176 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
177 OptForMinSize(false), IndirectTlsSegRefs(false) {}
178
179 bool runOnMachineFunction(MachineFunction &MF) override {
180 // Reset the subtarget each time through.
181 Subtarget = &MF.getSubtarget<X86Subtarget>();
182 IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
183 "indirect-tls-seg-refs");
184
185 // OptFor[Min]Size are used in pattern predicates that isel is matching.
186 OptForMinSize = MF.getFunction().hasMinSize();
187 return SelectionDAGISel::runOnMachineFunction(MF);
188 }
189
190 void emitFunctionEntryCode() override;
191
192 bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
193
194 void PreprocessISelDAG() override;
195 void PostprocessISelDAG() override;
196
197// Include the pieces autogenerated from the target description.
198#include "X86GenDAGISel.inc"
199
200 private:
201 void Select(SDNode *N) override;
202
203 bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
204 bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
205 bool AllowSegmentRegForX32 = false);
206 bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
207 bool matchAddress(SDValue N, X86ISelAddressMode &AM);
208 bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
209 bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
210 SDValue matchIndexRecursively(SDValue N, X86ISelAddressMode &AM,
211 unsigned Depth);
212 bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
213 unsigned Depth);
214 bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
215 unsigned Depth);
216 bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
217 bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
218 SDValue &Scale, SDValue &Index, SDValue &Disp,
219 SDValue &Segment);
220 bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp,
221 SDValue ScaleOp, SDValue &Base, SDValue &Scale,
222 SDValue &Index, SDValue &Disp, SDValue &Segment);
223 bool selectMOV64Imm32(SDValue N, SDValue &Imm);
224 bool selectLEAAddr(SDValue N, SDValue &Base,
225 SDValue &Scale, SDValue &Index, SDValue &Disp,
226 SDValue &Segment);
227 bool selectLEA64_Addr(SDValue N, SDValue &Base, SDValue &Scale,
228 SDValue &Index, SDValue &Disp, SDValue &Segment);
229 bool selectTLSADDRAddr(SDValue N, SDValue &Base,
230 SDValue &Scale, SDValue &Index, SDValue &Disp,
231 SDValue &Segment);
232 bool selectRelocImm(SDValue N, SDValue &Op);
233
234 bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
235 SDValue &Base, SDValue &Scale,
236 SDValue &Index, SDValue &Disp,
237 SDValue &Segment);
238
239 // Convenience method where P is also root.
240 bool tryFoldLoad(SDNode *P, SDValue N,
241 SDValue &Base, SDValue &Scale,
242 SDValue &Index, SDValue &Disp,
243 SDValue &Segment) {
244 return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
245 }
246
247 bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
248 SDValue &Base, SDValue &Scale,
249 SDValue &Index, SDValue &Disp,
250 SDValue &Segment);
251
252 bool isProfitableToFormMaskedOp(SDNode *N) const;
253
254 /// Implement addressing mode selection for inline asm expressions.
255 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
256 InlineAsm::ConstraintCode ConstraintID,
257 std::vector<SDValue> &OutOps) override;
258
259 void emitSpecialCodeForMain();
260
261 inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
262 MVT VT, SDValue &Base, SDValue &Scale,
263 SDValue &Index, SDValue &Disp,
264 SDValue &Segment) {
265 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
266 Base = CurDAG->getTargetFrameIndex(
267 AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
268 else if (AM.Base_Reg.getNode())
269 Base = AM.Base_Reg;
270 else
271 Base = CurDAG->getRegister(0, VT);
272
273 Scale = getI8Imm(AM.Scale, DL);
274
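 // GET_ND_IF_ENABLED picks the APX new-data-destination (_ND) form of an
 // opcode when the subtarget supports NDD, so the result can be written to a
 // different register than the source.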
275#define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC)
276 // Negate the index if needed.
277 if (AM.NegateIndex) {
278 unsigned NegOpc;
279 switch (VT.SimpleTy) {
280 default:
281 llvm_unreachable("Unsupported VT!");
282 case MVT::i64:
283 NegOpc = GET_ND_IF_ENABLED(X86::NEG64r);
284 break;
285 case MVT::i32:
286 NegOpc = GET_ND_IF_ENABLED(X86::NEG32r);
287 break;
288 case MVT::i16:
289 NegOpc = GET_ND_IF_ENABLED(X86::NEG16r);
290 break;
291 case MVT::i8:
292 NegOpc = GET_ND_IF_ENABLED(X86::NEG8r);
293 break;
294 }
295 SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
296 AM.IndexReg), 0);
297 AM.IndexReg = Neg;
298 }
299
300 if (AM.IndexReg.getNode())
301 Index = AM.IndexReg;
302 else
303 Index = CurDAG->getRegister(0, VT);
304
305 // These are 32-bit even in 64-bit mode since RIP-relative offset
306 // is 32-bit.
307 if (AM.GV)
308 Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
309 MVT::i32, AM.Disp,
310 AM.SymbolFlags);
311 else if (AM.CP)
312 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
313 AM.Disp, AM.SymbolFlags);
314 else if (AM.ES) {
315 assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
316 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
317 } else if (AM.MCSym) {
318 assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
319 assert(AM.SymbolFlags == 0 && "oo");
320 Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
321 } else if (AM.JT != -1) {
322 assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
323 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
324 } else if (AM.BlockAddr)
325 Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
326 AM.SymbolFlags);
327 else
328 Disp = CurDAG->getSignedTargetConstant(AM.Disp, DL, MVT::i32);
329
330 if (AM.Segment.getNode())
331 Segment = AM.Segment;
332 else
333 Segment = CurDAG->getRegister(0, MVT::i16);
334 }
335
336 // Utility function to determine whether N is an AMX SDNode right after
337 // lowering but before ISel.
338 bool isAMXSDNode(SDNode *N) const {
339 // Check if N is AMX SDNode:
340 // 1. check result type;
341 // 2. check operand type;
342 for (unsigned Idx = 0, E = N->getNumValues(); Idx != E; ++Idx) {
343 if (N->getValueType(Idx) == MVT::x86amx)
344 return true;
345 }
346 for (unsigned Idx = 0, E = N->getNumOperands(); Idx != E; ++Idx) {
347 SDValue Op = N->getOperand(Idx);
348 if (Op.getValueType() == MVT::x86amx)
349 return true;
350 }
351 return false;
352 }
353
354 // Utility function to determine whether we should avoid selecting
355 // immediate forms of instructions for better code size or not.
356 // At a high level, we'd like to avoid such instructions when
357 // we have similar constants used within the same basic block
358 // that can be kept in a register.
359 //
360 bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
361 uint32_t UseCount = 0;
362
363 // Do not want to hoist if we're not optimizing for size.
364 // TODO: We'd like to remove this restriction.
365 // See the comment in X86InstrInfo.td for more info.
366 if (!CurDAG->shouldOptForSize())
367 return false;
368
369 // Walk all the users of the immediate.
370 for (const SDNode *User : N->users()) {
371 if (UseCount >= 2)
372 break;
373
374 // This user is already selected. Count it as a legitimate use and
375 // move on.
376 if (User->isMachineOpcode()) {
377 UseCount++;
378 continue;
379 }
380
381 // We want to count stores of immediates as real uses.
382 if (User->getOpcode() == ISD::STORE &&
383 User->getOperand(1).getNode() == N) {
384 UseCount++;
385 continue;
386 }
387
388 // We don't currently match users that have > 2 operands (except
389 // for stores, which are handled above)
390 // Those instructions won't match in ISel, for now, and would
391 // be counted incorrectly.
392 // This may change in the future as we add additional instruction
393 // types.
394 if (User->getNumOperands() != 2)
395 continue;
396
397 // If this is a sign-extended 8-bit integer immediate used in an ALU
398 // instruction, there is probably an opcode encoding to save space.
399 auto *C = dyn_cast<ConstantSDNode>(N);
400 if (C && isInt<8>(C->getSExtValue()))
401 continue;
402
403 // Immediates that are used for offsets as part of stack
404 // manipulation should be left alone. These are typically
405 // used to indicate SP offsets for argument passing and
406 // will get pulled into stores/pushes (implicitly).
407 if (User->getOpcode() == X86ISD::ADD ||
408 User->getOpcode() == ISD::ADD ||
409 User->getOpcode() == X86ISD::SUB ||
410 User->getOpcode() == ISD::SUB) {
411
412 // Find the other operand of the add/sub.
413 SDValue OtherOp = User->getOperand(0);
414 if (OtherOp.getNode() == N)
415 OtherOp = User->getOperand(1);
416
417 // Don't count if the other operand is SP.
418 RegisterSDNode *RegNode;
419 if (OtherOp->getOpcode() == ISD::CopyFromReg &&
420 (RegNode = dyn_cast_or_null<RegisterSDNode>(
421 OtherOp->getOperand(1).getNode())))
422 if ((RegNode->getReg() == X86::ESP) ||
423 (RegNode->getReg() == X86::RSP))
424 continue;
425 }
426
427 // ... otherwise, count this and move on.
428 UseCount++;
429 }
430
431 // If we have more than 1 use, then recommend for hoisting.
432 return (UseCount > 1);
433 }
434
435 /// Return a target constant with the specified value of type i8.
436 inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
437 return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
438 }
439
440 /// Return a target constant with the specified value, of type i32.
441 inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
442 return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
443 }
444
445 /// Return a target constant with the specified value, of type i64.
446 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
447 return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
448 }
449
450 SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
451 const SDLoc &DL) {
452 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
453 uint64_t Index = N->getConstantOperandVal(1);
454 MVT VecVT = N->getOperand(0).getSimpleValueType();
455 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
456 }
457
458 SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
459 const SDLoc &DL) {
460 assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
461 uint64_t Index = N->getConstantOperandVal(2);
462 MVT VecVT = N->getSimpleValueType(0);
463 return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
464 }
465
466 SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth,
467 const SDLoc &DL) {
468 assert(VecWidth == 128 && "Unexpected vector width");
469 uint64_t Index = N->getConstantOperandVal(2);
470 MVT VecVT = N->getSimpleValueType(0);
471 uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth;
472 assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index");
473 // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub)
474 // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub)
475 return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);
476 }
477
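 // Materialize the borrow/carry flag into a register: zero a register, copy
 // the flag-producing operand into EFLAGS, then SBB the zero register from
 // itself so the result is 0 or all-ones depending on the carry flag.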
478 SDValue getSBBZero(SDNode *N) {
479 SDLoc dl(N);
480 MVT VT = N->getSimpleValueType(0);
481
482 // Create zero.
483 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
484 SDValue Zero =
485 SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, {}), 0);
486 if (VT == MVT::i64) {
487 Zero = SDValue(
488 CurDAG->getMachineNode(
489 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
490 CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
491 CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
492 0);
493 }
494
495 // Copy flags to the EFLAGS register and glue it to next node.
496 unsigned Opcode = N->getOpcode();
497 assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) &&
498 "Unexpected opcode for SBB materialization");
499 unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1;
500 SDValue EFLAGS =
501 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
502 N->getOperand(FlagOpIndex), SDValue());
503
504 // Create a 64-bit instruction if the result is 64-bits otherwise use the
505 // 32-bit version.
506 unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
507 MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
508 VTs = CurDAG->getVTList(SBBVT, MVT::i32);
509 return SDValue(
510 CurDAG->getMachineNode(Opc, dl, VTs,
511 {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}),
512 0);
513 }
514
515 // Helper to detect unneeded AND instructions on shift amounts. Called
516 // from PatFrags in tablegen.
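 // The AND is unneeded when it cannot change the low 'Width' bits that the
 // shift actually consumes: either the mask keeps all of those bits, or the
 // bits it would clear are already known to be zero.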
517 bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
518 assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
519 const APInt &Val = N->getConstantOperandAPInt(1);
520
521 if (Val.countr_one() >= Width)
522 return true;
523
524 APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
525 return Mask.countr_one() >= Width;
526 }
527
528 /// Return an SDNode that returns the value of the global base register.
529 /// Output instructions required to initialize the global base register,
530 /// if necessary.
531 SDNode *getGlobalBaseReg();
532
533 /// Return a reference to the TargetMachine, casted to the target-specific
534 /// type.
535 const X86TargetMachine &getTargetMachine() const {
536 return static_cast<const X86TargetMachine &>(TM);
537 }
538
539 /// Return a reference to the TargetInstrInfo, casted to the target-specific
540 /// type.
541 const X86InstrInfo *getInstrInfo() const {
542 return Subtarget->getInstrInfo();
543 }
544
545 /// Return a condition code of the given SDNode
546 X86::CondCode getCondFromNode(SDNode *N) const;
547
548 /// Address-mode matching performs shift-of-and to and-of-shift
549 /// reassociation in order to expose more scaled addressing
550 /// opportunities.
551 bool ComplexPatternFuncMutatesDAG() const override {
552 return true;
553 }
554
555 bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
556
557 // Indicates we should prefer to use a non-temporal load for this load.
558 bool useNonTemporalLoad(LoadSDNode *N) const {
559 if (!N->isNonTemporal())
560 return false;
561
562 unsigned StoreSize = N->getMemoryVT().getStoreSize();
563
564 if (N->getAlign().value() < StoreSize)
565 return false;
566
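 // Non-temporal vector loads (MOVNTDQA and its VEX/EVEX forms) only exist
 // for 16/32/64-byte accesses and require SSE4.1/AVX2/AVX512 respectively;
 // there is no non-temporal load for smaller scalar sizes.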
567 switch (StoreSize) {
568 default: llvm_unreachable("Unsupported store size");
569 case 4:
570 case 8:
571 return false;
572 case 16:
573 return Subtarget->hasSSE41();
574 case 32:
575 return Subtarget->hasAVX2();
576 case 64:
577 return Subtarget->hasAVX512();
578 }
579 }
580
581 bool foldLoadStoreIntoMemOperand(SDNode *Node);
582 MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
583 bool matchBitExtract(SDNode *Node);
584 bool shrinkAndImmediate(SDNode *N);
585 bool isMaskZeroExtended(SDNode *N) const;
586 bool tryShiftAmountMod(SDNode *N);
587 bool tryShrinkShlLogicImm(SDNode *N);
588 bool tryVPTERNLOG(SDNode *N);
589 bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB,
590 SDNode *ParentC, SDValue A, SDValue B, SDValue C,
591 uint8_t Imm);
592 bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
593 bool tryMatchBitSelect(SDNode *N);
594
595 MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
596 const SDLoc &dl, MVT VT, SDNode *Node);
597 MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
598 const SDLoc &dl, MVT VT, SDNode *Node,
599 SDValue &InGlue);
600
601 bool tryOptimizeRem8Extend(SDNode *N);
602
603 bool onlyUsesZeroFlag(SDValue Flags) const;
604 bool hasNoSignFlagUses(SDValue Flags) const;
605 bool hasNoCarryFlagUses(SDValue Flags) const;
606 };
607
608 class X86DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
609 public:
610 static char ID;
611 explicit X86DAGToDAGISelLegacy(X86TargetMachine &tm,
612 CodeGenOptLevel OptLevel)
613 : SelectionDAGISelLegacy(
614 ID, std::make_unique<X86DAGToDAGISel>(tm, OptLevel)) {}
615 };
616}
617
618char X86DAGToDAGISelLegacy::ID = 0;
619
620INITIALIZE_PASS(X86DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
621
622// Returns true if this masked compare can be implemented legally with this
623// type.
624static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
625 unsigned Opcode = N->getOpcode();
626 if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM ||
627 Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC ||
628 Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) {
629 // We can get 256-bit 8 element types here without VLX being enabled. When
630 // this happens we will use 512-bit operations and the mask will not be
631 // zero extended.
632 EVT OpVT = N->getOperand(0).getValueType();
633 // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the
634 // second operand.
635 if (Opcode == X86ISD::STRICT_CMPM)
636 OpVT = N->getOperand(1).getValueType();
637 if (OpVT.is256BitVector() || OpVT.is128BitVector())
638 return Subtarget->hasVLX();
639
640 return true;
641 }
642 // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
643 if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
644 Opcode == X86ISD::FSETCCM_SAE)
645 return true;
646
647 return false;
648}
649
650// Returns true if we can assume the writer of the mask has zero extended it
651// for us.
652bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
653 // If this is an AND, check if we have a compare on either side. As long as
654 // one side guarantees the mask is zero extended, the AND will preserve those
655 // zeros.
656 if (N->getOpcode() == ISD::AND)
657 return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
658 isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
659
660 return isLegalMaskCompare(N, Subtarget);
661}
662
663bool
664X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
665 if (OptLevel == CodeGenOptLevel::None)
666 return false;
667
668 if (!N.hasOneUse())
669 return false;
670
671 if (N.getOpcode() != ISD::LOAD)
672 return true;
673
674 // Don't fold non-temporal loads if we have an instruction for them.
675 if (useNonTemporalLoad(cast<LoadSDNode>(N)))
676 return false;
677
678 // If N is a load, do additional profitability checks.
679 if (U == Root) {
680 switch (U->getOpcode()) {
681 default: break;
682 case X86ISD::ADD:
683 case X86ISD::ADC:
684 case X86ISD::SUB:
685 case X86ISD::SBB:
686 case X86ISD::AND:
687 case X86ISD::XOR:
688 case X86ISD::OR:
689 case ISD::ADD:
690 case ISD::UADDO_CARRY:
691 case ISD::AND:
692 case ISD::OR:
693 case ISD::XOR: {
694 SDValue Op1 = U->getOperand(1);
695
696 // If the other operand is an 8-bit immediate we should fold the immediate
697 // instead. This reduces code size.
698 // e.g.
699 // movl 4(%esp), %eax
700 // addl $4, %eax
701 // vs.
702 // movl $4, %eax
703 // addl 4(%esp), %eax
704 // The former is 2 bytes shorter. In case where the increment is 1, then
705 // the saving can be 4 bytes (by using incl %eax).
706 if (auto *Imm = dyn_cast<ConstantSDNode>(Op1)) {
707 if (Imm->getAPIntValue().isSignedIntN(8))
708 return false;
709
710 // If this is a 64-bit AND with an immediate that fits in 32-bits,
711 // prefer using the smaller and over folding the load. This is needed to
712 // make sure immediates created by shrinkAndImmediate are always folded.
713 // Ideally we would narrow the load during DAG combine and get the
714 // best of both worlds.
715 if (U->getOpcode() == ISD::AND &&
716 Imm->getAPIntValue().getBitWidth() == 64 &&
717 Imm->getAPIntValue().isIntN(32))
718 return false;
719
720 // If this is really a zext_inreg that can be represented with a movzx
721 // instruction, prefer that.
722 // TODO: We could shrink the load and fold if it is non-volatile.
723 if (U->getOpcode() == ISD::AND &&
724 (Imm->getAPIntValue() == UINT8_MAX ||
725 Imm->getAPIntValue() == UINT16_MAX ||
726 Imm->getAPIntValue() == UINT32_MAX))
727 return false;
728
729 // For ADD/SUB we can negate the immediate and use the opposite operation
730 // to fit 128 into a sign-extended 8-bit immediate.
731 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
732 (-Imm->getAPIntValue()).isSignedIntN(8))
733 return false;
734
735 if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) &&
736 (-Imm->getAPIntValue()).isSignedIntN(8) &&
737 hasNoCarryFlagUses(SDValue(U, 1)))
738 return false;
739 }
740
741 // If the other operand is a TLS address, we should fold it instead.
742 // This produces
743 // movl %gs:0, %eax
744 // leal i@NTPOFF(%eax), %eax
745 // instead of
746 // movl $i@NTPOFF, %eax
747 // addl %gs:0, %eax
748 // if the block also has an access to a second TLS address this will save
749 // a load.
750 // FIXME: This is probably also true for non-TLS addresses.
751 if (Op1.getOpcode() == X86ISD::Wrapper) {
752 SDValue Val = Op1.getOperand(0);
753 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
754 return false;
755 }
756
757 // Don't fold load if this matches the BTS/BTR/BTC patterns.
758 // BTS: (or X, (shl 1, n))
759 // BTR: (and X, (rotl -2, n))
760 // BTC: (xor X, (shl 1, n))
761 if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
762 if (U->getOperand(0).getOpcode() == ISD::SHL &&
763 isOneConstant(U->getOperand(0).getOperand(0)))
764 return false;
765
766 if (U->getOperand(1).getOpcode() == ISD::SHL &&
767 isOneConstant(U->getOperand(1).getOperand(0)))
768 return false;
769 }
770 if (U->getOpcode() == ISD::AND) {
771 SDValue U0 = U->getOperand(0);
772 SDValue U1 = U->getOperand(1);
773 if (U0.getOpcode() == ISD::ROTL) {
774 auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
775 if (C && C->getSExtValue() == -2)
776 return false;
777 }
778
779 if (U1.getOpcode() == ISD::ROTL) {
780 auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
781 if (C && C->getSExtValue() == -2)
782 return false;
783 }
784 }
785
786 break;
787 }
788 case ISD::SHL:
789 case ISD::SRA:
790 case ISD::SRL:
791 // Don't fold a load into a shift by immediate. The BMI2 instructions
792 // support folding a load, but not an immediate. The legacy instructions
793 // support folding an immediate, but can't fold a load. Folding an
794 // immediate is preferable to folding a load.
795 if (isa<ConstantSDNode>(U->getOperand(1)))
796 return false;
797
798 break;
799 }
800 }
801
802 // Prevent folding a load if this can be implemented with an insert_subreg or
803 // a move that implicitly zeroes.
804 if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
805 isNullConstant(Root->getOperand(2)) &&
806 (Root->getOperand(0).isUndef() ||
807 ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
808 return false;
809
810 return true;
811}
812
813// Indicates it is profitable to form an AVX512 masked operation. Returning
814 // false will favor a register-register masked move or vblendm and the
815// operation will be selected separately.
816bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const {
817 assert(
818 (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) &&
819 "Unexpected opcode!");
820
821 // If the operation has additional users, the operation will be duplicated.
822 // Check the use count to prevent that.
823 // FIXME: Are there cheap opcodes we might want to duplicate?
824 return N->getOperand(1).hasOneUse();
825}
826
827/// Replace the original chain operand of the call with
828/// load's chain operand and move load below the call's chain operand.
829static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
830 SDValue Call, SDValue OrigChain) {
831 SmallVector<SDValue, 8> Ops;
832 SDValue Chain = OrigChain.getOperand(0);
833 if (Chain.getNode() == Load.getNode())
834 Ops.push_back(Load.getOperand(0));
835 else {
836 assert(Chain.getOpcode() == ISD::TokenFactor &&
837 "Unexpected chain operand");
838 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
839 if (Chain.getOperand(i).getNode() == Load.getNode())
840 Ops.push_back(Load.getOperand(0));
841 else
842 Ops.push_back(Chain.getOperand(i));
843 SDValue NewChain =
844 CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
845 Ops.clear();
846 Ops.push_back(NewChain);
847 }
848 Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
849 CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
850 CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
851 Load.getOperand(1), Load.getOperand(2));
852
853 Ops.clear();
854 Ops.push_back(SDValue(Load.getNode(), 1));
855 Ops.append(Call->op_begin() + 1, Call->op_end());
856 CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
857}
858
859/// Return true if call address is a load and it can be
860/// moved below CALLSEQ_START and the chains leading up to the call.
861/// Return the CALLSEQ_START by reference as a second output.
862/// In the case of a tail call, there isn't a callseq node between the call
863/// chain and the load.
864static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
865 // The transformation is somewhat dangerous if the call's chain was glued to
866 // the call. After MoveBelowOrigChain the load is moved between the call and
867 // the chain, this can create a cycle if the load is not folded. So it is
868 // *really* important that we are sure the load will be folded.
869 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
870 return false;
871 auto *LD = dyn_cast<LoadSDNode>(Callee.getNode());
872 if (!LD ||
873 !LD->isSimple() ||
874 LD->getAddressingMode() != ISD::UNINDEXED ||
875 LD->getExtensionType() != ISD::NON_EXTLOAD)
876 return false;
877
878 // Now let's find the callseq_start.
879 while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
880 if (!Chain.hasOneUse())
881 return false;
882 Chain = Chain.getOperand(0);
883 }
884
885 if (!Chain.getNumOperands())
886 return false;
887 // Since we are not checking for AA here, conservatively abort if the chain
888 // writes to memory. It's not safe to move the callee (a load) across a store.
889 if (isa<MemSDNode>(Chain.getNode()) &&
890 cast<MemSDNode>(Chain.getNode())->writeMem())
891 return false;
892 if (Chain.getOperand(0).getNode() == Callee.getNode())
893 return true;
894 if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
895 Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
896 Callee.getValue(1).hasOneUse())
897 return true;
898 return false;
899}
900
901static bool isEndbrImm64(uint64_t Imm) {
902// There may be some other prefix bytes between 0xF3 and 0x0F1EFA.
903// e.g.: 0xF3660F1EFA, 0xF3670F1EFA
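// Scan the bytes above the low 24 bits: any of the optional legacy prefix
// bytes may appear before the mandatory 0xF3; any other byte means this
// immediate cannot contain an ENDBR64 encoding.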
904 if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
905 return false;
906
907 uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
908 0x65, 0x66, 0x67, 0xf0, 0xf2};
909 int i = 24; // the low 24 bits (0x0F1EFA) have already matched
910 while (i < 64) {
911 uint8_t Byte = (Imm >> i) & 0xFF;
912 if (Byte == 0xF3)
913 return true;
914 if (!llvm::is_contained(OptionalPrefixBytes, Byte))
915 return false;
916 i += 8;
917 }
918
919 return false;
920}
921
922static bool needBWI(MVT VT) {
923 return (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v64i8);
924}
925
926void X86DAGToDAGISel::PreprocessISelDAG() {
927 bool MadeChange = false;
928 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
929 E = CurDAG->allnodes_end(); I != E; ) {
930 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
931
932 // This is for CET enhancement.
933 //
934 // ENDBR32 and ENDBR64 have specific opcodes:
935 // ENDBR32: F3 0F 1E FB
936 // ENDBR64: F3 0F 1E FA
937 // We want to ensure that attackers cannot find unintended ENDBR32/64
938 // opcode matches in the binary.
939 // Here’s an example:
940 // If the compiler had to generate asm for the following code:
941 // a = 0xF30F1EFA
942 // it could, for example, generate:
943 // mov 0xF30F1EFA, dword ptr[a]
944 // In such a case, the binary would include a gadget that starts
945 // with a fake ENDBR64 opcode. Therefore, we split such generation
946 // into multiple operations so that it does not show up in the binary.
947 if (N->getOpcode() == ISD::Constant) {
948 MVT VT = N->getSimpleValueType(0);
949 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
950 int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
951 if (Imm == EndbrImm || isEndbrImm64(Imm)) {
952 // Check that the cf-protection-branch is enabled.
953 Metadata *CFProtectionBranch =
955 "cf-protection-branch");
956 if (CFProtectionBranch || IndirectBranchTracking) {
957 SDLoc dl(N);
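 // Materialize the value as NOT(~Imm) so the raw ENDBR byte pattern never
 // appears directly as an immediate in the emitted code.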
958 SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
959 Complement = CurDAG->getNOT(dl, Complement, VT);
960 --I;
961 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement);
962 ++I;
963 MadeChange = true;
964 continue;
965 }
966 }
967 }
968
969 // If this is a target specific AND node with no flag usages, turn it back
970 // into ISD::AND to enable test instruction matching.
971 if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
972 SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
973 N->getOperand(0), N->getOperand(1));
974 --I;
975 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
976 ++I;
977 MadeChange = true;
978 continue;
979 }
980
981 // Convert vector increment or decrement to sub/add with an all-ones
982 // constant:
983 // add X, <1, 1...> --> sub X, <-1, -1...>
984 // sub X, <1, 1...> --> add X, <-1, -1...>
985 // The all-ones vector constant can be materialized using a pcmpeq
986 // instruction that is commonly recognized as an idiom (has no register
987 // dependency), so that's better/smaller than loading a splat 1 constant.
988 //
989 // But don't do this if it would inhibit a potentially profitable load
990 // folding opportunity for the other operand. That only occurs with the
991 // intersection of:
992 // (1) The other operand (op0) is load foldable.
993 // (2) The op is an add (otherwise, we are *creating* an add and can still
994 // load fold the other op).
995 // (3) The target has AVX (otherwise, we have a destructive add and can't
996 // load fold the other op without killing the constant op).
997 // (4) The constant 1 vector has multiple uses (so it is profitable to load
998 // into a register anyway).
999 auto mayPreventLoadFold = [&]() {
1000 return X86::mayFoldLoad(N->getOperand(0), *Subtarget) &&
1001 N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
1002 !N->getOperand(1).hasOneUse();
1003 };
1004 if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1005 N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
1006 APInt SplatVal;
1007 if (!isa<ConstantSDNode>(
1008 peekThroughBitcasts(N->getOperand(0)).getNode()) &&
1009 X86::isConstantSplat(N->getOperand(1), SplatVal) &&
1010 SplatVal.isOne()) {
1011 SDLoc DL(N);
1012
1013 MVT VT = N->getSimpleValueType(0);
1014 unsigned NumElts = VT.getSizeInBits() / 32;
1015 SDValue AllOnes =
1016 CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts));
1017 AllOnes = CurDAG->getBitcast(VT, AllOnes);
1018
1019 unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
1020 SDValue Res =
1021 CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
1022 --I;
1023 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1024 ++I;
1025 MadeChange = true;
1026 continue;
1027 }
1028 }
1029
1030 switch (N->getOpcode()) {
1031 case X86ISD::VBROADCAST: {
1032 MVT VT = N->getSimpleValueType(0);
1033 // Emulate v32i16/v64i8 broadcast without BWI.
1034 if (!Subtarget->hasBWI() && needBWI(VT)) {
1035 MVT NarrowVT = VT.getHalfNumVectorElementsVT();
1036 SDLoc dl(N);
1037 SDValue NarrowBCast =
1038 CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0));
1039 SDValue Res =
1040 CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
1041 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1042 unsigned Index = NarrowVT.getVectorMinNumElements();
1043 Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
1044 CurDAG->getIntPtrConstant(Index, dl));
1045
1046 --I;
1047 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1048 ++I;
1049 MadeChange = true;
1050 continue;
1051 }
1052
1053 break;
1054 }
1055 case X86ISD::VBROADCAST_LOAD: {
1056 MVT VT = N->getSimpleValueType(0);
1057 // Emulate v32i16/v64i8 broadcast without BWI.
1058 if (!Subtarget->hasBWI() && needBWI(VT)) {
1059 MVT NarrowVT = VT.getHalfNumVectorElementsVT();
1060 auto *MemNode = cast<MemSDNode>(N);
1061 SDLoc dl(N);
1062 SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
1063 SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
1064 SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
1065 X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(),
1066 MemNode->getMemOperand());
1067 SDValue Res =
1068 CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
1069 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1070 unsigned Index = NarrowVT.getVectorMinNumElements();
1071 Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
1072 CurDAG->getIntPtrConstant(Index, dl));
1073
1074 --I;
1075 SDValue To[] = {Res, NarrowBCast.getValue(1)};
1076 CurDAG->ReplaceAllUsesWith(N, To);
1077 ++I;
1078 MadeChange = true;
1079 continue;
1080 }
1081
1082 break;
1083 }
1084 case ISD::LOAD: {
1085 // If this is an XMM/YMM load of the same lower bits as another YMM/ZMM
1086 // load, then just extract the lower subvector and avoid the second load.
1087 auto *Ld = cast<LoadSDNode>(N);
1088 MVT VT = N->getSimpleValueType(0);
1089 if (!ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
1090 !(VT.is128BitVector() || VT.is256BitVector()))
1091 break;
1092
1093 MVT MaxVT = VT;
1094 SDNode *MaxLd = nullptr;
1095 SDValue Ptr = Ld->getBasePtr();
1096 SDValue Chain = Ld->getChain();
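 // Look for the widest other simple load of the same pointer on the same
 // chain; its low bits can then be reused via EXTRACT_SUBVECTOR instead of
 // issuing a second load.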
1097 for (SDNode *User : Ptr->users()) {
1098 auto *UserLd = dyn_cast<LoadSDNode>(User);
1099 MVT UserVT = User->getSimpleValueType(0);
1100 if (User != N && UserLd && ISD::isNormalLoad(User) &&
1101 UserLd->getBasePtr() == Ptr && UserLd->getChain() == Chain &&
1102 !User->hasAnyUseOfValue(1) &&
1103 (UserVT.is256BitVector() || UserVT.is512BitVector()) &&
1104 UserVT.getSizeInBits() > VT.getSizeInBits() &&
1105 (!MaxLd || UserVT.getSizeInBits() > MaxVT.getSizeInBits())) {
1106 MaxLd = User;
1107 MaxVT = UserVT;
1108 }
1109 }
1110 if (MaxLd) {
1111 SDLoc dl(N);
1112 unsigned NumSubElts = VT.getSizeInBits() / MaxVT.getScalarSizeInBits();
1113 MVT SubVT = MVT::getVectorVT(MaxVT.getScalarType(), NumSubElts);
1114 SDValue Extract = CurDAG->getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT,
1115 SDValue(MaxLd, 0),
1116 CurDAG->getIntPtrConstant(0, dl));
1117 SDValue Res = CurDAG->getBitcast(VT, Extract);
1118
1119 --I;
1120 SDValue To[] = {Res, SDValue(MaxLd, 1)};
1121 CurDAG->ReplaceAllUsesWith(N, To);
1122 ++I;
1123 MadeChange = true;
1124 continue;
1125 }
1126 break;
1127 }
1128 case ISD::VSELECT: {
1129 // Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG.
1130 EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
1131 if (EleVT == MVT::i1)
1132 break;
1133
1134 assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
1135 assert(N->getValueType(0).getVectorElementType() != MVT::i16 &&
1136 "We can't replace VSELECT with BLENDV in vXi16!");
1137 SDValue R;
1138 if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) ==
1139 EleVT.getSizeInBits()) {
1140 R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0),
1141 N->getOperand(0), N->getOperand(1), N->getOperand(2),
1142 CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8));
1143 } else {
1144 R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
1145 N->getOperand(0), N->getOperand(1),
1146 N->getOperand(2));
1147 }
1148 --I;
1149 CurDAG->ReplaceAllUsesWith(N, R.getNode());
1150 ++I;
1151 MadeChange = true;
1152 continue;
1153 }
1154 case ISD::FP_ROUND:
1155 case ISD::STRICT_FP_ROUND:
1156 case ISD::FP_TO_SINT:
1157 case ISD::FP_TO_UINT:
1158 case ISD::STRICT_FP_TO_SINT:
1159 case ISD::STRICT_FP_TO_UINT: {
1160 // Replace vector fp_to_s/uint with their X86 specific equivalent so we
1161 // don't need 2 sets of patterns.
1162 if (!N->getSimpleValueType(0).isVector())
1163 break;
1164
1165 unsigned NewOpc;
1166 switch (N->getOpcode()) {
1167 default: llvm_unreachable("Unexpected opcode!");
1168 case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break;
1169 case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break;
1170 case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break;
1171 case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
1172 case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break;
1173 case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
1174 }
1175 SDValue Res;
1176 if (N->isStrictFPOpcode())
1177 Res =
1178 CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
1179 {N->getOperand(0), N->getOperand(1)});
1180 else
1181 Res =
1182 CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1183 N->getOperand(0));
1184 --I;
1185 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1186 ++I;
1187 MadeChange = true;
1188 continue;
1189 }
1190 case ISD::SHL:
1191 case ISD::SRA:
1192 case ISD::SRL: {
1193 // Replace vector shifts with their X86 specific equivalent so we don't
1194 // need 2 sets of patterns.
1195 if (!N->getValueType(0).isVector())
1196 break;
1197
1198 unsigned NewOpc;
1199 switch (N->getOpcode()) {
1200 default: llvm_unreachable("Unexpected opcode!");
1201 case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
1202 case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
1203 case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
1204 }
1205 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1206 N->getOperand(0), N->getOperand(1));
1207 --I;
1208 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1209 ++I;
1210 MadeChange = true;
1211 continue;
1212 }
1213 case ISD::ANY_EXTEND:
1214 case ISD::ANY_EXTEND_VECTOR_INREG: {
1215 // Replace vector any extend with the zero extend equivalents so we don't
1216 // need 2 sets of patterns. Ignore vXi1 extensions.
1217 if (!N->getValueType(0).isVector())
1218 break;
1219
1220 unsigned NewOpc;
1221 if (N->getOperand(0).getScalarValueSizeInBits() == 1) {
1222 assert(N->getOpcode() == ISD::ANY_EXTEND &&
1223 "Unexpected opcode for mask vector!");
1224 NewOpc = ISD::SIGN_EXTEND;
1225 } else {
1226 NewOpc = N->getOpcode() == ISD::ANY_EXTEND
1227 ? ISD::ZERO_EXTEND
1228 : ISD::ZERO_EXTEND_VECTOR_INREG;
1229 }
1230
1231 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1232 N->getOperand(0));
1233 --I;
1234 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1235 ++I;
1236 MadeChange = true;
1237 continue;
1238 }
1239 case ISD::FCEIL:
1240 case ISD::STRICT_FCEIL:
1241 case ISD::FFLOOR:
1242 case ISD::STRICT_FFLOOR:
1243 case ISD::FTRUNC:
1244 case ISD::STRICT_FTRUNC:
1245 case ISD::FROUNDEVEN:
1246 case ISD::STRICT_FROUNDEVEN:
1247 case ISD::FNEARBYINT:
1248 case ISD::STRICT_FNEARBYINT:
1249 case ISD::FRINT:
1250 case ISD::STRICT_FRINT: {
1251 // Replace fp rounding with their X86 specific equivalent so we don't
1252 // need 2 sets of patterns.
1253 unsigned Imm;
1254 switch (N->getOpcode()) {
1255 default: llvm_unreachable("Unexpected opcode!");
1256 case ISD::STRICT_FCEIL:
1257 case ISD::FCEIL: Imm = 0xA; break;
1258 case ISD::STRICT_FFLOOR:
1259 case ISD::FFLOOR: Imm = 0x9; break;
1260 case ISD::STRICT_FTRUNC:
1261 case ISD::FTRUNC: Imm = 0xB; break;
1262 case ISD::STRICT_FROUNDEVEN:
1263 case ISD::FROUNDEVEN: Imm = 0x8; break;
1264 case ISD::STRICT_FNEARBYINT:
1265 case ISD::FNEARBYINT: Imm = 0xC; break;
1266 case ISD::STRICT_FRINT:
1267 case ISD::FRINT: Imm = 0x4; break;
1268 }
1269 SDLoc dl(N);
1270 bool IsStrict = N->isStrictFPOpcode();
1271 SDValue Res;
1272 if (IsStrict)
1273 Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
1274 {N->getValueType(0), MVT::Other},
1275 {N->getOperand(0), N->getOperand(1),
1276 CurDAG->getTargetConstant(Imm, dl, MVT::i32)});
1277 else
1278 Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
1279 N->getOperand(0),
1280 CurDAG->getTargetConstant(Imm, dl, MVT::i32));
1281 --I;
1282 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1283 ++I;
1284 MadeChange = true;
1285 continue;
1286 }
1287 case X86ISD::FANDN:
1288 case X86ISD::FAND:
1289 case X86ISD::FOR:
1290 case X86ISD::FXOR: {
1291 // Widen scalar fp logic ops to vector to reduce isel patterns.
1292 // FIXME: Can we do this during lowering/combine.
1293 MVT VT = N->getSimpleValueType(0);
1294 if (VT.isVector() || VT == MVT::f128)
1295 break;
1296
1297 MVT VecVT = VT == MVT::f64 ? MVT::v2f64
1298 : VT == MVT::f32 ? MVT::v4f32
1299 : MVT::v8f16;
1300
1301 SDLoc dl(N);
1302 SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
1303 N->getOperand(0));
1304 SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
1305 N->getOperand(1));
1306
1307 SDValue Res;
1308 if (Subtarget->hasSSE2()) {
1309 EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
1310 Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
1311 Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
1312 unsigned Opc;
1313 switch (N->getOpcode()) {
1314 default: llvm_unreachable("Unexpected opcode!");
1315 case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
1316 case X86ISD::FAND: Opc = ISD::AND; break;
1317 case X86ISD::FOR: Opc = ISD::OR; break;
1318 case X86ISD::FXOR: Opc = ISD::XOR; break;
1319 }
1320 Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
1321 Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
1322 } else {
1323 Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
1324 }
1325 Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
1326 CurDAG->getIntPtrConstant(0, dl));
1327 --I;
1328 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1329 ++I;
1330 MadeChange = true;
1331 continue;
1332 }
1333 }
1334
1335 if (OptLevel != CodeGenOptLevel::None &&
1336 // Only do this when the target can fold the load into the call or
1337 // jmp.
1338 !Subtarget->useIndirectThunkCalls() &&
1339 ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
1340 (N->getOpcode() == X86ISD::TC_RETURN &&
1341 (Subtarget->is64Bit() ||
1342 !getTargetMachine().isPositionIndependent())))) {
1343 /// Also try moving call address load from outside callseq_start to just
1344 /// before the call to allow it to be folded.
1345 ///
1346 /// [Load chain]
1347 /// ^
1348 /// |
1349 /// [Load]
1350 /// ^ ^
1351 /// | |
1352 /// / \--
1353 /// / |
1354 ///[CALLSEQ_START] |
1355 /// ^ |
1356 /// | |
1357 /// [LOAD/C2Reg] |
1358 /// | |
1359 /// \ /
1360 /// \ /
1361 /// [CALL]
1362 bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
1363 SDValue Chain = N->getOperand(0);
1364 SDValue Load = N->getOperand(1);
1365 if (!isCalleeLoad(Load, Chain, HasCallSeq))
1366 continue;
1367 moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
1368 ++NumLoadMoved;
1369 MadeChange = true;
1370 continue;
1371 }
1372
1373 // Lower fpround and fpextend nodes that target the FP stack to be a store
1374 // and a load to/from the stack. This is a gross hack. We would like to simply mark
1375 // these as being illegal, but when we do that, legalize produces these when
1376 // it expands calls, then expands these in the same legalize pass. We would
1377 // like dag combine to be able to hack on these between the call expansion
1378 // and the node legalization. As such this pass basically does "really
1379 // late" legalization of these inline with the X86 isel pass.
1380 // FIXME: This should only happen when not compiled with -O0.
1381 switch (N->getOpcode()) {
1382 default: continue;
1383 case ISD::FP_ROUND:
1384 case ISD::FP_EXTEND:
1385 {
1386 MVT SrcVT = N->getOperand(0).getSimpleValueType();
1387 MVT DstVT = N->getSimpleValueType(0);
1388
1389 // If any of the sources are vectors, no fp stack involved.
1390 if (SrcVT.isVector() || DstVT.isVector())
1391 continue;
1392
1393 // If the source and destination are SSE registers, then this is a legal
1394 // conversion that should not be lowered.
1395 const X86TargetLowering *X86Lowering =
1396 static_cast<const X86TargetLowering *>(TLI);
1397 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1398 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1399 if (SrcIsSSE && DstIsSSE)
1400 continue;
1401
1402 if (!SrcIsSSE && !DstIsSSE) {
1403 // If this is an FPStack extension, it is a noop.
1404 if (N->getOpcode() == ISD::FP_EXTEND)
1405 continue;
1406 // If this is a value-preserving FPStack truncation, it is a noop.
1407 if (N->getConstantOperandVal(1))
1408 continue;
1409 }
1410
1411 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1412 // FPStack has extload and truncstore. SSE can fold direct loads into other
1413 // operations. Based on this, decide what we want to do.
1414 MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT;
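 // MemVT is the narrower of the two types, so the rounding (or extension)
 // happens through the truncating-store / extending-load pair below.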
1415 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1416 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1417 MachinePointerInfo MPI =
1418 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1419 SDLoc dl(N);
1420
1421 // FIXME: optimize the case where the src/dest is a load or store?
1422
1423 SDValue Store = CurDAG->getTruncStore(
1424 CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);
1425 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store,
1426 MemTmp, MPI, MemVT);
1427
1428 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1429 // extload we created. This will cause general havoc on the DAG because
1430 // anything below the conversion could be folded into other existing nodes.
1431 // To avoid invalidating 'I', back it up to the convert node.
1432 --I;
1433 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1434 break;
1435 }
1436
1437 //The sequence of events for lowering STRICT_FP versions of these nodes requires
1438 //dealing with the chain differently, as there is already a preexisting chain.
1439 case ISD::STRICT_FP_ROUND:
1440 case ISD::STRICT_FP_EXTEND:
1441 {
1442 MVT SrcVT = N->getOperand(1).getSimpleValueType();
1443 MVT DstVT = N->getSimpleValueType(0);
1444
1445 // If any of the sources are vectors, no fp stack involved.
1446 if (SrcVT.isVector() || DstVT.isVector())
1447 continue;
1448
1449 // If the source and destination are SSE registers, then this is a legal
1450 // conversion that should not be lowered.
1451 const X86TargetLowering *X86Lowering =
1452 static_cast<const X86TargetLowering *>(TLI);
1453 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1454 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1455 if (SrcIsSSE && DstIsSSE)
1456 continue;
1457
1458 if (!SrcIsSSE && !DstIsSSE) {
1459 // If this is an FPStack extension, it is a noop.
1460 if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1461 continue;
1462 // If this is a value-preserving FPStack truncation, it is a noop.
1463 if (N->getConstantOperandVal(2))
1464 continue;
1465 }
1466
1467 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1468 // FPStack has extload and truncstore. SSE can fold direct loads into other
1469 // operations. Based on this, decide what we want to do.
1470 MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT;
1471 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1472 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1473 MachinePointerInfo MPI =
1474 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1475 SDLoc dl(N);
1476
1477 // FIXME: optimize the case where the src/dest is a load or store?
1478
1479 //Since the operation is StrictFP, use the preexisting chain.
1480 SDValue Store, Result;
1481 if (!SrcIsSSE) {
1482 SDVTList VTs = CurDAG->getVTList(MVT::Other);
1483 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
1484 Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT,
1485 MPI, /*Align*/ std::nullopt,
1486 MachineMemOperand::MOStore);
1487 if (N->getFlags().hasNoFPExcept()) {
1488 SDNodeFlags Flags = Store->getFlags();
1489 Flags.setNoFPExcept(true);
1490 Store->setFlags(Flags);
1491 }
1492 } else {
1493 assert(SrcVT == MemVT && "Unexpected VT!");
1494 Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
1495 MPI);
1496 }
1497
1498 if (!DstIsSSE) {
1499 SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
1500 SDValue Ops[] = {Store, MemTmp};
1501 Result = CurDAG->getMemIntrinsicNode(
1502 X86ISD::FLD, dl, VTs, Ops, MemVT, MPI,
1503 /*Align*/ std::nullopt, MachineMemOperand::MOLoad);
1504 if (N->getFlags().hasNoFPExcept()) {
1505 SDNodeFlags Flags = Result->getFlags();
1506 Flags.setNoFPExcept(true);
1507 Result->setFlags(Flags);
1508 }
1509 } else {
1510 assert(DstVT == MemVT && "Unexpected VT!");
1511 Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);
1512 }
1513
1514 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1515 // extload we created. This will cause general havok on the dag because
1516 // anything below the conversion could be folded into other existing nodes.
1517 // To avoid invalidating 'I', back it up to the convert node.
1518 --I;
1519 CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1520 break;
1521 }
1522 }
1523
1524
1525 // Now that we did that, the node is dead. Increment the iterator to the
1526 // next node to process, then delete N.
1527 ++I;
1528 MadeChange = true;
1529 }
1530
1531 // Remove any dead nodes that may have been left behind.
1532 if (MadeChange)
1533 CurDAG->RemoveDeadNodes();
1534}
1535
1536// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1537bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
1538 unsigned Opc = N->getMachineOpcode();
1539 if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1540 Opc != X86::MOVSX64rr8)
1541 return false;
1542
1543 SDValue N0 = N->getOperand(0);
1544
1545 // We need to be extracting the lower bit of an extend.
1546 if (!N0.isMachineOpcode() ||
1547 N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
1548 N0.getConstantOperandVal(1) != X86::sub_8bit)
1549 return false;
1550
1551 // We're looking for either a movsx or movzx to match the original opcode.
1552 unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1553 : X86::MOVSX32rr8_NOREX;
1554 SDValue N00 = N0.getOperand(0);
1555 if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
1556 return false;
1557
1558 if (Opc == X86::MOVSX64rr8) {
1559 // If we had a sign extend from 8 to 64 bits, we still need to go from 32
1560 // to 64.
1561 MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1562 MVT::i64, N00);
1563 ReplaceUses(N, Extend);
1564 } else {
1565 // Ok we can drop this extend and just use the original extend.
1566 ReplaceUses(N, N00.getNode());
1567 }
1568
1569 return true;
1570}
1571
1572void X86DAGToDAGISel::PostprocessISelDAG() {
1573 // Skip peepholes at -O0.
1574 if (TM.getOptLevel() == CodeGenOptLevel::None)
1575 return;
1576
1577 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1578
1579 bool MadeChange = false;
1580 while (Position != CurDAG->allnodes_begin()) {
1581 SDNode *N = &*--Position;
1582 // Skip dead nodes and any non-machine opcodes.
1583 if (N->use_empty() || !N->isMachineOpcode())
1584 continue;
1585
1586 if (tryOptimizeRem8Extend(N)) {
1587 MadeChange = true;
1588 continue;
1589 }
1590
1591 unsigned Opc = N->getMachineOpcode();
1592 switch (Opc) {
1593 default:
1594 continue;
1595 // ANDrr/rm + TESTrr -> TESTrr/TESTmr
1596 case X86::TEST8rr:
1597 case X86::TEST16rr:
1598 case X86::TEST32rr:
1599 case X86::TEST64rr:
1600 // ANDrr/rm + CTESTrr -> CTESTrr/CTESTmr
1601 case X86::CTEST8rr:
1602 case X86::CTEST16rr:
1603 case X86::CTEST32rr:
1604 case X86::CTEST64rr: {
1605 auto &Op0 = N->getOperand(0);
1606 if (Op0 != N->getOperand(1) || !Op0->hasNUsesOfValue(2, Op0.getResNo()) ||
1607 !Op0.isMachineOpcode())
1608 continue;
1609 SDValue And = N->getOperand(0);
1610#define CASE_ND(OP) \
1611 case X86::OP: \
1612 case X86::OP##_ND:
1613 switch (And.getMachineOpcode()) {
1614 default:
1615 continue;
1616 CASE_ND(AND8rr)
1617 CASE_ND(AND16rr)
1618 CASE_ND(AND32rr)
1619 CASE_ND(AND64rr) {
1620 if (And->hasAnyUseOfValue(1))
1621 continue;
1622 SmallVector<SDValue> Ops(N->op_values());
1623 Ops[0] = And.getOperand(0);
1624 Ops[1] = And.getOperand(1);
1625 MachineSDNode *Test =
1626 CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, Ops);
1627 ReplaceUses(N, Test);
1628 MadeChange = true;
1629 continue;
1630 }
1631 CASE_ND(AND8rm)
1632 CASE_ND(AND16rm)
1633 CASE_ND(AND32rm)
1634 CASE_ND(AND64rm) {
1635 if (And->hasAnyUseOfValue(1))
1636 continue;
1637 unsigned NewOpc;
1638 bool IsCTESTCC = X86::isCTESTCC(Opc);
1639#define FROM_TO(A, B) \
1640 CASE_ND(A) NewOpc = IsCTESTCC ? X86::C##B : X86::B; \
1641 break;
1642 switch (And.getMachineOpcode()) {
1643 FROM_TO(AND8rm, TEST8mr);
1644 FROM_TO(AND16rm, TEST16mr);
1645 FROM_TO(AND32rm, TEST32mr);
1646 FROM_TO(AND64rm, TEST64mr);
1647 }
1648#undef FROM_TO
1649#undef CASE_ND
1650 // Need to swap the memory and register operand.
1651 SmallVector<SDValue> Ops = {And.getOperand(1), And.getOperand(2),
1652 And.getOperand(3), And.getOperand(4),
1653 And.getOperand(5), And.getOperand(0)};
1654 // CC, Cflags.
1655 if (IsCTESTCC) {
1656 Ops.push_back(N->getOperand(2));
1657 Ops.push_back(N->getOperand(3));
1658 }
1659 // Chain of memory load
1660 Ops.push_back(And.getOperand(6));
1661 // Glue
1662 if (IsCTESTCC)
1663 Ops.push_back(N->getOperand(4));
1664
1665 MachineSDNode *Test = CurDAG->getMachineNode(
1666 NewOpc, SDLoc(N), MVT::i32, MVT::Other, Ops);
1667 CurDAG->setNodeMemRefs(
1668 Test, cast<MachineSDNode>(And.getNode())->memoperands());
1669 ReplaceUses(And.getValue(2), SDValue(Test, 1));
1670 ReplaceUses(SDValue(N, 0), SDValue(Test, 0));
1671 MadeChange = true;
1672 continue;
1673 }
1674 }
1675 }
1676 // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1677 // used. We're doing this late so we can prefer to fold the AND into masked
1678 // comparisons. Doing that can be better for the live range of the mask
1679 // register.
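// Illustrative example (hypothetical registers): when only the zero flag of
// the KORTEST is consumed,
//   %k0 = KANDWkk %k1, %k2
//   KORTESTWkk %k0, %k0
// sets ZF iff (%k1 & %k2) == 0, so it can be folded to
//   KTESTWkk %k1, %k2
// subject to the AVX512DQ check below for the W form.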
1680 case X86::KORTESTBkk:
1681 case X86::KORTESTWkk:
1682 case X86::KORTESTDkk:
1683 case X86::KORTESTQkk: {
1684 SDValue Op0 = N->getOperand(0);
1685 if (Op0 != N->getOperand(1) || !N->isOnlyUserOf(Op0.getNode()) ||
1686 !Op0.isMachineOpcode() || !onlyUsesZeroFlag(SDValue(N, 0)))
1687 continue;
1688#define CASE(A) \
1689 case X86::A: \
1690 break;
1691 switch (Op0.getMachineOpcode()) {
1692 default:
1693 continue;
1694 CASE(KANDBkk)
1695 CASE(KANDWkk)
1696 CASE(KANDDkk)
1697 CASE(KANDQkk)
1698 }
1699 unsigned NewOpc;
1700#define FROM_TO(A, B) \
1701 case X86::A: \
1702 NewOpc = X86::B; \
1703 break;
1704 switch (Opc) {
1705 FROM_TO(KORTESTBkk, KTESTBkk)
1706 FROM_TO(KORTESTWkk, KTESTWkk)
1707 FROM_TO(KORTESTDkk, KTESTDkk)
1708 FROM_TO(KORTESTQkk, KTESTQkk)
1709 }
1710 // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1711 // KAND instructions and KTEST use the same ISA feature.
1712 if (NewOpc == X86::KTESTWkk && !Subtarget->hasDQI())
1713 continue;
1714#undef FROM_TO
1715 MachineSDNode *KTest = CurDAG->getMachineNode(
1716 NewOpc, SDLoc(N), MVT::i32, Op0.getOperand(0), Op0.getOperand(1));
1717 ReplaceUses(N, KTest);
1718 MadeChange = true;
1719 continue;
1720 }
1721 // Attempt to remove vector moves that were inserted to zero upper bits.
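// Illustrative example (not from the source): in a pattern such as
//   %v = VADDPSrr %a, %b              ; VEX-encoded, already zeroes upper bits
//   %m = VMOVAPSrr %v                 ; inserted only to zero the upper bits
//   %r = SUBREG_TO_REG 0, %m, sub_xmm
// the move %m can be dropped and %v used directly, because the VEX/EVEX/XOP
// encoded producer already cleared the upper part of the register.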
1722 case TargetOpcode::SUBREG_TO_REG: {
1723 unsigned SubRegIdx = N->getConstantOperandVal(2);
1724 if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1725 continue;
1726
1727 SDValue Move = N->getOperand(1);
1728 if (!Move.isMachineOpcode())
1729 continue;
1730
1731 // Make sure it's one of the move opcodes we recognize.
1732 switch (Move.getMachineOpcode()) {
1733 default:
1734 continue;
1735 CASE(VMOVAPDrr) CASE(VMOVUPDrr)
1736 CASE(VMOVAPSrr) CASE(VMOVUPSrr)
1737 CASE(VMOVDQArr) CASE(VMOVDQUrr)
1738 CASE(VMOVAPDYrr) CASE(VMOVUPDYrr)
1739 CASE(VMOVAPSYrr) CASE(VMOVUPSYrr)
1740 CASE(VMOVDQAYrr) CASE(VMOVDQUYrr)
1741 CASE(VMOVAPDZ128rr) CASE(VMOVUPDZ128rr)
1742 CASE(VMOVAPSZ128rr) CASE(VMOVUPSZ128rr)
1743 CASE(VMOVDQA32Z128rr) CASE(VMOVDQU32Z128rr)
1744 CASE(VMOVDQA64Z128rr) CASE(VMOVDQU64Z128rr)
1745 CASE(VMOVAPDZ256rr) CASE(VMOVUPDZ256rr)
1746 CASE(VMOVAPSZ256rr) CASE(VMOVUPSZ256rr)
1747 CASE(VMOVDQA32Z256rr) CASE(VMOVDQU32Z256rr)
1748 CASE(VMOVDQA64Z256rr) CASE(VMOVDQU64Z256rr)
1749 }
1750#undef CASE
1751
1752 SDValue In = Move.getOperand(0);
1753 if (!In.isMachineOpcode() ||
1754 In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1755 continue;
1756
1757 // Make sure the instruction has a VEX, XOP, or EVEX prefix. This excludes
1758 // the SHA instructions, which use a legacy encoding.
1759 uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1760 if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1761 (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1762 (TSFlags & X86II::EncodingMask) != X86II::XOP)
1763 continue;
1764
1765 // The producing instruction is another vector instruction, so we can drop
1766 // the move.
1767 CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1768 MadeChange = true;
1769 }
1770 }
1771 }
1772
1773 if (MadeChange)
1774 CurDAG->RemoveDeadNodes();
1775}
1776
1777
1778/// Emit any code that needs to be executed only in the main function.
1779void X86DAGToDAGISel::emitSpecialCodeForMain() {
1780 if (Subtarget->isTargetCygMing()) {
1781 TargetLowering::ArgListTy Args;
1782 auto &DL = CurDAG->getDataLayout();
1783
1784 TargetLowering::CallLoweringInfo CLI(*CurDAG);
1785 CLI.setChain(CurDAG->getRoot())
1786 .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1787 CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1788 std::move(Args));
1789 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1790 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1791 CurDAG->setRoot(Result.second);
1792 }
1793}
1794
1795void X86DAGToDAGISel::emitFunctionEntryCode() {
1796 // If this is main, emit special code for main.
1797 const Function &F = MF->getFunction();
1798 if (F.hasExternalLinkage() && F.getName() == "main")
1799 emitSpecialCodeForMain();
1800}
1801
1802static bool isDispSafeForFrameIndexOrRegBase(int64_t Val) {
1803 // We can run into an issue where a frame index or a register base
1804 // includes a displacement that, when added to the explicit displacement,
1805 // will overflow the displacement field. Assuming that the
1806 // displacement fits into a 31-bit integer (which is only slightly more
1807 // aggressive than the current fundamental assumption that it fits into
1808 // a 32-bit integer), a 31-bit disp should always be safe.
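// Worked example (illustrative): with the 31-bit limit, a frame-object offset
// of up to about 1GiB and an explicit displacement of up to about 1GiB can be
// combined without overflowing the signed 32-bit displacement field.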
1809 return isInt<31>(Val);
1810}
1811
1812bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1813 X86ISelAddressMode &AM) {
1814 // We may have already matched a displacement and the caller just added the
1815 // symbolic displacement. So we still need to do the checks even if Offset
1816 // is zero.
1817
1818 int64_t Val = AM.Disp + Offset;
1819
1820 // Cannot combine ExternalSymbol displacements with integer offsets.
1821 if (Val != 0 && (AM.ES || AM.MCSym))
1822 return true;
1823
1824 CodeModel::Model M = TM.getCodeModel();
1825 if (Subtarget->is64Bit()) {
1826 if (Val != 0 &&
1827 !X86::isOffsetSuitableForCodeModel(Val, M,
1828 AM.hasSymbolicDisplacement()))
1829 return true;
1830 // In addition to the checks required for a register base, check that
1831 // we do not try to use an unsafe Disp with a frame index.
1832 if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1833 !isDispSafeForFrameIndexOrRegBase(Val))
1834 return true;
1835 // In ILP32 (x32) mode, pointers are 32 bits and need to be zero-extended to
1836 // 64 bits. Instructions with 32-bit register addresses perform this zero
1837 // extension for us and we can safely ignore the high bits of Offset.
1838 // Instructions with only a 32-bit immediate address do not, though: they
1839 // sign extend instead. This means only the low 2GB of the address space
1840 // is directly addressable; we need indirect addressing for the high 2GB
1841 // of address space.
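// Illustrative example (hypothetical value): with a 32-bit register base
// holding 0x90000000, the hardware zero-extends the effective address to
// 0x0000000090000000, whereas an immediate-only address of 0x90000000 is
// sign-extended to 0xffffffff90000000, which is why such displacements
// without a base or index register are rejected here.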
1842 // TODO: Some of the earlier checks may be relaxed for ILP32 mode as the
1843 // implicit zero extension of instructions would cover up any problem.
1844 // However, we have asserts elsewhere that get triggered if we do, so keep
1845 // the checks for now.
1846 // TODO: We would actually be able to accept these, as well as the same
1847 // addresses in LP64 mode, by adding the EIZ pseudo-register as an operand
1848 // to get an address size override to be emitted. However, this
1849 // pseudo-register is not part of any register class and therefore causes
1850 // MIR verification to fail.
1851 if (Subtarget->isTarget64BitILP32() &&
1852 !isDispSafeForFrameIndexOrRegBase((uint32_t)Val) &&
1853 !AM.hasBaseOrIndexReg())
1854 return true;
1855 } else if (AM.hasBaseOrIndexReg() && !isDispSafeForFrameIndexOrRegBase(Val))
1856 // For 32-bit X86, make sure the displacement still isn't close to the
1857 // expressible limit.
1858 return true;
1859 AM.Disp = Val;
1860 return false;
1861}
1862
1863bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
1864 bool AllowSegmentRegForX32) {
1865 SDValue Address = N->getOperand(1);
1866
1867 // load gs:0 -> GS segment register.
1868 // load fs:0 -> FS segment register.
1869 //
1870 // This optimization is generally valid because the GNU TLS model defines that
1871 // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
1872 // with 32-bit registers, as we get in ILP32 mode, those registers are first
1873 // zero-extended to 64 bits and then added to the base address, which gives
1874 // unwanted results when the register holds a negative value.
1875 // For more information see http://people.redhat.com/drepper/tls.pdf
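// Illustrative example (not from the source): on such targets the thread
// pointer block contains its own address, so it can be read directly, e.g.
//   movl %gs:0, %eax    ; ia32
//   movq %fs:0, %rax    ; x86-64 LP64
// which is why a load of address 0 in the GS/FS address space folds into a
// plain segment-register reference below.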
1876 if (isNullConstant(Address) && AM.Segment.getNode() == nullptr &&
1877 !IndirectTlsSegRefs &&
1878 (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1879 Subtarget->isTargetFuchsia())) {
1880 if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
1881 return true;
1882 switch (N->getPointerInfo().getAddrSpace()) {
1883 case X86AS::GS:
1884 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1885 return false;
1886 case X86AS::FS:
1887 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1888 return false;
1889 // Address space X86AS::SS is not handled here, because it is not used to
1890 // address TLS areas.
1891 }
1892 }
1893
1894 return true;
1895}
1896
1897/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1898/// mode. These wrap things that will resolve down into a symbol reference.
1899/// If no match is possible, this returns true, otherwise it returns false.
1900bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1901 // If the addressing mode already has a symbol as the displacement, we can
1902 // never match another symbol.
1903 if (AM.hasSymbolicDisplacement())
1904 return true;
1905
1906 bool IsRIPRelTLS = false;
1907 bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1908 if (IsRIPRel) {
1909 SDValue Val = N.getOperand(0);
1910 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1911 IsRIPRelTLS = true;
1912 }
1913
1914 // We can't use an addressing mode in the 64-bit large code model.
1915 // Global TLS addressing is an exception. In the medium code model,
1916 // we can use such a mode when RIP wrappers are present.
1917 // That signifies access to globals that are known to be "near",
1918 // such as the GOT itself.
1919 CodeModel::Model M = TM.getCodeModel();
1920 if (Subtarget->is64Bit() && M == CodeModel::Large && !IsRIPRelTLS)
1921 return true;
1922
1923 // Base and index reg must be 0 in order to use %rip as base.
1924 if (IsRIPRel && AM.hasBaseOrIndexReg())
1925 return true;
1926
1927 // Make a local copy in case we can't do this fold.
1928 X86ISelAddressMode Backup = AM;
1929
1930 int64_t Offset = 0;
1931 SDValue N0 = N.getOperand(0);
1932 if (auto *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1933 AM.GV = G->getGlobal();
1934 AM.SymbolFlags = G->getTargetFlags();
1935 Offset = G->getOffset();
1936 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1937 AM.CP = CP->getConstVal();
1938 AM.Alignment = CP->getAlign();
1939 AM.SymbolFlags = CP->getTargetFlags();
1940 Offset = CP->getOffset();
1941 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1942 AM.ES = S->getSymbol();
1943 AM.SymbolFlags = S->getTargetFlags();
1944 } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1945 AM.MCSym = S->getMCSymbol();
1946 } else if (auto *J = dyn_cast<JumpTableSDNode>(N0)) {
1947 AM.JT = J->getIndex();
1948 AM.SymbolFlags = J->getTargetFlags();
1949 } else if (auto *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1950 AM.BlockAddr = BA->getBlockAddress();
1951 AM.SymbolFlags = BA->getTargetFlags();
1952 Offset = BA->getOffset();
1953 } else
1954 llvm_unreachable("Unhandled symbol reference node.");
1955
1956 // Can't use an addressing mode with large globals.
1957 if (Subtarget->is64Bit() && !IsRIPRel && AM.GV &&
1958 TM.isLargeGlobalValue(AM.GV)) {
1959 AM = Backup;
1960 return true;
1961 }
1962
1963 if (foldOffsetIntoAddress(Offset, AM)) {
1964 AM = Backup;
1965 return true;
1966 }
1967
1968 if (IsRIPRel)
1969 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1970
1971 // Commit the changes now that we know this fold is safe.
1972 return false;
1973}
1974
1975/// Add the specified node to the specified addressing mode, returning true if
1976/// it cannot be done. This just pattern matches for the addressing mode.
1977bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1978 if (matchAddressRecursively(N, AM, 0))
1979 return true;
1980
1981 // Post-processing: Make a second attempt to fold a load, if we now know
1982 // that there will not be any other register. This is only performed for
1983 // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
1984 // any foldable load the first time.
1985 if (Subtarget->isTarget64BitILP32() &&
1986 AM.BaseType == X86ISelAddressMode::RegBase &&
1987 AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
1988 SDValue Save_Base_Reg = AM.Base_Reg;
1989 if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
1990 AM.Base_Reg = SDValue();
1991 if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
1992 AM.Base_Reg = Save_Base_Reg;
1993 }
1994 }
1995
1996 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1997 // a smaller encoding and avoids a scaled-index.
1998 if (AM.Scale == 2 &&
1999 AM.BaseType == X86ISelAddressMode::RegBase &&
2000 AM.Base_Reg.getNode() == nullptr) {
2001 AM.Base_Reg = AM.IndexReg;
2002 AM.Scale = 1;
2003 }
2004
2005 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
2006 // because it has a smaller encoding.
2007 if (TM.getCodeModel() != CodeModel::Large &&
2008 (!AM.GV || !TM.isLargeGlobalValue(AM.GV)) && Subtarget->is64Bit() &&
2009 AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase &&
2010 AM.Base_Reg.getNode() == nullptr && AM.IndexReg.getNode() == nullptr &&
2011 AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) {
2012 // However, when GV is a local function symbol and in the same section as
2013 // the current instruction, and AM.Disp is negative and near INT32_MIN,
2014 // referencing GV+Disp generates a relocation referencing the section symbol
2015 // with an even smaller offset, which might underflow. We should bail out if
2016 // the negative offset is too close to INT32_MIN. Actually, we are more
2017 // conservative here, using a smaller magic number also used by
2018 // isOffsetSuitableForCodeModel.
2019 if (isa_and_nonnull<Function>(AM.GV) && AM.Disp < -16 * 1024 * 1024)
2020 return true;
2021
2022 AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
2023 }
2024
2025 return false;
2026}
2027
2028bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
2029 unsigned Depth) {
2030 // Add an artificial use to this node so that we can keep track of
2031 // it if it gets CSE'd with a different node.
2032 HandleSDNode Handle(N);
2033
2034 X86ISelAddressMode Backup = AM;
2035 if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
2036 !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
2037 return false;
2038 AM = Backup;
2039
2040 // Try again after commutating the operands.
2041 if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
2042 Depth + 1) &&
2043 !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
2044 return false;
2045 AM = Backup;
2046
2047 // If we couldn't fold both operands into the address at the same time,
2048 // see if we can just put each operand into a register and fold at least
2049 // the add.
2050 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2051 !AM.Base_Reg.getNode() &&
2052 !AM.IndexReg.getNode()) {
2053 N = Handle.getValue();
2054 AM.Base_Reg = N.getOperand(0);
2055 AM.IndexReg = N.getOperand(1);
2056 AM.Scale = 1;
2057 return false;
2058 }
2059 N = Handle.getValue();
2060 return true;
2061}
2062
2063// Insert a node into the DAG at least before the Pos node's position. This
2064// will reposition the node as needed, and will assign it a node ID that is <=
2065// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
2066// IDs! The selection DAG must no longer depend on their uniqueness when this
2067// is used.
2068static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
2069 if (N->getNodeId() == -1 ||
2070 (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
2071 SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
2072 DAG.RepositionNode(Pos->getIterator(), N.getNode());
2073 // Mark Node as invalid for pruning as after this it may be a successor to a
2074 // selected node but otherwise be in the same position of Pos.
2075 // Conservatively mark it with the same -abs(Id) to assure node id
2076 // invariant is preserved.
2077 N->setNodeId(Pos->getNodeId());
2078 SelectionDAGISel::InvalidateNodeId(N.getNode());
2079 }
2080}
2081
2082// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
2083// safe. This allows us to convert the shift and and into an h-register
2084// extract and a scaled index. Returns false if the simplification is
2085// performed.
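// A concrete instance (illustrative): with C1 == 2,
//   (X >> 6) & 0x3fc   ==>   ((X >> 8) & 0xff) << 2
// where "(X >> 8) & 0xff" becomes the h-register/byte extract and the "<< 2"
// becomes a scale of 4 in the addressing mode.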
2086 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
2087 uint64_t Mask,
2088 SDValue Shift, SDValue X,
2089 X86ISelAddressMode &AM) {
2090 if (Shift.getOpcode() != ISD::SRL ||
2091 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
2092 !Shift.hasOneUse())
2093 return true;
2094
2095 int ScaleLog = 8 - Shift.getConstantOperandVal(1);
2096 if (ScaleLog <= 0 || ScaleLog >= 4 ||
2097 Mask != (0xffu << ScaleLog))
2098 return true;
2099
2100 MVT XVT = X.getSimpleValueType();
2101 MVT VT = N.getSimpleValueType();
2102 SDLoc DL(N);
2103 SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
2104 SDValue NewMask = DAG.getConstant(0xff, DL, XVT);
2105 SDValue Srl = DAG.getNode(ISD::SRL, DL, XVT, X, Eight);
2106 SDValue And = DAG.getNode(ISD::AND, DL, XVT, Srl, NewMask);
2107 SDValue Ext = DAG.getZExtOrTrunc(And, DL, VT);
2108 SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
2109 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Ext, ShlCount);
2110
2111 // Insert the new nodes into the topological ordering. We must do this in
2112 // a valid topological ordering as nothing is going to go back and re-sort
2113 // these nodes. We continually insert before 'N' in sequence as this is
2114 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2115 // hierarchy left to express.
2116 insertDAGNode(DAG, N, Eight);
2117 insertDAGNode(DAG, N, NewMask);
2118 insertDAGNode(DAG, N, Srl);
2119 insertDAGNode(DAG, N, And);
2120 insertDAGNode(DAG, N, Ext);
2121 insertDAGNode(DAG, N, ShlCount);
2122 insertDAGNode(DAG, N, Shl);
2123 DAG.ReplaceAllUsesWith(N, Shl);
2124 DAG.RemoveDeadNode(N.getNode());
2125 AM.IndexReg = Ext;
2126 AM.Scale = (1 << ScaleLog);
2127 return false;
2128}
2129
2130// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
2131// allows us to fold the shift into this addressing mode. Returns false if the
2132// transform succeeded.
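// A concrete instance (illustrative): with C1 == 2 and C2 == 0x3c,
//   (X << 2) & 0x3c   ==>   (X & 0xf) << 2
// so the shift can be folded into the addressing mode as a scale of 4,
// leaving only a simple AND on the index.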
2133 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
2134 X86ISelAddressMode &AM) {
2135 SDValue Shift = N.getOperand(0);
2136
2137 // Use a signed mask so that shifting right will insert sign bits. These
2138 // bits will be removed when we shift the result left so it doesn't matter
2139 // what we use. This might allow a smaller immediate encoding.
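// Worked example (illustrative): if the mask is 0xfffffff0 and the shift
// amount is 2, the arithmetic shift gives 0xfffffffc, encodable as the imm8
// value -4, whereas a logical shift would give 0x3ffffffc and force a 4-byte
// immediate; the extra high bits are discarded by the later left shift.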
2140 int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
2141
2142 // If we have an any_extend feeding the AND, look through it to see if there
2143 // is a shift behind it. But only if the AND doesn't use the extended bits.
2144 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
2145 bool FoundAnyExtend = false;
2146 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
2147 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
2148 isUInt<32>(Mask)) {
2149 FoundAnyExtend = true;
2150 Shift = Shift.getOperand(0);
2151 }
2152
2153 if (Shift.getOpcode() != ISD::SHL ||
2154 !isa<ConstantSDNode>(Shift.getOperand(1)))
2155 return true;
2156
2157 SDValue X = Shift.getOperand(0);
2158
2159 // Not likely to be profitable if either the AND or SHIFT node has more
2160 // than one use (unless all uses are for address computation). Besides,
2161 // isel mechanism requires their node ids to be reused.
2162 if (!N.hasOneUse() || !Shift.hasOneUse())
2163 return true;
2164
2165 // Verify that the shift amount is something we can fold.
2166 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2167 if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
2168 return true;
2169
2170 MVT VT = N.getSimpleValueType();
2171 SDLoc DL(N);
2172 if (FoundAnyExtend) {
2173 SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
2174 insertDAGNode(DAG, N, NewX);
2175 X = NewX;
2176 }
2177
2178 SDValue NewMask = DAG.getSignedConstant(Mask >> ShiftAmt, DL, VT);
2179 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
2180 SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
2181
2182 // Insert the new nodes into the topological ordering. We must do this in
2183 // a valid topological ordering as nothing is going to go back and re-sort
2184 // these nodes. We continually insert before 'N' in sequence as this is
2185 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2186 // hierarchy left to express.
2187 insertDAGNode(DAG, N, NewMask);
2188 insertDAGNode(DAG, N, NewAnd);
2189 insertDAGNode(DAG, N, NewShift);
2190 DAG.ReplaceAllUsesWith(N, NewShift);
2191 DAG.RemoveDeadNode(N.getNode());
2192
2193 AM.Scale = 1 << ShiftAmt;
2194 AM.IndexReg = NewAnd;
2195 return false;
2196}
2197
2198// Implement some heroics to detect shifts of masked values where the mask can
2199// be replaced by extending the shift and undoing that in the addressing mode
2200// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
2201// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
2202// the addressing mode. This results in code such as:
2203//
2204// int f(short *y, int *lookup_table) {
2205// ...
2206// return *y + lookup_table[*y >> 11];
2207// }
2208//
2209// Turning into:
2210// movzwl (%rdi), %eax
2211// movl %eax, %ecx
2212// shrl $11, %ecx
2213// addl (%rsi,%rcx,4), %eax
2214//
2215// Instead of:
2216// movzwl (%rdi), %eax
2217// movl %eax, %ecx
2218// shrl $9, %ecx
2219// andl $124, %rcx
2220// addl (%rsi,%rcx), %eax
2221//
2222// Note that this function assumes the mask is provided as a mask *after* the
2223// value is shifted. The input chain may or may not match that, but computing
2224// such a mask is trivial.
2225 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
2226 uint64_t Mask,
2227 SDValue Shift, SDValue X,
2228 X86ISelAddressMode &AM) {
2229 if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
2230 !isa<ConstantSDNode>(Shift.getOperand(1)))
2231 return true;
2232
2233 // We need to ensure that mask is a continuous run of bits.
2234 unsigned MaskIdx, MaskLen;
2235 if (!isShiftedMask_64(Mask, MaskIdx, MaskLen))
2236 return true;
2237 unsigned MaskLZ = 64 - (MaskIdx + MaskLen);
2238
2239 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2240
2241 // The amount of shift we're trying to fit into the addressing mode is taken
2242 // from the shifted mask index (number of trailing zeros of the mask).
2243 unsigned AMShiftAmt = MaskIdx;
2244
2245 // There is nothing we can do here unless the mask is removing some bits.
2246 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
2247 if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
2248
2249 // Scale the leading zero count down based on the actual size of the value.
2250 // Also scale it down based on the size of the shift.
2251 unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
2252 if (MaskLZ < ScaleDown)
2253 return true;
2254 MaskLZ -= ScaleDown;
2255
2256 // The final check is to ensure that any masked out high bits of X are
2257 // already known to be zero. Otherwise, the mask has a semantic impact
2258 // other than masking out a couple of low bits. Unfortunately, because of
2259 // the mask, zero extensions will be removed from operands in some cases.
2260 // This code works extra hard to look through extensions because we can
2261 // replace them with zero extensions cheaply if necessary.
2262 bool ReplacingAnyExtend = false;
2263 if (X.getOpcode() == ISD::ANY_EXTEND) {
2264 unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
2265 X.getOperand(0).getSimpleValueType().getSizeInBits();
2266 // Assume that we'll replace the any-extend with a zero-extend, and
2267 // narrow the search to the extended value.
2268 X = X.getOperand(0);
2269 MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
2270 ReplacingAnyExtend = true;
2271 }
2272 APInt MaskedHighBits =
2273 APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
2274 if (!DAG.MaskedValueIsZero(X, MaskedHighBits))
2275 return true;
2276
2277 // We've identified a pattern that can be transformed into a single shift
2278 // and an addressing mode. Make it so.
2279 MVT VT = N.getSimpleValueType();
2280 if (ReplacingAnyExtend) {
2281 assert(X.getValueType() != VT);
2282 // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
2283 SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
2284 insertDAGNode(DAG, N, NewX);
2285 X = NewX;
2286 }
2287
2288 MVT XVT = X.getSimpleValueType();
2289 SDLoc DL(N);
2290 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
2291 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
2292 SDValue NewExt = DAG.getZExtOrTrunc(NewSRL, DL, VT);
2293 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
2294 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
2295
2296 // Insert the new nodes into the topological ordering. We must do this in
2297 // a valid topological ordering as nothing is going to go back and re-sort
2298 // these nodes. We continually insert before 'N' in sequence as this is
2299 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2300 // hierarchy left to express.
2301 insertDAGNode(DAG, N, NewSRLAmt);
2302 insertDAGNode(DAG, N, NewSRL);
2303 insertDAGNode(DAG, N, NewExt);
2304 insertDAGNode(DAG, N, NewSHLAmt);
2305 insertDAGNode(DAG, N, NewSHL);
2306 DAG.ReplaceAllUsesWith(N, NewSHL);
2307 DAG.RemoveDeadNode(N.getNode());
2308
2309 AM.Scale = 1 << AMShiftAmt;
2310 AM.IndexReg = NewExt;
2311 return false;
2312}
2313
2314// Transform "(X >> SHIFT) & (MASK << C1)" to
2315// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
2316// matched to a BEXTR later. Returns false if the simplification is performed.
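// A concrete instance (illustrative): with SHIFT == 2 and C1 == 3
// (MASK == 0xff, so the combined mask is 0x7f8),
//   (X >> 2) & 0x7f8   ==>   ((X >> 5) & 0xff) << 3
// the "(X >> 5) & 0xff" part can later be matched to BEXTR and the "<< 3"
// becomes an addressing-mode scale of 8.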
2317 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
2318 uint64_t Mask,
2319 SDValue Shift, SDValue X,
2320 X86ISelAddressMode &AM,
2321 const X86Subtarget &Subtarget) {
2322 if (Shift.getOpcode() != ISD::SRL ||
2323 !isa<ConstantSDNode>(Shift.getOperand(1)) ||
2324 !Shift.hasOneUse() || !N.hasOneUse())
2325 return true;
2326
2327 // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
2328 if (!Subtarget.hasTBM() &&
2329 !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
2330 return true;
2331
2332 // We need to ensure that mask is a continuous run of bits.
2333 unsigned MaskIdx, MaskLen;
2334 if (!isShiftedMask_64(Mask, MaskIdx, MaskLen))
2335 return true;
2336
2337 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
2338
2339 // The amount of shift we're trying to fit into the addressing mode is taken
2340 // from the shifted mask index (number of trailing zeros of the mask).
2341 unsigned AMShiftAmt = MaskIdx;
2342
2343 // There is nothing we can do here unless the mask is removing some bits.
2344 // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
2345 if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
2346
2347 MVT XVT = X.getSimpleValueType();
2348 MVT VT = N.getSimpleValueType();
2349 SDLoc DL(N);
2350 SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
2351 SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
2352 SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, XVT);
2353 SDValue NewAnd = DAG.getNode(ISD::AND, DL, XVT, NewSRL, NewMask);
2354 SDValue NewExt = DAG.getZExtOrTrunc(NewAnd, DL, VT);
2355 SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
2356 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
2357
2358 // Insert the new nodes into the topological ordering. We must do this in
2359 // a valid topological ordering as nothing is going to go back and re-sort
2360 // these nodes. We continually insert before 'N' in sequence as this is
2361 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2362 // hierarchy left to express.
2363 insertDAGNode(DAG, N, NewSRLAmt);
2364 insertDAGNode(DAG, N, NewSRL);
2365 insertDAGNode(DAG, N, NewMask);
2366 insertDAGNode(DAG, N, NewAnd);
2367 insertDAGNode(DAG, N, NewExt);
2368 insertDAGNode(DAG, N, NewSHLAmt);
2369 insertDAGNode(DAG, N, NewSHL);
2370 DAG.ReplaceAllUsesWith(N, NewSHL);
2371 DAG.RemoveDeadNode(N.getNode());
2372
2373 AM.Scale = 1 << AMShiftAmt;
2374 AM.IndexReg = NewExt;
2375 return false;
2376}
2377
2378// Attempt to peek further into a scaled index register, collecting additional
2379 // extensions / offsets / etc. Returns \p N if we can't peek any further.
2380SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
2381 X86ISelAddressMode &AM,
2382 unsigned Depth) {
2383 assert(AM.IndexReg.getNode() == nullptr && "IndexReg already matched");
2384 assert((AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8) &&
2385 "Illegal index scale");
2386
2387 // Limit recursion.
2388 if (Depth >= SelectionDAG::MaxRecursionDepth)
2389 return N;
2390
2391 EVT VT = N.getValueType();
2392 unsigned Opc = N.getOpcode();
2393
2394 // index: add(x,c) -> index: x, disp + c
2395 if (CurDAG->isBaseWithConstantOffset(N)) {
2396 auto *AddVal = cast<ConstantSDNode>(N.getOperand(1));
2397 uint64_t Offset = (uint64_t)AddVal->getSExtValue() * AM.Scale;
2398 if (!foldOffsetIntoAddress(Offset, AM))
2399 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2400 }
2401
2402 // index: add(x,x) -> index: x, scale * 2
2403 if (Opc == ISD::ADD && N.getOperand(0) == N.getOperand(1)) {
2404 if (AM.Scale <= 4) {
2405 AM.Scale *= 2;
2406 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2407 }
2408 }
2409
2410 // index: shl(x,i) -> index: x, scale * (1 << i)
2411 if (Opc == X86ISD::VSHLI) {
2412 uint64_t ShiftAmt = N.getConstantOperandVal(1);
2413 uint64_t ScaleAmt = 1ULL << ShiftAmt;
2414 if ((AM.Scale * ScaleAmt) <= 8) {
2415 AM.Scale *= ScaleAmt;
2416 return matchIndexRecursively(N.getOperand(0), AM, Depth + 1);
2417 }
2418 }
2419
2420 // index: sext(add_nsw(x,c)) -> index: sext(x), disp + sext(c)
2421 // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext?
2422 if (Opc == ISD::SIGN_EXTEND && !VT.isVector() && N.hasOneUse()) {
2423 SDValue Src = N.getOperand(0);
2424 if (Src.getOpcode() == ISD::ADD && Src->getFlags().hasNoSignedWrap() &&
2425 Src.hasOneUse()) {
2426 if (CurDAG->isBaseWithConstantOffset(Src)) {
2427 SDValue AddSrc = Src.getOperand(0);
2428 auto *AddVal = cast<ConstantSDNode>(Src.getOperand(1));
2429 int64_t Offset = AddVal->getSExtValue();
2430 if (!foldOffsetIntoAddress((uint64_t)Offset * AM.Scale, AM)) {
2431 SDLoc DL(N);
2432 SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
2433 SDValue ExtVal = CurDAG->getSignedConstant(Offset, DL, VT);
2434 SDValue ExtAdd = CurDAG->getNode(ISD::ADD, DL, VT, ExtSrc, ExtVal);
2435 insertDAGNode(*CurDAG, N, ExtSrc);
2436 insertDAGNode(*CurDAG, N, ExtVal);
2437 insertDAGNode(*CurDAG, N, ExtAdd);
2438 CurDAG->ReplaceAllUsesWith(N, ExtAdd);
2439 CurDAG->RemoveDeadNode(N.getNode());
2440 return ExtSrc;
2441 }
2442 }
2443 }
2444 }
2445
2446 // index: zext(add_nuw(x,c)) -> index: zext(x), disp + zext(c)
2447 // index: zext(addlike(x,c)) -> index: zext(x), disp + zext(c)
2448 // TODO: call matchIndexRecursively(AddSrc) if we won't corrupt sext?
2449 if (Opc == ISD::ZERO_EXTEND && !VT.isVector() && N.hasOneUse()) {
2450 SDValue Src = N.getOperand(0);
2451 unsigned SrcOpc = Src.getOpcode();
2452 if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
2453 CurDAG->isADDLike(Src, /*NoWrap=*/true)) &&
2454 Src.hasOneUse()) {
2455 if (CurDAG->isBaseWithConstantOffset(Src)) {
2456 SDValue AddSrc = Src.getOperand(0);
2457 uint64_t Offset = Src.getConstantOperandVal(1);
2458 if (!foldOffsetIntoAddress(Offset * AM.Scale, AM)) {
2459 SDLoc DL(N);
2460 SDValue Res;
2461 // If we're also scaling, see if we can use that as well.
2462 if (AddSrc.getOpcode() == ISD::SHL &&
2463 isa<ConstantSDNode>(AddSrc.getOperand(1))) {
2464 SDValue ShVal = AddSrc.getOperand(0);
2465 uint64_t ShAmt = AddSrc.getConstantOperandVal(1);
2466 APInt HiBits =
2467 APInt::getHighBitsSet(AddSrc.getScalarValueSizeInBits(), ShAmt);
2468 uint64_t ScaleAmt = 1ULL << ShAmt;
2469 if ((AM.Scale * ScaleAmt) <= 8 &&
2470 (AddSrc->getFlags().hasNoUnsignedWrap() ||
2471 CurDAG->MaskedValueIsZero(ShVal, HiBits))) {
2472 AM.Scale *= ScaleAmt;
2473 SDValue ExtShVal = CurDAG->getNode(Opc, DL, VT, ShVal);
2474 SDValue ExtShift = CurDAG->getNode(ISD::SHL, DL, VT, ExtShVal,
2475 AddSrc.getOperand(1));
2476 insertDAGNode(*CurDAG, N, ExtShVal);
2477 insertDAGNode(*CurDAG, N, ExtShift);
2478 AddSrc = ExtShift;
2479 Res = ExtShVal;
2480 }
2481 }
2482 SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
2483 SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
2484 SDValue ExtAdd = CurDAG->getNode(SrcOpc, DL, VT, ExtSrc, ExtVal);
2485 insertDAGNode(*CurDAG, N, ExtSrc);
2486 insertDAGNode(*CurDAG, N, ExtVal);
2487 insertDAGNode(*CurDAG, N, ExtAdd);
2488 CurDAG->ReplaceAllUsesWith(N, ExtAdd);
2489 CurDAG->RemoveDeadNode(N.getNode());
2490 return Res ? Res : ExtSrc;
2491 }
2492 }
2493 }
2494 }
2495
2496 // TODO: Handle extensions, shifted masks etc.
2497 return N;
2498}
2499
2500bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
2501 unsigned Depth) {
2502 LLVM_DEBUG({
2503 dbgs() << "MatchAddress: ";
2504 AM.dump(CurDAG);
2505 });
2506 // Limit recursion.
2507 if (Depth >= SelectionDAG::MaxRecursionDepth)
2508 return matchAddressBase(N, AM);
2509
2510 // If this is already a %rip relative address, we can only merge immediates
2511 // into it. Instead of handling this in every case, we handle it here.
2512 // RIP relative addressing: %rip + 32-bit displacement!
2513 if (AM.isRIPRelative()) {
2514 // FIXME: JumpTable and ExternalSymbol address currently don't like
2515 // displacements. It isn't very important, but this should be fixed for
2516 // consistency.
2517 if (!(AM.ES || AM.MCSym) && AM.JT != -1)
2518 return true;
2519
2520 if (auto *Cst = dyn_cast<ConstantSDNode>(N))
2521 if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
2522 return false;
2523 return true;
2524 }
2525
2526 switch (N.getOpcode()) {
2527 default: break;
2528 case ISD::LOCAL_RECOVER: {
2529 if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
2530 if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
2531 // Use the symbol and don't prefix it.
2532 AM.MCSym = ESNode->getMCSymbol();
2533 return false;
2534 }
2535 break;
2536 }
2537 case ISD::Constant: {
2538 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2539 if (!foldOffsetIntoAddress(Val, AM))
2540 return false;
2541 break;
2542 }
2543
2544 case X86ISD::Wrapper:
2545 case X86ISD::WrapperRIP:
2546 if (!matchWrapper(N, AM))
2547 return false;
2548 break;
2549
2550 case ISD::LOAD:
2551 if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
2552 return false;
2553 break;
2554
2555 case ISD::FrameIndex:
2556 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2557 AM.Base_Reg.getNode() == nullptr &&
2558 (!Subtarget->is64Bit() || isDispSafeForFrameIndexOrRegBase(AM.Disp))) {
2559 AM.BaseType = X86ISelAddressMode::FrameIndexBase;
2560 AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
2561 return false;
2562 }
2563 break;
2564
2565 case ISD::SHL:
2566 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2567 break;
2568
2569 if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
2570 unsigned Val = CN->getZExtValue();
2571 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
2572 // that the base operand remains free for further matching. If
2573 // the base doesn't end up getting used, a post-processing step
2574 // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
2575 if (Val == 1 || Val == 2 || Val == 3) {
2576 SDValue ShVal = N.getOperand(0);
2577 AM.Scale = 1 << Val;
2578 AM.IndexReg = matchIndexRecursively(ShVal, AM, Depth + 1);
2579 return false;
2580 }
2581 }
2582 break;
2583
2584 case ISD::SRL: {
2585 // Scale must not be used already.
2586 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2587
2588 // We only handle up to 64-bit values here as those are what matter for
2589 // addressing mode optimizations.
2590 assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2591 "Unexpected value size!");
2592
2593 SDValue And = N.getOperand(0);
2594 if (And.getOpcode() != ISD::AND) break;
2595 SDValue X = And.getOperand(0);
2596
2597 // The mask used for the transform is expected to be post-shift, but we
2598 // found the shift first so just apply the shift to the mask before passing
2599 // it down.
2600 if (!isa<ConstantSDNode>(N.getOperand(1)) ||
2601 !isa<ConstantSDNode>(And.getOperand(1)))
2602 break;
2603 uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
2604
2605 // Try to fold the mask and shift into the scale, and return false if we
2606 // succeed.
2607 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
2608 return false;
2609 break;
2610 }
2611
2612 case ISD::SMUL_LOHI:
2613 case ISD::UMUL_LOHI:
2614 // A mul_lohi where we need the low part can be folded as a plain multiply.
2615 if (N.getResNo() != 0) break;
2616 [[fallthrough]];
2617 case ISD::MUL:
2618 case X86ISD::MUL_IMM:
2619 // X*[3,5,9] -> X+X*[2,4,8]
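// Illustrative example (hypothetical registers): x*9 becomes base = x,
// index = x, scale = 8, i.e. "leaq (%rax,%rax,8), %rcx" instead of a
// multiply.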
2620 if (AM.BaseType == X86ISelAddressMode::RegBase &&
2621 AM.Base_Reg.getNode() == nullptr &&
2622 AM.IndexReg.getNode() == nullptr) {
2623 if (auto *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
2624 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
2625 CN->getZExtValue() == 9) {
2626 AM.Scale = unsigned(CN->getZExtValue())-1;
2627
2628 SDValue MulVal = N.getOperand(0);
2629 SDValue Reg;
2630
2631 // Okay, we know that we have a scale by now. However, if the scaled
2632 // value is an add of something and a constant, we can fold the
2633 // constant into the disp field here.
2634 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
2635 isa<ConstantSDNode>(MulVal.getOperand(1))) {
2636 Reg = MulVal.getOperand(0);
2637 auto *AddVal = cast<ConstantSDNode>(MulVal.getOperand(1));
2638 uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
2639 if (foldOffsetIntoAddress(Disp, AM))
2640 Reg = N.getOperand(0);
2641 } else {
2642 Reg = N.getOperand(0);
2643 }
2644
2645 AM.IndexReg = AM.Base_Reg = Reg;
2646 return false;
2647 }
2648 }
2649 break;
2650
2651 case ISD::SUB: {
2652 // Given A-B, if A can be completely folded into the address, leaving
2653 // the index field unused, use -B as the index.
2654 // This is a win if A has multiple parts that can be folded into
2655 // the address. Also, this saves a mov if the base register has
2656 // other uses, since it avoids a two-address sub instruction; however,
2657 // it costs an additional mov if the index register has other uses.
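// Illustrative example (hypothetical values): for "GV + i - j", where the
// "GV + i" part folds into the address, we can use j as a negated index,
// roughly
//   negq %rbx                  ; j
//   leaq GV(%rax,%rbx), %rcx   ; i in %rax
// instead of materializing the subtraction separately.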
2658
2659 // Add an artificial use to this node so that we can keep track of
2660 // it if it gets CSE'd with a different node.
2661 HandleSDNode Handle(N);
2662
2663 // Test if the LHS of the sub can be folded.
2664 X86ISelAddressMode Backup = AM;
2665 if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2666 N = Handle.getValue();
2667 AM = Backup;
2668 break;
2669 }
2670 N = Handle.getValue();
2671 // Test if the index field is free for use.
2672 if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2673 AM = Backup;
2674 break;
2675 }
2676
2677 int Cost = 0;
2678 SDValue RHS = N.getOperand(1);
2679 // If the RHS involves a register with multiple uses, this
2680 // transformation incurs an extra mov, due to the neg instruction
2681 // clobbering its operand.
2682 if (!RHS.getNode()->hasOneUse() ||
2683 RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2684 RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2685 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2686 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2687 RHS.getOperand(0).getValueType() == MVT::i32))
2688 ++Cost;
2689 // If the base is a register with multiple uses, this
2690 // transformation may save a mov.
2691 if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2692 !AM.Base_Reg.getNode()->hasOneUse()) ||
2693 AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2694 --Cost;
2695 // If the folded LHS was interesting, this transformation saves
2696 // address arithmetic.
2697 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2698 ((AM.Disp != 0) && (Backup.Disp == 0)) +
2699 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2700 --Cost;
2701 // If it doesn't look like it may be an overall win, don't do it.
2702 if (Cost >= 0) {
2703 AM = Backup;
2704 break;
2705 }
2706
2707 // Ok, the transformation is legal and appears profitable. Go for it.
2708 // Negation will be emitted later to avoid creating dangling nodes if this
2709 // was an unprofitable LEA.
2710 AM.IndexReg = RHS;
2711 AM.NegateIndex = true;
2712 AM.Scale = 1;
2713 return false;
2714 }
2715
2716 case ISD::OR:
2717 case ISD::XOR:
2718 // See if we can treat the OR/XOR node as an ADD node.
2719 if (!CurDAG->isADDLike(N))
2720 break;
2721 [[fallthrough]];
2722 case ISD::ADD:
2723 if (!matchAdd(N, AM, Depth))
2724 return false;
2725 break;
2726
2727 case ISD::AND: {
2728 // Perform some heroic transforms on an and of a constant-count shift
2729 // with a constant to enable use of the scaled offset field.
2730
2731 // Scale must not be used already.
2732 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2733
2734 // We only handle up to 64-bit values here as those are what matter for
2735 // addressing mode optimizations.
2736 assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2737 "Unexpected value size!");
2738
2739 if (!isa<ConstantSDNode>(N.getOperand(1)))
2740 break;
2741
2742 if (N.getOperand(0).getOpcode() == ISD::SRL) {
2743 SDValue Shift = N.getOperand(0);
2744 SDValue X = Shift.getOperand(0);
2745
2746 uint64_t Mask = N.getConstantOperandVal(1);
2747
2748 // Try to fold the mask and shift into an extract and scale.
2749 if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2750 return false;
2751
2752 // Try to fold the mask and shift directly into the scale.
2753 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2754 return false;
2755
2756 // Try to fold the mask and shift into BEXTR and scale.
2757 if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2758 return false;
2759 }
2760
2761 // Try to swap the mask and shift to place shifts which can be done as
2762 // a scale on the outside of the mask.
2763 if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2764 return false;
2765
2766 break;
2767 }
2768 case ISD::ZERO_EXTEND: {
2769 // Try to widen a zexted shift left to the same size as its use, so we can
2770 // match the shift as a scale factor.
2771 if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2772 break;
2773
2774 SDValue Src = N.getOperand(0);
2775
2776 // See if we can match a zext(addlike(x,c)).
2777 // TODO: Move more ZERO_EXTEND patterns into matchIndexRecursively.
2778 if (Src.getOpcode() == ISD::ADD || Src.getOpcode() == ISD::OR)
2779 if (SDValue Index = matchIndexRecursively(N, AM, Depth + 1))
2780 if (Index != N) {
2781 AM.IndexReg = Index;
2782 return false;
2783 }
2784
2785 // Peek through mask: zext(and(shl(x,c1),c2))
2786 APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits());
2787 if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
2788 if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
2789 Mask = MaskC->getAPIntValue();
2790 Src = Src.getOperand(0);
2791 }
2792
2793 if (Src.getOpcode() == ISD::SHL && Src.hasOneUse() && N->hasOneUse()) {
2794 // Give up if the shift is not a valid scale factor [1,2,3].
2795 SDValue ShlSrc = Src.getOperand(0);
2796 SDValue ShlAmt = Src.getOperand(1);
2797 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);
2798 if (!ShAmtC)
2799 break;
2800 unsigned ShAmtV = ShAmtC->getZExtValue();
2801 if (ShAmtV > 3)
2802 break;
2803
2804 // The narrow shift must only shift out zero bits (it must be 'nuw').
2805 // That makes it safe to widen to the destination type.
2806 APInt HighZeros =
2807 APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV);
2808 if (!Src->getFlags().hasNoUnsignedWrap() &&
2809 !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
2810 break;
2811
2812 // zext (shl nuw i8 %x, C1) to i32
2813 // --> shl (zext i8 %x to i32), (zext C1)
2814 // zext (and (shl nuw i8 %x, C1), C2) to i32
2815 // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1)
2816 MVT SrcVT = ShlSrc.getSimpleValueType();
2817 MVT VT = N.getSimpleValueType();
2818 SDLoc DL(N);
2819
2820 SDValue Res = ShlSrc;
2821 if (!Mask.isAllOnes()) {
2822 Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);
2823 insertDAGNode(*CurDAG, N, Res);
2824 Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);
2825 insertDAGNode(*CurDAG, N, Res);
2826 }
2827 SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res);
2828 insertDAGNode(*CurDAG, N, Zext);
2829 SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt);
2830 insertDAGNode(*CurDAG, N, NewShl);
2831 CurDAG->ReplaceAllUsesWith(N, NewShl);
2832 CurDAG->RemoveDeadNode(N.getNode());
2833
2834 // Convert the shift to scale factor.
2835 AM.Scale = 1 << ShAmtV;
2836 // If matchIndexRecursively is not called here,
2837 // Zext may be replaced by other nodes but still be used later to call a
2838 // builder method.
2839 AM.IndexReg = matchIndexRecursively(Zext, AM, Depth + 1);
2840 return false;
2841 }
2842
2843 if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes()) {
2844 // Try to fold the mask and shift into an extract and scale.
2845 if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src,
2846 Src.getOperand(0), AM))
2847 return false;
2848
2849 // Try to fold the mask and shift directly into the scale.
2850 if (!foldMaskAndShiftToScale(*CurDAG, N, Mask.getZExtValue(), Src,
2851 Src.getOperand(0), AM))
2852 return false;
2853
2854 // Try to fold the mask and shift into BEXTR and scale.
2855 if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask.getZExtValue(), Src,
2856 Src.getOperand(0), AM, *Subtarget))
2857 return false;
2858 }
2859
2860 break;
2861 }
2862 }
2863
2864 return matchAddressBase(N, AM);
2865}
2866
2867/// Helper for MatchAddress. Add the specified node to the
2868/// specified addressing mode without any further recursion.
2869bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2870 // Is the base register already occupied?
2871 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2872 // If so, check to see if the scale index register is set.
2873 if (!AM.IndexReg.getNode()) {
2874 AM.IndexReg = N;
2875 AM.Scale = 1;
2876 return false;
2877 }
2878
2879 // Otherwise, we cannot select it.
2880 return true;
2881 }
2882
2883 // Default, generate it as a register.
2884 AM.BaseType = X86ISelAddressMode::RegBase;
2885 AM.Base_Reg = N;
2886 return false;
2887}
2888
2889bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
2890 X86ISelAddressMode &AM,
2891 unsigned Depth) {
2892 LLVM_DEBUG({
2893 dbgs() << "MatchVectorAddress: ";
2894 AM.dump(CurDAG);
2895 });
2896 // Limit recursion.
2897 if (Depth >= SelectionDAG::MaxRecursionDepth)
2898 return matchAddressBase(N, AM);
2899
2900 // TODO: Support other operations.
2901 switch (N.getOpcode()) {
2902 case ISD::Constant: {
2903 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2904 if (!foldOffsetIntoAddress(Val, AM))
2905 return false;
2906 break;
2907 }
2908 case X86ISD::Wrapper:
2909 if (!matchWrapper(N, AM))
2910 return false;
2911 break;
2912 case ISD::ADD: {
2913 // Add an artificial use to this node so that we can keep track of
2914 // it if it gets CSE'd with a different node.
2915 HandleSDNode Handle(N);
2916
2917 X86ISelAddressMode Backup = AM;
2918 if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
2919 !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2920 Depth + 1))
2921 return false;
2922 AM = Backup;
2923
2924 // Try again after commuting the operands.
2925 if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
2926 Depth + 1) &&
2927 !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
2928 Depth + 1))
2929 return false;
2930 AM = Backup;
2931
2932 N = Handle.getValue();
2933 break;
2934 }
2935 }
2936
2937 return matchAddressBase(N, AM);
2938}
2939
2940/// Helper for selectVectorAddr. Handles things that can be folded into a
2941/// gather/scatter address. The index register and scale should have already
2942/// been handled.
2943bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2944 return matchVectorAddressRecursively(N, AM, 0);
2945}
2946
2947bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
2948 SDValue IndexOp, SDValue ScaleOp,
2949 SDValue &Base, SDValue &Scale,
2950 SDValue &Index, SDValue &Disp,
2951 SDValue &Segment) {
2952 X86ISelAddressMode AM;
2953 AM.Scale = ScaleOp->getAsZExtVal();
2954
2955 // Attempt to match index patterns, as long as we're not relying on implicit
2956 // sign-extension, which is performed BEFORE scale.
2957 if (IndexOp.getScalarValueSizeInBits() == BasePtr.getScalarValueSizeInBits())
2958 AM.IndexReg = matchIndexRecursively(IndexOp, AM, 0);
2959 else
2960 AM.IndexReg = IndexOp;
2961
2962 unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
2963 if (AddrSpace == X86AS::GS)
2964 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2965 if (AddrSpace == X86AS::FS)
2966 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2967 if (AddrSpace == X86AS::SS)
2968 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2969
2970 SDLoc DL(BasePtr);
2971 MVT VT = BasePtr.getSimpleValueType();
2972
2973 // Try to match into the base and displacement fields.
2974 if (matchVectorAddress(BasePtr, AM))
2975 return false;
2976
2977 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2978 return true;
2979}
2980
2981/// Returns true if it is able to pattern match an addressing mode.
2982/// It returns the operands which make up the maximal addressing mode it can
2983/// match by reference.
2984///
2985/// Parent is the parent node of the addr operand that is being matched. It
2986/// is always a load, store, atomic node, or null. It is only null when
2987/// checking memory operands for inline asm nodes.
2988bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2989 SDValue &Scale, SDValue &Index,
2990 SDValue &Disp, SDValue &Segment) {
2991 X86ISelAddressMode AM;
2992
2993 if (Parent &&
2994 // This list of opcodes covers all the nodes that have an "addr:$ptr" operand
2995 // that are not a MemSDNode, and thus don't have proper addrspace info.
2996 Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2997 Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2998 Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2999 Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
3000 Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
3001 Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
3002 Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
3003 unsigned AddrSpace =
3004 cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
3005 if (AddrSpace == X86AS::GS)
3006 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
3007 if (AddrSpace == X86AS::FS)
3008 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
3009 if (AddrSpace == X86AS::SS)
3010 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
3011 }
3012
3013 // Save the DL and VT before calling matchAddress, it can invalidate N.
3014 SDLoc DL(N);
3015 MVT VT = N.getSimpleValueType();
3016
3017 if (matchAddress(N, AM))
3018 return false;
3019
3020 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
3021 return true;
3022}
3023
3024bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
3025 // Cannot use 32 bit constants to reference objects in kernel/large code
3026 // model.
3027 if (TM.getCodeModel() == CodeModel::Kernel ||
3028 TM.getCodeModel() == CodeModel::Large)
3029 return false;
3030
3031 // In static codegen with small code model, we can get the address of a label
3032 // into a register with 'movl'
3033 if (N->getOpcode() != X86ISD::Wrapper)
3034 return false;
3035
3036 N = N.getOperand(0);
3037
3038 // At least GNU as does not accept 'movl' for TPOFF relocations.
3039 // FIXME: We could use 'movl' when we know we are targeting MC.
3040 if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
3041 return false;
3042
3043 Imm = N;
3044 // Small/medium code model can reference non-TargetGlobalAddress objects with
3045 // 32 bit constants.
3046 if (N->getOpcode() != ISD::TargetGlobalAddress) {
3047 return TM.getCodeModel() == CodeModel::Small ||
3048 TM.getCodeModel() == CodeModel::Medium;
3049 }
3050
3051 const GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
3052 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
3053 return CR->getUnsignedMax().ult(1ull << 32);
3054
3055 return !TM.isLargeGlobalValue(GV);
3056}
3057
3058bool X86DAGToDAGISel::selectLEA64_Addr(SDValue N, SDValue &Base, SDValue &Scale,
3059 SDValue &Index, SDValue &Disp,
3060 SDValue &Segment) {
3061 // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
3062 SDLoc DL(N);
3063
3064 if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
3065 return false;
3066
3067 EVT BaseType = Base.getValueType();
3068 unsigned SubReg;
3069 if (BaseType == MVT::i8)
3070 SubReg = X86::sub_8bit;
3071 else if (BaseType == MVT::i16)
3072 SubReg = X86::sub_16bit;
3073 else
3074 SubReg = X86::sub_32bit;
3075
3076 RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
3077 if (RN && RN->getReg() == 0)
3078 Base = CurDAG->getRegister(0, MVT::i64);
3079 else if ((BaseType == MVT::i8 || BaseType == MVT::i16 ||
3080 BaseType == MVT::i32) &&
3081 !isa<FrameIndexSDNode>(Base)) {
3082 // Base could already be %rip, particularly in the x32 ABI.
3083 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
3084 MVT::i64), 0);
3085 Base = CurDAG->getTargetInsertSubreg(SubReg, DL, MVT::i64, ImplDef, Base);
3086 }
3087
3088 [[maybe_unused]] EVT IndexType = Index.getValueType();
3089 RN = dyn_cast<RegisterSDNode>(Index);
3090 if (RN && RN->getReg() == 0)
3091 Index = CurDAG->getRegister(0, MVT::i64);
3092 else {
3093 assert((IndexType == BaseType) &&
3094 "Expect to be extending 8/16/32-bit registers for use in LEA");
3095 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
3096 MVT::i64), 0);
3097 Index = CurDAG->getTargetInsertSubreg(SubReg, DL, MVT::i64, ImplDef, Index);
3098 }
3099
3100 return true;
3101}
3102
3103/// Calls SelectAddr and determines if the maximal addressing
3104/// mode it matches can be cost effectively emitted as an LEA instruction.
3105bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
3106 SDValue &Base, SDValue &Scale,
3107 SDValue &Index, SDValue &Disp,
3108 SDValue &Segment) {
3109 X86ISelAddressMode AM;
3110
3111 // Save the DL and VT before calling matchAddress, it can invalidate N.
3112 SDLoc DL(N);
3113 MVT VT = N.getSimpleValueType();
3114
3115 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
3116 // segments.
3117 SDValue Copy = AM.Segment;
3118 SDValue T = CurDAG->getRegister(0, MVT::i32);
3119 AM.Segment = T;
3120 if (matchAddress(N, AM))
3121 return false;
3122 assert (T == AM.Segment);
3123 AM.Segment = Copy;
3124
3125 unsigned Complexity = 0;
3126 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
3127 Complexity = 1;
3128 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
3129 Complexity = 4;
3130
3131 if (AM.IndexReg.getNode())
3132 Complexity++;
3133
3134 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
3135 // a simple shift.
3136 if (AM.Scale > 1)
3137 Complexity++;
3138
3139 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
3140 // to a LEA. This is determined with some experimentation but is by no means
3141 // optimal (especially for code size consideration). LEA is nice because of
3142 // its three-address nature. Tweak the cost function again when we can run
3143 // convertToThreeAddress() at register allocation time.
3144 if (AM.hasSymbolicDisplacement()) {
3145 // For X86-64, always use LEA to materialize RIP-relative addresses.
3146 if (Subtarget->is64Bit())
3147 Complexity = 4;
3148 else
3149 Complexity += 2;
3150 }
3151
3152 // Heuristic: try harder to form an LEA from ADD if the operands set flags.
3153 // Unlike ADD, LEA does not affect flags, so we will be less likely to require
3154 // duplicating flag-producing instructions later in the pipeline.
3155 if (N.getOpcode() == ISD::ADD) {
3156 auto isMathWithFlags = [](SDValue V) {
3157 switch (V.getOpcode()) {
3158 case X86ISD::ADD:
3159 case X86ISD::SUB:
3160 case X86ISD::ADC:
3161 case X86ISD::SBB:
3162 case X86ISD::SMUL:
3163 case X86ISD::UMUL:
3164 /* TODO: These opcodes can be added safely, but we may want to justify
3165 their inclusion for different reasons (better for reg-alloc).
3166 case X86ISD::OR:
3167 case X86ISD::XOR:
3168 case X86ISD::AND:
3169 */
3170 // Value 1 is the flag output of the node - verify it's not dead.
3171 return !SDValue(V.getNode(), 1).use_empty();
3172 default:
3173 return false;
3174 }
3175 };
3176 // TODO: We might want to factor in whether there's a load folding
3177 // opportunity for the math op that disappears with LEA.
3178 if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1)))
3179 Complexity++;
3180 }
3181
3182 if (AM.Disp)
3183 Complexity++;
3184
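 // For instance, a match like [%rbx + %rcx*4 + 16] tallies base(1) + index(1)
 // + scale(1) + disp(1) = 4 and is kept as an LEA, while plain [%rbx + 8]
 // only reaches 2 and is left to normal selection (typically an ADD).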
3185 // If it isn't worth using an LEA, reject it.
3186 if (Complexity <= 2)
3187 return false;
3188
3189 getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
3190 return true;
3191}
3192
3193/// This is only run on TargetGlobalTLSAddress nodes.
3194bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
3195 SDValue &Scale, SDValue &Index,
3196 SDValue &Disp, SDValue &Segment) {
3197 assert(N.getOpcode() == ISD::TargetGlobalTLSAddress ||
3198 N.getOpcode() == ISD::TargetExternalSymbol);
3199
3200 X86ISelAddressMode AM;
3201 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) {
3202 AM.GV = GA->getGlobal();
3203 AM.Disp += GA->getOffset();
3204 AM.SymbolFlags = GA->getTargetFlags();
3205 } else {
3206 auto *SA = cast<ExternalSymbolSDNode>(N);
3207 AM.ES = SA->getSymbol();
3208 AM.SymbolFlags = SA->getTargetFlags();
3209 }
3210
3211 if (Subtarget->is32Bit()) {
3212 AM.Scale = 1;
3213 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
3214 }
3215
3216 MVT VT = N.getSimpleValueType();
3217 getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
3218 return true;
3219}
3220
3221bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
3222 // Keep track of the original value type and whether this value was
3223 // truncated. If we see a truncation from pointer type to VT that truncates
3224 // bits that are known to be zero, we can use a narrow reference.
3225 EVT VT = N.getValueType();
3226 bool WasTruncated = false;
3227 if (N.getOpcode() == ISD::TRUNCATE) {
3228 WasTruncated = true;
3229 N = N.getOperand(0);
3230 }
3231
3232 if (N.getOpcode() != X86ISD::Wrapper)
3233 return false;
3234
3235 // We can only use non-GlobalValues as immediates if they were not truncated,
3236 // as we do not have any range information. If we have a GlobalValue and the
3237 // address was not truncated, we can select it as an operand directly.
3238 unsigned Opc = N.getOperand(0)->getOpcode();
3239 if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
3240 Op = N.getOperand(0);
3241 // We can only select the operand directly if we didn't have to look past a
3242 // truncate.
3243 return !WasTruncated;
3244 }
3245
3246 // Check that the global's range fits into VT.
3247 auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
3248 std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
3249 if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
3250 return false;
3251
3252 // Okay, we can use a narrow reference.
3253 Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
3254 GA->getOffset(), GA->getTargetFlags());
3255 return true;
3256}
3257
3258bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
3259 SDValue &Base, SDValue &Scale,
3260 SDValue &Index, SDValue &Disp,
3261 SDValue &Segment) {
3262 assert(Root && P && "Unknown root/parent nodes");
3263 if (!ISD::isNON_EXTLoad(N.getNode()) ||
3264 !IsProfitableToFold(N, P, Root) ||
3265 !IsLegalToFold(N, P, Root, OptLevel))
3266 return false;
3267
3268 return selectAddr(N.getNode(),
3269 N.getOperand(1), Base, Scale, Index, Disp, Segment);
3270}
3271
3272bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
3273 SDValue &Base, SDValue &Scale,
3274 SDValue &Index, SDValue &Disp,
3275 SDValue &Segment) {
3276 assert(Root && P && "Unknown root/parent nodes");
3277 if (N->getOpcode() != X86ISD::VBROADCAST_LOAD ||
3278 !IsProfitableToFold(N, P, Root) ||
3279 !IsLegalToFold(N, P, Root, OptLevel))
3280 return false;
3281
3282 return selectAddr(N.getNode(),
3283 N.getOperand(1), Base, Scale, Index, Disp, Segment);
3284}
3285
3286/// Return an SDNode that returns the value of the global base register.
3287/// Output instructions required to initialize the global base register,
3288/// if necessary.
3289SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
3290 Register GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
3291 auto &DL = MF->getDataLayout();
3292 return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
3293}
3294
3295bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
3296 if (N->getOpcode() == ISD::TRUNCATE)
3297 N = N->getOperand(0).getNode();
3298 if (N->getOpcode() != X86ISD::Wrapper)
3299 return false;
3300
3301 auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
3302 if (!GA)
3303 return false;
3304
3305 auto *GV = GA->getGlobal();
3306 std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange();
3307 if (CR)
3308 return CR->getSignedMin().sge(-1ull << Width) &&
3309 CR->getSignedMax().slt(1ull << Width);
3310 // In the kernel code model, globals are in the negative 2GB of the address
3311 // space, so globals can be a sign extended 32-bit immediate.
3312 // In other code models, small globals are in the low 2GB of the address
3313 // space, so sign extending them is equivalent to zero extending them.
3314 return Width == 32 && !TM.isLargeGlobalValue(GV);
3315}
3316
3317X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const {
3318 assert(N->isMachineOpcode() && "Unexpected node");
3319 unsigned Opc = N->getMachineOpcode();
3320 const MCInstrDesc &MCID = getInstrInfo()->get(Opc);
3321 int CondNo = X86::getCondSrcNoFromDesc(MCID);
3322 if (CondNo < 0)
3323 return X86::COND_INVALID;
3324
3325 return static_cast<X86::CondCode>(N->getConstantOperandVal(CondNo));
3326}
3327
3328/// Return true if the given X86ISD::CMP node has no users that use a flag
3329/// other than ZF.
3330bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
3331 // Examine each user of the node.
3332 for (SDUse &Use : Flags->uses()) {
3333 // Only check things that use the flags.
3334 if (Use.getResNo() != Flags.getResNo())
3335 continue;
3336 SDNode *User = Use.getUser();
3337 // Only examine CopyToReg uses that copy to EFLAGS.
3338 if (User->getOpcode() != ISD::CopyToReg ||
3339 cast<RegisterSDNode>(User->getOperand(1))->getReg() != X86::EFLAGS)
3340 return false;
3341 // Examine each user of the CopyToReg use.
3342 for (SDUse &FlagUse : User->uses()) {
3343 // Only examine the Flag result.
3344 if (FlagUse.getResNo() != 1)
3345 continue;
3346 // Anything unusual: assume conservatively.
3347 if (!FlagUse.getUser()->isMachineOpcode())
3348 return false;
3349 // Examine the condition code of the user.
3350 X86::CondCode CC = getCondFromNode(FlagUse.getUser());
3351
3352 switch (CC) {
3353 // Comparisons which only use the zero flag.
3354 case X86::COND_E: case X86::COND_NE:
3355 continue;
3356 // Anything else: assume conservatively.
3357 default:
3358 return false;
3359 }
3360 }
3361 }
3362 return true;
3363}
3364
3365/// Return true if the given X86ISD::CMP node has no uses which require the
3366/// SF flag to be accurate.
3367bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
3368 // Examine each user of the node.
3369 for (SDUse &Use : Flags->uses()) {
3370 // Only check things that use the flags.
3371 if (Use.getResNo() != Flags.getResNo())
3372 continue;
3373 SDNode *User = Use.getUser();
3374 // Only examine CopyToReg uses that copy to EFLAGS.
3375 if (User->getOpcode() != ISD::CopyToReg ||
3376 cast<RegisterSDNode>(User->getOperand(1))->getReg() != X86::EFLAGS)
3377 return false;
3378 // Examine each user of the CopyToReg use.
3379 for (SDUse &FlagUse : User->uses()) {
3380 // Only examine the Flag result.
3381 if (FlagUse.getResNo() != 1)
3382 continue;
3383 // Anything unusual: assume conservatively.
3384 if (!FlagUse.getUser()->isMachineOpcode())
3385 return false;
3386 // Examine the condition code of the user.
3387 X86::CondCode CC = getCondFromNode(FlagUse.getUser());
3388
3389 switch (CC) {
3390 // Comparisons which don't examine the SF flag.
3391 case X86::COND_A: case X86::COND_AE:
3392 case X86::COND_B: case X86::COND_BE:
3393 case X86::COND_E: case X86::COND_NE:
3394 case X86::COND_O: case X86::COND_NO:
3395 case X86::COND_P: case X86::COND_NP:
3396 continue;
3397 // Anything else: assume conservatively.
3398 default:
3399 return false;
3400 }
3401 }
3402 }
3403 return true;
3404}
3405
3406static bool mayUseCarryFlag(X86::CondCode CC) {
3407 switch (CC) {
3408 // Comparisons which don't examine the CF flag.
3409 case X86::COND_O: case X86::COND_NO:
3410 case X86::COND_E: case X86::COND_NE:
3411 case X86::COND_S: case X86::COND_NS:
3412 case X86::COND_P: case X86::COND_NP:
3413 case X86::COND_L: case X86::COND_GE:
3414 case X86::COND_G: case X86::COND_LE:
3415 return false;
3416 // Anything else: assume conservatively.
3417 default:
3418 return true;
3419 }
3420}
3421
3422/// Return true if the given flag-setting node has no uses which require the
3423/// CF flag to be accurate.
3424 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
3425 // Examine each user of the node.
3426 for (SDUse &Use : Flags->uses()) {
3427 // Only check things that use the flags.
3428 if (Use.getResNo() != Flags.getResNo())
3429 continue;
3430
3431 SDNode *User = Use.getUser();
3432 unsigned UserOpc = User->getOpcode();
3433
3434 if (UserOpc == ISD::CopyToReg) {
3435 // Only examine CopyToReg uses that copy to EFLAGS.
3436 if (cast<RegisterSDNode>(User->getOperand(1))->getReg() != X86::EFLAGS)
3437 return false;
3438 // Examine each user of the CopyToReg use.
3439 for (SDUse &FlagUse : User->uses()) {
3440 // Only examine the Flag result.
3441 if (FlagUse.getResNo() != 1)
3442 continue;
3443 // Anything unusual: assume conservatively.
3444 if (!FlagUse.getUser()->isMachineOpcode())
3445 return false;
3446 // Examine the condition code of the user.
3447 X86::CondCode CC = getCondFromNode(FlagUse.getUser());
3448
3449 if (mayUseCarryFlag(CC))
3450 return false;
3451 }
3452
3453 // This CopyToReg is ok. Move on to the next user.
3454 continue;
3455 }
3456
3457 // This might be an unselected node. So look for the pre-isel opcodes that
3458 // use flags.
3459 unsigned CCOpNo;
3460 switch (UserOpc) {
3461 default:
3462 // Something unusual. Be conservative.
3463 return false;
3464 case X86ISD::SETCC: CCOpNo = 0; break;
3465 case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
3466 case X86ISD::CMOV: CCOpNo = 2; break;
3467 case X86ISD::BRCOND: CCOpNo = 2; break;
3468 }
3469
3470 X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo);
3471 if (mayUseCarryFlag(CC))
3472 return false;
3473 }
3474 return true;
3475}
3476
3477/// Check whether or not the chain ending in StoreNode is suitable for doing
3478/// the {load; op; store} to modify transformation.
3479static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
3480 SDValue StoredVal, SelectionDAG *CurDAG,
3481 unsigned LoadOpNo,
3482 LoadSDNode *&LoadNode,
3483 SDValue &InputChain) {
3484 // Is the stored value result 0 of the operation?
3485 if (StoredVal.getResNo() != 0) return false;
3486
3487 // Are there other uses of the operation other than the store?
3488 if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
3489
3490 // Is the store non-extending and non-indexed?
3491 if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
3492 return false;
3493
3494 SDValue Load = StoredVal->getOperand(LoadOpNo);
3495 // Is the stored value a non-extending and non-indexed load?
3496 if (!ISD::isNormalLoad(Load.getNode())) return false;
3497
3498 // Return LoadNode by reference.
3499 LoadNode = cast<LoadSDNode>(Load);
3500
3501 // Is store the only read of the loaded value?
3502 if (!Load.hasOneUse())
3503 return false;
3504
3505 // Is the address of the store the same as the load?
3506 if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
3507 LoadNode->getOffset() != StoreNode->getOffset())
3508 return false;
3509
3510 bool FoundLoad = false;
3511 SmallVector<SDValue, 4> ChainOps;
3512 SmallVector<const SDNode *, 4> LoopWorklist;
3513 SmallPtrSet<const SDNode *, 16> Visited;
3514 const unsigned int Max = 1024;
3515
3516 // Visualization of Load-Op-Store fusion:
3517 // -------------------------
3518 // Legend:
3519 // *-lines = Chain operand dependencies.
3520 // |-lines = Normal operand dependencies.
3521 // Dependencies flow down and right. n-suffix references multiple nodes.
3522 //
3523 // C Xn C
3524 // * * *
3525 // * * *
3526 // Xn A-LD Yn TF Yn
3527 // * * \ | * |
3528 // * * \ | * |
3529 // * * \ | => A--LD_OP_ST
3530 // * * \| \
3531 // TF OP \
3532 // * | \ Zn
3533 // * | \
3534 // A-ST Zn
3535 //
3536
3537 // This merge induces dependences from: #1: Xn -> LD, OP, Zn
3538 // #2: Yn -> LD
3539 // #3: ST -> Zn
3540
3541 // Ensure the transform is safe by checking for the dual
3542 // dependencies to make sure we do not induce a loop.
3543
3544 // As LD is a predecessor to both OP and ST we can do this by checking:
3545 // a). if LD is a predecessor to a member of Xn or Yn.
3546 // b). if a Zn is a predecessor to ST.
3547
3548 // However, (b) can only occur through being a chain predecessor to
3549 // ST, which is the same as Zn being a member or predecessor of Xn,
3550 // which is a subset of LD being a predecessor of Xn. So it's
3551 // subsumed by check (a).
3552
3553 SDValue Chain = StoreNode->getChain();
3554
3555 // Gather X elements in ChainOps.
3556 if (Chain == Load.getValue(1)) {
3557 FoundLoad = true;
3558 ChainOps.push_back(Load.getOperand(0));
3559 } else if (Chain.getOpcode() == ISD::TokenFactor) {
3560 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
3561 SDValue Op = Chain.getOperand(i);
3562 if (Op == Load.getValue(1)) {
3563 FoundLoad = true;
3564 // Drop Load, but keep its chain. No cycle check necessary.
3565 ChainOps.push_back(Load.getOperand(0));
3566 continue;
3567 }
3568 LoopWorklist.push_back(Op.getNode());
3569 ChainOps.push_back(Op);
3570 }
3571 }
3572
3573 if (!FoundLoad)
3574 return false;
3575
3576 // Worklist is currently Xn. Add Yn to worklist.
3577 for (SDValue Op : StoredVal->ops())
3578 if (Op.getNode() != LoadNode)
3579 LoopWorklist.push_back(Op.getNode());
3580
3581 // Check (a) if Load is a predecessor to Xn + Yn
3582 if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
3583 true))
3584 return false;
3585
3586 InputChain =
3587 CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
3588 return true;
3589}
3590
3591// Change a chain of {load; op; store} of the same value into a simple op
3592// through memory of that value, if the uses of the modified value and its
3593// address are suitable.
3594//
3595// The tablegen memory operand pattern is currently not able to match
3596// the case where the EFLAGS on the original operation are used.
3597//
3598// To move this to tablegen, we'll need to improve tablegen to allow flags to
3599// be transferred from a node in the pattern to the result node, probably with
3600// a new keyword. For example, we have this
3601// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
3602// [(store (add (loadi64 addr:$dst), -1), addr:$dst)]>;
3603// but maybe need something like this
3604// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
3605// [(store (X86add_flag (loadi64 addr:$dst), -1), addr:$dst),
3606// (transferrable EFLAGS)]>;
3607//
3608// Until then, we manually fold these and instruction select the operation
3609// here.
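// For instance, (store (add (load [mem]), 5), [mem]) with an i32 memory VT is
// selected below as a single ADD32mi against [mem], provided the load has no
// other users and shares the store's address.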
3610bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
3611 auto *StoreNode = cast<StoreSDNode>(Node);
3612 SDValue StoredVal = StoreNode->getOperand(1);
3613 unsigned Opc = StoredVal->getOpcode();
3614
3615 // Before we try to select anything, make sure this is memory operand size
3616 // and opcode we can handle. Note that this must match the code below that
3617 // actually lowers the opcodes.
3618 EVT MemVT = StoreNode->getMemoryVT();
3619 if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
3620 MemVT != MVT::i8)
3621 return false;
3622
3623 bool IsCommutable = false;
3624 bool IsNegate = false;
3625 switch (Opc) {
3626 default:
3627 return false;
3628 case X86ISD::SUB:
3629 IsNegate = isNullConstant(StoredVal.getOperand(0));
3630 break;
3631 case X86ISD::SBB:
3632 break;
3633 case X86ISD::ADD:
3634 case X86ISD::ADC:
3635 case X86ISD::AND:
3636 case X86ISD::OR:
3637 case X86ISD::XOR:
3638 IsCommutable = true;
3639 break;
3640 }
3641
3642 unsigned LoadOpNo = IsNegate ? 1 : 0;
3643 LoadSDNode *LoadNode = nullptr;
3644 SDValue InputChain;
3645 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
3646 LoadNode, InputChain)) {
3647 if (!IsCommutable)
3648 return false;
3649
3650 // This operation is commutable, try the other operand.
3651 LoadOpNo = 1;
3652 if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
3653 LoadNode, InputChain))
3654 return false;
3655 }
3656
3657 SDValue Base, Scale, Index, Disp, Segment;
3658 if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
3659 Segment))
3660 return false;
3661
3662 auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
3663 unsigned Opc8) {
3664 switch (MemVT.getSimpleVT().SimpleTy) {
3665 case MVT::i64:
3666 return Opc64;
3667 case MVT::i32:
3668 return Opc32;
3669 case MVT::i16:
3670 return Opc16;
3671 case MVT::i8:
3672 return Opc8;
3673 default:
3674 llvm_unreachable("Invalid size!");
3675 }
3676 };
3677
3678 MachineSDNode *Result;
3679 switch (Opc) {
3680 case X86ISD::SUB:
3681 // Handle negate.
3682 if (IsNegate) {
3683 unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
3684 X86::NEG8m);
3685 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3686 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3687 MVT::Other, Ops);
3688 break;
3689 }
3690 [[fallthrough]];
3691 case X86ISD::ADD:
3692 // Try to match inc/dec.
3693 if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
3694 bool IsOne = isOneConstant(StoredVal.getOperand(1));
3695 bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
3696 // ADD/SUB by 1/-1 can use inc/dec when the carry flag isn't used.
3697 if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
3698 unsigned NewOpc =
3699 ((Opc == X86ISD::ADD) == IsOne)
3700 ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3701 : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
3702 const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3703 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3704 MVT::Other, Ops);
3705 break;
3706 }
3707 }
3708 [[fallthrough]];
3709 case X86ISD::ADC:
3710 case X86ISD::SBB:
3711 case X86ISD::AND:
3712 case X86ISD::OR:
3713 case X86ISD::XOR: {
3714 auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
3715 switch (Opc) {
3716 case X86ISD::ADD:
3717 return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3718 X86::ADD8mr);
3719 case X86ISD::ADC:
3720 return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3721 X86::ADC8mr);
3722 case X86ISD::SUB:
3723 return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3724 X86::SUB8mr);
3725 case X86ISD::SBB:
3726 return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3727 X86::SBB8mr);
3728 case X86ISD::AND:
3729 return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3730 X86::AND8mr);
3731 case X86ISD::OR:
3732 return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3733 case X86ISD::XOR:
3734 return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3735 X86::XOR8mr);
3736 default:
3737 llvm_unreachable("Invalid opcode!");
3738 }
3739 };
3740 auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3741 switch (Opc) {
3742 case X86ISD::ADD:
3743 return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3744 X86::ADD8mi);
3745 case X86ISD::ADC:
3746 return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3747 X86::ADC8mi);
3748 case X86ISD::SUB:
3749 return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3750 X86::SUB8mi);
3751 case X86ISD::SBB:
3752 return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3753 X86::SBB8mi);
3754 case X86ISD::AND:
3755 return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3756 X86::AND8mi);
3757 case X86ISD::OR:
3758 return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3759 X86::OR8mi);
3760 case X86ISD::XOR:
3761 return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3762 X86::XOR8mi);
3763 default:
3764 llvm_unreachable("Invalid opcode!");
3765 }
3766 };
3767
3768 unsigned NewOpc = SelectRegOpcode(Opc);
3769 SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3770
3771 // See if the operand is a constant that we can fold into an immediate
3772 // operand.
3773 if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3774 int64_t OperandV = OperandC->getSExtValue();
3775
3776 // Check if we can shrink the operand enough to fit in an immediate (or
3777 // fit into a smaller immediate) by negating it and switching the
3778 // operation.
3779 if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3780 ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3781 (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3782 isInt<32>(-OperandV))) &&
3783 hasNoCarryFlagUses(StoredVal.getValue(1))) {
3784 OperandV = -OperandV;
3785 Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3786 }
3787
3788 if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3789 Operand = CurDAG->getSignedTargetConstant(OperandV, SDLoc(Node), MemVT);
3790 NewOpc = SelectImmOpcode(Opc);
3791 }
3792 }
3793
3794 if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3795 SDValue CopyTo =
3796 CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3797 StoredVal.getOperand(2), SDValue());
3798
3799 const SDValue Ops[] = {Base, Scale, Index, Disp,
3800 Segment, Operand, CopyTo, CopyTo.getValue(1)};
3801 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3802 Ops);
3803 } else {
3804 const SDValue Ops[] = {Base, Scale, Index, Disp,
3805 Segment, Operand, InputChain};
3806 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3807 Ops);
3808 }
3809 break;
3810 }
3811 default:
3812 llvm_unreachable("Invalid opcode!");
3813 }
3814
3815 MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3816 LoadNode->getMemOperand()};
3817 CurDAG->setNodeMemRefs(Result, MemOps);
3818
3819 // Update Load Chain uses as well.
3820 ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3821 ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3822 ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3823 CurDAG->RemoveDeadNode(Node);
3824 return true;
3825}
3826
3827// See if this is an X & Mask that we can match to BEXTR/BZHI.
3828// Where Mask is one of the following patterns:
3829// a) x & (1 << nbits) - 1
3830// b) x & ~(-1 << nbits)
3831// c) x & (-1 >> (32 - y))
3832// d) x << (32 - y) >> (32 - y)
3833// e) (1 << nbits) - 1
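// For instance, pattern a) "x & ((1 << n) - 1)" keeps the low n bits, which is
// exactly BZHI's semantics; with only BMI1 available it is emitted instead as
// a BEXTR whose control word packs the bit count into bits 15..8.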
3834bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3835 assert(
3836 (Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND ||
3837 Node->getOpcode() == ISD::SRL) &&
3838 "Should be either an and-mask, or right-shift after clearing high bits.");
3839
3840 // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
3841 if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3842 return false;
3843
3844 MVT NVT = Node->getSimpleValueType(0);
3845
3846 // Only supported for 32 and 64 bits.
3847 if (NVT != MVT::i32 && NVT != MVT::i64)
3848 return false;
3849
3850 SDValue NBits;
3851 bool NegateNBits;
3852
3853 // If we have BMI2's BZHI, we are ok with multi-use patterns.
3854 // Else, if we only have BMI1's BEXTR, we require one-use.
3855 const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
3856 auto checkUses = [AllowExtraUsesByDefault](
3857 SDValue Op, unsigned NUses,
3858 std::optional<bool> AllowExtraUses) {
3859 return AllowExtraUses.value_or(AllowExtraUsesByDefault) ||
3860 Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3861 };
3862 auto checkOneUse = [checkUses](SDValue Op,
3863 std::optional<bool> AllowExtraUses =
3864 std::nullopt) {
3865 return checkUses(Op, 1, AllowExtraUses);
3866 };
3867 auto checkTwoUse = [checkUses](SDValue Op,
3868 std::optional<bool> AllowExtraUses =
3869 std::nullopt) {
3870 return checkUses(Op, 2, AllowExtraUses);
3871 };
3872
3873 auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3874 if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3875 assert(V.getSimpleValueType() == MVT::i32 &&
3876 V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3877 "Expected i64 -> i32 truncation");
3878 V = V.getOperand(0);
3879 }
3880 return V;
3881 };
3882
3883 // a) x & ((1 << nbits) + (-1))
3884 auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
3885 &NegateNBits](SDValue Mask) -> bool {
3886 // Match `add`. Must only have one use!
3887 if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3888 return false;
3889 // We should be adding an all-ones constant (i.e. subtracting one).
3890 if (!isAllOnesConstant(Mask->getOperand(1)))
3891 return false;
3892 // Match `1 << nbits`. Might be truncated. Must only have one use!
3893 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3894 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3895 return false;
3896 if (!isOneConstant(M0->getOperand(0)))
3897 return false;
3898 NBits = M0->getOperand(1);
3899 NegateNBits = false;
3900 return true;
3901 };
3902
3903 auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3904 V = peekThroughOneUseTruncation(V);
3905 return CurDAG->MaskedValueIsAllOnes(
3906 V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3907 NVT.getSizeInBits()));
3908 };
3909
3910 // b) x & ~(-1 << nbits)
3911 auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3912 &NBits, &NegateNBits](SDValue Mask) -> bool {
3913 // Match `~()`. Must only have one use!
3914 if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3915 return false;
3916 // The -1 only has to be all-ones for the final Node's NVT.
3917 if (!isAllOnes(Mask->getOperand(1)))
3918 return false;
3919 // Match `-1 << nbits`. Might be truncated. Must only have one use!
3920 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3921 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3922 return false;
3923 // The -1 only has to be all-ones for the final Node's NVT.
3924 if (!isAllOnes(M0->getOperand(0)))
3925 return false;
3926 NBits = M0->getOperand(1);
3927 NegateNBits = false;
3928 return true;
3929 };
3930
3931 // Try to match potentially-truncated shift amount as `(bitwidth - y)`,
3932 // or leave the shift amount as-is, but then we'll have to negate it.
3933 auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt,
3934 unsigned Bitwidth) {
3935 NBits = ShiftAmt;
3936 NegateNBits = true;
3937 // Skip over a truncate of the shift amount, if any.
3938 if (NBits.getOpcode() == ISD::TRUNCATE)
3939 NBits = NBits.getOperand(0);
3940 // Try to match the shift amount as (bitwidth - y). It should go away, too.
3941 // If it doesn't match, that's fine, we'll just negate it ourselves.
3942 if (NBits.getOpcode() != ISD::SUB)
3943 return;
3944 auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
3945 if (!V0 || V0->getZExtValue() != Bitwidth)
3946 return;
3947 NBits = NBits.getOperand(1);
3948 NegateNBits = false;
3949 };
3950
3951 // c) x & (-1 >> z) but then we'll have to subtract z from bitwidth
3952 // or
3953 // c) x & (-1 >> (32 - y))
3954 auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
3955 canonicalizeShiftAmt](SDValue Mask) -> bool {
3956 // The mask itself may be truncated.
3957 Mask = peekThroughOneUseTruncation(Mask);
3958 unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3959 // Match `l>>`. Must only have one use!
3960 if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3961 return false;
3962 // We should be shifting truly all-ones constant.
3963 if (!isAllOnesConstant(Mask.getOperand(0)))
3964 return false;
3965 SDValue M1 = Mask.getOperand(1);
3966 // The shift amount should not be used externally.
3967 if (!checkOneUse(M1))
3968 return false;
3969 canonicalizeShiftAmt(M1, Bitwidth);
3970 // Pattern c. is non-canonical, and is expanded into pattern d. iff there
3971 // is no extra use of the mask. Clearly, there was one since we are here.
3972 // But at the same time, if we need to negate the shift amount,
3973 // then we don't want the mask to stick around, else it's unprofitable.
3974 return !NegateNBits;
3975 };
3976
3977 SDValue X;
3978
3979 // d) x << z >> z but then we'll have to subtract z from bitwidth
3980 // or
3981 // d) x << (32 - y) >> (32 - y)
3982 auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
3983 AllowExtraUsesByDefault, &NegateNBits,
3984 &X](SDNode *Node) -> bool {
3985 if (Node->getOpcode() != ISD::SRL)
3986 return false;
3987 SDValue N0 = Node->getOperand(0);
3988 if (N0->getOpcode() != ISD::SHL)
3989 return false;
3990 unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3991 SDValue N1 = Node->getOperand(1);
3992 SDValue N01 = N0->getOperand(1);
3993 // Both of the shifts must be by the exact same value.
3994 if (N1 != N01)
3995 return false;
3996 canonicalizeShiftAmt(N1, Bitwidth);
3997 // There should not be any external uses of the inner shift / shift amount.
3998 // Note that while we are generally okay with external uses given BMI2,
3999 // iff we need to negate the shift amount, we are not okay with extra uses.
4000 const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
4001 if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
4002 return false;
4003 X = N0->getOperand(0);
4004 return true;
4005 };
4006
4007 auto matchLowBitMask = [matchPatternA, matchPatternB,
4008 matchPatternC](SDValue Mask) -> bool {
4009 return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
4010 };
4011
4012 if (Node->getOpcode() == ISD::AND) {
4013 X = Node->getOperand(0);
4014 SDValue Mask = Node->getOperand(1);
4015
4016 if (matchLowBitMask(Mask)) {
4017 // Great.
4018 } else {
4019 std::swap(X, Mask);
4020 if (!matchLowBitMask(Mask))
4021 return false;
4022 }
4023 } else if (matchLowBitMask(SDValue(Node, 0))) {
4024 X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT);
4025 } else if (!matchPatternD(Node))
4026 return false;
4027
4028 // If we need to negate the shift amount, require BMI2 BZHI support.
4029 // It's just too unprofitable for BMI1 BEXTR.
4030 if (NegateNBits && !Subtarget->hasBMI2())
4031 return false;
4032
4033 SDLoc DL(Node);
4034
4035 // Truncate the shift amount.
4036 NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
4037 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4038
4039 // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
4040 // All the other bits are undefined, we do not care about them.
4041 SDValue ImplDef = SDValue(
4042 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
4043 insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
4044
4045 SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
4046 insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
4047 NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
4048 MVT::i32, ImplDef, NBits, SRIdxVal),
4049 0);
4050 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4051
4052 // We might have matched the amount of high bits to be cleared,
4053 // but we want the amount of low bits to be kept, so negate it then.
4054 if (NegateNBits) {
4055 SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32);
4056 insertDAGNode(*CurDAG, SDValue(Node, 0), BitWidthC);
4057
4058 NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits);
4059 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4060 }
4061
4062 if (Subtarget->hasBMI2()) {
4063 // Great, just emit the BZHI..
4064 if (NVT != MVT::i32) {
4065 // But have to place the bit count into the wide-enough register first.
4066 NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
4067 insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
4068 }
4069
4070 SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
4071 ReplaceNode(Node, Extract.getNode());
4072 SelectCode(Extract.getNode());
4073 return true;
4074 }
4075
4076 // Else, if we do *NOT* have BMI2, let's find out if the 'X' is
4077 // *logically* shifted (potentially with a one-use trunc in between),
4078 // and the truncation was the only use of the shift,
4079 // and if so look past the one-use truncation.
4080 {
4081 SDValue RealX = peekThroughOneUseTruncation(X);
4082 // FIXME: only if the shift is one-use?
4083 if (RealX != X && RealX.getOpcode() == ISD::SRL)
4084 X = RealX;
4085 }
4086
4087 MVT XVT = X.getSimpleValueType();
4088
4089 // Else, emitting BEXTR requires one more step.
4090 // The 'control' of BEXTR has the pattern of:
4091 // [15...8 bit][ 7...0 bit] location
4092 // [ bit count][ shift] name
4093 // I.e. 0b00000010'00000001 means (x >> 0b1) & 0b11
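 // For instance, keeping 3 bits of a value previously shifted right by 5
 // produces Control = (3 << 8) | 5 = 0x0305 below.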
4094
4095 // Shift NBits left by 8 bits, thus producing 'control'.
4096 // This makes the low 8 bits to be zero.
4097 SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
4098 insertDAGNode(*CurDAG, SDValue(Node, 0), C8);
4099 SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
4100 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4101
4102 // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
4103 // FIXME: only if the shift is one-use?
4104 if (X.getOpcode() == ISD::SRL) {
4105 SDValue ShiftAmt = X.getOperand(1);
4106 X = X.getOperand(0);
4107
4108 assert(ShiftAmt.getValueType() == MVT::i8 &&
4109 "Expected shift amount to be i8");
4110
4111 // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
4112 // We could zext to i16 in some form, but we intentionally don't do that.
4113 SDValue OrigShiftAmt = ShiftAmt;
4114 ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
4115 insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
4116
4117 // And now 'or' these low 8 bits of shift amount into the 'control'.
4118 Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
4119 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4120 }
4121
4122 // But have to place the 'control' into the wide-enough register first.
4123 if (XVT != MVT::i32) {
4124 Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
4125 insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
4126 }
4127
4128 // And finally, form the BEXTR itself.
4129 SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
4130
4131 // The 'X' was originally truncated. Do that now.
4132 if (XVT != NVT) {
4133 insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
4134 Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
4135 }
4136
4137 ReplaceNode(Node, Extract.getNode());
4138 SelectCode(Extract.getNode());
4139
4140 return true;
4141}
4142
4143// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
4144MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
4145 MVT NVT = Node->getSimpleValueType(0);
4146 SDLoc dl(Node);
4147
4148 SDValue N0 = Node->getOperand(0);
4149 SDValue N1 = Node->getOperand(1);
4150
4151 // If we have TBM we can use an immediate for the control. If we have BMI
4152 // we should only do this if the BEXTR instruction is implemented well.
4153 // Otherwise moving the control into a register makes this more costly.
4154 // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
4155 // hoisting the move immediate would make it worthwhile with a less optimal
4156 // BEXTR?
4157 bool PreferBEXTR =
4158 Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
4159 if (!PreferBEXTR && !Subtarget->hasBMI2())
4160 return nullptr;
4161
4162 // Must have a shift right.
4163 if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
4164 return nullptr;
4165
4166 // Shift can't have additional users.
4167 if (!N0->hasOneUse())
4168 return nullptr;
4169
4170 // Only supported for 32 and 64 bits.
4171 if (NVT != MVT::i32 && NVT != MVT::i64)
4172 return nullptr;
4173
4174 // Shift amount and RHS of and must be constant.
4175 auto *MaskCst = dyn_cast<ConstantSDNode>(N1);
4176 auto *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4177 if (!MaskCst || !ShiftCst)
4178 return nullptr;
4179
4180 // And RHS must be a mask.
4181 uint64_t Mask = MaskCst->getZExtValue();
4182 if (!isMask_64(Mask))
4183 return nullptr;
4184
4185 uint64_t Shift = ShiftCst->getZExtValue();
4186 uint64_t MaskSize = llvm::popcount(Mask);
4187
4188 // Don't interfere with something that can be handled by extracting AH.
4189 // TODO: If we are able to fold a load, BEXTR might still be better than AH.
4190 if (Shift == 8 && MaskSize == 8)
4191 return nullptr;
4192
4193 // Make sure we are only using bits that were in the original value, not
4194 // shifted in.
4195 if (Shift + MaskSize > NVT.getSizeInBits())
4196 return nullptr;
4197
4198 // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
4199 // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
4200 // does not fit into 32 bits. Load folding is not a sufficient reason.
4201 if (!PreferBEXTR && MaskSize <= 32)
4202 return nullptr;
4203
4204 SDValue Control;
4205 unsigned ROpc, MOpc;
4206
4207#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
4208 if (!PreferBEXTR) {
4209 assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
4210 // If we can't make use of BEXTR then we can't fuse shift+mask stages.
4211 // Let's perform the mask first, and apply shift later. Note that we need to
4212 // widen the mask to account for the fact that we'll apply shift afterwards!
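 // For instance, on i64 a pattern like (x >> 8) & 0xffffffffff (40 mask bits)
 // becomes BZHI64 with a bound of 8 + 40 = 48, followed by a 64-bit SHR by 8.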
4213 Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
4214 ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
4215 : GET_EGPR_IF_ENABLED(X86::BZHI32rr);
4216 MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
4217 : GET_EGPR_IF_ENABLED(X86::BZHI32rm);
4218 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
4219 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4220 } else {
4221 // The 'control' of BEXTR has the pattern of:
4222 // [15...8 bit][ 7...0 bit] location
4223 // [ bit count][ shift] name
4224 // I.e. 0b00000010'00000001 means (x >> 0b1) & 0b11
4225 Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
4226 if (Subtarget->hasTBM()) {
4227 ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
4228 MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
4229 } else {
4230 assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
4231 // BMI requires the immediate to be placed in a register.
4232 ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
4233 : GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
4234 MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
4235 : GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
4236 unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
4237 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4238 }
4239 }
4240
4241 MachineSDNode *NewNode;
4242 SDValue Input = N0->getOperand(0);
4243 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4244 if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4245 SDValue Ops[] = {
4246 Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
4247 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4248 NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4249 // Update the chain.
4250 ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
4251 // Record the mem-refs
4252 CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
4253 } else {
4254 NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
4255 }
4256
4257 if (!PreferBEXTR) {
4258 // We still need to apply the shift.
4259 SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
4260 unsigned NewOpc = NVT == MVT::i64 ? GET_ND_IF_ENABLED(X86::SHR64ri)
4261 : GET_ND_IF_ENABLED(X86::SHR32ri);
4262 NewNode =
4263 CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
4264 }
4265
4266 return NewNode;
4267}
4268
4269// Emit a PCMPISTR(I/M) instruction.
4270MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
4271 bool MayFoldLoad, const SDLoc &dl,
4272 MVT VT, SDNode *Node) {
4273 SDValue N0 = Node->getOperand(0);
4274 SDValue N1 = Node->getOperand(1);
4275 SDValue Imm = Node->getOperand(2);
4276 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4277 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
4278
4279 // Try to fold a load. No need to check alignment.
4280 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4281 if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4282 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4283 N1.getOperand(0) };
4284 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
4285 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4286 // Update the chain.
4287 ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
4288 // Record the mem-refs
4289 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4290 return CNode;
4291 }
4292
4293 SDValue Ops[] = { N0, N1, Imm };
4294 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
4295 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
4296 return CNode;
4297}
4298
4299// Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
4300// to emit a second instruction after this one. This is needed since we have two
4301// copyToReg nodes glued before this and we need to continue that glue through.
4302MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
4303 bool MayFoldLoad, const SDLoc &dl,
4304 MVT VT, SDNode *Node,
4305 SDValue &InGlue) {
4306 SDValue N0 = Node->getOperand(0);
4307 SDValue N2 = Node->getOperand(2);
4308 SDValue Imm = Node->getOperand(4);
4309 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4310 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
4311
4312 // Try to fold a load. No need to check alignment.
4313 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4314 if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4315 SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4316 N2.getOperand(0), InGlue };
4317 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
4318 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4319 InGlue = SDValue(CNode, 3);
4320 // Update the chain.
4321 ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
4322 // Record the mem-refs
4323 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
4324 return CNode;
4325 }
4326
4327 SDValue Ops[] = { N0, N2, Imm, InGlue };
4328 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
4329 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
4330 InGlue = SDValue(CNode, 2);
4331 return CNode;
4332}
4333
4334bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4335 EVT VT = N->getValueType(0);
4336
4337 // Only handle scalar shifts.
4338 if (VT.isVector())
4339 return false;
4340
4341 // Narrower shifts only mask to 5 bits in hardware.
4342 unsigned Size = VT == MVT::i64 ? 64 : 32;
4343
4344 SDValue OrigShiftAmt = N->getOperand(1);
4345 SDValue ShiftAmt = OrigShiftAmt;
4346 SDLoc DL(N);
4347
4348 // Skip over a truncate of the shift amount.
4349 if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
4350 ShiftAmt = ShiftAmt->getOperand(0);
4351
4352 // This function is called after X86DAGToDAGISel::matchBitExtract(),
4353 // so we are not afraid that we might mess up BZHI/BEXTR pattern.
4354
4355 SDValue NewShiftAmt;
4356 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB ||
4357 ShiftAmt->getOpcode() == ISD::XOR) {
4358 SDValue Add0 = ShiftAmt->getOperand(0);
4359 SDValue Add1 = ShiftAmt->getOperand(1);
4360 auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
4361 auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
4362 // If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X
4363 // to avoid the ADD/SUB/XOR.
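 // For instance, (x << (amt + 64)) on an i64 shift reduces to (x << amt),
 // since hardware already masks the amount to the low 6 bits.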
4364 if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
4365 NewShiftAmt = Add0;
4366
4367 } else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() &&
4368 ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) ||
4369 (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) {
4370 // If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X
4371 // we can replace it with a NOT. In the XOR case it may save some code
4372 // size, in the SUB case it also may save a move.
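 // For instance, a 32-bit shift by (31 - x) can shift by ~x instead, because
 // ~x == 31 - x (mod 32) and only the low 5 bits of the amount are used.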
4373 assert(Add0C == nullptr || Add1C == nullptr);
4374
4375 // We can only do N-X, not X-N
4376 if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr)
4377 return false;
4378
4379 EVT OpVT = ShiftAmt.getValueType();
4380
4381 SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT);
4382 NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
4383 Add0C == nullptr ? Add0 : Add1, AllOnes);
4384 insertDAGNode(*CurDAG, OrigShiftAmt, AllOnes);
4385 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4386 // If we are shifting by N-X where N == 0 mod Size, then just shift by
4387 // -X to generate a NEG instead of a SUB of a constant.
4388 } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
4389 Add0C->getZExtValue() != 0) {
4390 EVT SubVT = ShiftAmt.getValueType();
4391 SDValue X;
4392 if (Add0C->getZExtValue() % Size == 0)
4393 X = Add1;
4394 else if (ShiftAmt.hasOneUse() && Size == 64 &&
4395 Add0C->getZExtValue() % 32 == 0) {
4396 // We have a 64-bit shift by (n*32-x), turn it into -(x+n*32).
4397 // This is mainly beneficial if we already compute (x+n*32).
4398 if (Add1.getOpcode() == ISD::TRUNCATE) {
4399 Add1 = Add1.getOperand(0);
4400 SubVT = Add1.getValueType();
4401 }
4402 if (Add0.getValueType() != SubVT) {
4403 Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT);
4404 insertDAGNode(*CurDAG, OrigShiftAmt, Add0);
4405 }
4406
4407 X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0);
4408 insertDAGNode(*CurDAG, OrigShiftAmt, X);
4409 } else
4410 return false;
4411 // Insert a negate op.
4412 // TODO: This isn't guaranteed to replace the sub if there is a logic cone
4413 // that uses it that's not a shift.
4414 SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
4415 SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
4416 NewShiftAmt = Neg;
4417
4418 // Insert these operands into a valid topological order so they can
4419 // get selected independently.
4420 insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
4421 insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
4422 } else
4423 return false;
4424 } else
4425 return false;
4426
4427 if (NewShiftAmt.getValueType() != MVT::i8) {
4428 // Need to truncate the shift amount.
4429 NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
4430 // Add to a correct topological ordering.
4431 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4432 }
4433
4434 // Insert a new mask to keep the shift amount legal. This should be removed
4435 // by isel patterns.
4436 NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
4437 CurDAG->getConstant(Size - 1, DL, MVT::i8));
4438 // Place in a correct topological ordering.
4439 insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
4440
4441 SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
4442 NewShiftAmt);
4443 if (UpdatedNode != N) {
4444 // If we found an existing node, we should replace ourselves with that node
4445 // and wait for it to be selected after its other users.
4446 ReplaceNode(N, UpdatedNode);
4447 return true;
4448 }
4449
4450 // If the original shift amount is now dead, delete it so that we don't run
4451 // it through isel.
4452 if (OrigShiftAmt.getNode()->use_empty())
4453 CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
4454
4455 // Now that we've optimized the shift amount, defer to normal isel to get
4456 // load folding and legacy vs BMI2 selection without repeating it here.
4457 SelectCode(N);
4458 return true;
4459}
4460
4461bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
4462 MVT NVT = N->getSimpleValueType(0);
4463 unsigned Opcode = N->getOpcode();
4464 SDLoc dl(N);
4465
4466 // For operations of the form (x << C1) op C2, check if we can use a smaller
4467 // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
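 // For instance, (x << 8) | 0x4000 can be rewritten as ((x | 0x40) << 8),
 // letting the OR use an 8-bit immediate encoding instead of a 32-bit one.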
4468 SDValue Shift = N->getOperand(0);
4469 SDValue N1 = N->getOperand(1);
4470
4471 auto *Cst = dyn_cast<ConstantSDNode>(N1);
4472 if (!Cst)
4473 return false;
4474
4475 int64_t Val = Cst->getSExtValue();
4476
4477 // If we have an any_extend feeding the AND, look through it to see if there
4478 // is a shift behind it. But only if the AND doesn't use the extended bits.
4479 // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
4480 bool FoundAnyExtend = false;
4481 if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
4482 Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
4483 isUInt<32>(Val)) {
4484 FoundAnyExtend = true;
4485 Shift = Shift.getOperand(0);
4486 }
4487
4488 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
4489 return false;
4490
4491 // i8 is unshrinkable, i16 should be promoted to i32.
4492 if (NVT != MVT::i32 && NVT != MVT::i64)
4493 return false;
4494
4495 auto *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
4496 if (!ShlCst)
4497 return false;
4498
4499 uint64_t ShAmt = ShlCst->getZExtValue();
4500
4501 // Make sure that we don't change the operation by removing bits.
4502 // This only matters for OR and XOR; AND is unaffected.
4503 uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
4504 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
4505 return false;
4506
4507 // Check the minimum bitwidth for the new constant.
4508 // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
4509 auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
4510 if (Opcode == ISD::AND) {
4511 // AND32ri is the same as AND64ri32 with zext imm.
4512 // Try this before sign extended immediates below.
4513 ShiftedVal = (uint64_t)Val >> ShAmt;
4514 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4515 return true;
4516 // Also swap order when the AND can become MOVZX.
4517 if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
4518 return true;
4519 }
4520 ShiftedVal = Val >> ShAmt;
4521 if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
4522 (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
4523 return true;
4524 if (Opcode != ISD::AND) {
4525 // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
4526 ShiftedVal = (uint64_t)Val >> ShAmt;
4527 if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4528 return true;
4529 }
4530 return false;
4531 };
4532
4533 int64_t ShiftedVal;
4534 if (!CanShrinkImmediate(ShiftedVal))
4535 return false;
4536
4537 // Ok, we can reorder to get a smaller immediate.
4538
4539 // But, it's possible the original immediate allowed an AND to become MOVZX.
4540 // We do this late to delay the MaskedValueIsZero call as long as
4541 // possible.
4542 if (Opcode == ISD::AND) {
4543 // Find the smallest zext this could possibly be.
4544 unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
4545 ZExtWidth = llvm::bit_ceil(std::max(ZExtWidth, 8U));
4546
4547 // Figure out which bits need to be zero to achieve that mask.
4548 APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
4549 ZExtWidth);
4550 NeededMask &= ~Cst->getAPIntValue();
4551
4552 if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
4553 return false;
4554 }
4555
4556 SDValue X = Shift.getOperand(0);
4557 if (FoundAnyExtend) {
4558 SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
4559 insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
4560 X = NewX;
4561 }
4562
4563 SDValue NewCst = CurDAG->getSignedConstant(ShiftedVal, dl, NVT);
4564 insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
4565 SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
4566 insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
4567 SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
4568 Shift.getOperand(1));
4569 ReplaceNode(N, NewSHL.getNode());
4570 SelectCode(NewSHL.getNode());
4571 return true;
4572}
4573
4574bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
4575 SDNode *ParentB, SDNode *ParentC,
4576 SDValue A, SDValue B, SDValue C,
4577 uint8_t Imm) {
4578 assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) &&
4579 C.isOperandOf(ParentC) && "Incorrect parent node");
4580
4581 auto tryFoldLoadOrBCast =
4582 [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
4583 SDValue &Index, SDValue &Disp, SDValue &Segment) {
4584 if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
4585 return true;
4586
4587 // Not a load, check for broadcast which may be behind a bitcast.
4588 if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
4589 P = L.getNode();
4590 L = L.getOperand(0);
4591 }
4592
4593 if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
4594 return false;
4595
4596 // Only 32 and 64 bit broadcasts are supported.
4597 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
4598 unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
4599 if (Size != 32 && Size != 64)
4600 return false;
4601
4602 return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
4603 };
4604
4605 bool FoldedLoad = false;
4606 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4607 if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4608 FoldedLoad = true;
4609 } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
4610 Tmp4)) {
4611 FoldedLoad = true;
4612 std::swap(A, C);
4613 // Swap bits 1/4 and 3/6.
4614 uint8_t OldImm = Imm;
4615 Imm = OldImm & 0xa5;
4616 if (OldImm & 0x02) Imm |= 0x10;
4617 if (OldImm & 0x10) Imm |= 0x02;
4618 if (OldImm & 0x08) Imm |= 0x40;
4619 if (OldImm & 0x40) Imm |= 0x08;
4620 } else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3,
4621 Tmp4)) {
4622 FoldedLoad = true;
4623 std::swap(B, C);
4624 // Swap bits 1/2 and 5/6.
4625 uint8_t OldImm = Imm;
4626 Imm = OldImm & 0x99;
4627 if (OldImm & 0x02) Imm |= 0x04;
4628 if (OldImm & 0x04) Imm |= 0x02;
4629 if (OldImm & 0x20) Imm |= 0x40;
4630 if (OldImm & 0x40) Imm |= 0x20;
4631 }
4632
4633 SDLoc DL(Root);
4634
4635 SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);
4636
4637 MVT NVT = Root->getSimpleValueType(0);
4638
4639 MachineSDNode *MNode;
4640 if (FoldedLoad) {
4641 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
4642
4643 unsigned Opc;
4644 if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) {
4645 auto *MemIntr = cast<MemIntrinsicSDNode>(C);
4646 unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
4647 assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!");
4648
4649 bool UseD = EltSize == 32;
4650 if (NVT.is128BitVector())
4651 Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
4652 else if (NVT.is256BitVector())
4653 Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
4654 else if (NVT.is512BitVector())
4655 Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
4656 else
4657 llvm_unreachable("Unexpected vector size!");
4658 } else {
4659 bool UseD = NVT.getVectorElementType() == MVT::i32;
4660 if (NVT.is128BitVector())
4661 Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
4662 else if (NVT.is256BitVector())
4663 Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
4664 else if (NVT.is512BitVector())
4665 Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
4666 else
4667 llvm_unreachable("Unexpected vector size!");
4668 }
4669
4670 SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)};
4671 MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
4672
4673 // Update the chain.
4674 ReplaceUses(C.getValue(1), SDValue(MNode, 1));
4675 // Record the mem-refs
4676 CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()});
4677 } else {
4678 bool UseD = NVT.getVectorElementType() == MVT::i32;
4679 unsigned Opc;
4680 if (NVT.is128BitVector())
4681 Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
4682 else if (NVT.is256BitVector())
4683 Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
4684 else if (NVT.is512BitVector())
4685 Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
4686 else
4687 llvm_unreachable("Unexpected vector size!");
4688
4689 MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
4690 }
4691
4692 ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0));
4693 CurDAG->RemoveDeadNode(Root);
4694 return true;
4695}
4696
4697// Try to match two logic ops to a VPTERNLOG.
4698// FIXME: Handle more complex patterns that use an operand more than once?
4699bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
4700 MVT NVT = N->getSimpleValueType(0);
4701
4702 // Make sure we support VPTERNLOG.
4703 if (!NVT.isVector() || !Subtarget->hasAVX512() ||
4704 NVT.getVectorElementType() == MVT::i1)
4705 return false;
4706
4707 // We need VLX for 128/256-bit.
4708 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
4709 return false;
4710
4711 auto getFoldableLogicOp = [](SDValue Op) {
4712 // Peek through single use bitcast.
4713 if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
4714 Op = Op.getOperand(0);
4715
4716 if (!Op.hasOneUse())
4717 return SDValue();
4718
4719 unsigned Opc = Op.getOpcode();
4720 if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
4721 Opc == X86ISD::ANDNP)
4722 return Op;
4723
4724 return SDValue();
4725 };
4726
4727 SDValue N0, N1, A, FoldableOp;
4728
4729   // Identify and (optionally) peel an outer NOT that wraps a pure logic tree.
4730 auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) {
4731 if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() &&
4732 ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) {
4733 SDValue InnerOp = getFoldableLogicOp(Op->getOperand(0));
4734
4735 if (!InnerOp)
4736 return SDValue();
4737
4738 N0 = InnerOp.getOperand(0);
4739 N1 = InnerOp.getOperand(1);
4740 if ((FoldableOp = getFoldableLogicOp(N1))) {
4741 A = N0;
4742 return InnerOp;
4743 }
4744 if ((FoldableOp = getFoldableLogicOp(N0))) {
4745 A = N1;
4746 return InnerOp;
4747 }
4748 }
4749 return SDValue();
4750 };
4751
4752 bool PeeledOuterNot = false;
4753 SDNode *OriN = N;
4754 if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) {
4755 PeeledOuterNot = true;
4756 N = InnerOp.getNode();
4757 } else {
4758 N0 = N->getOperand(0);
4759 N1 = N->getOperand(1);
4760
4761 if ((FoldableOp = getFoldableLogicOp(N1)))
4762 A = N0;
4763 else if ((FoldableOp = getFoldableLogicOp(N0)))
4764 A = N1;
4765 else
4766 return false;
4767 }
4768
4769 SDValue B = FoldableOp.getOperand(0);
4770 SDValue C = FoldableOp.getOperand(1);
4771 SDNode *ParentA = N;
4772 SDNode *ParentB = FoldableOp.getNode();
4773 SDNode *ParentC = FoldableOp.getNode();
4774
4775 // We can build the appropriate control immediate by performing the logic
4776 // operation we're matching using these constants for A, B, and C.
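  // E.g. matching A & (B | C) yields 0xf0 & (0xcc | 0xaa) = 0xe0.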
4777 uint8_t TernlogMagicA = 0xf0;
4778 uint8_t TernlogMagicB = 0xcc;
4779 uint8_t TernlogMagicC = 0xaa;
4780
4781 // Some of the inputs may be inverted, peek through them and invert the
4782 // magic values accordingly.
4783 // TODO: There may be a bitcast before the xor that we should peek through.
4784 auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) {
4785 if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() &&
4786 ISD::isBuildVectorAllOnes(Op.getOperand(1).getNode())) {
4787 Magic = ~Magic;
4788 Parent = Op.getNode();
4789 Op = Op.getOperand(0);
4790 }
4791 };
4792
4793 PeekThroughNot(A, ParentA, TernlogMagicA);
4794 PeekThroughNot(B, ParentB, TernlogMagicB);
4795 PeekThroughNot(C, ParentC, TernlogMagicC);
4796
4797 uint8_t Imm;
4798 switch (FoldableOp.getOpcode()) {
4799 default: llvm_unreachable("Unexpected opcode!");
4800 case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
4801 case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break;
4802 case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;
4803 case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
4804 }
4805
4806 switch (N->getOpcode()) {
4807 default: llvm_unreachable("Unexpected opcode!");
4808 case X86ISD::ANDNP:
4809 if (A == N0)
4810 Imm &= ~TernlogMagicA;
4811 else
4812 Imm = ~(Imm) & TernlogMagicA;
4813 break;
4814 case ISD::AND: Imm &= TernlogMagicA; break;
4815 case ISD::OR: Imm |= TernlogMagicA; break;
4816 case ISD::XOR: Imm ^= TernlogMagicA; break;
4817 }
4818
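  // A peeled outer NOT inverts every entry of the truth table, i.e. it
  // complements the immediate.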
4819 if (PeeledOuterNot)
4820 Imm = ~Imm;
4821
4822 return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm);
4823}
4824
4825/// If the high bits of an 'and' operand are known zero, try setting the
4826/// high bits of an 'and' constant operand to produce a smaller encoding by
4827/// creating a small, sign-extended negative immediate rather than a large
4828/// positive one. This reverses a transform in SimplifyDemandedBits that
4829/// shrinks mask constants by clearing bits. There is also a possibility that
4830/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
4831/// case, just replace the 'and'. Return 'true' if the node is replaced.
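/// For example, if bit 31 of the other operand is known zero, then
/// (and i32 X, 0x7ffffff0) can become (and i32 X, 0xfffffff0), whose mask
/// encodes as the sign-extended 8-bit immediate -16 instead of an imm32.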
4832bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
4833 // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
4834 // have immediate operands.
4835 MVT VT = And->getSimpleValueType(0);
4836 if (VT != MVT::i32 && VT != MVT::i64)
4837 return false;
4838
4839 auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
4840 if (!And1C)
4841 return false;
4842
4843   // Bail out if the mask constant is already negative. It can't shrink further.
4844 // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
4845 // patterns to use a 32-bit and instead of a 64-bit and by relying on the
4846 // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
4847 // are negative too.
4848 APInt MaskVal = And1C->getAPIntValue();
4849 unsigned MaskLZ = MaskVal.countl_zero();
4850 if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
4851 return false;
4852
4853 // Don't extend into the upper 32 bits of a 64 bit mask.
4854 if (VT == MVT::i64 && MaskLZ >= 32) {
4855 MaskLZ -= 32;
4856 MaskVal = MaskVal.trunc(32);
4857 }
4858
4859 SDValue And0 = And->getOperand(0);
4860 APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
4861 APInt NegMaskVal = MaskVal | HighZeros;
4862
4863 // If a negative constant would not allow a smaller encoding, there's no need
4864 // to continue. Only change the constant when we know it's a win.
4865 unsigned MinWidth = NegMaskVal.getSignificantBits();
4866 if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getSignificantBits() <= 32))
4867 return false;
4868
4869 // Extend masks if we truncated above.
4870 if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
4871 NegMaskVal = NegMaskVal.zext(64);
4872 HighZeros = HighZeros.zext(64);
4873 }
4874
4875 // The variable operand must be all zeros in the top bits to allow using the
4876 // new, negative constant as the mask.
4877 // TODO: Handle constant folding?
4878 KnownBits Known0 = CurDAG->computeKnownBits(And0);
4879 if (Known0.isConstant() || !HighZeros.isSubsetOf(Known0.Zero))
4880 return false;
4881
4882 // Check if the mask is -1. In that case, this is an unnecessary instruction
4883 // that escaped earlier analysis.
4884 if (NegMaskVal.isAllOnes()) {
4885 ReplaceNode(And, And0.getNode());
4886 return true;
4887 }
4888
4889 // A negative mask allows a smaller encoding. Create a new 'and' node.
4890 SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
4891 insertDAGNode(*CurDAG, SDValue(And, 0), NewMask);
4892 SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
4893 ReplaceNode(And, NewAnd.getNode());
4894 SelectCode(NewAnd.getNode());
4895 return true;
4896}
4897
4898static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
4899 bool FoldedBCast, bool Masked) {
4900#define VPTESTM_CASE(VT, SUFFIX) \
4901case MVT::VT: \
4902 if (Masked) \
4903 return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \
4904 return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;
4905
4906
4907#define VPTESTM_BROADCAST_CASES(SUFFIX) \
4908default: llvm_unreachable("Unexpected VT!"); \
4909VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
4910VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
4911VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
4912VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
4913VPTESTM_CASE(v16i32, DZ##SUFFIX) \
4914VPTESTM_CASE(v8i64, QZ##SUFFIX)
4915
4916#define VPTESTM_FULL_CASES(SUFFIX) \
4917VPTESTM_BROADCAST_CASES(SUFFIX) \
4918VPTESTM_CASE(v16i8, BZ128##SUFFIX) \
4919VPTESTM_CASE(v8i16, WZ128##SUFFIX) \
4920VPTESTM_CASE(v32i8, BZ256##SUFFIX) \
4921VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
4922VPTESTM_CASE(v64i8, BZ##SUFFIX) \
4923VPTESTM_CASE(v32i16, WZ##SUFFIX)
4924
4925 if (FoldedBCast) {
4926 switch (TestVT.SimpleTy) {
4927   VPTESTM_BROADCAST_CASES(rmb)
4928   }
4929 }
4930
4931 if (FoldedLoad) {
4932 switch (TestVT.SimpleTy) {
4933   VPTESTM_FULL_CASES(rm)
4934   }
4935 }
4936
4937 switch (TestVT.SimpleTy) {
4938   VPTESTM_FULL_CASES(rr)
4939   }
4940
4941#undef VPTESTM_FULL_CASES
4942#undef VPTESTM_BROADCAST_CASES
4943#undef VPTESTM_CASE
4944}
4945
4946// Try to create VPTESTM instruction. If InMask is not null, it will be used
4947// to form a masked operation.
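// VPTESTM sets mask bit i when (Src0 & Src1) is non-zero in element i, so a
// SETNE compare of an AND against zero maps directly to it; the SETEQ form
// uses VPTESTNM instead.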
4948bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
4949 SDValue InMask) {
4950 assert(Subtarget->hasAVX512() && "Expected AVX512!");
4951 assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
4952 "Unexpected VT!");
4953
4954 // Look for equal and not equal compares.
4955 ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4956 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4957 return false;
4958
4959 SDValue SetccOp0 = Setcc.getOperand(0);
4960 SDValue SetccOp1 = Setcc.getOperand(1);
4961
4962 // Canonicalize the all zero vector to the RHS.
4963 if (ISD::isBuildVectorAllZeros(SetccOp0.getNode()))
4964 std::swap(SetccOp0, SetccOp1);
4965
4966 // See if we're comparing against zero.
4967 if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode()))
4968 return false;
4969
4970 SDValue N0 = SetccOp0;
4971
4972 MVT CmpVT = N0.getSimpleValueType();
4973 MVT CmpSVT = CmpVT.getVectorElementType();
4974
4975 // Start with both operands the same. We'll try to refine this.
4976 SDValue Src0 = N0;
4977 SDValue Src1 = N0;
4978
4979 {
4980 // Look through single use bitcasts.
4981 SDValue N0Temp = N0;
4982 if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
4983 N0Temp = N0.getOperand(0);
4984
4985 // Look for single use AND.
4986 if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4987 Src0 = N0Temp.getOperand(0);
4988 Src1 = N0Temp.getOperand(1);
4989 }
4990 }
4991
4992 // Without VLX we need to widen the operation.
4993 bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4994
4995 auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L,
4996 SDValue &Base, SDValue &Scale, SDValue &Index,
4997 SDValue &Disp, SDValue &Segment) {
4998 // If we need to widen, we can't fold the load.
4999 if (!Widen)
5000 if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
5001 return true;
5002
5003     // If we didn't fold a load, try to match a broadcast. There is no widening
5004     // limitation for this, but only 32- and 64-bit element types are supported.
5005 if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64)
5006 return false;
5007
5008 // Look through single use bitcasts.
5009 if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
5010 P = L.getNode();
5011 L = L.getOperand(0);
5012 }
5013
5014 if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
5015 return false;
5016
5017 auto *MemIntr = cast<MemIntrinsicSDNode>(L);
5018 if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits())
5019 return false;
5020
5021 return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
5022 };
5023
5024 // We can only fold loads if the sources are unique.
5025 bool CanFoldLoads = Src0 != Src1;
5026
5027 bool FoldedLoad = false;
5028 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5029 if (CanFoldLoads) {
5030 FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2,
5031 Tmp3, Tmp4);
5032 if (!FoldedLoad) {
5033 // And is commutative.
5034 FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1,
5035 Tmp2, Tmp3, Tmp4);
5036 if (FoldedLoad)
5037 std::swap(Src0, Src1);
5038 }
5039 }
5040
5041 bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD;
5042
5043 bool IsMasked = InMask.getNode() != nullptr;
5044
5045 SDLoc dl(Root);
5046
5047 MVT ResVT = Setcc.getSimpleValueType();
5048 MVT MaskVT = ResVT;
5049 if (Widen) {
5050 // Widen the inputs using insert_subreg or copy_to_regclass.
5051 unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
5052 unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
5053 unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
5054 CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
5055 MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5056 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
5057 CmpVT), 0);
5058 Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
5059
5060 if (!FoldedBCast)
5061 Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
5062
5063 if (IsMasked) {
5064 // Widen the mask.
5065 unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID();
5066 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
5067 InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
5068 dl, MaskVT, InMask, RC), 0);
5069 }
5070 }
5071
5072 bool IsTestN = CC == ISD::SETEQ;
5073 unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
5074 IsMasked);
5075
5076 MachineSDNode *CNode;
5077 if (FoldedLoad) {
5078 SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
5079
5080 if (IsMasked) {
5081 SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
5082 Src1.getOperand(0) };
5083 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5084 } else {
5085 SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
5086 Src1.getOperand(0) };
5087 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5088 }
5089
5090 // Update the chain.
5091 ReplaceUses(Src1.getValue(1), SDValue(CNode, 1));
5092 // Record the mem-refs
5093 CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
5094 } else {
5095 if (IsMasked)
5096 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
5097 else
5098 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
5099 }
5100
5101 // If we widened, we need to shrink the mask VT.
5102 if (Widen) {
5103 unsigned RegClass = TLI->getRegClassFor(ResVT)->getID();
5104 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
5105 CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
5106 dl, ResVT, SDValue(CNode, 0), RC);
5107 }
5108
5109 ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
5110 CurDAG->RemoveDeadNode(Root);
5111 return true;
5112}
5113
5114// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
5115// into vpternlog.
5116bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
5117 assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
5118
5119 MVT NVT = N->getSimpleValueType(0);
5120
5121 // Make sure we support VPTERNLOG.
5122 if (!NVT.isVector() || !Subtarget->hasAVX512())
5123 return false;
5124
5125 // We need VLX for 128/256-bit.
5126 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
5127 return false;
5128
5129 SDValue N0 = N->getOperand(0);
5130 SDValue N1 = N->getOperand(1);
5131
5132 // Canonicalize AND to LHS.
5133 if (N1.getOpcode() == ISD::AND)
5134 std::swap(N0, N1);
5135
5136 if (N0.getOpcode() != ISD::AND ||
5137 N1.getOpcode() != X86ISD::ANDNP ||
5138 !N0.hasOneUse() || !N1.hasOneUse())
5139 return false;
5140
5141   // ANDN is not commutable; use it to pin down A and C.
5142 SDValue A = N1.getOperand(0);
5143 SDValue C = N1.getOperand(1);
5144
5145   // AND is commutable; if one operand matches A, the other operand is B.
5146 // Otherwise this isn't a match.
5147 SDValue B;
5148 if (N0.getOperand(0) == A)
5149 B = N0.getOperand(1);
5150 else if (N0.getOperand(1) == A)
5151 B = N0.getOperand(0);
5152 else
5153 return false;
5154
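  // 0xCA is the ternlog truth table for (A & B) | (~A & C):
  // (0xf0 & 0xcc) | (~0xf0 & 0xaa) = 0xc0 | 0x0a = 0xca.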
5155 SDLoc dl(N);
5156 SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
5157 SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
5158 ReplaceNode(N, Ternlog.getNode());
5159
5160 return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
5161 Ternlog.getNode(), A, B, C, 0xCA);
5162}
5163
5164void X86DAGToDAGISel::Select(SDNode *Node) {
5165 MVT NVT = Node->getSimpleValueType(0);
5166 unsigned Opcode = Node->getOpcode();
5167 SDLoc dl(Node);
5168
5169 if (Node->isMachineOpcode()) {
5170 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
5171 Node->setNodeId(-1);
5172 return; // Already selected.
5173 }
5174
5175 switch (Opcode) {
5176 default: break;
5177   case ISD::INTRINSIC_W_CHAIN: {
5178     unsigned IntNo = Node->getConstantOperandVal(1);
5179 switch (IntNo) {
5180 default: break;
5181 case Intrinsic::x86_encodekey128:
5182 case Intrinsic::x86_encodekey256: {
5183 if (!Subtarget->hasKL())
5184 break;
5185
5186 unsigned Opcode;
5187 switch (IntNo) {
5188 default: llvm_unreachable("Impossible intrinsic");
5189 case Intrinsic::x86_encodekey128:
5190 Opcode = X86::ENCODEKEY128;
5191 break;
5192 case Intrinsic::x86_encodekey256:
5193 Opcode = X86::ENCODEKEY256;
5194 break;
5195 }
5196
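      // The input key is passed implicitly in XMM0 (and XMM1 for the 256-bit
      // form), so copy it there before emitting the instruction.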
5197 SDValue Chain = Node->getOperand(0);
5198 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
5199 SDValue());
5200 if (Opcode == X86::ENCODEKEY256)
5201 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
5202 Chain.getValue(1));
5203
5204 MachineSDNode *Res = CurDAG->getMachineNode(
5205 Opcode, dl, Node->getVTList(),
5206 {Node->getOperand(2), Chain, Chain.getValue(1)});
5207 ReplaceNode(Node, Res);
5208 return;
5209 }
5210 case Intrinsic::x86_tileloaddrs64_internal:
5211 case Intrinsic::x86_tileloaddrst164_internal:
5212 if (!Subtarget->hasAMXMOVRS())
5213 break;
5214 [[fallthrough]];
5215 case Intrinsic::x86_tileloadd64_internal:
5216 case Intrinsic::x86_tileloaddt164_internal: {
5217 if (!Subtarget->hasAMXTILE())
5218 break;
5219 auto *MFI =
5220 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5221 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
5222 unsigned Opc;
5223 switch (IntNo) {
5224 default:
5225 llvm_unreachable("Unexpected intrinsic!");
5226 case Intrinsic::x86_tileloaddrs64_internal:
5227 Opc = X86::PTILELOADDRSV;
5228 break;
5229 case Intrinsic::x86_tileloaddrst164_internal:
5230 Opc = X86::PTILELOADDRST1V;
5231 break;
5232 case Intrinsic::x86_tileloadd64_internal:
5233 Opc = X86::PTILELOADDV;
5234 break;
5235 case Intrinsic::x86_tileloaddt164_internal:
5236 Opc = X86::PTILELOADDT1V;
5237 break;
5238 }
5239 // _tile_loadd_internal(row, col, buf, STRIDE)
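      // The stride becomes the index register with scale 1 and a zero
      // displacement in the tile instruction's sib-style memory operand.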
5240 SDValue Base = Node->getOperand(4);
5241 SDValue Scale = getI8Imm(1, dl);
5242 SDValue Index = Node->getOperand(5);
5243 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5244 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5245 SDValue Chain = Node->getOperand(0);
5246 MachineSDNode *CNode;
5247 SDValue Ops[] = {Node->getOperand(2),
5248 Node->getOperand(3),
5249 Base,
5250 Scale,
5251 Index,
5252 Disp,
5253 Segment,
5254 Chain};
5255 CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
5256 ReplaceNode(Node, CNode);
5257 return;
5258 }
5259 }
5260 break;
5261 }
5262 case ISD::INTRINSIC_VOID: {
5263 unsigned IntNo = Node->getConstantOperandVal(1);
5264 switch (IntNo) {
5265 default: break;
5266 case Intrinsic::x86_sse3_monitor:
5267 case Intrinsic::x86_monitorx:
5268 case Intrinsic::x86_clzero: {
5269 bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
5270
5271 unsigned Opc = 0;
5272 switch (IntNo) {
5273 default: llvm_unreachable("Unexpected intrinsic!");
5274 case Intrinsic::x86_sse3_monitor:
5275 if (!Subtarget->hasSSE3())
5276 break;
5277 Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
5278 break;
5279 case Intrinsic::x86_monitorx:
5280 if (!Subtarget->hasMWAITX())
5281 break;
5282 Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
5283 break;
5284 case Intrinsic::x86_clzero:
5285 if (!Subtarget->hasCLZERO())
5286 break;
5287 Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
5288 break;
5289 }
5290
5291 if (Opc) {
5292 unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
5293 SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
5294 Node->getOperand(2), SDValue());
5295 SDValue InGlue = Chain.getValue(1);
5296
5297 if (IntNo == Intrinsic::x86_sse3_monitor ||
5298 IntNo == Intrinsic::x86_monitorx) {
5299 // Copy the other two operands to ECX and EDX.
5300 Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
5301 InGlue);
5302 InGlue = Chain.getValue(1);
5303 Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
5304 InGlue);
5305 InGlue = Chain.getValue(1);
5306 }
5307
5308 MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
5309 { Chain, InGlue});
5310 ReplaceNode(Node, CNode);
5311 return;
5312 }
5313
5314 break;
5315 }
5316 case Intrinsic::x86_tilestored64_internal: {
5317 auto *MFI =
5318 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5319 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
5320 unsigned Opc = X86::PTILESTOREDV;
5321 // _tile_stored_internal(row, col, buf, STRIDE, c)
5322 SDValue Base = Node->getOperand(4);
5323 SDValue Scale = getI8Imm(1, dl);
5324 SDValue Index = Node->getOperand(5);
5325 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5326 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5327 SDValue Chain = Node->getOperand(0);
5328 MachineSDNode *CNode;
5329 SDValue Ops[] = {Node->getOperand(2),
5330 Node->getOperand(3),
5331 Base,
5332 Scale,
5333 Index,
5334 Disp,
5335 Segment,
5336 Node->getOperand(6),
5337 Chain};
5338 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5339 ReplaceNode(Node, CNode);
5340 return;
5341 }
5342 case Intrinsic::x86_tileloaddrs64:
5343 case Intrinsic::x86_tileloaddrst164:
5344 if (!Subtarget->hasAMXMOVRS())
5345 break;
5346 [[fallthrough]];
5347 case Intrinsic::x86_tileloadd64:
5348 case Intrinsic::x86_tileloaddt164:
5349 case Intrinsic::x86_tilestored64: {
5350 if (!Subtarget->hasAMXTILE())
5351 break;
5352 auto *MFI =
5353 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5354 MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
5355 unsigned Opc;
5356 switch (IntNo) {
5357 default: llvm_unreachable("Unexpected intrinsic!");
5358 case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break;
5359 case Intrinsic::x86_tileloaddrs64:
5360 Opc = X86::PTILELOADDRS;
5361 break;
5362 case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break;
5363 case Intrinsic::x86_tileloaddrst164:
5364 Opc = X86::PTILELOADDRST1;
5365 break;
5366 case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break;
5367 }
5368 // FIXME: Match displacement and scale.
5369 unsigned TIndex = Node->getConstantOperandVal(2);
5370 SDValue TReg = getI8Imm(TIndex, dl);
5371 SDValue Base = Node->getOperand(3);
5372 SDValue Scale = getI8Imm(1, dl);
5373 SDValue Index = Node->getOperand(4);
5374 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5375 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5376 SDValue Chain = Node->getOperand(0);
5377 MachineSDNode *CNode;
5378 if (Opc == X86::PTILESTORED) {
5379 SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain };
5380 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5381 } else {
5382 SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain };
5383 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5384 }
5385 ReplaceNode(Node, CNode);
5386 return;
5387 }
5388 }
5389 break;
5390 }
5391 case ISD::BRIND:
5392 case X86ISD::NT_BRIND: {
5393 if (Subtarget->isTarget64BitILP32()) {
5394 // Converts a 32-bit register to a 64-bit, zero-extended version of
5395 // it. This is needed because x86-64 can do many things, but jmp %r32
5396 // ain't one of them.
5397 SDValue Target = Node->getOperand(1);
5398 assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
5399 SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64);
5400 SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other,
5401 Node->getOperand(0), ZextTarget);
5402 ReplaceNode(Node, Brind.getNode());
5403 SelectCode(ZextTarget.getNode());
5404 SelectCode(Brind.getNode());
5405 return;
5406 }
5407 break;
5408 }
5409   case X86ISD::GlobalBaseReg:
5410     ReplaceNode(Node, getGlobalBaseReg());
5411 return;
5412
5413 case ISD::BITCAST:
5414 // Just drop all 128/256/512-bit bitcasts.
5415 if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
5416 NVT == MVT::f128) {
5417 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
5418 CurDAG->RemoveDeadNode(Node);
5419 return;
5420 }
5421 break;
5422
5423 case ISD::SRL:
5424 if (matchBitExtract(Node))
5425 return;
5426 [[fallthrough]];
5427 case ISD::SRA:
5428 case ISD::SHL:
5429 if (tryShiftAmountMod(Node))
5430 return;
5431 break;
5432
5433 case X86ISD::VPTERNLOG: {
5434 uint8_t Imm = Node->getConstantOperandVal(3);
5435 if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
5436 Node->getOperand(1), Node->getOperand(2), Imm))
5437 return;
5438 break;
5439 }
5440
5441 case X86ISD::ANDNP:
5442 if (tryVPTERNLOG(Node))
5443 return;
5444 break;
5445
5446 case ISD::AND:
5447 if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
5448 // Try to form a masked VPTESTM. Operands can be in either order.
5449 SDValue N0 = Node->getOperand(0);
5450 SDValue N1 = Node->getOperand(1);
5451 if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
5452 tryVPTESTM(Node, N0, N1))
5453 return;
5454 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
5455 tryVPTESTM(Node, N1, N0))
5456 return;
5457 }
5458
5459 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
5460 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
5461 CurDAG->RemoveDeadNode(Node);
5462 return;
5463 }
5464 if (matchBitExtract(Node))
5465 return;
5466 if (AndImmShrink && shrinkAndImmediate(Node))
5467 return;
5468
5469 [[fallthrough]];
5470 case ISD::OR:
5471 case ISD::XOR:
5472 if (tryShrinkShlLogicImm(Node))
5473 return;
5474 if (Opcode == ISD::OR && tryMatchBitSelect(Node))
5475 return;
5476 if (tryVPTERNLOG(Node))
5477 return;
5478
5479 [[fallthrough]];
5480 case ISD::ADD:
5481 if (Opcode == ISD::ADD && matchBitExtract(Node))
5482 return;
5483 [[fallthrough]];
5484 case ISD::SUB: {
5485 // Try to avoid folding immediates with multiple uses for optsize.
5486 // This code tries to select to register form directly to avoid going
5487     // through the isel table which might fold the immediate. We can't change
5488     // the add/sub/and/or/xor-with-immediate patterns in the tablegen files to
5489     // check the immediate use count without making the patterns unavailable to
5490     // the fast-isel table.
5491 if (!CurDAG->shouldOptForSize())
5492 break;
5493
5494 // Only handle i8/i16/i32/i64.
5495 if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
5496 break;
5497
5498 SDValue N0 = Node->getOperand(0);
5499 SDValue N1 = Node->getOperand(1);
5500
5501 auto *Cst = dyn_cast<ConstantSDNode>(N1);
5502 if (!Cst)
5503 break;
5504
5505 int64_t Val = Cst->getSExtValue();
5506
5507     // Make sure it's an immediate that is considered foldable.
5508 // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
5509 if (!isInt<8>(Val) && !isInt<32>(Val))
5510 break;
5511
5512 // If this can match to INC/DEC, let it go.
5513 if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
5514 break;
5515
5516 // Check if we should avoid folding this immediate.
5517 if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
5518 break;
5519
5520 // We should not fold the immediate. So we need a register form instead.
5521 unsigned ROpc, MOpc;
5522 switch (NVT.SimpleTy) {
5523 default: llvm_unreachable("Unexpected VT!");
5524 case MVT::i8:
5525 switch (Opcode) {
5526 default: llvm_unreachable("Unexpected opcode!");
5527 case ISD::ADD:
5528 ROpc = GET_ND_IF_ENABLED(X86::ADD8rr);
5529 MOpc = GET_ND_IF_ENABLED(X86::ADD8rm);
5530 break;
5531 case ISD::SUB:
5532 ROpc = GET_ND_IF_ENABLED(X86::SUB8rr);
5533 MOpc = GET_ND_IF_ENABLED(X86::SUB8rm);
5534 break;
5535 case ISD::AND:
5536 ROpc = GET_ND_IF_ENABLED(X86::AND8rr);
5537 MOpc = GET_ND_IF_ENABLED(X86::AND8rm);
5538 break;
5539 case ISD::OR:
5540 ROpc = GET_ND_IF_ENABLED(X86::OR8rr);
5541 MOpc = GET_ND_IF_ENABLED(X86::OR8rm);
5542 break;
5543 case ISD::XOR:
5544 ROpc = GET_ND_IF_ENABLED(X86::XOR8rr);
5545 MOpc = GET_ND_IF_ENABLED(X86::XOR8rm);
5546 break;
5547 }
5548 break;
5549 case MVT::i16:
5550 switch (Opcode) {
5551 default: llvm_unreachable("Unexpected opcode!");
5552 case ISD::ADD:
5553 ROpc = GET_ND_IF_ENABLED(X86::ADD16rr);
5554 MOpc = GET_ND_IF_ENABLED(X86::ADD16rm);
5555 break;
5556 case ISD::SUB:
5557 ROpc = GET_ND_IF_ENABLED(X86::SUB16rr);
5558 MOpc = GET_ND_IF_ENABLED(X86::SUB16rm);
5559 break;
5560 case ISD::AND:
5561 ROpc = GET_ND_IF_ENABLED(X86::AND16rr);
5562 MOpc = GET_ND_IF_ENABLED(X86::AND16rm);
5563 break;
5564 case ISD::OR:
5565 ROpc = GET_ND_IF_ENABLED(X86::OR16rr);
5566 MOpc = GET_ND_IF_ENABLED(X86::OR16rm);
5567 break;
5568 case ISD::XOR:
5569 ROpc = GET_ND_IF_ENABLED(X86::XOR16rr);
5570 MOpc = GET_ND_IF_ENABLED(X86::XOR16rm);
5571 break;
5572 }
5573 break;
5574 case MVT::i32:
5575 switch (Opcode) {
5576 default: llvm_unreachable("Unexpected opcode!");
5577 case ISD::ADD:
5578 ROpc = GET_ND_IF_ENABLED(X86::ADD32rr);
5579 MOpc = GET_ND_IF_ENABLED(X86::ADD32rm);
5580 break;
5581 case ISD::SUB:
5582 ROpc = GET_ND_IF_ENABLED(X86::SUB32rr);
5583 MOpc = GET_ND_IF_ENABLED(X86::SUB32rm);
5584 break;
5585 case ISD::AND:
5586 ROpc = GET_ND_IF_ENABLED(X86::AND32rr);
5587 MOpc = GET_ND_IF_ENABLED(X86::AND32rm);
5588 break;
5589 case ISD::OR:
5590 ROpc = GET_ND_IF_ENABLED(X86::OR32rr);
5591 MOpc = GET_ND_IF_ENABLED(X86::OR32rm);
5592 break;
5593 case ISD::XOR:
5594 ROpc = GET_ND_IF_ENABLED(X86::XOR32rr);
5595 MOpc = GET_ND_IF_ENABLED(X86::XOR32rm);
5596 break;
5597 }
5598 break;
5599 case MVT::i64:
5600 switch (Opcode) {
5601 default: llvm_unreachable("Unexpected opcode!");
5602 case ISD::ADD:
5603 ROpc = GET_ND_IF_ENABLED(X86::ADD64rr);
5604 MOpc = GET_ND_IF_ENABLED(X86::ADD64rm);
5605 break;
5606 case ISD::SUB:
5607 ROpc = GET_ND_IF_ENABLED(X86::SUB64rr);
5608 MOpc = GET_ND_IF_ENABLED(X86::SUB64rm);
5609 break;
5610 case ISD::AND:
5611 ROpc = GET_ND_IF_ENABLED(X86::AND64rr);
5612 MOpc = GET_ND_IF_ENABLED(X86::AND64rm);
5613 break;
5614 case ISD::OR:
5615 ROpc = GET_ND_IF_ENABLED(X86::OR64rr);
5616 MOpc = GET_ND_IF_ENABLED(X86::OR64rm);
5617 break;
5618 case ISD::XOR:
5619 ROpc = GET_ND_IF_ENABLED(X86::XOR64rr);
5620 MOpc = GET_ND_IF_ENABLED(X86::XOR64rm);
5621 break;
5622 }
5623 break;
5624 }
5625
5626     // OK, this is an AND/OR/XOR/ADD/SUB with a constant.
5627
5628     // If this is not a subtract, we can still try to fold a load.
5629 if (Opcode != ISD::SUB) {
5630 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5631 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
5632 SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
5633 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
5634 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5635 // Update the chain.
5636 ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
5637 // Record the mem-refs
5638 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
5639 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5640 CurDAG->RemoveDeadNode(Node);
5641 return;
5642 }
5643 }
5644
5645 CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
5646 return;
5647 }
5648
5649 case X86ISD::SMUL:
5650 // i16/i32/i64 are handled with isel patterns.
5651 if (NVT != MVT::i8)
5652 break;
5653 [[fallthrough]];
5654 case X86ISD::UMUL: {
5655 SDValue N0 = Node->getOperand(0);
5656 SDValue N1 = Node->getOperand(1);
5657
5658 unsigned LoReg, ROpc, MOpc;
5659 switch (NVT.SimpleTy) {
5660 default: llvm_unreachable("Unsupported VT!");
5661 case MVT::i8:
5662 LoReg = X86::AL;
5663 ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
5664 MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
5665 break;
5666 case MVT::i16:
5667 LoReg = X86::AX;
5668 ROpc = X86::MUL16r;
5669 MOpc = X86::MUL16m;
5670 break;
5671 case MVT::i32:
5672 LoReg = X86::EAX;
5673 ROpc = X86::MUL32r;
5674 MOpc = X86::MUL32m;
5675 break;
5676 case MVT::i64:
5677 LoReg = X86::RAX;
5678 ROpc = X86::MUL64r;
5679 MOpc = X86::MUL64m;
5680 break;
5681 }
5682
5683 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5684 bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5685 // Multiply is commutative.
5686 if (!FoldedLoad) {
5687 FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5688 if (FoldedLoad)
5689 std::swap(N0, N1);
5690 }
5691
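    // The one-operand MUL/IMUL forms take their other source implicitly in
    // AL/AX/EAX/RAX, so copy N0 into that register first.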
5692 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
5693 N0, SDValue()).getValue(1);
5694
5695 MachineSDNode *CNode;
5696 if (FoldedLoad) {
5697 // i16/i32/i64 use an instruction that produces a low and high result even
5698 // though only the low result is used.
5699 SDVTList VTs;
5700 if (NVT == MVT::i8)
5701 VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
5702 else
5703 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
5704
5705 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5706 InGlue };
5707 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5708
5709 // Update the chain.
5710 ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
5711 // Record the mem-refs
5712 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5713 } else {
5714 // i16/i32/i64 use an instruction that produces a low and high result even
5715 // though only the low result is used.
5716 SDVTList VTs;
5717 if (NVT == MVT::i8)
5718 VTs = CurDAG->getVTList(NVT, MVT::i32);
5719 else
5720 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
5721
5722 CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InGlue});
5723 }
5724
5725 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
5726 ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
5727 CurDAG->RemoveDeadNode(Node);
5728 return;
5729 }
5730
5731 case ISD::SMUL_LOHI:
5732 case ISD::UMUL_LOHI: {
5733 SDValue N0 = Node->getOperand(0);
5734 SDValue N1 = Node->getOperand(1);
5735
5736 unsigned Opc, MOpc;
5737 unsigned LoReg, HiReg;
5738 bool IsSigned = Opcode == ISD::SMUL_LOHI;
5739 bool UseMULX = !IsSigned && Subtarget->hasBMI2();
5740 bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
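    // MULX reads its implicit source from EDX/RDX rather than EAX/RAX and
    // does not clobber EFLAGS; the Hrr/Hrm forms are used when only the high
    // half of the product is needed.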
5741 switch (NVT.SimpleTy) {
5742 default: llvm_unreachable("Unsupported VT!");
5743 case MVT::i32:
5744 Opc = UseMULXHi ? X86::MULX32Hrr
5745 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
5746 : IsSigned ? X86::IMUL32r
5747 : X86::MUL32r;
5748 MOpc = UseMULXHi ? X86::MULX32Hrm
5749 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
5750 : IsSigned ? X86::IMUL32m
5751 : X86::MUL32m;
5752 LoReg = UseMULX ? X86::EDX : X86::EAX;
5753 HiReg = X86::EDX;
5754 break;
5755 case MVT::i64:
5756 Opc = UseMULXHi ? X86::MULX64Hrr
5757 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
5758 : IsSigned ? X86::IMUL64r
5759 : X86::MUL64r;
5760 MOpc = UseMULXHi ? X86::MULX64Hrm
5761 : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
5762 : IsSigned ? X86::IMUL64m
5763 : X86::MUL64m;
5764 LoReg = UseMULX ? X86::RDX : X86::RAX;
5765 HiReg = X86::RDX;
5766 break;
5767 }
5768
5769 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5770 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5771 // Multiply is commutative.
5772 if (!foldedLoad) {
5773 foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5774 if (foldedLoad)
5775 std::swap(N0, N1);
5776 }
5777
5778 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
5779 N0, SDValue()).getValue(1);
5780 SDValue ResHi, ResLo;
5781 if (foldedLoad) {
5782 SDValue Chain;
5783 MachineSDNode *CNode = nullptr;
5784 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5785 InGlue };
5786 if (UseMULXHi) {
5787 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
5788 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5789 ResHi = SDValue(CNode, 0);
5790 Chain = SDValue(CNode, 1);
5791 } else if (UseMULX) {
5792 SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
5793 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5794 ResHi = SDValue(CNode, 0);
5795 ResLo = SDValue(CNode, 1);
5796 Chain = SDValue(CNode, 2);
5797 } else {
5798 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
5799 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5800 Chain = SDValue(CNode, 0);
5801 InGlue = SDValue(CNode, 1);
5802 }
5803
5804 // Update the chain.
5805 ReplaceUses(N1.getValue(1), Chain);
5806 // Record the mem-refs
5807 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5808 } else {
5809 SDValue Ops[] = { N1, InGlue };
5810 if (UseMULXHi) {
5811 SDVTList VTs = CurDAG->getVTList(NVT);
5812 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5813 ResHi = SDValue(CNode, 0);
5814 } else if (UseMULX) {
5815 SDVTList VTs = CurDAG->getVTList(NVT, NVT);
5816 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5817 ResHi = SDValue(CNode, 0);
5818 ResLo = SDValue(CNode, 1);
5819 } else {
5820 SDVTList VTs = CurDAG->getVTList(MVT::Glue);
5821 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5822 InGlue = SDValue(CNode, 0);
5823 }
5824 }
5825
5826 // Copy the low half of the result, if it is needed.
5827 if (!SDValue(Node, 0).use_empty()) {
5828 if (!ResLo) {
5829 assert(LoReg && "Register for low half is not defined!");
5830 ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
5831 NVT, InGlue);
5832 InGlue = ResLo.getValue(2);
5833 }
5834 ReplaceUses(SDValue(Node, 0), ResLo);
5835 LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
5836 dbgs() << '\n');
5837 }
5838 // Copy the high half of the result, if it is needed.
5839 if (!SDValue(Node, 1).use_empty()) {
5840 if (!ResHi) {
5841 assert(HiReg && "Register for high half is not defined!");
5842 ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
5843 NVT, InGlue);
5844 InGlue = ResHi.getValue(2);
5845 }
5846 ReplaceUses(SDValue(Node, 1), ResHi);
5847 LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
5848 dbgs() << '\n');
5849 }
5850
5851 CurDAG->RemoveDeadNode(Node);
5852 return;
5853 }
5854
5855 case ISD::SDIVREM:
5856 case ISD::UDIVREM: {
5857 SDValue N0 = Node->getOperand(0);
5858 SDValue N1 = Node->getOperand(1);
5859
5860 unsigned ROpc, MOpc;
5861 bool isSigned = Opcode == ISD::SDIVREM;
5862 if (!isSigned) {
5863 switch (NVT.SimpleTy) {
5864 default: llvm_unreachable("Unsupported VT!");
5865 case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break;
5866 case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break;
5867 case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break;
5868 case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break;
5869 }
5870 } else {
5871 switch (NVT.SimpleTy) {
5872 default: llvm_unreachable("Unsupported VT!");
5873 case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
5874 case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
5875 case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
5876 case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
5877 }
5878 }
5879
5880 unsigned LoReg, HiReg, ClrReg;
5881 unsigned SExtOpcode;
5882 switch (NVT.SimpleTy) {
5883 default: llvm_unreachable("Unsupported VT!");
5884 case MVT::i8:
5885 LoReg = X86::AL; ClrReg = HiReg = X86::AH;
5886 SExtOpcode = 0; // Not used.
5887 break;
5888 case MVT::i16:
5889 LoReg = X86::AX; HiReg = X86::DX;
5890 ClrReg = X86::DX;
5891 SExtOpcode = X86::CWD;
5892 break;
5893 case MVT::i32:
5894 LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
5895 SExtOpcode = X86::CDQ;
5896 break;
5897 case MVT::i64:
5898 LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
5899 SExtOpcode = X86::CQO;
5900 break;
5901 }
5902
5903 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
5904 bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
5905 bool signBitIsZero = CurDAG->SignBitIsZero(N0);
5906
5907 SDValue InGlue;
5908 if (NVT == MVT::i8) {
5909 // Special case for div8, just use a move with zero extension to AX to
5910 // clear the upper 8 bits (AH).
5911 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
5912 MachineSDNode *Move;
5913 if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
5914 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
5915 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
5916 : X86::MOVZX16rm8;
5917 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
5918 Chain = SDValue(Move, 1);
5919 ReplaceUses(N0.getValue(1), Chain);
5920 // Record the mem-refs
5921 CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
5922 } else {
5923 unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
5924 : X86::MOVZX16rr8;
5925 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
5926 Chain = CurDAG->getEntryNode();
5927 }
5928 Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
5929 SDValue());
5930 InGlue = Chain.getValue(1);
5931 } else {
5932 InGlue =
5933 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
5934 LoReg, N0, SDValue()).getValue(1);
5935 if (isSigned && !signBitIsZero) {
5936 // Sign extend the low part into the high part.
5937 InGlue =
5938 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InGlue),0);
5939 } else {
5940 // Zero out the high part, effectively zero extending the input.
5941 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
5942 SDValue ClrNode =
5943 SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, {}), 0);
5944 switch (NVT.SimpleTy) {
5945 case MVT::i16:
5946 ClrNode =
5947 SDValue(CurDAG->getMachineNode(
5948 TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
5949 CurDAG->getTargetConstant(X86::sub_16bit, dl,
5950 MVT::i32)),
5951 0);
5952 break;
5953 case MVT::i32:
5954 break;
5955 case MVT::i64:
5956 ClrNode =
5957 SDValue(CurDAG->getMachineNode(
5958 TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
5959 CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
5960 CurDAG->getTargetConstant(X86::sub_32bit, dl,
5961 MVT::i32)),
5962 0);
5963 break;
5964 default:
5965 llvm_unreachable("Unexpected division source");
5966 }
5967
5968 InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
5969 ClrNode, InGlue).getValue(1);
5970 }
5971 }
5972
5973 if (foldedLoad) {
5974 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
5975 InGlue };
5976 MachineSDNode *CNode =
5977 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
5978 InGlue = SDValue(CNode, 1);
5979 // Update the chain.
5980 ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
5981 // Record the mem-refs
5982 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5983 } else {
5984 InGlue =
5985 SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InGlue), 0);
5986 }
5987
5988 // Prevent use of AH in a REX instruction by explicitly copying it to
5989 // an ABCD_L register.
5990 //
5991 // The current assumption of the register allocator is that isel
5992 // won't generate explicit references to the GR8_ABCD_H registers. If
5993 // the allocator and/or the backend get enhanced to be more robust in
5994 // that regard, this can be, and should be, removed.
5995 if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
5996 SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
5997 unsigned AHExtOpcode =
5998 isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
5999
6000 SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
6001 MVT::Glue, AHCopy, InGlue);
6002 SDValue Result(RNode, 0);
6003 InGlue = SDValue(RNode, 1);
6004
6005 Result =
6006 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
6007
6008 ReplaceUses(SDValue(Node, 1), Result);
6009 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
6010 dbgs() << '\n');
6011 }
6012 // Copy the division (low) result, if it is needed.
6013 if (!SDValue(Node, 0).use_empty()) {
6014 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
6015 LoReg, NVT, InGlue);
6016 InGlue = Result.getValue(2);
6017 ReplaceUses(SDValue(Node, 0), Result);
6018 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
6019 dbgs() << '\n');
6020 }
6021 // Copy the remainder (high) result, if it is needed.
6022 if (!SDValue(Node, 1).use_empty()) {
6023 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
6024 HiReg, NVT, InGlue);
6025 InGlue = Result.getValue(2);
6026 ReplaceUses(SDValue(Node, 1), Result);
6027 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
6028 dbgs() << '\n');
6029 }
6030 CurDAG->RemoveDeadNode(Node);
6031 return;
6032 }
6033
6034 case X86ISD::FCMP:
6035   case X86ISD::STRICT_FCMP:
6036   case X86ISD::STRICT_FCMPS: {
6037 bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
6038 Node->getOpcode() == X86ISD::STRICT_FCMPS;
6039 SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
6040 SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
6041
6042 // Save the original VT of the compare.
6043 MVT CmpVT = N0.getSimpleValueType();
6044
6045 // Floating point needs special handling if we don't have FCOMI.
6046 if (Subtarget->canUseCMOV())
6047 break;
6048
6049 bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
6050
6051 unsigned Opc;
6052 switch (CmpVT.SimpleTy) {
6053 default: llvm_unreachable("Unexpected type!");
6054 case MVT::f32:
6055 Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
6056 break;
6057 case MVT::f64:
6058 Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
6059 break;
6060 case MVT::f80:
6061 Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
6062 break;
6063 }
6064
6065 SDValue Chain =
6066 IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
6067 SDValue Glue;
6068 if (IsStrictCmp) {
6069 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
6070 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
6071 Glue = Chain.getValue(1);
6072 } else {
6073 Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0);
6074 }
6075
6076 // Move FPSW to AX.
6077 SDValue FNSTSW =
6078 SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0);
6079
6080 // Extract upper 8-bits of AX.
6081 SDValue Extract =
6082 CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
6083
6084 // Move AH into flags.
6085 // Some 64-bit targets lack SAHF support, but they do support FCOMI.
6086 assert(Subtarget->canUseLAHFSAHF() &&
6087 "Target doesn't support SAHF or FCOMI?");
6088 SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
6089 Chain = AH;
6090 SDValue SAHF = SDValue(
6091 CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
6092
6093 if (IsStrictCmp)
6094 ReplaceUses(SDValue(Node, 1), Chain);
6095
6096 ReplaceUses(SDValue(Node, 0), SAHF);
6097 CurDAG->RemoveDeadNode(Node);
6098 return;
6099 }
6100
6101 case X86ISD::CMP: {
6102 SDValue N0 = Node->getOperand(0);
6103 SDValue N1 = Node->getOperand(1);
6104
6105 // Optimizations for TEST compares.
6106 if (!isNullConstant(N1))
6107 break;
6108
6109 // Save the original VT of the compare.
6110 MVT CmpVT = N0.getSimpleValueType();
6111
6112     // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
6113 // by a test instruction. The test should be removed later by
6114 // analyzeCompare if we are using only the zero flag.
6115 // TODO: Should we check the users and use the BEXTR flags directly?
6116 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
6117 if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
6118 unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
6119 : X86::TEST32rr;
6120 SDValue BEXTR = SDValue(NewNode, 0);
6121 NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
6122 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
6123 CurDAG->RemoveDeadNode(Node);
6124 return;
6125 }
6126 }
6127
6128 // We can peek through truncates, but we need to be careful below.
6129 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
6130 N0 = N0.getOperand(0);
6131
6132 // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
6133 // use a smaller encoding.
6134 // Look past the truncate if CMP is the only use of it.
6135 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6136 N0.getValueType() != MVT::i8) {
6137 auto *MaskC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6138 if (!MaskC)
6139 break;
6140
6141 // We may have looked through a truncate so mask off any bits that
6142 // shouldn't be part of the compare.
6143 uint64_t Mask = MaskC->getZExtValue();
6144       Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
6145
6146 // Check if we can replace AND+IMM{32,64} with a shift. This is possible
6147 // for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the
6148 // zero flag.
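      // isShiftedMask_64 guarantees the mask is one contiguous run of ones, so
      // shifting that run down to bit 0 (or up to the top bit) lets a plain
      // register TEST check exactly the same bits.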
6149 if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) &&
6150 onlyUsesZeroFlag(SDValue(Node, 0))) {
6151 unsigned ShiftOpcode = ISD::DELETED_NODE;
6152 unsigned ShiftAmt;
6153 unsigned SubRegIdx;
6154 MVT SubRegVT;
6155 unsigned TestOpcode;
6156 unsigned LeadingZeros = llvm::countl_zero(Mask);
6157 unsigned TrailingZeros = llvm::countr_zero(Mask);
6158
6159 // With leading/trailing zeros, the transform is profitable if we can
6160 // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without
6161 // incurring any extra register moves.
6162 bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse();
6163 if (LeadingZeros == 0 && SavesBytes) {
6164 // If the mask covers the most significant bit, then we can replace
6165 // TEST+AND with a SHR and check eflags.
6166 // This emits a redundant TEST which is subsequently eliminated.
6167 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6168 ShiftAmt = TrailingZeros;
6169 SubRegIdx = 0;
6170 TestOpcode = X86::TEST64rr;
6171 } else if (TrailingZeros == 0 && SavesBytes) {
6172 // If the mask covers the least significant bit, then we can replace
6173 // TEST+AND with a SHL and check eflags.
6174 // This emits a redundant TEST which is subsequently eliminated.
6175 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHL64ri);
6176 ShiftAmt = LeadingZeros;
6177 SubRegIdx = 0;
6178 TestOpcode = X86::TEST64rr;
6179 } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) {
6180 // If the shifted mask extends into the high half and is 8/16/32 bits
6181 // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
6182 unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
6183 if (PopCount == 8) {
6184 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6185 ShiftAmt = TrailingZeros;
6186 SubRegIdx = X86::sub_8bit;
6187 SubRegVT = MVT::i8;
6188 TestOpcode = X86::TEST8rr;
6189 } else if (PopCount == 16) {
6190 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6191 ShiftAmt = TrailingZeros;
6192 SubRegIdx = X86::sub_16bit;
6193 SubRegVT = MVT::i16;
6194 TestOpcode = X86::TEST16rr;
6195 } else if (PopCount == 32) {
6196 ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
6197 ShiftAmt = TrailingZeros;
6198 SubRegIdx = X86::sub_32bit;
6199 SubRegVT = MVT::i32;
6200 TestOpcode = X86::TEST32rr;
6201 }
6202 }
6203 if (ShiftOpcode != ISD::DELETED_NODE) {
6204 SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64);
6205 SDValue Shift = SDValue(
6206 CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32,
6207 N0.getOperand(0), ShiftC),
6208 0);
6209 if (SubRegIdx != 0) {
6210 Shift =
6211 CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift);
6212 }
6213 MachineSDNode *Test =
6214 CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift);
6215 ReplaceNode(Node, Test);
6216 return;
6217 }
6218 }
6219
6220 MVT VT;
6221 int SubRegOp;
6222 unsigned ROpc, MOpc;
6223
6224       // For each of these checks we need to be careful if the sign flag is
6225       // being used. It is only safe to use the sign flag in two conditions:
6226       // either the sign bit in the shrunken mask is zero, or the final test
6227       // size is equal to the original compare size.
6228
6229 if (isUInt<8>(Mask) &&
6230 (!(Mask & 0x80) || CmpVT == MVT::i8 ||
6231 hasNoSignFlagUses(SDValue(Node, 0)))) {
6232 // For example, convert "testl %eax, $8" to "testb %al, $8"
6233 VT = MVT::i8;
6234 SubRegOp = X86::sub_8bit;
6235 ROpc = X86::TEST8ri;
6236 MOpc = X86::TEST8mi;
6237 } else if (OptForMinSize && isUInt<16>(Mask) &&
6238 (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
6239 hasNoSignFlagUses(SDValue(Node, 0)))) {
6240 // For example, "testl %eax, $32776" to "testw %ax, $32776".
6241 // NOTE: We only want to form TESTW instructions if optimizing for
6242 // min size. Otherwise we only save one byte and possibly get a length
6243 // changing prefix penalty in the decoders.
6244 VT = MVT::i16;
6245 SubRegOp = X86::sub_16bit;
6246 ROpc = X86::TEST16ri;
6247 MOpc = X86::TEST16mi;
6248 } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
6249 ((!(Mask & 0x80000000) &&
6250 // Without minsize 16-bit Cmps can get here so we need to
6251 // be sure we calculate the correct sign flag if needed.
6252 (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
6253 CmpVT == MVT::i32 ||
6254 hasNoSignFlagUses(SDValue(Node, 0)))) {
6255 // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
6256 // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
6257         // Otherwise, we find ourselves in a position where we have to do
6258 // promotion. If previous passes did not promote the and, we assume
6259 // they had a good reason not to and do not promote here.
6260 VT = MVT::i32;
6261 SubRegOp = X86::sub_32bit;
6262 ROpc = X86::TEST32ri;
6263 MOpc = X86::TEST32mi;
6264 } else {
6265 // No eligible transformation was found.
6266 break;
6267 }
6268
6269 SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
6270 SDValue Reg = N0.getOperand(0);
6271
6272       // Emit a testb, testw, or testl.
6273 MachineSDNode *NewNode;
6274 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
6275 if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
6276 if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) {
6277 if (!LoadN->isSimple()) {
6278 unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits();
6279 if ((MOpc == X86::TEST8mi && NumVolBits != 8) ||
6280 (MOpc == X86::TEST16mi && NumVolBits != 16) ||
6281 (MOpc == X86::TEST32mi && NumVolBits != 32))
6282 break;
6283 }
6284 }
6285 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
6286 Reg.getOperand(0) };
6287 NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
6288 // Update the chain.
6289 ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
6290 // Record the mem-refs
6291 CurDAG->setNodeMemRefs(NewNode,
6292 {cast<LoadSDNode>(Reg)->getMemOperand()});
6293 } else {
6294 // Extract the subregister if necessary.
6295 if (N0.getValueType() != VT)
6296 Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
6297
6298 NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
6299 }
6300 // Replace CMP with TEST.
6301 ReplaceNode(Node, NewNode);
6302 return;
6303 }
6304 break;
6305 }
6306 case X86ISD::PCMPISTR: {
6307 if (!Subtarget->hasSSE42())
6308 break;
6309
6310 bool NeedIndex = !SDValue(Node, 0).use_empty();
6311 bool NeedMask = !SDValue(Node, 1).use_empty();
6312 // We can't fold a load if we are going to make two instructions.
6313 bool MayFoldLoad = !NeedIndex || !NeedMask;
6314
6315 MachineSDNode *CNode;
6316 if (NeedMask) {
6317 unsigned ROpc =
6318 Subtarget->hasAVX() ? X86::VPCMPISTRMrri : X86::PCMPISTRMrri;
6319 unsigned MOpc =
6320 Subtarget->hasAVX() ? X86::VPCMPISTRMrmi : X86::PCMPISTRMrmi;
6321 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
6322 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
6323 }
6324 if (NeedIndex || !NeedMask) {
6325 unsigned ROpc =
6326 Subtarget->hasAVX() ? X86::VPCMPISTRIrri : X86::PCMPISTRIrri;
6327 unsigned MOpc =
6328 Subtarget->hasAVX() ? X86::VPCMPISTRIrmi : X86::PCMPISTRIrmi;
6329 CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
6330 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
6331 }
6332
6333 // Connect the flag usage to the last instruction created.
6334 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
6335 CurDAG->RemoveDeadNode(Node);
6336 return;
6337 }
6338 case X86ISD::PCMPESTR: {
6339 if (!Subtarget->hasSSE42())
6340 break;
6341
6342 // Copy the two implicit register inputs.
6343 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
6344 Node->getOperand(1),
6345 SDValue()).getValue(1);
6346 InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
6347 Node->getOperand(3), InGlue).getValue(1);
6348
6349 bool NeedIndex = !SDValue(Node, 0).use_empty();
6350 bool NeedMask = !SDValue(Node, 1).use_empty();
6351 // We can't fold a load if we are going to make two instructions.
6352 bool MayFoldLoad = !NeedIndex || !NeedMask;
6353
6354 MachineSDNode *CNode;
6355 if (NeedMask) {
6356 unsigned ROpc =
6357 Subtarget->hasAVX() ? X86::VPCMPESTRMrri : X86::PCMPESTRMrri;
6358 unsigned MOpc =
6359 Subtarget->hasAVX() ? X86::VPCMPESTRMrmi : X86::PCMPESTRMrmi;
6360 CNode =
6361 emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node, InGlue);
6362 ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
6363 }
6364 if (NeedIndex || !NeedMask) {
6365 unsigned ROpc =
6366 Subtarget->hasAVX() ? X86::VPCMPESTRIrri : X86::PCMPESTRIrri;
6367 unsigned MOpc =
6368 Subtarget->hasAVX() ? X86::VPCMPESTRIrmi : X86::PCMPESTRIrmi;
6369 CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InGlue);
6370 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
6371 }
6372 // Connect the flag usage to the last instruction created.
6373 ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
6374 CurDAG->RemoveDeadNode(Node);
6375 return;
6376 }
6377
6378 case ISD::SETCC: {
6379 if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
6380 return;
6381
6382 break;
6383 }
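For a vector SETCC, tryVPTESTM looks for compares of the form (and X, Y) != 0 (or == 0) that can select directly to VPTESTM/VPTESTNM and produce a k-mask. A user-level form of that pattern, assuming AVX-512F; anyBitsSet is an illustrative name:

#include <immintrin.h>

// Per 32-bit lane: (a & b) != 0, yielding a k-mask -- the shape VPTESTMD covers.
static __mmask16 anyBitsSet(__m512i a, __m512i b) {
  return _mm512_test_epi32_mask(a, b);
}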
6384
6385 case ISD::STORE:
6386 if (foldLoadStoreIntoMemOperand(Node))
6387 return;
6388 break;
6389
6390 case X86ISD::SETCC_CARRY: {
6391 MVT VT = Node->getSimpleValueType(0);
6392 SDValue Result;
6393 if (Subtarget->hasSBBDepBreaking()) {
6394 // We have to do this manually because tblgen will put the eflags copy in
6395 // the wrong place if we use an extract_subreg in the pattern.
6396 // Copy flags to the EFLAGS register and glue it to next node.
6397 SDValue EFLAGS =
6398 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
6399 Node->getOperand(1), SDValue());
6400
6401 // Create a 64-bit instruction if the result is 64-bits otherwise use the
6402 // 32-bit version.
6403 unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
6404 MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
6405 Result = SDValue(
6406 CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)),
6407 0);
6408 } else {
6409 // The target does not recognize sbb with the same reg operand as a
6410 // no-source idiom, so we explicitly zero the input values.
6411 Result = getSBBZero(Node);
6412 }
6413
6414 // For less than 32-bits we need to extract from the 32-bit node.
6415 if (VT == MVT::i8 || VT == MVT::i16) {
6416 int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
6417 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
6418 }
6419
6420 ReplaceUses(SDValue(Node, 0), Result);
6421 CurDAG->RemoveDeadNode(Node);
6422 return;
6423 }
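SETCC_CARRY materializes an all-ones or all-zeros register from the carry flag (via SETB_C32r/SETB_C64r here, or the sbb-with-zero-inputs path otherwise). At the source level this corresponds to the branch-free "broadcast the borrow of an unsigned compare" idiom; a plain C++ sketch:

#include <cstdint>

// All-ones when a < b (the compare sets CF), zero otherwise, with no branch --
// the kind of value X86ISD::SETCC_CARRY produces from EFLAGS.
static uint32_t carryMask(uint32_t a, uint32_t b) {
  return 0u - static_cast<uint32_t>(a < b);
}

int main() {
  return (carryMask(1, 2) == 0xFFFFFFFFu && carryMask(2, 1) == 0u) ? 0 : 1;
}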
6424 case X86ISD::SBB: {
6425 if (isNullConstant(Node->getOperand(0)) &&
6426 isNullConstant(Node->getOperand(1))) {
6427 SDValue Result = getSBBZero(Node);
6428
6429 // Replace the flag use.
6430 ReplaceUses(SDValue(Node, 1), Result.getValue(1));
6431
6432 // Replace the result use.
6433 if (!SDValue(Node, 0).use_empty()) {
6434 // For less than 32-bits we need to extract from the 32-bit node.
6435 MVT VT = Node->getSimpleValueType(0);
6436 if (VT == MVT::i8 || VT == MVT::i16) {
6437 int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
6438 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
6439 }
6440 ReplaceUses(SDValue(Node, 0), Result);
6441 }
6442
6443 CurDAG->RemoveDeadNode(Node);
6444 return;
6445 }
6446 break;
6447 }
6448 case X86ISD::MGATHER: {
6449 auto *Mgt = cast<X86MaskedGatherSDNode>(Node);
6450 SDValue IndexOp = Mgt->getIndex();
6451 SDValue Mask = Mgt->getMask();
6452 MVT IndexVT = IndexOp.getSimpleValueType();
6453 MVT ValueVT = Node->getSimpleValueType(0);
6454 MVT MaskVT = Mask.getSimpleValueType();
6455
6456 // This is just to prevent crashes if the nodes are malformed somehow. We're
6457 // otherwise only doing loose type checking here, based on what a type
6458 // constraint would say, just like table-based isel.
6459 if (!ValueVT.isVector() || !MaskVT.isVector())
6460 break;
6461
6462 unsigned NumElts = ValueVT.getVectorNumElements();
6463 MVT ValueSVT = ValueVT.getVectorElementType();
6464
6465 bool IsFP = ValueSVT.isFloatingPoint();
6466 unsigned EltSize = ValueSVT.getSizeInBits();
6467
6468 unsigned Opc = 0;
6469 bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1;
6470 if (AVX512Gather) {
6471 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6472 Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm;
6473 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6474 Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm;
6475 else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
6476 Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm;
6477 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6478 Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm;
6479 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6480 Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm;
6481 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
6482 Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm;
6483 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6484 Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm;
6485 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6486 Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm;
6487 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
6488 Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm;
6489 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6490 Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm;
6491 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6492 Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm;
6493 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
6494 Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm;
6495 } else {
6496 assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&
6497 "Unexpected mask VT!");
6498 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6499 Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm;
6500 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6501 Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm;
6502 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6503 Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm;
6504 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6505 Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm;
6506 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6507 Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm;
6508 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6509 Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm;
6510 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6511 Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm;
6512 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6513 Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm;
6514 }
6515
6516 if (!Opc)
6517 break;
6518
6519 SDValue Base, Scale, Index, Disp, Segment;
6520 if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(),
6521 Base, Scale, Index, Disp, Segment))
6522 break;
6523
6524 SDValue PassThru = Mgt->getPassThru();
6525 SDValue Chain = Mgt->getChain();
6526 // Gather instructions have a mask output not in the ISD node.
6527 SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other);
6528
6529 MachineSDNode *NewNode;
6530 if (AVX512Gather) {
6531 SDValue Ops[] = {PassThru, Mask, Base, Scale,
6532 Index, Disp, Segment, Chain};
6533 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6534 } else {
6535 SDValue Ops[] = {PassThru, Base, Scale, Index,
6536 Disp, Segment, Mask, Chain};
6537 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6538 }
6539 CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
6540 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
6541 ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2));
6542 CurDAG->RemoveDeadNode(Node);
6543 return;
6544 }
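The opcode table above is keyed on the index element type, the number of result elements, the element size, fp vs. integer, and whether the mask is an AVX-512 k-register (vXi1) or an AVX2 vector mask. As one concrete data point, the following gather, assuming AVX2 (-mavx2), matches the v8i32-index, eight-by-f32 row and would normally select VGATHERDPSYrm; gather8 is an illustrative name:

#include <immintrin.h>

// Eight f32 lanes gathered through 32-bit indices, scale 4 (sizeof(float)).
static __m256 gather8(const float *base, __m256i idx) {
  return _mm256_i32gather_ps(base, idx, 4);
}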
6545 case X86ISD::MSCATTER: {
6546 auto *Sc = cast<X86MaskedScatterSDNode>(Node);
6547 SDValue Value = Sc->getValue();
6548 SDValue IndexOp = Sc->getIndex();
6549 MVT IndexVT = IndexOp.getSimpleValueType();
6550 MVT ValueVT = Value.getSimpleValueType();
6551
6552 // This is just to prevent crashes if the nodes are malformed somehow. We're
6553 // otherwise only doing loose type checking here, based on what a type
6554 // constraint would say, just like table-based isel.
6555 if (!ValueVT.isVector())
6556 break;
6557
6558 unsigned NumElts = ValueVT.getVectorNumElements();
6559 MVT ValueSVT = ValueVT.getVectorElementType();
6560
6561 bool IsFP = ValueSVT.isFloatingPoint();
6562 unsigned EltSize = ValueSVT.getSizeInBits();
6563
6564 unsigned Opc;
6565 if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
6566 Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr;
6567 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
6568 Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr;
6569 else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
6570 Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr;
6571 else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
6572 Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr;
6573 else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
6574 Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr;
6575 else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
6576 Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr;
6577 else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
6578 Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr;
6579 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
6580 Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr;
6581 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
6582 Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr;
6583 else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
6584 Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr;
6585 else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
6586 Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr;
6587 else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
6588 Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr;
6589 else
6590 break;
6591
6592 SDValue Base, Scale, Index, Disp, Segment;
6593 if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(),
6594 Base, Scale, Index, Disp, Segment))
6595 break;
6596
6597 SDValue Mask = Sc->getMask();
6598 SDValue Chain = Sc->getChain();
6599 // Scatter instructions have a mask output not in the ISD node.
6600 SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other);
6601 SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain};
6602
6603 MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6604 CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
6605 ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1));
6606 CurDAG->RemoveDeadNode(Node);
6607 return;
6608 }
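Scatter has no AVX2 form, so the table above only covers the AVX-512 opcodes. A user-level counterpart, assuming AVX-512F (-mavx512f), that matches the v16i32-index, sixteen-by-f32 row (VSCATTERDPSZmr); scatter16 is an illustrative name:

#include <immintrin.h>

// Sixteen f32 lanes scattered through 32-bit indices, scale 4.
static void scatter16(float *base, __m512i idx, __m512 v) {
  _mm512_i32scatter_ps(base, idx, v, 4);
}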
6609 case ISD::PREALLOCATED_SETUP: {
6610 auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
6611 auto CallId = MFI->getPreallocatedIdForCallSite(
6612 cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
6613 SDValue Chain = Node->getOperand(0);
6614 SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
6615 MachineSDNode *New = CurDAG->getMachineNode(
6616 TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
6617 ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
6618 CurDAG->RemoveDeadNode(Node);
6619 return;
6620 }
6621 case ISD::PREALLOCATED_ARG: {
6622 auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
6623 auto CallId = MFI->getPreallocatedIdForCallSite(
6624 cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
6625 SDValue Chain = Node->getOperand(0);
6626 SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
6627 SDValue ArgIndex = Node->getOperand(2);
6628 SDValue Ops[3];
6629 Ops[0] = CallIdValue;
6630 Ops[1] = ArgIndex;
6631 Ops[2] = Chain;
6632 MachineSDNode *New = CurDAG->getMachineNode(
6633 TargetOpcode::PREALLOCATED_ARG, dl,
6634 CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
6635 MVT::Other),
6636 Ops);
6637 ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
6638 ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
6639 CurDAG->RemoveDeadNode(Node);
6640 return;
6641 }
6642 case X86ISD::AESENCWIDE128KL:
6643 case X86ISD::AESDECWIDE128KL:
6644 case X86ISD::AESENCWIDE256KL:
6645 case X86ISD::AESDECWIDE256KL: {
6646 if (!Subtarget->hasWIDEKL())
6647 break;
6648
6649 unsigned Opcode;
6650 switch (Node->getOpcode()) {
6651 default:
6652 llvm_unreachable("Unexpected opcode!");
6653 case X86ISD::AESENCWIDE128KL:
6654 Opcode = X86::AESENCWIDE128KL;
6655 break;
6656 case X86ISD::AESDECWIDE128KL:
6657 Opcode = X86::AESDECWIDE128KL;
6658 break;
6659 case X86ISD::AESENCWIDE256KL:
6660 Opcode = X86::AESENCWIDE256KL;
6661 break;
6662 case X86ISD::AESDECWIDE256KL:
6663 Opcode = X86::AESDECWIDE256KL;
6664 break;
6665 }
6666
6667 SDValue Chain = Node->getOperand(0);
6668 SDValue Addr = Node->getOperand(1);
6669
6670 SDValue Base, Scale, Index, Disp, Segment;
6671 if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment))
6672 break;
6673
6674 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2),
6675 SDValue());
6676 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3),
6677 Chain.getValue(1));
6678 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4),
6679 Chain.getValue(1));
6680 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5),
6681 Chain.getValue(1));
6682 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6),
6683 Chain.getValue(1));
6684 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7),
6685 Chain.getValue(1));
6686 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8),
6687 Chain.getValue(1));
6688 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9),
6689 Chain.getValue(1));
6690
6691 MachineSDNode *Res = CurDAG->getMachineNode(
6692 Opcode, dl, Node->getVTList(),
6693 {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)});
6694 CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand());
6695 ReplaceNode(Node, Res);
6696 return;
6697 }
6698 case X86ISD::POP_FROM_X87_REG: {
6699 SDValue Chain = Node->getOperand(0);
6700 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
6701 SDValue Glue;
6702 if (Node->getNumValues() == 3)
6703 Glue = Node->getOperand(2);
6704 SDValue Copy =
6705 CurDAG->getCopyFromReg(Chain, dl, Reg, Node->getValueType(0), Glue);
6706 ReplaceNode(Node, Copy.getNode());
6707 return;
6708 }
6709 }
6710
6711 SelectCode(Node);
6712}
6713
6714bool X86DAGToDAGISel::SelectInlineAsmMemoryOperand(
6715 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
6716 std::vector<SDValue> &OutOps) {
6717 SDValue Op0, Op1, Op2, Op3, Op4;
6718 switch (ConstraintID) {
6719 default:
6720 llvm_unreachable("Unexpected asm memory constraint");
6721 case InlineAsm::ConstraintCode::o: // offsetable ??
6722 case InlineAsm::ConstraintCode::v: // not offsetable ??
6723 case InlineAsm::ConstraintCode::m: // memory
6724 case InlineAsm::ConstraintCode::X:
6725 case InlineAsm::ConstraintCode::p: // address
6726 if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
6727 return true;
6728 break;
6729 }
6730
6731 OutOps.push_back(Op0);
6732 OutOps.push_back(Op1);
6733 OutOps.push_back(Op2);
6734 OutOps.push_back(Op3);
6735 OutOps.push_back(Op4);
6736 return false;
6737}
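SelectInlineAsmMemoryOperand expands one memory-constrained inline-asm operand into the usual five X86 address operands (base, scale, index, displacement, segment) pushed onto OutOps. A GNU-style C++ statement that exercises the "m" path; bumpCounter is an illustrative name:

// The "+m"(counter) operand is what gets decomposed into
// base/scale/index/disp/segment by the routine above.
static void bumpCounter(unsigned int &counter) {
  asm volatile("incl %0" : "+m"(counter));
}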
6738
6739 X86ISelDAGToDAGPass::X86ISelDAGToDAGPass(X86TargetMachine &TM)
6740 : SelectionDAGISelPass(
6741 std::make_unique<X86DAGToDAGISel>(TM, TM.getOptLevel())) {}
6742
6743/// This pass converts a legalized DAG into a X86-specific DAG,
6744/// ready for instruction scheduling.
6745 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
6746 CodeGenOptLevel OptLevel) {
6747 return new X86DAGToDAGISelLegacy(TM, OptLevel);
6748}