Go to the documentation of this file.
25 #define DEBUG_TYPE "x86-selectiondag-info"
29 cl::desc(
"Use fast short rep mov in memcpy lowering"));
31 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
49 SDValue Size,
Align Alignment,
bool isVolatile,
bool AlwaysInline,
57 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
59 assert(!isBaseRegConflictPossible(DAG, ClobberSet));
69 if (Alignment <
Align(4) || !ConstantSize ||
78 unsigned BytesLeft = 0;
84 if (Alignment >
Align(2)) {
88 Val = (Val << 8) | Val;
89 Val = (Val << 16) | Val;
90 if (Subtarget.is64Bit() && Alignment >
Align(8)) {
93 Val = (Val << 32) | Val;
95 }
else if (Alignment ==
Align(2)) {
99 Val = (Val << 8) | Val;
110 BytesLeft = SizeVal % UBytes;
137 unsigned Offset = SizeVal - BytesLeft;
138 EVT AddrVT = Dst.getValueType();
139 EVT SizeVT = Size.getValueType();
145 Val, DAG.
getConstant(BytesLeft, dl, SizeVT), Alignment,
146 isVolatile, AlwaysInline,
159 const unsigned CX = Use64BitRegs ? X86::RCX :
X86::ECX;
160 const unsigned DI = Use64BitRegs ? X86::RDI :
X86::EDI;
161 const unsigned SI = Use64BitRegs ? X86::RSI :
X86::ESI;
180 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
208 unsigned Align,
bool isVolatile,
bool AlwaysInline,
217 if (Subtarget.hasERMSB())
218 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
220 assert(!Subtarget.hasERMSB() &&
"No efficient RepMovs");
223 if (!AlwaysInline && (
Align & 3) != 0)
228 const uint64_t BlockCount = Size / BlockBytes;
229 const uint64_t BytesLeft = Size % BlockBytes;
238 assert(BytesLeft &&
"We have leftover at this point");
243 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
248 unsigned Offset = Size - BytesLeft;
249 EVT DstVT = Dst.getValueType();
250 EVT SrcVT = Src.getValueType();
263 SDValue Size,
Align Alignment,
bool isVolatile,
bool AlwaysInline,
271 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
273 if (isBaseRegConflictPossible(DAG, ClobberSet))
284 if (
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
286 DAG, Subtarget, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
287 Size.getValueType(), Alignment.
value(), isVolatile, AlwaysInline,
288 DstPtrInfo, SrcPtrInfo);
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack frame for this function contains any variable sized objects.
This is an optimization pass for GlobalISel generic memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getValueType(EVT)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static cl::opt< bool > UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false), cl::desc("Use fast short rep mov in memcpy lowering"))
Function Alias Analysis Results
static SDValue emitConstantSizeRepmov(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
Returns a REP MOVS instruction, possibly with a few load/stores to implement a constant size memory copy.
const TargetSubtargetInfo & getSubtarget() const
unsigned const TargetRegisterInfo * TRI
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size)
Emit a single REP MOVSB instruction for a particular constant size.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation suitable for instruction selection.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, MVT AVT)
Emit a single REP MOVS{B,W,D,Q} instruction.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget, uint64_t Align)
Returns the best type to use with repmovs depending on alignment.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo=AAMDNodes())
This class contains a discriminated union of information about pointers in memory operands,...
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
initializer< Ty > init(const Ty &Val)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
SDValue getValue(unsigned R) const
@ REP_STOS
Repeat fill, corresponds to X86::REP_STOSx.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
StandardInstrumentations SI(Debug, VerifyEach)
uint64_t getZExtValue() const
MachinePointerInfo getWithOffset(int64_t O) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
@ REP_MOVS
Repeat move, corresponds to X86::REP_MOVSx.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
Function & getFunction()
Return the LLVM function that this machine code represents.
uint64_t value() const
This is a hole in the type system and should not be abused.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
@ ADD
Simple integer binary arithmetic operators.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
BlockType
Used as immediate MachineOperands for block signatures.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
MachineFunction & getMachineFunction() const
unsigned getMaxInlineSizeThreshold() const
Returns the maximum memset / memcpy size that still makes it profitable to inline the call.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.