LLVM 19.0.0git
X86SelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the X86SelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86SelectionDAGInfo.h"
14#include "X86ISelLowering.h"
15#include "X86InstrInfo.h"
16#include "X86RegisterInfo.h"
17#include "X86Subtarget.h"
22
23using namespace llvm;
24
25#define DEBUG_TYPE "x86-selectiondag-info"
26
// Hidden command-line switch, off by default (cl::init(false)). The memcpy
// lowering in this file tests it together with Subtarget.hasFSRM() and, when
// both hold, emits a byte-wise REP MOVS without requiring a constant size.
27static cl::opt<bool>
28 UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
29 cl::desc("Use fast short rep mov in memcpy lowering"));
30
// Conservatively reports whether the base register could collide with one of
// the physical registers listed in ClobberSet.
//
// Returns false outright when the frame has neither variable-sized objects nor
// an opaque SP adjustment; otherwise returns whether TRI->getBaseRegister() is
// contained in ClobberSet.
//
// NOTE(review): this listing jumps from source line 37 to 39 and from 42 to 44,
// so the declaration of MFI and the end of the static_cast expression are not
// visible here -- confirm against the upstream file before editing.
31bool X86SelectionDAGInfo::isBaseRegConflictPossible(
32 SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
33 // We cannot use TRI->hasBasePointer() until *after* we select all basic
34 // blocks. Legalization may introduce new stack temporaries with large
35 // alignment requirements. Fall back to generic code if there are any
36 // dynamic stack adjustments (hopefully rare) and the base pointer would
37 // conflict if we had to use it.
39 if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
40 return false;
41
42 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
// Membership test only: any entry of ClobberSet equal to the base register
// counts as a possible conflict.
44 return llvm::is_contained(ClobberSet, TRI->getBaseRegister());
45}
46
// Body of the memset lowering hook: emits an X86ISD::REP_STOS node for a
// constant-size, at-least-4-byte-aligned memset, plus a follow-up
// DAG.getMemset() for any bytes the REP_STOS element size does not cover.
//
// Returns an empty SDValue when the destination address space is >= 256, when
// Alignment < Align(4), when Size is not a ConstantSDNode, or when the size
// exceeds Subtarget.getMaxInlineSizeThreshold(). Otherwise returns the chain
// of the emitted sequence.
//
// NOTE(review): the first line of this definition (source line 47) and the
// initializer of Subtarget (source line 53) are missing from this listing --
// confirm against the upstream file.
48 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
49 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
50 MachinePointerInfo DstPtrInfo) const {
51 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
52 const X86Subtarget &Subtarget =
54
// The base-register conflict is only asserted here, i.e. it is checked in
// builds where NDEBUG is not defined and is not handled otherwise.
55#ifndef NDEBUG
56 // If the base register might conflict with our physical registers, bail out.
57 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
58 X86::ECX, X86::EAX, X86::EDI};
59 assert(!isBaseRegConflictPossible(DAG, ClobberSet));
60#endif
61
62 // If to a segment-relative address space, use the default lowering.
63 if (DstPtrInfo.getAddrSpace() >= 256)
64 return SDValue();
65
66 // If not DWORD aligned or size is more than the threshold, call the library.
67 // The libc version is likely to be faster for these cases. It can use the
68 // address value and run time information about the CPU.
69 if (Alignment < Align(4) || !ConstantSize ||
70 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
71 return SDValue();
72
73 uint64_t SizeVal = ConstantSize->getZExtValue();
74 SDValue InGlue;
// AVT is the element type recorded on the REP_STOS node; Count is the value
// copied into RCX/ECX below.
75 EVT AVT;
76 SDValue Count;
77 unsigned BytesLeft = 0;
78 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
79 unsigned ValReg;
// This local Val hides the SDValue parameter of the same name inside this
// branch. Only the low byte of the constant is kept; the shifts below
// replicate it across the chosen element width.
80 uint64_t Val = ValC->getZExtValue() & 255;
81
82 // If the value is a constant, then we can potentially use larger sets.
83 if (Alignment > Align(2)) {
84 // DWORD aligned
85 AVT = MVT::i32;
86 ValReg = X86::EAX;
87 Val = (Val << 8) | Val;
88 Val = (Val << 16) | Val;
// NOTE(review): the test is strictly greater than Align(8), so an alignment
// of exactly 8 keeps the 32-bit element type -- confirm this is intended.
89 if (Subtarget.is64Bit() && Alignment > Align(8)) { // QWORD aligned
90 AVT = MVT::i64;
91 ValReg = X86::RAX;
92 Val = (Val << 32) | Val;
93 }
// NOTE(review): Alignment < Align(4) already returned above, so the two
// branches below look unreachable -- confirm.
94 } else if (Alignment == Align(2)) {
95 // WORD aligned
96 AVT = MVT::i16;
97 ValReg = X86::AX;
98 Val = (Val << 8) | Val;
99 } else {
100 // Byte aligned
101 AVT = MVT::i8;
102 ValReg = X86::AL;
103 Count = DAG.getIntPtrConstant(SizeVal, dl);
104 }
105
// For elements wider than a byte, count whole elements and remember the
// remainder for the trailing getMemset below.
106 if (AVT.bitsGT(MVT::i8)) {
107 unsigned UBytes = AVT.getSizeInBits() / 8;
108 Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
109 BytesLeft = SizeVal % UBytes;
110 }
111
112 Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
113 InGlue);
114 InGlue = Chain.getValue(1);
115 } else {
// Non-constant fill value: store byte by byte with the value copied to AL.
116 AVT = MVT::i8;
117 Count = DAG.getIntPtrConstant(SizeVal, dl);
118 Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InGlue);
119 InGlue = Chain.getValue(1);
120 }
121
// Count goes to RCX/ECX and the destination to RDI/EDI; each copy is glued to
// the previous one.
122 bool Use64BitRegs = Subtarget.isTarget64BitLP64();
123 Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
124 Count, InGlue);
125 InGlue = Chain.getValue(1);
126 Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
127 Dst, InGlue);
128 InGlue = Chain.getValue(1);
129
130 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
131 SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
132 Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
133
134 if (BytesLeft) {
135 // Handle the last 1 - 7 bytes.
136 unsigned Offset = SizeVal - BytesLeft;
137 EVT AddrVT = Dst.getValueType();
138 EVT SizeVT = Size.getValueType();
139
// Val below is the SDValue parameter again (the uint64_t local is out of
// scope). The tail memset is chained after the REP_STOS node.
140 Chain =
141 DAG.getMemset(Chain, dl,
142 DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
143 DAG.getConstant(Offset, dl, AddrVT)),
144 Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
145 isVolatile, AlwaysInline,
146 /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
147 }
148
149 // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
150 return Chain;
151}
152
153/// Emit a single REP MOVS{B,W,D,Q} instruction.
154static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
155 const SDLoc &dl, SDValue Chain, SDValue Dst,
156 SDValue Src, SDValue Size, MVT AVT) {
157 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
158 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
159 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
160 const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
161
162 SDValue InGlue;
163 Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
164 InGlue = Chain.getValue(1);
165 Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
166 InGlue = Chain.getValue(1);
167 Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InGlue);
168 InGlue = Chain.getValue(1);
169
170 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
171 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
172 return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
173}
174
175/// Emit a single REP MOVSB instruction for a particular constant size.
176static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
177 const SDLoc &dl, SDValue Chain, SDValue Dst,
178 SDValue Src, uint64_t Size) {
179 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
180 DAG.getIntPtrConstant(Size, dl), MVT::i8);
181}
182
183/// Returns the best type to use with repmovs depending on alignment.
184static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
185 Align Alignment) {
186 uint64_t Align = Alignment.value();
187 assert((Align != 0) && "Align is normalized");
188 assert(isPowerOf2_64(Align) && "Align is a power of 2");
189 switch (Align) {
190 case 1:
191 return MVT::i8;
192 case 2:
193 return MVT::i16;
194 case 4:
195 return MVT::i32;
196 default:
197 return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
198 }
199}
200
201/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
202/// a constant size memory copy. In some cases where we know REP MOVS is
203/// inefficient we return an empty SDValue so the calling code can either
204/// generate a load/store sequence or call the runtime memcpy function.
///
/// Visible decision order: bail out when the copy is not AlwaysInline and Size
/// exceeds Subtarget.getMaxInlineSizeThreshold(); use a single byte-wise REP
/// MOVS when Subtarget.hasERMSB(); bail out when the copy is not AlwaysInline
/// and the alignment is not a multiple of 4; otherwise emit a REP MOVS over
/// whole blocks and copy the remaining bytes with an always-inline
/// DAG.getMemcpy(), joining both with a TokenFactor.
///
/// NOTE(review): this listing omits source lines 205 (the first line of the
/// definition), 242 (the statement just before the second emitRepmovsB return)
/// and 246 (the declaration of Results) -- confirm against the upstream file.
206 SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
207 SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
208 Align Alignment, bool isVolatile, bool AlwaysInline,
209 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
210
211 /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
212 /// efficient.
213 if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
214 return SDValue();
215
216 /// If we have enhanced repmovs we use it.
217 if (Subtarget.hasERMSB())
218 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
219
// Always true at this point: the hasERMSB() case returned just above.
220 assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
221 /// We assume runtime memcpy will do a better job for unaligned copies when
222 /// ERMS is not present.
223 if (!AlwaysInline && (Alignment.value() & 3) != 0)
224 return SDValue();
225
// Split the copy into BlockCount elements of BlockBytes bytes each, leaving
// BytesLeft trailing bytes.
226 const MVT BlockType = getOptimalRepmovsType(Subtarget, Alignment);
227 const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
228 const uint64_t BlockCount = Size / BlockBytes;
229 const uint64_t BytesLeft = Size % BlockBytes;
230 SDValue RepMovs =
231 emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
232 DAG.getIntPtrConstant(BlockCount, dl), BlockType);
233
234 /// RepMov can process the whole length.
235 if (BytesLeft == 0)
236 return RepMovs;
237
// Always true at this point: the BytesLeft == 0 case returned just above.
238 assert(BytesLeft && "We have leftover at this point");
239
240 /// In case we optimize for size we use repmovsb even if it's less efficient
241 /// so we can save the loads/stores of the leftover.
// NOTE(review): the statement preceding this return is not in the listing; as
// shown the return would be unconditional. When taken, the RepMovs node built
// above is not part of the returned value.
243 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
244
245 // Handle the last 1 - 7 bytes.
247 Results.push_back(RepMovs);
// NOTE(review): Offset is unsigned while Size is uint64_t, and the size
// threshold check at the top is skipped when AlwaysInline is set, so a Size of
// 2^32 or more would be narrowed here -- confirm whether callers can pass one.
248 unsigned Offset = Size - BytesLeft;
249 EVT DstVT = Dst.getValueType();
250 EVT SrcVT = Src.getValueType();
// The tail copy hangs off the incoming Chain rather than off RepMovs; the
// TokenFactor below is what joins the two.
251 Results.push_back(DAG.getMemcpy(
252 Chain, dl,
253 DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
254 DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
255 DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
256 /*AlwaysInline*/ true, /*isTailCall*/ false,
257 DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
258 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
259}
260
// Body of the memcpy lowering hook. Returns an empty SDValue when either
// pointer lives in an address space >= 256, when isBaseRegConflictPossible()
// reports a possible clash with the registers listed in ClobberSet, or when
// Size is not a constant and the FSRM path is not taken. Otherwise returns
// whatever emitRepmovs() / emitConstantSizeRepmov() produce.
//
// NOTE(review): the first line of this definition (source line 261) and the
// initializer of Subtarget (source line 277) are missing from this listing --
// confirm against the upstream file.
262 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
263 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
264 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
265 // If to a segment-relative address space, use the default lowering.
266 if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
267 return SDValue();
268
269 // If the base registers conflict with our physical registers, use the default
270 // lowering.
271 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
272 X86::ECX, X86::ESI, X86::EDI};
273 if (isBaseRegConflictPossible(DAG, ClobberSet))
274 return SDValue();
275
276 const X86Subtarget &Subtarget =
278
279 // If enabled and available, use fast short rep mov.
// This path runs before the constant-size check, so Size may be any SDValue;
// it is passed through with a byte (MVT::i8) element type.
280 if (UseFSRMForMemcpy && Subtarget.hasFSRM())
281 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
282
283 /// Handle constant sizes,
284 if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
285 return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
286 ConstantSize->getZExtValue(),
287 Size.getValueType(), Alignment, isVolatile,
288 AlwaysInline, DstPtrInfo, SrcPtrInfo);
289
// Non-constant size without FSRM: leave the lowering to the caller.
290 return SDValue();
291}
Function Alias Analysis Results
uint64_t Size
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size)
Emit a single REP MOVSB instruction for a particular constant size.
static SDValue emitConstantSizeRepmov(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
Returns a REP MOVS instruction, possibly with a few load/stores to implement a constant size memory c...
static cl::opt< bool > UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false), cl::desc("Use fast short rep mov in memcpy lowering"))
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, MVT AVT)
Emit a single REP MOVS{B,W,D,Q} instruction.
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget, Align Alignment)
Returns the best type to use with repmovs depending on alignment.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:677
Machine Value Type.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:185
unsigned getMaxInlineSizeThreshold() const
Returns the maximum memset / memcpy size that still makes it profitable to inline the call.
Definition: X86Subtarget.h:153
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ REP_MOVS
Repeat move, corresponds to X86::REP_MOVSx.
@ REP_STOS
Repeat fill, corresponds to X86::REP_STOSx.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.