LLVM 20.0.0git
X86SelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the X86SelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86SelectionDAGInfo.h"
14#include "X86ISelLowering.h"
15#include "X86InstrInfo.h"
16#include "X86RegisterInfo.h"
17#include "X86Subtarget.h"
21
22using namespace llvm;
23
24#define DEBUG_TYPE "x86-selectiondag-info"
25
26static cl::opt<bool>
27 UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
28 cl::desc("Use fast short rep mov in memcpy lowering"));
29
30/// Returns the best type to use with repmovs/repstos depending on alignment.
31static MVT getOptimalRepType(const X86Subtarget &Subtarget, Align Alignment) {
32 uint64_t Align = Alignment.value();
33 assert((Align != 0) && "Align is normalized");
34 assert(isPowerOf2_64(Align) && "Align is a power of 2");
35 switch (Align) {
36 case 1:
37 return MVT::i8;
38 case 2:
39 return MVT::i16;
40 case 4:
41 return MVT::i32;
42 default:
43 return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
44 }
45}
46
47bool X86SelectionDAGInfo::isBaseRegConflictPossible(
48 SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
49 // We cannot use TRI->hasBasePointer() until *after* we select all basic
50 // blocks. Legalization may introduce new stack temporaries with large
51 // alignment requirements. Fall back to generic code if there are any
52 // dynamic stack adjustments (hopefully rare) and the base pointer would
53 // conflict if we had to use it.
55 if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
56 return false;
57
58 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
60 return llvm::is_contained(ClobberSet, TRI->getBaseRegister());
61}
62
63/// Emit a single REP STOSB instruction for a particular constant size.
64static SDValue emitRepstos(const X86Subtarget &Subtarget, SelectionDAG &DAG,
65 const SDLoc &dl, SDValue Chain, SDValue Dst,
66 SDValue Val, SDValue Size, MVT AVT) {
67 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
68 unsigned AX = X86::AL;
69 switch (AVT.getSizeInBits()) {
70 case 8:
71 AX = X86::AL;
72 break;
73 case 16:
74 AX = X86::AX;
75 break;
76 case 32:
77 AX = X86::EAX;
78 break;
79 default:
80 AX = X86::RAX;
81 break;
82 }
83
84 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
85 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
86
87 SDValue InGlue;
88 Chain = DAG.getCopyToReg(Chain, dl, AX, Val, InGlue);
89 InGlue = Chain.getValue(1);
90 Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
91 InGlue = Chain.getValue(1);
92 Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
93 InGlue = Chain.getValue(1);
94
95 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
96 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
97 return DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
98}
99
100/// Emit a single REP STOSB instruction for a particular constant size.
101static SDValue emitRepstosB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
102 const SDLoc &dl, SDValue Chain, SDValue Dst,
103 SDValue Val, uint64_t Size) {
104 return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
105 DAG.getIntPtrConstant(Size, dl), MVT::i8);
106}
107
108/// Returns a REP STOS instruction, possibly with a few load/stores to implement
109/// a constant size memory set. In some cases where we know REP MOVS is
110/// inefficient we return an empty SDValue so the calling code can either
111/// generate a store sequence or call the runtime memset function.
113 const X86Subtarget &Subtarget,
114 const SDLoc &dl, SDValue Chain,
115 SDValue Dst, SDValue Val, uint64_t Size,
116 EVT SizeVT, Align Alignment,
117 bool isVolatile, bool AlwaysInline,
118 MachinePointerInfo DstPtrInfo) {
119 /// In case we optimize for size, we use repstosb even if it's less efficient
120 /// so we can save the loads/stores of the leftover.
122 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
123 // Special case 0 because otherwise we get large literals,
124 // which causes larger encoding.
125 if ((Size & 31) == 0 && (ValC->getZExtValue() & 255) == 0) {
126 MVT BlockType = MVT::i32;
127 const uint64_t BlockBits = BlockType.getSizeInBits();
128 const uint64_t BlockBytes = BlockBits / 8;
129 const uint64_t BlockCount = Size / BlockBytes;
130
131 Val = DAG.getConstant(0, dl, BlockType);
132 // repstosd is same size as repstosb
133 return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
134 DAG.getIntPtrConstant(BlockCount, dl), BlockType);
135 }
136 }
137 return emitRepstosB(Subtarget, DAG, dl, Chain, Dst, Val, Size);
138 }
139
140 if (Size > Subtarget.getMaxInlineSizeThreshold())
141 return SDValue();
142
143 // If not DWORD aligned or size is more than the threshold, call the library.
144 // The libc version is likely to be faster for these cases. It can use the
145 // address value and run time information about the CPU.
146 if (Alignment < Align(4))
147 return SDValue();
148
149 MVT BlockType = MVT::i8;
150 uint64_t BlockCount = Size;
151 uint64_t BytesLeft = 0;
152
153 SDValue OriginalVal = Val;
154 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
155 BlockType = getOptimalRepType(Subtarget, Alignment);
156 uint64_t Value = ValC->getZExtValue() & 255;
157 const uint64_t BlockBits = BlockType.getSizeInBits();
158
159 if (BlockBits >= 16)
160 Value = (Value << 8) | Value;
161
162 if (BlockBits >= 32)
163 Value = (Value << 16) | Value;
164
165 if (BlockBits >= 64)
166 Value = (Value << 32) | Value;
167
168 const uint64_t BlockBytes = BlockBits / 8;
169 BlockCount = Size / BlockBytes;
170 BytesLeft = Size % BlockBytes;
171 Val = DAG.getConstant(Value, dl, BlockType);
172 }
173
174 SDValue RepStos =
175 emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
176 DAG.getIntPtrConstant(BlockCount, dl), BlockType);
177 /// RepStos can process the whole length.
178 if (BytesLeft == 0)
179 return RepStos;
180
181 // Handle the last 1 - 7 bytes.
183 Results.push_back(RepStos);
184 unsigned Offset = Size - BytesLeft;
185 EVT AddrVT = Dst.getValueType();
186
187 Results.push_back(
188 DAG.getMemset(Chain, dl,
189 DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
190 DAG.getConstant(Offset, dl, AddrVT)),
191 OriginalVal, DAG.getConstant(BytesLeft, dl, SizeVT),
192 Alignment, isVolatile, AlwaysInline,
193 /* CI */ nullptr, DstPtrInfo.getWithOffset(Offset)));
194
195 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
196}
197
199 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
200 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
201 MachinePointerInfo DstPtrInfo) const {
202 // If to a segment-relative address space, use the default lowering.
203 if (DstPtrInfo.getAddrSpace() >= 256)
204 return SDValue();
205
206 // If the base register might conflict with our physical registers, bail out.
207 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
208 X86::ECX, X86::EAX, X86::EDI};
209 if (isBaseRegConflictPossible(DAG, ClobberSet))
210 return SDValue();
211
212 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
213 if (!ConstantSize)
214 return SDValue();
215
216 const X86Subtarget &Subtarget =
219 DAG, Subtarget, dl, Chain, Dst, Val, ConstantSize->getZExtValue(),
220 Size.getValueType(), Alignment, isVolatile, AlwaysInline, DstPtrInfo);
221}
222
223/// Emit a single REP MOVS{B,W,D,Q} instruction.
224static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
225 const SDLoc &dl, SDValue Chain, SDValue Dst,
226 SDValue Src, SDValue Size, MVT AVT) {
227 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
228 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
229 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
230 const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
231
232 SDValue InGlue;
233 Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
234 InGlue = Chain.getValue(1);
235 Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
236 InGlue = Chain.getValue(1);
237 Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InGlue);
238 InGlue = Chain.getValue(1);
239
240 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
241 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
242 return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
243}
244
245/// Emit a single REP MOVSB instruction for a particular constant size.
246static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
247 const SDLoc &dl, SDValue Chain, SDValue Dst,
248 SDValue Src, uint64_t Size) {
249 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
250 DAG.getIntPtrConstant(Size, dl), MVT::i8);
251}
252
253/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
254/// a constant size memory copy. In some cases where we know REP MOVS is
255/// inefficient we return an empty SDValue so the calling code can either
256/// generate a load/store sequence or call the runtime memcpy function.
258 SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
259 SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
260 Align Alignment, bool isVolatile, bool AlwaysInline,
261 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
262 /// In case we optimize for size, we use repmovsb even if it's less efficient
263 /// so we can save the loads/stores of the leftover.
265 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
266
267 /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
268 /// efficient.
269 if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
270 return SDValue();
271
272 /// If we have enhanced repmovs we use it.
273 if (Subtarget.hasERMSB())
274 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
275
276 assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
277 /// We assume runtime memcpy will do a better job for unaligned copies when
278 /// ERMS is not present.
279 if (!AlwaysInline && (Alignment < Align(4)))
280 return SDValue();
281
282 const MVT BlockType = getOptimalRepType(Subtarget, Alignment);
283 const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
284 const uint64_t BlockCount = Size / BlockBytes;
285 const uint64_t BytesLeft = Size % BlockBytes;
286 SDValue RepMovs =
287 emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
288 DAG.getIntPtrConstant(BlockCount, dl), BlockType);
289
290 /// RepMov can process the whole length.
291 if (BytesLeft == 0)
292 return RepMovs;
293
294 assert(BytesLeft && "We have leftover at this point");
295
296 // Handle the last 1 - 7 bytes.
298 Results.push_back(RepMovs);
299 unsigned Offset = Size - BytesLeft;
300 EVT DstVT = Dst.getValueType();
301 EVT SrcVT = Src.getValueType();
302 Results.push_back(DAG.getMemcpy(
303 Chain, dl,
304 DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
305 DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
306 DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
307 /*AlwaysInline*/ true, /*CI=*/nullptr, std::nullopt,
308 DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
309 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
310}
311
313 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
314 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
315 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
316 // If to a segment-relative address space, use the default lowering.
317 if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
318 return SDValue();
319
320 // If the base registers conflict with our physical registers, use the default
321 // lowering.
322 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
323 X86::ECX, X86::ESI, X86::EDI};
324 if (isBaseRegConflictPossible(DAG, ClobberSet))
325 return SDValue();
326
327 const X86Subtarget &Subtarget =
329
330 // If enabled and available, use fast short rep mov.
331 if (UseFSRMForMemcpy && Subtarget.hasFSRM())
332 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
333
334 /// Handle constant sizes
335 if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
336 return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
337 ConstantSize->getZExtValue(),
338 Size.getValueType(), Alignment, isVolatile,
339 AlwaysInline, DstPtrInfo, SrcPtrInfo);
340
341 return SDValue();
342}
Function Alias Analysis Results
uint64_t Size
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
static SDValue emitRepstos(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, SDValue Size, MVT AVT)
Emit a single REP STOSB instruction for a particular constant size.
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size)
Emit a single REP MOVSB instruction for a particular constant size.
static SDValue emitRepstosB(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, uint64_t Size)
Emit a single REP STOSB instruction for a particular constant size.
static SDValue emitConstantSizeRepmov(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
Returns a REP MOVS instruction, possibly with a few load/stores to implement a constant size memory c...
static MVT getOptimalRepType(const X86Subtarget &Subtarget, Align Alignment)
Returns the best type to use with repmovs/repstos depending on alignment.
static cl::opt< bool > UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false), cl::desc("Use fast short rep mov in memcpy lowering"))
static SDValue emitConstantSizeRepstos(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, uint64_t Size, EVT SizeVT, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo)
Returns a REP STOS instruction, possibly with a few load/stores to implement a constant size memory s...
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, MVT AVT)
Emit a single REP MOVS{B,W,D,Q} instruction.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:497
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
LLVM Value Representation.
Definition: Value.h:74
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
unsigned getMaxInlineSizeThreshold() const
Returns the maximum memset / memcpy size that still makes it profitable to inline the call.
Definition: X86Subtarget.h:146
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ REP_MOVS
Repeat move, corresponds to X86::REP_MOVSx.
@ REP_STOS
Repeat fill, corresponds to X86::REP_STOSx.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.