LLVM 20.0.0git
X86SelectionDAGInfo.cpp
Go to the documentation of this file.
1//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the X86SelectionDAGInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86SelectionDAGInfo.h"
14#include "X86ISelLowering.h"
15#include "X86InstrInfo.h"
16#include "X86RegisterInfo.h"
17#include "X86Subtarget.h"
21
22using namespace llvm;
23
24#define DEBUG_TYPE "x86-selectiondag-info"
25
// Hidden command-line switch, off by default. When set, and the subtarget
// reports FSRM, the memcpy lowering in this file emits a byte-granular REP MOVS
// directly instead of going through the constant-size heuristics.
26static cl::opt<bool>
27 UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
28 cl::desc("Use fast short rep mov in memcpy lowering"));
29
30bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
31 return Opcode >= X86ISD::FIRST_MEMORY_OPCODE &&
33}
34
36 return Opcode >= X86ISD::FIRST_STRICTFP_OPCODE &&
38}
39
40/// Returns the best type to use with repmovs/repstos depending on alignment.
41static MVT getOptimalRepType(const X86Subtarget &Subtarget, Align Alignment) {
42 uint64_t Align = Alignment.value();
43 assert((Align != 0) && "Align is normalized");
44 assert(isPowerOf2_64(Align) && "Align is a power of 2");
45 switch (Align) {
46 case 1:
47 return MVT::i8;
48 case 2:
49 return MVT::i16;
50 case 4:
51 return MVT::i32;
52 default:
53 return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
54 }
55}
56
57bool X86SelectionDAGInfo::isBaseRegConflictPossible(
58 SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
59 // We cannot use TRI->hasBasePointer() until *after* we select all basic
60 // blocks. Legalization may introduce new stack temporaries with large
61 // alignment requirements. Fall back to generic code if there are any
62 // dynamic stack adjustments (hopefully rare) and the base pointer would
63 // conflict if we had to use it.
65 if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
66 return false;
67
68 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
70 return llvm::is_contained(ClobberSet, TRI->getBaseRegister());
71}
72
73/// Emit a single REP STOSB instruction for a particular constant size.
74static SDValue emitRepstos(const X86Subtarget &Subtarget, SelectionDAG &DAG,
75 const SDLoc &dl, SDValue Chain, SDValue Dst,
76 SDValue Val, SDValue Size, MVT AVT) {
77 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
78 unsigned AX = X86::AL;
79 switch (AVT.getSizeInBits()) {
80 case 8:
81 AX = X86::AL;
82 break;
83 case 16:
84 AX = X86::AX;
85 break;
86 case 32:
87 AX = X86::EAX;
88 break;
89 default:
90 AX = X86::RAX;
91 break;
92 }
93
94 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
95 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
96
97 SDValue InGlue;
98 Chain = DAG.getCopyToReg(Chain, dl, AX, Val, InGlue);
99 InGlue = Chain.getValue(1);
100 Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
101 InGlue = Chain.getValue(1);
102 Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
103 InGlue = Chain.getValue(1);
104
105 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
106 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
107 return DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
108}
109
110/// Emit a single REP STOSB instruction for a particular constant size.
111static SDValue emitRepstosB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
112 const SDLoc &dl, SDValue Chain, SDValue Dst,
113 SDValue Val, uint64_t Size) {
114 return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
115 DAG.getIntPtrConstant(Size, dl), MVT::i8);
116}
117
118/// Returns a REP STOS instruction, possibly with a few load/stores to implement
119/// a constant size memory set. In some cases where we know REP MOVS is
120/// inefficient we return an empty SDValue so the calling code can either
121/// generate a store sequence or call the runtime memset function.
123 const X86Subtarget &Subtarget,
124 const SDLoc &dl, SDValue Chain,
125 SDValue Dst, SDValue Val, uint64_t Size,
126 EVT SizeVT, Align Alignment,
127 bool isVolatile, bool AlwaysInline,
128 MachinePointerInfo DstPtrInfo) {
129 /// In case we optimize for size, we use repstosb even if it's less efficient
130 /// so we can save the loads/stores of the leftover.
132 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
133 // Special case 0 because otherwise we get large literals,
134 // which causes larger encoding.
135 if ((Size & 31) == 0 && (ValC->getZExtValue() & 255) == 0) {
136 MVT BlockType = MVT::i32;
137 const uint64_t BlockBits = BlockType.getSizeInBits();
138 const uint64_t BlockBytes = BlockBits / 8;
139 const uint64_t BlockCount = Size / BlockBytes;
140
141 Val = DAG.getConstant(0, dl, BlockType);
142 // repstosd is same size as repstosb
143 return emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
144 DAG.getIntPtrConstant(BlockCount, dl), BlockType);
145 }
146 }
147 return emitRepstosB(Subtarget, DAG, dl, Chain, Dst, Val, Size);
148 }
149
150 if (Size > Subtarget.getMaxInlineSizeThreshold())
151 return SDValue();
152
153 // If not DWORD aligned or size is more than the threshold, call the library.
154 // The libc version is likely to be faster for these cases. It can use the
155 // address value and run time information about the CPU.
156 if (Alignment < Align(4))
157 return SDValue();
158
159 MVT BlockType = MVT::i8;
160 uint64_t BlockCount = Size;
161 uint64_t BytesLeft = 0;
162
163 SDValue OriginalVal = Val;
164 if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
165 BlockType = getOptimalRepType(Subtarget, Alignment);
166 uint64_t Value = ValC->getZExtValue() & 255;
167 const uint64_t BlockBits = BlockType.getSizeInBits();
168
169 if (BlockBits >= 16)
170 Value = (Value << 8) | Value;
171
172 if (BlockBits >= 32)
173 Value = (Value << 16) | Value;
174
175 if (BlockBits >= 64)
176 Value = (Value << 32) | Value;
177
178 const uint64_t BlockBytes = BlockBits / 8;
179 BlockCount = Size / BlockBytes;
180 BytesLeft = Size % BlockBytes;
181 Val = DAG.getConstant(Value, dl, BlockType);
182 }
183
184 SDValue RepStos =
185 emitRepstos(Subtarget, DAG, dl, Chain, Dst, Val,
186 DAG.getIntPtrConstant(BlockCount, dl), BlockType);
187 /// RepStos can process the whole length.
188 if (BytesLeft == 0)
189 return RepStos;
190
191 // Handle the last 1 - 7 bytes.
193 Results.push_back(RepStos);
194 unsigned Offset = Size - BytesLeft;
195 EVT AddrVT = Dst.getValueType();
196
197 Results.push_back(
198 DAG.getMemset(Chain, dl,
199 DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
200 DAG.getConstant(Offset, dl, AddrVT)),
201 OriginalVal, DAG.getConstant(BytesLeft, dl, SizeVT),
202 Alignment, isVolatile, AlwaysInline,
203 /* CI */ nullptr, DstPtrInfo.getWithOffset(Offset)));
204
205 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
206}
207
209 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
210 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
211 MachinePointerInfo DstPtrInfo) const {
212 // If to a segment-relative address space, use the default lowering.
213 if (DstPtrInfo.getAddrSpace() >= 256)
214 return SDValue();
215
216 // If the base register might conflict with our physical registers, bail out.
217 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
218 X86::ECX, X86::EAX, X86::EDI};
219 if (isBaseRegConflictPossible(DAG, ClobberSet))
220 return SDValue();
221
222 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
223 if (!ConstantSize)
224 return SDValue();
225
226 const X86Subtarget &Subtarget =
229 DAG, Subtarget, dl, Chain, Dst, Val, ConstantSize->getZExtValue(),
230 Size.getValueType(), Alignment, isVolatile, AlwaysInline, DstPtrInfo);
231}
232
233/// Emit a single REP MOVS{B,W,D,Q} instruction.
234static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
235 const SDLoc &dl, SDValue Chain, SDValue Dst,
236 SDValue Src, SDValue Size, MVT AVT) {
237 const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
238 const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
239 const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
240 const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
241
242 SDValue InGlue;
243 Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
244 InGlue = Chain.getValue(1);
245 Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
246 InGlue = Chain.getValue(1);
247 Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InGlue);
248 InGlue = Chain.getValue(1);
249
250 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
251 SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
252 return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
253}
254
255/// Emit a single REP MOVSB instruction for a particular constant size.
256static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
257 const SDLoc &dl, SDValue Chain, SDValue Dst,
258 SDValue Src, uint64_t Size) {
259 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
260 DAG.getIntPtrConstant(Size, dl), MVT::i8);
261}
262
263/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
264/// a constant size memory copy. In some cases where we know REP MOVS is
265/// inefficient we return an empty SDValue so the calling code can either
266/// generate a load/store sequence or call the runtime memcpy function.
268 SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
269 SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
270 Align Alignment, bool isVolatile, bool AlwaysInline,
271 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
272 /// In case we optimize for size, we use repmovsb even if it's less efficient
273 /// so we can save the loads/stores of the leftover.
275 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
276
277 /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
278 /// efficient.
279 if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
280 return SDValue();
281
282 /// If we have enhanced repmovs we use it.
283 if (Subtarget.hasERMSB())
284 return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
285
286 assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
287 /// We assume runtime memcpy will do a better job for unaligned copies when
288 /// ERMS is not present.
289 if (!AlwaysInline && (Alignment < Align(4)))
290 return SDValue();
291
292 const MVT BlockType = getOptimalRepType(Subtarget, Alignment);
293 const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
294 const uint64_t BlockCount = Size / BlockBytes;
295 const uint64_t BytesLeft = Size % BlockBytes;
296 SDValue RepMovs =
297 emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
298 DAG.getIntPtrConstant(BlockCount, dl), BlockType);
299
300 /// RepMov can process the whole length.
301 if (BytesLeft == 0)
302 return RepMovs;
303
304 assert(BytesLeft && "We have leftover at this point");
305
306 // Handle the last 1 - 7 bytes.
308 Results.push_back(RepMovs);
309 unsigned Offset = Size - BytesLeft;
310 EVT DstVT = Dst.getValueType();
311 EVT SrcVT = Src.getValueType();
312 Results.push_back(DAG.getMemcpy(
313 Chain, dl,
314 DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
315 DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
316 DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
317 /*AlwaysInline*/ true, /*CI=*/nullptr, std::nullopt,
318 DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
319 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
320}
321
323 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
324 SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
325 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
326 // If to a segment-relative address space, use the default lowering.
327 if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
328 return SDValue();
329
330 // If the base registers conflict with our physical registers, use the default
331 // lowering.
332 const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
333 X86::ECX, X86::ESI, X86::EDI};
334 if (isBaseRegConflictPossible(DAG, ClobberSet))
335 return SDValue();
336
337 const X86Subtarget &Subtarget =
339
340 // If enabled and available, use fast short rep mov.
341 if (UseFSRMForMemcpy && Subtarget.hasFSRM())
342 return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
343
344 /// Handle constant sizes
345 if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
346 return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
347 ConstantSize->getZExtValue(),
348 Size.getValueType(), Alignment, isVolatile,
349 AlwaysInline, DstPtrInfo, SrcPtrInfo);
350
351 return SDValue();
352}
Function Alias Analysis Results
uint64_t Size
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
static SDValue emitRepstos(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, SDValue Size, MVT AVT)
Emit a single REP STOS{B,W,D,Q} instruction.
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size)
Emit a single REP MOVSB instruction for a particular constant size.
static SDValue emitRepstosB(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, uint64_t Size)
Emit a single REP STOSB instruction for a particular constant size.
static SDValue emitConstantSizeRepmov(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
Returns a REP MOVS instruction, possibly with a few load/stores to implement a constant size memory copy.
static MVT getOptimalRepType(const X86Subtarget &Subtarget, Align Alignment)
Returns the best type to use with repmovs/repstos depending on alignment.
static cl::opt< bool > UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false), cl::desc("Use fast short rep mov in memcpy lowering"))
static SDValue emitConstantSizeRepstos(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, uint64_t Size, EVT SizeVT, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo)
Returns a REP STOS instruction, possibly with a few load/stores to implement a constant size memory set.
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, MVT AVT)
Emit a single REP MOVS{B,W,D,Q} instruction.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
uint64_t getZExtValue() const
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDValue getValue(unsigned R) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:497
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
LLVM Value Representation.
Definition: Value.h:74
bool isTargetMemoryOpcode(unsigned Opcode) const override
Returns true if a node with the given target-specific opcode has a memory operand.
bool isTargetStrictFPOpcode(unsigned Opcode) const override
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
unsigned getMaxInlineSizeThreshold() const
Returns the maximum memset / memcpy size that still makes it profitable to inline the call.
Definition: X86Subtarget.h:146
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ REP_MOVS
Repeat move, corresponds to X86::REP_MOVSx.
@ REP_STOS
Repeat fill, corresponds to X86::REP_STOSx.
@ FIRST_STRICTFP_OPCODE
X86 strict FP compare instructions.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.