LLVM 20.0.0git
X86SelectionDAGInfo.cpp
//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the X86SelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86SelectionDAGInfo.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

#define DEBUG_TYPE "x86-selectiondag-info"

static cl::opt<bool>
    UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
                     cl::desc("Use fast short rep mov in memcpy lowering"));

bool X86SelectionDAGInfo::isBaseRegConflictPossible(
    SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
  // We cannot use TRI->hasBasePointer() until *after* we select all basic
  // blocks. Legalization may introduce new stack temporaries with large
  // alignment requirements. Fall back to generic code if there are any
  // dynamic stack adjustments (hopefully rare) and the base pointer would
  // conflict if we had to use it.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
    return false;

  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      DAG.getSubtarget().getRegisterInfo());
  return llvm::is_contained(ClobberSet, TRI->getBaseRegister());
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo) const {
  // If to a segment-relative address space, use the default lowering.
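  // (Address spaces 256, 257 and 258 denote GS-, FS- and SS-relative accesses
  // on x86.)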
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if (Alignment < Align(4) || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InGlue;
  EVT AVT;
  SDValue Count;
  unsigned BytesLeft = 0;
  if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    if (Alignment >= Align(4)) {
      // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
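      // Splat the byte into every byte of the wider register,
      // e.g. 0xAB -> 0xABABABAB (and 0xABABABABABABABAB for QWORD below).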
      Val = (Val << 8) | Val;
      Val = (Val << 16) | Val;
      if (Subtarget.is64Bit() && Alignment >= Align(8)) { // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
    } else if (Alignment == Align(2)) {
      // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
    } else {
      // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
    }

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
      BytesLeft = SizeVal % UBytes;
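      // e.g. SizeVal = 30 with QWORD stores: Count = 3 full stores and
      // BytesLeft = 6, finished off by the trailing memset further down.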
    }

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
                             InGlue);
    InGlue = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal, dl);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InGlue);
    InGlue = Chain.getValue(1);
  }

  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           Count, InGlue);
  InGlue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InGlue);
  InGlue = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
  SDValue RepStos = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

  // RepStos can process the whole length.
  if (BytesLeft == 0)
    return RepStos;

  // Handle the last 1 - 7 bytes.
  SmallVector<SDValue, 2> Results;
  Results.push_back(RepStos);
  unsigned Offset = SizeVal - BytesLeft;
  EVT AddrVT = Dst.getValueType();
  EVT SizeVT = Size.getValueType();

  Results.push_back(
      DAG.getMemset(Chain, dl,
                    DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                DAG.getConstant(Offset, dl, AddrVT)),
                    Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
                    isVolatile, AlwaysInline,
                    /* CI */ nullptr, DstPtrInfo.getWithOffset(Offset)));

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}

/// Emit a single REP MOVS{B,W,D,Q} instruction.
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                           const SDLoc &dl, SDValue Chain, SDValue Dst,
                           SDValue Src, SDValue Size, MVT AVT) {
  const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
  const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
  const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;

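  // REP MOVS implicitly takes its element count in (R|E)CX, the destination in
  // (R|E)DI and the source in (R|E)SI, so those values are pinned to the
  // physical registers via CopyToReg and glued to the REP_MOVS node.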
  SDValue InGlue;
  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
  InGlue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
  InGlue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InGlue);
  InGlue = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
  return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
}

/// Emit a single REP MOVSB instruction for a particular constant size.
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                            const SDLoc &dl, SDValue Chain, SDValue Dst,
                            SDValue Src, uint64_t Size) {
  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
}

/// Returns the best type to use with repmovs depending on alignment.
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
                                 Align Alignment) {
  uint64_t Align = Alignment.value();
  assert((Align != 0) && "Align is normalized");
  assert(isPowerOf2_64(Align) && "Align is a power of 2");
  switch (Align) {
  case 1:
    return MVT::i8;
  case 2:
    return MVT::i16;
  case 4:
    return MVT::i32;
  default:
    return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
  }
}

/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
/// a constant size memory copy. In some cases where we know REP MOVS is
/// inefficient we return an empty SDValue so the calling code can either
/// generate a load/store sequence or call the runtime memcpy function.
static SDValue emitConstantSizeRepmov(
    SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
    SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
    Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {

  // TODO: Revisit this: big copies with ERMSB on march >= haswell are very
  // efficient.
  if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  // If we have enhanced repmovs we use it.
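  // (ERMSB, "enhanced REP MOVSB", means byte-granular REP MOVSB performs on par
  // with wider-element REP MOVS, so no alignment-based splitting is required.)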
  if (Subtarget.hasERMSB())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
  // We assume runtime memcpy will do a better job for unaligned copies when
  // ERMS is not present.
  if (!AlwaysInline && (Alignment.value() & 3) != 0)
    return SDValue();

  const MVT BlockType = getOptimalRepmovsType(Subtarget, Alignment);
  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
  const uint64_t BlockCount = Size / BlockBytes;
  const uint64_t BytesLeft = Size % BlockBytes;
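  // e.g. a 30-byte copy with 8-byte alignment: BlockType = i64, BlockCount = 3,
  // BytesLeft = 6; the trailing bytes are handled by the getMemcpy call below.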
  SDValue RepMovs =
      emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);

  // RepMovs can process the whole length.
  if (BytesLeft == 0)
    return RepMovs;

  assert(BytesLeft && "We have leftover at this point");

  // In case we optimize for size we use repmovsb even if it's less efficient
  // so we can save the loads/stores of the leftover.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  // Handle the last 1 - 7 bytes.
  SmallVector<SDValue, 2> Results;
  Results.push_back(RepMovs);
  unsigned Offset = Size - BytesLeft;
  EVT DstVT = Dst.getValueType();
  EVT SrcVT = Src.getValueType();
  Results.push_back(DAG.getMemcpy(
      Chain, dl,
      DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
      DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
      DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
      /*AlwaysInline*/ true, /*CI=*/nullptr, std::nullopt,
      DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base registers conflict with our physical registers, use the default
  // lowering.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

  // If enabled and available, use fast short rep mov.
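  // (FSRM, "fast short REP MOV", makes REP MOVSB cheap even for very small
  // copies, so this path skips the usual size and alignment checks.)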
  if (UseFSRMForMemcpy && Subtarget.hasFSRM())
    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);

  // Handle constant sizes.
  if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
    return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
                                  ConstantSize->getZExtValue(),
                                  Size.getValueType(), Alignment, isVolatile,
                                  AlwaysInline, DstPtrInfo, SrcPtrInfo);

  return SDValue();
}