LLVM  12.0.0git
X86SelectionDAGInfo.cpp
Go to the documentation of this file.
1 //===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the X86SelectionDAGInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "X86SelectionDAGInfo.h"
14 #include "X86ISelLowering.h"
15 #include "X86InstrInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
21 #include "llvm/IR/DerivedTypes.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "x86-selectiondag-info"
26 
/// Hidden command-line flag "x86-use-fsrm-for-memcpy", off by default.
/// The memcpy lowering in this file consults it together with
/// Subtarget.hasFSRM() before emitting a byte-wise REP MOVS.
27 static cl::opt<bool>
28  UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
29  cl::desc("Use fast short rep mov in memcpy lowering"));
30 
/// Reports whether the base register may be one of the physical registers
/// listed in \p ClobberSet.
///
/// Returns false immediately when the frame has neither variable-sized objects
/// nor opaque SP adjustments. Otherwise returns true iff
/// TRI->getBaseRegister() compares equal to some entry of \p ClobberSet.
31 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
32  SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
33  // We cannot use TRI->hasBasePointer() until *after* we select all basic
34  // blocks. Legalization may introduce new stack temporaries with large
35  // alignment requirements. Fall back to generic code if there are any
36  // dynamic stack adjustments (hopefully rare) and the base pointer would
37  // conflict if we had to use it.
    // NOTE(review): listing line 38 is missing here. MFI is used below without
    // a visible declaration; presumably it is the function's MachineFrameInfo.
    // Confirm against the upstream file.
39  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
40  return false;
41 
    // NOTE(review): listing line 43 (the operand of this static_cast) is
    // missing from this listing.
42  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
44  Register BaseReg = TRI->getBaseRegister();
    // Linear scan: a single match is enough to report a possible conflict.
45  for (unsigned R : ClobberSet)
46  if (BaseReg == R)
47  return true;
48  return false;
49 }
50 
// NOTE(review): the line that opens this definition (listing line 51) is not
// present in this listing. The symbol index at the end of the listing names
// the function EmitTargetCodeForMemset ("Emit target-specific code that
// performs a memset").
//
// Lowers a memset of Size bytes at Dst to a REP STOS sequence:
//  * Returns an empty SDValue when the destination address space is >= 256.
//  * If the destination is less than 4-byte aligned, Size is not a constant,
//    or Size exceeds Subtarget.getMaxInlineSizeThreshold(): emits a call to
//    the BZERO libcall when Val is a constant zero and that libcall has a
//    name, and otherwise returns an empty SDValue.
//  * Otherwise copies the fill value, the element count and the destination
//    into the A/CX/DI registers, emits X86ISD::REP_STOS, and stores any
//    remaining tail bytes through DAG.getMemset.
// Returns the resulting chain, or an empty SDValue as described above.
52  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
53  SDValue Size, Align Alignment, bool isVolatile,
54  MachinePointerInfo DstPtrInfo) const {
    // Null unless Size is a ConstantSDNode.
55  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
    // NOTE(review): listing line 57 (the initializer of Subtarget) is missing.
56  const X86Subtarget &Subtarget =
58 
59 #ifndef NDEBUG
60  // In builds with assertions, check that the base register cannot conflict
    // with the physical registers used below.
    // NOTE(review): listing line 62 (the rest of this initializer) is missing.
61  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
63  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
64 #endif
65 
66  // If the destination is in a segment-relative address space (>= 256), use
    // the default lowering.
67  if (DstPtrInfo.getAddrSpace() >= 256)
68  return SDValue();
69 
70  // If not DWORD aligned, if the size is not a constant, or if the size is
71  // more than the threshold, call the library. The libc version is likely to
72  // be faster for these cases. It can use the address value and run time
    // information about the CPU.
73  if (Alignment < Align(4) || !ConstantSize ||
74  ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
75  // Check to see if there is a specialized entry-point for memory zeroing.
76  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
77 
     // bzeroName is non-null only for a constant-zero fill value when the
     // BZERO libcall has a name.
78  if (const char *bzeroName = (ValC && ValC->isNullValue())
79  ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
80  : nullptr) {
81  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
82  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
83  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
     // NOTE(review): listing lines 84-85 are missing; Args and Entry are used
     // below without a visible declaration.
     // Two arguments are passed, both typed as IntPtrTy: Dst, then Size.
86  Entry.Node = Dst;
87  Entry.Ty = IntPtrTy;
88  Args.push_back(Entry);
89  Entry.Node = Size;
90  Args.push_back(Entry);
91 
     // NOTE(review): listing lines 92 and 95 are missing; CLI has no visible
     // declaration and the call that takes the next two arguments is not
     // shown.
93  CLI.setDebugLoc(dl)
94  .setChain(Chain)
96  DAG.getExternalSymbol(bzeroName, IntPtr),
97  std::move(Args))
98  .setDiscardResult();
99 
      // Only the second element of the lowered call's result pair is
      // returned.
100  std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
101  return CallResult.second;
102  }
103 
104  // Otherwise have the target-independent code call memset.
105  return SDValue();
106  }
107 
      // From here on: constant size within the threshold, alignment >= 4.
108  uint64_t SizeVal = ConstantSize->getZExtValue();
109  SDValue InFlag;
110  EVT AVT;
111  SDValue Count;
112  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
113  unsigned BytesLeft = 0;
114  if (ValC) {
115  unsigned ValReg;
      // This local shadows the SDValue parameter Val for the rest of this
      // branch; it holds the low 8 bits of the constant fill value.
116  uint64_t Val = ValC->getZExtValue() & 255;
117 
118  // If the value is a constant, then we can potentially use larger sets.
119  if (Alignment > Align(2)) {
120  // DWORD aligned
121  AVT = MVT::i32;
122  ValReg = X86::EAX;
      // Replicate the fill byte into all four bytes of the i32.
123  Val = (Val << 8) | Val;
124  Val = (Val << 16) | Val;
      // NOTE(review): `Alignment > Align(8)` is false for a destination that
      // is exactly 8-byte aligned, so that case stays on the i32 path despite
      // the "QWORD aligned" remark; `>=` may have been intended. Confirm.
125  if (Subtarget.is64Bit() && Alignment > Align(8)) { // QWORD aligned
126  AVT = MVT::i64;
127  ValReg = X86::RAX;
128  Val = (Val << 32) | Val;
129  }
130  } else if (Alignment == Align(2)) {
131  // WORD aligned
132  AVT = MVT::i16;
133  ValReg = X86::AX;
134  Val = (Val << 8) | Val;
135  } else {
136  // Byte aligned
137  AVT = MVT::i8;
138  ValReg = X86::AL;
139  Count = DAG.getIntPtrConstant(SizeVal, dl);
140  }
141 
      // For element types wider than a byte, store SizeVal / UBytes whole
      // elements and leave the remainder (BytesLeft) for the tail below.
142  if (AVT.bitsGT(MVT::i8)) {
143  unsigned UBytes = AVT.getSizeInBits() / 8;
144  Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
145  BytesLeft = SizeVal % UBytes;
146  }
147 
      // Put the replicated constant into the accumulator register chosen
      // above.
148  Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
149  InFlag);
150  InFlag = Chain.getValue(1);
151  } else {
      // Non-constant fill value: store byte by byte, with the value in AL.
152  AVT = MVT::i8;
153  Count = DAG.getIntPtrConstant(SizeVal, dl);
154  Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
155  InFlag = Chain.getValue(1);
156  }
157 
      // The count goes to (R|E)CX and the destination to (R|E)DI; each copy
      // is linked to the previous one through InFlag.
158  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
159  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
160  Count, InFlag);
161  InFlag = Chain.getValue(1);
162  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
163  Dst, InFlag);
164  InFlag = Chain.getValue(1);
165 
      // NOTE(review): listing line 166 is missing; Tys is used below without
      // a visible declaration.
167  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
168  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
169 
170  if (BytesLeft) {
171  // Handle the last 1 - 7 bytes.
172  unsigned Offset = SizeVal - BytesLeft;
173  EVT AddrVT = Dst.getValueType();
174  EVT SizeVT = Size.getValueType();
175 
      // Val here is the SDValue parameter again (the shadowing local above is
      // out of scope). The tail is stored at Dst + Offset, chained after the
      // REP_STOS node.
176  Chain =
177  DAG.getMemset(Chain, dl,
178  DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
179  DAG.getConstant(Offset, dl, AddrVT)),
180  Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
181  isVolatile, false, DstPtrInfo.getWithOffset(Offset));
182  }
183 
184  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
185  return Chain;
186 }
187 
188 /// Emit a single REP MOVS{B,W,D,Q} instruction.
///
/// Copies \p Size into (R|E)CX, \p Dst into (R|E)DI and \p Src into (R|E)SI,
/// using the 64-bit registers when Subtarget.isTarget64BitLP64() is true, and
/// then builds an X86ISD::REP_MOVS node whose operands are the chain, the
/// value type \p AVT and the glue of the last register copy. Callers in this
/// file pass \p Size as a count of AVT-sized elements.
/// \returns the REP_MOVS node.
189 static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
190  const SDLoc &dl, SDValue Chain, SDValue Dst,
191  SDValue Src, SDValue Size, MVT AVT) {
192  const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
193  const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
194  const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
195  const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
196 
      // Each copy is linked to the previous one through InFlag (result #1 of
      // the CopyToReg node).
197  SDValue InFlag;
198  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
199  InFlag = Chain.getValue(1);
200  Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
201  InFlag = Chain.getValue(1);
202  Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InFlag);
203  InFlag = Chain.getValue(1);
204 
      // NOTE(review): listing line 205 is missing; Tys is used below without
      // a visible declaration.
206  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InFlag};
207  return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
208 }
209 
210 /// Emit a single REP MOVSB instruction for a particular constant size.
211 static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
212  const SDLoc &dl, SDValue Chain, SDValue Dst,
213  SDValue Src, uint64_t Size) {
214  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
215  DAG.getIntPtrConstant(Size, dl), MVT::i8);
216 }
217 
218 /// Returns the best type to use with repmovs depending on alignment.
219 static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
220  uint64_t Align) {
221  assert((Align != 0) && "Align is normalized");
222  assert(isPowerOf2_64(Align) && "Align is a power of 2");
223  switch (Align) {
224  case 1:
225  return MVT::i8;
226  case 2:
227  return MVT::i16;
228  case 4:
229  return MVT::i32;
230  default:
231  return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
232  }
233 }
234 
235 /// Returns a REP MOVS instruction, possibly with a few load/stores to implement
236 /// a constant size memory copy. In some cases where we know REP MOVS is
237 /// inefficient we return an empty SDValue so the calling code can either
238 /// generate a load/store sequence or call the runtime memcpy function.
///
/// Decision order, as implemented below:
///  1. Not AlwaysInline and Size above getMaxInlineSizeThreshold(): empty
///     SDValue.
///  2. Subtarget has ERMSB: a single REP MOVSB covering all Size bytes.
///  3. Not AlwaysInline and Align is not a multiple of 4: empty SDValue.
///  4. Otherwise a REP MOVS with the widest element type for Align, plus a
///     separate inline memcpy for the Size % BlockBytes leftover bytes.
///
/// NOTE(review): the line that opens this definition (listing line 239) is
/// missing from this listing. The symbol index at the end of the listing gives
/// it as `static SDValue emitConstantSizeRepmov(`.
240  SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
241  SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
242  unsigned Align, bool isVolatile, bool AlwaysInline,
243  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
244 
245  /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
246  /// efficient.
247  if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
248  return SDValue();
249 
250  /// If we have enhanced repmovs we use it.
251  if (Subtarget.hasERMSB())
252  return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
253 
      // Always true here because of the early return just above.
254  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
255  /// We assume runtime memcpy will do a better job for unaligned copies when
256  /// ERMS is not present.
257  if (!AlwaysInline && (Align & 3) != 0)
258  return SDValue();
259 
      // Split Size into BlockCount whole elements of BlockType plus BytesLeft
      // remaining bytes.
260  const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
261  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
262  const uint64_t BlockCount = Size / BlockBytes;
263  const uint64_t BytesLeft = Size % BlockBytes;
264  SDValue RepMovs =
265  emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
266  DAG.getIntPtrConstant(BlockCount, dl), BlockType);
267 
268  /// RepMov can process the whole length.
269  if (BytesLeft == 0)
270  return RepMovs;
271 
      // Always true here because of the early return just above.
272  assert(BytesLeft && "We have leftover at this point");
273 
274  /// In case we optimize for size we use repmovsb even if it's less efficient
275  /// so we can save the loads/stores of the leftover.
      // NOTE(review): listing line 276 is missing, so the condition guarding
      // the return below is not visible here. Presumably it is the
      // optimize-for-size test that the comment above describes; confirm
      // against the upstream file.
277  return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
278 
279  // Handle the last 1 - 7 bytes.
      // NOTE(review): listing line 280 is missing; Results is used below
      // without a visible declaration.
281  Results.push_back(RepMovs);
      // NOTE(review): Size is uint64_t but Offset is unsigned, and the size
      // threshold check at the top is skipped when AlwaysInline is set. This
      // narrowing may truncate for very large sizes; confirm.
282  unsigned Offset = Size - BytesLeft;
283  EVT DstVT = Dst.getValueType();
284  EVT SrcVT = Src.getValueType();
      // The leftover copy starts from the incoming Chain rather than from
      // RepMovs; both results are joined by the TokenFactor returned below.
285  Results.push_back(DAG.getMemcpy(
286  Chain, dl,
287  DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
288  DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
289  DAG.getConstant(BytesLeft, dl, SizeVT), llvm::Align(Align), isVolatile,
290  /*AlwaysInline*/ true, /*isTailCall*/ false,
291  DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
292  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
293 }
294 
// NOTE(review): the line that opens this definition (listing line 295) is not
// present in this listing. The symbol index at the end of the listing names
// the function EmitTargetCodeForMemcpy ("Emit target-specific code that
// performs a memcpy").
//
// Lowers a memcpy to REP MOVS where possible. Returns an empty SDValue when
// either pointer is in an address space >= 256, when the base register may
// conflict with the registers in ClobberSet, or when Size is not a constant
// and the FSRM path is not taken. Otherwise returns the node produced by
// emitRepmovs (FSRM path) or emitConstantSizeRepmov (constant sizes), which
// may itself be empty.
296  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
297  SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
298  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
299  // If either the destination or the source is in a segment-relative address
     // space (>= 256), use the default lowering.
300  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
301  return SDValue();
302 
303  // If the base registers conflict with our physical registers, use the default
304  // lowering.
     // NOTE(review): listing line 306 (the rest of this initializer) is
     // missing.
305  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
307  if (isBaseRegConflictPossible(DAG, ClobberSet))
308  return SDValue();
309 
     // NOTE(review): listing line 311 (the initializer of Subtarget) is
     // missing.
310  const X86Subtarget &Subtarget =
312 
313  // If enabled and available, use fast short rep mov.
     // Size is passed through as-is, so this path does not need a constant.
314  if (UseFSRMForMemcpy && Subtarget.hasFSRM())
315  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
316 
317  /// Handle constant sizes.
318  if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
319  return emitConstantSizeRepmov(
320  DAG, Subtarget, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
321  Size.getValueType(), Alignment.value(), isVolatile, AlwaysInline,
322  DstPtrInfo, SrcPtrInfo);
323 
     // Non-constant size: leave the copy to the default lowering.
324  return SDValue();
325 }
static SDValue emitConstantSizeRepmov(SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
Returns a REP MOVS instruction, possibly with a few load/stores to implement a constant size memory c...
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:592
static cl::opt< bool > UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false), cl::desc("Use fast short rep mov in memcpy lowering"))
EVT getValueType() const
Return the ValueType of the referenced return value.
Repeat move, corresponds to X86::REP_MOVSx.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
Function Alias Analysis Results
unsigned const TargetRegisterInfo * TRI
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, MVT AVT)
Emit a single REP MOVS{B,W,D,Q} instruction.
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget, uint64_t Align)
Returns the best type to use with repmovs depending on alignment.
SDValue getExternalSymbol(const char *Sym, EVT VT)
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:611
CallLoweringInfo & setChain(SDValue InChain)
bool hasFSRM() const
Definition: X86Subtarget.h:720
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:447
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:444
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:232
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:246
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:838
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:427
Machine Value Type.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Repeat fill, corresponds to X86::REP_STOSx.
bool hasERMSB() const
Definition: X86Subtarget.h:719
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:497
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:180
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
uint64_t Align
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:333
This structure contains all information that is necessary for lowering calls.
This class contains a discriminated union of information about pointers in memory operands,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
unsigned getMaxInlineSizeThreshold() const
Returns the maximum memset / memcpy size that still makes it profitable to inline the call.
Definition: X86Subtarget.h:570
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:450
uint64_t Offset
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:223
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1116
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:742
MachinePointerInfo getWithOffset(int64_t O) const
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo)
BlockType
Used as immediate MachineOperands for block signatures.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:449
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
SDValue getValue(unsigned R) const
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SDValue getValueType(EVT)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1556
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size)
Emit a single REP MOVSB instruction for a particular constant size.
static bool isVolatile(Instruction *Inst)
uint64_t getZExtValue() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
LLVMContext * getContext() const
Definition: SelectionDAG.h:454
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This file describes how to lower LLVM code to machine code.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.