LLVM  7.0.0svn
X86SelectionDAGInfo.cpp
Go to the documentation of this file.
1 //===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the X86SelectionDAGInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86SelectionDAGInfo.h"
15 #include "X86ISelLowering.h"
16 #include "X86InstrInfo.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
21 #include "llvm/IR/DerivedTypes.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "x86-selectiondag-info"
26 
27 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
28  SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
29  // We cannot use TRI->hasBasePointer() until *after* we select all basic
30  // blocks. Legalization may introduce new stack temporaries with large
31  // alignment requirements. Fall back to generic code if there are any
32  // dynamic stack adjustments (hopefully rare) and the base pointer would
33  // conflict if we had to use it.
35  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
36  return false;
37 
38  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
40  unsigned BaseReg = TRI->getBaseRegister();
41  for (unsigned R : ClobberSet)
42  if (BaseReg == R)
43  return true;
44  return false;
45 }
46 
47 namespace {
48 
49 // Represents a cover of a buffer of Size bytes with Count() blocks of type AVT
50 // (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is
51 // always smaller than the block size).
52 struct RepMovsRepeats {
53  RepMovsRepeats(uint64_t Size) : Size(Size) {}
54 
55  uint64_t Count() const { return Size / UBytes(); }
56  uint64_t BytesLeft() const { return Size % UBytes(); }
57  uint64_t UBytes() const { return AVT.getSizeInBits() / 8; }
58 
59  const uint64_t Size;
60  MVT AVT = MVT::i8;
61 };
62 
63 } // namespace
64 
// NOTE(review): the listing line that carries this function's return type and
// name is absent here; what follows starts inside the parameter list. The
// parameters match the EmitTargetCodeForMemset override named in this page's
// reference index -- confirm against the header.
//
// Visible behavior: fills Size bytes at Dst with Val. Depending on the checks
// below it (a) returns an empty SDValue, (b) emits a call to the bzero-style
// libcall and returns that call's second result, or (c) emits an
// X86ISD::REP_STOS node (plus a nested memset for any remainder) and returns
// the resulting chain.
66  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
67  SDValue Size, unsigned Align, bool isVolatile,
68  MachinePointerInfo DstPtrInfo) const {
    // Null when Size is not a ConstantSDNode.
69  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
    // NOTE(review): the initializer of Subtarget is on a listing line that is
    // missing here.
70  const X86Subtarget &Subtarget =
72 
    // Debug builds only: assert that none of the registers written below can
    // be the base register. Nothing is checked when NDEBUG is defined.
    // NOTE(review): the remainder of the ClobberSet initializer is on a
    // missing listing line.
73 #ifndef NDEBUG
74  // If the base register might conflict with our physical registers, bail out.
75  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
77  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
78 #endif
79 
80  // If to a segment-relative address space, use the default lowering.
81  if (DstPtrInfo.getAddrSpace() >= 256)
82  return SDValue();
83 
84  // If not DWORD aligned or size is more than the threshold, call the library.
85  // The libc version is likely to be faster for these cases. It can use the
86  // address value and run time information about the CPU.
87  if ((Align & 3) != 0 || !ConstantSize ||
88  ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
89  // Check to see if there is a specialized entry-point for memory zeroing.
    // NOTE(review): the declaration of the ValC used in the next condition is
    // on a missing listing line.
91 
    // bzeroName is non-null only when ValC is a null-valued constant and a
    // libcall name is registered for RTLIB::BZERO.
92  if (const char *bzeroName = (ValC && ValC->isNullValue())
93  ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
94  : nullptr) {
95  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
96  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
97  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
    // NOTE(review): Args and Entry are declared on missing listing lines.
    // Two arguments are passed, both typed IntPtrTy: Dst, then Size.
100  Entry.Node = Dst;
101  Entry.Ty = IntPtrTy;
102  Args.push_back(Entry);
103  Entry.Node = Size;
104  Args.push_back(Entry);
105 
    // NOTE(review): the declaration of CLI and the start of the callee setup
    // call (whose trailing arguments appear below) are on missing lines.
107  CLI.setDebugLoc(dl)
108  .setChain(Chain)
110  DAG.getExternalSymbol(bzeroName, IntPtr),
111  std::move(Args))
112  .setDiscardResult();
113 
    // Only the second element of the pair is returned -- presumably the
    // call's output chain; confirm against LowerCallTo.
114  std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
115  return CallResult.second;
116  }
117 
118  // Otherwise have the target-independent code call memset.
119  return SDValue();
120  }
121 
    // From here on ConstantSize is non-null and (Align & 3) == 0, because the
    // block above returns on every path when either condition fails.
122  uint64_t SizeVal = ConstantSize->getZExtValue();
123  SDValue InFlag;
124  EVT AVT;
125  SDValue Count;
126  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
127  unsigned BytesLeft = 0;
128  if (ValC) {
129  unsigned ValReg;
    // Only the low 8 bits of the constant are used. This local Val shadows
    // the SDValue parameter until the end of this if-block.
130  uint64_t Val = ValC->getZExtValue() & 255;
131 
132  // If the value is a constant, then we can potentially use larger sets.
    // NOTE(review): given the early return above, only `case 0` looks
    // reachable here; `case 2` and `default` appear to be dead -- confirm.
133  switch (Align & 3) {
134  case 2: // WORD aligned
135  AVT = MVT::i16;
136  ValReg = X86::AX;
    // Replicate the byte into both bytes of the 16-bit pattern.
137  Val = (Val << 8) | Val;
138  break;
139  case 0: // DWORD aligned
140  AVT = MVT::i32;
141  ValReg = X86::EAX;
    // Replicate the byte across 32 bits, and across 64 bits below when the
    // subtarget is 64-bit and Align is a multiple of 8.
142  Val = (Val << 8) | Val;
143  Val = (Val << 16) | Val;
144  if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
145  AVT = MVT::i64;
146  ValReg = X86::RAX;
147  Val = (Val << 32) | Val;
148  }
149  break;
150  default: // Byte aligned
151  AVT = MVT::i8;
152  ValReg = X86::AL;
153  Count = DAG.getIntPtrConstant(SizeVal, dl);
154  break;
155  }
156 
    // For units wider than one byte, Count is the number of whole units and
    // BytesLeft is the remainder that the trailing memset below handles.
157  if (AVT.bitsGT(MVT::i8)) {
158  unsigned UBytes = AVT.getSizeInBits() / 8;
159  Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
160  BytesLeft = SizeVal % UBytes;
161  }
162 
    // Place the replicated pattern in the register selected above; each
    // copy's value #1 is threaded into the next node through InFlag.
163  Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
164  InFlag);
165  InFlag = Chain.getValue(1);
166  } else {
    // Non-constant fill value: store byte by byte with the value in AL.
167  AVT = MVT::i8;
168  Count = DAG.getIntPtrConstant(SizeVal, dl);
169  Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
170  InFlag = Chain.getValue(1);
171  }
172 
    // Count goes to RCX/ECX and Dst to RDI/EDI, picked by Subtarget.is64Bit().
173  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
174  Count, InFlag);
175  InFlag = Chain.getValue(1);
176  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
177  Dst, InFlag);
178  InFlag = Chain.getValue(1);
179 
    // NOTE(review): Tys is declared on a missing listing line.
    // The REP_STOS operands are the chain, the store-unit type and InFlag.
181  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
182  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
183 
184  if (BytesLeft) {
185  // Handle the last 1 - 7 bytes.
    // Offset is the number of bytes already covered by the REP_STOS above.
186  unsigned Offset = SizeVal - BytesLeft;
187  EVT AddrVT = Dst.getValueType();
188  EVT SizeVT = Size.getValueType();
189 
    // Nested memset of BytesLeft bytes at Dst + Offset, chained after the
    // REP_STOS. Val here is the SDValue parameter again; the literal `false`
    // sits in getMemset's isTailCall position.
190  Chain = DAG.getMemset(Chain, dl,
191  DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
192  DAG.getConstant(Offset, dl, AddrVT)),
193  Val,
194  DAG.getConstant(BytesLeft, dl, SizeVT),
195  Align, isVolatile, false,
196  DstPtrInfo.getWithOffset(Offset));
197  }
198 
199  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
200  return Chain;
201 }
202 
// NOTE(review): the listing line that carries this function's return type and
// name is absent here; what follows starts inside the parameter list. The
// parameters match the EmitTargetCodeForMemcpy override named in this page's
// reference index -- confirm against the header.
//
// Visible behavior: copies Size bytes from Src to Dst. Returns an empty
// SDValue when any of the checks below declines; otherwise emits an
// X86ISD::REP_MOVS node (plus a nested memcpy for any remainder) and returns
// a TokenFactor joining them.
204  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
205  SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
206  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
207  // This requires the copy size to be a constant, preferably
208  // within a subtarget-specific limit.
209  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
    // NOTE(review): the initializer of Subtarget is on a listing line that is
    // missing here.
210  const X86Subtarget &Subtarget =
212  if (!ConstantSize)
213  return SDValue();
    // Repeats starts with byte-sized blocks (AVT == MVT::i8).
214  RepMovsRepeats Repeats(ConstantSize->getZExtValue());
    // The size threshold applies only when inlining is not mandatory.
215  if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
216  return SDValue();
217 
218  /// If not DWORD aligned, it is more efficient to call the library. However
219  /// if calling the library is not allowed (AlwaysInline), then soldier on as
220  /// the code generated here is better than the long load-store sequence we
221  /// would otherwise get.
222  if (!AlwaysInline && (Align & 3) != 0)
223  return SDValue();
224 
225  // If to a segment-relative address space, use the default lowering.
226  if (DstPtrInfo.getAddrSpace() >= 256 ||
227  SrcPtrInfo.getAddrSpace() >= 256)
228  return SDValue();
229 
230  // If the base register might conflict with our physical registers, bail out.
    // NOTE(review): the remainder of the ClobberSet initializer is on a
    // missing listing line.
231  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
233  if (isBaseRegConflictPossible(DAG, ClobberSet))
234  return SDValue();
235 
236  // If the target has enhanced REPMOVSB, then it's at least as fast to use
237  // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
238  // BytesLeft.
    // Blocks are widened only without ERMSB and when Align is even. Bit 1 of
    // Align selects i16, else bit 2 selects i32, else i64 on 64-bit
    // subtargets and i32 otherwise.
239  if (!Subtarget.hasERMSB() && !(Align & 1)) {
240  if (Align & 2)
241  // WORD aligned
242  Repeats.AVT = MVT::i16;
243  else if (Align & 4)
244  // DWORD aligned
245  Repeats.AVT = MVT::i32;
246  else
247  // QWORD aligned
248  Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
249 
    // NOTE(review): the second half of this condition is on a missing
    // listing line; per the comment below it concerns optimizing for size.
250  if (Repeats.BytesLeft() > 0 &&
252  // When aggressively optimizing for size, avoid generating the code to
253  // handle BytesLeft.
254  Repeats.AVT = MVT::i8;
255  }
256  }
257 
    // Block count goes to RCX/ECX, Dst to RDI/EDI and Src to RSI/ESI, picked
    // by Subtarget.is64Bit(); each copy's value #1 is threaded into the next
    // node through InFlag.
258  SDValue InFlag;
259  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
260  DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
261  InFlag = Chain.getValue(1);
262  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
263  Dst, InFlag);
264  InFlag = Chain.getValue(1);
265  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
266  Src, InFlag);
267  InFlag = Chain.getValue(1);
268 
    // NOTE(review): Tys is declared on a missing listing line.
    // The REP_MOVS operands are the chain, the block type and InFlag.
270  SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
271  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
272 
    // NOTE(review): Results is declared on a missing listing line.
274  Results.push_back(RepMovs);
275  if (Repeats.BytesLeft()) {
276  // Handle the last 1 - 7 bytes.
    // NOTE(review): Repeats.Size is uint64_t but Offset is unsigned. When
    // AlwaysInline bypasses the size threshold above, this subtraction may
    // narrow -- confirm that sizes beyond 32 bits cannot reach this point.
277  unsigned Offset = Repeats.Size - Repeats.BytesLeft();
278  EVT DstVT = Dst.getValueType();
279  EVT SrcVT = Src.getValueType();
280  EVT SizeVT = Size.getValueType();
    // The remainder copy hangs off Chain (the register-copy chain), not off
    // RepMovs; the two are joined only by the TokenFactor returned below.
    // The literal `false` sits in getMemcpy's isTailCall position.
281  Results.push_back(DAG.getMemcpy(Chain, dl,
282  DAG.getNode(ISD::ADD, dl, DstVT, Dst,
283  DAG.getConstant(Offset, dl,
284  DstVT)),
285  DAG.getNode(ISD::ADD, dl, SrcVT, Src,
286  DAG.getConstant(Offset, dl,
287  SrcVT)),
288  DAG.getConstant(Repeats.BytesLeft(), dl,
289  SizeVT),
290  Align, isVolatile, AlwaysInline, false,
291  DstPtrInfo.getWithOffset(Offset),
292  SrcPtrInfo.getWithOffset(Offset)));
293  }
294 
295  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
296 }
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
EVT getValueType() const
Return the ValueType of the referenced return value.
Repeat move, corresponds to X86::REP_MOVSx.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
Function Alias Analysis Results
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getExternalSymbol(const char *Sym, EVT VT)
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
CallLoweringInfo & setChain(SDValue InChain)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:390
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getBaseRegister() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:387
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:744
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Machine Value Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Repeat fill, corresponds to X86::REP_STOSx.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:34
This structure contains all information that is necessary for lowering calls.
This class contains a discriminated union of information about pointers in memory operands...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:393
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:210
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:664
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MachinePointerInfo getWithOffset(int64_t O) const
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:392
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:573
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
SDValue getValue(unsigned R) const
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue getValueType(EVT)
static bool isVolatile(Instruction *Inst)
uint64_t getZExtValue() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo)
LLVMContext * getContext() const
Definition: SelectionDAG.h:396
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
This file describes how to lower LLVM code to machine code.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.