LLVM  6.0.0svn
X86SelectionDAGInfo.cpp
Go to the documentation of this file.
1 //===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the X86SelectionDAGInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86SelectionDAGInfo.h"
15 #include "X86ISelLowering.h"
16 #include "X86InstrInfo.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
21 #include "llvm/IR/DerivedTypes.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "x86-selectiondag-info"
26 
// Reports whether the register returned by TRI->getBaseRegister() is one of
// the registers in ClobberSet. Returns false immediately when the frame has
// neither variable-sized objects nor opaque stack-pointer adjustments;
// otherwise returns true on the first matching register and false if none
// match.
// NOTE(review): the listing numbers jump over 34 and 39, so the declaration of
// MFI and the closing part of the static_cast expression are not visible in
// this text. Restore them from the original file before building.
27 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
28  SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
29  // We cannot use TRI->hasBasePointer() until *after* we select all basic
30  // blocks. Legalization may introduce new stack temporaries with large
31  // alignment requirements. Fall back to generic code if there are any
32  // dynamic stack adjustments (hopefully rare) and the base pointer would
33  // conflict if we had to use it.
35  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
36  return false;
37 
38  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
40  unsigned BaseReg = TRI->getBaseRegister();
// Linear scan: any clobbered register equal to the base register is a
// potential conflict.
41  for (unsigned R : ClobberSet)
42  if (BaseReg == R)
43  return true;
44  return false;
45 }
46 
47 namespace {
48 
49 // Represents a cover of a buffer of Size bytes with Count() blocks of type AVT
50 // (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is
51 // always smaller than the block size).
52 struct RepMovsRepeats {
53  RepMovsRepeats(uint64_t Size) : Size(Size) {}
54 
55  uint64_t Count() const { return Size / UBytes(); }
56  uint64_t BytesLeft() const { return Size % UBytes(); }
57  uint64_t UBytes() const { return AVT.getSizeInBits() / 8; }
58 
59  const uint64_t Size;
60  MVT AVT = MVT::i8;
61 };
62 
63 } // namespace
64 
// Lowers a memset by copying the fill value, the element count and the
// destination into fixed physical registers and emitting an X86ISD::REP_STOS
// node. Any bytes left over after the whole elements are handled by a trailing
// DAG.getMemset. Returns an empty SDValue() to decline the lowering, the
// second member of the LowerCallTo result when a specialized zeroing entry
// point is used, or the final Chain otherwise.
// NOTE(review): presumably X86SelectionDAGInfo::EmitTargetCodeForMemset — the
// signature line (listing line 65) is missing from this text, as are listing
// lines 71, 76, 90, 97-98, 105, 108 and 179. Restore them from the original
// file before building.
66  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
67  SDValue Size, unsigned Align, bool isVolatile,
68  MachinePointerInfo DstPtrInfo) const {
69  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
70  const X86Subtarget &Subtarget =
72 
73 #ifndef NDEBUG
74  // If the base register might conflict with our physical registers, bail out.
// NOTE(review): despite the comment above, this path asserts instead of
// returning, and the check is compiled out entirely when NDEBUG is defined.
75  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
77  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
78 #endif
79 
80  // If to a segment-relative address space, use the default lowering.
81  if (DstPtrInfo.getAddrSpace() >= 256)
82  return SDValue();
83 
84  // If not DWORD aligned or size is more than the threshold, call the library.
85  // The libc version is likely to be faster for these cases. It can use the
86  // address value and run time information about the CPU.
87  if ((Align & 3) != 0 || !ConstantSize ||
88  ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
89  // Check to see if there is a specialized entry-point for memory zeroing.
91 
// The entry point is only looked up when the fill value is a constant zero.
92  if (const char *bzeroEntry = ValC &&
93  ValC->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
94  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
95  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
96  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
// Two arguments are passed, both typed as IntPtrTy: Dst, then Size.
99  Entry.Node = Dst;
100  Entry.Ty = IntPtrTy;
101  Args.push_back(Entry);
102  Entry.Node = Size;
103  Args.push_back(Entry);
104 
106  CLI.setDebugLoc(dl)
107  .setChain(Chain)
109  DAG.getExternalSymbol(bzeroEntry, IntPtr),
110  std::move(Args))
111  .setDiscardResult();
112 
// Only the second member of the pair returned by LowerCallTo is propagated.
113  std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
114  return CallResult.second;
115  }
116 
117  // Otherwise have the target-independent code call memset.
118  return SDValue();
119  }
120 
121  uint64_t SizeVal = ConstantSize->getZExtValue();
122  SDValue InFlag;
123  EVT AVT;
124  SDValue Count;
125  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
126  unsigned BytesLeft = 0;
127  if (ValC) {
128  unsigned ValReg;
// Only the low 8 bits of the constant are used as the fill byte. This local
// Val shadows the SDValue parameter inside this branch.
129  uint64_t Val = ValC->getZExtValue() & 255;
130 
131  // If the value is a constant, then we can potentially use larger sets.
// NOTE(review): the early return above already rejects (Align & 3) != 0, so
// only case 0 appears reachable at this point — confirm.
132  switch (Align & 3) {
133  case 2: // WORD aligned
134  AVT = MVT::i16;
135  ValReg = X86::AX;
// Replicate the fill byte into both bytes of the 16-bit pattern.
136  Val = (Val << 8) | Val;
137  break;
138  case 0: // DWORD aligned
139  AVT = MVT::i32;
140  ValReg = X86::EAX;
// Replicate the fill byte into all four bytes of the 32-bit pattern.
141  Val = (Val << 8) | Val;
142  Val = (Val << 16) | Val;
143  if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
144  AVT = MVT::i64;
145  ValReg = X86::RAX;
// Duplicate the 32-bit pattern into the upper half of the 64-bit pattern.
146  Val = (Val << 32) | Val;
147  }
148  break;
149  default: // Byte aligned
150  AVT = MVT::i8;
151  ValReg = X86::AL;
152  Count = DAG.getIntPtrConstant(SizeVal, dl);
153  break;
154  }
155 
// For elements wider than a byte, count whole elements and record the
// remainder so it can be stored separately after the REP_STOS.
156  if (AVT.bitsGT(MVT::i8)) {
157  unsigned UBytes = AVT.getSizeInBits() / 8;
158  Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
159  BytesLeft = SizeVal % UBytes;
160  }
161 
// Each CopyToReg takes the previous InFlag, and its value 1 becomes the
// InFlag for the next node.
162  Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
163  InFlag);
164  InFlag = Chain.getValue(1);
165  } else {
// Non-constant fill value: store byte by byte, with the value copied to AL.
166  AVT = MVT::i8;
167  Count = DAG.getIntPtrConstant(SizeVal, dl);
168  Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
169  InFlag = Chain.getValue(1);
170  }
171 
// The element count goes to RCX/ECX and the destination to RDI/EDI, chosen
// by Subtarget.is64Bit().
172  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
173  Count, InFlag);
174  InFlag = Chain.getValue(1);
175  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
176  Dst, InFlag);
177  InFlag = Chain.getValue(1);
178 
180  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
181  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
182 
183  if (BytesLeft) {
184  // Handle the last 1 - 7 bytes.
// NOTE(review): SizeVal is uint64_t while Offset is unsigned. SizeVal is
// bounded by getMaxInlineSizeThreshold() on this path — confirm that the
// threshold always fits in unsigned.
185  unsigned Offset = SizeVal - BytesLeft;
186  EVT AddrVT = Dst.getValueType();
187  EVT SizeVT = Size.getValueType();
188 
// The tail is stored with a nested memset at Dst + Offset, using the
// original SDValue Val (not the replicated constant pattern).
189  Chain = DAG.getMemset(Chain, dl,
190  DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
191  DAG.getConstant(Offset, dl, AddrVT)),
192  Val,
193  DAG.getConstant(BytesLeft, dl, SizeVT),
194  Align, isVolatile, false,
195  DstPtrInfo.getWithOffset(Offset));
196  }
197 
198  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
199  return Chain;
200 }
201 
// Lowers a constant-size memcpy to an X86ISD::REP_MOVS node. Any bytes left
// over after the whole elements are copied by a follow-up DAG.getMemcpy, and
// the results are joined with a TokenFactor. Returns an empty SDValue() to
// decline when the size is not constant, when it exceeds the subtarget
// threshold or the alignment is not a multiple of 4 (both unless
// AlwaysInline), when either pointer is in an address space >= 256, or when
// isBaseRegConflictPossible reports a conflict.
// NOTE(review): presumably X86SelectionDAGInfo::EmitTargetCodeForMemcpy — the
// signature line (listing line 202) is missing from this text, as are listing
// lines 210, 231, 250, 268 and 272. Restore them from the original file
// before building.
203  SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
204  SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
205  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
206  // This requires the copy size to be a constant, preferably
207  // within a subtarget-specific limit.
208  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
209  const X86Subtarget &Subtarget =
211  if (!ConstantSize)
212  return SDValue();
// Repeats starts with the default byte-sized element type. AVT may be
// widened below.
213  RepMovsRepeats Repeats(ConstantSize->getZExtValue());
214  if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
215  return SDValue();
216 
217  /// If not DWORD aligned, it is more efficient to call the library. However
218  /// if calling the library is not allowed (AlwaysInline), then soldier on as
219  /// the code generated here is better than the long load-store sequence we
220  /// would otherwise get.
221  if (!AlwaysInline && (Align & 3) != 0)
222  return SDValue();
223 
224  // If to a segment-relative address space, use the default lowering.
225  if (DstPtrInfo.getAddrSpace() >= 256 ||
226  SrcPtrInfo.getAddrSpace() >= 256)
227  return SDValue();
228 
229  // If the base register might conflict with our physical registers, bail out.
230  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
232  if (isBaseRegConflictPossible(DAG, ClobberSet))
233  return SDValue();
234 
235  // If the target has enhanced REPMOVSB, then it's at least as fast to use
236  // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
237  // BytesLeft.
238  if (!Subtarget.hasERMSB() && !(Align & 1)) {
// Select the element type from the lowest set bit among bits 1 and 2 of
// Align. If neither is set, the widest type for the subtarget is used.
239  if (Align & 2)
240  // WORD aligned
241  Repeats.AVT = MVT::i16;
242  else if (Align & 4)
243  // DWORD aligned
244  Repeats.AVT = MVT::i32;
245  else
246  // QWORD aligned
247  Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
248 
249  if (Repeats.BytesLeft() > 0 &&
251  // When aggressively optimizing for size, avoid generating the code to
252  // handle BytesLeft.
253  Repeats.AVT = MVT::i8;
254  }
255  }
256 
// The element count goes to RCX/ECX, the destination to RDI/EDI and the
// source to RSI/ESI, chosen by Subtarget.is64Bit(). Each copy's value 1 is
// passed to the next node as InFlag.
257  SDValue InFlag;
258  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
259  DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
260  InFlag = Chain.getValue(1);
261  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
262  Dst, InFlag);
263  InFlag = Chain.getValue(1);
264  Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
265  Src, InFlag);
266  InFlag = Chain.getValue(1);
267 
269  SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
270  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
271 
273  Results.push_back(RepMovs);
274  if (Repeats.BytesLeft()) {
275  // Handle the last 1 - 7 bytes.
// NOTE(review): Repeats.Size is uint64_t while Offset is unsigned. With
// AlwaysInline the size threshold above is skipped, so a very large constant
// size could be truncated here — confirm.
276  unsigned Offset = Repeats.Size - Repeats.BytesLeft();
277  EVT DstVT = Dst.getValueType();
278  EVT SrcVT = Src.getValueType();
279  EVT SizeVT = Size.getValueType();
// The tail copy is chained on Chain (the last CopyToReg) rather than on
// RepMovs. Both end up in Results and are joined by the TokenFactor below.
280  Results.push_back(DAG.getMemcpy(Chain, dl,
281  DAG.getNode(ISD::ADD, dl, DstVT, Dst,
282  DAG.getConstant(Offset, dl,
283  DstVT)),
284  DAG.getNode(ISD::ADD, dl, SrcVT, Src,
285  DAG.getConstant(Offset, dl,
286  SrcVT)),
287  DAG.getConstant(Repeats.BytesLeft(), dl,
288  SizeVT),
289  Align, isVolatile, AlwaysInline, false,
290  DstPtrInfo.getWithOffset(Offset),
291  SrcPtrInfo.getWithOffset(Offset)));
292  }
293 
294  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
295 }
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
EVT getValueType() const
Return the ValueType of the referenced return value.
Repeat move, corresponds to X86::REP_MOVSx.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override
Emit target-specific code that performs a memcpy.
Function Alias Analysis Results
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getExternalSymbol(const char *Sym, EVT VT)
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
CallLoweringInfo & setChain(SDValue InChain)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:388
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getBaseRegister() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:385
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:702
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Machine Value Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Repeat fill, corresponds to X86::REP_STOSx.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:34
This structure contains all information that is necessary for lowering calls.
This class contains a discriminated union of information about pointers in memory operands...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:391
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:209
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:657
MachinePointerInfo getWithOffset(int64_t O) const
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:390
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:527
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
SDValue getValue(unsigned R) const
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const override
Emit target-specific code that performs a memset.
SDValue getValueType(EVT)
static bool isVolatile(Instruction *Inst)
uint64_t getZExtValue() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo)
LLVMContext * getContext() const
Definition: SelectionDAG.h:393
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
This file describes how to lower LLVM code to machine code.