X86TargetTransformInfo.h (LLVM 9.0.0svn)
//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides a TargetTransformInfo::Concept conforming object
/// specific to the X86 target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H

#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
  typedef BasicTTIImplBase<X86TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  const X86Subtarget *getST() const { return ST; }
  const X86TargetLowering *getTLI() const { return TLI; }

  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates the CPU is 64 bit capable, not that we are in 64-bit
      // mode.
      X86::Feature64Bit,

      // These features don't have any intrinsics or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCMPXCHG16B,
      X86::FeatureLAHFSAHF,

      // Codegen control options.
      X86::FeatureFast11ByteNOP,
      X86::FeatureFast15ByteNOP,
      X86::FeatureFastBEXTR,
      X86::FeatureFastHorizontalOps,
      X86::FeatureFastLZCNT,
      X86::FeatureFastPartialYMMorZMMWrite,
      X86::FeatureFastScalarFSQRT,
      X86::FeatureFastSHLDRotate,
      X86::FeatureFastScalarShiftMasks,
      X86::FeatureFastVectorShiftMasks,
      X86::FeatureFastVariableShuffle,
      X86::FeatureFastVectorFSQRT,
      X86::FeatureLEAForSP,
      X86::FeatureLEAUsesAG,
      X86::FeatureLZCNTFalseDeps,
      X86::FeatureBranchFusion,
      X86::FeatureMacroFusion,
      X86::FeatureMergeToThreeWayBranch,
      X86::FeaturePadShortFunctions,
      X86::FeaturePOPCNTFalseDeps,
      X86::FeatureSSEUnalignedMem,
      X86::FeatureSlow3OpsLEA,
      X86::FeatureSlowDivide32,
      X86::FeatureSlowDivide64,
      X86::FeatureSlowIncDec,
      X86::FeatureSlowLEA,
      X86::FeatureSlowPMADDWD,
      X86::FeatureSlowPMULLD,
      X86::FeatureSlowSHLD,
      X86::FeatureSlowTwoMemOps,
      X86::FeatureSlowUAMem16,

      // Perf-tuning flags.
      X86::FeatureHasFastGather,
      X86::FeatureSlowUAMem32,

      // Based on whether user set the -mprefer-vector-width command line.
      X86::FeaturePrefer256Bit,

      // CPU name enums. These just follow CPU string.
      X86::ProcIntelAtom,
      X86::ProcIntelGLM,
      X86::ProcIntelGLP,
      X86::ProcIntelSLM,
      X86::ProcIntelTRM,
  };
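
  // Rough sketch of how the list above is typically consumed by
  // areInlineCompatible (declared below): both functions' feature bits are
  // masked with the ignore list before the subset check, so tuning-only
  // features never block inlining. Approximately:
  //
  //   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
  //   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
  //   return (RealCallerBits & RealCalleeBits) == RealCalleeBits;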

public:
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Cache TTI Implementation
  /// @{
  llvm::Optional<unsigned> getCacheSize(
      TargetTransformInfo::CacheLevel Level) const;
  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) const;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector);
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I = nullptr);
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr);
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace);
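  /// Calculate the cost of Gather / Scatter operation.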
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment);
  int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                const SCEV *Ptr);

  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF,
                            unsigned ScalarizationCostPassed = UINT_MAX);
  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF = 1);

  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm);

  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned);

  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                 unsigned Factor, ArrayRef<unsigned> Indices,
                                 unsigned Alignment, unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);
  int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                 unsigned Factor, ArrayRef<unsigned> Indices,
                                 unsigned Alignment, unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);
  int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                 unsigned Factor, ArrayRef<unsigned> Indices,
                                 unsigned Alignment, unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);

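  /// Calculate the cost of materializing a 64-bit value.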
  int getIntImmCost(int64_t);

  int getIntImmCost(const APInt &Imm, Type *Ty);

  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);

  int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty);
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2);
  bool canMacroFuseCmp();
  bool isLegalMaskedLoad(Type *DataType);
  bool isLegalMaskedStore(Type *DataType);
  bool isLegalNTLoad(Type *DataType, unsigned Alignment);
  bool isLegalNTStore(Type *DataType, unsigned Alignment);
  bool isLegalMaskedGather(Type *DataType);
  bool isLegalMaskedScatter(Type *DataType);
  bool isLegalMaskedExpandLoad(Type *DataType);
  bool isLegalMaskedCompressStore(Type *DataType);
  bool hasDivRemOp(Type *DataType, bool IsSigned);
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const;
  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;

private:
  int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
                      unsigned Alignment, unsigned AddressSpace);
  int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                      unsigned Alignment, unsigned AddressSpace);

  /// @}
};

} // end namespace llvm

#endif
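
For context, here is a minimal sketch of how this implementation reaches IR-level passes. It assumes the usual LLVM 9 wiring, in which X86TargetMachine::getTargetTransformInfo wraps an X86TTIImpl in the generic TargetTransformInfo facade and passes query it through TargetIRAnalysis; the function name examplePass and the particular types queried below are hypothetical, chosen only for illustration.

// Sketch: querying the X86-backed cost model from a new-pass-manager pass.
// Assumes LLVM 9 APIs; examplePass and the queried types are illustrative.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

static void examplePass(Function &F, FunctionAnalysisManager &FAM) {
  // On an X86 target machine, TargetIRAnalysis produces a TargetTransformInfo
  // backed by X86TTIImpl (built via the constructor declared above).
  const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);

  // Scalar query, answered by X86TTIImpl::getPopcntSupport.
  bool FastPopcnt =
      TTI.getPopcntSupport(64) == TargetTransformInfo::PSK_FastHardware;
  (void)FastPopcnt;

  // Vector query, answered by X86TTIImpl::getArithmeticInstrCost.
  Type *VecTy = VectorType::get(Type::getInt32Ty(F.getContext()), 8);
  int AddCost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
  (void)AddCost;
}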