LLVM 9.0.0svn
X86TargetTransformInfo.h
//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo::Concept conforming object
/// specific to the X86 target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H

#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
  typedef BasicTTIImplBase<X86TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  const X86Subtarget *getST() const { return ST; }
  const X86TargetLowering *getTLI() const { return TLI; }

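  // Note: the features listed below are meant to be ignored when
  // areInlineCompatible() compares caller and callee subtarget features;
  // they control tuning and codegen only and do not change the ABI or the
  // set of available intrinsics.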
  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates the CPU is 64 bit capable, not that we are in 64-bit
      // mode.
      X86::Feature64Bit,

      // These features don't have any intrinsics or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCMPXCHG16B,
      X86::FeatureLAHFSAHF,

      // Codegen control options.
      X86::FeatureFast11ByteNOP,
      X86::FeatureFast15ByteNOP,
      X86::FeatureFastBEXTR,
      X86::FeatureFastHorizontalOps,
      X86::FeatureFastLZCNT,
      X86::FeatureFastPartialYMMorZMMWrite,
      X86::FeatureFastScalarFSQRT,
      X86::FeatureFastSHLDRotate,
      X86::FeatureFastVariableShuffle,
      X86::FeatureFastVectorFSQRT,
      X86::FeatureLEAForSP,
      X86::FeatureLEAUsesAG,
      X86::FeatureLZCNTFalseDeps,
      X86::FeatureBranchFusion,
      X86::FeatureMacroFusion,
      X86::FeatureMergeToThreeWayBranch,
      X86::FeaturePadShortFunctions,
      X86::FeaturePOPCNTFalseDeps,
      X86::FeatureSSEUnalignedMem,
      X86::FeatureSlow3OpsLEA,
      X86::FeatureSlowDivide32,
      X86::FeatureSlowDivide64,
      X86::FeatureSlowIncDec,
      X86::FeatureSlowLEA,
      X86::FeatureSlowPMADDWD,
      X86::FeatureSlowPMULLD,
      X86::FeatureSlowSHLD,
      X86::FeatureSlowTwoMemOps,
      X86::FeatureSlowUAMem16,

      // Perf-tuning flags.
      X86::FeatureHasFastGather,
      X86::FeatureSlowUAMem32,

      // Based on whether the user set the -mprefer-vector-width command line
      // option.
      X86::FeaturePrefer256Bit,

      // CPU name enums. These just follow the CPU string.
      X86::ProcIntelAtom,
      X86::ProcIntelGLM,
      X86::ProcIntelGLP,
      X86::ProcIntelSLM,
      X86::ProcIntelTRM,
  };

public:
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Cache TTI Implementation
  /// @{
  llvm::Optional<unsigned> getCacheSize(
      TargetTransformInfo::CacheLevel Level) const;
  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) const;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector);
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I = nullptr);
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr);
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace);
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment);
  int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                const SCEV *Ptr);

  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF,
                            unsigned ScalarizationCostPassed = UINT_MAX);
  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF = 1);

  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm);

  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned);

  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                 unsigned Factor, ArrayRef<unsigned> Indices,
                                 unsigned Alignment, unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);
  int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
                                       unsigned Factor,
                                       ArrayRef<unsigned> Indices,
                                       unsigned Alignment,
                                       unsigned AddressSpace,
                                       bool UseMaskForCond = false,
                                       bool UseMaskForGaps = false);
  int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
                                     unsigned Factor,
                                     ArrayRef<unsigned> Indices,
                                     unsigned Alignment, unsigned AddressSpace,
                                     bool UseMaskForCond = false,
                                     bool UseMaskForGaps = false);

  int getIntImmCost(int64_t);

  int getIntImmCost(const APInt &Imm, Type *Ty);

  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);

  int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty);
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2);
  bool canMacroFuseCmp();
  bool isLegalMaskedLoad(Type *DataType);
  bool isLegalMaskedStore(Type *DataType);
  bool isLegalMaskedGather(Type *DataType);
  bool isLegalMaskedScatter(Type *DataType);
  bool isLegalMaskedExpandLoad(Type *DataType);
  bool isLegalMaskedCompressStore(Type *DataType);
  bool hasDivRemOp(Type *DataType, bool IsSigned);
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const;
  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const;

private:
  int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
                      unsigned Alignment, unsigned AddressSpace);
  int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                      unsigned Alignment, unsigned AddressSpace);

  /// @}
};

} // end namespace llvm

#endif
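
For orientation, here is a minimal, hedged sketch of how an IR-level pass reaches these hooks. A pass never instantiates X86TTIImpl directly; it asks the pass manager for the TargetTransformInfo result, and each query is then forwarded to the target implementation (X86TTIImpl when the function targets X86), with BasicTTIImplBase and the default TTI supplying anything X86 does not override. The sketch assumes the LLVM 9-era C++ API; the helper name queryCosts and the concrete vector type are illustrative only and are not part of this header.

// Minimal sketch (assumed LLVM 9-era API); queryCosts is a hypothetical
// helper, not something declared in X86TargetTransformInfo.h.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// In a new-pass-manager pass, TTI would typically come from the analysis
// manager, e.g.:
//   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
static void queryCosts(Function &F, const TargetTransformInfo &TTI) {
  LLVMContext &Ctx = F.getContext();
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *V8I32 = VectorType::get(I32, 8);

  // Routed to X86TTIImpl::getNumberOfRegisters / getRegisterBitWidth when the
  // function was compiled for an X86 target.
  unsigned NumVecRegs = TTI.getNumberOfRegisters(/*Vector=*/true);
  unsigned VecRegBits = TTI.getRegisterBitWidth(/*Vector=*/true);

  // Cost of a <8 x i32> add; X86TTIImpl::getArithmeticInstrCost refines the
  // generic BasicTTIImplBase estimate using subtarget details.
  int AddCost = TTI.getArithmeticInstrCost(Instruction::Add, V8I32);

  // Legality query answered by X86TTIImpl::isLegalMaskedGather, whose result
  // depends on the subtarget's gather support.
  bool GatherLegal = TTI.isLegalMaskedGather(V8I32);

  (void)NumVecRegs; (void)VecRegBits; (void)AddCost; (void)GatherLegal;
}

The same pattern applies to the other legality hooks declared above (isLegalMaskedLoad, isLegalMaskedScatter, and friends), which the vectorizers consult before emitting masked memory operations.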