LLVM  10.0.0svn
X86TargetTransformInfo.h
Go to the documentation of this file.
1 //===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file defines a TargetTransformInfo::Concept conforming object specific to the
10 /// X86 target machine. It uses the target's detailed information to
11 /// provide more precise answers to certain TTI queries, while letting the
12 /// target independent and default TTI implementations handle the rest.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
17 #define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
18 
#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
24 
25 namespace llvm {
26 
27 class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
29  typedef TargetTransformInfo TTI;
30  friend BaseT;
31 
32  const X86Subtarget *ST;
33  const X86TargetLowering *TLI;
34 
35  const X86Subtarget *getST() const { return ST; }
36  const X86TargetLowering *getTLI() const { return TLI; }
37 
38  const FeatureBitset InlineFeatureIgnoreList = {
39  // This indicates the CPU is 64 bit capable not that we are in 64-bit
40  // mode.
41  X86::Feature64Bit,
42 
43  // These features don't have any intrinsics or ABI effect.
44  X86::FeatureNOPL,
45  X86::FeatureCMPXCHG16B,
46  X86::FeatureLAHFSAHF,
47 
48  // Codegen control options.
49  X86::FeatureFast11ByteNOP,
50  X86::FeatureFast15ByteNOP,
51  X86::FeatureFastBEXTR,
52  X86::FeatureFastHorizontalOps,
53  X86::FeatureFastLZCNT,
54  X86::FeatureFastPartialYMMorZMMWrite,
55  X86::FeatureFastScalarFSQRT,
56  X86::FeatureFastSHLDRotate,
57  X86::FeatureFastScalarShiftMasks,
58  X86::FeatureFastVectorShiftMasks,
59  X86::FeatureFastVariableShuffle,
60  X86::FeatureFastVectorFSQRT,
61  X86::FeatureLEAForSP,
62  X86::FeatureLEAUsesAG,
63  X86::FeatureLZCNTFalseDeps,
64  X86::FeatureBranchFusion,
65  X86::FeatureMacroFusion,
66  X86::FeatureMergeToThreeWayBranch,
67  X86::FeaturePadShortFunctions,
68  X86::FeaturePOPCNTFalseDeps,
69  X86::FeatureSSEUnalignedMem,
70  X86::FeatureSlow3OpsLEA,
71  X86::FeatureSlowDivide32,
72  X86::FeatureSlowDivide64,
73  X86::FeatureSlowIncDec,
74  X86::FeatureSlowLEA,
75  X86::FeatureSlowPMADDWD,
76  X86::FeatureSlowPMULLD,
77  X86::FeatureSlowSHLD,
78  X86::FeatureSlowTwoMemOps,
79  X86::FeatureSlowUAMem16,
80 
81  // Perf-tuning flags.
82  X86::FeatureHasFastGather,
83  X86::FeatureSlowUAMem32,
84 
85  // Based on whether user set the -mprefer-vector-width command line.
86  X86::FeaturePrefer128Bit,
87  X86::FeaturePrefer256Bit,
88 
89  // CPU name enums. These just follow CPU string.
90  X86::ProcIntelAtom,
91  X86::ProcIntelGLM,
92  X86::ProcIntelGLP,
93  X86::ProcIntelSLM,
94  X86::ProcIntelTRM,
95  };
96 
97 public:
98  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
99  : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
100  TLI(ST->getTargetLowering()) {}
101 
102  /// \name Scalar TTI Implementations
103  /// @{
104  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
105 
106  /// @}
107 
108  /// \name Cache TTI Implementation
109  /// @{
113  TargetTransformInfo::CacheLevel Level) const;
114  /// @}
115 
116  /// \name Vector TTI Implementations
117  /// @{
118 
119  unsigned getNumberOfRegisters(unsigned ClassID) const;
120  unsigned getRegisterBitWidth(bool Vector) const;
121  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
122  unsigned getMaxInterleaveFactor(unsigned VF);
124  unsigned Opcode, Type *Ty,
130  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
131  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
132  const Instruction *I = nullptr);
133  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
134  const Instruction *I = nullptr);
135  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
136  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
137  unsigned AddressSpace, const Instruction *I = nullptr);
138  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
139  unsigned AddressSpace);
140  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
141  bool VariableMask, unsigned Alignment);
143  const SCEV *Ptr);
144 
145  unsigned getAtomicMemIntrinsicMaxElementSize() const;
146 
147  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
149  unsigned ScalarizationCostPassed = UINT_MAX);
150  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
152  unsigned VF = 1);
153 
154  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
155  bool IsPairwiseForm);
156 
157  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
158  bool IsUnsigned);
159 
160  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
161  unsigned Factor, ArrayRef<unsigned> Indices,
162  unsigned Alignment, unsigned AddressSpace,
163  bool UseMaskForCond = false,
164  bool UseMaskForGaps = false);
165  int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
166  unsigned Factor, ArrayRef<unsigned> Indices,
167  unsigned Alignment, unsigned AddressSpace,
168  bool UseMaskForCond = false,
169  bool UseMaskForGaps = false);
170  int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
171  unsigned Factor, ArrayRef<unsigned> Indices,
172  unsigned Alignment, unsigned AddressSpace,
173  bool UseMaskForCond = false,
174  bool UseMaskForGaps = false);
175 
176  int getIntImmCost(int64_t);
177 
178  int getIntImmCost(const APInt &Imm, Type *Ty);
179 
180  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
181 
182  int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
183  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
184  Type *Ty);
187  bool canMacroFuseCmp();
188  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment);
189  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment);
190  bool isLegalNTLoad(Type *DataType, Align Alignment);
191  bool isLegalNTStore(Type *DataType, Align Alignment);
192  bool isLegalMaskedGather(Type *DataType);
193  bool isLegalMaskedScatter(Type *DataType);
194  bool isLegalMaskedExpandLoad(Type *DataType);
195  bool isLegalMaskedCompressStore(Type *DataType);
196  bool hasDivRemOp(Type *DataType, bool IsSigned);
198  bool areInlineCompatible(const Function *Caller,
199  const Function *Callee) const;
200  bool areFunctionArgsABICompatible(const Function *Caller,
201  const Function *Callee,
202  SmallPtrSetImpl<Argument *> &Args) const;
204  bool IsZeroCmp) const;
206 private:
207  int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
208  unsigned Alignment, unsigned AddressSpace);
209  int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr,
210  unsigned Alignment, unsigned AddressSpace);
211 
212  /// @}
213 };
214 
215 } // end namespace llvm
216 
217 #endif
llvm::Optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
This class represents lattice values for constants.
Definition: AllocatorList.h:23
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
Calculate the cost of Gather / Scatter operation.
The main scalar evolution driver.
unsigned getNumberOfRegisters(unsigned ClassID) const
unsigned getRegisterBitWidth(bool Vector) const
bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment)
bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment)
bool isLegalMaskedScatter(Type *DataType)
F(f)
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:77
bool isLegalNTLoad(Type *DataType, Align Alignment)
bool isLegalNTStore(Type *DataType, Align Alignment)
unsigned getAtomicMemIntrinsicMaxElementSize() const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
unsigned getMaxInterleaveFactor(unsigned VF)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
mir Rename Register Operands
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)
PopcntSupportKind
Flags indicating the kind of support for population count.
llvm::Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
Returns options for expansion of memcmp. IsZeroCmp is true when the memcmp result is only compared with zero.
Container class for subtarget features.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
bool isLegalMaskedGather(Type *DataType)
int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm)
unsigned getUserCost(const User *U, ArrayRef< const Value *> Operands)
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
OperandValueProperties
Additional properties of an operand's values.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment...
Definition: Alignment.h:117
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument *> &Args) const
AddressSpace
Definition: NVPTXBaseInfo.h:21
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I=nullptr)
int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, bool IsUnsigned)
Class for arbitrary precision integers.
Definition: APInt.h:69
amdgpu Simplify well known AMD library false FunctionCallee Callee
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
This class represents an analyzed expression in the program.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
#define I(x, y, z)
Definition: MD5.cpp:58
bool isLegalMaskedCompressStore(Type *DataType)
LLVM Value Representation.
Definition: Value.h:74
bool isLegalMaskedExpandLoad(Type *DataType)
bool hasDivRemOp(Type *DataType, bool IsSigned)
static const Function * getParent(const Value *V)
const DataLayout & getDataLayout() const
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:159
OperandValueKind
Additional information about an operand's possible values.
This pass exposes codegen information to IR-level passes.
CacheLevel
The possible cache levels.
X86TTIImpl(const X86TargetMachine *TM, const Function &F)
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed=UINT_MAX)
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
This file describes how to lower LLVM code to machine code.
ShuffleKind
The various kinds of shuffle patterns for vector queries.