LLVM  3.7.0
NVPTXTargetTransformInfo.cpp
Go to the documentation of this file.
//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "NVPTXTargetTransformInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

#define DEBUG_TYPE "NVPTXtti"

23 // Whether the given intrinsic reads threadIdx.x/y/z.
24 static bool readsThreadIndex(const IntrinsicInst *II) {
25  switch (II->getIntrinsicID()) {
26  default: return false;
27  case Intrinsic::nvvm_read_ptx_sreg_tid_x:
28  case Intrinsic::nvvm_read_ptx_sreg_tid_y:
29  case Intrinsic::nvvm_read_ptx_sreg_tid_z:
30  return true;
31  }
32 }
33 
34 static bool readsLaneId(const IntrinsicInst *II) {
35  return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
36 }
37 
38 // Whether the given intrinsic is an atomic instruction in PTX.
39 static bool isNVVMAtomic(const IntrinsicInst *II) {
40  switch (II->getIntrinsicID()) {
41  default: return false;
42  case Intrinsic::nvvm_atomic_load_add_f32:
43  case Intrinsic::nvvm_atomic_load_inc_32:
44  case Intrinsic::nvvm_atomic_load_dec_32:
45  return true;
46  }
47 }
48 
50  // Without inter-procedural analysis, we conservatively assume that arguments
51  // to __device__ functions are divergent.
52  if (const Argument *Arg = dyn_cast<Argument>(V))
53  return !isKernelFunction(*Arg->getParent());
54 
55  if (const Instruction *I = dyn_cast<Instruction>(V)) {
56  // Without pointer analysis, we conservatively assume values loaded from
57  // generic or local address space are divergent.
58  if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
59  unsigned AS = LI->getPointerAddressSpace();
60  return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
61  }
62  // Atomic instructions may cause divergence. Atomic instructions are
63  // executed sequentially across all threads in a warp. Therefore, an earlier
64  // executed thread may see different memory inputs than a later executed
65  // thread. For example, suppose *a = 0 initially.
66  //
67  // atom.global.add.s32 d, [a], 1
68  //
69  // returns 0 for the first thread that enters the critical region, and 1 for
70  // the second thread.
71  if (I->isAtomic())
72  return true;
73  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
74  // Instructions that read threadIdx are obviously divergent.
75  if (readsThreadIndex(II) || readsLaneId(II))
76  return true;
77  // Handle the NVPTX atomic instrinsics that cannot be represented as an
78  // atomic IR instruction.
79  if (isNVVMAtomic(II))
80  return true;
81  }
82  // Conservatively consider the return value of function calls as divergent.
83  // We could analyze callees with bodies more precisely using
84  // inter-procedural analysis.
85  if (isa<CallInst>(I))
86  return true;
87  }
88 
89  return false;
90 }
91 
93  unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
95  TTI::OperandValueProperties Opd2PropInfo) {
96  // Legalize the type.
97  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
98 
99  int ISD = TLI->InstructionOpcodeToISD(Opcode);
100 
101  switch (ISD) {
102  default:
103  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
104  Opd1PropInfo, Opd2PropInfo);
105  case ISD::ADD:
106  case ISD::MUL:
107  case ISD::XOR:
108  case ISD::OR:
109  case ISD::AND:
110  // The machine code (SASS) simulates an i64 with two i32. Therefore, we
111  // estimate that arithmetic operations on i64 are twice as expensive as
112  // those on types that can fit into one machine register.
113  if (LT.second.SimpleTy == MVT::i64)
114  return 2 * LT.first;
115  // Delegate other cases to the basic TTI.
116  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
117  Opd1PropInfo, Opd2PropInfo);
118  }
119 }
120 
124 
125  // Enable partial unrolling and runtime unrolling, but reduce the
126  // threshold. This partially unrolls small loops which are often
127  // unrolled by the PTX to SASS compiler and unrolling earlier can be
128  // beneficial.
129  UP.Partial = UP.Runtime = true;
130  UP.PartialThreshold = UP.Threshold / 4;
131 }
static bool readsThreadIndex(const IntrinsicInst *II)
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
LLVM Argument representation.
Definition: Argument.h:35
Cost tables and simple lookup functions.
Intrinsic::ID getIntrinsicID() const
getIntrinsicID - Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:44
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool isKernelFunction(const llvm::Function &)
static bool readsLaneId(const IntrinsicInst *II)
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP)
This file a TargetTransformInfo::Concept conforming object specific to the NVPTX target machine...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
bool isSourceOfDivergence(const Value *V)
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
OperandValueProperties
Additional properties of an operand's values.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None)
Definition: BasicTTIImpl.h:285
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
unsigned Threshold
The cost threshold for the unrolled loop.
Parameters that control the generic loop unrolling transformation.
#define I(x, y, z)
Definition: MD5.cpp:54
static bool isNVVMAtomic(const IntrinsicInst *II)
LLVM Value Representation.
Definition: Value.h:69
OperandValueKind
Additional information about an operand's possible values.
This pass exposes codegen information to IR-level passes.
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:219
std::pair< unsigned, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:37
This file describes how to lower LLVM code to machine code.