LLVM  3.7.0
NVPTXFavorNonGenericAddrSpaces.cpp
Go to the documentation of this file.
1 //===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // When a load/store accesses the generic address space, checks whether the
11 // address is cast from a non-generic address space. If so, remove this
12 // addrspacecast because accessing non-generic address spaces is typically
13 // faster. Besides removing addrspacecasts directly used by loads/stores, this
14 // optimization also recursively traces into a GEP's pointer operand and a
15 // bitcast's source to find more eliminable addrspacecasts.
16 //
17 // For instance, the code below loads a float from an array allocated in
18 // addrspace(3).
19 //
20 // %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
21 // %1 = gep [10 x float]* %0, i64 0, i64 %i
22 // %2 = bitcast float* %1 to i32*
23 // %3 = load i32* %2 ; emits ld.u32
24 //
25 // First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
26 // and the bitcast to expose more optimization opportunities to function
27 // optimizeMemoryInstruction. The intermediate code looks like:
28 //
29 // %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
30 // %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
31 // %2 = addrspacecast i32 addrspace(3)* %1 to i32*
32 // %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
33 //
34 // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
35 // generic pointers, and folds the load and the addrspacecast into a load from
36 // the original address space. The final code looks like:
37 //
38 // %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
39 // %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
40 // %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32
41 //
42 // This pass may remove an addrspacecast in a different BB. Therefore, we
43 // implement it as a FunctionPass.
44 //
45 // TODO:
46 // The current implementation doesn't handle PHINodes. Eliminating
47 // addrspacecasts used by PHINodes is trickier because PHINodes can introduce
48 // loops in data flow. For example,
49 //
50 // %generic.input = addrspacecast float addrspace(3)* %input to float*
51 // loop:
52 // %y = phi [ %generic.input, %y2 ]
53 // %y2 = getelementptr %y, 1
54 // %v = load %y2
55 // br ..., label %loop, ...
56 //
57 // Marking %y2 shared depends on marking %y shared, but %y also data-flow
58 // depends on %y2. We probably need an iterative fix-point algorithm to handle
59 // this case.
60 //
61 //===----------------------------------------------------------------------===//
62 
63 #include "NVPTX.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/Instructions.h"
66 #include "llvm/IR/Operator.h"
67 #include "llvm/Support/CommandLine.h"
68 
69 using namespace llvm;
70 
71 // An option to disable this optimization. Enable it by default.
73  "disable-nvptx-favor-non-generic",
74  cl::init(false),
75  cl::desc("Do not convert generic address space usage "
76  "to non-generic address space usage"),
77  cl::Hidden);
78 
79 namespace {
80 /// \brief NVPTXFavorNonGenericAddrSpaces
81 class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
82 public:
83  static char ID;
84  NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
85  bool runOnFunction(Function &F) override;
86 
87 private:
88  /// Optimizes load/store instructions. Idx is the index of the pointer operand
89  /// (0 for load, and 1 for store). Returns true if it changes anything.
90  bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
91  /// Recursively traces into a GEP's pointer operand or a bitcast's source to
92  /// find an eliminable addrspacecast, and hoists that addrspacecast to the
93  /// outermost level. For example, this function transforms
94  /// bitcast(gep(gep(addrspacecast(X))))
95  /// to
96  /// addrspacecast(bitcast(gep(gep(X)))).
97  ///
98  /// This reordering exposes to optimizeMemoryInstruction more
99  /// optimization opportunities on loads and stores.
100  ///
101  /// If this function succesfully hoists an eliminable addrspacecast or V is
102  /// already such an addrspacecast, it returns the transformed value (which is
103  /// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
104  Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
105  /// Helper function for GEPs.
106  Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
107  /// Helper function for bitcasts.
108  Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
109 };
110 }
111 
113 
114 namespace llvm {
116 }
117 INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
118  "Remove unnecessary non-generic-to-generic addrspacecasts",
119  false, false)
120 
121 // Decides whether V is an addrspacecast and shortcutting V in load/store is
122 // valid and beneficial.
123 static bool isEliminableAddrSpaceCast(Value *V) {
124  // Returns false if V is not even an addrspacecast.
125  Operator *Cast = dyn_cast<Operator>(V);
126  if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
127  return false;
128 
129  Value *Src = Cast->getOperand(0);
130  PointerType *SrcTy = cast<PointerType>(Src->getType());
131  PointerType *DestTy = cast<PointerType>(Cast->getType());
132  // TODO: For now, we only handle the case where the addrspacecast only changes
133  // the address space but not the type. If the type also changes, we could
134  // still get rid of the addrspacecast by adding an extra bitcast, but we
135  // rarely see such scenarios.
136  if (SrcTy->getElementType() != DestTy->getElementType())
137  return false;
138 
139  // Checks whether the addrspacecast is from a non-generic address space to the
140  // generic address space.
142  DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
143 }
144 
145 Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
146  GEPOperator *GEP, int Depth) {
147  Value *NewOperand =
148  hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
149  if (NewOperand == nullptr)
150  return nullptr;
151 
152  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
153  assert(isEliminableAddrSpaceCast(NewOperand));
154  Operator *Cast = cast<Operator>(NewOperand);
155 
156  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
157  Value *NewASC;
158  if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
159  // GEP = gep (addrspacecast X), indices
160  // =>
161  // NewGEP = gep X, indices
162  // NewASC = addrspacecast NewGEP
164  GEP->getSourceElementType(), Cast->getOperand(0), Indices,
165  "", GEPI);
166  NewGEP->setIsInBounds(GEP->isInBounds());
167  NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
168  NewASC->takeName(GEP);
169  // Without RAUWing GEP, the compiler would visit GEP again and emit
170  // redundant instructions. This is exercised in test @rauw in
171  // access-non-generic.ll.
172  GEP->replaceAllUsesWith(NewASC);
173  } else {
174  // GEP is a constant expression.
176  GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
177  Indices, GEP->isInBounds());
178  NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType());
179  }
180  return NewASC;
181 }
182 
183 Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
184  BitCastOperator *BC, int Depth) {
185  Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
186  if (NewOperand == nullptr)
187  return nullptr;
188 
189  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
190  assert(isEliminableAddrSpaceCast(NewOperand));
191  Operator *Cast = cast<Operator>(NewOperand);
192 
193  // Cast = addrspacecast Src
194  // BC = bitcast Cast
195  // =>
196  // Cast' = bitcast Src
197  // BC' = addrspacecast Cast'
198  Value *Src = Cast->getOperand(0);
199  Type *TypeOfNewCast =
201  Src->getType()->getPointerAddressSpace());
202  Value *NewBC;
203  if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
204  Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
205  NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
206  NewBC->takeName(BC);
207  // Without RAUWing BC, the compiler would visit BC again and emit
208  // redundant instructions. This is exercised in test @rauw in
209  // access-non-generic.ll.
210  BC->replaceAllUsesWith(NewBC);
211  } else {
212  // BC is a constant expression.
213  Constant *NewCast =
214  ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
215  NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
216  }
217  return NewBC;
218 }
219 
220 Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
221  int Depth) {
222  // Returns V if V is already an eliminable addrspacecast.
223  if (isEliminableAddrSpaceCast(V))
224  return V;
225 
226  // Limit the depth to prevent this recursive function from running too long.
227  const int MaxDepth = 20;
228  if (Depth >= MaxDepth)
229  return nullptr;
230 
231  // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
232  // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
233  // that are not directly used by the load/store.
234  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
235  return hoistAddrSpaceCastFromGEP(GEP, Depth);
236 
237  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
238  return hoistAddrSpaceCastFromBitCast(BC, Depth);
239 
240  return nullptr;
241 }
242 
243 bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
244  unsigned Idx) {
245  Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx));
246  if (NewOperand == nullptr)
247  return false;
248 
249  // load/store (addrspacecast X) => load/store X if shortcutting the
250  // addrspacecast is valid and can improve performance.
251  //
252  // e.g.,
253  // %1 = addrspacecast float addrspace(3)* %0 to float*
254  // %2 = load float* %1
255  // ->
256  // %2 = load float addrspace(3)* %0
257  //
258  // Note: the addrspacecast can also be a constant expression.
259  assert(isEliminableAddrSpaceCast(NewOperand));
260  Operator *ASC = dyn_cast<Operator>(NewOperand);
261  MI->setOperand(Idx, ASC->getOperand(0));
262  return true;
263 }
264 
265 bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
267  return false;
268 
269  bool Changed = false;
270  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
271  for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) {
272  if (isa<LoadInst>(I)) {
273  // V = load P
274  Changed |= optimizeMemoryInstruction(I, 0);
275  } else if (isa<StoreInst>(I)) {
276  // store V, P
277  Changed |= optimizeMemoryInstruction(I, 1);
278  }
279  }
280  }
281  return Changed;
282 }
283 
285  return new NVPTXFavorNonGenericAddrSpaces();
286 }
Type * getSourceElementType() const
Definition: Operator.cpp:9
FunctionPass * createNVPTXFavorNonGenericAddrSpacesPass()
iterator end()
Definition: Function.h:459
static Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1847
static PointerType * get(Type *ElementType, unsigned AddressSpace)
PointerType::get - This constructs a pointer to an object of the specified type in a numbered address...
Definition: Type.cpp:738
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1092
F(f)
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:472
Hexagon Common GEP
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: Type.cpp:216
static cl::opt< bool > DisableFavorNonGeneric("disable-nvptx-favor-non-generic", cl::init(false), cl::desc("Do not convert generic address space usage ""to non-generic address space usage"), cl::Hidden)
op_iterator idx_end()
Definition: Operator.h:385
Type * getPointerElementType() const
Definition: Type.h:366
This class represents a conversion between pointers from one address space to another.
void setIsInBounds(bool b=true)
setIsInBounds - Set or clear the inbounds flag on this GEP instruction.
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &)
This class represents a no-op cast from one type to another.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:351
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:256
iterator begin()
Definition: Function.h:457
Type * getElementType() const
Definition: DerivedTypes.h:323
PointerType - Class to represent pointers.
Definition: DerivedTypes.h:449
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1835
GetElementPtrInst - an instruction for type-safe pointer arithmetic to access elements of arrays and ...
Definition: Instructions.h:830
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces,"nvptx-favor-non-generic","Remove unnecessary non-generic-to-generic addrspacecasts", false, false) static bool isEliminableAddrSpaceCast(Value *V)
This is an important base class in LLVM.
Definition: Constant.h:41
Value * getPointerOperand()
Definition: Operator.h:388
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
Value * getOperand(unsigned i) const
Definition: User.h:118
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:854
op_iterator idx_begin()
Definition: Operator.h:383
const unsigned MaxDepth
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:32
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
void setOperand(unsigned i, Value *Val)
Definition: User.h:122
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:48
bool isInBounds() const
Test whether this is an inbounds GEP, as defined by LangRef.html.
Definition: Operator.h:379
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM Value Representation.
Definition: Value.h:69
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:41