LLVM 22.0.0git
AMDGPUUniformIntrinsicCombine.cpp
Go to the documentation of this file.
1//===-- AMDGPUUniformIntrinsicCombine.cpp ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass simplifies certain intrinsic calls when the arguments are uniform.
11/// It's true that this pass has transforms that can lead to a situation where
12/// some instruction whose operand was previously recognized as statically
13/// uniform is later on no longer recognized as statically uniform. However, the
14/// semantics of how programs execute don't (and must not, for this precise
15/// reason) care about static uniformity, they only ever care about dynamic
16/// uniformity. And every instruction that's downstream and cares about dynamic
17/// uniformity must be convergent (and isel will introduce v_readfirstlane for
18/// them if their operands can't be proven statically uniform).
19//===----------------------------------------------------------------------===//
20
21#include "AMDGPU.h"
22#include "GCNSubtarget.h"
29#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstVisitor.h"
32#include "llvm/IR/IntrinsicsAMDGPU.h"
37
38#define DEBUG_TYPE "amdgpu-uniform-intrinsic-combine"
39
40using namespace llvm;
41using namespace llvm::AMDGPU;
42using namespace llvm::PatternMatch;
43
44/// Wrapper for querying uniformity info that first checks locally tracked
45/// instructions.
46static bool
48 const ValueMap<const Value *, bool> &Tracker) {
49 Value *V = U.get();
50 if (auto It = Tracker.find(V); It != Tracker.end())
51 return !It->second; // divergent if marked false
52 return UI.isDivergentUse(U);
53}
54
55/// Optimizes uniform intrinsics calls if their operand can be proven uniform.
57 const UniformityInfo &UI,
59 llvm::Intrinsic::ID IID = II.getIntrinsicID();
60
61 switch (IID) {
62 case Intrinsic::amdgcn_permlane64:
63 case Intrinsic::amdgcn_readfirstlane:
64 case Intrinsic::amdgcn_readlane: {
65 Value *Src = II.getArgOperand(0);
66 if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker))
67 return false;
68 LLVM_DEBUG(dbgs() << "Replacing " << II << " with " << *Src << '\n');
69 II.replaceAllUsesWith(Src);
70 II.eraseFromParent();
71 return true;
72 }
73 case Intrinsic::amdgcn_ballot: {
74 Value *Src = II.getArgOperand(0);
75 if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker))
76 return false;
77 LLVM_DEBUG(dbgs() << "Found uniform ballot intrinsic: " << II << '\n');
78
79 bool Changed = false;
80 for (User *U : make_early_inc_range(II.users())) {
81 if (auto *ICmp = dyn_cast<ICmpInst>(U)) {
82 Value *Op0 = ICmp->getOperand(0);
83 Value *Op1 = ICmp->getOperand(1);
84 ICmpInst::Predicate Pred = ICmp->getPredicate();
85 Value *OtherOp = Op0 == &II ? Op1 : Op0;
86
87 if (Pred == ICmpInst::ICMP_EQ && match(OtherOp, m_Zero())) {
88 // Case: (icmp eq %ballot, 0) -> xor %ballot_arg, 1
89 Instruction *NotOp =
90 BinaryOperator::CreateNot(Src, "", ICmp->getIterator());
91 Tracker[NotOp] = true; // NOT preserves uniformity
92 LLVM_DEBUG(dbgs() << "Replacing ICMP_EQ: " << *NotOp << '\n');
93 ICmp->replaceAllUsesWith(NotOp);
94 Changed = true;
95 } else if (Pred == ICmpInst::ICMP_NE && match(OtherOp, m_Zero())) {
96 // Case: (icmp ne %ballot, 0) -> %ballot_arg
97 LLVM_DEBUG(dbgs() << "Replacing ICMP_NE with ballot argument: "
98 << *Src << '\n');
99 ICmp->replaceAllUsesWith(Src);
100 Changed = true;
101 }
102 }
103 }
104 // Erase the intrinsic if it has no remaining uses.
105 if (II.use_empty())
106 II.eraseFromParent();
107 return Changed;
108 }
109 default:
110 llvm_unreachable("Unexpected intrinsic ID in optimizeUniformIntrinsic");
111 }
112 return false;
113}
114
115/// Iterates over intrinsic calls in the Function to optimize.
// NOTE(review): listing lines 116, 118 and 120 are absent from this extract,
// so the function declarator, the declaration of Tracker and the header of
// the loop that binds I are not visible here. The comments below describe
// only the statements that are present.
117 bool IsChanged = false;
119
// Anything that is not an intrinsic call is skipped.
121 auto *II = dyn_cast<IntrinsicInst>(&I);
122 if (!II)
123 continue;
124
// Let through only the four intrinsic IDs handled below; every other
// intrinsic is skipped before optimizeUniformIntrinsic is called.
125 switch (II->getIntrinsicID()) {
126 case Intrinsic::amdgcn_permlane64:
127 case Intrinsic::amdgcn_readfirstlane:
128 case Intrinsic::amdgcn_readlane:
129 case Intrinsic::amdgcn_ballot:
130 break;
131 default:
132 continue;
133 }
// Record whether this call reported a change to the IR.
134 IsChanged |= optimizeUniformIntrinsic(*II, UI, Tracker);
135 }
136 return IsChanged;
137}
138
150
151namespace {
// FunctionPass subclass for this transform; runOnFunction is declared here
// and defined out of line.
152class AMDGPUUniformIntrinsicCombineLegacy : public FunctionPass {
153public:
// Static member passed to the FunctionPass base-class constructor.
154 static char ID;
155 AMDGPUUniformIntrinsicCombineLegacy() : FunctionPass(ID) {
// NOTE(review): listing lines 156-157 (the constructor body) are absent from
// this extract.
158 }
159
160private:
161 bool runOnFunction(Function &F) override;
162 void getAnalysisUsage(AnalysisUsage &AU) const override {
// Declares that this pass preserves the CFG.
163 AU.setPreservesCFG();
// NOTE(review): listing lines 164-165 are absent from this extract.
// runOnFunction calls getAnalysis<UniformityInfoWrapperPass>(), so a matching
// addRequired is presumably among them; confirm against the real file.
166 }
167};
168} // namespace
169
// Out-of-class definition of the pass's static ID member, initialised to 0.
170char AMDGPUUniformIntrinsicCombineLegacy::ID = 0;
// NOTE(review): listing line 171 is absent from this extract; the line below
// is the tail of that statement (presumably the initializer of a reference
// bound to ID; confirm against the real file).
172 AMDGPUUniformIntrinsicCombineLegacy::ID;
173
174bool AMDGPUUniformIntrinsicCombineLegacy::runOnFunction(Function &F) {
175 if (skipFunction(F))
176 return false;
177 const UniformityInfo &UI =
178 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
179 return runUniformIntrinsicCombine(F, UI);
180}
181
// Pass-initialization macro block for the legacy pass: DEBUG_TYPE is passed
// as the second macro argument, "AMDGPU Uniform Intrinsic Combine" as the
// third, and both trailing flags are false.
182INITIALIZE_PASS_BEGIN(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE,
183 "AMDGPU Uniform Intrinsic Combine", false, false)
// NOTE(review): listing lines 184-185 are absent from this extract; whatever
// sits between the BEGIN and END macros is not visible here.
186INITIALIZE_PASS_END(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE,
187 "AMDGPU Uniform Intrinsic Combine", false, false)
188
190 return new AMDGPUUniformIntrinsicCombineLegacy();
191}
static bool runUniformIntrinsicCombine(Function &F, const UniformityInfo &UI)
Iterates over intrinsic calls in the Function to optimize.
static bool optimizeUniformIntrinsic(IntrinsicInst &II, const UniformityInfo &UI, ValueMap< const Value *, bool > &Tracker)
Optimizes uniform intrinsics calls if their operand can be proven uniform.
static bool isDivergentUseWithNew(const Use &U, const UniformityInfo &UI, const ValueMap< const Value *, bool > &Tracker)
Wrapper for querying uniformity info that first checks locally tracked instructions.
Expand Atomic instructions
static bool runOnFunction(Function &F, bool PostInlining)
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
#define LLVM_DEBUG(...)
Definition Debug.h:114
Target-Independent Code Generator Pass Configuration Options pass.
LLVM IR instance of the generic uniformity analysis.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool isDivergentUse(const UseT &U) const
Whether U is divergent.
A wrapper class for inspecting calls to intrinsic functions.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Target-Independent Code Generator Pass Configuration Options.
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
See the file comment.
Definition ValueMap.h:84
iterator find(const KeyT &Val)
Definition ValueMap.h:160
iterator end()
Definition ValueMap.h:139
LLVM Value Representation.
Definition Value.h:75
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
bool match(Val *V, const Pattern &P)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< SSAContext > UniformityInfo
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
FunctionPass * createAMDGPUUniformIntrinsicCombineLegacyPass()
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void initializeAMDGPUUniformIntrinsicCombineLegacyPass(PassRegistry &)
char & AMDGPUUniformIntrinsicCombineLegacyPassID
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)