36#define DEBUG_TYPE "printfToRuntime"
40class AMDGPUPrintfRuntimeBinding final :
public ModulePass {
45 explicit AMDGPUPrintfRuntimeBinding();
56class AMDGPUPrintfRuntimeBindingImpl {
58 AMDGPUPrintfRuntimeBindingImpl(
61 : GetDT(GetDT), GetTLI(GetTLI) {}
68 bool lowerPrintfForGpu(
Module &M);
82char AMDGPUPrintfRuntimeBinding::ID = 0;
85 "amdgpu-printf-runtime-binding",
"AMDGPU Printf lowering",
96 return new AMDGPUPrintfRuntimeBinding();
100AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() :
ModulePass(
ID) {
104void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
106 size_t NumOps)
const {
112 static const char ConvSpecifiers[] =
"cdieEfgGaosuxXp";
113 size_t CurFmtSpecifierIdx = 0;
114 size_t PrevFmtSpecifierIdx = 0;
118 bool ArgDump =
false;
120 CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
124 while (pTag && CurFmt[--pTag] ==
'%') {
130 OpConvSpecifiers.
push_back(Fmt[CurFmtSpecifierIdx]);
132 PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
137 return Specifier ==
's' && isa<PointerType>(OpType);
154 "printf format string must be a trivially resolved constant string "
160bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(
Module &M) {
169 NamedMDNode *metaD =
M.getOrInsertNamedMetadata(
"llvm.printf.fmts");
172 for (
auto *CI : Printfs) {
173 unsigned NumOps = CI->arg_size();
176 Value *
Op = CI->getArgOperand(0);
178 if (
auto LI = dyn_cast<LoadInst>(Op)) {
179 Op = LI->getPointerOperand();
180 for (
auto *
Use :
Op->users()) {
181 if (
auto SI = dyn_cast<StoreInst>(
Use)) {
182 Op =
SI->getValueOperand();
188 if (
auto I = dyn_cast<Instruction>(Op)) {
189 Value *Op_simplified =
190 simplify(
I, &GetTLI(*
I->getFunction()), &GetDT(*
I->getFunction()));
197 Value *Stripped =
Op->stripPointerCasts();
198 if (!isa<UndefValue>(Stripped) && !isa<ConstantPointerNull>(Stripped))
205 getConversionSpecifiers(OpConvSpecifiers, FormatStr, NumOps - 1);
208 std::string AStreamHolder;
211 Sizes << CI->arg_size() - 1;
213 for (
unsigned ArgCount = 1;
214 ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.
size();
216 Value *
Arg = CI->getArgOperand(ArgCount);
217 Type *ArgType =
Arg->getType();
218 unsigned ArgSize = TD->getTypeAllocSize(ArgType);
225 if (
auto *VecType = dyn_cast<VectorType>(ArgType))
226 ResType = VectorType::get(ResType,
VecType->getElementCount());
228 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
236 if (OpConvSpecifiers[ArgCount - 1] ==
'x' ||
237 OpConvSpecifiers[ArgCount - 1] ==
'X' ||
238 OpConvSpecifiers[ArgCount - 1] ==
'u' ||
239 OpConvSpecifiers[ArgCount - 1] ==
'o')
243 ArgType =
Arg->getType();
244 ArgSize = TD->getTypeAllocSize(ArgType);
245 CI->setOperand(ArgCount,
Arg);
247 if (OpConvSpecifiers[ArgCount - 1] ==
'f') {
262 <<
" for type: " << *ArgType <<
'\n');
263 Sizes << ArgSize <<
':';
266 LLVM_DEBUG(
dbgs() <<
"Printf format string in source = " << FormatStr
268 for (
char C : FormatStr) {
303 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
306 Attribute::NoUnwind);
310 Type *Tys_alloc[1] = {SizetTy};
312 Type *I8Ptr = PointerType::get(I8Ty, 1);
313 FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc,
false);
315 M.getOrInsertFunction(
StringRef(
"__printf_alloc"), FTy_alloc, Attr);
318 std::string fmtstr = itostr(++UniqID) +
":" +
Sizes.str();
336 auto *cmp = cast<ICmpInst>(
Builder.CreateICmpNE(pcall, zeroIntPtr,
""));
337 if (!CI->use_empty()) {
340 CI->replaceAllUsesWith(result);
355 new BitCastInst(BufferIdx, idPointer,
"PrintBuffIdCast", Brnch);
363 "PrintBuffGep", Brnch);
366 for (
unsigned ArgCount = 1;
367 ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.
size();
369 Value *
Arg = CI->getArgOperand(ArgCount);
370 Type *ArgType =
Arg->getType();
373 if (OpConvSpecifiers[ArgCount - 1] ==
'f') {
374 if (
auto *FpCons = dyn_cast<ConstantFP>(
Arg)) {
377 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
380 }
else if (
auto *FpExt = dyn_cast<FPExtInst>(
Arg)) {
381 if (FpExt->getType()->isDoubleTy() &&
382 FpExt->getOperand(0)->getType()->isFloatTy()) {
383 Arg = FpExt->getOperand(0);
388 }
else if (isa<PointerType>(ArgType)) {
402 ReadBytes = Extractor.getU8(
Offset);
405 ReadBytes = Extractor.getU16(
Offset);
408 ReadBytes = Extractor.getU24(
Offset);
411 ReadBytes = Extractor.getU32(
Offset);
416 "failed to read bytes from constant array");
421 if (ReadNow < ReadSize)
438 for (
unsigned I = 0,
E = WhatToStore.
size();
I !=
E; ++
I) {
439 Value *TheBtCast = WhatToStore[
I];
440 unsigned ArgSize = TD->getTypeAllocSize(TheBtCast->
getType());
445 if (
I + 1 ==
E && ArgCount + 1 == CI->arg_size())
449 "PrintBuffNextPtr", Brnch);
451 << *BufferIdx <<
'\n');
457 for (
auto *CI : Printfs)
458 CI->eraseFromParent();
464bool AMDGPUPrintfRuntimeBindingImpl::run(
Module &M) {
469 auto PrintfFunction =
M.getFunction(
"printf");
470 if (!PrintfFunction || !PrintfFunction->isDeclaration())
473 for (
auto &U : PrintfFunction->uses()) {
474 if (
auto *CI = dyn_cast<CallInst>(
U.getUser())) {
475 if (CI->isCallee(&U))
476 Printfs.push_back(CI);
483 TD = &
M.getDataLayout();
485 return lowerPrintfForGpu(M);
488bool AMDGPUPrintfRuntimeBinding::runOnModule(
Module &M) {
490 return this->getAnalysis<DominatorTreeWrapperPass>(
F).getDomTree();
493 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
496 return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
509 bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).
run(M);
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static void diagnoseInvalidFormatString(const CallBase *CI)
amdgpu printf runtime AMDGPU Printf lowering
constexpr StringLiteral NonLiteralStr("???")
static StringRef getAsConstantStr(Value *V)
amdgpu printf runtime binding
static bool shouldPrintAsStr(char Specifier, Type *OpType)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Class for arbitrary precision integers.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
const Function * getParent() const
Return the enclosing method, or null if none.
This class represents a no-op cast from one type to another.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
A constant pointer value that points to null.
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
A parsed version of the target data layout string in and methods for querying it.
Diagnostic information for unsupported feature in backend.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
This class represents an extension of floating point types.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const BasicBlock * getParent() const
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static MDString * get(LLVMContext &Context, StringRef Str)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
unsigned getNumOperands() const
void addOperand(MDNode *M)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
static constexpr size_t npos
Analysis pass providing the TargetLibraryInfo.
TargetLibraryInfo run(const Function &F, FunctionAnalysisManager &)
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVMContext & getContext() const
All values hold a context through their type.
An efficient, type-erasing, non-owning reference to a callable.
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
char & AMDGPUPrintfRuntimeBindingID
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
ModulePass * createAMDGPUPrintfRuntimeBinding()
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)