File: | llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp |
Warning: | line 371, column 42 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // \file | |||
9 | // | |||
10 | // The pass bind printfs to a kernel arg pointer that will be bound to a buffer | |||
11 | // later by the runtime. | |||
12 | // | |||
13 | // This pass traverses the functions in the module and converts | |||
14 | // each call to printf to a sequence of operations that | |||
15 | // store the following into the printf buffer: | |||
16 | // - format string (passed as a module's metadata unique ID) | |||
17 | // - bitwise copies of printf arguments | |||
18 | // The backend passes will need to store metadata in the kernel | |||
19 | //===----------------------------------------------------------------------===// | |||
20 | ||||
21 | #include "AMDGPU.h" | |||
22 | #include "llvm/ADT/SmallString.h" | |||
23 | #include "llvm/ADT/StringExtras.h" | |||
24 | #include "llvm/ADT/Triple.h" | |||
25 | #include "llvm/Analysis/InstructionSimplify.h" | |||
26 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
27 | #include "llvm/CodeGen/Passes.h" | |||
28 | #include "llvm/IR/Constants.h" | |||
29 | #include "llvm/IR/DataLayout.h" | |||
30 | #include "llvm/IR/Dominators.h" | |||
31 | #include "llvm/IR/GlobalVariable.h" | |||
32 | #include "llvm/IR/IRBuilder.h" | |||
33 | #include "llvm/IR/Instructions.h" | |||
34 | #include "llvm/IR/Module.h" | |||
35 | #include "llvm/IR/Type.h" | |||
36 | #include "llvm/InitializePasses.h" | |||
37 | #include "llvm/Support/CommandLine.h" | |||
38 | #include "llvm/Support/Debug.h" | |||
39 | #include "llvm/Support/raw_ostream.h" | |||
40 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
41 | using namespace llvm; | |||
42 | ||||
// Tag used by LLVM_DEBUG output emitted from this file.
#define DEBUG_TYPE "printfToRuntime"
// Granularity, in bytes, to which every printf buffer slot is rounded.
#define DWORD_ALIGN 4
45 | ||||
46 | namespace { | |||
47 | class LLVM_LIBRARY_VISIBILITY__attribute__ ((visibility("hidden"))) AMDGPUPrintfRuntimeBinding final | |||
48 | : public ModulePass { | |||
49 | ||||
50 | public: | |||
51 | static char ID; | |||
52 | ||||
53 | explicit AMDGPUPrintfRuntimeBinding(); | |||
54 | ||||
55 | private: | |||
56 | bool runOnModule(Module &M) override; | |||
57 | void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers, | |||
58 | StringRef fmt, size_t num_ops) const; | |||
59 | ||||
60 | bool shouldPrintAsStr(char Specifier, Type *OpType) const; | |||
61 | bool | |||
62 | lowerPrintfForGpu(Module &M, | |||
63 | function_ref<const TargetLibraryInfo &(Function &)> GetTLI); | |||
64 | ||||
65 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
66 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
67 | AU.addRequired<DominatorTreeWrapperPass>(); | |||
68 | } | |||
69 | ||||
70 | Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) { | |||
71 | return SimplifyInstruction(I, {*TD, TLI, DT}); | |||
72 | } | |||
73 | ||||
74 | const DataLayout *TD; | |||
75 | const DominatorTree *DT; | |||
76 | SmallVector<CallInst *, 32> Printfs; | |||
77 | }; | |||
78 | } // namespace | |||
79 | ||||
80 | char AMDGPUPrintfRuntimeBinding::ID = 0; | |||
81 | ||||
82 | INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding,static void *initializeAMDGPUPrintfRuntimeBindingPassOnce(PassRegistry &Registry) { | |||
83 | "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",static void *initializeAMDGPUPrintfRuntimeBindingPassOnce(PassRegistry &Registry) { | |||
84 | false, false)static void *initializeAMDGPUPrintfRuntimeBindingPassOnce(PassRegistry &Registry) { | |||
85 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
86 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); | |||
87 | INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding",PassInfo *PI = new PassInfo( "AMDGPU Printf lowering", "amdgpu-printf-runtime-binding" , &AMDGPUPrintfRuntimeBinding::ID, PassInfo::NormalCtor_t (callDefaultCtor<AMDGPUPrintfRuntimeBinding>), false, false ); Registry.registerPass(*PI, true); return PI; } static llvm ::once_flag InitializeAMDGPUPrintfRuntimeBindingPassFlag; void llvm::initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry & Registry) { llvm::call_once(InitializeAMDGPUPrintfRuntimeBindingPassFlag , initializeAMDGPUPrintfRuntimeBindingPassOnce, std::ref(Registry )); } | |||
88 | "AMDGPU Printf lowering", false, false)PassInfo *PI = new PassInfo( "AMDGPU Printf lowering", "amdgpu-printf-runtime-binding" , &AMDGPUPrintfRuntimeBinding::ID, PassInfo::NormalCtor_t (callDefaultCtor<AMDGPUPrintfRuntimeBinding>), false, false ); Registry.registerPass(*PI, true); return PI; } static llvm ::once_flag InitializeAMDGPUPrintfRuntimeBindingPassFlag; void llvm::initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry & Registry) { llvm::call_once(InitializeAMDGPUPrintfRuntimeBindingPassFlag , initializeAMDGPUPrintfRuntimeBindingPassOnce, std::ref(Registry )); } | |||
89 | ||||
90 | char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID; | |||
91 | ||||
92 | namespace llvm { | |||
93 | ModulePass *createAMDGPUPrintfRuntimeBinding() { | |||
94 | return new AMDGPUPrintfRuntimeBinding(); | |||
95 | } | |||
96 | } // namespace llvm | |||
97 | ||||
98 | AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() | |||
99 | : ModulePass(ID), TD(nullptr), DT(nullptr) { | |||
100 | initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); | |||
101 | } | |||
102 | ||||
103 | void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( | |||
104 | SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt, | |||
105 | size_t NumOps) const { | |||
106 | // not all format characters are collected. | |||
107 | // At this time the format characters of interest | |||
108 | // are %p and %s, which use to know if we | |||
109 | // are either storing a literal string or a | |||
110 | // pointer to the printf buffer. | |||
111 | static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; | |||
112 | size_t CurFmtSpecifierIdx = 0; | |||
113 | size_t PrevFmtSpecifierIdx = 0; | |||
114 | ||||
115 | while ((CurFmtSpecifierIdx = Fmt.find_first_of( | |||
116 | ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { | |||
117 | bool ArgDump = false; | |||
118 | StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, | |||
119 | CurFmtSpecifierIdx - PrevFmtSpecifierIdx); | |||
120 | size_t pTag = CurFmt.find_last_of("%"); | |||
121 | if (pTag != StringRef::npos) { | |||
122 | ArgDump = true; | |||
123 | while (pTag && CurFmt[--pTag] == '%') { | |||
124 | ArgDump = !ArgDump; | |||
125 | } | |||
126 | } | |||
127 | ||||
128 | if (ArgDump) | |||
129 | OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); | |||
130 | ||||
131 | PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; | |||
132 | } | |||
133 | } | |||
134 | ||||
135 | bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, | |||
136 | Type *OpType) const { | |||
137 | if (Specifier != 's') | |||
138 | return false; | |||
139 | const PointerType *PT = dyn_cast<PointerType>(OpType); | |||
140 | if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) | |||
141 | return false; | |||
142 | Type *ElemType = PT->getContainedType(0); | |||
143 | if (ElemType->getTypeID() != Type::IntegerTyID) | |||
144 | return false; | |||
145 | IntegerType *ElemIType = cast<IntegerType>(ElemType); | |||
146 | return ElemIType->getBitWidth() == 8; | |||
147 | } | |||
148 | ||||
149 | bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( | |||
150 | Module &M, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { | |||
151 | LLVMContext &Ctx = M.getContext(); | |||
152 | IRBuilder<> Builder(Ctx); | |||
153 | Type *I32Ty = Type::getInt32Ty(Ctx); | |||
154 | unsigned UniqID = 0; | |||
155 | // NB: This is important for this string size to be divizable by 4 | |||
156 | const char NonLiteralStr[4] = "???"; | |||
157 | ||||
158 | for (auto CI : Printfs) { | |||
| ||||
159 | unsigned NumOps = CI->getNumArgOperands(); | |||
160 | ||||
161 | SmallString<16> OpConvSpecifiers; | |||
162 | Value *Op = CI->getArgOperand(0); | |||
163 | ||||
164 | if (auto LI = dyn_cast<LoadInst>(Op)) { | |||
165 | Op = LI->getPointerOperand(); | |||
166 | for (auto Use : Op->users()) { | |||
167 | if (auto SI = dyn_cast<StoreInst>(Use)) { | |||
168 | Op = SI->getValueOperand(); | |||
169 | break; | |||
170 | } | |||
171 | } | |||
172 | } | |||
173 | ||||
174 | if (auto I = dyn_cast<Instruction>(Op)) { | |||
175 | Value *Op_simplified = simplify(I, &GetTLI(*I->getFunction())); | |||
176 | if (Op_simplified) | |||
177 | Op = Op_simplified; | |||
178 | } | |||
179 | ||||
180 | ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op); | |||
181 | ||||
182 | if (ConstExpr) { | |||
183 | GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); | |||
184 | ||||
185 | StringRef Str("unknown"); | |||
186 | if (GVar && GVar->hasInitializer()) { | |||
187 | auto Init = GVar->getInitializer(); | |||
188 | if (auto CA = dyn_cast<ConstantDataArray>(Init)) { | |||
189 | if (CA->isString()) | |||
190 | Str = CA->getAsCString(); | |||
191 | } else if (isa<ConstantAggregateZero>(Init)) { | |||
192 | Str = ""; | |||
193 | } | |||
194 | // | |||
195 | // we need this call to ascertain | |||
196 | // that we are printing a string | |||
197 | // or a pointer. It takes out the | |||
198 | // specifiers and fills up the first | |||
199 | // arg | |||
200 | getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1); | |||
201 | } | |||
202 | // Add metadata for the string | |||
203 | std::string AStreamHolder; | |||
204 | raw_string_ostream Sizes(AStreamHolder); | |||
205 | int Sum = DWORD_ALIGN4; | |||
206 | Sizes << CI->getNumArgOperands() - 1; | |||
207 | Sizes << ':'; | |||
208 | for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && | |||
209 | ArgCount <= OpConvSpecifiers.size(); | |||
210 | ArgCount++) { | |||
211 | Value *Arg = CI->getArgOperand(ArgCount); | |||
212 | Type *ArgType = Arg->getType(); | |||
213 | unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType); | |||
214 | ArgSize = ArgSize / 8; | |||
215 | // | |||
216 | // ArgSize by design should be a multiple of DWORD_ALIGN, | |||
217 | // expand the arguments that do not follow this rule. | |||
218 | // | |||
219 | if (ArgSize % DWORD_ALIGN4 != 0) { | |||
220 | llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx); | |||
221 | auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType); | |||
222 | int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1; | |||
223 | if (LLVMVecType && NumElem > 1) | |||
224 | ResType = llvm::FixedVectorType::get(ResType, NumElem); | |||
225 | Builder.SetInsertPoint(CI); | |||
226 | Builder.SetCurrentDebugLocation(CI->getDebugLoc()); | |||
227 | if (OpConvSpecifiers[ArgCount - 1] == 'x' || | |||
228 | OpConvSpecifiers[ArgCount - 1] == 'X' || | |||
229 | OpConvSpecifiers[ArgCount - 1] == 'u' || | |||
230 | OpConvSpecifiers[ArgCount - 1] == 'o') | |||
231 | Arg = Builder.CreateZExt(Arg, ResType); | |||
232 | else | |||
233 | Arg = Builder.CreateSExt(Arg, ResType); | |||
234 | ArgType = Arg->getType(); | |||
235 | ArgSize = TD->getTypeAllocSizeInBits(ArgType); | |||
236 | ArgSize = ArgSize / 8; | |||
237 | CI->setOperand(ArgCount, Arg); | |||
238 | } | |||
239 | if (OpConvSpecifiers[ArgCount - 1] == 'f') { | |||
240 | ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg); | |||
241 | if (FpCons) | |||
242 | ArgSize = 4; | |||
243 | else { | |||
244 | FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); | |||
245 | if (FpExt && FpExt->getType()->isDoubleTy() && | |||
246 | FpExt->getOperand(0)->getType()->isFloatTy()) | |||
247 | ArgSize = 4; | |||
248 | } | |||
249 | } | |||
250 | if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { | |||
251 | if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { | |||
252 | GlobalVariable *GV = | |||
253 | dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); | |||
254 | if (GV && GV->hasInitializer()) { | |||
255 | Constant *Init = GV->getInitializer(); | |||
256 | ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); | |||
257 | if (Init->isZeroValue() || CA->isString()) { | |||
258 | size_t SizeStr = Init->isZeroValue() | |||
259 | ? 1 | |||
260 | : (strlen(CA->getAsCString().data()) + 1); | |||
261 | size_t Rem = SizeStr % DWORD_ALIGN4; | |||
262 | size_t NSizeStr = 0; | |||
263 | LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStrdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "Printf string original size = " << SizeStr << '\n'; } } while (false) | |||
264 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "Printf string original size = " << SizeStr << '\n'; } } while (false); | |||
265 | if (Rem) { | |||
266 | NSizeStr = SizeStr + (DWORD_ALIGN4 - Rem); | |||
267 | } else { | |||
268 | NSizeStr = SizeStr; | |||
269 | } | |||
270 | ArgSize = NSizeStr; | |||
271 | } | |||
272 | } else { | |||
273 | ArgSize = sizeof(NonLiteralStr); | |||
274 | } | |||
275 | } else { | |||
276 | ArgSize = sizeof(NonLiteralStr); | |||
277 | } | |||
278 | } | |||
279 | LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "Printf ArgSize (in buffer) = " << ArgSize << " for type: " << *ArgType << '\n'; } } while (false) | |||
280 | << " for type: " << *ArgType << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "Printf ArgSize (in buffer) = " << ArgSize << " for type: " << *ArgType << '\n'; } } while (false); | |||
281 | Sizes << ArgSize << ':'; | |||
282 | Sum += ArgSize; | |||
283 | } | |||
284 | LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "Printf format string in source = " << Str.str() << '\n'; } } while (false) | |||
285 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "Printf format string in source = " << Str.str() << '\n'; } } while (false); | |||
286 | for (size_t I = 0; I < Str.size(); ++I) { | |||
287 | // Rest of the C escape sequences (e.g. \') are handled correctly | |||
288 | // by the MDParser | |||
289 | switch (Str[I]) { | |||
290 | case '\a': | |||
291 | Sizes << "\\a"; | |||
292 | break; | |||
293 | case '\b': | |||
294 | Sizes << "\\b"; | |||
295 | break; | |||
296 | case '\f': | |||
297 | Sizes << "\\f"; | |||
298 | break; | |||
299 | case '\n': | |||
300 | Sizes << "\\n"; | |||
301 | break; | |||
302 | case '\r': | |||
303 | Sizes << "\\r"; | |||
304 | break; | |||
305 | case '\v': | |||
306 | Sizes << "\\v"; | |||
307 | break; | |||
308 | case ':': | |||
309 | // ':' cannot be scanned by Flex, as it is defined as a delimiter | |||
310 | // Replace it with it's octal representation \72 | |||
311 | Sizes << "\\72"; | |||
312 | break; | |||
313 | default: | |||
314 | Sizes << Str[I]; | |||
315 | break; | |||
316 | } | |||
317 | } | |||
318 | ||||
319 | // Insert the printf_alloc call | |||
320 | Builder.SetInsertPoint(CI); | |||
321 | Builder.SetCurrentDebugLocation(CI->getDebugLoc()); | |||
322 | ||||
323 | AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, | |||
324 | Attribute::NoUnwind); | |||
325 | ||||
326 | Type *SizetTy = Type::getInt32Ty(Ctx); | |||
327 | ||||
328 | Type *Tys_alloc[1] = {SizetTy}; | |||
329 | Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1); | |||
330 | FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); | |||
331 | FunctionCallee PrintfAllocFn = | |||
332 | M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); | |||
333 | ||||
334 | LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "Printf metadata = " << Sizes.str() << '\n'; } } while (false); | |||
335 | std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str(); | |||
336 | MDString *fmtStrArray = MDString::get(Ctx, fmtstr); | |||
337 | ||||
338 | // Instead of creating global variables, the | |||
339 | // printf format strings are extracted | |||
340 | // and passed as metadata. This avoids | |||
341 | // polluting llvm's symbol tables in this module. | |||
342 | // Metadata is going to be extracted | |||
343 | // by the backend passes and inserted | |||
344 | // into the OpenCL binary as appropriate. | |||
345 | StringRef amd("llvm.printf.fmts"); | |||
346 | NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd); | |||
347 | MDNode *myMD = MDNode::get(Ctx, fmtStrArray); | |||
348 | metaD->addOperand(myMD); | |||
349 | Value *sumC = ConstantInt::get(SizetTy, Sum, false); | |||
350 | SmallVector<Value *, 1> alloc_args; | |||
351 | alloc_args.push_back(sumC); | |||
352 | CallInst *pcall = | |||
353 | CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); | |||
354 | ||||
355 | // | |||
356 | // Insert code to split basicblock with a | |||
357 | // piece of hammock code. | |||
358 | // basicblock splits after buffer overflow check | |||
359 | // | |||
360 | ConstantPointerNull *zeroIntPtr = | |||
361 | ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1)); | |||
362 | ICmpInst *cmp = | |||
363 | dyn_cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); | |||
364 | if (!CI->use_empty()) { | |||
365 | Value *result = | |||
366 | Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); | |||
367 | CI->replaceAllUsesWith(result); | |||
368 | } | |||
369 | SplitBlock(CI->getParent(), cmp); | |||
370 | Instruction *Brnch = | |||
371 | SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); | |||
| ||||
372 | ||||
373 | Builder.SetInsertPoint(Brnch); | |||
374 | ||||
375 | // store unique printf id in the buffer | |||
376 | // | |||
377 | SmallVector<Value *, 1> ZeroIdxList; | |||
378 | ConstantInt *zeroInt = | |||
379 | ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); | |||
380 | ZeroIdxList.push_back(zeroInt); | |||
381 | ||||
382 | GetElementPtrInst *BufferIdx = GetElementPtrInst::Create( | |||
383 | nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch); | |||
384 | ||||
385 | Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); | |||
386 | Value *id_gep_cast = | |||
387 | new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); | |||
388 | ||||
389 | new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch); | |||
390 | ||||
391 | SmallVector<Value *, 2> FourthIdxList; | |||
392 | ConstantInt *fourInt = | |||
393 | ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10)); | |||
394 | ||||
395 | FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id | |||
396 | // the following GEP is the buffer pointer | |||
397 | BufferIdx = GetElementPtrInst::Create(nullptr, pcall, FourthIdxList, | |||
398 | "PrintBuffGep", Brnch); | |||
399 | ||||
400 | Type *Int32Ty = Type::getInt32Ty(Ctx); | |||
401 | Type *Int64Ty = Type::getInt64Ty(Ctx); | |||
402 | for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && | |||
403 | ArgCount <= OpConvSpecifiers.size(); | |||
404 | ArgCount++) { | |||
405 | Value *Arg = CI->getArgOperand(ArgCount); | |||
406 | Type *ArgType = Arg->getType(); | |||
407 | SmallVector<Value *, 32> WhatToStore; | |||
408 | if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) { | |||
409 | Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty; | |||
410 | if (OpConvSpecifiers[ArgCount - 1] == 'f') { | |||
411 | if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) { | |||
412 | APFloat Val(FpCons->getValueAPF()); | |||
413 | bool Lost = false; | |||
414 | Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, | |||
415 | &Lost); | |||
416 | Arg = ConstantFP::get(Ctx, Val); | |||
417 | IType = Int32Ty; | |||
418 | } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) { | |||
419 | if (FpExt->getType()->isDoubleTy() && | |||
420 | FpExt->getOperand(0)->getType()->isFloatTy()) { | |||
421 | Arg = FpExt->getOperand(0); | |||
422 | IType = Int32Ty; | |||
423 | } | |||
424 | } | |||
425 | } | |||
426 | Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch); | |||
427 | WhatToStore.push_back(Arg); | |||
428 | } else if (ArgType->getTypeID() == Type::PointerTyID) { | |||
429 | if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { | |||
430 | const char *S = NonLiteralStr; | |||
431 | if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) { | |||
432 | auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0)); | |||
433 | if (GV && GV->hasInitializer()) { | |||
434 | Constant *Init = GV->getInitializer(); | |||
435 | ConstantDataArray *CA = dyn_cast<ConstantDataArray>(Init); | |||
436 | if (Init->isZeroValue() || CA->isString()) { | |||
437 | S = Init->isZeroValue() ? "" : CA->getAsCString().data(); | |||
438 | } | |||
439 | } | |||
440 | } | |||
441 | size_t SizeStr = strlen(S) + 1; | |||
442 | size_t Rem = SizeStr % DWORD_ALIGN4; | |||
443 | size_t NSizeStr = 0; | |||
444 | if (Rem) { | |||
445 | NSizeStr = SizeStr + (DWORD_ALIGN4 - Rem); | |||
446 | } else { | |||
447 | NSizeStr = SizeStr; | |||
448 | } | |||
449 | if (S[0]) { | |||
450 | char *MyNewStr = new char[NSizeStr](); | |||
451 | strcpy(MyNewStr, S); | |||
452 | int NumInts = NSizeStr / 4; | |||
453 | int CharC = 0; | |||
454 | while (NumInts) { | |||
455 | int ANum = *(int *)(MyNewStr + CharC); | |||
456 | CharC += 4; | |||
457 | NumInts--; | |||
458 | Value *ANumV = ConstantInt::get(Int32Ty, ANum, false); | |||
459 | WhatToStore.push_back(ANumV); | |||
460 | } | |||
461 | delete[] MyNewStr; | |||
462 | } else { | |||
463 | // Empty string, give a hint to RT it is no NULL | |||
464 | Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); | |||
465 | WhatToStore.push_back(ANumV); | |||
466 | } | |||
467 | } else { | |||
468 | uint64_t Size = TD->getTypeAllocSizeInBits(ArgType); | |||
469 | assert((Size == 32 || Size == 64) && "unsupported size")(((Size == 32 || Size == 64) && "unsupported size") ? static_cast<void> (0) : __assert_fail ("(Size == 32 || Size == 64) && \"unsupported size\"" , "/build/llvm-toolchain-snapshot-12~++20210104111112+e43b3d1f5e05/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp" , 469, __PRETTY_FUNCTION__)); | |||
470 | Type *DstType = (Size == 32) ? Int32Ty : Int64Ty; | |||
471 | Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch); | |||
472 | WhatToStore.push_back(Arg); | |||
473 | } | |||
474 | } else if (isa<FixedVectorType>(ArgType)) { | |||
475 | Type *IType = NULL__null; | |||
476 | uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements(); | |||
477 | uint32_t EleSize = ArgType->getScalarSizeInBits(); | |||
478 | uint32_t TotalSize = EleCount * EleSize; | |||
479 | if (EleCount == 3) { | |||
480 | ShuffleVectorInst *Shuffle = | |||
481 | new ShuffleVectorInst(Arg, Arg, ArrayRef<int>{0, 1, 2, 2}); | |||
482 | Shuffle->insertBefore(Brnch); | |||
483 | Arg = Shuffle; | |||
484 | ArgType = Arg->getType(); | |||
485 | TotalSize += EleSize; | |||
486 | } | |||
487 | switch (EleSize) { | |||
488 | default: | |||
489 | EleCount = TotalSize / 64; | |||
490 | IType = Type::getInt64Ty(ArgType->getContext()); | |||
491 | break; | |||
492 | case 8: | |||
493 | if (EleCount >= 8) { | |||
494 | EleCount = TotalSize / 64; | |||
495 | IType = Type::getInt64Ty(ArgType->getContext()); | |||
496 | } else if (EleCount >= 3) { | |||
497 | EleCount = 1; | |||
498 | IType = Type::getInt32Ty(ArgType->getContext()); | |||
499 | } else { | |||
500 | EleCount = 1; | |||
501 | IType = Type::getInt16Ty(ArgType->getContext()); | |||
502 | } | |||
503 | break; | |||
504 | case 16: | |||
505 | if (EleCount >= 3) { | |||
506 | EleCount = TotalSize / 64; | |||
507 | IType = Type::getInt64Ty(ArgType->getContext()); | |||
508 | } else { | |||
509 | EleCount = 1; | |||
510 | IType = Type::getInt32Ty(ArgType->getContext()); | |||
511 | } | |||
512 | break; | |||
513 | } | |||
514 | if (EleCount > 1) { | |||
515 | IType = FixedVectorType::get(IType, EleCount); | |||
516 | } | |||
517 | Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch); | |||
518 | WhatToStore.push_back(Arg); | |||
519 | } else { | |||
520 | WhatToStore.push_back(Arg); | |||
521 | } | |||
522 | for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { | |||
523 | Value *TheBtCast = WhatToStore[I]; | |||
524 | unsigned ArgSize = | |||
525 | TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8; | |||
526 | SmallVector<Value *, 1> BuffOffset; | |||
527 | BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize)); | |||
528 | ||||
529 | Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1); | |||
530 | Value *CastedGEP = | |||
531 | new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch); | |||
532 | StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch); | |||
533 | LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "inserting store to printf buffer:\n" << *StBuff << '\n'; } } while (false) | |||
534 | << *StBuff << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "inserting store to printf buffer:\n" << *StBuff << '\n'; } } while (false); | |||
535 | (void)StBuff; | |||
536 | if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands()) | |||
537 | break; | |||
538 | BufferIdx = GetElementPtrInst::Create(nullptr, BufferIdx, BuffOffset, | |||
539 | "PrintBuffNextPtr", Brnch); | |||
540 | LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "inserting gep to the printf buffer:\n" << *BufferIdx << '\n'; } } while (false) | |||
541 | << *BufferIdx << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("printfToRuntime")) { dbgs() << "inserting gep to the printf buffer:\n" << *BufferIdx << '\n'; } } while (false); | |||
542 | } | |||
543 | } | |||
544 | } | |||
545 | } | |||
546 | ||||
547 | // erase the printf calls | |||
548 | for (auto CI : Printfs) | |||
549 | CI->eraseFromParent(); | |||
550 | ||||
551 | Printfs.clear(); | |||
552 | return true; | |||
553 | } | |||
554 | ||||
555 | bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { | |||
556 | Triple TT(M.getTargetTriple()); | |||
557 | if (TT.getArch() == Triple::r600) | |||
558 | return false; | |||
559 | ||||
560 | auto PrintfFunction = M.getFunction("printf"); | |||
561 | if (!PrintfFunction) | |||
562 | return false; | |||
563 | ||||
564 | for (auto &U : PrintfFunction->uses()) { | |||
565 | if (auto *CI = dyn_cast<CallInst>(U.getUser())) { | |||
566 | if (CI->isCallee(&U)) | |||
567 | Printfs.push_back(CI); | |||
568 | } | |||
569 | } | |||
570 | ||||
571 | if (Printfs.empty()) | |||
572 | return false; | |||
573 | ||||
574 | if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) { | |||
575 | for (auto &U : HostcallFunction->uses()) { | |||
576 | if (auto *CI = dyn_cast<CallInst>(U.getUser())) { | |||
577 | M.getContext().emitError( | |||
578 | CI, "Cannot use both printf and hostcall in the same module"); | |||
579 | } | |||
580 | } | |||
581 | } | |||
582 | ||||
583 | TD = &M.getDataLayout(); | |||
584 | auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); | |||
585 | DT = DTWP ? &DTWP->getDomTree() : nullptr; | |||
586 | auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { | |||
587 | return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); | |||
588 | }; | |||
589 | ||||
590 | return lowerPrintfForGpu(M, GetTLI); | |||
591 | } |