LLVM  15.0.0git
AMDGPUPrintfRuntimeBinding.cpp
Go to the documentation of this file.
1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // \file
9 //
10 // This pass binds printf calls to a kernel argument pointer that will be
11 // bound to a buffer later by the runtime.
12 //
13 // This pass traverses the functions in the module and converts
14 // each call to printf to a sequence of operations that
15 // store the following into the printf buffer:
16 // - format string (passed as a module's metadata unique ID)
17 // - bitwise copies of printf arguments
18 // The backend passes will need to store metadata in the kernel
19 //===----------------------------------------------------------------------===//
20 
21 #include "AMDGPU.h"
22 #include "llvm/ADT/Triple.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/InitializePasses.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "printfToRuntime"
34 #define DWORD_ALIGN 4
35 
36 namespace {
37 class AMDGPUPrintfRuntimeBinding final : public ModulePass {
38 
39 public:
40  static char ID;
41 
42  explicit AMDGPUPrintfRuntimeBinding();
43 
44 private:
45  bool runOnModule(Module &M) override;
46 
47  void getAnalysisUsage(AnalysisUsage &AU) const override {
50  }
51 };
52 
53 class AMDGPUPrintfRuntimeBindingImpl {
54 public:
55  AMDGPUPrintfRuntimeBindingImpl(
56  function_ref<const DominatorTree &(Function &)> GetDT,
57  function_ref<const TargetLibraryInfo &(Function &)> GetTLI)
58  : GetDT(GetDT), GetTLI(GetTLI) {}
59  bool run(Module &M);
60 
61 private:
62  void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
63  StringRef fmt, size_t num_ops) const;
64 
65  bool shouldPrintAsStr(char Specifier, Type *OpType) const;
66  bool lowerPrintfForGpu(Module &M);
67 
69  const DominatorTree *DT) {
70  return simplifyInstruction(I, {*TD, TLI, DT});
71  }
72 
73  const DataLayout *TD;
74  function_ref<const DominatorTree &(Function &)> GetDT;
75  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
77 };
78 } // namespace
79 
81 
82 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding,
83  "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",
84  false, false)
87 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding",
89 
90 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID;
91 
92 namespace llvm {
94  return new AMDGPUPrintfRuntimeBinding();
95 }
96 } // namespace llvm
97 
98 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) {
100 }
101 
102 void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
103  SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt,
104  size_t NumOps) const {
105  // not all format characters are collected.
106  // At this time the format characters of interest
107  // are %p and %s, which use to know if we
108  // are either storing a literal string or a
109  // pointer to the printf buffer.
110  static const char ConvSpecifiers[] = "cdieEfgGaosuxXp";
111  size_t CurFmtSpecifierIdx = 0;
112  size_t PrevFmtSpecifierIdx = 0;
113 
114  while ((CurFmtSpecifierIdx = Fmt.find_first_of(
115  ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) {
116  bool ArgDump = false;
117  StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx,
118  CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
119  size_t pTag = CurFmt.find_last_of("%");
120  if (pTag != StringRef::npos) {
121  ArgDump = true;
122  while (pTag && CurFmt[--pTag] == '%') {
123  ArgDump = !ArgDump;
124  }
125  }
126 
127  if (ArgDump)
128  OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]);
129 
130  PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
131  }
132 }
133 
134 bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(char Specifier,
135  Type *OpType) const {
136  if (Specifier != 's')
137  return false;
138  const PointerType *PT = dyn_cast<PointerType>(OpType);
139  if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
140  return false;
141  Type *ElemType = PT->getContainedType(0);
142  if (ElemType->getTypeID() != Type::IntegerTyID)
143  return false;
144  IntegerType *ElemIType = cast<IntegerType>(ElemType);
145  return ElemIType->getBitWidth() == 8;
146 }
147 
148 bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
149  LLVMContext &Ctx = M.getContext();
150  IRBuilder<> Builder(Ctx);
151  Type *I32Ty = Type::getInt32Ty(Ctx);
152  unsigned UniqID = 0;
153  // NB: It is important for the size of this string to be divisible by 4
154  const char NonLiteralStr[4] = "???";
155 
156  for (auto CI : Printfs) {
157  unsigned NumOps = CI->arg_size();
158 
159  SmallString<16> OpConvSpecifiers;
160  Value *Op = CI->getArgOperand(0);
161 
162  if (auto LI = dyn_cast<LoadInst>(Op)) {
163  Op = LI->getPointerOperand();
164  for (auto Use : Op->users()) {
165  if (auto SI = dyn_cast<StoreInst>(Use)) {
166  Op = SI->getValueOperand();
167  break;
168  }
169  }
170  }
171 
172  if (auto I = dyn_cast<Instruction>(Op)) {
173  Value *Op_simplified =
174  simplify(I, &GetTLI(*I->getFunction()), &GetDT(*I->getFunction()));
175  if (Op_simplified)
176  Op = Op_simplified;
177  }
178 
179  ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op);
180 
181  if (ConstExpr) {
182  GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
183 
184  StringRef Str("unknown");
185  if (GVar && GVar->hasInitializer()) {
186  auto *Init = GVar->getInitializer();
187  if (auto *CA = dyn_cast<ConstantDataArray>(Init)) {
188  if (CA->isString())
189  Str = CA->getAsCString();
190  } else if (isa<ConstantAggregateZero>(Init)) {
191  Str = "";
192  }
193  //
194  // we need this call to ascertain
195  // that we are printing a string
196  // or a pointer. It takes out the
197  // specifiers and fills up the first
198  // arg
199  getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1);
200  }
201  // Add metadata for the string
202  std::string AStreamHolder;
203  raw_string_ostream Sizes(AStreamHolder);
204  int Sum = DWORD_ALIGN;
205  Sizes << CI->arg_size() - 1;
206  Sizes << ':';
207  for (unsigned ArgCount = 1;
208  ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
209  ArgCount++) {
210  Value *Arg = CI->getArgOperand(ArgCount);
211  Type *ArgType = Arg->getType();
212  unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType);
213  ArgSize = ArgSize / 8;
214  //
215  // ArgSize by design should be a multiple of DWORD_ALIGN,
216  // expand the arguments that do not follow this rule.
217  //
218  if (ArgSize % DWORD_ALIGN != 0) {
219  llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx);
220  auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType);
221  int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1;
222  if (LLVMVecType && NumElem > 1)
223  ResType = llvm::FixedVectorType::get(ResType, NumElem);
224  Builder.SetInsertPoint(CI);
225  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
226  if (OpConvSpecifiers[ArgCount - 1] == 'x' ||
227  OpConvSpecifiers[ArgCount - 1] == 'X' ||
228  OpConvSpecifiers[ArgCount - 1] == 'u' ||
229  OpConvSpecifiers[ArgCount - 1] == 'o')
230  Arg = Builder.CreateZExt(Arg, ResType);
231  else
232  Arg = Builder.CreateSExt(Arg, ResType);
233  ArgType = Arg->getType();
234  ArgSize = TD->getTypeAllocSizeInBits(ArgType);
235  ArgSize = ArgSize / 8;
236  CI->setOperand(ArgCount, Arg);
237  }
238  if (OpConvSpecifiers[ArgCount - 1] == 'f') {
239  ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg);
240  if (FpCons)
241  ArgSize = 4;
242  else {
243  FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg);
244  if (FpExt && FpExt->getType()->isDoubleTy() &&
245  FpExt->getOperand(0)->getType()->isFloatTy())
246  ArgSize = 4;
247  }
248  }
249  if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
250  if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
251  auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
252  if (GV && GV->hasInitializer()) {
253  Constant *Init = GV->getInitializer();
254  bool IsZeroValue = Init->isZeroValue();
255  auto *CA = dyn_cast<ConstantDataArray>(Init);
256  if (IsZeroValue || (CA && CA->isString())) {
257  size_t SizeStr =
258  IsZeroValue ? 1 : (strlen(CA->getAsCString().data()) + 1);
259  size_t Rem = SizeStr % DWORD_ALIGN;
260  size_t NSizeStr = 0;
261  LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr
262  << '\n');
263  if (Rem) {
264  NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
265  } else {
266  NSizeStr = SizeStr;
267  }
268  ArgSize = NSizeStr;
269  }
270  } else {
271  ArgSize = sizeof(NonLiteralStr);
272  }
273  } else {
274  ArgSize = sizeof(NonLiteralStr);
275  }
276  }
277  LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize
278  << " for type: " << *ArgType << '\n');
279  Sizes << ArgSize << ':';
280  Sum += ArgSize;
281  }
282  LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()
283  << '\n');
284  for (char C : Str) {
285  // Rest of the C escape sequences (e.g. \') are handled correctly
286  // by the MDParser
287  switch (C) {
288  case '\a':
289  Sizes << "\\a";
290  break;
291  case '\b':
292  Sizes << "\\b";
293  break;
294  case '\f':
295  Sizes << "\\f";
296  break;
297  case '\n':
298  Sizes << "\\n";
299  break;
300  case '\r':
301  Sizes << "\\r";
302  break;
303  case '\v':
304  Sizes << "\\v";
305  break;
306  case ':':
307  // ':' cannot be scanned by Flex, as it is defined as a delimiter
308  // Replace it with its octal representation \72
309  Sizes << "\\72";
310  break;
311  default:
312  Sizes << C;
313  break;
314  }
315  }
316 
317  // Insert the printf_alloc call
318  Builder.SetInsertPoint(CI);
319  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
320 
321  AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex,
322  Attribute::NoUnwind);
323 
324  Type *SizetTy = Type::getInt32Ty(Ctx);
325 
326  Type *Tys_alloc[1] = {SizetTy};
327  Type *I8Ty = Type::getInt8Ty(Ctx);
328  Type *I8Ptr = PointerType::get(I8Ty, 1);
329  FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
330  FunctionCallee PrintfAllocFn =
331  M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
332 
333  LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n');
334  std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str();
335  MDString *fmtStrArray = MDString::get(Ctx, fmtstr);
336 
337  // Instead of creating global variables, the
338  // printf format strings are extracted
339  // and passed as metadata. This avoids
340  // polluting llvm's symbol tables in this module.
341  // Metadata is going to be extracted
342  // by the backend passes and inserted
343  // into the OpenCL binary as appropriate.
344  StringRef amd("llvm.printf.fmts");
345  NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd);
346  MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
347  metaD->addOperand(myMD);
348  Value *sumC = ConstantInt::get(SizetTy, Sum, false);
349  SmallVector<Value *, 1> alloc_args;
350  alloc_args.push_back(sumC);
351  CallInst *pcall =
352  CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI);
353 
354  //
355  // Insert code to split basicblock with a
356  // piece of hammock code.
357  // basicblock splits after buffer overflow check
358  //
359  ConstantPointerNull *zeroIntPtr =
361  auto *cmp = cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, ""));
362  if (!CI->use_empty()) {
363  Value *result =
364  Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res");
365  CI->replaceAllUsesWith(result);
366  }
367  SplitBlock(CI->getParent(), cmp);
368  Instruction *Brnch =
369  SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false);
370 
371  Builder.SetInsertPoint(Brnch);
372 
373  // store unique printf id in the buffer
374  //
375  GetElementPtrInst *BufferIdx = GetElementPtrInst::Create(
376  I8Ty, pcall, ConstantInt::get(Ctx, APInt(32, 0)), "PrintBuffID",
377  Brnch);
378 
379  Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS);
380  Value *id_gep_cast =
381  new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch);
382 
383  new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch);
384 
385  // 1st 4 bytes hold the printf_id
386  // the following GEP is the buffer pointer
387  BufferIdx = GetElementPtrInst::Create(
388  I8Ty, pcall, ConstantInt::get(Ctx, APInt(32, 4)), "PrintBuffGep",
389  Brnch);
390 
391  Type *Int32Ty = Type::getInt32Ty(Ctx);
392  Type *Int64Ty = Type::getInt64Ty(Ctx);
393  for (unsigned ArgCount = 1;
394  ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
395  ArgCount++) {
396  Value *Arg = CI->getArgOperand(ArgCount);
397  Type *ArgType = Arg->getType();
398  SmallVector<Value *, 32> WhatToStore;
399  if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) {
400  Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
401  if (OpConvSpecifiers[ArgCount - 1] == 'f') {
402  if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) {
403  APFloat Val(FpCons->getValueAPF());
404  bool Lost = false;
405  Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
406  &Lost);
407  Arg = ConstantFP::get(Ctx, Val);
408  IType = Int32Ty;
409  } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) {
410  if (FpExt->getType()->isDoubleTy() &&
411  FpExt->getOperand(0)->getType()->isFloatTy()) {
412  Arg = FpExt->getOperand(0);
413  IType = Int32Ty;
414  }
415  }
416  }
417  Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch);
418  WhatToStore.push_back(Arg);
419  } else if (ArgType->getTypeID() == Type::PointerTyID) {
420  if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
421  const char *S = NonLiteralStr;
422  if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
423  auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
424  if (GV && GV->hasInitializer()) {
425  Constant *Init = GV->getInitializer();
426  bool IsZeroValue = Init->isZeroValue();
427  auto *CA = dyn_cast<ConstantDataArray>(Init);
428  if (IsZeroValue || (CA && CA->isString())) {
429  S = IsZeroValue ? "" : CA->getAsCString().data();
430  }
431  }
432  }
433  size_t SizeStr = strlen(S) + 1;
434  size_t Rem = SizeStr % DWORD_ALIGN;
435  size_t NSizeStr = 0;
436  if (Rem) {
437  NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
438  } else {
439  NSizeStr = SizeStr;
440  }
441  if (S[0]) {
442  char *MyNewStr = new char[NSizeStr]();
443  strcpy(MyNewStr, S);
444  int NumInts = NSizeStr / 4;
445  int CharC = 0;
446  while (NumInts) {
447  int ANum = *(int *)(MyNewStr + CharC);
448  CharC += 4;
449  NumInts--;
450  Value *ANumV = ConstantInt::get(Int32Ty, ANum, false);
451  WhatToStore.push_back(ANumV);
452  }
453  delete[] MyNewStr;
454  } else {
455  // Empty string: give the runtime a hint that it is not NULL
456  Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false);
457  WhatToStore.push_back(ANumV);
458  }
459  } else {
460  uint64_t Size = TD->getTypeAllocSizeInBits(ArgType);
461  assert((Size == 32 || Size == 64) && "unsupported size");
462  Type *DstType = (Size == 32) ? Int32Ty : Int64Ty;
463  Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch);
464  WhatToStore.push_back(Arg);
465  }
466  } else if (isa<FixedVectorType>(ArgType)) {
467  Type *IType = nullptr;
468  uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements();
469  uint32_t EleSize = ArgType->getScalarSizeInBits();
470  uint32_t TotalSize = EleCount * EleSize;
471  if (EleCount == 3) {
472  ShuffleVectorInst *Shuffle =
473  new ShuffleVectorInst(Arg, Arg, ArrayRef<int>{0, 1, 2, 2});
474  Shuffle->insertBefore(Brnch);
475  Arg = Shuffle;
476  ArgType = Arg->getType();
477  TotalSize += EleSize;
478  }
479  switch (EleSize) {
480  default:
481  EleCount = TotalSize / 64;
482  IType = Type::getInt64Ty(ArgType->getContext());
483  break;
484  case 8:
485  if (EleCount >= 8) {
486  EleCount = TotalSize / 64;
487  IType = Type::getInt64Ty(ArgType->getContext());
488  } else if (EleCount >= 3) {
489  EleCount = 1;
490  IType = Type::getInt32Ty(ArgType->getContext());
491  } else {
492  EleCount = 1;
493  IType = Type::getInt16Ty(ArgType->getContext());
494  }
495  break;
496  case 16:
497  if (EleCount >= 3) {
498  EleCount = TotalSize / 64;
499  IType = Type::getInt64Ty(ArgType->getContext());
500  } else {
501  EleCount = 1;
502  IType = Type::getInt32Ty(ArgType->getContext());
503  }
504  break;
505  }
506  if (EleCount > 1) {
507  IType = FixedVectorType::get(IType, EleCount);
508  }
509  Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch);
510  WhatToStore.push_back(Arg);
511  } else {
512  WhatToStore.push_back(Arg);
513  }
514  for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) {
515  Value *TheBtCast = WhatToStore[I];
516  unsigned ArgSize =
517  TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8;
518  SmallVector<Value *, 1> BuffOffset;
519  BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize));
520 
521  Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1);
522  Value *CastedGEP =
523  new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch);
524  StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch);
525  LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
526  << *StBuff << '\n');
527  (void)StBuff;
528  if (I + 1 == E && ArgCount + 1 == CI->arg_size())
529  break;
530  BufferIdx = GetElementPtrInst::Create(I8Ty, BufferIdx, BuffOffset,
531  "PrintBuffNextPtr", Brnch);
532  LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
533  << *BufferIdx << '\n');
534  }
535  }
536  }
537  }
538 
539  // erase the printf calls
540  for (auto CI : Printfs)
541  CI->eraseFromParent();
542 
543  Printfs.clear();
544  return true;
545 }
546 
548  Triple TT(M.getTargetTriple());
549  if (TT.getArch() == Triple::r600)
550  return false;
551 
552  auto PrintfFunction = M.getFunction("printf");
553  if (!PrintfFunction)
554  return false;
555 
556  for (auto &U : PrintfFunction->uses()) {
557  if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
558  if (CI->isCallee(&U))
559  Printfs.push_back(CI);
560  }
561  }
562 
563  if (Printfs.empty())
564  return false;
565 
566  TD = &M.getDataLayout();
567 
568  return lowerPrintfForGpu(M);
569 }
570 
571 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
572  auto GetDT = [this](Function &F) -> DominatorTree & {
573  return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
574  };
575  auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
576  return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
577  };
578 
579  return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
580 }
581 
586  auto GetDT = [&FAM](Function &F) -> DominatorTree & {
588  };
589  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
591  };
592  bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
593  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
594 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
binding
amdgpu printf runtime binding
Definition: AMDGPUPrintfRuntimeBinding.cpp:87
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::NamedMDNode
A tuple of MDNodes.
Definition: Metadata.h:1572
llvm::ARM::PredBlockMask::TT
@ TT
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
simplify
hexagon bit simplify
Definition: HexagonBitSimplify.cpp:289
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::Function
Definition: Function.h:60
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:632
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5212
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:83
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:179
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AttributeList
Definition: Attributes.h:425
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", false, false) INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::StringRef::substr
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:615
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::ConstantPointerNull
A constant pointer value that points to null.
Definition: Constants.h:535
llvm::NamedMDNode::addOperand
void addOperand(MDNode *M)
Definition: Metadata.cpp:1228
llvm::StringRef::find_last_of
LLVM_NODISCARD size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition: StringRef.h:440
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TargetLibraryAnalysis::run
TargetLibraryInfo run(const Function &F, FunctionAnalysisManager &)
Definition: TargetLibraryInfo.cpp:1918
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::SmallString< 16 >
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:101
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:93
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::APFloat
Definition: APFloat.h:701
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:368
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:916
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::simplifyInstruction
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6478
llvm::AMDGPUPrintfRuntimeBindingID
char & AMDGPUPrintfRuntimeBindingID
Definition: AMDGPUPrintfRuntimeBinding.cpp:90
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
Triple.h
llvm::ArrayRef< int >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:365
AMDGPU.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
uint32_t
llvm::PtrToIntInst
This class represents a cast from a pointer to an integer.
Definition: Instructions.h:5161
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::FPExtInst
This class represents an extension of floating point types.
Definition: Instructions.h:4923
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::Init
Definition: Record.h:281
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::AMDGPU::HSAMD::Key::Printf
constexpr char Printf[]
Key for HSA::Metadata::mPrintf.
Definition: AMDGPUMetadata.h:430
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:1995
Instructions.h
Dominators.h
InstructionSimplify.h
llvm::IntegerType::getBitWidth
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
llvm::SmallVectorImpl< char >
lowering
amdgpu printf runtime AMDGPU Printf lowering
Definition: AMDGPUPrintfRuntimeBinding.cpp:88
DWORD_ALIGN
#define DWORD_ALIGN
Definition: AMDGPUPrintfRuntimeBinding.cpp:34
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:937
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
llvm::StringRef::find_first_of
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:414
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1446
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
BasicBlockUtils.h
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:837
InitializePasses.h
llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:338
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38