LLVM  14.0.0git
AMDGPUEmitPrintf.cpp
Go to the documentation of this file.
1 //===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Utility function to lower a printf call into a series of device
10 // library calls on the AMDGPU target.
11 //
12 // WARNING: This file knows about certain library functions. It recognizes them
13 // by name, and hardwires knowledge of their semantics.
14 //
15 //===----------------------------------------------------------------------===//
16 
20 
21 using namespace llvm;
22 
23 #define DEBUG_TYPE "amdgpu-emit-printf"
24 
25 static bool isCString(const Value *Arg) {
26  auto Ty = Arg->getType();
27  auto PtrTy = dyn_cast<PointerType>(Ty);
28  if (!PtrTy)
29  return false;
30 
31  auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
32  if (!IntTy)
33  return false;
34 
35  return IntTy->getBitWidth() == 8;
36 }
37 
39  auto Int64Ty = Builder.getInt64Ty();
40  auto Ty = Arg->getType();
41 
42  if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
43  switch (IntTy->getBitWidth()) {
44  case 32:
45  return Builder.CreateZExt(Arg, Int64Ty);
46  case 64:
47  return Arg;
48  }
49  }
50 
51  if (Ty->getTypeID() == Type::DoubleTyID) {
52  return Builder.CreateBitCast(Arg, Int64Ty);
53  }
54 
55  if (isa<PointerType>(Ty)) {
56  return Builder.CreatePtrToInt(Arg, Int64Ty);
57  }
58 
59  llvm_unreachable("unexpected type");
60 }
61 
63  auto Int64Ty = Builder.getInt64Ty();
64  auto M = Builder.GetInsertBlock()->getModule();
65  auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
66  if (!M->getModuleFlag("amdgpu_hostcall")) {
67  M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
68  }
69  return Builder.CreateCall(Fn, Version);
70 }
71 
72 static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
73  Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
74  Value *Arg4, Value *Arg5, Value *Arg6,
75  bool IsLast) {
76  auto Int64Ty = Builder.getInt64Ty();
77  auto Int32Ty = Builder.getInt32Ty();
78  auto M = Builder.GetInsertBlock()->getModule();
79  auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
80  Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
81  Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
82  auto IsLastValue = Builder.getInt32(IsLast);
83  auto NumArgsValue = Builder.getInt32(NumArgs);
84  return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
85  Arg4, Arg5, Arg6, IsLastValue});
86 }
87 
89  bool IsLast) {
90  auto Arg0 = fitArgInto64Bits(Builder, Arg);
91  auto Zero = Builder.getInt64(0);
92  return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
93  Zero, IsLast);
94 }
95 
96 // The device library does not provide strlen, so we build our own loop
97 // here. While we are at it, we also include the terminating null in the length.
99  auto *Prev = Builder.GetInsertBlock();
100  Module *M = Prev->getModule();
101 
102  auto CharZero = Builder.getInt8(0);
103  auto One = Builder.getInt64(1);
104  auto Zero = Builder.getInt64(0);
105  auto Int64Ty = Builder.getInt64Ty();
106 
107  // The length is either zero for a null pointer, or the computed value for an
108  // actual string. We need a join block for a phi that represents the final
109  // value.
110  //
111  // Strictly speaking, the zero does not matter since
112  // __ockl_printf_append_string_n ignores the length if the pointer is null.
113  BasicBlock *Join = nullptr;
114  if (Prev->getTerminator()) {
115  Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
116  "strlen.join");
117  Prev->getTerminator()->eraseFromParent();
118  } else {
119  Join = BasicBlock::Create(M->getContext(), "strlen.join",
120  Prev->getParent());
121  }
122  BasicBlock *While =
123  BasicBlock::Create(M->getContext(), "strlen.while",
124  Prev->getParent(), Join);
125  BasicBlock *WhileDone = BasicBlock::Create(
126  M->getContext(), "strlen.while.done",
127  Prev->getParent(), Join);
128 
129  // Emit an early return for when the pointer is null.
130  Builder.SetInsertPoint(Prev);
131  auto CmpNull =
132  Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
133  BranchInst::Create(Join, While, CmpNull, Prev);
134 
135  // Entry to the while loop.
136  Builder.SetInsertPoint(While);
137 
138  auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
139  PtrPhi->addIncoming(Str, Prev);
140  auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
141  PtrPhi->addIncoming(PtrNext, While);
142 
143  // Condition for the while loop.
144  auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
145  auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
146  Builder.CreateCondBr(Cmp, WhileDone, While);
147 
148  // Add one to the computed length.
149  Builder.SetInsertPoint(WhileDone, WhileDone->begin());
150  auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
151  auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
152  auto Len = Builder.CreateSub(End, Begin);
153  Len = Builder.CreateAdd(Len, One);
154 
155  // Final join.
156  BranchInst::Create(Join, WhileDone);
157  Builder.SetInsertPoint(Join, Join->begin());
158  auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
159  LenPhi->addIncoming(Len, WhileDone);
160  LenPhi->addIncoming(Zero, Prev);
161 
162  return LenPhi;
163 }
164 
166  Value *Length, bool isLast) {
167  auto Int64Ty = Builder.getInt64Ty();
168  auto CharPtrTy = Builder.getInt8PtrTy();
169  auto Int32Ty = Builder.getInt32Ty();
170  auto M = Builder.GetInsertBlock()->getModule();
171  auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
172  Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
173  auto IsLastInt32 = Builder.getInt32(isLast);
174  return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
175 }
176 
178  bool IsLast) {
179  auto Length = getStrlenWithNull(Builder, Arg);
180  return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
181 }
182 
184  bool SpecIsCString, bool IsLast) {
185  if (SpecIsCString && isCString(Arg)) {
186  return appendString(Builder, Desc, Arg, IsLast);
187  }
188  // If the format specifies a string but the argument is not, the frontend will
189  // have printed a warning. We just rely on undefined behaviour and send the
190  // argument anyway.
191  return appendArg(Builder, Desc, Arg, IsLast);
192 }
193 
194 // Scan the format string to locate all specifiers, and mark the ones that
195 // specify a string, i.e, the "%s" specifier with optional '*' characters.
196 static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
197  StringRef Str;
198  if (!getConstantStringInfo(Fmt, Str) || Str.empty())
199  return;
200 
201  static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
202  size_t SpecPos = 0;
203  // Skip the first argument, the format string.
204  unsigned ArgIdx = 1;
205 
206  while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
207  if (Str[SpecPos + 1] == '%') {
208  SpecPos += 2;
209  continue;
210  }
211  auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
212  if (SpecEnd == StringRef::npos)
213  return;
214  auto Spec = Str.slice(SpecPos, SpecEnd + 1);
215  ArgIdx += Spec.count('*');
216  if (Str[SpecEnd] == 's') {
217  BV.set(ArgIdx);
218  }
219  SpecPos = SpecEnd + 1;
220  ++ArgIdx;
221  }
222 }
223 
226  auto NumOps = Args.size();
227  assert(NumOps >= 1);
228 
229  auto Fmt = Args[0];
230  SparseBitVector<8> SpecIsCString;
231  locateCStrings(SpecIsCString, Fmt);
232 
233  auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
234  Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
235 
236  // FIXME: This invokes hostcall once for each argument. We can pack up to
237  // seven scalar printf arguments in a single hostcall. See the signature of
238  // callAppendArgs().
239  for (unsigned int i = 1; i != NumOps; ++i) {
240  bool IsLast = i == NumOps - 1;
241  bool IsCString = SpecIsCString.test(i);
242  Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
243  }
244 
245  return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
246 }
i
i
Definition: README.txt:29
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
isCString
static bool isCString(const Value *Arg)
Definition: AMDGPUEmitPrintf.cpp:25
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
callPrintfBegin
static Value * callPrintfBegin(IRBuilder<> &Builder, Value *Version)
Definition: AMDGPUEmitPrintf.cpp:62
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:59
processArg
static Value * processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, bool SpecIsCString, bool IsLast)
Definition: AMDGPUEmitPrintf.cpp:183
llvm::IRBuilder<>
ValueTracking.h
AMDGPUEmitPrintf.h
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:385
llvm::Module::Override
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:136
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
SparseBitVector.h
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:185
appendString
static Value * appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, bool IsLast)
Definition: AMDGPUEmitPrintf.cpp:177
llvm::SparseBitVector
Definition: SparseBitVector.h:255
locateCStrings
static void locateCStrings(SparseBitVector< 8 > &BV, Value *Fmt)
Definition: AMDGPUEmitPrintf.cpp:196
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
appendArg
static Value * appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, bool IsLast)
Definition: AMDGPUEmitPrintf.cpp:88
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::SparseBitVector::set
void set(unsigned Idx)
Definition: SparseBitVector.h:507
llvm::getConstantStringInfo
bool getConstantStringInfo(const Value *V, StringRef &Str, uint64_t Offset=0, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
Definition: ValueTracking.cpp:4112
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3148
llvm::SparseBitVector::test
bool test(unsigned Idx) const
Definition: SparseBitVector.h:471
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
getStrlenWithNull
static Value * getStrlenWithNull(IRBuilder<> &Builder, Value *Str)
Definition: AMDGPUEmitPrintf.cpp:98
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
fitArgInto64Bits
static Value * fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg)
Definition: AMDGPUEmitPrintf.cpp:38
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
Version
uint64_t Version
Definition: RawMemProfReader.cpp:25
callAppendArgs
static Value * callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs, Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3, Value *Arg4, Value *Arg5, Value *Arg6, bool IsLast)
Definition: AMDGPUEmitPrintf.cpp:72
callAppendStringN
static Value * callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str, Value *Length, bool isLast)
Definition: AMDGPUEmitPrintf.cpp:165
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::emitAMDGPUPrintfCall
Value * emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef< Value * > Args)
Definition: AMDGPUEmitPrintf.cpp:224