LLVM 22.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/DenseSet.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/ADT/Twine.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/Attributes.h"
54#include "llvm/IR/BasicBlock.h"
55#include "llvm/IR/Constant.h"
56#include "llvm/IR/Constants.h"
57#include "llvm/IR/DataLayout.h"
58#include "llvm/IR/DebugInfo.h"
60#include "llvm/IR/DebugLoc.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/GlobalAlias.h"
64#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/Instruction.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/User.h"
72#include "llvm/MC/MCExpr.h"
73#include "llvm/MC/MCInst.h"
74#include "llvm/MC/MCInstrDesc.h"
75#include "llvm/MC/MCStreamer.h"
76#include "llvm/MC/MCSymbol.h"
81#include "llvm/Support/Endian.h"
88#include <cassert>
89#include <cstdint>
90#include <cstring>
91#include <string>
92#include <utility>
93
94using namespace llvm;
95
96#define DEPOTNAME "__local_depot"
97
98/// discoverDependentGlobals - Collect into \p Globals the GlobalVariables on
99/// which \p V depends.
100static void
103 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
104 Globals.insert(GV);
105 return;
106 }
107
108 if (const User *U = dyn_cast<User>(V))
109 for (const auto &O : U->operands())
110 discoverDependentGlobals(O, Globals);
111}
112
113/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
114/// instances to be emitted, but only after any dependents have been added
115/// first.
116static void
121 // Have we already visited this one?
122 if (Visited.count(GV))
123 return;
124
125 // Do we have a circular dependency?
126 if (!Visiting.insert(GV).second)
127 report_fatal_error("Circular dependency found in global variable set");
128
129 // Make sure we visit all dependents first
131 for (const auto &O : GV->operands())
132 discoverDependentGlobals(O, Others);
133
134 for (const GlobalVariable *GV : Others)
135 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
136
137 // Now we can visit ourself
138 Order.push_back(GV);
139 Visited.insert(GV);
140 Visiting.erase(GV);
141}
142
// Checks the instruction predicates for MI's opcode against the subtarget
// feature bits, then lowers MI into a local MCInst via lowerToMCInst().
143void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
144 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
145 getSubtargetInfo().getFeatureBits());
146
147 MCInst Inst;
148 lowerToMCInst(MI, Inst);
// NOTE(review): listing line 149 is absent from this extract. As shown, Inst
// is built but nothing visible consumes it; check the upstream file.
150}
151
152void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
153 OutMI.setOpcode(MI->getOpcode());
154 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
155 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
156 const MachineOperand &MO = MI->getOperand(0);
157 OutMI.addOperand(GetSymbolRef(
158 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
159 return;
160 }
161
162 for (const auto MO : MI->operands())
163 OutMI.addOperand(lowerOperand(MO));
164}
165
// Translates a MachineOperand into an MCOperand by switching on the operand
// type. The visible returns produce: a register operand whose number comes
// from encodeVirtualRegister(), an immediate operand, and symbol references
// for external-symbol and global operands. Floating-point immediates are
// dispatched on the IR type of the constant.
// NOTE(review): the `case` labels of the outer switch and the bodies of the
// floating-point cases (listing lines 170, 172, 174-177, 179, 181, 190-191,
// 193-194, 196-197, 199-200) are absent from this extract.
166MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
167 switch (MO.getType()) {
168 default:
169 llvm_unreachable("unknown operand type");
171 return MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
173 return MCOperand::createImm(MO.getImm());
178 return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
180 return GetSymbolRef(getSymbol(MO.getGlobal()));
182 const ConstantFP *Cnt = MO.getFPImm();
183 const APFloat &Val = Cnt->getValueAPF();
184
185 switch (Cnt->getType()->getTypeID()) {
186 default:
187 report_fatal_error("Unsupported FP type");
188 break;
189 case Type::HalfTyID:
192 case Type::BFloatTyID:
195 case Type::FloatTyID:
198 case Type::DoubleTyID:
201 }
202 break;
203 }
204 }
205}
206
// Packs a register into a single unsigned value: a register-class tag in the
// upper 4 bits and a register number in the lower 28 bits.
//
// In the first branch the number is looked up in VRegMapping for the
// register's class and the tag is 1 (B1), 2 (B16), 3 (B32), 4 (B64) or
// 7 (B128); any other class is a fatal error. In the else branch the tag is 0
// and the low 28 bits of Reg are returned unchanged.
// NOTE(review): listing line 208, which opens the `if` matched by the
// `} else {` below, is absent from this extract.
207unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
209 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
210
// operator[] on both maps inserts a default entry when the key is missing.
211 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
212 unsigned RegNum = RegMap[Reg];
213
214 // Encode the register class in the upper 4 bits
215 // Must be kept in sync with NVPTXInstPrinter::printRegName
216 unsigned Ret = 0;
217 if (RC == &NVPTX::B1RegClass) {
218 Ret = (1 << 28);
219 } else if (RC == &NVPTX::B16RegClass) {
220 Ret = (2 << 28);
221 } else if (RC == &NVPTX::B32RegClass) {
222 Ret = (3 << 28);
223 } else if (RC == &NVPTX::B64RegClass) {
224 Ret = (4 << 28);
225 } else if (RC == &NVPTX::B128RegClass) {
226 Ret = (7 << 28);
227 } else {
228 report_fatal_error("Bad register class");
229 }
230
231 // Insert the vreg number
232 Ret |= (RegNum & 0x0FFFFFFF);
233 return Ret;
234 } else {
235 // Some special-use registers are actually physical registers.
236 // Encode this as the register class ID of 0 and the real register ID.
237 return Reg & 0x0FFFFFFF;
238 }
239}
240
241MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
242 const MCExpr *Expr;
243 Expr = MCSymbolRefExpr::create(Symbol, OutContext);
244 return MCOperand::createExpr(Expr);
245}
246
247void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
248 const DataLayout &DL = getDataLayout();
249 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
250 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
251
252 Type *Ty = F->getReturnType();
253 if (Ty->getTypeID() == Type::VoidTyID)
254 return;
255 O << " (";
256
257 auto PrintScalarRetVal = [&](unsigned Size) {
258 O << ".param .b" << promoteScalarArgumentSize(Size) << " func_retval0";
259 };
260 if (shouldPassAsArray(Ty)) {
261 const unsigned TotalSize = DL.getTypeAllocSize(Ty);
262 const Align RetAlignment = TLI->getFunctionArgumentAlignment(
263 F, Ty, AttributeList::ReturnIndex, DL);
264 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
265 << TotalSize << "]";
266 } else if (Ty->isFloatingPointTy()) {
267 PrintScalarRetVal(Ty->getPrimitiveSizeInBits());
268 } else if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
269 PrintScalarRetVal(ITy->getBitWidth());
270 } else if (isa<PointerType>(Ty)) {
271 PrintScalarRetVal(TLI->getPointerTy(DL).getSizeInBits());
272 } else
273 llvm_unreachable("Unknown return type");
274 O << ") ";
275}
276
277void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
278 raw_ostream &O) {
279 const Function &F = MF.getFunction();
280 printReturnValStr(&F, O);
281}
282
283// Return true if MBB is the header of a loop marked with
284// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
285bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
286 const MachineBasicBlock &MBB) const {
287 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
288 // We insert .pragma "nounroll" only to the loop header.
289 if (!LI.isLoopHeader(&MBB))
290 return false;
291
292 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
293 // we iterate through each back edge of the loop with header MBB, and check
294 // whether its metadata contains llvm.loop.unroll.disable.
295 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
296 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
297 // Edges from other loops to MBB are not back edges.
298 continue;
299 }
300 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
301 if (MDNode *LoopID =
302 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
303 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
304 return true;
305 if (MDNode *UnrollCountMD =
306 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
307 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
308 ->isOne())
309 return true;
310 }
311 }
312 }
313 }
314 return false;
315}
316
// Emits a `.pragma "nounroll";` line as raw text when MBB is reported by
// isLoopHeaderOfNoUnroll().
// NOTE(review): listing line 318 is absent from this extract.
317void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
319 if (isLoopHeaderOfNoUnroll(MBB))
320 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
321}
322
324 SmallString<128> Str;
325 raw_svector_ostream O(Str);
326
327 if (!GlobalsEmitted) {
328 emitGlobals(*MF->getFunction().getParent());
329 GlobalsEmitted = true;
330 }
331
332 // Set up
333 MRI = &MF->getRegInfo();
334 F = &MF->getFunction();
335 emitLinkageDirective(F, O);
336 if (isKernelFunction(*F))
337 O << ".entry ";
338 else {
339 O << ".func ";
340 printReturnValStr(*MF, O);
341 }
342
343 CurrentFnSym->print(O, MAI);
344
345 emitFunctionParamList(F, O);
346 O << "\n";
347
348 if (isKernelFunction(*F))
349 emitKernelFunctionDirectives(*F, O);
350
352 O << ".noreturn";
353
354 OutStreamer->emitRawText(O.str());
355
356 VRegMapping.clear();
357 // Emit open brace for function body.
358 OutStreamer->emitRawText(StringRef("{\n"));
359 setAndEmitFunctionVirtualRegisters(*MF);
360 encodeDebugInfoRegisterNumbers(*MF);
361 // Emit initial .loc debug directive for correct relocation symbol data.
362 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
363 assert(SP->getUnit());
364 if (!SP->getUnit()->isDebugDirectivesOnly())
366 }
367}
368
370 bool Result = AsmPrinter::runOnMachineFunction(F);
371 // Emit closing brace for the body of function F.
372 // The closing brace must be emitted here because we need to emit additional
373 // debug labels/data after the last basic block.
374 // We also need to emit it here because there is no function that is called
375 // once emission of the function body has finished.
376 OutStreamer->emitRawText(StringRef("}\n"));
377 return Result;
378}
379
382 raw_svector_ostream O(Str);
383 emitDemotedVars(&MF->getFunction(), O);
384 OutStreamer->emitRawText(O.str());
385}
386
388 VRegMapping.clear();
389}
390
394 return OutContext.getOrCreateSymbol(Str);
395}
396
// Adds an "implicit-def: <register>" comment to the output stream for the
// register in operand 0 of MI, followed by a blank line. For non-virtual
// registers the name comes from the subtarget's register info.
397void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
398 Register RegNo = MI->getOperand(0).getReg();
399 if (RegNo.isVirtual()) {
400 OutStreamer->AddComment(Twine("implicit-def: ") +
// NOTE(review): listing line 401 (the right-hand operand of the `+` above) is
// absent from this extract.
402 } else {
403 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
404 OutStreamer->AddComment(Twine("implicit-def: ") +
405 STI.getRegisterInfo()->getName(RegNo));
406 }
407 OutStreamer->addBlankLine();
408}
409
410void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
411 raw_ostream &O) const {
412 // If the NVVM IR has some of reqntid* specified, then output
413 // the reqntid directive, and set the unspecified ones to 1.
414 // If none of Reqntid* is specified, don't output reqntid directive.
415 const auto ReqNTID = getReqNTID(F);
416 if (!ReqNTID.empty())
417 O << formatv(".reqntid {0:$[, ]}\n",
418 make_range(ReqNTID.begin(), ReqNTID.end()));
419
420 const auto MaxNTID = getMaxNTID(F);
421 if (!MaxNTID.empty())
422 O << formatv(".maxntid {0:$[, ]}\n",
423 make_range(MaxNTID.begin(), MaxNTID.end()));
424
425 if (const auto Mincta = getMinCTASm(F))
426 O << ".minnctapersm " << *Mincta << "\n";
427
428 if (const auto Maxnreg = getMaxNReg(F))
429 O << ".maxnreg " << *Maxnreg << "\n";
430
431 // .maxclusterrank directive requires SM_90 or higher, make sure that we
432 // filter it out for lower SM versions, as it causes a hard ptxas crash.
433 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
434 const NVPTXSubtarget *STI = &NTM.getSubtarget<NVPTXSubtarget>(F);
435
436 if (STI->getSmVersion() >= 90) {
437 const auto ClusterDim = getClusterDim(F);
438 const bool BlocksAreClusters = hasBlocksAreClusters(F);
439
440 if (!ClusterDim.empty()) {
441
442 if (!BlocksAreClusters)
443 O << ".explicitcluster\n";
444
445 if (ClusterDim[0] != 0) {
446 assert(llvm::all_of(ClusterDim, [](unsigned D) { return D != 0; }) &&
447 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
448 "should be non-zero as well");
449
450 O << formatv(".reqnctapercluster {0:$[, ]}\n",
451 make_range(ClusterDim.begin(), ClusterDim.end()));
452 } else {
453 assert(llvm::all_of(ClusterDim, [](unsigned D) { return D == 0; }) &&
454 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
455 "should be 0 as well");
456 }
457 }
458
459 if (BlocksAreClusters) {
460 LLVMContext &Ctx = F.getContext();
461 if (ReqNTID.empty() || ClusterDim.empty())
462 Ctx.diagnose(DiagnosticInfoUnsupported(
463 F, "blocksareclusters requires reqntid and cluster_dim attributes",
464 F.getSubprogram()));
465 else if (STI->getPTXVersion() < 90)
466 Ctx.diagnose(DiagnosticInfoUnsupported(
467 F, "blocksareclusters requires PTX version >= 9.0",
468 F.getSubprogram()));
469 else
470 O << ".blocksareclusters\n";
471 }
472
473 if (const auto Maxclusterrank = getMaxClusterRank(F))
474 O << ".maxclusterrank " << *Maxclusterrank << "\n";
475 }
476}
477
478std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
479 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
480
481 std::string Name;
482 raw_string_ostream NameStr(Name);
483
484 VRegRCMap::const_iterator I = VRegMapping.find(RC);
485 assert(I != VRegMapping.end() && "Bad register class");
486 const DenseMap<unsigned, unsigned> &RegMap = I->second;
487
488 VRegMap::const_iterator VI = RegMap.find(Reg);
489 assert(VI != RegMap.end() && "Bad virtual register");
490 unsigned MappedVR = VI->second;
491
492 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
493
494 return Name;
495}
496
497void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
498 raw_ostream &O) {
499 O << getVirtualRegisterName(vr);
500}
501
// Emits a declaration for alias GA: after two validity checks, prints a
// declaration of F under the alias's symbol via emitDeclarationWithName().
// NOTE(review): listing lines 504, 506 and 510 are absent from this extract,
// so the definition of F, the call that carries the first message, and the
// last operand of the linkage condition are not visible here.
502void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
503 raw_ostream &O) {
505 if (!F || isKernelFunction(*F) || F->isDeclaration())
507 "NVPTX aliasee must be a non-kernel function definition");
508
509 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
511 report_fatal_error("NVPTX aliasee must not be '.weak'");
512
513 emitDeclarationWithName(F, getSymbol(GA), O);
514}
515
516void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
517 emitDeclarationWithName(F, getSymbol(F), O);
518}
519
// Prints a declaration of F to O using symbol S as the name: linkage
// directive, ".entry " or ".func ", the return-value declaration, the symbol,
// a newline, the parameter list, a newline, and a closing ";\n".
520void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
521 raw_ostream &O) {
522 emitLinkageDirective(F, O);
523 if (isKernelFunction(*F))
524 O << ".entry ";
525 else
526 O << ".func ";
// The else above has no braces, so this call runs for kernels as well.
527 printReturnValStr(F, O);
528 S->print(O, MAI);
529 O << "\n";
530 emitFunctionParamList(F, O);
531 O << "\n";
// NOTE(review): listing line 532 is absent from this extract; it presumably
// holds the condition guarding the ".noreturn" output below. Confirm upstream.
533 O << ".noreturn";
534 O << ";\n";
535}
536
// Reports whether constant C is used in the definition of a global variable.
// Returns false for a null C. The visible code compares a global's name
// against "llvm.used" and otherwise walks the users of C that are constants.
// NOTE(review): listing lines 541 and 546 are absent from this extract (the
// condition that binds GV and the condition guarding `return true`).
537static bool usedInGlobalVarDef(const Constant *C) {
538 if (!C)
539 return false;
540
542 return GV->getName() != "llvm.used";
543
544 for (const User *U : C->users())
545 if (const Constant *C = dyn_cast<Constant>(U))
547 return true;
548
549 return false;
550}
551
552static bool usedInOneFunc(const User *U, Function const *&OneFunc) {
553 if (const GlobalVariable *OtherGV = dyn_cast<GlobalVariable>(U))
554 if (OtherGV->getName() == "llvm.used")
555 return true;
556
557 if (const Instruction *I = dyn_cast<Instruction>(U)) {
558 if (const Function *CurFunc = I->getFunction()) {
559 if (OneFunc && (CurFunc != OneFunc))
560 return false;
561 OneFunc = CurFunc;
562 return true;
563 }
564 return false;
565 }
566
567 for (const User *UU : U->users())
568 if (!usedInOneFunc(UU, OneFunc))
569 return false;
570
571 return true;
572}
573
574/* Find out if a global variable can be demoted to local scope.
575 * Currently, this is valid for CUDA shared variables, which have local
576 * scope and global lifetime. So the conditions to check are :
577 * 1. Is the global variable in shared address space?
578 * 2. Does it have local linkage?
579 * 3. Is the global variable referenced only in one function?
580 *
 * On success the single using function is stored in f and true is returned;
 * f is left untouched when the result is false.
 */
581static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f) {
582 if (!GV->hasLocalLinkage())
583 return false;
// NOTE(review): listing line 584 is absent from this extract; it holds the
// condition for the `return false` below.
585 return false;
586
587 const Function *oneFunc = nullptr;
588
589 bool flag = usedInOneFunc(GV, oneFunc);
590 if (!flag)
591 return false;
// usedInOneFunc() can succeed without finding any function; reject that too.
592 if (!oneFunc)
593 return false;
594 f = oneFunc;
595 return true;
596}
597
598static bool useFuncSeen(const Constant *C,
599 const SmallPtrSetImpl<const Function *> &SeenSet) {
600 for (const User *U : C->users()) {
601 if (const Constant *cu = dyn_cast<Constant>(U)) {
602 if (useFuncSeen(cu, SeenSet))
603 return true;
604 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
605 if (const Function *Caller = I->getFunction())
606 if (SeenSet.contains(Caller))
607 return true;
608 }
609 }
610 return false;
611}
612
613void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
614 SmallPtrSet<const Function *, 32> SeenSet;
615 for (const Function &F : M) {
616 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
617 emitDeclaration(&F, O);
618 continue;
619 }
620
621 if (F.isDeclaration()) {
622 if (F.use_empty())
623 continue;
624 if (F.getIntrinsicID())
625 continue;
626 emitDeclaration(&F, O);
627 continue;
628 }
629 for (const User *U : F.users()) {
630 if (const Constant *C = dyn_cast<Constant>(U)) {
631 if (usedInGlobalVarDef(C)) {
632 // The use is in the initialization of a global variable
633 // that is a function pointer, so print a declaration
634 // for the original function
635 emitDeclaration(&F, O);
636 break;
637 }
638 // Emit a declaration of this function if the function that
639 // uses this constant expr has already been seen.
640 if (useFuncSeen(C, SeenSet)) {
641 emitDeclaration(&F, O);
642 break;
643 }
644 }
645
646 if (!isa<Instruction>(U))
647 continue;
648 const Function *Caller = cast<Instruction>(U)->getFunction();
649 if (!Caller)
650 continue;
651
652 // If a caller has already been seen, then the caller is
653 // appearing in the module before the callee. so print out
654 // a declaration for the callee.
655 if (SeenSet.contains(Caller)) {
656 emitDeclaration(&F, O);
657 break;
658 }
659 }
660 SeenSet.insert(&F);
661 }
662 for (const GlobalAlias &GA : M.aliases())
663 emitAliasDeclaration(&GA, O);
664}
665
666void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
667 // Construct a default subtarget off of the TargetMachine defaults. The
668 // rest of NVPTX isn't friendly to change subtargets per function and
669 // so the default TargetMachine will have all of the options.
670 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
671 const NVPTXSubtarget *STI = NTM.getSubtargetImpl();
672 SmallString<128> Str1;
673 raw_svector_ostream OS1(Str1);
674
675 // Emit header before any dwarf directives are emitted below.
676 emitHeader(M, OS1, *STI);
677 OutStreamer->emitRawText(OS1.str());
678}
679
681 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
682 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
683 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
684 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
685
686 // We need to call the parent's one explicitly.
687 bool Result = AsmPrinter::doInitialization(M);
688
689 GlobalsEmitted = false;
690
691 return Result;
692}
693
// Emits the module-level part of the output for M as one raw-text chunk: the
// function/alias declarations from emitDeclarations(), then every global
// variable in the order produced by VisitGlobalVariableForEmission(), printed
// through printModuleLevelGV() with ProcessDemoted=false, then a newline.
694void NVPTXAsmPrinter::emitGlobals(const Module &M) {
695 SmallString<128> Str2;
696 raw_svector_ostream OS2(Str2);
697
698 emitDeclarations(M, OS2);
699
700 // As ptxas does not support forward references of globals, we need to first
701 // sort the list of module-level globals in def-use order. We visit each
702 // global variable in order, and ensure that we emit it *after* its dependent
703 // globals. We use a little extra memory maintaining both a set and a list to
704 // have fast searches while maintaining a strict ordering.
// NOTE(review): listing lines 705-707 are absent from this extract; the
// declarations of Globals, GVVisited and GVVisiting used below are not shown.
708
709 // Visit each global variable, in order
710 for (const GlobalVariable &I : M.globals())
711 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
712
713 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
714 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
715
716 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
717 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
718
719 // Print out module-level global variables in proper order
720 for (const GlobalVariable *GV : Globals)
721 printModuleLevelGV(GV, OS2, /*ProcessDemoted=*/false, STI);
722
723 OS2 << '\n';
724
725 OutStreamer->emitRawText(OS2.str());
726}
727
// Emits ".alias <alias symbol name>, <aliasee object name>;" as raw text for
// alias GA. M is not referenced by the visible lines.
728void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
// NOTE(review): listing line 729 is absent from this extract; the declaration
// of Str used below is not shown.
730 raw_svector_ostream OS(Str);
731
732 MCSymbol *Name = getSymbol(&GA);
733
734 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
735 << ";\n";
736
737 OutStreamer->emitRawText(OS.str());
738}
739
// Writes the file header to O: a generated-by comment banner, the .version
// directive (PTXVersion / 10 "." PTXVersion % 10), the .target directive with
// optional ", texmode_independent" (driver interface NVCL) and ", debug"
// (HasFullDebugInfo set by the compile-unit scan), and the .address_size
// directive (64 or 32 depending on NTM.is64Bit()).
740void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
741 const NVPTXSubtarget &STI) {
742 const unsigned PTXVersion = STI.getPTXVersion();
743
744 O << "//\n"
745 "// Generated by LLVM NVPTX Back-End\n"
746 "//\n"
747 "\n"
748 << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"
749 << ".target " << STI.getTargetName();
750
751 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
752 if (NTM.getDrvInterface() == NVPTX::NVCL)
753 O << ", texmode_independent";
754
755 bool HasFullDebugInfo = false;
756 for (DICompileUnit *CU : M.debug_compile_units()) {
757 switch(CU->getEmissionKind()) {
// NOTE(review): the `case` labels of this switch (listing lines 758-759 and
// 761-762) are absent from this extract, so which emission kinds set
// HasFullDebugInfo is not visible here.
760 break;
763 HasFullDebugInfo = true;
764 break;
765 }
// One compile unit with full debug info is enough; stop scanning.
766 if (HasFullDebugInfo)
767 break;
768 }
769 if (HasFullDebugInfo)
770 O << ", debug";
771
772 O << "\n"
773 << ".address_size " << (NTM.is64Bit() ? "64" : "32") << "\n"
774 << "\n";
775}
776
778 // If we did not emit any functions, then the global declarations have not
779 // yet been emitted.
780 if (!GlobalsEmitted) {
781 emitGlobals(M);
782 GlobalsEmitted = true;
783 }
784
785 // call doFinalization
786 bool ret = AsmPrinter::doFinalization(M);
787
789
790 auto *TS =
791 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
792 // Close the last emitted section
793 if (hasDebugInfo()) {
794 TS->closeLastSection();
795 // Emit empty .debug_macinfo section for better support of the empty files.
796 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
797 }
798
799 // Output last DWARF .file directives, if any.
801
802 return ret;
803}
804
805// This function emits appropriate linkage directives for
806// functions and global variables.
807//
808// extern function declaration -> .extern
809// extern function definition -> .visible
810// external global variable with init -> .visible
811// external without init -> .extern
812// appending -> not allowed, assert.
813// for any linkage other than
814// internal, private, linker_private,
815// linker_private_weak, linker_private_weak_def_auto,
816// we emit -> .weak.
817
818void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
819 raw_ostream &O) {
820 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
821 if (V->hasExternalLinkage()) {
822 if (const auto *GVar = dyn_cast<GlobalVariable>(V))
823 O << (GVar->hasInitializer() ? ".visible " : ".extern ");
824 else if (V->isDeclaration())
825 O << ".extern ";
826 else
827 O << ".visible ";
828 } else if (V->hasAppendingLinkage()) {
829 report_fatal_error("Symbol '" + (V->hasName() ? V->getName() : "") +
830 "' has unsupported appending linkage type");
831 } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
832 O << ".weak ";
833 }
834 }
835}
836
// Prints the module-level declaration of global variable GVar to O.
//
// Nothing is printed for globals in the "llvm.metadata" section or whose name
// starts with "llvm." or "nvvm.". Textures, surfaces and samplers are printed
// as .texref / .surfref / .samplerref. Declarations are forwarded to
// emitPTXGlobalVariable(). When ProcessDemoted is false and
// canDemoteGlobalVar() succeeds, only a "has been demoted" comment is printed
// and the variable is recorded in localDecls for the demoting function.
// Otherwise the address space, optional .attribute(.managed), alignment, type,
// name and (where permitted) initializer are printed, terminated by ";\n".
// NOTE(review): listing lines 861, 864, 972, 995 and 1041 are absent from this
// extract; the affected statements are marked below.
837void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
838 raw_ostream &O, bool ProcessDemoted,
839 const NVPTXSubtarget &STI) {
840 // Skip meta data
841 if (GVar->hasSection())
842 if (GVar->getSection() == "llvm.metadata")
843 return;
844
845 // Skip LLVM intrinsic global variables
846 if (GVar->getName().starts_with("llvm.") ||
847 GVar->getName().starts_with("nvvm."))
848 return;
849
850 const DataLayout &DL = getDataLayout();
851
852 // GlobalVariables are always constant pointers themselves.
853 Type *ETy = GVar->getValueType();
854
855 if (GVar->hasExternalLinkage()) {
856 if (GVar->hasInitializer())
857 O << ".visible ";
858 else
859 O << ".extern ";
860 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
// NOTE(review): listing line 861 (rest of this condition) is absent.
862 O << ".common ";
863 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
// NOTE(review): listing line 864 (one operand of this condition) is absent.
865 GVar->hasCommonLinkage()) {
866 O << ".weak ";
867 }
868
869 if (isTexture(*GVar)) {
870 O << ".global .texref " << getTextureName(*GVar) << ";\n";
871 return;
872 }
873
874 if (isSurface(*GVar)) {
875 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
876 return;
877 }
878
879 if (GVar->isDeclaration()) {
880 // (extern) declarations, no definition or initializer
881 // Currently the only known declaration is for an automatic __local
882 // (.shared) promoted to global.
883 emitPTXGlobalVariable(GVar, O, STI);
884 O << ";\n";
885 return;
886 }
887
888 if (isSampler(*GVar)) {
889 O << ".global .samplerref " << getSamplerName(*GVar);
890
891 const Constant *Initializer = nullptr;
892 if (GVar->hasInitializer())
893 Initializer = GVar->getInitializer();
894 const ConstantInt *CI = nullptr;
895 if (Initializer)
896 CI = dyn_cast<ConstantInt>(Initializer);
897 if (CI) {
898 unsigned sample = CI->getZExtValue();
899
900 O << " = { ";
901
// addr is computed once, so all three addr_mode_<i> entries get the same mode.
902 for (int i = 0,
903 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
904 i < 3; i++) {
905 O << "addr_mode_" << i << " = ";
906 switch (addr) {
907 case 0:
908 O << "wrap";
909 break;
910 case 1:
911 O << "clamp_to_border";
912 break;
913 case 2:
914 O << "clamp_to_edge";
915 break;
916 case 3:
917 O << "wrap";
918 break;
919 case 4:
920 O << "mirror";
921 break;
922 }
923 O << ", ";
924 }
925 O << "filter_mode = ";
926 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
927 case 0:
928 O << "nearest";
929 break;
930 case 1:
931 O << "linear";
932 break;
933 case 2:
934 llvm_unreachable("Anisotropic filtering is not supported");
935 default:
936 O << "nearest";
937 break;
938 }
939 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
940 O << ", force_unnormalized_coords = 1";
941 }
942 O << " }";
943 }
944
945 O << ";\n";
946 return;
947 }
948
949 if (GVar->hasPrivateLinkage()) {
950 if (GVar->getName().starts_with("unrollpragma"))
951 return;
952
953 // FIXME - need better way (e.g. Metadata) to avoid generating this global
954 if (GVar->getName().starts_with("filename"))
955 return;
956 if (GVar->use_empty())
957 return;
958 }
959
960 const Function *DemotedFunc = nullptr;
961 if (!ProcessDemoted && canDemoteGlobalVar(GVar, DemotedFunc)) {
962 O << "// " << GVar->getName() << " has been demoted\n";
963 localDecls[DemotedFunc].push_back(GVar);
964 return;
965 }
966
967 O << ".";
968 emitPTXAddressSpace(GVar->getAddressSpace(), O);
969
970 if (isManaged(*GVar)) {
971 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
// NOTE(review): listing line 972 (the call that carries this message) is absent.
973 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
974 O << " .attribute(.managed)";
975 }
976
977 O << " .align "
978 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
979
980 if (ETy->isPointerTy() || ((ETy->isIntegerTy() || ETy->isFloatingPointTy()) &&
981 ETy->getScalarSizeInBits() <= 64)) {
982 O << " .";
983 // Special case: ABI requires that we use .u8 for predicates
984 if (ETy->isIntegerTy(1))
985 O << "u8";
986 else
987 O << getPTXFundamentalTypeStr(ETy, false);
988 O << " ";
989 getSymbol(GVar)->print(O, MAI);
990
991 // Ptx allows variable initialization only for constant and global state
992 // spaces.
993 if (GVar->hasInitializer()) {
994 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
// NOTE(review): listing line 995 (second operand of this condition) is absent.
996 const Constant *Initializer = GVar->getInitializer();
997 // 'undef' is treated as there is no value specified.
998 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
999 O << " = ";
1000 printScalarConstant(Initializer, O);
1001 }
1002 } else {
1003 // The frontend adds zero-initializer to device and constant variables
1004 // that don't have an initial value, and UndefValue to shared
1005 // variables, so skip warning for this case.
1006 if (!GVar->getInitializer()->isNullValue() &&
1007 !isa<UndefValue>(GVar->getInitializer())) {
1008 report_fatal_error("initial value of '" + GVar->getName() +
1009 "' is not allowed in addrspace(" +
1010 Twine(GVar->getAddressSpace()) + ")");
1011 }
1012 }
1013 }
1014 } else {
1015 // Although PTX has direct support for struct type and array type and
1016 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1017 // targets that support these high level field accesses. Structs, arrays
1018 // and vectors are lowered into arrays of bytes.
1019 switch (ETy->getTypeID()) {
1020 case Type::IntegerTyID: // Integers larger than 64 bits
1021 case Type::FP128TyID:
1022 case Type::StructTyID:
1023 case Type::ArrayTyID:
1024 case Type::FixedVectorTyID: {
1025 const uint64_t ElementSize = DL.getTypeStoreSize(ETy);
1026 // Ptx allows variable initialization only for constant and
1027 // global state spaces.
1028 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1029 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1030 GVar->hasInitializer()) {
1031 const Constant *Initializer = GVar->getInitializer();
1032 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1033 AggBuffer aggBuffer(ElementSize, *this);
1034 bufferAggregateConstant(Initializer, &aggBuffer);
1035 if (aggBuffer.numSymbols()) {
1036 const unsigned int ptrSize = MAI->getCodePointerSize();
// Pointer-sized words are only used when the total size is a multiple of the
// pointer size and every symbol is pointer-aligned; otherwise bytes are used.
1037 if (ElementSize % ptrSize ||
1038 !aggBuffer.allSymbolsAligned(ptrSize)) {
1039 // Print in bytes and use the mask() operator for pointers.
1040 if (!STI.hasMaskOperator())
// NOTE(review): listing line 1041 (the call that carries this message) is absent.
1042 "initialized packed aggregate with pointers '" +
1043 GVar->getName() +
1044 "' requires at least PTX ISA version 7.1");
1045 O << " .u8 ";
1046 getSymbol(GVar)->print(O, MAI);
1047 O << "[" << ElementSize << "] = {";
1048 aggBuffer.printBytes(O);
1049 O << "}";
1050 } else {
1051 O << " .u" << ptrSize * 8 << " ";
1052 getSymbol(GVar)->print(O, MAI);
1053 O << "[" << ElementSize / ptrSize << "] = {";
1054 aggBuffer.printWords(O);
1055 O << "}";
1056 }
1057 } else {
1058 O << " .b8 ";
1059 getSymbol(GVar)->print(O, MAI);
1060 O << "[" << ElementSize << "] = {";
1061 aggBuffer.printBytes(O);
1062 O << "}";
1063 }
1064 } else {
1065 O << " .b8 ";
1066 getSymbol(GVar)->print(O, MAI);
1067 if (ElementSize)
1068 O << "[" << ElementSize << "]";
1069 }
1070 } else {
1071 O << " .b8 ";
1072 getSymbol(GVar)->print(O, MAI);
1073 if (ElementSize)
1074 O << "[" << ElementSize << "]";
1075 }
1076 break;
1077 }
1078 default:
1079 llvm_unreachable("type not supported yet");
1080 }
1081 }
1082 O << ";\n";
1083}
1084
// Prints the nSym-th symbol recorded in this buffer to os. A GlobalValue is
// printed by its symbol name, wrapped in "generic(...)" when EmitGeneric is
// set, the pre-stripping value is a pointer in address space 0 and the value
// is not a Function. A ConstantExpr is lowered with lowerConstantForGV() and
// printed as an MC expression. Any other value is unreachable.
1085void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1086 const Value *v = Symbols[nSym];
1087 const Value *v0 = SymbolsBeforeStripping[nSym];
1088 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1089 MCSymbol *Name = AP.getSymbol(GVar);
// NOTE(review): listing line 1090 is absent from this extract; the definition
// of PTy used below is not shown.
1091 // Is v0 a generic pointer?
1092 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1093 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1094 os << "generic(";
1095 Name->print(os, AP.MAI);
1096 os << ")";
1097 } else {
1098 Name->print(os, AP.MAI);
1099 }
1100 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1101 const MCExpr *Expr = AP.lowerConstantForGV(CExpr, false);
1102 AP.printMCExpr(*Expr, os);
1103 } else
1104 llvm_unreachable("symbol type unknown");
1105}
1106
1107void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1108 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1109 // Do not emit trailing zero initializers. They will be zero-initialized by
1110 // ptxas. This saves on both space requirements for the generated PTX and on
1111 // memory use by ptxas. (See:
1112 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1113 unsigned int InitializerCount = size;
1114 // TODO: symbols make this harder, but it would still be good to trim trailing
1115 // 0s for aggs with symbols as well.
1116 if (numSymbols() == 0)
1117 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1118 InitializerCount--;
1119
1120 symbolPosInBuffer.push_back(InitializerCount);
1121 unsigned int nSym = 0;
1122 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1123 for (unsigned int pos = 0; pos < InitializerCount;) {
1124 if (pos)
1125 os << ", ";
1126 if (pos != nextSymbolPos) {
1127 os << (unsigned int)buffer[pos];
1128 ++pos;
1129 continue;
1130 }
1131 // Generate a per-byte mask() operator for the symbol, which looks like:
1132 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1133 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1134 std::string symText;
1135 llvm::raw_string_ostream oss(symText);
1136 printSymbol(nSym, oss);
1137 for (unsigned i = 0; i < ptrSize; ++i) {
1138 if (i)
1139 os << ", ";
1140 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1141 os << "(" << symText << ")";
1142 }
1143 pos += ptrSize;
1144 nextSymbolPos = symbolPosInBuffer[++nSym];
1145 assert(nextSymbolPos >= pos);
1146 }
1147}
1148
1149void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1150 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1151 symbolPosInBuffer.push_back(size);
1152 unsigned int nSym = 0;
1153 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1154 assert(nextSymbolPos % ptrSize == 0);
1155 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1156 if (pos)
1157 os << ", ";
1158 if (pos == nextSymbolPos) {
1159 printSymbol(nSym, os);
1160 nextSymbolPos = symbolPosInBuffer[++nSym];
1161 assert(nextSymbolPos % ptrSize == 0);
1162 assert(nextSymbolPos >= pos + ptrSize);
1163 } else if (ptrSize == 4)
1164 os << support::endian::read32le(&buffer[pos]);
1165 else
1166 os << support::endian::read64le(&buffer[pos]);
1167 }
1168}
1169
1170void NVPTXAsmPrinter::emitDemotedVars(const Function *F, raw_ostream &O) {
1171 auto It = localDecls.find(F);
1172 if (It == localDecls.end())
1173 return;
1174
1175 ArrayRef<const GlobalVariable *> GVars = It->second;
1176
1177 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1178 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
1179
1180 for (const GlobalVariable *GV : GVars) {
1181 O << "\t// demoted variable\n\t";
1182 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1183 }
1184}
1185
// Write the PTX state-space keyword for AddressSpace to O, without a leading
// dot: one of "local", "global", "const" or "shared". Any other address space
// ends compilation through report_fatal_error().
//
// NOTE(review): the `case` labels of this switch (listing lines 1189, 1192,
// 1195 and 1198) are absent from this text, so which address-space constant
// selects which keyword cannot be confirmed here -- check the upstream file.
1186void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1187 raw_ostream &O) const {
1188 switch (AddressSpace) {
1190 O << "local";
1191 break;
1193 O << "global";
1194 break;
1196 O << "const";
1197 break;
1199 O << "shared";
1200 break;
1201 default:
1202 report_fatal_error("Bad address space found while emitting PTX: " +
1203 llvm::Twine(AddressSpace));
1204 break;
1205 }
1206}
1207
1208std::string
1209NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1210 switch (Ty->getTypeID()) {
1211 case Type::IntegerTyID: {
1212 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1213 if (NumBits == 1)
1214 return "pred";
1215 if (NumBits <= 64) {
1216 std::string name = "u";
1217 return name + utostr(NumBits);
1218 }
1219 llvm_unreachable("Integer too large");
1220 break;
1221 }
1222 case Type::BFloatTyID:
1223 case Type::HalfTyID:
1224 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1225 // PTX assembly.
1226 return "b16";
1227 case Type::FloatTyID:
1228 return "f32";
1229 case Type::DoubleTyID:
1230 return "f64";
1231 case Type::PointerTyID: {
1232 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1233 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1234
1235 if (PtrSize == 64)
1236 if (useB4PTR)
1237 return "b64";
1238 else
1239 return "u64";
1240 else if (useB4PTR)
1241 return "b32";
1242 else
1243 return "u32";
1244 }
1245 default:
1246 break;
1247 }
1248 llvm_unreachable("unexpected type");
1249}
1250
// Write a PTX declaration (no initializer, no trailing ';') for GVar to O.
//
// The output is: ".<state space>", optionally " .attribute(.managed)", then
// " .align <n>", followed by the type and symbol name:
//   - 128-bit scalars are declared as a 16-element .b8 array;
//   - floating-point, integer and pointer types use the name returned by
//     getPTXFundamentalTypeStr();
//   - the aggregate cases of the final switch are declared as a .b8 array
//     sized by the type's store size (empty brackets when that size is zero).
// STI is consulted only for the PTX/SM version check on managed variables.
1251void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1252 raw_ostream &O,
1253 const NVPTXSubtarget &STI) {
1254 const DataLayout &DL = getDataLayout();
1255
1256 // GlobalVariables are always constant pointers themselves.
1257 Type *ETy = GVar->getValueType();
1258
1259 O << ".";
1260 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1261 if (isManaged(*GVar)) {
1262 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
// NOTE(review): listing line 1263 is missing; the string below is presumably
// the argument of an error-reporting call -- confirm against upstream.
1264 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1265
1266 O << " .attribute(.managed)";
1267 }
// Use the explicit alignment of the variable when present, otherwise the
// preferred alignment of its value type.
1268 O << " .align "
1269 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1270
1271 // Special case for i128/fp128
1272 if (ETy->getScalarSizeInBits() == 128) {
1273 O << " .b8 ";
1274 getSymbol(GVar)->print(O, MAI);
1275 O << "[16]";
1276 return;
1277 }
1278
1279 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1280 O << " ." << getPTXFundamentalTypeStr(ETy) << " ";
1281 getSymbol(GVar)->print(O, MAI);
1282 return;
1283 }
1284
1285 int64_t ElementSize = 0;
1286
1287 // Although PTX has direct support for struct type and array type and LLVM IR
1288 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1289 // support these high level field accesses. Structs and arrays are lowered
1290 // into arrays of bytes.
1291 switch (ETy->getTypeID()) {
1292 case Type::StructTyID:
1293 case Type::ArrayTyID:
// NOTE(review): listing line 1294 is missing here; it may hold a further
// `case` label sharing this branch -- confirm against upstream.
1295 ElementSize = DL.getTypeStoreSize(ETy);
1296 O << " .b8 ";
1297 getSymbol(GVar)->print(O, MAI);
1298 O << "[";
1299 if (ElementSize) {
1300 O << ElementSize;
1301 }
1302 O << "]";
1303 break;
1304 default:
1305 llvm_unreachable("type not supported yet");
1306 }
1307}
1308
// Write the parenthesized PTX parameter list of F to O.
//
// A function with no arguments that is not variadic produces "()". Otherwise
// one ".param" declaration is written per argument, separated by ",\n":
//   - kernel sampler/texture/surface arguments become .samplerref / .texref /
//     .surfref parameters;
//   - byval arguments and types for which shouldPassAsArray() holds become
//     aligned .b8 arrays sized by the type's alloc size;
//   - kernel pointer arguments become ".u<bits> .ptr" parameters with an
//     optional state space and an alignment;
//   - other kernel scalars use getPTXFundamentalTypeStr(), except i1 which is
//     declared as u8;
//   - scalars of non-kernel functions become ".b<Size>" parameters.
// A variadic function gets one extra unsized .b8 array parameter at the end.
1309void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1310 const DataLayout &DL = getDataLayout();
1311 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1312 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
// MF may be null here; MFI is then null too and is checked before each use.
1313 const NVPTXMachineFunctionInfo *MFI =
1314 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1315
1316 bool IsFirst = true;
1317 const bool IsKernelFunc = isKernelFunction(*F);
1318
1319 if (F->arg_empty() && !F->isVarArg()) {
1320 O << "()";
1321 return;
1322 }
1323
1324 O << "(\n";
1325
1326 for (const Argument &Arg : F->args()) {
1327 Type *Ty = Arg.getType();
1328 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1329
1330 if (!IsFirst)
1331 O << ",\n";
1332
1333 IsFirst = false;
1334
1335 // Handle image/sampler parameters
1336 if (IsKernelFunc) {
// The three flags are mutually exclusive: sampler wins over texture, which
// wins over surface.
1337 const bool IsSampler = isSampler(Arg);
1338 const bool IsTexture = !IsSampler && isImageReadOnly(Arg);
1339 const bool IsSurface = !IsSampler && !IsTexture &&
1340 (isImageReadWrite(Arg) || isImageWriteOnly(Arg));
1341 if (IsSampler || IsTexture || IsSurface) {
// The ".u64 .ptr " prefix is written when there is no machine-function info
// or when checkImageHandleSymbol() returns false for this parameter.
1342 const bool EmitImgPtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1343 O << "\t.param ";
1344 if (EmitImgPtr)
1345 O << ".u64 .ptr ";
1346
1347 if (IsSampler)
1348 O << ".samplerref ";
1349 else if (IsTexture)
1350 O << ".texref ";
1351 else // IsSurface
1352 O << ".surfref ";
1353 O << ParamSym;
1354 continue;
1355 }
1356 }
1357
// Alignment for an array-style parameter: an alignment returned by getAlign()
// for this argument index takes precedence; otherwise the larger of the
// target's optimized alignment for the type and (for byval arguments only)
// the parameter's own alignment.
1358 auto GetOptimalAlignForParam = [TLI, &DL, F, &Arg](Type *Ty) -> Align {
1359 if (MaybeAlign StackAlign =
1360 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1361 return StackAlign.value();
1362
1363 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1364 MaybeAlign ParamAlign =
1365 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1366 return std::max(TypeAlign, ParamAlign.valueOrOne());
1367 };
1368
1369 if (Arg.hasByValAttr()) {
1370 // param has byVal attribute.
1371 Type *ETy = Arg.getParamByValType();
1372 assert(ETy && "Param should have byval type");
1373
1374 // Print .param .align <a> .b8 .param[size];
1375 // <a> = optimal alignment for the element type; always multiple of
1376 // PAL.getParamAlignment
1377 // size = typeallocsize of element type
1378 const Align OptimalAlign =
1379 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1380 : TLI->getFunctionByValParamAlign(
1381 F, ETy, Arg.getParamAlign().valueOrOne(), DL);
1382
1383 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1384 << "[" << DL.getTypeAllocSize(ETy) << "]";
1385 continue;
1386 }
1387
1388 if (shouldPassAsArray(Ty)) {
1389 // Just print .param .align <a> .b8 .param[size];
1390 // <a> = optimal alignment for the element type; always multiple of
1391 // PAL.getParamAlignment
1392 // size = typeallocsize of element type
1393 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1394
1395 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1396 << "[" << DL.getTypeAllocSize(Ty) << "]";
1397
1398 continue;
1399 }
1400 // Just a scalar
1401 auto *PTy = dyn_cast<PointerType>(Ty);
1402 unsigned PTySizeInBits = 0;
1403 if (PTy) {
1404 PTySizeInBits =
1405 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1406 assert(PTySizeInBits && "Invalid pointer size");
1407 }
1408
1409 if (IsKernelFunc) {
1410 if (PTy) {
1411 O << "\t.param .u" << PTySizeInBits << " .ptr";
1412
// NOTE(review): the `case` labels of this switch (listing lines 1416, 1419,
// 1422 and 1425) are absent from this text, so the address space that selects
// each keyword cannot be confirmed here. The visible `default` writes no
// state-space keyword.
1413 switch (PTy->getAddressSpace()) {
1414 default:
1415 break;
1417 O << " .global";
1418 break;
1420 O << " .shared";
1421 break;
1423 O << " .const";
1424 break;
1426 O << " .local";
1427 break;
1428 }
1429
1430 O << " .align " << Arg.getParamAlign().valueOrOne().value() << " "
1431 << ParamSym;
1432 continue;
1433 }
1434
1435 // non-pointer scalar to kernel func
1436 O << "\t.param .";
1437 // Special case: predicate operands become .u8 types
1438 if (Ty->isIntegerTy(1))
1439 O << "u8";
1440 else
1441 O << getPTXFundamentalTypeStr(Ty);
1442 O << " " << ParamSym;
1443 continue;
1444 }
1445 // Non-kernel function, just print .param .b<size> for ABI
1446 // and .reg .b<size> for non-ABI
1447 unsigned Size;
1448 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
1449 Size = promoteScalarArgumentSize(ITy->getBitWidth());
1450 } else if (PTy) {
1451 assert(PTySizeInBits && "Invalid pointer size");
1452 Size = PTySizeInBits;
1453 } else
// NOTE(review): listing line 1454 is missing; it presumably assigns Size for
// the remaining (non-integer, non-pointer) scalar types. The statement below
// is not meant to be the body of the `else` -- confirm against upstream.
1455 O << "\t.param .b" << Size << " " << ParamSym;
1456 }
1457
1458 if (F->isVarArg()) {
1459 if (!IsFirst)
1460 O << ",\n";
1461 O << "\t.param .align " << STI.getMaxRequiredAlignment() << " .b8 "
1462 << TLI->getParamName(F, /* vararg */ -1) << "[]";
1463 }
1464
1465 O << "\n)";
1466}
1467
// Build the per-register-class numbering of MF's virtual registers in
// VRegMapping and emit the matching PTX declarations.
//
// The text is assembled in a local buffer and handed to the streamer with a
// single emitRawText() call. It contains, in order:
//   1. when the frame's stack size is non-zero, a .local byte array named
//      DEPOTNAME<function number> plus the %SP and %SPL registers (.b64 on a
//      64-bit target machine, .b32 otherwise);
//   2. one ".reg" declaration for every register class that has at least one
//      mapped virtual register.
1468void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1469 const MachineFunction &MF) {
1470 SmallString<128> Str;
1471 raw_svector_ostream O(Str);
1472
1473 // Map the global virtual register number to a register class specific
1474 // virtual register number starting from 1 with that class.
1475 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1476
1477 // Emit the Fake Stack Object
1478 const MachineFrameInfo &MFI = MF.getFrameInfo();
1479 int64_t NumBytes = MFI.getStackSize();
1480 if (NumBytes) {
1481 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1482 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1483 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1484 O << "\t.reg .b64 \t%SP;\n"
1485 << "\t.reg .b64 \t%SPL;\n";
1486 } else {
1487 O << "\t.reg .b32 \t%SP;\n"
1488 << "\t.reg .b32 \t%SPL;\n";
1489 }
1490 }
1491
1492 // Go through all virtual registers to establish the mapping between the
1493 // global virtual
1494 // register number and the per class virtual register number.
1495 // We use the per class virtual register number in the ptx output.
1496 for (unsigned I : llvm::seq(MRI->getNumVirtRegs())) {
// NOTE(review): listing line 1497 is missing; it presumably defines `VR` as
// the virtual register for index I -- confirm against upstream.
// Registers with neither a use nor a def are skipped and get no number.
1498 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1499 continue;
1500 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
// Numbering is 1-based: the first register mapped in a class receives 1.
1501 RCRegMap[VR] = RCRegMap.size() + 1;
1502 }
1503
1504 // Emit declaration of the virtual registers or 'physical' registers for
1505 // each register class
1506 for (const TargetRegisterClass *RC : TRI->regclasses()) {
1507 const unsigned N = VRegMapping[RC].size();
1508
1509 // Only declare those registers that may be used.
1510 if (N) {
1511 const StringRef RCName = getNVPTXRegClassName(RC);
1512 const StringRef RCStr = getNVPTXRegClassStr(RC);
// The declared count is N + 1 because the numbering above starts at 1.
1513 O << "\t.reg " << RCName << " \t" << RCStr << "<" << (N + 1) << ">;\n";
1514 }
1515 }
1516
1517 OutStreamer->emitRawText(O.str());
1518}
1519
1520/// Translate virtual register numbers in DebugInfo locations to their printed
1521/// encodings, as used by CUDA-GDB.
1522void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1523 const MachineFunction &MF) {
1524 const NVPTXSubtarget &STI = MF.getSubtarget<NVPTXSubtarget>();
1525 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1526
1527 // Clear the old mapping, and add the new one. This mapping is used after the
1528 // printing of the current function is complete, but before the next function
1529 // is printed.
1530 registerInfo->clearDebugRegisterMap();
1531
1532 for (auto &classMap : VRegMapping) {
1533 for (auto &registerMapping : classMap.getSecond()) {
1534 auto reg = registerMapping.getFirst();
1535 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1536 }
1537 }
1538}
1539
// Print the floating-point constant Fp to O as a hexadecimal literal: the
// prefix "0f" followed by 8 upper-case hex digits for float, or "0d" followed
// by 16 upper-case hex digits for double. The digits are the bit pattern of a
// local copy of the value. Any other floating-point type is unreachable.
1540void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp,
1541 raw_ostream &O) const {
1542 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1543 bool ignored;
1544 unsigned int numHex;
1545 const char *lead;
1546
1547 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1548 numHex = 8;
1549 lead = "0f";
// NOTE(review): listing line 1550 is missing. `ignored` is never used in the
// visible text, so the missing line presumably converts APF and passes
// `&ignored` -- confirm against upstream.
1551 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1552 numHex = 16;
1553 lead = "0d";
// NOTE(review): listing line 1554 is missing as well; same remark as above.
1555 } else
1556 llvm_unreachable("unsupported fp type");
1557
1558 APInt API = APF.bitcastToAPInt();
1559 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1560}
1561
1562void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1563 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1564 O << CI->getValue();
1565 return;
1566 }
1567 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1568 printFPConstant(CFP, O);
1569 return;
1570 }
1571 if (isa<ConstantPointerNull>(CPV)) {
1572 O << "0";
1573 return;
1574 }
1575 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1576 const bool IsNonGenericPointer = GVar->getAddressSpace() != 0;
1577 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1578 O << "generic(";
1579 getSymbol(GVar)->print(O, MAI);
1580 O << ")";
1581 } else {
1582 getSymbol(GVar)->print(O, MAI);
1583 }
1584 return;
1585 }
1586 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1587 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1588 printMCExpr(*E, O);
1589 return;
1590 }
1591 llvm_unreachable("Not scalar type found in printScalarConstant()");
1592}
1593
// Append the constant CPV to AggBuffer.
//
// Undef and null values are written as zeros: `Bytes` of them when Bytes is
// non-zero, otherwise the alloc size of CPV's type. Integer and
// floating-point values are split into bytes starting from the least
// significant one. Pointers (and ptrtoint expressions) register a symbol with
// the buffer and reserve AllocSize zero bytes for it. Aggregates are
// forwarded to bufferAggregateConstant().
//
// NOTE(review): `Bytes` is also passed through to AggBuffer::addBytes(); how
// addBytes() uses it is not visible in this text.
1594void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1595 AggBuffer *AggBuffer) {
1596 const DataLayout &DL = getDataLayout();
1597 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1598 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1599 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1600 // only the space allocated by CPV.
1601 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1602 return;
1603 }
1604
1605 // Helper for filling AggBuffer with APInts.
1606 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
// Number of bytes needed to hold Val, rounding a partial byte up.
1607 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1608 SmallVector<unsigned char, 16> Buf(NumBytes);
1609 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1610 // input's bit width, and i1 arrays may not have a length that is a multuple
1611 // of 8. We handle the last byte separately, so we never request out of
1612 // bounds bits.
1613 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1614 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1615 }
1616 size_t LastBytePosition = (NumBytes - 1) * 8;
1617 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1618 Buf[NumBytes - 1] =
1619 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1620 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1621 };
1622
1623 switch (CPV->getType()->getTypeID()) {
1624 case Type::IntegerTyID:
1625 if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1626 AddIntToBuffer(CI->getValue());
1627 break;
1628 }
1629 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1630 if (const auto *CI =
// NOTE(review): listing line 1631 is missing; it holds the initializer of
// `CI`, presumably an attempt to fold Cexpr to a ConstantInt -- confirm
// against upstream.
1632 AddIntToBuffer(CI->getValue());
1633 break;
1634 }
// A ptrtoint expression records its (cast-stripped) operand as a symbol and
// reserves zero bytes that the printer later replaces with the symbol.
1635 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1636 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1637 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1638 AggBuffer->addZeros(AllocSize);
1639 break;
1640 }
1641 }
1642 llvm_unreachable("unsupported integer const type");
1643 break;
1644
1645 case Type::HalfTyID:
1646 case Type::BFloatTyID:
1647 case Type::FloatTyID:
1648 case Type::DoubleTyID:
1649 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1650 break;
1651
1652 case Type::PointerTyID: {
1653 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1654 AggBuffer->addSymbol(GVar, GVar);
1655 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1656 const Value *v = Cexpr->stripPointerCasts();
1657 AggBuffer->addSymbol(v, Cexpr);
1658 }
// The zero bytes are added even when no symbol was registered above.
1659 AggBuffer->addZeros(AllocSize);
1660 break;
1661 }
1662
1663 case Type::ArrayTyID:
// NOTE(review): listing lines 1664 and 1666 are missing. 1664 may be a
// further `case` label; 1666 holds the `if (...) {` whose body and `else`
// branches follow -- confirm against upstream.
1665 case Type::StructTyID: {
1667 bufferAggregateConstant(CPV, AggBuffer);
// Pad with zeros when the caller asked for more bytes than the type occupies.
1668 if (Bytes > AllocSize)
1669 AggBuffer->addZeros(Bytes - AllocSize);
1670 } else if (isa<ConstantAggregateZero>(CPV))
1671 AggBuffer->addZeros(Bytes);
1672 else
1673 llvm_unreachable("Unexpected Constant type");
1674 break;
1675 }
1676
1677 default:
1678 llvm_unreachable("unsupported type");
1679 }
1680}
1681
1682void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1683 AggBuffer *aggBuffer) {
1684 const DataLayout &DL = getDataLayout();
1685
1686 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1687 for (unsigned I : llvm::seq(Val.getBitWidth() / 8))
1688 Buffer->addByte(Val.extractBitsAsZExtValue(8, I * 8));
1689 };
1690
1691 // Integers of arbitrary width
1692 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1693 ExtendBuffer(CI->getValue(), aggBuffer);
1694 return;
1695 }
1696
1697 // f128
1698 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1699 if (CFP->getType()->isFP128Ty()) {
1700 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1701 return;
1702 }
1703 }
1704
1705 // Old constants
1706 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1707 for (const auto &Op : CPV->operands())
1708 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1709 return;
1710 }
1711
1712 if (const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1713 for (unsigned I : llvm::seq(CDS->getNumElements()))
1714 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(I)), 0, aggBuffer);
1715 return;
1716 }
1717
1718 if (isa<ConstantStruct>(CPV)) {
1719 if (CPV->getNumOperands()) {
1720 StructType *ST = cast<StructType>(CPV->getType());
1721 for (unsigned I : llvm::seq(CPV->getNumOperands())) {
1722 int EndOffset = (I + 1 == CPV->getNumOperands())
1723 ? DL.getStructLayout(ST)->getElementOffset(0) +
1724 DL.getTypeAllocSize(ST)
1725 : DL.getStructLayout(ST)->getElementOffset(I + 1);
1726 int Bytes = EndOffset - DL.getStructLayout(ST)->getElementOffset(I);
1727 bufferLEByte(cast<Constant>(CPV->getOperand(I)), Bytes, aggBuffer);
1728 }
1729 }
1730 return;
1731 }
1732 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1733}
1734
1735/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1736/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1737/// expressions that are representable in PTX and create
1738/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
//
// ProcessingGeneric is true while lowering the operand of an addrspacecast to
// address space 0; a GlobalValue reached in that state is wrapped in an
// NVPTXGenericMCSymbolRefExpr instead of being returned as a plain symbol
// reference. Expressions that cannot be lowered (and cannot be folded any
// further) end compilation through report_fatal_error().
1739const MCExpr *
1740NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
1741 bool ProcessingGeneric) const {
1742 MCContext &Ctx = OutContext;
1743
// Null and undef values both lower to the constant 0.
1744 if (CV->isNullValue() || isa<UndefValue>(CV))
1745 return MCConstantExpr::create(0, Ctx);
1746
1747 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1748 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1749
1750 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1751 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1752 if (ProcessingGeneric)
1753 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1754 return Expr;
1755 }
1756
1757 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1758 if (!CE) {
1759 llvm_unreachable("Unknown constant value to lower!");
1760 }
1761
// Every `break` in this switch falls through to the folding attempt and the
// error report at the end of the function.
1762 switch (CE->getOpcode()) {
1763 default:
1764 break; // Error
1765
1766 case Instruction::AddrSpaceCast: {
1767 // Strip the addrspacecast and pass along the operand
1768 PointerType *DstTy = cast<PointerType>(CE->getType());
// Only casts whose destination is address space 0 are supported.
1769 if (DstTy->getAddressSpace() == 0)
1770 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1771
1772 break; // Error
1773 }
1774
1775 case Instruction::GetElementPtr: {
1776 const DataLayout &DL = getDataLayout();
1777
1778 // Generate a symbolic expression for the byte address
1779 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1780 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1781
1782 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1783 ProcessingGeneric);
// A zero offset needs no addition; the base expression is returned as is.
1784 if (!OffsetAI)
1785 return Base;
1786
1787 int64_t Offset = OffsetAI.getSExtValue();
// NOTE(review): listing line 1788 is missing; it presumably starts the
// `return` that combines Base and Offset, of which only the trailing `Ctx);`
// argument is visible -- confirm against upstream.
1789 Ctx);
1790 }
1791
1792 case Instruction::Trunc:
1793 // We emit the value and depend on the assembler to truncate the generated
1794 // expression properly. This is important for differences between
1795 // blockaddress labels. Since the two labels are in the same function, it
1796 // is reasonable to treat their delta as a 32-bit value.
1797 [[fallthrough]];
1798 case Instruction::BitCast:
1799 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1800
1801 case Instruction::IntToPtr: {
1802 const DataLayout &DL = getDataLayout();
1803
1804 // Handle casts to pointers by changing them into casts to the appropriate
1805 // integer type. This promotes constant folding and simplifies this code.
1806 Constant *Op = CE->getOperand(0);
1807 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1808 /*IsSigned*/ false, DL);
1809 if (Op)
1810 return lowerConstantForGV(Op, ProcessingGeneric);
1811
1812 break; // Error
1813 }
1814
1815 case Instruction::PtrToInt: {
1816 const DataLayout &DL = getDataLayout();
1817
1818 // Support only foldable casts to/from pointers that can be eliminated by
1819 // changing the pointer to the appropriately sized integer type.
1820 Constant *Op = CE->getOperand(0);
1821 Type *Ty = CE->getType();
1822
1823 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1824
1825 // We can emit the pointer value into this slot if the slot is an
1826 // integer slot equal to the size of the pointer.
1827 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1828 return OpExpr;
1829
1830 // Otherwise the pointer is smaller than the resultant integer, mask off
1831 // the high bits so we are sure to get a proper truncation if the input is
1832 // a constant expr.
1833 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
// The mask keeps the low InBits bits of the operand expression.
1834 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1835 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1836 }
1837
1838 // The MC library also has a right-shift operator, but it isn't consistently
1839 // signed or unsigned between different targets.
1840 case Instruction::Add: {
1841 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1842 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
// Add is the only binary operator routed here, so the inner default is never
// expected to run.
1843 switch (CE->getOpcode()) {
1844 default: llvm_unreachable("Unknown binary operator constant cast expr");
1845 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1846 }
1847 }
1848 }
1849
1850 // If the code isn't optimized, there may be outstanding folding
1851 // opportunities. Attempt to fold the expression using DataLayout as a
1852 // last resort before giving up.
// NOTE(review): listing line 1853 is missing; it presumably defines `C` as
// the result of constant-folding CE -- confirm against upstream.
1854 if (C != CE)
1855 return lowerConstantForGV(C, ProcessingGeneric);
1856
1857 // Otherwise report the problem to the user.
1858 std::string S;
1859 raw_string_ostream OS(S);
1860 OS << "Unsupported expression in static initializer: ";
// MF may be null here; the expression is then printed without a module.
1861 CE->printAsOperand(OS, /*PrintType=*/false,
1862 !MF ? nullptr : MF->getFunction().getParent());
1863 report_fatal_error(Twine(OS.str()));
1864}
1865
1866void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) const {
1867 OutContext.getAsmInfo()->printExpr(OS, Expr);
1868}
1869
1870/// PrintAsmOperand - Print out an operand for an inline asm expression.
1871///
1872bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1873 const char *ExtraCode, raw_ostream &O) {
1874 if (ExtraCode && ExtraCode[0]) {
1875 if (ExtraCode[1] != 0)
1876 return true; // Unknown modifier.
1877
1878 switch (ExtraCode[0]) {
1879 default:
1880 // See if this is a generic print operand
1881 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
1882 case 'r':
1883 break;
1884 }
1885 }
1886
1887 printOperand(MI, OpNo, O);
1888
1889 return false;
1890}
1891
1892bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
1893 unsigned OpNo,
1894 const char *ExtraCode,
1895 raw_ostream &O) {
1896 if (ExtraCode && ExtraCode[0])
1897 return true; // Unknown modifier
1898
1899 O << '[';
1900 printMemOperand(MI, OpNo, O);
1901 O << ']';
1902
1903 return false;
1904}
1905
// Print operand OpNum of MI to O. The visible branches handle register
// operands (physical or virtual), immediates via getImm(), floating-point
// immediates via printFPConstant(), symbol operands via PrintSymbolOperand()
// and basic-block operands via the block's symbol. Any other operand type is
// unreachable.
//
// NOTE(review): all `case` labels of this switch (listing lines 1910, 1921,
// 1925, 1929 and 1933) and the two statements printing physical registers
// (1913, 1915) are absent from this text -- confirm against upstream.
1906void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
1907 raw_ostream &O) {
1908 const MachineOperand &MO = MI->getOperand(OpNum);
1909 switch (MO.getType()) {
1911 if (MO.getReg().isPhysical()) {
// NVPTX::VRDepot is printed differently from other physical registers; both
// output statements are among the missing lines.
1912 if (MO.getReg() == NVPTX::VRDepot)
1914 else
1916 } else {
1917 emitVirtualRegister(MO.getReg(), O);
1918 }
1919 break;
1920
1922 O << MO.getImm();
1923 break;
1924
1926 printFPConstant(MO.getFPImm(), O);
1927 break;
1928
1930 PrintSymbolOperand(MO, O);
1931 break;
1932
1934 MO.getMBB()->getSymbol()->print(O, MAI);
1935 break;
1936
1937 default:
1938 llvm_unreachable("Operand type not supported.");
1939 }
1940}
1941
1942void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
1943 raw_ostream &O, const char *Modifier) {
1944 printOperand(MI, OpNum, O);
1945
1946 if (Modifier && strcmp(Modifier, "add") == 0) {
1947 O << ", ";
1948 printOperand(MI, OpNum + 1, O);
1949 } else {
1950 if (MI->getOperand(OpNum + 1).isImm() &&
1951 MI->getOperand(OpNum + 1).getImm() == 0)
1952 return; // don't print ',0' or '+0'
1953 O << "+";
1954 printOperand(MI, OpNum + 1, O);
1955 }
1956}
1957
// Definition of the static pass identifier member, initialized to 0.
1958char NVPTXAsmPrinter::ID = 0;
1959
// Pass-registration macro invocation: "nvptx-asm-printer" is the `arg`
// parameter and "NVPTX Assembly Printer" the `name` parameter; the trailing
// `false, false` fill the macro's `cfg` and `analysis` parameters.
1960INITIALIZE_PASS(NVPTXAsmPrinter, "nvptx-asm-printer", "NVPTX Assembly Printer",
1961 false, false)
1962
1963// Force static initialization.
// Exported with C linkage and external visibility.
// NOTE(review): the function body (listing lines 1966-1967) is absent from
// this text; it presumably registers this asm printer for the NVPTX targets
// -- confirm against upstream.
1964extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
1965LLVMInitializeNVPTXAsmPrinter() {
1968}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define DEPOTNAME
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static const char * name
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
APInt bitcastToAPInt() const
Definition APFloat.h:1335
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:520
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:99
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition AsmPrinter.h:102
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:117
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition AsmPrinter.h:502
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
Definition AsmPrinter.h:611
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:136
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:106
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition AsmPrinter.h:449
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:111
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
Definition AsmPrinter.h:615
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
const APFloat & getValueAPF() const
Definition Constants.h:320
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void addOperand(const MCOperand Op)
Definition MCInst.h:215
void setOpcode(unsigned Op)
Definition MCInst.h:201
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition MCSymbol.cpp:59
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements the NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition Type.h:74
@ HalfTyID
16-bit floating point type
Definition Type.h:56
@ VoidTyID
type with no size
Definition Type.h:63
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ StructTyID
Structures.
Definition Type.h:73
@ IntegerTyID
Arbitrary bit width integers.
Definition Type.h:70
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition Type.h:75
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition Type.h:57
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ PointerTyID
Pointers.
Definition Type.h:72
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
TypeID getTypeID() const
Return the type id for the type.
Definition Type.h:136
op_range operands()
Definition User.h:292
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type size() const
Definition DenseSet.h:87
bool erase(const ValueT &V)
Definition DenseSet.h:100
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to a SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:667
uint64_t read64le(const void *P)
Definition Endian.h:435
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
MaybeAlign getAlign(const CallInst &I, unsigned Index)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
std::optional< unsigned > getMaxNReg(const Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
StringRef getSamplerName(const Value &V)
bool isImageReadWrite(const Value &V)
bool isImageReadOnly(const Value &V)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
bool isSampler(const Value &V)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isSurface(const Value &V)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition Format.h:204
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::optional< unsigned > getMaxClusterRank(const Function &F)
StringRef getTextureName(const Value &V)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
StringRef getSurfaceName(const Value &V)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool isTexture(const Value &V)
bool isImageWriteOnly(const Value &V)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...