LLVM 22.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/DenseSet.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/ADT/Twine.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/Attributes.h"
54#include "llvm/IR/BasicBlock.h"
55#include "llvm/IR/Constant.h"
56#include "llvm/IR/Constants.h"
57#include "llvm/IR/DataLayout.h"
58#include "llvm/IR/DebugInfo.h"
60#include "llvm/IR/DebugLoc.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/GlobalAlias.h"
64#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/Instruction.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/User.h"
72#include "llvm/MC/MCExpr.h"
73#include "llvm/MC/MCInst.h"
74#include "llvm/MC/MCInstrDesc.h"
75#include "llvm/MC/MCStreamer.h"
76#include "llvm/MC/MCSymbol.h"
81#include "llvm/Support/Endian.h"
88#include <cassert>
89#include <cstdint>
90#include <cstring>
91#include <string>
92#include <utility>
93#include <vector>
94
95using namespace llvm;
96
97#define DEPOTNAME "__local_depot"
98
99/// discoverDependentGlobals - Return a set of GlobalVariables on which \p V
100/// depends.
// A GlobalVariable is inserted into Globals directly and the search stops
// there; any other User is searched recursively through its operands.
// NOTE(review): the function name and parameter list (listing lines 102-103)
// are missing from this extract.
101static void
104 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
105 Globals.insert(GV);
106 return;
107 }
108
109 if (const User *U = dyn_cast<User>(V))
110 for (const auto &O : U->operands())
111 discoverDependentGlobals(O, Globals);
112}
113
114/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
115/// instances to be emitted, but only after any dependents have been added
116/// first.
// Visited holds globals already appended to Order; Visiting holds globals
// whose dependents are currently being processed, so re-entering one of them
// means the dependency graph has a cycle, which is reported as fatal.
// NOTE(review): the parameter list (listing lines 118-121) and the
// declaration of `Others` (listing line 131) are missing from this extract.
117static void
122 // Have we already visited this one?
123 if (Visited.count(GV))
124 return;
125
126 // Do we have a circular dependency?
127 if (!Visiting.insert(GV).second)
128 report_fatal_error("Circular dependency found in global variable set");
129
130 // Make sure we visit all dependents first
132 for (const auto &O : GV->operands())
133 discoverDependentGlobals(O, Others);
134
135 for (const GlobalVariable *GV : Others)
136 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
137
138 // Now we can visit ourself
139 Order.push_back(GV);
140 Visited.insert(GV);
141 Visiting.erase(GV);
142}
143
// Verify the instruction's predicates against the subtarget feature bits,
// then lower \p MI to an MCInst.
// NOTE(review): listing line 150 is missing from this extract; presumably it
// hands `Inst` to the streamer — confirm against the full file.
144void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
145 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
146 getSubtargetInfo().getFeatureBits());
147
148 MCInst Inst;
149 lowerToMCInst(MI, Inst);
151}
152
153void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
154 OutMI.setOpcode(MI->getOpcode());
155 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
156 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
157 const MachineOperand &MO = MI->getOperand(0);
158 OutMI.addOperand(GetSymbolRef(
159 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
160 return;
161 }
162
163 for (const auto MO : MI->operands())
164 OutMI.addOperand(lowerOperand(MO));
165}
166
// Convert a MachineOperand into the corresponding MCOperand, dispatching on
// the operand type. Unknown operand types hit llvm_unreachable; unsupported
// floating-point types are reported as a fatal error.
// NOTE(review): the `case` labels of the outer switch and the bodies of the
// floating-point cases (several listing lines between 171 and 201) are
// missing from this extract, so the returns below appear without labels.
167MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
168 switch (MO.getType()) {
169 default:
170 llvm_unreachable("unknown operand type");
172 return MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
174 return MCOperand::createImm(MO.getImm());
179 return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
181 return GetSymbolRef(getSymbol(MO.getGlobal()));
183 const ConstantFP *Cnt = MO.getFPImm();
184 const APFloat &Val = Cnt->getValueAPF();
185
186 switch (Cnt->getType()->getTypeID()) {
187 default:
188 report_fatal_error("Unsupported FP type");
189 break;
190 case Type::HalfTyID:
193 case Type::BFloatTyID:
196 case Type::FloatTyID:
199 case Type::DoubleTyID:
202 }
203 break;
204 }
205 }
206}
207
// Pack a register into a 32-bit value: the upper 4 bits carry a register
// class tag (1 = B1, 2 = B16, 3 = B32, 4 = B64, 7 = B128) and the lower 28
// bits carry the per-class mapped register number from VRegMapping. In the
// else branch the class tag is 0 and the lower 28 bits are the register ID.
// An unrecognised register class is a fatal error.
// NOTE(review): the opening `if` of the virtual/physical split (listing line
// 209) is missing from this extract.
208unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
210 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
211
212 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
213 unsigned RegNum = RegMap[Reg];
214
215 // Encode the register class in the upper 4 bits
216 // Must be kept in sync with NVPTXInstPrinter::printRegName
217 unsigned Ret = 0;
218 if (RC == &NVPTX::B1RegClass) {
219 Ret = (1 << 28);
220 } else if (RC == &NVPTX::B16RegClass) {
221 Ret = (2 << 28);
222 } else if (RC == &NVPTX::B32RegClass) {
223 Ret = (3 << 28);
224 } else if (RC == &NVPTX::B64RegClass) {
225 Ret = (4 << 28);
226 } else if (RC == &NVPTX::B128RegClass) {
227 Ret = (7 << 28);
228 } else {
229 report_fatal_error("Bad register class");
230 }
231
232 // Insert the vreg number
233 Ret |= (RegNum & 0x0FFFFFFF);
234 return Ret;
235 } else {
236 // Some special-use registers are actually physical registers.
237 // Encode this as the register class ID of 0 and the real register ID.
238 return Reg & 0x0FFFFFFF;
239 }
240}
241
242MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
243 const MCExpr *Expr;
244 Expr = MCSymbolRefExpr::create(Symbol, OutContext);
245 return MCOperand::createExpr(Expr);
246}
247
248void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
249 const DataLayout &DL = getDataLayout();
250 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
251 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
252
253 Type *Ty = F->getReturnType();
254 if (Ty->getTypeID() == Type::VoidTyID)
255 return;
256 O << " (";
257
258 auto PrintScalarRetVal = [&](unsigned Size) {
259 O << ".param .b" << promoteScalarArgumentSize(Size) << " func_retval0";
260 };
261 if (shouldPassAsArray(Ty)) {
262 const unsigned TotalSize = DL.getTypeAllocSize(Ty);
263 const Align RetAlignment = TLI->getFunctionArgumentAlignment(
264 F, Ty, AttributeList::ReturnIndex, DL);
265 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
266 << TotalSize << "]";
267 } else if (Ty->isFloatingPointTy()) {
268 PrintScalarRetVal(Ty->getPrimitiveSizeInBits());
269 } else if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
270 PrintScalarRetVal(ITy->getBitWidth());
271 } else if (isa<PointerType>(Ty)) {
272 PrintScalarRetVal(TLI->getPointerTy(DL).getSizeInBits());
273 } else
274 llvm_unreachable("Unknown return type");
275 O << ") ";
276}
277
278void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
279 raw_ostream &O) {
280 const Function &F = MF.getFunction();
281 printReturnValStr(&F, O);
282}
283
284// Return true if MBB is the header of a loop marked with
285// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
286bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
287 const MachineBasicBlock &MBB) const {
288 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
289 // We insert .pragma "nounroll" only to the loop header.
290 if (!LI.isLoopHeader(&MBB))
291 return false;
292
293 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
294 // we iterate through each back edge of the loop with header MBB, and check
295 // whether its metadata contains llvm.loop.unroll.disable.
296 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
297 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
298 // Edges from other loops to MBB are not back edges.
299 continue;
300 }
301 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
302 if (MDNode *LoopID =
303 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
304 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
305 return true;
306 if (MDNode *UnrollCountMD =
307 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
308 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
309 ->isOne())
310 return true;
311 }
312 }
313 }
314 }
315 return false;
316}
317
// Emit a `.pragma "nounroll";` line when \p MBB is the header of a loop that
// isLoopHeaderOfNoUnroll() reports as not to be unrolled.
// NOTE(review): listing line 319 is missing from this extract.
318void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
320 if (isLoopHeaderOfNoUnroll(MBB))
321 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
322}
323
// Emits the textual function header: module globals (once, on first use),
// the linkage directive, `.entry` or `.func` plus return value, the function
// symbol, its parameter list and, for kernels, the kernel directives. It then
// resets VRegMapping, opens the function body with `{` and emits the virtual
// register declarations and debug-info register numbers.
// NOTE(review): the signature (listing line 324), the condition guarding
// `.noreturn` (line 352) and the statement under the final `if` (line 366)
// are missing from this extract.
325 SmallString<128> Str;
326 raw_svector_ostream O(Str);
327
328 if (!GlobalsEmitted) {
329 emitGlobals(*MF->getFunction().getParent());
330 GlobalsEmitted = true;
331 }
332
333 // Set up
334 MRI = &MF->getRegInfo();
335 F = &MF->getFunction();
336 emitLinkageDirective(F, O);
337 if (isKernelFunction(*F))
338 O << ".entry ";
339 else {
340 O << ".func ";
341 printReturnValStr(*MF, O);
342 }
343
344 CurrentFnSym->print(O, MAI);
345
346 emitFunctionParamList(F, O);
347 O << "\n";
348
349 if (isKernelFunction(*F))
350 emitKernelFunctionDirectives(*F, O);
351
353 O << ".noreturn";
354
355 OutStreamer->emitRawText(O.str());
356
357 VRegMapping.clear();
358 // Emit open brace for function body.
359 OutStreamer->emitRawText(StringRef("{\n"));
360 setAndEmitFunctionVirtualRegisters(*MF);
361 encodeDebugInfoRegisterNumbers(*MF);
362 // Emit initial .loc debug directive for correct relocation symbol data.
363 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
364 assert(SP->getUnit());
365 if (!SP->getUnit()->isDebugDirectivesOnly())
367 }
368}
369
// Runs the generic AsmPrinter over the function, then emits the closing `}`
// of the function body and returns the base-class result.
// NOTE(review): the signature (listing line 370) is missing from this extract.
371 bool Result = AsmPrinter::runOnMachineFunction(F);
372 // Emit closing brace for the body of function F.
373 // The closing brace must be emitted here because we need to emit additional
374 // debug labels/data after the last basic block.
375 // We need to emit the closing brace here because we don't have function that
376 // finished emission of the function body.
377 OutStreamer->emitRawText(StringRef("}\n"));
378 return Result;
379}
380
// Emits the demoted variables of the current function as raw text.
// NOTE(review): the signature and the declaration of `Str` (listing lines
// 381-382) are missing from this extract.
383 raw_svector_ostream O(Str);
384 emitDemotedVars(&MF->getFunction(), O);
385 OutStreamer->emitRawText(O.str());
386}
387
// Drops the register mapping once it is no longer needed.
// NOTE(review): the signature (listing line 388) is missing from this extract.
389 VRegMapping.clear();
390}
391
// Returns the MCSymbol named by `Str`, creating it in OutContext if needed.
// NOTE(review): the signature and the code that builds `Str` (listing lines
// 392-394) are missing from this extract.
395 return OutContext.getOrCreateSymbol(Str);
396}
397
// Emit an "implicit-def: <register>" comment for the register defined by
// operand 0 of \p MI, followed by a blank line. Physical registers are named
// through the subtarget's register info.
// NOTE(review): the second half of the virtual-register comment expression
// (listing line 402) is missing from this extract.
398void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
399 Register RegNo = MI->getOperand(0).getReg();
400 if (RegNo.isVirtual()) {
401 OutStreamer->AddComment(Twine("implicit-def: ") +
403 } else {
404 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
405 OutStreamer->AddComment(Twine("implicit-def: ") +
406 STI.getRegisterInfo()->getName(RegNo));
407 }
408 OutStreamer->addBlankLine();
409}
410
// Write the kernel performance-tuning directives for \p F to \p O, one per
// line, in this order: .reqntid, .maxntid, .minnctapersm, .maxnreg and, only
// when the subtarget's SM version is >= 90, the cluster directives
// (.explicitcluster, .reqnctapercluster, .blocksareclusters,
// .maxclusterrank). Each directive is emitted only if the corresponding
// value is present on the function.
411void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
412 raw_ostream &O) const {
413 // If the NVVM IR has some of reqntid* specified, then output
414 // the reqntid directive, and set the unspecified ones to 1.
415 // If none of Reqntid* is specified, don't output reqntid directive.
416 const auto ReqNTID = getReqNTID(F);
417 if (!ReqNTID.empty())
418 O << formatv(".reqntid {0:$[, ]}\n",
419 make_range(ReqNTID.begin(), ReqNTID.end()));
420
421 const auto MaxNTID = getMaxNTID(F);
422 if (!MaxNTID.empty())
423 O << formatv(".maxntid {0:$[, ]}\n",
424 make_range(MaxNTID.begin(), MaxNTID.end()));
425
426 if (const auto Mincta = getMinCTASm(F))
427 O << ".minnctapersm " << *Mincta << "\n";
428
429 if (const auto Maxnreg = getMaxNReg(F))
430 O << ".maxnreg " << *Maxnreg << "\n";
431
432 // .maxclusterrank directive requires SM_90 or higher, make sure that we
433 // filter it out for lower SM versions, as it causes a hard ptxas crash.
434 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
435 const NVPTXSubtarget *STI = &NTM.getSubtarget<NVPTXSubtarget>(F);
436
437 if (STI->getSmVersion() >= 90) {
438 const auto ClusterDim = getClusterDim(F);
439 const bool BlocksAreClusters = hasBlocksAreClusters(F);
440
441 if (!ClusterDim.empty()) {
442
// .explicitcluster is suppressed when .blocksareclusters will be used.
443 if (!BlocksAreClusters)
444 O << ".explicitcluster\n";
445
// The dimensions must be either all zero or all non-zero; only the
// non-zero form produces a .reqnctapercluster directive.
446 if (ClusterDim[0] != 0) {
447 assert(llvm::all_of(ClusterDim, [](unsigned D) { return D != 0; }) &&
448 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
449 "should be non-zero as well");
450
451 O << formatv(".reqnctapercluster {0:$[, ]}\n",
452 make_range(ClusterDim.begin(), ClusterDim.end()));
453 } else {
454 assert(llvm::all_of(ClusterDim, [](unsigned D) { return D == 0; }) &&
455 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
456 "should be 0 as well");
457 }
458 }
459
// Unsupported combinations are reported through the LLVMContext
// diagnostic handler, and no .blocksareclusters directive is written.
460 if (BlocksAreClusters) {
461 LLVMContext &Ctx = F.getContext();
462 if (ReqNTID.empty() || ClusterDim.empty())
463 Ctx.diagnose(DiagnosticInfoUnsupported(
464 F, "blocksareclusters requires reqntid and cluster_dim attributes",
465 F.getSubprogram()));
466 else if (STI->getPTXVersion() < 90)
467 Ctx.diagnose(DiagnosticInfoUnsupported(
468 F, "blocksareclusters requires PTX version >= 9.0",
469 F.getSubprogram()));
470 else
471 O << ".blocksareclusters\n";
472 }
473
474 if (const auto Maxclusterrank = getMaxClusterRank(F))
475 O << ".maxclusterrank " << *Maxclusterrank << "\n";
476 }
477}
478
479std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
480 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
481
482 std::string Name;
483 raw_string_ostream NameStr(Name);
484
485 VRegRCMap::const_iterator I = VRegMapping.find(RC);
486 assert(I != VRegMapping.end() && "Bad register class");
487 const DenseMap<unsigned, unsigned> &RegMap = I->second;
488
489 VRegMap::const_iterator VI = RegMap.find(Reg);
490 assert(VI != RegMap.end() && "Bad virtual register");
491 unsigned MappedVR = VI->second;
492
493 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
494
495 return Name;
496}
497
498void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
499 raw_ostream &O) {
500 O << getVirtualRegisterName(vr);
501}
502
// Emit a declaration for the alias \p GA, using the alias symbol as the name
// and the aliased function `F` for the signature. Fatal error messages below
// state the constraints: the aliasee must be a non-kernel function
// definition, and must not be '.weak'.
// NOTE(review): the definition of `F` (listing line 505), the call carrying
// the first error message (line 507) and the end of the linkage condition
// (line 511) are missing from this extract.
503void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
504 raw_ostream &O) {
506 if (!F || isKernelFunction(*F) || F->isDeclaration())
508 "NVPTX aliasee must be a non-kernel function definition");
509
510 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
512 report_fatal_error("NVPTX aliasee must not be '.weak'");
513
514 emitDeclarationWithName(F, getSymbol(GA), O);
515}
516
517void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
518 emitDeclarationWithName(F, getSymbol(F), O);
519}
520
// Emit a declaration of \p F to \p O using symbol \p S as the printed name:
// linkage directive, `.entry`/`.func`, return value, name, parameter list and
// a terminating `;`.
// NOTE(review): the condition guarding `.noreturn` (listing line 533) is
// missing from this extract.
521void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
522 raw_ostream &O) {
523 emitLinkageDirective(F, O);
524 if (isKernelFunction(*F))
525 O << ".entry ";
526 else
527 O << ".func ";
528 printReturnValStr(F, O);
529 S->print(O, MAI);
530 O << "\n";
531 emitFunctionParamList(F, O);
532 O << "\n";
534 O << ".noreturn";
535 O << ";\n";
536}
537
// Returns whether the constant \p C is used in a global variable definition,
// searching through its constant users. A null \p C yields false, and a
// global named "llvm.used" does not count.
// NOTE(review): the `if` that introduces `GV` (listing line 542) and the
// recursive check inside the loop (line 547) are missing from this extract.
538static bool usedInGlobalVarDef(const Constant *C) {
539 if (!C)
540 return false;
541
543 return GV->getName() != "llvm.used";
544
545 for (const User *U : C->users())
546 if (const Constant *C = dyn_cast<Constant>(U))
548 return true;
549
550 return false;
551}
552
553static bool usedInOneFunc(const User *U, Function const *&OneFunc) {
554 if (const GlobalVariable *OtherGV = dyn_cast<GlobalVariable>(U))
555 if (OtherGV->getName() == "llvm.used")
556 return true;
557
558 if (const Instruction *I = dyn_cast<Instruction>(U)) {
559 if (const Function *CurFunc = I->getFunction()) {
560 if (OneFunc && (CurFunc != OneFunc))
561 return false;
562 OneFunc = CurFunc;
563 return true;
564 }
565 return false;
566 }
567
568 for (const User *UU : U->users())
569 if (!usedInOneFunc(UU, OneFunc))
570 return false;
571
572 return true;
573}
574
575/* Find out if a global variable can be demoted to local scope.
576 * Currently, this is valid for CUDA shared variables, which have local
577 * scope and global lifetime. So the conditions to check are :
578 * 1. Is the global variable in shared address space?
579 * 2. Does it have local linkage?
580 * 3. Is the global variable referenced only in one function?
581 */
// On success the single using function is stored in `f`; `f` is left
// untouched when the function returns false.
// NOTE(review): the condition of the second early return (listing line 585)
// is missing from this extract; presumably it is the address-space check
// from item 1 above — confirm against the full file.
582static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f) {
583 if (!GV->hasLocalLinkage())
584 return false;
586 return false;
587
588 const Function *oneFunc = nullptr;
589
590 bool flag = usedInOneFunc(GV, oneFunc);
591 if (!flag)
592 return false;
593 if (!oneFunc)
594 return false;
595 f = oneFunc;
596 return true;
597}
598
599static bool useFuncSeen(const Constant *C,
600 const SmallPtrSetImpl<const Function *> &SeenSet) {
601 for (const User *U : C->users()) {
602 if (const Constant *cu = dyn_cast<Constant>(U)) {
603 if (useFuncSeen(cu, SeenSet))
604 return true;
605 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
606 if (const Function *Caller = I->getFunction())
607 if (SeenSet.contains(Caller))
608 return true;
609 }
610 }
611 return false;
612}
613
// Emit forward declarations to \p O for the functions of \p M that need one,
// followed by declarations for all aliases. Functions are walked in module
// order and each defined function is added to SeenSet after it is processed,
// so a definition gets a declaration when it is referenced by a global
// variable initializer or by a function that appeared earlier in the module.
614void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
615 SmallPtrSet<const Function *, 32> SeenSet;
616 for (const Function &F : M) {
// Functions carrying this attribute are always declared and are never
// added to SeenSet.
617 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
618 emitDeclaration(&F, O);
619 continue;
620 }
621
// External declarations are declared only when used and not intrinsics.
622 if (F.isDeclaration()) {
623 if (F.use_empty())
624 continue;
625 if (F.getIntrinsicID())
626 continue;
627 emitDeclaration(&F, O);
628 continue;
629 }
630 for (const User *U : F.users()) {
631 if (const Constant *C = dyn_cast<Constant>(U)) {
632 if (usedInGlobalVarDef(C)) {
633 // The use is in the initialization of a global variable
634 // that is a function pointer, so print a declaration
635 // for the original function
636 emitDeclaration(&F, O);
637 break;
638 }
639 // Emit a declaration of this function if the function that
640 // uses this constant expr has already been seen.
641 if (useFuncSeen(C, SeenSet)) {
642 emitDeclaration(&F, O);
643 break;
644 }
645 }
646
647 if (!isa<Instruction>(U))
648 continue;
649 const Function *Caller = cast<Instruction>(U)->getFunction();
650 if (!Caller)
651 continue;
652
653 // If a caller has already been seen, then the caller is
654 // appearing in the module before the callee. so print out
655 // a declaration for the callee.
656 if (SeenSet.contains(Caller)) {
657 emitDeclaration(&F, O);
658 break;
659 }
660 }
661 SeenSet.insert(&F);
662 }
663 for (const GlobalAlias &GA : M.aliases())
664 emitAliasDeclaration(&GA, O);
665}
666
667void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
668 // Construct a default subtarget off of the TargetMachine defaults. The
669 // rest of NVPTX isn't friendly to change subtargets per function and
670 // so the default TargetMachine will have all of the options.
671 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
672 const NVPTXSubtarget *STI = NTM.getSubtargetImpl();
673 SmallString<128> Str1;
674 raw_svector_ostream OS1(Str1);
675
676 // Emit header before any dwarf directives are emitted below.
677 emitHeader(M, OS1, *STI);
678 OutStreamer->emitRawText(OS1.str());
679}
680
// Rejects modules that contain aliases when the default subtarget is below
// PTX 6.3 or sm_30, runs the base-class initialization and resets
// GlobalsEmitted so globals are emitted again for this module.
// NOTE(review): the signature (listing line 681) is missing from this extract.
682 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
683 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
684 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
685 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
686
687 // We need to call the parent's one explicitly.
688 bool Result = AsmPrinter::doInitialization(M);
689
690 GlobalsEmitted = false;
691
692 return Result;
693}
694
// Emit function declarations followed by all module-level global variables
// of \p M, ordered so each global comes after the globals it depends on. The
// text is accumulated in a local buffer and emitted as one raw-text chunk.
// NOTE(review): the declarations of `Globals`, `GVVisited` and `GVVisiting`
// (listing lines 706-708) are missing from this extract.
695void NVPTXAsmPrinter::emitGlobals(const Module &M) {
696 SmallString<128> Str2;
697 raw_svector_ostream OS2(Str2);
698
699 emitDeclarations(M, OS2);
700
701 // As ptxas does not support forward references of globals, we need to first
702 // sort the list of module-level globals in def-use order. We visit each
703 // global variable in order, and ensure that we emit it *after* its dependent
704 // globals. We use a little extra memory maintaining both a set and a list to
705 // have fast searches while maintaining a strict ordering.
709
710 // Visit each global variable, in order
711 for (const GlobalVariable &I : M.globals())
712 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
713
714 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
715 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
716
717 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
718 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
719
720 // Print out module-level global variables in proper order
721 for (const GlobalVariable *GV : Globals)
722 printModuleLevelGV(GV, OS2, /*ProcessDemoted=*/false, STI);
723
724 OS2 << '\n';
725
726 OutStreamer->emitRawText(OS2.str());
727}
728
// Emit a `.alias <alias>, <aliasee>;` line for \p GA as raw text.
// NOTE(review): the declaration of `Str` (listing line 730) is missing from
// this extract.
729void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
731 raw_svector_ostream OS(Str);
732
733 MCSymbol *Name = getSymbol(&GA);
734
735 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
736 << ";\n";
737
738 OutStreamer->emitRawText(OS.str());
739}
740
// Write the PTX module header to \p O: a banner comment, `.version` (the
// subtarget's PTX version split into major.minor by dividing by 10),
// `.target` with optional `texmode_independent` (NVCL driver interface) and
// `debug` options, and `.address_size`.
// NOTE(review): the `case` labels of the emission-kind switch (listing lines
// 759-760 and 762-763) are missing from this extract, so which kinds set
// HasFullDebugInfo cannot be read from here.
741void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
742 const NVPTXSubtarget &STI) {
743 const unsigned PTXVersion = STI.getPTXVersion();
744
745 O << "//\n"
746 "// Generated by LLVM NVPTX Back-End\n"
747 "//\n"
748 "\n"
749 << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"
750 << ".target " << STI.getTargetName();
751
752 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
753 if (NTM.getDrvInterface() == NVPTX::NVCL)
754 O << ", texmode_independent";
755
// One compile unit that sets the flag is enough; stop scanning after it.
756 bool HasFullDebugInfo = false;
757 for (DICompileUnit *CU : M.debug_compile_units()) {
758 switch(CU->getEmissionKind()) {
761 break;
764 HasFullDebugInfo = true;
765 break;
766 }
767 if (HasFullDebugInfo)
768 break;
769 }
770 if (HasFullDebugInfo)
771 O << ", debug";
772
773 O << "\n"
774 << ".address_size " << (NTM.is64Bit() ? "64" : "32") << "\n"
775 << "\n";
776}
777
// Finalizes module emission: makes sure the globals were emitted even if no
// function was, runs the base-class finalization, and closes the last debug
// section when the module has debug info. Returns the base-class result.
// NOTE(review): the signature (listing line 778) and the statements at
// listing lines 789 and 801 are missing from this extract.
779 // If we did not emit any functions, then the global declarations have not
780 // yet been emitted.
781 if (!GlobalsEmitted) {
782 emitGlobals(M);
783 GlobalsEmitted = true;
784 }
785
786 // call doFinalization
787 bool ret = AsmPrinter::doFinalization(M);
788
790
791 auto *TS =
792 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
793 // Close the last emitted section
794 if (hasDebugInfo()) {
795 TS->closeLastSection();
796 // Emit empty .debug_macinfo section for better support of the empty files.
797 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
798 }
799
800 // Output last DWARF .file directives, if any.
802
803 return ret;
804}
805
806// This function emits appropriate linkage directives for
807// functions and global variables.
808//
809// extern function declaration -> .extern
810// extern function definition -> .visible
811// external global variable with init -> .visible
812// external without init -> .extern
813// appending -> not allowed, assert.
814// for any linkage other than
815// internal, private, linker_private,
816// linker_private_weak, linker_private_weak_def_auto,
817// we emit -> .weak.
818
819void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
820 raw_ostream &O) {
821 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
822 if (V->hasExternalLinkage()) {
823 if (const auto *GVar = dyn_cast<GlobalVariable>(V))
824 O << (GVar->hasInitializer() ? ".visible " : ".extern ");
825 else if (V->isDeclaration())
826 O << ".extern ";
827 else
828 O << ".visible ";
829 } else if (V->hasAppendingLinkage()) {
830 report_fatal_error("Symbol '" + (V->hasName() ? V->getName() : "") +
831 "' has unsupported appending linkage type");
832 } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
833 O << ".weak ";
834 }
835 }
836}
837
// Print the PTX definition or declaration of the module-level global \p GVar
// to \p O.
//
// Metadata-section globals and globals named `llvm.*` / `nvvm.*` are skipped.
// Textures, surfaces and samplers get their dedicated reference forms. When
// \p ProcessDemoted is false and canDemoteGlobalVar() succeeds, the variable
// is recorded in localDecls for its single using function and only a comment
// is printed. Otherwise the variable is printed with its address space,
// alignment, type and (where permitted) initializer.
// NOTE(review): listing lines 862, 865, 973, 996 and 1042 are missing from
// this extract (parts of two linkage conditions, the calls carrying two error
// messages, and the second half of an address-space condition).
838void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
839 raw_ostream &O, bool ProcessDemoted,
840 const NVPTXSubtarget &STI) {
841 // Skip meta data
842 if (GVar->hasSection())
843 if (GVar->getSection() == "llvm.metadata")
844 return;
845
846 // Skip LLVM intrinsic global variables
847 if (GVar->getName().starts_with("llvm.") ||
848 GVar->getName().starts_with("nvvm."))
849 return;
850
851 const DataLayout &DL = getDataLayout();
852
853 // GlobalVariables are always constant pointers themselves.
854 Type *ETy = GVar->getValueType();
855
856 if (GVar->hasExternalLinkage()) {
857 if (GVar->hasInitializer())
858 O << ".visible ";
859 else
860 O << ".extern ";
861 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
863 O << ".common ";
864 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
866 GVar->hasCommonLinkage()) {
867 O << ".weak ";
868 }
869
870 if (isTexture(*GVar)) {
871 O << ".global .texref " << getTextureName(*GVar) << ";\n";
872 return;
873 }
874
875 if (isSurface(*GVar)) {
876 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
877 return;
878 }
879
880 if (GVar->isDeclaration()) {
881 // (extern) declarations, no definition or initializer
882 // Currently the only known declaration is for an automatic __local
883 // (.shared) promoted to global.
884 emitPTXGlobalVariable(GVar, O, STI);
885 O << ";\n";
886 return;
887 }
888
889 if (isSampler(*GVar)) {
890 O << ".global .samplerref " << getSamplerName(*GVar);
891
892 const Constant *Initializer = nullptr;
893 if (GVar->hasInitializer())
894 Initializer = GVar->getInitializer();
895 const ConstantInt *CI = nullptr;
896 if (Initializer)
897 CI = dyn_cast<ConstantInt>(Initializer);
898 if (CI) {
899 unsigned sample = CI->getZExtValue();
900
901 O << " = { ";
902
// The same decoded addressing mode is printed for all three
// addr_mode_<i> fields.
903 for (int i = 0,
904 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
905 i < 3; i++) {
906 O << "addr_mode_" << i << " = ";
907 switch (addr) {
908 case 0:
909 O << "wrap";
910 break;
911 case 1:
912 O << "clamp_to_border";
913 break;
914 case 2:
915 O << "clamp_to_edge";
916 break;
917 case 3:
918 O << "wrap";
919 break;
920 case 4:
921 O << "mirror";
922 break;
923 }
924 O << ", ";
925 }
926 O << "filter_mode = ";
927 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
928 case 0:
929 O << "nearest";
930 break;
931 case 1:
932 O << "linear";
933 break;
934 case 2:
935 llvm_unreachable("Anisotropic filtering is not supported");
936 default:
937 O << "nearest";
938 break;
939 }
940 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
941 O << ", force_unnormalized_coords = 1";
942 }
943 O << " }";
944 }
945
946 O << ";\n";
947 return;
948 }
949
950 if (GVar->hasPrivateLinkage()) {
951 if (GVar->getName().starts_with("unrollpragma"))
952 return;
953
954 // FIXME - need better way (e.g. Metadata) to avoid generating this global
955 if (GVar->getName().starts_with("filename"))
956 return;
957 if (GVar->use_empty())
958 return;
959 }
960
961 const Function *DemotedFunc = nullptr;
962 if (!ProcessDemoted && canDemoteGlobalVar(GVar, DemotedFunc)) {
963 O << "// " << GVar->getName() << " has been demoted\n";
964 localDecls[DemotedFunc].push_back(GVar);
965 return;
966 }
967
968 O << ".";
969 emitPTXAddressSpace(GVar->getAddressSpace(), O);
970
971 if (isManaged(*GVar)) {
972 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
974 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
975 O << " .attribute(.managed)";
976 }
977
978 O << " .align "
979 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
980
// Pointers and scalars of at most 64 bits are printed with a fundamental
// PTX type; everything else is lowered to an array in the else branch.
981 if (ETy->isPointerTy() || ((ETy->isIntegerTy() || ETy->isFloatingPointTy()) &&
982 ETy->getScalarSizeInBits() <= 64)) {
983 O << " .";
984 // Special case: ABI requires that we use .u8 for predicates
985 if (ETy->isIntegerTy(1))
986 O << "u8";
987 else
988 O << getPTXFundamentalTypeStr(ETy, false);
989 O << " ";
990 getSymbol(GVar)->print(O, MAI);
991
992 // Ptx allows variable initialization only for constant and global state
993 // spaces.
994 if (GVar->hasInitializer()) {
995 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
997 const Constant *Initializer = GVar->getInitializer();
998 // 'undef' is treated as there is no value specified.
999 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1000 O << " = ";
1001 printScalarConstant(Initializer, O);
1002 }
1003 } else {
1004 // The frontend adds zero-initializer to device and constant variables
1005 // that don't have an initial value, and UndefValue to shared
1006 // variables, so skip warning for this case.
1007 if (!GVar->getInitializer()->isNullValue() &&
1008 !isa<UndefValue>(GVar->getInitializer())) {
1009 report_fatal_error("initial value of '" + GVar->getName() +
1010 "' is not allowed in addrspace(" +
1011 Twine(GVar->getAddressSpace()) + ")");
1012 }
1013 }
1014 }
1015 } else {
1016 // Although PTX has direct support for struct type and array type and
1017 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1018 // targets that support these high level field accesses. Structs, arrays
1019 // and vectors are lowered into arrays of bytes.
1020 switch (ETy->getTypeID()) {
1021 case Type::IntegerTyID: // Integers larger than 64 bits
1022 case Type::FP128TyID:
1023 case Type::StructTyID:
1024 case Type::ArrayTyID:
1025 case Type::FixedVectorTyID: {
1026 const uint64_t ElementSize = DL.getTypeStoreSize(ETy);
1027 // Ptx allows variable initialization only for constant and
1028 // global state spaces.
1029 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1030 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1031 GVar->hasInitializer()) {
1032 const Constant *Initializer = GVar->getInitializer();
1033 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1034 AggBuffer aggBuffer(ElementSize, *this);
1035 bufferAggregateConstant(Initializer, &aggBuffer);
1036 if (aggBuffer.numSymbols()) {
1037 const unsigned int ptrSize = MAI->getCodePointerSize();
// Pointer-sized words can only be used when the total size is a
// multiple of the pointer size and every symbol is aligned to it.
1038 if (ElementSize % ptrSize ||
1039 !aggBuffer.allSymbolsAligned(ptrSize)) {
1040 // Print in bytes and use the mask() operator for pointers.
1041 if (!STI.hasMaskOperator())
1043 "initialized packed aggregate with pointers '" +
1044 GVar->getName() +
1045 "' requires at least PTX ISA version 7.1");
1046 O << " .u8 ";
1047 getSymbol(GVar)->print(O, MAI);
1048 O << "[" << ElementSize << "] = {";
1049 aggBuffer.printBytes(O);
1050 O << "}";
1051 } else {
1052 O << " .u" << ptrSize * 8 << " ";
1053 getSymbol(GVar)->print(O, MAI);
1054 O << "[" << ElementSize / ptrSize << "] = {";
1055 aggBuffer.printWords(O);
1056 O << "}";
1057 }
1058 } else {
1059 O << " .b8 ";
1060 getSymbol(GVar)->print(O, MAI);
1061 O << "[" << ElementSize << "] = {";
1062 aggBuffer.printBytes(O);
1063 O << "}";
1064 }
1065 } else {
1066 O << " .b8 ";
1067 getSymbol(GVar)->print(O, MAI);
1068 if (ElementSize)
1069 O << "[" << ElementSize << "]";
1070 }
1071 } else {
1072 O << " .b8 ";
1073 getSymbol(GVar)->print(O, MAI);
1074 if (ElementSize)
1075 O << "[" << ElementSize << "]";
1076 }
1077 break;
1078 }
1079 default:
1080 llvm_unreachable("type not supported yet");
1081 }
1082 }
1083 O << ";\n";
1084}
1085
// Print the \p nSym-th symbol recorded in this buffer to \p os. A global
// value is printed by name, wrapped in `generic(...)` when EmitGeneric is
// set, the pointer is in address space 0 and the value is not a function.
// A constant expression is lowered to an MCExpr and printed. Anything else
// is unreachable.
// NOTE(review): the definition of `PTy` (listing line 1091) is missing from
// this extract.
1086void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1087 const Value *v = Symbols[nSym];
1088 const Value *v0 = SymbolsBeforeStripping[nSym];
1089 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1090 MCSymbol *Name = AP.getSymbol(GVar);
1092 // Is v0 a generic pointer?
1093 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1094 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1095 os << "generic(";
1096 Name->print(os, AP.MAI);
1097 os << ")";
1098 } else {
1099 Name->print(os, AP.MAI);
1100 }
1101 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1102 const MCExpr *Expr = AP.lowerConstantForGV(CExpr, false);
1103 AP.printMCExpr(*Expr, os);
1104 } else
1105 llvm_unreachable("symbol type unknown");
1106}
1107
1108void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1109 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1110 // Do not emit trailing zero initializers. They will be zero-initialized by
1111 // ptxas. This saves on both space requirements for the generated PTX and on
1112 // memory use by ptxas. (See:
1113 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1114 unsigned int InitializerCount = size;
1115 // TODO: symbols make this harder, but it would still be good to trim trailing
1116 // 0s for aggs with symbols as well.
1117 if (numSymbols() == 0)
1118 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1119 InitializerCount--;
1120
1121 symbolPosInBuffer.push_back(InitializerCount);
1122 unsigned int nSym = 0;
1123 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1124 for (unsigned int pos = 0; pos < InitializerCount;) {
1125 if (pos)
1126 os << ", ";
1127 if (pos != nextSymbolPos) {
1128 os << (unsigned int)buffer[pos];
1129 ++pos;
1130 continue;
1131 }
1132 // Generate a per-byte mask() operator for the symbol, which looks like:
1133 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1134 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1135 std::string symText;
1136 llvm::raw_string_ostream oss(symText);
1137 printSymbol(nSym, oss);
1138 for (unsigned i = 0; i < ptrSize; ++i) {
1139 if (i)
1140 os << ", ";
1141 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1142 os << "(" << symText << ")";
1143 }
1144 pos += ptrSize;
1145 nextSymbolPos = symbolPosInBuffer[++nSym];
1146 assert(nextSymbolPos >= pos);
1147 }
1148}
1149
1150void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1151 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1152 symbolPosInBuffer.push_back(size);
1153 unsigned int nSym = 0;
1154 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1155 assert(nextSymbolPos % ptrSize == 0);
1156 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1157 if (pos)
1158 os << ", ";
1159 if (pos == nextSymbolPos) {
1160 printSymbol(nSym, os);
1161 nextSymbolPos = symbolPosInBuffer[++nSym];
1162 assert(nextSymbolPos % ptrSize == 0);
1163 assert(nextSymbolPos >= pos + ptrSize);
1164 } else if (ptrSize == 4)
1165 os << support::endian::read32le(&buffer[pos]);
1166 else
1167 os << support::endian::read64le(&buffer[pos]);
1168 }
1169}
1170
1171void NVPTXAsmPrinter::emitDemotedVars(const Function *F, raw_ostream &O) {
1172 auto It = localDecls.find(F);
1173 if (It == localDecls.end())
1174 return;
1175
1176 ArrayRef<const GlobalVariable *> GVars = It->second;
1177
1178 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1179 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
1180
1181 for (const GlobalVariable *GV : GVars) {
1182 O << "\t// demoted variable\n\t";
1183 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1184 }
1185}
1186
1187void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1188 raw_ostream &O) const {
1189 switch (AddressSpace) {
1191 O << "local";
1192 break;
1194 O << "global";
1195 break;
1197 O << "const";
1198 break;
1200 O << "shared";
1201 break;
1202 default:
1203 report_fatal_error("Bad address space found while emitting PTX: " +
1204 llvm::Twine(AddressSpace));
1205 break;
1206 }
1207}
1208
1209std::string
1210NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1211 switch (Ty->getTypeID()) {
1212 case Type::IntegerTyID: {
1213 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1214 if (NumBits == 1)
1215 return "pred";
1216 if (NumBits <= 64) {
1217 std::string name = "u";
1218 return name + utostr(NumBits);
1219 }
1220 llvm_unreachable("Integer too large");
1221 break;
1222 }
1223 case Type::BFloatTyID:
1224 case Type::HalfTyID:
1225 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1226 // PTX assembly.
1227 return "b16";
1228 case Type::FloatTyID:
1229 return "f32";
1230 case Type::DoubleTyID:
1231 return "f64";
1232 case Type::PointerTyID: {
1233 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1234 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1235
1236 if (PtrSize == 64)
1237 if (useB4PTR)
1238 return "b64";
1239 else
1240 return "u64";
1241 else if (useB4PTR)
1242 return "b32";
1243 else
1244 return "u32";
1245 }
1246 default:
1247 break;
1248 }
1249 llvm_unreachable("unexpected type");
1250}
1251
1252void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1253 raw_ostream &O,
1254 const NVPTXSubtarget &STI) {
1255 const DataLayout &DL = getDataLayout();
1256
1257 // GlobalVariables are always constant pointers themselves.
1258 Type *ETy = GVar->getValueType();
1259
1260 O << ".";
1261 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1262 if (isManaged(*GVar)) {
1263 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
1265 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1266
1267 O << " .attribute(.managed)";
1268 }
1269 O << " .align "
1270 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1271
1272 // Special case for i128/fp128
1273 if (ETy->getScalarSizeInBits() == 128) {
1274 O << " .b8 ";
1275 getSymbol(GVar)->print(O, MAI);
1276 O << "[16]";
1277 return;
1278 }
1279
1280 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1281 O << " ." << getPTXFundamentalTypeStr(ETy) << " ";
1282 getSymbol(GVar)->print(O, MAI);
1283 return;
1284 }
1285
1286 int64_t ElementSize = 0;
1287
1288 // Although PTX has direct support for struct type and array type and LLVM IR
1289 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1290 // support these high level field accesses. Structs and arrays are lowered
1291 // into arrays of bytes.
1292 switch (ETy->getTypeID()) {
1293 case Type::StructTyID:
1294 case Type::ArrayTyID:
1296 ElementSize = DL.getTypeStoreSize(ETy);
1297 O << " .b8 ";
1298 getSymbol(GVar)->print(O, MAI);
1299 O << "[";
1300 if (ElementSize) {
1301 O << ElementSize;
1302 }
1303 O << "]";
1304 break;
1305 default:
1306 llvm_unreachable("type not supported yet");
1307 }
1308}
1309
1310void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1311 const DataLayout &DL = getDataLayout();
1312 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1313 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1314 const NVPTXMachineFunctionInfo *MFI =
1315 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1316
1317 bool IsFirst = true;
1318 const bool IsKernelFunc = isKernelFunction(*F);
1319
1320 if (F->arg_empty() && !F->isVarArg()) {
1321 O << "()";
1322 return;
1323 }
1324
1325 O << "(\n";
1326
1327 for (const Argument &Arg : F->args()) {
1328 Type *Ty = Arg.getType();
1329 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1330
1331 if (!IsFirst)
1332 O << ",\n";
1333
1334 IsFirst = false;
1335
1336 // Handle image/sampler parameters
1337 if (IsKernelFunc) {
1338 const bool IsSampler = isSampler(Arg);
1339 const bool IsTexture = !IsSampler && isImageReadOnly(Arg);
1340 const bool IsSurface = !IsSampler && !IsTexture &&
1341 (isImageReadWrite(Arg) || isImageWriteOnly(Arg));
1342 if (IsSampler || IsTexture || IsSurface) {
1343 const bool EmitImgPtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1344 O << "\t.param ";
1345 if (EmitImgPtr)
1346 O << ".u64 .ptr ";
1347
1348 if (IsSampler)
1349 O << ".samplerref ";
1350 else if (IsTexture)
1351 O << ".texref ";
1352 else // IsSurface
1353 O << ".surfref ";
1354 O << ParamSym;
1355 continue;
1356 }
1357 }
1358
1359 auto GetOptimalAlignForParam = [TLI, &DL, F, &Arg](Type *Ty) -> Align {
1360 if (MaybeAlign StackAlign =
1361 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1362 return StackAlign.value();
1363
1364 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1365 MaybeAlign ParamAlign =
1366 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1367 return std::max(TypeAlign, ParamAlign.valueOrOne());
1368 };
1369
1370 if (Arg.hasByValAttr()) {
1371 // param has byVal attribute.
1372 Type *ETy = Arg.getParamByValType();
1373 assert(ETy && "Param should have byval type");
1374
1375 // Print .param .align <a> .b8 .param[size];
1376 // <a> = optimal alignment for the element type; always multiple of
1377 // PAL.getParamAlignment
1378 // size = typeallocsize of element type
1379 const Align OptimalAlign =
1380 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1381 : TLI->getFunctionByValParamAlign(
1382 F, ETy, Arg.getParamAlign().valueOrOne(), DL);
1383
1384 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1385 << "[" << DL.getTypeAllocSize(ETy) << "]";
1386 continue;
1387 }
1388
1389 if (shouldPassAsArray(Ty)) {
1390 // Just print .param .align <a> .b8 .param[size];
1391 // <a> = optimal alignment for the element type; always multiple of
1392 // PAL.getParamAlignment
1393 // size = typeallocsize of element type
1394 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1395
1396 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1397 << "[" << DL.getTypeAllocSize(Ty) << "]";
1398
1399 continue;
1400 }
1401 // Just a scalar
1402 auto *PTy = dyn_cast<PointerType>(Ty);
1403 unsigned PTySizeInBits = 0;
1404 if (PTy) {
1405 PTySizeInBits =
1406 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1407 assert(PTySizeInBits && "Invalid pointer size");
1408 }
1409
1410 if (IsKernelFunc) {
1411 if (PTy) {
1412 O << "\t.param .u" << PTySizeInBits << " .ptr";
1413
1414 switch (PTy->getAddressSpace()) {
1415 default:
1416 break;
1418 O << " .global";
1419 break;
1421 O << " .shared";
1422 break;
1424 O << " .const";
1425 break;
1427 O << " .local";
1428 break;
1429 }
1430
1431 O << " .align " << Arg.getParamAlign().valueOrOne().value() << " "
1432 << ParamSym;
1433 continue;
1434 }
1435
1436 // non-pointer scalar to kernel func
1437 O << "\t.param .";
1438 // Special case: predicate operands become .u8 types
1439 if (Ty->isIntegerTy(1))
1440 O << "u8";
1441 else
1442 O << getPTXFundamentalTypeStr(Ty);
1443 O << " " << ParamSym;
1444 continue;
1445 }
1446 // Non-kernel function, just print .param .b<size> for ABI
1447 // and .reg .b<size> for non-ABI
1448 unsigned Size;
1449 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
1450 Size = promoteScalarArgumentSize(ITy->getBitWidth());
1451 } else if (PTy) {
1452 assert(PTySizeInBits && "Invalid pointer size");
1453 Size = PTySizeInBits;
1454 } else
1456 O << "\t.param .b" << Size << " " << ParamSym;
1457 }
1458
1459 if (F->isVarArg()) {
1460 if (!IsFirst)
1461 O << ",\n";
1462 O << "\t.param .align " << STI.getMaxRequiredAlignment() << " .b8 "
1463 << TLI->getParamName(F, /* vararg */ -1) << "[]";
1464 }
1465
1466 O << "\n)";
1467}
1468
1469void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1470 const MachineFunction &MF) {
1471 SmallString<128> Str;
1472 raw_svector_ostream O(Str);
1473
1474 // Map the global virtual register number to a register class specific
1475 // virtual register number starting from 1 with that class.
1476 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1477
1478 // Emit the Fake Stack Object
1479 const MachineFrameInfo &MFI = MF.getFrameInfo();
1480 int64_t NumBytes = MFI.getStackSize();
1481 if (NumBytes) {
1482 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1483 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1484 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1485 O << "\t.reg .b64 \t%SP;\n"
1486 << "\t.reg .b64 \t%SPL;\n";
1487 } else {
1488 O << "\t.reg .b32 \t%SP;\n"
1489 << "\t.reg .b32 \t%SPL;\n";
1490 }
1491 }
1492
1493 // Go through all virtual registers to establish the mapping between the
1494 // global virtual
1495 // register number and the per class virtual register number.
1496 // We use the per class virtual register number in the ptx output.
1497 for (unsigned I : llvm::seq(MRI->getNumVirtRegs())) {
1499 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1500 continue;
1501 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1502 RCRegMap[VR] = RCRegMap.size() + 1;
1503 }
1504
1505 // Emit declaration of the virtual registers or 'physical' registers for
1506 // each register class
1507 for (const TargetRegisterClass *RC : TRI->regclasses()) {
1508 const unsigned N = VRegMapping[RC].size();
1509
1510 // Only declare those registers that may be used.
1511 if (N) {
1512 const StringRef RCName = getNVPTXRegClassName(RC);
1513 const StringRef RCStr = getNVPTXRegClassStr(RC);
1514 O << "\t.reg " << RCName << " \t" << RCStr << "<" << (N + 1) << ">;\n";
1515 }
1516 }
1517
1518 OutStreamer->emitRawText(O.str());
1519}
1520
1521/// Translate virtual register numbers in DebugInfo locations to their printed
1522/// encodings, as used by CUDA-GDB.
1523void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1524 const MachineFunction &MF) {
1525 const NVPTXSubtarget &STI = MF.getSubtarget<NVPTXSubtarget>();
1526 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1527
1528 // Clear the old mapping, and add the new one. This mapping is used after the
1529 // printing of the current function is complete, but before the next function
1530 // is printed.
1531 registerInfo->clearDebugRegisterMap();
1532
1533 for (auto &classMap : VRegMapping) {
1534 for (auto &registerMapping : classMap.getSecond()) {
1535 auto reg = registerMapping.getFirst();
1536 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1537 }
1538 }
1539}
1540
1541void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp,
1542 raw_ostream &O) const {
1543 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1544 bool ignored;
1545 unsigned int numHex;
1546 const char *lead;
1547
1548 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1549 numHex = 8;
1550 lead = "0f";
1552 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1553 numHex = 16;
1554 lead = "0d";
1556 } else
1557 llvm_unreachable("unsupported fp type");
1558
1559 APInt API = APF.bitcastToAPInt();
1560 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1561}
1562
1563void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1564 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1565 O << CI->getValue();
1566 return;
1567 }
1568 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1569 printFPConstant(CFP, O);
1570 return;
1571 }
1572 if (isa<ConstantPointerNull>(CPV)) {
1573 O << "0";
1574 return;
1575 }
1576 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1577 const bool IsNonGenericPointer = GVar->getAddressSpace() != 0;
1578 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1579 O << "generic(";
1580 getSymbol(GVar)->print(O, MAI);
1581 O << ")";
1582 } else {
1583 getSymbol(GVar)->print(O, MAI);
1584 }
1585 return;
1586 }
1587 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1588 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1589 printMCExpr(*E, O);
1590 return;
1591 }
1592 llvm_unreachable("Not scalar type found in printScalarConstant()");
1593}
1594
1595void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1596 AggBuffer *AggBuffer) {
1597 const DataLayout &DL = getDataLayout();
1598 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1599 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1600 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1601 // only the space allocated by CPV.
1602 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1603 return;
1604 }
1605
1606 // Helper for filling AggBuffer with APInts.
1607 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1608 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1609 SmallVector<unsigned char, 16> Buf(NumBytes);
1610 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1611 // input's bit width, and i1 arrays may not have a length that is a multuple
1612 // of 8. We handle the last byte separately, so we never request out of
1613 // bounds bits.
1614 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1615 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1616 }
1617 size_t LastBytePosition = (NumBytes - 1) * 8;
1618 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1619 Buf[NumBytes - 1] =
1620 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1621 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1622 };
1623
1624 switch (CPV->getType()->getTypeID()) {
1625 case Type::IntegerTyID:
1626 if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1627 AddIntToBuffer(CI->getValue());
1628 break;
1629 }
1630 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1631 if (const auto *CI =
1633 AddIntToBuffer(CI->getValue());
1634 break;
1635 }
1636 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1637 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1638 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1639 AggBuffer->addZeros(AllocSize);
1640 break;
1641 }
1642 }
1643 llvm_unreachable("unsupported integer const type");
1644 break;
1645
1646 case Type::HalfTyID:
1647 case Type::BFloatTyID:
1648 case Type::FloatTyID:
1649 case Type::DoubleTyID:
1650 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1651 break;
1652
1653 case Type::PointerTyID: {
1654 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1655 AggBuffer->addSymbol(GVar, GVar);
1656 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1657 const Value *v = Cexpr->stripPointerCasts();
1658 AggBuffer->addSymbol(v, Cexpr);
1659 }
1660 AggBuffer->addZeros(AllocSize);
1661 break;
1662 }
1663
1664 case Type::ArrayTyID:
1666 case Type::StructTyID: {
1668 bufferAggregateConstant(CPV, AggBuffer);
1669 if (Bytes > AllocSize)
1670 AggBuffer->addZeros(Bytes - AllocSize);
1671 } else if (isa<ConstantAggregateZero>(CPV))
1672 AggBuffer->addZeros(Bytes);
1673 else
1674 llvm_unreachable("Unexpected Constant type");
1675 break;
1676 }
1677
1678 default:
1679 llvm_unreachable("unsupported type");
1680 }
1681}
1682
1683void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1684 AggBuffer *aggBuffer) {
1685 const DataLayout &DL = getDataLayout();
1686
1687 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1688 for (unsigned I : llvm::seq(Val.getBitWidth() / 8))
1689 Buffer->addByte(Val.extractBitsAsZExtValue(8, I * 8));
1690 };
1691
1692 // Integers of arbitrary width
1693 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1694 ExtendBuffer(CI->getValue(), aggBuffer);
1695 return;
1696 }
1697
1698 // f128
1699 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1700 if (CFP->getType()->isFP128Ty()) {
1701 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1702 return;
1703 }
1704 }
1705
1706 // Old constants
1707 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1708 for (const auto &Op : CPV->operands())
1709 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1710 return;
1711 }
1712
1713 if (const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1714 for (unsigned I : llvm::seq(CDS->getNumElements()))
1715 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(I)), 0, aggBuffer);
1716 return;
1717 }
1718
1719 if (isa<ConstantStruct>(CPV)) {
1720 if (CPV->getNumOperands()) {
1721 StructType *ST = cast<StructType>(CPV->getType());
1722 for (unsigned I : llvm::seq(CPV->getNumOperands())) {
1723 int EndOffset = (I + 1 == CPV->getNumOperands())
1724 ? DL.getStructLayout(ST)->getElementOffset(0) +
1725 DL.getTypeAllocSize(ST)
1726 : DL.getStructLayout(ST)->getElementOffset(I + 1);
1727 int Bytes = EndOffset - DL.getStructLayout(ST)->getElementOffset(I);
1728 bufferLEByte(cast<Constant>(CPV->getOperand(I)), Bytes, aggBuffer);
1729 }
1730 }
1731 return;
1732 }
1733 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1734}
1735
1736/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1737/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1738/// expressions that are representable in PTX and create
1739/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1740const MCExpr *
1741NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
1742 bool ProcessingGeneric) const {
1743 MCContext &Ctx = OutContext;
1744
1745 if (CV->isNullValue() || isa<UndefValue>(CV))
1746 return MCConstantExpr::create(0, Ctx);
1747
1748 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1749 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1750
1751 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1752 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1753 if (ProcessingGeneric)
1754 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1755 return Expr;
1756 }
1757
1758 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1759 if (!CE) {
1760 llvm_unreachable("Unknown constant value to lower!");
1761 }
1762
1763 switch (CE->getOpcode()) {
1764 default:
1765 break; // Error
1766
1767 case Instruction::AddrSpaceCast: {
1768 // Strip the addrspacecast and pass along the operand
1769 PointerType *DstTy = cast<PointerType>(CE->getType());
1770 if (DstTy->getAddressSpace() == 0)
1771 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1772
1773 break; // Error
1774 }
1775
1776 case Instruction::GetElementPtr: {
1777 const DataLayout &DL = getDataLayout();
1778
1779 // Generate a symbolic expression for the byte address
1780 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1781 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1782
1783 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1784 ProcessingGeneric);
1785 if (!OffsetAI)
1786 return Base;
1787
1788 int64_t Offset = OffsetAI.getSExtValue();
1790 Ctx);
1791 }
1792
1793 case Instruction::Trunc:
1794 // We emit the value and depend on the assembler to truncate the generated
1795 // expression properly. This is important for differences between
1796 // blockaddress labels. Since the two labels are in the same function, it
1797 // is reasonable to treat their delta as a 32-bit value.
1798 [[fallthrough]];
1799 case Instruction::BitCast:
1800 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1801
1802 case Instruction::IntToPtr: {
1803 const DataLayout &DL = getDataLayout();
1804
1805 // Handle casts to pointers by changing them into casts to the appropriate
1806 // integer type. This promotes constant folding and simplifies this code.
1807 Constant *Op = CE->getOperand(0);
1808 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1809 /*IsSigned*/ false, DL);
1810 if (Op)
1811 return lowerConstantForGV(Op, ProcessingGeneric);
1812
1813 break; // Error
1814 }
1815
1816 case Instruction::PtrToInt: {
1817 const DataLayout &DL = getDataLayout();
1818
1819 // Support only foldable casts to/from pointers that can be eliminated by
1820 // changing the pointer to the appropriately sized integer type.
1821 Constant *Op = CE->getOperand(0);
1822 Type *Ty = CE->getType();
1823
1824 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1825
1826 // We can emit the pointer value into this slot if the slot is an
1827 // integer slot equal to the size of the pointer.
1828 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1829 return OpExpr;
1830
1831 // Otherwise the pointer is smaller than the resultant integer, mask off
1832 // the high bits so we are sure to get a proper truncation if the input is
1833 // a constant expr.
1834 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1835 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1836 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1837 }
1838
1839 // The MC library also has a right-shift operator, but it isn't consistently
1840 // signed or unsigned between different targets.
1841 case Instruction::Add: {
1842 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1843 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
1844 switch (CE->getOpcode()) {
1845 default: llvm_unreachable("Unknown binary operator constant cast expr");
1846 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1847 }
1848 }
1849 }
1850
1851 // If the code isn't optimized, there may be outstanding folding
1852 // opportunities. Attempt to fold the expression using DataLayout as a
1853 // last resort before giving up.
1855 if (C != CE)
1856 return lowerConstantForGV(C, ProcessingGeneric);
1857
1858 // Otherwise report the problem to the user.
1859 std::string S;
1860 raw_string_ostream OS(S);
1861 OS << "Unsupported expression in static initializer: ";
1862 CE->printAsOperand(OS, /*PrintType=*/false,
1863 !MF ? nullptr : MF->getFunction().getParent());
1864 report_fatal_error(Twine(OS.str()));
1865}
1866
1867void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) const {
1868 OutContext.getAsmInfo()->printExpr(OS, Expr);
1869}
1870
1871/// PrintAsmOperand - Print out an operand for an inline asm expression.
1872///
1873bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1874 const char *ExtraCode, raw_ostream &O) {
1875 if (ExtraCode && ExtraCode[0]) {
1876 if (ExtraCode[1] != 0)
1877 return true; // Unknown modifier.
1878
1879 switch (ExtraCode[0]) {
1880 default:
1881 // See if this is a generic print operand
1882 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
1883 case 'r':
1884 break;
1885 }
1886 }
1887
1888 printOperand(MI, OpNo, O);
1889
1890 return false;
1891}
1892
1893bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
1894 unsigned OpNo,
1895 const char *ExtraCode,
1896 raw_ostream &O) {
1897 if (ExtraCode && ExtraCode[0])
1898 return true; // Unknown modifier
1899
1900 O << '[';
1901 printMemOperand(MI, OpNo, O);
1902 O << ']';
1903
1904 return false;
1905}
1906
1907void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
1908 raw_ostream &O) {
1909 const MachineOperand &MO = MI->getOperand(OpNum);
1910 switch (MO.getType()) {
1912 if (MO.getReg().isPhysical()) {
1913 if (MO.getReg() == NVPTX::VRDepot)
1915 else
1917 } else {
1918 emitVirtualRegister(MO.getReg(), O);
1919 }
1920 break;
1921
1923 O << MO.getImm();
1924 break;
1925
1927 printFPConstant(MO.getFPImm(), O);
1928 break;
1929
1931 PrintSymbolOperand(MO, O);
1932 break;
1933
1935 MO.getMBB()->getSymbol()->print(O, MAI);
1936 break;
1937
1938 default:
1939 llvm_unreachable("Operand type not supported.");
1940 }
1941}
1942
1943void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
1944 raw_ostream &O, const char *Modifier) {
1945 printOperand(MI, OpNum, O);
1946
1947 if (Modifier && strcmp(Modifier, "add") == 0) {
1948 O << ", ";
1949 printOperand(MI, OpNum + 1, O);
1950 } else {
1951 if (MI->getOperand(OpNum + 1).isImm() &&
1952 MI->getOperand(OpNum + 1).getImm() == 0)
1953 return; // don't print ',0' or '+0'
1954 O << "+";
1955 printOperand(MI, OpNum + 1, O);
1956 }
1957}
1958
1959char NVPTXAsmPrinter::ID = 0;
1960
1961INITIALIZE_PASS(NVPTXAsmPrinter, "nvptx-asm-printer", "NVPTX Assembly Printer",
1962 false, false)
1963
1964// Force static initialization.
1965extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
1966LLVMInitializeNVPTXAsmPrinter() {
1969}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define DEPOTNAME
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static const char * name
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
APInt bitcastToAPInt() const
Definition APFloat.h:1353
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:520
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:93
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition AsmPrinter.h:96
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:108
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition AsmPrinter.h:504
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
Definition AsmPrinter.h:613
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:127
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:100
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition AsmPrinter.h:452
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:105
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
Definition AsmPrinter.h:617
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
const APFloat & getValueAPF() const
Definition Constants.h:320
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:261
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:623
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:316
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void addOperand(const MCOperand Op)
Definition MCInst.h:215
void setOpcode(unsigned Op)
Definition MCInst.h:201
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition MCSymbol.cpp:59
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:67
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition Type.h:74
@ HalfTyID
16-bit floating point type
Definition Type.h:56
@ VoidTyID
type with no size
Definition Type.h:63
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ StructTyID
Structures.
Definition Type.h:73
@ IntegerTyID
Arbitrary bit width integers.
Definition Type.h:70
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition Type.h:75
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition Type.h:57
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ PointerTyID
Pointers.
Definition Type.h:72
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
TypeID getTypeID() const
Return the type id for the type.
Definition Type.h:136
op_range operands()
Definition User.h:292
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
size_type size() const
Definition DenseSet.h:87
bool erase(const ValueT &V)
Definition DenseSet.h:100
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:174
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to a SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:666
uint64_t read64le(const void *P)
Definition Endian.h:432
uint32_t read32le(const void *P)
Definition Endian.h:429
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
MaybeAlign getAlign(const CallInst &I, unsigned Index)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1685
std::optional< unsigned > getMaxNReg(const Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
StringRef getSamplerName(const Value &V)
bool isImageReadWrite(const Value &V)
bool isImageReadOnly(const Value &V)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
bool isSampler(const Value &V)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isSurface(const Value &V)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition Format.h:201
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
std::optional< unsigned > getMaxClusterRank(const Function &F)
StringRef getTextureName(const Value &V)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
StringRef getSurfaceName(const Value &V)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool isTexture(const Value &V)
bool isImageWriteOnly(const Value &V)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
#define N
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...