LLVM 23.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXDwarfDebug.h"
21#include "NVPTXMCExpr.h"
23#include "NVPTXRegisterInfo.h"
24#include "NVPTXSubtarget.h"
25#include "NVPTXTargetMachine.h"
26#include "NVPTXUtilities.h"
27#include "NVVMProperties.h"
29#include "cl_common_defines.h"
30#include "llvm/ADT/APFloat.h"
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/ArrayRef.h"
33#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/DenseSet.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/Twine.h"
54#include "llvm/IR/Argument.h"
55#include "llvm/IR/Attributes.h"
56#include "llvm/IR/BasicBlock.h"
57#include "llvm/IR/Constant.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/DataLayout.h"
60#include "llvm/IR/DebugInfo.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalAlias.h"
66#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/Instruction.h"
69#include "llvm/IR/LLVMContext.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Operator.h"
72#include "llvm/IR/Type.h"
73#include "llvm/IR/User.h"
74#include "llvm/MC/MCExpr.h"
75#include "llvm/MC/MCInst.h"
76#include "llvm/MC/MCInstrDesc.h"
77#include "llvm/MC/MCStreamer.h"
78#include "llvm/MC/MCSymbol.h"
83#include "llvm/Support/Endian.h"
90#include <cassert>
91#include <cstdint>
92#include <cstring>
93#include <string>
94
95using namespace llvm;
96
97#define DEPOTNAME "__local_depot"
98
/// Returns the name of the texture variable \p V. Asserts that \p V actually
/// has a name.
99static StringRef getTextureName(const Value &V) {
100 assert(V.hasName() && "Found texture variable with no name");
101 return V.getName();
102}
103
105 assert(V.hasName() && "Found surface variable with no name");
106 return V.getName();
107}
108
110 assert(V.hasName() && "Found sampler variable with no name");
111 return V.getName();
112}
113
114/// discoverDependentGlobals - Collect into \p Globals the GlobalVariables on
115/// which \p V depends.
116static void
119 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
120 Globals.insert(GV);
121 return;
122 }
123
124 if (const User *U = dyn_cast<User>(V))
125 for (const auto &O : U->operands())
126 discoverDependentGlobals(O, Globals);
127}
128
129/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
130/// instances to be emitted, but only after the globals it depends on have
131/// been added first.
132static void
137 // Have we already visited this one?
138 if (Visited.count(GV))
139 return;
140
141 // Do we have a circular dependency?
142 if (!Visiting.insert(GV).second)
143 report_fatal_error("Circular dependency found in global variable set");
144
145 // Make sure we visit all dependents first
147 for (const auto &O : GV->operands())
148 discoverDependentGlobals(O, Others);
149
150 for (const GlobalVariable *GV : Others)
151 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
152
153 // Now we can visit ourself
154 Order.push_back(GV);
155 Visited.insert(GV);
156 Visiting.erase(GV);
157}
158
/// Emits a single machine instruction: checks the opcode's predicates against
/// the subtarget feature bits, then lowers \p MI into an MCInst.
// NOTE(review): the statement that consumes Inst after lowering is not visible
// in this listing -- confirm against the original file.
159void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
160 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
161 getSubtargetInfo().getFeatureBits());
162
163 MCInst Inst;
164 lowerToMCInst(MI, Inst);
166}
167
168void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
169 OutMI.setOpcode(MI->getOpcode());
170 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
171 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
172 const MachineOperand &MO = MI->getOperand(0);
173 OutMI.addOperand(GetSymbolRef(
174 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
175 return;
176 }
177
178 for (const auto MO : MI->operands())
179 OutMI.addOperand(lowerOperand(MO));
180}
181
/// Converts the machine operand \p MO into an MCOperand, dispatching on the
/// operand type. The visible arms produce a register operand (encoded through
/// encodeVirtualRegister), an immediate operand, or a symbol reference built
/// from an external symbol name or a global. Floating-point immediates are
/// dispatched again on the IR type of the constant; an unsupported FP type is
/// a fatal error, and an unknown operand type is unreachable.
// NOTE(review): the `case` labels of the outer switch and the bodies of the
// FP-type cases are not visible in this listing -- confirm against the
// original file before relying on the mapping described above.
182MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
183 switch (MO.getType()) {
184 default:
185 llvm_unreachable("unknown operand type");
187 return MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
189 return MCOperand::createImm(MO.getImm());
194 return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
196 return GetSymbolRef(getSymbol(MO.getGlobal()));
198 const ConstantFP *Cnt = MO.getFPImm();
199 const APFloat &Val = Cnt->getValueAPF();
200
201 switch (Cnt->getType()->getTypeID()) {
202 default:
203 report_fatal_error("Unsupported FP type");
204 break;
205 case Type::HalfTyID:
208 case Type::BFloatTyID:
211 case Type::FloatTyID:
214 case Type::DoubleTyID:
217 }
218 break;
219 }
220 }
221}
222
/// Packs a register into a single unsigned value: a register-class tag in the
/// bits above bit 27 and a register number in the low 28 bits.
///
/// On the first path the number is the one recorded for \p Reg in
/// VRegMapping, and the tag is 1, 2, 3, 4 or 7 for the B1, B16, B32, B64 and
/// B128 register classes respectively; any other class is a fatal error. On
/// the `else` path the tag is 0 and the low 28 bits of \p Reg itself are used.
// NOTE(review): the condition that opens the if/else is not visible in this
// listing; judging by the comment on the else branch it presumably tests
// whether Reg is a virtual register -- confirm against the original file.
223unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
225 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
226
227 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
228 unsigned RegNum = RegMap[Reg];
229
230 // Encode the register class in the upper 4 bits
231 // Must be kept in sync with NVPTXInstPrinter::printRegName
232 unsigned Ret = 0;
233 if (RC == &NVPTX::B1RegClass) {
234 Ret = (1 << 28);
235 } else if (RC == &NVPTX::B16RegClass) {
236 Ret = (2 << 28);
237 } else if (RC == &NVPTX::B32RegClass) {
238 Ret = (3 << 28);
239 } else if (RC == &NVPTX::B64RegClass) {
240 Ret = (4 << 28);
241 } else if (RC == &NVPTX::B128RegClass) {
242 Ret = (7 << 28);
243 } else {
244 report_fatal_error("Bad register class");
245 }
246
247 // Insert the vreg number
248 Ret |= (RegNum & 0x0FFFFFFF);
249 return Ret;
250 } else {
251 // Some special-use registers are actually physical registers.
252 // Encode this as the register class ID of 0 and the real register ID.
253 return Reg & 0x0FFFFFFF;
254 }
255}
256
257MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
258 const MCExpr *Expr;
259 Expr = MCSymbolRefExpr::create(Symbol, OutContext);
260 return MCOperand::createExpr(Expr);
261}
262
263void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
264 const DataLayout &DL = getDataLayout();
265 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
266 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
267
268 Type *Ty = F->getReturnType();
269 if (Ty->getTypeID() == Type::VoidTyID)
270 return;
271 O << " (";
272
273 auto PrintScalarRetVal = [&](unsigned Size) {
274 O << ".param .b" << promoteScalarArgumentSize(Size) << " func_retval0";
275 };
276 if (shouldPassAsArray(Ty)) {
277 const unsigned TotalSize = DL.getTypeAllocSize(Ty);
278 const Align RetAlignment =
279 getFunctionArgumentAlignment(F, Ty, AttributeList::ReturnIndex, DL);
280 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
281 << TotalSize << "]";
282 } else if (Ty->isFloatingPointTy()) {
283 PrintScalarRetVal(Ty->getPrimitiveSizeInBits());
284 } else if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
285 PrintScalarRetVal(ITy->getBitWidth());
286 } else if (isa<PointerType>(Ty)) {
287 PrintScalarRetVal(TLI->getPointerTy(DL).getSizeInBits());
288 } else
289 llvm_unreachable("Unknown return type");
290 O << ") ";
291}
292
293void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
294 raw_ostream &O) {
295 const Function &F = MF.getFunction();
296 printReturnValStr(&F, O);
297}
298
299// Return true if MBB is the header of a loop marked with
300// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
301bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
302 const MachineBasicBlock &MBB) const {
303 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
304 // We insert .pragma "nounroll" only to the loop header.
305 if (!LI.isLoopHeader(&MBB))
306 return false;
307
308 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
309 // we iterate through each back edge of the loop with header MBB, and check
310 // whether its metadata contains llvm.loop.unroll.disable.
311 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
312 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
313 // Edges from other loops to MBB are not back edges.
314 continue;
315 }
316 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
317 if (MDNode *LoopID =
318 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
319 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
320 return true;
321 if (MDNode *UnrollCountMD =
322 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
323 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
324 ->isOne())
325 return true;
326 }
327 }
328 }
329 }
330 return false;
331}
332
/// Emits a `.pragma "nounroll";` line when \p MBB is the header of a loop that
/// isLoopHeaderOfNoUnroll() reports as not to be unrolled.
// NOTE(review): one statement at the top of this body is not visible in this
// listing -- confirm against the original file.
333void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
335 if (isLoopHeaderOfNoUnroll(MBB))
336 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
337}
338
340 SmallString<128> Str;
341 raw_svector_ostream O(Str);
342
343 if (!GlobalsEmitted) {
344 emitGlobals(*MF->getFunction().getParent());
345 GlobalsEmitted = true;
346 }
347
348 // Set up
349 MRI = &MF->getRegInfo();
350 F = &MF->getFunction();
351 emitLinkageDirective(F, O);
352 if (isKernelFunction(*F))
353 O << ".entry ";
354 else {
355 O << ".func ";
356 printReturnValStr(*MF, O);
357 }
358
359 CurrentFnSym->print(O, MAI);
360
361 emitFunctionParamList(F, O);
362 O << "\n";
363
364 if (isKernelFunction(*F))
365 emitKernelFunctionDirectives(*F, O);
366
368 O << ".noreturn";
369
370 OutStreamer->emitRawText(O.str());
371
372 VRegMapping.clear();
373 // Emit open brace for function body.
374 OutStreamer->emitRawText(StringRef("{\n"));
375 setAndEmitFunctionVirtualRegisters(*MF);
376 encodeDebugInfoRegisterNumbers(*MF);
377 // Emit initial .loc debug directive for correct relocation symbol data.
378 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
379 assert(SP->getUnit());
380 if (!SP->getUnit()->isDebugDirectivesOnly())
382 }
383}
384
386 bool Result = AsmPrinter::runOnMachineFunction(F);
387 // Emit closing brace for the body of function F.
388 // The closing brace must be emitted here because we need to emit additional
389 // debug labels/data after the last basic block.
390 // We also emit it here because there is no function that is invoked once
391 // emission of the function body has finished.
392 OutStreamer->emitRawText(StringRef("}\n"));
393 return Result;
394}
395
398 raw_svector_ostream O(Str);
399 emitDemotedVars(&MF->getFunction(), O);
400 OutStreamer->emitRawText(O.str());
401}
402
404 VRegMapping.clear();
405}
406
410 return OutContext.getOrCreateSymbol(Str);
411}
412
/// Emits an "implicit-def: <register>" comment for the register defined by
/// operand 0 of \p MI, followed by a blank line. For non-virtual registers the
/// name comes from the subtarget's register info.
// NOTE(review): the expression that names a virtual register is not visible in
// this listing -- confirm against the original file.
413void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
414 Register RegNo = MI->getOperand(0).getReg();
415 if (RegNo.isVirtual()) {
416 OutStreamer->AddComment(Twine("implicit-def: ") +
418 } else {
419 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
420 OutStreamer->AddComment(Twine("implicit-def: ") +
421 STI.getRegisterInfo()->getName(RegNo));
422 }
423 OutStreamer->addBlankLine();
424}
425
/// Prints the performance-tuning directives of kernel \p F to \p O, one per
/// line: .reqntid, .maxntid, .minnctapersm and .maxnreg when the respective
/// property is present, and -- only for SM version >= 90 -- the cluster
/// directives (.explicitcluster, .reqnctapercluster, .blocksareclusters,
/// .maxclusterrank).
// NOTE(review): the argument lines of the formatv() calls and the definition
// of BlocksAreClusters are not visible in this listing -- confirm against the
// original file.
426void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
427 raw_ostream &O) const {
428 // If the NVVM IR has some of reqntid* specified, then output
429 // the reqntid directive, and set the unspecified ones to 1.
430 // If none of Reqntid* is specified, don't output reqntid directive.
431 const auto ReqNTID = getReqNTID(F);
432 if (!ReqNTID.empty())
433 O << formatv(".reqntid {0:$[, ]}\n",
435
436 const auto MaxNTID = getMaxNTID(F);
437 if (!MaxNTID.empty())
438 O << formatv(".maxntid {0:$[, ]}\n",
440
441 if (const auto Mincta = getMinCTASm(F))
442 O << ".minnctapersm " << *Mincta << "\n";
443
444 if (const auto Maxnreg = getMaxNReg(F))
445 O << ".maxnreg " << *Maxnreg << "\n";
446
447 // .maxclusterrank directive requires SM_90 or higher, make sure that we
448 // filter it out for lower SM versions, as it causes a hard ptxas crash.
449 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
450 const NVPTXSubtarget *STI = &NTM.getSubtarget<NVPTXSubtarget>(F);
451
452 if (STI->getSmVersion() >= 90) {
453 const auto ClusterDim = getClusterDim(F);
455
456 if (!ClusterDim.empty()) {
457
// .explicitcluster is only printed when the blocks-are-clusters mode is off.
458 if (!BlocksAreClusters)
459 O << ".explicitcluster\n";
460
// The x dimension decides the case: either all dimensions are non-zero and
// .reqnctapercluster is printed, or all of them are zero and nothing is.
461 if (ClusterDim[0] != 0) {
462 assert(llvm::all_of(ClusterDim, not_equal_to(0)) &&
463 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
464 "should be non-zero as well");
465
466 O << formatv(".reqnctapercluster {0:$[, ]}\n",
468 } else {
469 assert(llvm::all_of(ClusterDim, equal_to(0)) &&
470 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
471 "should be 0 as well");
472 }
473 }
474
// .blocksareclusters needs both reqntid and cluster_dim plus PTX version
// >= 90; otherwise an "unsupported" diagnostic is reported instead.
475 if (BlocksAreClusters) {
476 LLVMContext &Ctx = F.getContext();
477 if (ReqNTID.empty() || ClusterDim.empty())
478 Ctx.diagnose(DiagnosticInfoUnsupported(
479 F, "blocksareclusters requires reqntid and cluster_dim attributes",
480 F.getSubprogram()));
481 else if (STI->getPTXVersion() < 90)
482 Ctx.diagnose(DiagnosticInfoUnsupported(
483 F, "blocksareclusters requires PTX version >= 9.0",
484 F.getSubprogram()));
485 else
486 O << ".blocksareclusters\n";
487 }
488
489 if (const auto Maxclusterrank = getMaxClusterRank(F))
490 O << ".maxclusterrank " << *Maxclusterrank << "\n";
491 }
492}
493
494std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
495 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
496
497 std::string Name;
498 raw_string_ostream NameStr(Name);
499
500 VRegRCMap::const_iterator I = VRegMapping.find(RC);
501 assert(I != VRegMapping.end() && "Bad register class");
502 const DenseMap<unsigned, unsigned> &RegMap = I->second;
503
504 VRegMap::const_iterator VI = RegMap.find(Reg);
505 assert(VI != RegMap.end() && "Bad virtual register");
506 unsigned MappedVR = VI->second;
507
508 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
509
510 return Name;
511}
512
513void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
514 raw_ostream &O) {
515 O << getVirtualRegisterName(vr);
516}
517
/// Prints a declaration for the alias \p GA to \p O, under the alias's own
/// symbol name but with the prototype of function F. Going by the error
/// messages, F must be a non-kernel function definition and weak-style
/// linkage on the alias is rejected with a fatal error.
// NOTE(review): the line that derives F from GA, the call that reports the
// first error, and the tail of the linkage condition are not visible in this
// listing -- confirm against the original file.
518void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
519 raw_ostream &O) {
521 if (!F || isKernelFunction(*F) || F->isDeclaration())
523 "NVPTX aliasee must be a non-kernel function definition");
524
525 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
527 report_fatal_error("NVPTX aliasee must not be '.weak'");
528
529 emitDeclarationWithName(F, getSymbol(GA), O);
530}
531
532void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
533 emitDeclarationWithName(F, getSymbol(F), O);
534}
535
/// Prints a declaration of \p F to \p O using \p S as the declared name.
/// The output is, in order: the linkage directive, ".entry " or ".func ", the
/// return-value declaration, the symbol, a newline, the parameter list, a
/// newline, ".noreturn" and finally ";".
// NOTE(review): the line directly before `O << ".noreturn";` is not visible in
// this listing; it is presumably a condition guarding that statement --
// confirm against the original file.
536void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
537 raw_ostream &O) {
538 emitLinkageDirective(F, O);
539 if (isKernelFunction(*F))
540 O << ".entry ";
541 else
542 O << ".func ";
// The else above has no braces, so this call runs for kernels as well.
543 printReturnValStr(F, O);
544 S->print(O, MAI);
545 O << "\n";
546 emitFunctionParamList(F, O);
547 O << "\n";
549 O << ".noreturn";
550 O << ";\n";
551}
552
/// Returns whether constant \p C is used in the definition of a global
/// variable, looking through users that are themselves constants. A null
/// \p C yields false, and the global named "llvm.used" does not count.
// NOTE(review): the line that introduces GV and the recursive test inside the
// loop are not visible in this listing -- confirm against the original file.
553static bool usedInGlobalVarDef(const Constant *C) {
554 if (!C)
555 return false;
556
558 return GV->getName() != "llvm.used";
559
560 for (const User *U : C->users())
561 if (const Constant *C = dyn_cast<Constant>(U))
563 return true;
564
565 return false;
566}
567
568static bool usedInOneFunc(const User *U, Function const *&OneFunc) {
569 if (const GlobalVariable *OtherGV = dyn_cast<GlobalVariable>(U))
570 if (OtherGV->getName() == "llvm.used")
571 return true;
572
573 if (const Instruction *I = dyn_cast<Instruction>(U)) {
574 if (const Function *CurFunc = I->getFunction()) {
575 if (OneFunc && (CurFunc != OneFunc))
576 return false;
577 OneFunc = CurFunc;
578 return true;
579 }
580 return false;
581 }
582
583 for (const User *UU : U->users())
584 if (!usedInOneFunc(UU, OneFunc))
585 return false;
586
587 return true;
588}
589
590/* Find out if a global variable can be demoted to local scope.
591 * Currently, this is valid for CUDA shared variables, which have local
592 * scope and global lifetime. So the conditions to check are :
593 * 1. Is the global variable in shared address space?
594 * 2. Does it have local linkage?
595 * 3. Is the global variable referenced only in one function?
596 */
// Returns true when GV can be demoted to function scope. On success f is set
// to the single function that uses GV; on failure f is left untouched.
// NOTE(review): the condition in front of the second `return false` is not
// visible in this listing; going by the block comment above this function it
// is presumably the shared-address-space check -- confirm against the
// original file.
597static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f) {
598 if (!GV->hasLocalLinkage())
599 return false;
601 return false;
602
603 const Function *oneFunc = nullptr;
604
605 bool flag = usedInOneFunc(GV, oneFunc);
606 if (!flag)
607 return false;
// usedInOneFunc can succeed without ever reaching an instruction user, in
// which case no function was recorded and there is nothing to demote into.
608 if (!oneFunc)
609 return false;
610 f = oneFunc;
611 return true;
612}
613
614static bool useFuncSeen(const Constant *C,
615 const SmallPtrSetImpl<const Function *> &SeenSet) {
616 for (const User *U : C->users()) {
617 if (const Constant *cu = dyn_cast<Constant>(U)) {
618 if (useFuncSeen(cu, SeenSet))
619 return true;
620 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
621 if (const Function *Caller = I->getFunction())
622 if (SeenSet.contains(Caller))
623 return true;
624 }
625 }
626 return false;
627}
628
629void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
630 SmallPtrSet<const Function *, 32> SeenSet;
631 for (const Function &F : M) {
632 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
633 emitDeclaration(&F, O);
634 continue;
635 }
636
637 if (F.isDeclaration()) {
638 if (F.use_empty())
639 continue;
640 if (F.getIntrinsicID())
641 continue;
642 emitDeclaration(&F, O);
643 continue;
644 }
645 for (const User *U : F.users()) {
646 if (const Constant *C = dyn_cast<Constant>(U)) {
647 if (usedInGlobalVarDef(C)) {
648 // The use is in the initialization of a global variable
649 // that is a function pointer, so print a declaration
650 // for the original function
651 emitDeclaration(&F, O);
652 break;
653 }
654 // Emit a declaration of this function if the function that
655 // uses this constant expr has already been seen.
656 if (useFuncSeen(C, SeenSet)) {
657 emitDeclaration(&F, O);
658 break;
659 }
660 }
661
662 if (!isa<Instruction>(U))
663 continue;
664 const Function *Caller = cast<Instruction>(U)->getFunction();
665 if (!Caller)
666 continue;
667
668 // If a caller has already been seen, then the caller is
669 // appearing in the module before the callee. so print out
670 // a declaration for the callee.
671 if (SeenSet.contains(Caller)) {
672 emitDeclaration(&F, O);
673 break;
674 }
675 }
676 SeenSet.insert(&F);
677 }
678 for (const GlobalAlias &GA : M.aliases())
679 emitAliasDeclaration(&GA, O);
680}
681
682void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
683 // Construct a default subtarget off of the TargetMachine defaults. The
684 // rest of NVPTX isn't friendly to change subtargets per function and
685 // so the default TargetMachine will have all of the options.
686 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
687 const NVPTXSubtarget *STI = NTM.getSubtargetImpl();
688
689 // Emit header before any dwarf directives are emitted below.
690 emitHeader(M, *STI);
691}
692
693/// Create NVPTX-specific DwarfDebug handler.
697
699 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
700 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
701 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
702 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
703
704 // We need to call the parent's one explicitly.
705 bool Result = AsmPrinter::doInitialization(M);
706
707 GlobalsEmitted = false;
708
709 return Result;
710}
711
/// Emits the module-level preamble as one block of raw text: first the
/// function declarations, then every global variable of \p M in an order
/// where each global follows the globals it depends on.
// NOTE(review): the declarations of Globals, GVVisited and GVVisiting are not
// visible in this listing -- confirm against the original file.
712void NVPTXAsmPrinter::emitGlobals(const Module &M) {
713 SmallString<128> Str2;
714 raw_svector_ostream OS2(Str2);
715
716 emitDeclarations(M, OS2);
717
718 // As ptxas does not support forward references of globals, we need to first
719 // sort the list of module-level globals in def-use order. We visit each
720 // global variable in order, and ensure that we emit it *after* its dependent
721 // globals. We use a little extra memory maintaining both a set and a list to
722 // have fast searches while maintaining a strict ordering.
726
727 // Visit each global variable, in order
728 for (const GlobalVariable &I : M.globals())
729 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
730
731 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
732 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
733
734 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
735 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
736
737 // Print out module-level global variables in proper order
738 for (const GlobalVariable *GV : Globals)
739 printModuleLevelGV(GV, OS2, /*ProcessDemoted=*/false, STI);
740
741 OS2 << '\n';
742
743 OutStreamer->emitRawText(OS2.str());
744}
745
/// Emits a `.alias <alias>, <aliasee>;` line for \p GA as raw text, naming the
/// alias by its symbol and the aliasee by the name of its aliasee object.
// NOTE(review): the declaration of Str is not visible in this listing --
// confirm against the original file.
746void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
748 raw_svector_ostream OS(Str);
749
750 MCSymbol *Name = getSymbol(&GA);
751
752 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
753 << ";\n";
754
755 OutStreamer->emitRawText(OS.str());
756}
757
758NVPTXTargetStreamer *NVPTXAsmPrinter::getTargetStreamer() const {
759 return static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
760}
761
/// Scans the debug compile units of \p M and returns true as soon as one has
/// an emission kind handled by the `return true` arm of the switch; returns
/// false when no compile unit does.
// NOTE(review): the `case` labels of the switch are not visible in this
// listing, so which emission kinds count as "full" cannot be told from here --
// confirm against the original file.
762static bool hasFullDebugInfo(Module &M) {
763 for (DICompileUnit *CU : M.debug_compile_units()) {
764 switch(CU->getEmissionKind()) {
767 break;
770 return true;
771 }
772 }
773
774 return false;
775}
776
/// Emits the file header through the target streamer: the banner, the version
/// directive for the subtarget's PTX version, the target directive, and the
/// address-size directive taken from the module's pointer size in bits.
// NOTE(review): the last argument line of emitTargetDirective is not visible
// in this listing -- confirm against the original file.
777void NVPTXAsmPrinter::emitHeader(Module &M, const NVPTXSubtarget &STI) {
778 auto *TS = getTargetStreamer();
779
780 TS->emitBanner();
781
782 const unsigned PTXVersion = STI.getPTXVersion();
783 TS->emitVersionDirective(PTXVersion);
784
785 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
// The flag is set when the driver interface is NVCL.
786 bool TexModeIndependent = NTM.getDrvInterface() == NVPTX::NVCL;
787
788 TS->emitTargetDirective(STI.getTargetName(), TexModeIndependent,
790 TS->emitAddressSizeDirective(M.getDataLayout().getPointerSizeInBits());
791}
792
794 // If we did not emit any functions, then the global declarations have not
795 // yet been emitted.
796 if (!GlobalsEmitted) {
797 emitGlobals(M);
798 GlobalsEmitted = true;
799 }
800
801 // call doFinalization
802 bool ret = AsmPrinter::doFinalization(M);
803
805
806 auto *TS =
807 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
808 // Close the last emitted section
809 if (hasDebugInfo()) {
810 TS->closeLastSection();
811 // Emit empty .debug_macinfo section for better support of the empty files.
812 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
813 }
814
815 // Output last DWARF .file directives, if any.
817
818 return ret;
819}
820
821// This function emits appropriate linkage directives for
822// functions and global variables.
823//
824// extern function declaration -> .extern
825// extern function definition -> .visible
826// external global variable with init -> .visible
827// external without init -> .extern
828// appending -> not allowed, assert.
829// for any linkage other than
830// internal, private, linker_private,
831// linker_private_weak, linker_private_weak_def_auto,
832// we emit -> .weak.
833
834void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
835 raw_ostream &O) {
836 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
837 if (V->hasExternalLinkage()) {
838 if (const auto *GVar = dyn_cast<GlobalVariable>(V))
839 O << (GVar->hasInitializer() ? ".visible " : ".extern ");
840 else if (V->isDeclaration())
841 O << ".extern ";
842 else
843 O << ".visible ";
844 } else if (V->hasAppendingLinkage()) {
845 report_fatal_error("Symbol '" + (V->hasName() ? V->getName() : "") +
846 "' has unsupported appending linkage type");
847 } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
848 O << ".weak ";
849 }
850 }
851}
852
/// Prints the PTX text for the module-level global variable \p GVar to \p O.
///
/// Nothing is printed for globals in the "llvm.metadata" section or whose
/// name starts with "llvm." or "nvvm.". Otherwise a linkage directive is
/// printed first, followed by one of:
///  - a .texref / .surfref / .samplerref declaration for opaque types;
///  - a plain declaration when \p GVar is a declaration;
///  - a "has been demoted" comment when \p ProcessDemoted is false and
///    canDemoteGlobalVar() succeeds; the variable is then queued in
///    localDecls for its single using function;
///  - the full definition: address space, optional .attribute(.managed),
///    alignment, then either a scalar of fundamental type or an array for
///    wide integers, fp128, structs, arrays and fixed vectors.
// NOTE(review): several lines of this function (parts of the linkage
// conditions and the calls that report two of the errors) are not visible in
// this listing -- confirm against the original file.
853void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
854 raw_ostream &O, bool ProcessDemoted,
855 const NVPTXSubtarget &STI) {
856 // Skip meta data
857 if (GVar->hasSection())
858 if (GVar->getSection() == "llvm.metadata")
859 return;
860
861 // Skip LLVM intrinsic global variables
862 if (GVar->getName().starts_with("llvm.") ||
863 GVar->getName().starts_with("nvvm."))
864 return;
865
866 const DataLayout &DL = getDataLayout();
867
868 // GlobalVariables are always constant pointers themselves.
869 Type *ETy = GVar->getValueType();
870
871 if (GVar->hasExternalLinkage()) {
872 if (GVar->hasInitializer())
873 O << ".visible ";
874 else
875 O << ".extern ";
876 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
878 O << ".common ";
879 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
881 GVar->hasCommonLinkage()) {
882 O << ".weak ";
883 }
884
885 const PTXOpaqueType OpaqueType = getPTXOpaqueType(*GVar);
886
887 if (OpaqueType == PTXOpaqueType::Texture) {
888 O << ".global .texref " << getTextureName(*GVar) << ";\n";
889 return;
890 }
891
892 if (OpaqueType == PTXOpaqueType::Surface) {
893 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
894 return;
895 }
896
897 if (GVar->isDeclaration()) {
898 // (extern) declarations, no definition or initializer
899 // Currently the only known declaration is for an automatic __local
900 // (.shared) promoted to global.
901 emitPTXGlobalVariable(GVar, O, STI);
902 O << ";\n";
903 return;
904 }
905
906 if (OpaqueType == PTXOpaqueType::Sampler) {
907 O << ".global .samplerref " << getSamplerName(*GVar);
908
909 const Constant *Initializer = nullptr;
910 if (GVar->hasInitializer())
911 Initializer = GVar->getInitializer();
912 const ConstantInt *CI = nullptr;
913 if (Initializer)
914 CI = dyn_cast<ConstantInt>(Initializer);
// Only an integer-constant initializer is decoded into sampler properties.
915 if (CI) {
916 unsigned sample = CI->getZExtValue();
917
918 O << " = { ";
919
// The same address-mode field is printed for addr_mode_0 through addr_mode_2.
920 for (int i = 0,
921 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
922 i < 3; i++) {
923 O << "addr_mode_" << i << " = ";
924 switch (addr) {
925 case 0:
926 O << "wrap";
927 break;
928 case 1:
929 O << "clamp_to_border";
930 break;
931 case 2:
932 O << "clamp_to_edge";
933 break;
934 case 3:
935 O << "wrap";
936 break;
937 case 4:
938 O << "mirror";
939 break;
940 }
941 O << ", ";
942 }
943 O << "filter_mode = ";
944 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
945 case 0:
946 O << "nearest";
947 break;
948 case 1:
949 O << "linear";
950 break;
951 case 2:
952 llvm_unreachable("Anisotropic filtering is not supported");
953 default:
954 O << "nearest";
955 break;
956 }
957 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
958 O << ", force_unnormalized_coords = 1";
959 }
960 O << " }";
961 }
962
963 O << ";\n";
964 return;
965 }
966
// Private globals with certain name prefixes, or without any use, are dropped.
967 if (GVar->hasPrivateLinkage()) {
968 if (GVar->getName().starts_with("unrollpragma"))
969 return;
970
971 // FIXME - need better way (e.g. Metadata) to avoid generating this global
972 if (GVar->getName().starts_with("filename"))
973 return;
974 if (GVar->use_empty())
975 return;
976 }
977
978 const Function *DemotedFunc = nullptr;
979 if (!ProcessDemoted && canDemoteGlobalVar(GVar, DemotedFunc)) {
980 O << "// " << GVar->getName() << " has been demoted\n";
981 localDecls[DemotedFunc].push_back(GVar);
982 return;
983 }
984
985 O << ".";
986 emitPTXAddressSpace(GVar->getAddressSpace(), O);
987
988 if (isManaged(*GVar)) {
989 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
991 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
992 O << " .attribute(.managed)";
993 }
994
// Use the explicit alignment when there is one, else the preferred alignment
// of the value type.
995 O << " .align "
996 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
997
998 if (ETy->isPointerTy() || ((ETy->isIntegerTy() || ETy->isFloatingPointTy()) &&
999 ETy->getScalarSizeInBits() <= 64)) {
1000 O << " .";
1001 // Special case: ABI requires that we use .u8 for predicates
1002 if (ETy->isIntegerTy(1))
1003 O << "u8";
1004 else
1005 O << getPTXFundamentalTypeStr(ETy, false);
1006 O << " ";
1007 getSymbol(GVar)->print(O, MAI);
1008
1009 // PTX allows variable initialization only for constant and global state
1010 // spaces.
1011 if (GVar->hasInitializer()) {
1012 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1013 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1014 const Constant *Initializer = GVar->getInitializer();
1015 // 'undef' is treated as there is no value specified.
1016 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1017 O << " = ";
1018 printScalarConstant(Initializer, O);
1019 }
1020 } else {
1021 // The frontend adds zero-initializer to device and constant variables
1022 // that don't have an initial value, and UndefValue to shared
1023 // variables, so skip warning for this case.
1024 if (!GVar->getInitializer()->isNullValue() &&
1025 !isa<UndefValue>(GVar->getInitializer())) {
1026 report_fatal_error("initial value of '" + GVar->getName() +
1027 "' is not allowed in addrspace(" +
1028 Twine(GVar->getAddressSpace()) + ")");
1029 }
1030 }
1031 }
1032 } else {
1033 // Although PTX has direct support for struct type and array type and
1034 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1035 // targets that support these high level field accesses. Structs, arrays
1036 // and vectors are lowered into arrays of bytes.
1037 switch (ETy->getTypeID()) {
1038 case Type::IntegerTyID: // Integers larger than 64 bits
1039 case Type::FP128TyID:
1040 case Type::StructTyID:
1041 case Type::ArrayTyID:
1042 case Type::FixedVectorTyID: {
1043 const uint64_t ElementSize = DL.getTypeStoreSize(ETy);
1044 // PTX allows variable initialization only for constant and
1045 // global state spaces.
1046 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1047 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1048 GVar->hasInitializer()) {
1049 const Constant *Initializer = GVar->getInitializer();
1050 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1051 AggBuffer aggBuffer(ElementSize, *this);
1052 bufferAggregateConstant(Initializer, &aggBuffer);
// Initializers that contain symbols are printed as pointer-sized words when
// the size and every symbol position are pointer-aligned, else as bytes.
1053 if (aggBuffer.numSymbols()) {
1054 const unsigned int ptrSize = MAI->getCodePointerSize();
1055 if (ElementSize % ptrSize ||
1056 !aggBuffer.allSymbolsAligned(ptrSize)) {
1057 // Print in bytes and use the mask() operator for pointers.
1058 if (!STI.hasMaskOperator())
1060 "initialized packed aggregate with pointers '" +
1061 GVar->getName() +
1062 "' requires at least PTX ISA version 7.1");
1063 O << " .u8 ";
1064 getSymbol(GVar)->print(O, MAI);
1065 O << "[" << ElementSize << "] = {";
1066 aggBuffer.printBytes(O);
1067 O << "}";
1068 } else {
1069 O << " .u" << ptrSize * 8 << " ";
1070 getSymbol(GVar)->print(O, MAI);
1071 O << "[" << ElementSize / ptrSize << "] = {";
1072 aggBuffer.printWords(O);
1073 O << "}";
1074 }
1075 } else {
1076 O << " .b8 ";
1077 getSymbol(GVar)->print(O, MAI);
1078 O << "[" << ElementSize << "] = {";
1079 aggBuffer.printBytes(O);
1080 O << "}";
1081 }
1082 } else {
1083 O << " .b8 ";
1084 getSymbol(GVar)->print(O, MAI);
1085 if (ElementSize)
1086 O << "[" << ElementSize << "]";
1087 }
1088 } else {
1089 O << " .b8 ";
1090 getSymbol(GVar)->print(O, MAI);
1091 if (ElementSize)
1092 O << "[" << ElementSize << "]";
1093 }
1094 break;
1095 }
1096 default:
1097 llvm_unreachable("type not supported yet");
1098 }
1099 }
1100 O << ";\n";
1101}
1102
/// Prints the \p nSym-th symbol recorded in this buffer to \p os.
///
/// A global value is printed by symbol name, wrapped in `generic(...)` when
/// EmitGeneric is set, the pre-stripping value is a pointer in address space
/// 0, and the symbol is not a function. Otherwise a pre-stripping value that
/// is a constant expression is lowered to an MCExpr and printed. Any other
/// kind of symbol is unreachable.
// NOTE(review): the line that defines PTy is not visible in this listing;
// going by the comment below it is presumably derived from the type of v0 --
// confirm against the original file.
1103void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1104 const Value *v = Symbols[nSym];
1105 const Value *v0 = SymbolsBeforeStripping[nSym];
1106 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1107 MCSymbol *Name = AP.getSymbol(GVar);
1109 // Is v0 a generic pointer?
1110 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1111 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1112 os << "generic(";
1113 Name->print(os, AP.MAI);
1114 os << ")";
1115 } else {
1116 Name->print(os, AP.MAI);
1117 }
1118 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1119 const MCExpr *Expr = AP.lowerConstantForGV(CExpr, false);
1120 AP.printMCExpr(*Expr, os);
1121 } else
1122 llvm_unreachable("symbol type unknown");
1123}
1124
1125void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1126 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1127 // Do not emit trailing zero initializers. They will be zero-initialized by
1128 // ptxas. This saves on both space requirements for the generated PTX and on
1129 // memory use by ptxas. (See:
1130 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1131 unsigned int InitializerCount = Size;
1132 // TODO: symbols make this harder, but it would still be good to trim trailing
1133 // 0s for aggs with symbols as well.
1134 if (numSymbols() == 0)
1135 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1136 InitializerCount--;
1137
1138 symbolPosInBuffer.push_back(InitializerCount);
1139 unsigned int nSym = 0;
1140 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1141 for (unsigned int pos = 0; pos < InitializerCount;) {
1142 if (pos)
1143 os << ", ";
1144 if (pos != nextSymbolPos) {
1145 os << (unsigned int)buffer[pos];
1146 ++pos;
1147 continue;
1148 }
1149 // Generate a per-byte mask() operator for the symbol, which looks like:
1150 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1151 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1152 std::string symText;
1153 llvm::raw_string_ostream oss(symText);
1154 printSymbol(nSym, oss);
1155 for (unsigned i = 0; i < ptrSize; ++i) {
1156 if (i)
1157 os << ", ";
1158 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1159 os << "(" << symText << ")";
1160 }
1161 pos += ptrSize;
1162 nextSymbolPos = symbolPosInBuffer[++nSym];
1163 assert(nextSymbolPos >= pos);
1164 }
1165}
1166
1167void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1168 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1169 symbolPosInBuffer.push_back(Size);
1170 unsigned int nSym = 0;
1171 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1172 assert(nextSymbolPos % ptrSize == 0);
1173 for (unsigned int pos = 0; pos < Size; pos += ptrSize) {
1174 if (pos)
1175 os << ", ";
1176 if (pos == nextSymbolPos) {
1177 printSymbol(nSym, os);
1178 nextSymbolPos = symbolPosInBuffer[++nSym];
1179 assert(nextSymbolPos % ptrSize == 0);
1180 assert(nextSymbolPos >= pos + ptrSize);
1181 } else if (ptrSize == 4)
1182 os << support::endian::read32le(&buffer[pos]);
1183 else
1184 os << support::endian::read64le(&buffer[pos]);
1185 }
1186}
1187
1188void NVPTXAsmPrinter::emitDemotedVars(const Function *F, raw_ostream &O) {
1189 auto It = localDecls.find(F);
1190 if (It == localDecls.end())
1191 return;
1192
1193 ArrayRef<const GlobalVariable *> GVars = It->second;
1194
1195 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1196 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
1197
1198 for (const GlobalVariable *GV : GVars) {
1199 O << "\t// demoted variable\n\t";
1200 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1201 }
1202}
1203
/// Print the PTX state-space keyword ("local", "global", "const" or "shared")
/// for \p AddressSpace to \p O, without a leading '.'. Any other address space
/// is reported as a fatal error.
///
/// NOTE(review): the case labels (listing lines 1207, 1210, 1213 and 1216) are
/// absent from this listing, so the address-space value that selects each
/// keyword cannot be read off here -- confirm against the upstream file.
1204void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1205 raw_ostream &O) const {
1206 switch (AddressSpace) {
1208 O << "local";
1209 break;
1211 O << "global";
1212 break;
1214 O << "const";
1215 break;
1217 O << "shared";
1218 break;
1219 default:
1220 report_fatal_error("Bad address space found while emitting PTX: " +
1221 llvm::Twine(AddressSpace));
1222 break;
1223 }
1224}
1225
1226std::string
1227NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1228 switch (Ty->getTypeID()) {
1229 case Type::IntegerTyID: {
1230 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1231 if (NumBits == 1)
1232 return "pred";
1233 if (NumBits <= 64) {
1234 std::string name = "u";
1235 return name + utostr(NumBits);
1236 }
1237 llvm_unreachable("Integer too large");
1238 break;
1239 }
1240 case Type::BFloatTyID:
1241 case Type::HalfTyID:
1242 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1243 // PTX assembly.
1244 return "b16";
1245 case Type::FloatTyID:
1246 return "f32";
1247 case Type::DoubleTyID:
1248 return "f64";
1249 case Type::PointerTyID: {
1250 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1251 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1252
1253 if (PtrSize == 64)
1254 if (useB4PTR)
1255 return "b64";
1256 else
1257 return "u64";
1258 else if (useB4PTR)
1259 return "b32";
1260 else
1261 return "u32";
1262 }
1263 default:
1264 break;
1265 }
1266 llvm_unreachable("unexpected type");
1267}
1268
/// Print a PTX declaration (no initializer, no trailing ';') for \p GVar to
/// \p O: the state space, an optional ".attribute(.managed)", the alignment,
/// and then the type and symbol name.
///
/// 128-bit scalars are printed as a 16-element .b8 array, floating-point,
/// integer and pointer types use getPTXFundamentalTypeStr, and struct/array
/// types are printed as a .b8 array sized by the type's store size.
1269void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1270 raw_ostream &O,
1271 const NVPTXSubtarget &STI) {
1272 const DataLayout &DL = getDataLayout();
1273
1274 // GlobalVariables are always constant pointers themselves.
1275 Type *ETy = GVar->getValueType();
1276
1277 O << ".";
1278 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1279 if (isManaged(*GVar)) {
// NOTE(review): listing line 1281 is absent; the string below is presumably
// the argument of a fatal-error call guarded by this version check -- confirm
// against the upstream file.
1280 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
1282 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1283
1284 O << " .attribute(.managed)";
1285 }
// An explicit alignment on the variable wins; otherwise the preferred
// alignment of the value type is used.
1286 O << " .align "
1287 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1288
1289 // Special case for i128/fp128
1290 if (ETy->getScalarSizeInBits() == 128) {
1291 O << " .b8 ";
1292 getSymbol(GVar)->print(O, MAI);
1293 O << "[16]";
1294 return;
1295 }
1296
1297 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1298 O << " ." << getPTXFundamentalTypeStr(ETy) << " ";
1299 getSymbol(GVar)->print(O, MAI);
1300 return;
1301 }
1302
1303 int64_t ElementSize = 0;
1304
1305 // Although PTX has direct support for struct type and array type and LLVM IR
1306 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1307 // support these high level field accesses. Structs and arrays are lowered
1308 // into arrays of bytes.
1309 switch (ETy->getTypeID()) {
1310 case Type::StructTyID:
1311 case Type::ArrayTyID:
// NOTE(review): listing line 1312 is absent; it may hold a further case label
// -- confirm against the upstream file.
1313 ElementSize = DL.getTypeStoreSize(ETy);
1314 O << " .b8 ";
1315 getSymbol(GVar)->print(O, MAI);
// A zero size leaves the brackets empty ("[]").
1316 O << "[";
1317 if (ElementSize) {
1318 O << ElementSize;
1319 }
1320 O << "]";
1321 break;
1322 default:
1323 llvm_unreachable("type not supported yet");
1324 }
1325}
1326
/// Print the PTX parameter list of \p F to \p O.
///
/// Prints "()" when \p F has no arguments and is not variadic. Otherwise
/// prints "(\n", one ".param" declaration per argument separated by ",\n", an
/// unsized .b8 array parameter for variadic functions, and a closing "\n)".
///
/// NOTE(review): several lines of this function are absent from this listing
/// (see the notes below), so the comments describe only what is visible.
1327void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1328 const DataLayout &DL = getDataLayout();
1329 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1330 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
// MFI is null when no MachineFunction is currently set.
1331 const NVPTXMachineFunctionInfo *MFI =
1332 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1333
1334 bool IsFirst = true;
1335 const bool IsKernelFunc = isKernelFunction(*F);
1336
1337 if (F->arg_empty() && !F->isVarArg()) {
1338 O << "()";
1339 return;
1340 }
1341
1342 O << "(\n";
1343
1344 for (const Argument &Arg : F->args()) {
1345 Type *Ty = Arg.getType();
1346 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1347
1348 if (!IsFirst)
1349 O << ",\n";
1350
1351 IsFirst = false;
1352
1353 // Handle image/sampler parameters
1354 if (IsKernelFunc) {
1355 const PTXOpaqueType ArgOpaqueType = getPTXOpaqueType(Arg);
1356 if (ArgOpaqueType != PTXOpaqueType::None) {
// The ".u64 .ptr" prefix is emitted unless MFI reports ParamSym as an image
// handle symbol.
1357 const bool EmitImgPtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1358 O << "\t.param ";
1359 if (EmitImgPtr)
1360 O << ".u64 .ptr ";
1361
// NOTE(review): the case labels of this switch (listing lines 1363, 1366,
// 1369 and 1372) are absent from this listing.
1362 switch (ArgOpaqueType) {
1364 O << ".samplerref ";
1365 break;
1367 O << ".texref ";
1368 break;
1370 O << ".surfref ";
1371 break;
1373 llvm_unreachable("handled above");
1374 }
1375 O << ParamSym;
1376 continue;
1377 }
1378 }
1379
// Alignment helper: an explicit alignment returned by getAlign() for this
// argument index wins; otherwise the larger of the optimized type alignment
// and, for byval arguments, the parameter alignment is used.
1380 auto GetOptimalAlignForParam = [&DL, F, &Arg](Type *Ty) -> Align {
1381 if (MaybeAlign StackAlign =
1382 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1383 return StackAlign.value();
1384
1385 Align TypeAlign = getFunctionParamOptimizedAlign(F, Ty, DL);
1386 MaybeAlign ParamAlign =
1387 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1388 return std::max(TypeAlign, ParamAlign.valueOrOne());
1389 };
1390
1391 if (Arg.hasByValAttr()) {
1392 // param has byVal attribute.
1393 Type *ETy = Arg.getParamByValType();
1394 assert(ETy && "Param should have byval type");
1395
1396 // Print .param .align <a> .b8 .param[size];
1397 // <a> = optimal alignment for the element type; always multiple of
1398 // PAL.getParamAlignment
1399 // size = typeallocsize of element type
// NOTE(review): listing line 1402 is absent; it presumably names the function
// called for the non-kernel case with the arguments on the next line.
1400 const Align OptimalAlign =
1401 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1403 F, ETy, Arg.getParamAlign().valueOrOne(), DL);
1404
1405 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1406 << "[" << DL.getTypeAllocSize(ETy) << "]";
1407 continue;
1408 }
1409
1410 if (shouldPassAsArray(Ty)) {
1411 // Just print .param .align <a> .b8 .param[size];
1412 // <a> = optimal alignment for the element type; always multiple of
1413 // PAL.getParamAlignment
1414 // size = typeallocsize of element type
1415 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1416
1417 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1418 << "[" << DL.getTypeAllocSize(Ty) << "]";
1419
1420 continue;
1421 }
1422 // Just a scalar
1423 auto *PTy = dyn_cast<PointerType>(Ty);
1424 unsigned PTySizeInBits = 0;
1425 if (PTy) {
1426 PTySizeInBits =
1427 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1428 assert(PTySizeInBits && "Invalid pointer size");
1429 }
1430
1431 if (IsKernelFunc) {
1432 if (PTy) {
1433 O << "\t.param .u" << PTySizeInBits << " .ptr";
1434
// NOTE(review): the case labels of this switch (listing lines 1438, 1441,
// 1444 and 1447) are absent from this listing. The default case prints no
// state-space qualifier.
1435 switch (PTy->getAddressSpace()) {
1436 default:
1437 break;
1439 O << " .global";
1440 break;
1442 O << " .shared";
1443 break;
1445 O << " .const";
1446 break;
1448 O << " .local";
1449 break;
1450 }
1451
1452 O << " .align " << Arg.getParamAlign().valueOrOne().value() << " "
1453 << ParamSym;
1454 continue;
1455 }
1456
1457 // non-pointer scalar to kernel func
1458 O << "\t.param .";
1459 // Special case: predicate operands become .u8 types
1460 if (Ty->isIntegerTy(1))
1461 O << "u8";
1462 else
1463 O << getPTXFundamentalTypeStr(Ty);
1464 O << " " << ParamSym;
1465 continue;
1466 }
1467 // Non-kernel function, just print .param .b<size> for ABI
1468 // and .reg .b<size> for non-ABI
1469 unsigned Size;
1470 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
1471 Size = promoteScalarArgumentSize(ITy->getBitWidth());
1472 } else if (PTy) {
1473 assert(PTySizeInBits && "Invalid pointer size");
1474 Size = PTySizeInBits;
// NOTE(review): listing line 1476 is absent; it presumably assigns Size for
// the remaining scalar types in this else branch.
1475 } else
1477 O << "\t.param .b" << Size << " " << ParamSym;
1478 }
1479
// Variadic functions get one extra unsized byte-array parameter.
1480 if (F->isVarArg()) {
1481 if (!IsFirst)
1482 O << ",\n";
1483 O << "\t.param .align " << STI.getMaxRequiredAlignment() << " .b8 "
1484 << TLI->getParamName(F, /* vararg */ -1) << "[]";
1485 }
1486
1487 O << "\n)";
1488}
1489
/// Build the per-register-class numbering of the virtual registers of \p MF
/// in VRegMapping and emit, as raw text, the local stack depot with %SP/%SPL
/// (only when the stack size is non-zero) followed by one ".reg" declaration
/// for each register class that has at least one mapped register.
1490void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1491 const MachineFunction &MF) {
1492 SmallString<128> Str;
1493 raw_svector_ostream O(Str);
1494
1495 // Map the global virtual register number to a register class specific
1496 // virtual register number starting from 1 with that class.
1497 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1498
1499 // Emit the Fake Stack Object
1500 const MachineFrameInfo &MFI = MF.getFrameInfo();
1501 int64_t NumBytes = MFI.getStackSize();
1502 if (NumBytes) {
1503 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1504 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
// %SP and %SPL are declared 64-bit or 32-bit wide according to is64Bit().
1505 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1506 O << "\t.reg .b64 \t%SP;\n"
1507 << "\t.reg .b64 \t%SPL;\n";
1508 } else {
1509 O << "\t.reg .b32 \t%SP;\n"
1510 << "\t.reg .b32 \t%SPL;\n";
1511 }
1512 }
1513
1514 // Go through all virtual registers to establish the mapping between the
1515 // global virtual
1516 // register number and the per class virtual register number.
1517 // We use the per class virtual register number in the ptx output.
1518 for (unsigned I : llvm::seq(MRI->getNumVirtRegs())) {
// NOTE(review): listing line 1519 is absent; VR is used below without a
// visible declaration and is presumably the virtual register for index I.
// Registers with neither uses nor defs are skipped.
1520 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1521 continue;
1522 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1523 RCRegMap[VR] = RCRegMap.size() + 1;
1524 }
1525
1526 // Emit declaration of the virtual registers or 'physical' registers for
1527 // each register class
1528 for (const TargetRegisterClass *RC : TRI->regclasses()) {
1529 const unsigned N = VRegMapping[RC].size();
1530
1531 // Only declare those registers that may be used.
1532 if (N) {
1533 const StringRef RCName = getNVPTXRegClassName(RC);
1534 const StringRef RCStr = getNVPTXRegClassStr(RC);
// The declared count is N + 1 because the per-class numbers start at 1.
1535 O << "\t.reg " << RCName << " \t" << RCStr << "<" << (N + 1) << ">;\n";
1536 }
1537 }
1538
1539 OutStreamer->emitRawText(O.str());
1540}
1541
1542/// Translate virtual register numbers in DebugInfo locations to their printed
1543/// encodings, as used by CUDA-GDB.
1544void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1545 const MachineFunction &MF) {
1546 const NVPTXSubtarget &STI = MF.getSubtarget<NVPTXSubtarget>();
1547 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1548
1549 // Clear the old mapping, and add the new one. This mapping is used after the
1550 // printing of the current function is complete, but before the next function
1551 // is printed.
1552 registerInfo->clearDebugRegisterMap();
1553
1554 for (auto &classMap : VRegMapping) {
1555 for (auto &registerMapping : classMap.getSecond()) {
1556 auto reg = registerMapping.getFirst();
1557 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1558 }
1559 }
1560}
1561
/// Print \p Fp to \p O as a PTX hexadecimal floating-point literal: the prefix
/// "0f" followed by 8 upper-case hex digits for float, or "0d" followed by 16
/// upper-case hex digits for double. Any other type is unreachable.
1562void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp,
1563 raw_ostream &O) const {
1564 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1565 bool ignored;
1566 unsigned int numHex;
1567 const char *lead;
1568
// NOTE(review): listing lines 1572 and 1576 are absent from the two branches
// below; they presumably use `ignored` (e.g. an APF conversion) -- confirm
// against the upstream file.
1569 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1570 numHex = 8;
1571 lead = "0f";
1573 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1574 numHex = 16;
1575 lead = "0d";
1577 } else
1578 llvm_unreachable("unsupported fp type");
1579
// Print the raw bit pattern of the value, zero-padded to numHex digits.
1580 APInt API = APF.bitcastToAPInt();
1581 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1582}
1583
1584void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1585 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1586 O << CI->getValue();
1587 return;
1588 }
1589 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1590 printFPConstant(CFP, O);
1591 return;
1592 }
1593 if (isa<ConstantPointerNull>(CPV)) {
1594 O << "0";
1595 return;
1596 }
1597 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1598 const bool IsNonGenericPointer = GVar->getAddressSpace() != 0;
1599 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1600 O << "generic(";
1601 getSymbol(GVar)->print(O, MAI);
1602 O << ")";
1603 } else {
1604 getSymbol(GVar)->print(O, MAI);
1605 }
1606 return;
1607 }
1608 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1609 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1610 printMCExpr(*E, O);
1611 return;
1612 }
1613 llvm_unreachable("Not scalar type found in printScalarConstant()");
1614}
1615
/// Append the constant \p CPV to \p AggBuffer as little-endian bytes.
///
/// \p Bytes is the number of bytes the value should occupy in the buffer. For
/// undef/null values, 0 means "use the alloc size of the type of \p CPV".
/// Pointer-typed values and ptrtoint expressions are recorded as symbols
/// followed by AllocSize zero bytes; aggregates are forwarded to
/// bufferAggregateConstant.
1616void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1617 AggBuffer *AggBuffer) {
1618 const DataLayout &DL = getDataLayout();
1619 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1620 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1621 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1622 // only the space allocated by CPV.
1623 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1624 return;
1625 }
1626
1627 // Helper for filling AggBuffer with APInts.
1628 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1629 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1630 SmallVector<unsigned char, 16> Buf(NumBytes);
1631 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1632 // input's bit width, and i1 arrays may not have a length that is a multiple
1633 // of 8. We handle the last byte separately, so we never request out of
1634 // bounds bits.
1635 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1636 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1637 }
1638 size_t LastBytePosition = (NumBytes - 1) * 8;
1639 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1640 Buf[NumBytes - 1] =
1641 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1642 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1643 };
1644
1645 switch (CPV->getType()->getTypeID()) {
1646 case Type::IntegerTyID:
1647 if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1648 AddIntToBuffer(CI->getValue());
1649 break;
1650 }
1651 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
// NOTE(review): listing line 1653 is absent; it presumably tries to fold
// Cexpr into a ConstantInt before the ptrtoint case below is considered.
1652 if (const auto *CI =
1654 AddIntToBuffer(CI->getValue());
1655 break;
1656 }
1657 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
// Record the stripped pointer operand as a symbol and reserve AllocSize zero
// bytes for it in the buffer.
1658 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1659 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1660 AggBuffer->addZeros(AllocSize);
1661 break;
1662 }
1663 }
1664 llvm_unreachable("unsupported integer const type");
1665 break;
1666
1667 case Type::HalfTyID:
1668 case Type::BFloatTyID:
1669 case Type::FloatTyID:
1670 case Type::DoubleTyID:
1671 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1672 break;
1673
1674 case Type::PointerTyID: {
1675 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1676 AggBuffer->addSymbol(GVar, GVar);
1677 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1678 const Value *v = Cexpr->stripPointerCasts();
1679 AggBuffer->addSymbol(v, Cexpr);
1680 }
// The pointer's bytes are zero-filled whether or not a symbol was recorded.
1681 AggBuffer->addZeros(AllocSize);
1682 break;
1683 }
1684
// NOTE(review): listing lines 1686 and 1688 are absent; they presumably hold
// a further case label and the `if` that opens the branch closed by
// "} else if" below -- confirm against the upstream file.
1685 case Type::ArrayTyID:
1687 case Type::StructTyID: {
1689 bufferAggregateConstant(CPV, AggBuffer);
1690 if (Bytes > AllocSize)
1691 AggBuffer->addZeros(Bytes - AllocSize);
1692 } else if (isa<ConstantAggregateZero>(CPV))
1693 AggBuffer->addZeros(Bytes);
1694 else
1695 llvm_unreachable("Unexpected Constant type");
1696 break;
1697 }
1698
1699 default:
1700 llvm_unreachable("unsupported type");
1701 }
1702}
1703
/// Append the constant \p CPV to \p aggBuffer, dispatching on the kind of
/// constant: fixed-vector, integer, fp128, array, vector, data-sequential and
/// struct constants are handled; anything else is unreachable.
1704void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1705 AggBuffer *aggBuffer) {
1706 const DataLayout &DL = getDataLayout();
1707
// Appends Val one byte at a time, lowest byte first; bits beyond the last
// whole byte are not appended.
1708 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1709 for (unsigned I : llvm::seq(Val.getBitWidth() / 8))
1710 Buffer->addByte(Val.extractBitsAsZExtValue(8, I * 8));
1711 };
1712
1713 // Integer or floating point vector splats.
// NOTE(review): listing line 1714 is absent; it presumably holds the condition
// that opens the block closed at listing line 1720.
1715 if (auto *VTy = dyn_cast<FixedVectorType>(CPV->getType())) {
1716 for (unsigned I : llvm::seq(VTy->getNumElements()))
1717 bufferLEByte(CPV->getAggregateElement(I), 0, aggBuffer);
1718 return;
1719 }
1720 }
1721
1722 // Integers of arbitrary width
1723 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1724 assert(CI->getType()->isIntegerTy() && "Expected integer constant!");
1725 ExtendBuffer(CI->getValue(), aggBuffer);
1726 return;
1727 }
1728
1729 // f128
1730 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1731 assert(CFP->getType()->isFloatingPointTy() && "Expected fp constant!");
1732 if (CFP->getType()->isFP128Ty()) {
1733 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1734 return;
1735 }
1736 }
1737
1738 // Buffer arrays one element at a time.
1739 if (isa<ConstantArray>(CPV)) {
1740 for (const auto &Op : CPV->operands())
1741 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1742 return;
1743 }
1744
1745 // Constant vectors
1746 if (const auto *CVec = dyn_cast<ConstantVector>(CPV)) {
1747 bufferAggregateConstVec(CVec, aggBuffer);
1748 return;
1749 }
1750
1751 if (const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1752 for (unsigned I : llvm::seq(CDS->getNumElements()))
1753 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(I)), 0, aggBuffer);
1754 return;
1755 }
1756
1757 if (isa<ConstantStruct>(CPV)) {
1758 if (CPV->getNumOperands()) {
1759 StructType *ST = cast<StructType>(CPV->getType());
1760 for (unsigned I : llvm::seq(CPV->getNumOperands())) {
// Each field is given the distance to the next field's offset (or, for the
// last field, to the end of the struct's alloc size), so padding is included
// in Bytes.
1761 int EndOffset = (I + 1 == CPV->getNumOperands())
1762 ? DL.getStructLayout(ST)->getElementOffset(0) +
1763 DL.getTypeAllocSize(ST)
1764 : DL.getStructLayout(ST)->getElementOffset(I + 1);
1765 int Bytes = EndOffset - DL.getStructLayout(ST)->getElementOffset(I);
1766 bufferLEByte(cast<Constant>(CPV->getOperand(I)), Bytes, aggBuffer);
1767 }
1768 }
1769 return;
1770 }
1771 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1772}
1773
/// Append the constant vector \p CV to \p aggBuffer.
///
/// When the buffer size is at least the element count, each operand is
/// buffered individually. Otherwise the elements are expected to be sub-byte
/// integers: groups of 8 / element-size elements are merged into one i8 each,
/// and an incomplete final group is padded with zero elements.
1774void NVPTXAsmPrinter::bufferAggregateConstVec(const ConstantVector *CV,
1775 AggBuffer *aggBuffer) {
1776 unsigned NumElems = CV->getType()->getNumElements();
1777 const unsigned BuffSize = aggBuffer->getBufferSize();
1778
1779 // Buffer one element at a time if we have allocated enough buffer space.
1780 if (BuffSize >= NumElems) {
1781 for (const auto &Op : CV->operands())
1782 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1783 return;
1784 }
1785
1786 // Sub-byte datatypes will have more elements than bytes allocated for the
1787 // buffer. Merge consecutive elements to form a full byte. We expect that 8 %
1788 // sub-byte-elem-size should be 0 and current expected usage is for i4 (for
1789 // e2m1-fp4 types).
1790 Type *ElemTy = CV->getType()->getElementType();
1791 assert(ElemTy->isIntegerTy() && "Expected integer data type.");
1792 unsigned ElemTySize = ElemTy->getPrimitiveSizeInBits();
1793 assert(ElemTySize < 8 && "Expected sub-byte data type.");
1794 assert(8 % ElemTySize == 0 && "Element type size must evenly divide a byte.");
1795 // Number of elements to merge to form a full byte.
1796 unsigned NumElemsPerByte = 8 / ElemTySize;
1797 unsigned NumCompleteBytes = NumElems / NumElemsPerByte;
1798 unsigned NumTailElems = NumElems % NumElemsPerByte;
1799
1800 // Helper lambda to constant-fold sub-vector of sub-byte type elements into
1801 // i8. Start and end indices of the sub-vector is provided, along with number
1802 // of padding zeros if required.
1803 auto ConvertSubCVtoInt8 = [this, &ElemTy](const ConstantVector *CV,
1804 unsigned Start, unsigned End,
1805 unsigned NumPaddingZeros = 0) {
1806 // Collect elements to create sub-vector.
1807 SmallVector<Constant *, 8> SubCVElems;
1808 for (unsigned I : llvm::seq(Start, End))
1809 SubCVElems.push_back(CV->getAggregateElement(I));
1810
1811 // Optionally pad with zeros.
1812 for (auto _ : llvm::seq(NumPaddingZeros))
1813 SubCVElems.push_back(ConstantInt::getNullValue(ElemTy));
1814
1815 auto SubCV = ConstantVector::get(SubCVElems);
1816 Type *Int8Ty = IntegerType::get(SubCV->getContext(), 8);
1817
1818 // Merge elements of the sub-vector using ConstantFolding.
// NOTE(review): listing line 1820 is absent; it presumably names the
// cast/fold call that wraps the bitcast expression on the next line.
1819 ConstantInt *MergedElem =
1821 ConstantExpr::getBitCast(const_cast<Constant *>(SubCV), Int8Ty),
1822 getDataLayout()));
1823
// NOTE(review): listing line 1825 is absent; the string below is presumably
// the message of an error call taken when folding yields no ConstantInt.
1824 if (!MergedElem)
1826 "Cannot lower vector global with unusual element type");
1827
1828 return MergedElem;
1829 };
1830
1831 // Iterate through elements of vector one chunk at a time and buffer that
1832 // chunk.
1833 for (unsigned ByteIdx : llvm::seq(NumCompleteBytes))
1834 bufferLEByte(ConvertSubCVtoInt8(CV, ByteIdx * NumElemsPerByte,
1835 (ByteIdx + 1) * NumElemsPerByte),
1836 0, aggBuffer);
1837
1838 // For unevenly sized vectors add tail padding zeros.
1839 if (NumTailElems > 0)
1840 bufferLEByte(ConvertSubCVtoInt8(CV, NumElems - NumTailElems, NumElems,
1841 NumElemsPerByte - NumTailElems),
1842 0, aggBuffer);
1843}
1844
1845/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1846/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1847/// expressions that are representable in PTX and create
1848/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
///
/// \p ProcessingGeneric is set to true when recursing through an
/// addrspacecast to address space 0; a GlobalValue reached with it set is
/// wrapped in an NVPTXGenericMCSymbolRefExpr. Expressions that cannot be
/// lowered, even after a constant-folding attempt, are reported as a fatal
/// error.
1849const MCExpr *
1850NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
1851 bool ProcessingGeneric) const {
1852 MCContext &Ctx = OutContext;
1853
// Null and undef values both lower to the constant 0.
1854 if (CV->isNullValue() || isa<UndefValue>(CV))
1855 return MCConstantExpr::create(0, Ctx);
1856
1857 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1858 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1859
1860 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1861 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1862 if (ProcessingGeneric)
1863 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1864 return Expr;
1865 }
1866
1867 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1868 if (!CE) {
1869 llvm_unreachable("Unknown constant value to lower!");
1870 }
1871
// Cases that `break` fall through to the folding attempt and error report at
// the end of the function.
1872 switch (CE->getOpcode()) {
1873 default:
1874 break; // Error
1875
1876 case Instruction::AddrSpaceCast: {
1877 // Strip the addrspacecast and pass along the operand
1878 PointerType *DstTy = cast<PointerType>(CE->getType());
1879 if (DstTy->getAddressSpace() == 0)
1880 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1881
1882 break; // Error
1883 }
1884
1885 case Instruction::GetElementPtr: {
1886 const DataLayout &DL = getDataLayout();
1887
1888 // Generate a symbolic expression for the byte address
1889 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1890 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1891
1892 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1893 ProcessingGeneric);
// A zero offset needs no addition; the base expression is returned as is.
1894 if (!OffsetAI)
1895 return Base;
1896
// NOTE(review): listing line 1898 is absent; it presumably returns an
// expression combining Base and Offset, of which only the trailing "Ctx);"
// is visible.
1897 int64_t Offset = OffsetAI.getSExtValue();
1899 Ctx);
1900 }
1901
1902 case Instruction::Trunc:
1903 // We emit the value and depend on the assembler to truncate the generated
1904 // expression properly. This is important for differences between
1905 // blockaddress labels. Since the two labels are in the same function, it
1906 // is reasonable to treat their delta as a 32-bit value.
1907 [[fallthrough]];
1908 case Instruction::BitCast:
1909 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1910
1911 case Instruction::IntToPtr: {
1912 const DataLayout &DL = getDataLayout();
1913
1914 // Handle casts to pointers by changing them into casts to the appropriate
1915 // integer type. This promotes constant folding and simplifies this code.
1916 Constant *Op = CE->getOperand(0);
1917 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1918 /*IsSigned*/ false, DL);
1919 if (Op)
1920 return lowerConstantForGV(Op, ProcessingGeneric);
1921
1922 break; // Error
1923 }
1924
1925 case Instruction::PtrToInt: {
1926 const DataLayout &DL = getDataLayout();
1927
1928 // Support only foldable casts to/from pointers that can be eliminated by
1929 // changing the pointer to the appropriately sized integer type.
1930 Constant *Op = CE->getOperand(0);
1931 Type *Ty = CE->getType();
1932
1933 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1934
1935 // We can emit the pointer value into this slot if the slot is an
1936 // integer slot equal to the size of the pointer.
1937 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1938 return OpExpr;
1939
1940 // Otherwise the pointer is smaller than the resultant integer, mask off
1941 // the high bits so we are sure to get a proper truncation if the input is
1942 // a constant expr.
1943 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1944 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1945 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1946 }
1947
1948 // The MC library also has a right-shift operator, but it isn't consistently
1949 // signed or unsigned between different targets.
1950 case Instruction::Add: {
1951 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1952 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
1953 switch (CE->getOpcode()) {
1954 default: llvm_unreachable("Unknown binary operator constant cast expr");
1955 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1956 }
1957 }
1958 }
1959
1960 // If the code isn't optimized, there may be outstanding folding
1961 // opportunities. Attempt to fold the expression using DataLayout as a
1962 // last resort before giving up.
// NOTE(review): listing line 1963 is absent; C is used below without a visible
// declaration and is presumably the result of constant-folding CE.
1964 if (C != CE)
1965 return lowerConstantForGV(C, ProcessingGeneric);
1966
1967 // Otherwise report the problem to the user.
1968 std::string S;
1969 raw_string_ostream OS(S);
1970 OS << "Unsupported expression in static initializer: ";
1971 CE->printAsOperand(OS, /*PrintType=*/false,
1972 !MF ? nullptr : MF->getFunction().getParent());
1973 report_fatal_error(Twine(OS.str()));
1974}
1975
1976void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) const {
1977 OutContext.getAsmInfo()->printExpr(OS, Expr);
1978}
1979
1980/// PrintAsmOperand - Print out an operand for an inline asm expression.
1981///
1982bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1983 const char *ExtraCode, raw_ostream &O) {
1984 if (ExtraCode && ExtraCode[0]) {
1985 if (ExtraCode[1] != 0)
1986 return true; // Unknown modifier.
1987
1988 switch (ExtraCode[0]) {
1989 default:
1990 // See if this is a generic print operand
1991 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
1992 case 'r':
1993 break;
1994 }
1995 }
1996
1997 printOperand(MI, OpNo, O);
1998
1999 return false;
2000}
2001
2002bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2003 unsigned OpNo,
2004 const char *ExtraCode,
2005 raw_ostream &O) {
2006 if (ExtraCode && ExtraCode[0])
2007 return true; // Unknown modifier
2008
2009 O << '[';
2010 printMemOperand(MI, OpNo, O);
2011 O << ']';
2012
2013 return false;
2014}
2015
/// Print operand \p OpNum of \p MI to \p O, dispatching on the operand type.
/// Unsupported operand types are unreachable.
///
/// NOTE(review): the case labels (listing lines 2020, 2031, 2035, 2039 and
/// 2043) and the two statements of the physical-register branch (2023, 2025)
/// are absent from this listing -- confirm against the upstream file.
2016void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2017 raw_ostream &O) {
2018 const MachineOperand &MO = MI->getOperand(OpNum);
2019 switch (MO.getType()) {
// Register operand: physical registers are printed by the (missing)
// statements below, with NVPTX::VRDepot treated specially; virtual registers
// go through emitVirtualRegister.
2021 if (MO.getReg().isPhysical()) {
2022 if (MO.getReg() == NVPTX::VRDepot)
2024 else
2026 } else {
2027 emitVirtualRegister(MO.getReg(), O);
2028 }
2029 break;
2030
// Integer immediate.
2032 O << MO.getImm();
2033 break;
2034
// Floating-point immediate, printed as a PTX hex literal.
2036 printFPConstant(MO.getFPImm(), O);
2037 break;
2038
// Symbol operand.
2040 PrintSymbolOperand(MO, O);
2041 break;
2042
// Basic-block operand, printed by its symbol.
2044 MO.getMBB()->getSymbol()->print(O, MAI);
2045 break;
2046
2047 default:
2048 llvm_unreachable("Operand type not supported.");
2049 }
2050}
2051
2052void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2053 raw_ostream &O, const char *Modifier) {
2054 printOperand(MI, OpNum, O);
2055
2056 if (Modifier && strcmp(Modifier, "add") == 0) {
2057 O << ", ";
2058 printOperand(MI, OpNum + 1, O);
2059 } else {
2060 if (MI->getOperand(OpNum + 1).isImm() &&
2061 MI->getOperand(OpNum + 1).getImm() == 0)
2062 return; // don't print ',0' or '+0'
2063 O << "+";
2064 printOperand(MI, OpNum + 1, O);
2065 }
2066}
2067
2068/// Returns true if \p Line begins with an alphabetic character or underscore,
2069/// indicating it is a PTX instruction that should receive a .loc directive.
2070static bool isPTXInstruction(StringRef Line) {
2071 StringRef Trimmed = Line.ltrim();
2072 return !Trimmed.empty() &&
2073 (std::isalpha(static_cast<unsigned char>(Trimmed[0])) ||
2074 Trimmed[0] == '_');
2075}
2076
2077/// Returns the DILocation for an inline asm MachineInstr if debug line info
2078/// should be emitted, or nullptr otherwise.
2080 if (!MI || !MI->getDebugLoc())
2081 return nullptr;
2082 const DISubprogram *SP = MI->getMF()->getFunction().getSubprogram();
2083 if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
2084 return nullptr;
2085 const DILocation *DL = MI->getDebugLoc();
2086 if (!DL->getFile() || !DL->getLine())
2087 return nullptr;
2088 return DL;
2089}
2090
2091namespace {
2092struct InlineAsmInliningContext {
2093 MCSymbol *FuncNameSym = nullptr;
2094 unsigned FileIA = 0;
2095 unsigned LineIA = 0;
2096 unsigned ColIA = 0;
2097
2098 bool hasInlinedAt() const { return FuncNameSym != nullptr; }
2099};
2100} // namespace
2101
2102/// Resolves the enhanced-lineinfo inlining context for an inline asm debug
2103/// location. Returns a default (empty) context if inlining info is unavailable.
2104static InlineAsmInliningContext
2106 NVPTXDwarfDebug *NVDD, MCStreamer &Streamer,
2107 unsigned CUID) {
2108 InlineAsmInliningContext Ctx;
2109 const DILocation *InlinedAt = DL->getInlinedAt();
2110 if (!InlinedAt || !InlinedAt->getFile() || !NVDD ||
2111 !NVDD->isEnhancedLineinfo(MF))
2112 return Ctx;
2113 const auto *SubProg = getDISubprogram(DL->getScope());
2114 if (!SubProg)
2115 return Ctx;
2116 Ctx.FuncNameSym = NVDD->getOrCreateFuncNameSymbol(SubProg->getLinkageName());
2117 Ctx.FileIA = Streamer.emitDwarfFileDirective(
2118 0, InlinedAt->getFile()->getDirectory(),
2119 InlinedAt->getFile()->getFilename(), std::nullopt, std::nullopt, CUID);
2120 Ctx.LineIA = InlinedAt->getLine();
2121 Ctx.ColIA = InlinedAt->getColumn();
2122 return Ctx;
2123}
2124
/// Emit the inline asm text \p Str as raw text.
///
/// Without a usable debug location (getInlineAsmDebugLoc returns null) the
/// string is emitted in one piece. Otherwise it is split on '\n' and emitted
/// line by line, with a .loc directive before every line that
/// isPTXInstruction accepts; the line number used for .loc starts at the
/// debug location's line and is incremented after every emitted line.
2125void NVPTXAsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
2126 const MCTargetOptions &MCOptions,
2127 const MDNode *LocMDNode,
2128 InlineAsm::AsmDialect Dialect,
2129 const MachineInstr *MI) {
2130 assert(!Str.empty() && "Can't emit empty inline asm block");
// Drop a single trailing NUL character, if present.
2131 if (Str.back() == 0)
2132 Str = Str.substr(0, Str.size() - 1);
2133
// NOTE(review): listing line 2135 is absent from this lambda; it presumably
// holds the start-of-inline-asm counterpart of emitInlineAsmEnd.
2134 auto emitAsmStr = [&](StringRef AsmStr) {
2136 OutStreamer->emitRawText(AsmStr);
2137 emitInlineAsmEnd(STI, nullptr, MI);
2138 };
2139
2140 const DILocation *DL = getInlineAsmDebugLoc(MI);
2141 if (!DL) {
2142 emitAsmStr(Str);
2143 return;
2144 }
2145
2146 const DIFile *File = DL->getFile();
2147 unsigned Line = DL->getLine();
2148 const unsigned Column = DL->getColumn();
2149 const unsigned CUID = OutStreamer->getContext().getDwarfCompileUnitID();
2150 const unsigned FileNumber = OutStreamer->emitDwarfFileDirective(
2151 0, File->getDirectory(), File->getFilename(), std::nullopt, std::nullopt,
2152 CUID);
2153
2154 auto *NVDD = static_cast<NVPTXDwarfDebug *>(getDwarfDebug());
2155 InlineAsmInliningContext InlineCtx =
2156 getInlineAsmInliningContext(DL, *MI->getMF(), NVDD, *OutStreamer, CUID);
2157
2158 SmallVector<StringRef, 16> Lines;
2159 Str.split(Lines, '\n');
// NOTE(review): listing line 2160 is absent here -- confirm against the
// upstream file.
2161 for (const StringRef &L : Lines) {
// Trailing carriage returns are stripped from the text that is emitted; the
// instruction check below is done on the untrimmed line.
2162 StringRef RTrimmed = L.rtrim('\r');
2163 if (isPTXInstruction(L)) {
2164 if (InlineCtx.hasInlinedAt()) {
2165 OutStreamer->emitDwarfLocDirectiveWithInlinedAt(
2166 FileNumber, Line, Column, InlineCtx.FileIA, InlineCtx.LineIA,
2167 InlineCtx.ColIA, InlineCtx.FuncNameSym, DWARF2_FLAG_IS_STMT, 0, 0,
2168 File->getFilename());
2169 } else {
2170 OutStreamer->emitDwarfLocDirective(FileNumber, Line, Column,
2171 DWARF2_FLAG_IS_STMT, 0, 0,
2172 File->getFilename());
2173 }
2174 }
2175 OutStreamer->emitRawText(RTrimmed);
2176 ++Line;
2177 }
2178 emitInlineAsmEnd(STI, nullptr, MI);
2179}
2180
// Definition of the static pass ID member, initialized to 0.
2181char NVPTXAsmPrinter::ID = 0;
2182
// Registers the pass through INITIALIZE_PASS with the argument string
// "nvptx-asm-printer" and the description "NVPTX Assembly Printer".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against the macro definition.
2183INITIALIZE_PASS(NVPTXAsmPrinter, "nvptx-asm-printer", "NVPTX Assembly Printer",
2184 false, false)
2185
2186// Force static initialization.
// Exported with C linkage and external visibility.
// NOTE(review): the body (listing lines 2189-2190) is absent from this
// listing; it presumably registers the asm printer for the NVPTX targets --
// confirm against the upstream file.
2187extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2188LLVMInitializeNVPTXAsmPrinter() {
2191}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define DWARF2_FLAG_IS_STMT
Definition MCDwarf.h:119
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static StringRef getTextureName(const Value &V)
static const DILocation * getInlineAsmDebugLoc(const MachineInstr *MI)
Returns the DILocation for an inline asm MachineInstr if debug line info should be emitted,...
#define DEPOTNAME
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool hasFullDebugInfo(Module &M)
static StringRef getSurfaceName(const Value &V)
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static StringRef getSamplerName(const Value &V)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static InlineAsmInliningContext getInlineAsmInliningContext(const DILocation *DL, const MachineFunction &MF, NVPTXDwarfDebug *NVDD, MCStreamer &Streamer, unsigned CUID)
Resolves the enhanced-lineinfo inlining context for an inline asm debug location.
static bool isPTXInstruction(StringRef Line)
Returns true if Line begins with an alphabetic character or underscore, indicating it is a PTX instru...
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static const char * name
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5890
APInt bitcastToAPInt() const
Definition APFloat.h:1408
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:520
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
DwarfDebug * getDwarfDebug()
Definition AsmPrinter.h:290
virtual void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo, const MachineInstr *MI)
Let the target do anything it needs to do after emitting inlineasm.
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition AsmPrinter.h:97
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition AsmPrinter.h:518
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
Definition AsmPrinter.h:625
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:128
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition AsmPrinter.h:456
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
Definition AsmPrinter.h:629
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual void emitInlineAsmStart() const
Let the target do anything it needs to do before emitting inlineasm.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
const APFloat & getValueAPF() const
Definition Constants.h:463
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
FixedVectorType * getType() const
Specialize the getType() method to always return a FixedVectorType, which reduces the amount of casti...
Definition Constants.h:683
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
Subprogram description. Uses SubclassData1.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Collects and handles dwarf debug information.
Definition DwarfDebug.h:352
unsigned getNumElements() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void addOperand(const MCOperand Op)
Definition MCInst.h:215
void setOpcode(unsigned Op)
Definition MCInst.h:201
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
Streaming machine code generation interface.
Definition MCStreamer.h:222
unsigned emitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename, std::optional< MD5::MD5Result > Checksum=std::nullopt, std::optional< StringRef > Source=std::nullopt, unsigned CUID=0)
Associate a filename with a specified logical file number.
Definition MCStreamer.h:879
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition MCSymbol.cpp:59
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
DwarfDebug * createDwarfDebug() override
Create NVPTX-specific DwarfDebug handler.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
NVPTX-specific DwarfDebug implementation.
bool isEnhancedLineinfo(const MachineFunction &MF) const
Returns true if the enhanced lineinfo mode (with inlined_at) is active for the given MachineFunction.
MCSymbol * getOrCreateFuncNameSymbol(StringRef LinkageName)
Get or create an MCSymbol in .debug_str for a function's linkage name.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
iterator begin() const
Definition StringRef.h:113
StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
Definition StringRef.h:820
iterator end() const
Definition StringRef.h:115
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition Type.h:76
@ HalfTyID
16-bit floating point type
Definition Type.h:57
@ VoidTyID
type with no size
Definition Type.h:64
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ StructTyID
Structures.
Definition Type.h:75
@ IntegerTyID
Arbitrary bit width integers.
Definition Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition Type.h:77
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ PointerTyID
Pointers.
Definition Type.h:74
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
TypeID getTypeID() const
Return the type id for the type.
Definition Type.h:138
op_range operands()
Definition User.h:267
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type size() const
Definition DenseSet.h:87
bool erase(const ValueT &V)
Definition DenseSet.h:100
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
constexpr StringLiteral BlocksAreClusters("nvvm.blocksareclusters")
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
uint64_t read64le(const void *P)
Definition Endian.h:435
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2180
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MaybeAlign getAlign(const CallInst &I, unsigned Index)
std::optional< unsigned > getMaxNReg(const Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition Format.h:204
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::optional< unsigned > getMaxClusterRank(const Function &F)
Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, Align InitialAlign, const DataLayout &DL)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
PTXOpaqueType getPTXOpaqueType(const GlobalVariable &GV)
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
LLVM_ABI DISubprogram * getDISubprogram(const MDNode *Scope)
Find subprogram that is enclosing this scope.
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL)
Since function arguments are passed via .param space, we may want to increase their alignment in a wa...
Target & getTheNVPTXTarget32()
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...