LLVM 23.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXDwarfDebug.h"
21#include "NVPTXMCExpr.h"
23#include "NVPTXRegisterInfo.h"
24#include "NVPTXSubtarget.h"
25#include "NVPTXTargetMachine.h"
26#include "NVPTXUtilities.h"
27#include "NVVMProperties.h"
29#include "cl_common_defines.h"
30#include "llvm/ADT/APFloat.h"
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/ArrayRef.h"
33#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/DenseSet.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/Twine.h"
54#include "llvm/IR/Argument.h"
55#include "llvm/IR/Attributes.h"
56#include "llvm/IR/BasicBlock.h"
57#include "llvm/IR/Constant.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/DataLayout.h"
60#include "llvm/IR/DebugInfo.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalAlias.h"
66#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/Instruction.h"
69#include "llvm/IR/LLVMContext.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Operator.h"
72#include "llvm/IR/Type.h"
73#include "llvm/IR/User.h"
74#include "llvm/MC/MCExpr.h"
75#include "llvm/MC/MCInst.h"
76#include "llvm/MC/MCInstrDesc.h"
77#include "llvm/MC/MCStreamer.h"
78#include "llvm/MC/MCSymbol.h"
83#include "llvm/Support/Endian.h"
90#include <cassert>
91#include <cstdint>
92#include <cstring>
93#include <string>
94
95using namespace llvm;
96
97#define DEPOTNAME "__local_depot"
98
99static StringRef getTextureName(const Value &V) {
100 assert(V.hasName() && "Found texture variable with no name");
101 return V.getName();
102}
103
105 assert(V.hasName() && "Found surface variable with no name");
106 return V.getName();
107}
108
110 assert(V.hasName() && "Found sampler variable with no name");
111 return V.getName();
112}
113
114/// discoverDependentGlobals - Return a set of GlobalVariables on which \p V
115/// depends.
116static void
119 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
120 Globals.insert(GV);
121 return;
122 }
123
124 if (const User *U = dyn_cast<User>(V))
125 for (const auto &O : U->operands())
126 discoverDependentGlobals(O, Globals);
127}
128
129/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
130/// instances to be emitted, but only after any dependents have been added
131/// first.
132static void
137 // Have we already visited this one?
138 if (Visited.count(GV))
139 return;
140
141 // Do we have a circular dependency?
142 if (!Visiting.insert(GV).second)
143 report_fatal_error("Circular dependency found in global variable set");
144
145 // Make sure we visit all dependents first
147 for (const auto &O : GV->operands())
148 discoverDependentGlobals(O, Others);
149
150 for (const GlobalVariable *GV : Others)
151 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
152
153 // Now we can visit ourself
154 Order.push_back(GV);
155 Visited.insert(GV);
156 Visiting.erase(GV);
157}
158
159void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
160 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
161 getSubtargetInfo().getFeatureBits());
162
163 MCInst Inst;
164 lowerToMCInst(MI, Inst);
166}
167
168void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
169 OutMI.setOpcode(MI->getOpcode());
170 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
171 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
172 const MachineOperand &MO = MI->getOperand(0);
173 OutMI.addOperand(GetSymbolRef(
174 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
175 return;
176 }
177
178 for (const auto MO : MI->operands())
179 OutMI.addOperand(lowerOperand(MO));
180}
181
182MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
183 switch (MO.getType()) {
184 default:
185 llvm_unreachable("unknown operand type");
187 return MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
189 return MCOperand::createImm(MO.getImm());
194 return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
196 return GetSymbolRef(getSymbol(MO.getGlobal()));
198 const ConstantFP *Cnt = MO.getFPImm();
199 const APFloat &Val = Cnt->getValueAPF();
200
201 switch (Cnt->getType()->getTypeID()) {
202 default:
203 report_fatal_error("Unsupported FP type");
204 break;
205 case Type::HalfTyID:
208 case Type::BFloatTyID:
211 case Type::FloatTyID:
214 case Type::DoubleTyID:
217 }
218 break;
219 }
220 }
221}
222
223unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
225 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
226
227 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
228 unsigned RegNum = RegMap[Reg];
229
230 // Encode the register class in the upper 4 bits
231 // Must be kept in sync with NVPTXInstPrinter::printRegName
232 unsigned Ret = 0;
233 if (RC == &NVPTX::B1RegClass) {
234 Ret = (1 << 28);
235 } else if (RC == &NVPTX::B16RegClass) {
236 Ret = (2 << 28);
237 } else if (RC == &NVPTX::B32RegClass) {
238 Ret = (3 << 28);
239 } else if (RC == &NVPTX::B64RegClass) {
240 Ret = (4 << 28);
241 } else if (RC == &NVPTX::B128RegClass) {
242 Ret = (7 << 28);
243 } else {
244 report_fatal_error("Bad register class");
245 }
246
247 // Insert the vreg number
248 Ret |= (RegNum & 0x0FFFFFFF);
249 return Ret;
250 } else {
251 // Some special-use registers are actually physical registers.
252 // Encode this as the register class ID of 0 and the real register ID.
253 return Reg & 0x0FFFFFFF;
254 }
255}
256
257MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
258 const MCExpr *Expr;
259 Expr = MCSymbolRefExpr::create(Symbol, OutContext);
260 return MCOperand::createExpr(Expr);
261}
262
263void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
264 const DataLayout &DL = getDataLayout();
265 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
266 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
267
268 Type *Ty = F->getReturnType();
269 if (Ty->getTypeID() == Type::VoidTyID)
270 return;
271 O << " (";
272
273 auto PrintScalarRetVal = [&](unsigned Size) {
274 O << ".param .b" << promoteScalarArgumentSize(Size) << " func_retval0";
275 };
276 if (shouldPassAsArray(Ty)) {
277 const unsigned TotalSize = DL.getTypeAllocSize(Ty);
278 const Align RetAlignment =
279 getFunctionArgumentAlignment(F, Ty, AttributeList::ReturnIndex, DL);
280 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
281 << TotalSize << "]";
282 } else if (Ty->isFloatingPointTy()) {
283 PrintScalarRetVal(Ty->getPrimitiveSizeInBits());
284 } else if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
285 PrintScalarRetVal(ITy->getBitWidth());
286 } else if (isa<PointerType>(Ty)) {
287 PrintScalarRetVal(TLI->getPointerTy(DL).getSizeInBits());
288 } else
289 llvm_unreachable("Unknown return type");
290 O << ") ";
291}
292
293void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
294 raw_ostream &O) {
295 const Function &F = MF.getFunction();
296 printReturnValStr(&F, O);
297}
298
299// Return true if MBB is the header of a loop marked with
300// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
301bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
302 const MachineBasicBlock &MBB) const {
303 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
304 // We insert .pragma "nounroll" only to the loop header.
305 if (!LI.isLoopHeader(&MBB))
306 return false;
307
308 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
309 // we iterate through each back edge of the loop with header MBB, and check
310 // whether its metadata contains llvm.loop.unroll.disable.
311 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
312 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
313 // Edges from other loops to MBB are not back edges.
314 continue;
315 }
316 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
317 if (MDNode *LoopID =
318 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
319 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
320 return true;
321 if (MDNode *UnrollCountMD =
322 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
323 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
324 ->isOne())
325 return true;
326 }
327 }
328 }
329 }
330 return false;
331}
332
333void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
335 if (isLoopHeaderOfNoUnroll(MBB))
336 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
337}
338
340 SmallString<128> Str;
341 raw_svector_ostream O(Str);
342
343 if (!GlobalsEmitted) {
344 emitGlobals(*MF->getFunction().getParent());
345 GlobalsEmitted = true;
346 }
347
348 // Set up
349 MRI = &MF->getRegInfo();
350 F = &MF->getFunction();
351 emitLinkageDirective(F, O);
352 if (isKernelFunction(*F))
353 O << ".entry ";
354 else {
355 O << ".func ";
356 printReturnValStr(*MF, O);
357 }
358
359 CurrentFnSym->print(O, MAI);
360
361 emitFunctionParamList(F, O);
362 O << "\n";
363
364 if (isKernelFunction(*F))
365 emitKernelFunctionDirectives(*F, O);
366
368 O << ".noreturn";
369
370 OutStreamer->emitRawText(O.str());
371
372 VRegMapping.clear();
373 // Emit open brace for function body.
374 OutStreamer->emitRawText(StringRef("{\n"));
375 setAndEmitFunctionVirtualRegisters(*MF);
376 encodeDebugInfoRegisterNumbers(*MF);
377 // Emit initial .loc debug directive for correct relocation symbol data.
378 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
379 assert(SP->getUnit());
380 if (!SP->getUnit()->isDebugDirectivesOnly())
382 }
383}
384
386 bool Result = AsmPrinter::runOnMachineFunction(F);
387 // Emit closing brace for the body of function F.
388 // The closing brace must be emitted here because we need to emit additional
389 // debug labels/data after the last basic block.
390 // We need to emit the closing brace here because there is no function that
391 // is called once emission of the function body has finished.
392 OutStreamer->emitRawText(StringRef("}\n"));
393 return Result;
394}
395
398 raw_svector_ostream O(Str);
399 emitDemotedVars(&MF->getFunction(), O);
400 OutStreamer->emitRawText(O.str());
401}
402
404 VRegMapping.clear();
405}
406
410 return OutContext.getOrCreateSymbol(Str);
411}
412
413void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
414 Register RegNo = MI->getOperand(0).getReg();
415 if (RegNo.isVirtual()) {
416 OutStreamer->AddComment(Twine("implicit-def: ") +
418 } else {
419 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
420 OutStreamer->AddComment(Twine("implicit-def: ") +
421 STI.getRegisterInfo()->getName(RegNo));
422 }
423 OutStreamer->addBlankLine();
424}
425
426void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
427 raw_ostream &O) const {
428 // If the NVVM IR has some of reqntid* specified, then output
429 // the reqntid directive, and set the unspecified ones to 1.
430 // If none of Reqntid* is specified, don't output reqntid directive.
431 const auto ReqNTID = getReqNTID(F);
432 if (!ReqNTID.empty())
433 O << formatv(".reqntid {0:$[, ]}\n",
435
436 const auto MaxNTID = getMaxNTID(F);
437 if (!MaxNTID.empty())
438 O << formatv(".maxntid {0:$[, ]}\n",
440
441 if (const auto Mincta = getMinCTASm(F))
442 O << ".minnctapersm " << *Mincta << "\n";
443
444 if (const auto Maxnreg = getMaxNReg(F))
445 O << ".maxnreg " << *Maxnreg << "\n";
446
447 // .maxclusterrank directive requires SM_90 or higher, make sure that we
448 // filter it out for lower SM versions, as it causes a hard ptxas crash.
449 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
450 const NVPTXSubtarget *STI = &NTM.getSubtarget<NVPTXSubtarget>(F);
451
452 if (STI->getSmVersion() >= 90) {
453 const auto ClusterDim = getClusterDim(F);
455
456 if (!ClusterDim.empty()) {
457
458 if (!BlocksAreClusters)
459 O << ".explicitcluster\n";
460
461 if (ClusterDim[0] != 0) {
462 assert(llvm::all_of(ClusterDim, not_equal_to(0)) &&
463 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
464 "should be non-zero as well");
465
466 O << formatv(".reqnctapercluster {0:$[, ]}\n",
468 } else {
469 assert(llvm::all_of(ClusterDim, equal_to(0)) &&
470 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
471 "should be 0 as well");
472 }
473 }
474
475 if (BlocksAreClusters) {
476 LLVMContext &Ctx = F.getContext();
477 if (ReqNTID.empty() || ClusterDim.empty())
478 Ctx.diagnose(DiagnosticInfoUnsupported(
479 F, "blocksareclusters requires reqntid and cluster_dim attributes",
480 F.getSubprogram()));
481 else if (STI->getPTXVersion() < 90)
482 Ctx.diagnose(DiagnosticInfoUnsupported(
483 F, "blocksareclusters requires PTX version >= 9.0",
484 F.getSubprogram()));
485 else
486 O << ".blocksareclusters\n";
487 }
488
489 if (const auto Maxclusterrank = getMaxClusterRank(F))
490 O << ".maxclusterrank " << *Maxclusterrank << "\n";
491 }
492}
493
494std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
495 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
496
497 std::string Name;
498 raw_string_ostream NameStr(Name);
499
500 VRegRCMap::const_iterator I = VRegMapping.find(RC);
501 assert(I != VRegMapping.end() && "Bad register class");
502 const DenseMap<unsigned, unsigned> &RegMap = I->second;
503
504 VRegMap::const_iterator VI = RegMap.find(Reg);
505 assert(VI != RegMap.end() && "Bad virtual register");
506 unsigned MappedVR = VI->second;
507
508 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
509
510 return Name;
511}
512
513void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
514 raw_ostream &O) {
515 O << getVirtualRegisterName(vr);
516}
517
518void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
519 raw_ostream &O) {
521 if (!F || isKernelFunction(*F) || F->isDeclaration())
523 "NVPTX aliasee must be a non-kernel function definition");
524
525 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
527 report_fatal_error("NVPTX aliasee must not be '.weak'");
528
529 emitDeclarationWithName(F, getSymbol(GA), O);
530}
531
532void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
533 emitDeclarationWithName(F, getSymbol(F), O);
534}
535
536void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
537 raw_ostream &O) {
538 emitLinkageDirective(F, O);
539 if (isKernelFunction(*F))
540 O << ".entry ";
541 else
542 O << ".func ";
543 printReturnValStr(F, O);
544 S->print(O, MAI);
545 O << "\n";
546 emitFunctionParamList(F, O);
547 O << "\n";
549 O << ".noreturn";
550 O << ";\n";
551}
552
553static bool usedInGlobalVarDef(const Constant *C) {
554 if (!C)
555 return false;
556
558 return GV->getName() != "llvm.used";
559
560 for (const User *U : C->users())
561 if (const Constant *C = dyn_cast<Constant>(U))
563 return true;
564
565 return false;
566}
567
568static bool usedInOneFunc(const User *U, Function const *&OneFunc) {
569 if (const GlobalVariable *OtherGV = dyn_cast<GlobalVariable>(U))
570 if (OtherGV->getName() == "llvm.used")
571 return true;
572
573 if (const Instruction *I = dyn_cast<Instruction>(U)) {
574 if (const Function *CurFunc = I->getFunction()) {
575 if (OneFunc && (CurFunc != OneFunc))
576 return false;
577 OneFunc = CurFunc;
578 return true;
579 }
580 return false;
581 }
582
583 for (const User *UU : U->users())
584 if (!usedInOneFunc(UU, OneFunc))
585 return false;
586
587 return true;
588}
589
590/* Find out if a global variable can be demoted to local scope.
591 * Currently, this is valid for CUDA shared variables, which have local
592 * scope and global lifetime. So the conditions to check are :
593 * 1. Is the global variable in shared address space?
594 * 2. Does it have local linkage?
595 * 3. Is the global variable referenced only in one function?
596 */
597static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f) {
598 if (!GV->hasLocalLinkage())
599 return false;
601 return false;
602
603 const Function *oneFunc = nullptr;
604
605 bool flag = usedInOneFunc(GV, oneFunc);
606 if (!flag)
607 return false;
608 if (!oneFunc)
609 return false;
610 f = oneFunc;
611 return true;
612}
613
614static bool useFuncSeen(const Constant *C,
615 const SmallPtrSetImpl<const Function *> &SeenSet) {
616 for (const User *U : C->users()) {
617 if (const Constant *cu = dyn_cast<Constant>(U)) {
618 if (useFuncSeen(cu, SeenSet))
619 return true;
620 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
621 if (const Function *Caller = I->getFunction())
622 if (SeenSet.contains(Caller))
623 return true;
624 }
625 }
626 return false;
627}
628
629void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
630 SmallPtrSet<const Function *, 32> SeenSet;
631 for (const Function &F : M) {
632 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
633 emitDeclaration(&F, O);
634 continue;
635 }
636
637 if (F.isDeclaration()) {
638 if (F.use_empty())
639 continue;
640 if (F.getIntrinsicID())
641 continue;
642 // An unrecognized intrinsic would produce an invalid PTX declaration. Let
643 // the user know that, and skip it.
644 if (F.isIntrinsic()) {
645 LLVMContext &Ctx = F.getContext();
646 Ctx.diagnose(DiagnosticInfoUnsupported(
647 F, "unknown intrinsic '" + F.getName() +
648 "' cannot be lowered by the NVPTX backend"));
649 continue;
650 }
651 emitDeclaration(&F, O);
652 continue;
653 }
654 for (const User *U : F.users()) {
655 if (const Constant *C = dyn_cast<Constant>(U)) {
656 if (usedInGlobalVarDef(C)) {
657 // The use is in the initialization of a global variable
658 // that is a function pointer, so print a declaration
659 // for the original function
660 emitDeclaration(&F, O);
661 break;
662 }
663 // Emit a declaration of this function if the function that
664 // uses this constant expr has already been seen.
665 if (useFuncSeen(C, SeenSet)) {
666 emitDeclaration(&F, O);
667 break;
668 }
669 }
670
671 if (!isa<Instruction>(U))
672 continue;
673 const Function *Caller = cast<Instruction>(U)->getFunction();
674 if (!Caller)
675 continue;
676
677 // If a caller has already been seen, then the caller is
678 // appearing in the module before the callee. so print out
679 // a declaration for the callee.
680 if (SeenSet.contains(Caller)) {
681 emitDeclaration(&F, O);
682 break;
683 }
684 }
685 SeenSet.insert(&F);
686 }
687 for (const GlobalAlias &GA : M.aliases())
688 emitAliasDeclaration(&GA, O);
689}
690
691void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
692 // Construct a default subtarget off of the TargetMachine defaults. The
693 // rest of NVPTX isn't friendly to change subtargets per function and
694 // so the default TargetMachine will have all of the options.
695 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
696 const NVPTXSubtarget *STI = NTM.getSubtargetImpl();
697
698 // Emit header before any dwarf directives are emitted below.
699 emitHeader(M, *STI);
700}
701
702/// Create NVPTX-specific DwarfDebug handler.
706
708 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
709 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
710 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
711 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
712
713 // We need to call the parent's one explicitly.
714 bool Result = AsmPrinter::doInitialization(M);
715
716 GlobalsEmitted = false;
717
718 return Result;
719}
720
721void NVPTXAsmPrinter::emitGlobals(const Module &M) {
722 SmallString<128> Str2;
723 raw_svector_ostream OS2(Str2);
724
725 emitDeclarations(M, OS2);
726
727 // As ptxas does not support forward references of globals, we need to first
728 // sort the list of module-level globals in def-use order. We visit each
729 // global variable in order, and ensure that we emit it *after* its dependent
730 // globals. We use a little extra memory maintaining both a set and a list to
731 // have fast searches while maintaining a strict ordering.
735
736 // Visit each global variable, in order
737 for (const GlobalVariable &I : M.globals())
738 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
739
740 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
741 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
742
743 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
744 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
745
746 // Print out module-level global variables in proper order
747 for (const GlobalVariable *GV : Globals)
748 printModuleLevelGV(GV, OS2, /*ProcessDemoted=*/false, STI);
749
750 OS2 << '\n';
751
752 OutStreamer->emitRawText(OS2.str());
753}
754
755void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
757 raw_svector_ostream OS(Str);
758
759 MCSymbol *Name = getSymbol(&GA);
760
761 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
762 << ";\n";
763
764 OutStreamer->emitRawText(OS.str());
765}
766
767NVPTXTargetStreamer *NVPTXAsmPrinter::getTargetStreamer() const {
768 return static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
769}
770
771static bool hasFullDebugInfo(Module &M) {
772 for (DICompileUnit *CU : M.debug_compile_units()) {
773 switch(CU->getEmissionKind()) {
776 break;
779 return true;
780 }
781 }
782
783 return false;
784}
785
786void NVPTXAsmPrinter::emitHeader(Module &M, const NVPTXSubtarget &STI) {
787 auto *TS = getTargetStreamer();
788
789 TS->emitBanner();
790
791 const unsigned PTXVersion = STI.getPTXVersion();
792 TS->emitVersionDirective(PTXVersion);
793
794 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
795 bool TexModeIndependent = NTM.getDrvInterface() == NVPTX::NVCL;
796
797 TS->emitTargetDirective(STI.getTargetName(), TexModeIndependent,
799 TS->emitAddressSizeDirective(M.getDataLayout().getPointerSizeInBits());
800}
801
803 // If we did not emit any functions, then the global declarations have not
804 // yet been emitted.
805 if (!GlobalsEmitted) {
806 emitGlobals(M);
807 GlobalsEmitted = true;
808 }
809
810 // call doFinalization
811 bool ret = AsmPrinter::doFinalization(M);
812
814
815 auto *TS =
816 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
817 // Close the last emitted section
818 if (hasDebugInfo()) {
819 TS->closeLastSection();
820 // Emit empty .debug_macinfo section for better support of the empty files.
821 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
822 }
823
824 // Output last DWARF .file directives, if any.
826
827 return ret;
828}
829
830// This function emits appropriate linkage directives for
831// functions and global variables.
832//
833// extern function declaration -> .extern
834// extern function definition -> .visible
835// external global variable with init -> .visible
836// external without init -> .extern
837// appending -> not allowed, assert.
838// for any linkage other than
839// internal, private, linker_private,
840// linker_private_weak, linker_private_weak_def_auto,
841// we emit -> .weak.
842
843void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
844 raw_ostream &O) {
845 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
846 if (V->hasExternalLinkage()) {
847 if (const auto *GVar = dyn_cast<GlobalVariable>(V))
848 O << (GVar->hasInitializer() ? ".visible " : ".extern ");
849 else if (V->isDeclaration())
850 O << ".extern ";
851 else
852 O << ".visible ";
853 } else if (V->hasAppendingLinkage()) {
854 report_fatal_error("Symbol '" + (V->hasName() ? V->getName() : "") +
855 "' has unsupported appending linkage type");
856 } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
857 O << ".weak ";
858 }
859 }
860}
861
862void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
863 raw_ostream &O, bool ProcessDemoted,
864 const NVPTXSubtarget &STI) {
865 // Skip meta data
866 if (GVar->hasSection())
867 if (GVar->getSection() == "llvm.metadata")
868 return;
869
870 // Skip LLVM intrinsic global variables
871 if (GVar->getName().starts_with("llvm.") ||
872 GVar->getName().starts_with("nvvm."))
873 return;
874
875 const DataLayout &DL = getDataLayout();
876
877 // GlobalVariables are always constant pointers themselves.
878 Type *ETy = GVar->getValueType();
879
880 if (GVar->hasExternalLinkage()) {
881 if (GVar->hasInitializer())
882 O << ".visible ";
883 else
884 O << ".extern ";
885 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
887 O << ".common ";
888 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
890 GVar->hasCommonLinkage()) {
891 O << ".weak ";
892 }
893
894 const PTXOpaqueType OpaqueType = getPTXOpaqueType(*GVar);
895
896 if (OpaqueType == PTXOpaqueType::Texture) {
897 O << ".global .texref " << getTextureName(*GVar) << ";\n";
898 return;
899 }
900
901 if (OpaqueType == PTXOpaqueType::Surface) {
902 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
903 return;
904 }
905
906 if (GVar->isDeclaration()) {
907 // (extern) declarations, no definition or initializer
908 // Currently the only known declaration is for an automatic __local
909 // (.shared) promoted to global.
910 emitPTXGlobalVariable(GVar, O, STI);
911 O << ";\n";
912 return;
913 }
914
915 if (OpaqueType == PTXOpaqueType::Sampler) {
916 O << ".global .samplerref " << getSamplerName(*GVar);
917
918 const Constant *Initializer = nullptr;
919 if (GVar->hasInitializer())
920 Initializer = GVar->getInitializer();
921 const ConstantInt *CI = nullptr;
922 if (Initializer)
923 CI = dyn_cast<ConstantInt>(Initializer);
924 if (CI) {
925 unsigned sample = CI->getZExtValue();
926
927 O << " = { ";
928
929 for (int i = 0,
930 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
931 i < 3; i++) {
932 O << "addr_mode_" << i << " = ";
933 switch (addr) {
934 case 0:
935 O << "wrap";
936 break;
937 case 1:
938 O << "clamp_to_border";
939 break;
940 case 2:
941 O << "clamp_to_edge";
942 break;
943 case 3:
944 O << "wrap";
945 break;
946 case 4:
947 O << "mirror";
948 break;
949 }
950 O << ", ";
951 }
952 O << "filter_mode = ";
953 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
954 case 0:
955 O << "nearest";
956 break;
957 case 1:
958 O << "linear";
959 break;
960 case 2:
961 llvm_unreachable("Anisotropic filtering is not supported");
962 default:
963 O << "nearest";
964 break;
965 }
966 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
967 O << ", force_unnormalized_coords = 1";
968 }
969 O << " }";
970 }
971
972 O << ";\n";
973 return;
974 }
975
976 if (GVar->hasPrivateLinkage()) {
977 if (GVar->getName().starts_with("unrollpragma"))
978 return;
979
980 // FIXME - need better way (e.g. Metadata) to avoid generating this global
981 if (GVar->getName().starts_with("filename"))
982 return;
983 if (GVar->use_empty())
984 return;
985 }
986
987 const Function *DemotedFunc = nullptr;
988 if (!ProcessDemoted && canDemoteGlobalVar(GVar, DemotedFunc)) {
989 O << "// " << GVar->getName() << " has been demoted\n";
990 localDecls[DemotedFunc].push_back(GVar);
991 return;
992 }
993
994 O << ".";
995 emitPTXAddressSpace(GVar->getAddressSpace(), O);
996
997 if (isManaged(*GVar)) {
998 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
1000 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1001 O << " .attribute(.managed)";
1002 }
1003
1004 O << " .align "
1005 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1006
1007 if (ETy->isPointerTy() || ((ETy->isIntegerTy() || ETy->isFloatingPointTy()) &&
1008 ETy->getScalarSizeInBits() <= 64)) {
1009 O << " .";
1010 // Special case: ABI requires that we use .u8 for predicates
1011 if (ETy->isIntegerTy(1))
1012 O << "u8";
1013 else
1014 O << getPTXFundamentalTypeStr(ETy, false);
1015 O << " ";
1016 getSymbol(GVar)->print(O, MAI);
1017
1018 // PTX allows variable initialization only for constant and global state
1019 // spaces.
1020 if (GVar->hasInitializer()) {
1021 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1022 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1023 const Constant *Initializer = GVar->getInitializer();
1024 // 'undef' is treated as there is no value specified.
1025 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1026 O << " = ";
1027 printScalarConstant(Initializer, O);
1028 }
1029 } else {
1030 // The frontend adds zero-initializer to device and constant variables
1031 // that don't have an initial value, and UndefValue to shared
1032 // variables, so skip warning for this case.
1033 if (!GVar->getInitializer()->isNullValue() &&
1034 !isa<UndefValue>(GVar->getInitializer())) {
1035 report_fatal_error("initial value of '" + GVar->getName() +
1036 "' is not allowed in addrspace(" +
1037 Twine(GVar->getAddressSpace()) + ")");
1038 }
1039 }
1040 }
1041 } else {
1042 // Although PTX has direct support for struct type and array type and
1043 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1044 // targets that support these high level field accesses. Structs, arrays
1045 // and vectors are lowered into arrays of bytes.
1046 switch (ETy->getTypeID()) {
1047 case Type::IntegerTyID: // Integers larger than 64 bits
1048 case Type::FP128TyID:
1049 case Type::StructTyID:
1050 case Type::ArrayTyID:
1051 case Type::FixedVectorTyID: {
1052 const uint64_t ElementSize = DL.getTypeStoreSize(ETy);
1053 // PTX allows variable initialization only for constant and
1054 // global state spaces.
1055 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1056 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1057 GVar->hasInitializer()) {
1058 const Constant *Initializer = GVar->getInitializer();
1059 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1060 AggBuffer aggBuffer(ElementSize, *this);
1061 bufferAggregateConstant(Initializer, &aggBuffer);
1062 if (aggBuffer.numSymbols()) {
1063 const unsigned int ptrSize = MAI.getCodePointerSize();
1064 if (ElementSize % ptrSize ||
1065 !aggBuffer.allSymbolsAligned(ptrSize)) {
1066 // Print in bytes and use the mask() operator for pointers.
1067 if (!STI.hasMaskOperator())
1069 "initialized packed aggregate with pointers '" +
1070 GVar->getName() +
1071 "' requires at least PTX ISA version 7.1");
1072 O << " .u8 ";
1073 getSymbol(GVar)->print(O, MAI);
1074 O << "[" << ElementSize << "] = {";
1075 aggBuffer.printBytes(O);
1076 O << "}";
1077 } else {
1078 O << " .u" << ptrSize * 8 << " ";
1079 getSymbol(GVar)->print(O, MAI);
1080 O << "[" << ElementSize / ptrSize << "] = {";
1081 aggBuffer.printWords(O);
1082 O << "}";
1083 }
1084 } else {
1085 O << " .b8 ";
1086 getSymbol(GVar)->print(O, MAI);
1087 O << "[" << ElementSize << "] = {";
1088 aggBuffer.printBytes(O);
1089 O << "}";
1090 }
1091 } else {
1092 O << " .b8 ";
1093 getSymbol(GVar)->print(O, MAI);
1094 if (ElementSize)
1095 O << "[" << ElementSize << "]";
1096 }
1097 } else {
1098 O << " .b8 ";
1099 getSymbol(GVar)->print(O, MAI);
1100 if (ElementSize)
1101 O << "[" << ElementSize << "]";
1102 }
1103 break;
1104 }
1105 default:
1106 llvm_unreachable("type not supported yet");
1107 }
1108 }
1109 O << ";\n";
1110}
1111
1112void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1113 const Value *v = Symbols[nSym];
1114 const Value *v0 = SymbolsBeforeStripping[nSym];
1115 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1116 MCSymbol *Name = AP.getSymbol(GVar);
1118 // Is v0 a generic pointer?
1119 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1120 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1121 os << "generic(";
1122 Name->print(os, AP.MAI);
1123 os << ")";
1124 } else {
1125 Name->print(os, AP.MAI);
1126 }
1127 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1128 const MCExpr *Expr = AP.lowerConstantForGV(CExpr, false);
1129 AP.printMCExpr(*Expr, os);
1130 } else
1131 llvm_unreachable("symbol type unknown");
1132}
1133
1134void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1135 unsigned int ptrSize = AP.MAI.getCodePointerSize();
1136 // Do not emit trailing zero initializers. They will be zero-initialized by
1137 // ptxas. This saves on both space requirements for the generated PTX and on
1138 // memory use by ptxas. (See:
1139 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1140 unsigned int InitializerCount = Size;
1141 // TODO: symbols make this harder, but it would still be good to trim trailing
1142 // 0s for aggs with symbols as well.
1143 if (numSymbols() == 0)
1144 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1145 InitializerCount--;
1146
1147 symbolPosInBuffer.push_back(InitializerCount);
1148 unsigned int nSym = 0;
1149 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1150 for (unsigned int pos = 0; pos < InitializerCount;) {
1151 if (pos)
1152 os << ", ";
1153 if (pos != nextSymbolPos) {
1154 os << (unsigned int)buffer[pos];
1155 ++pos;
1156 continue;
1157 }
1158 // Generate a per-byte mask() operator for the symbol, which looks like:
1159 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1160 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1161 std::string symText;
1162 llvm::raw_string_ostream oss(symText);
1163 printSymbol(nSym, oss);
1164 for (unsigned i = 0; i < ptrSize; ++i) {
1165 if (i)
1166 os << ", ";
1167 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1168 os << "(" << symText << ")";
1169 }
1170 pos += ptrSize;
1171 nextSymbolPos = symbolPosInBuffer[++nSym];
1172 assert(nextSymbolPos >= pos);
1173 }
1174}
1175
1176void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1177 unsigned int ptrSize = AP.MAI.getCodePointerSize();
1178 symbolPosInBuffer.push_back(Size);
1179 unsigned int nSym = 0;
1180 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1181 assert(nextSymbolPos % ptrSize == 0);
1182 for (unsigned int pos = 0; pos < Size; pos += ptrSize) {
1183 if (pos)
1184 os << ", ";
1185 if (pos == nextSymbolPos) {
1186 printSymbol(nSym, os);
1187 nextSymbolPos = symbolPosInBuffer[++nSym];
1188 assert(nextSymbolPos % ptrSize == 0);
1189 assert(nextSymbolPos >= pos + ptrSize);
1190 } else if (ptrSize == 4)
1191 os << support::endian::read32le(&buffer[pos]);
1192 else
1193 os << support::endian::read64le(&buffer[pos]);
1194 }
1195}
1196
1197void NVPTXAsmPrinter::emitDemotedVars(const Function *F, raw_ostream &O) {
1198 auto It = localDecls.find(F);
1199 if (It == localDecls.end())
1200 return;
1201
1202 ArrayRef<const GlobalVariable *> GVars = It->second;
1203
1204 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1205 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
1206
1207 for (const GlobalVariable *GV : GVars) {
1208 O << "\t// demoted variable\n\t";
1209 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1210 }
1211}
1212
1213void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1214 raw_ostream &O) const {
1215 switch (AddressSpace) {
1217 O << "local";
1218 break;
1220 O << "global";
1221 break;
1223 O << "const";
1224 break;
1226 O << "shared";
1227 break;
1228 default:
1229 report_fatal_error("Bad address space found while emitting PTX: " +
1230 llvm::Twine(AddressSpace));
1231 break;
1232 }
1233}
1234
1235std::string
1236NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1237 switch (Ty->getTypeID()) {
1238 case Type::IntegerTyID: {
1239 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1240 if (NumBits == 1)
1241 return "pred";
1242 if (NumBits <= 64) {
1243 std::string name = "u";
1244 return name + utostr(NumBits);
1245 }
1246 llvm_unreachable("Integer too large");
1247 break;
1248 }
1249 case Type::BFloatTyID:
1250 case Type::HalfTyID:
1251 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1252 // PTX assembly.
1253 return "b16";
1254 case Type::FloatTyID:
1255 return "f32";
1256 case Type::DoubleTyID:
1257 return "f64";
1258 case Type::PointerTyID: {
1259 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1260 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1261
1262 if (PtrSize == 64)
1263 if (useB4PTR)
1264 return "b64";
1265 else
1266 return "u64";
1267 else if (useB4PTR)
1268 return "b32";
1269 else
1270 return "u32";
1271 }
1272 default:
1273 break;
1274 }
1275 llvm_unreachable("unexpected type");
1276}
1277
1278void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1279 raw_ostream &O,
1280 const NVPTXSubtarget &STI) {
1281 const DataLayout &DL = getDataLayout();
1282
1283 // GlobalVariables are always constant pointers themselves.
1284 Type *ETy = GVar->getValueType();
1285
1286 O << ".";
1287 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1288 if (isManaged(*GVar)) {
1289 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
1291 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1292
1293 O << " .attribute(.managed)";
1294 }
1295 O << " .align "
1296 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1297
1298 // Special case for i128/fp128
1299 if (ETy->getScalarSizeInBits() == 128) {
1300 O << " .b8 ";
1301 getSymbol(GVar)->print(O, MAI);
1302 O << "[16]";
1303 return;
1304 }
1305
1306 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1307 O << " ." << getPTXFundamentalTypeStr(ETy) << " ";
1308 getSymbol(GVar)->print(O, MAI);
1309 return;
1310 }
1311
1312 int64_t ElementSize = 0;
1313
1314 // Although PTX has direct support for struct type and array type and LLVM IR
1315 // is very similar to PTX, the LLVM CodeGen does not provide support for targets that
1316 // support these high level field accesses. Structs and arrays are lowered
1317 // into arrays of bytes.
1318 switch (ETy->getTypeID()) {
1319 case Type::StructTyID:
1320 case Type::ArrayTyID:
1322 ElementSize = DL.getTypeStoreSize(ETy);
1323 O << " .b8 ";
1324 getSymbol(GVar)->print(O, MAI);
1325 O << "[";
1326 if (ElementSize) {
1327 O << ElementSize;
1328 }
1329 O << "]";
1330 break;
1331 default:
1332 llvm_unreachable("type not supported yet");
1333 }
1334}
1335
1336void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1337 const DataLayout &DL = getDataLayout();
1338 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1339 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1340 const NVPTXMachineFunctionInfo *MFI =
1341 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1342
1343 bool IsFirst = true;
1344 const bool IsKernelFunc = isKernelFunction(*F);
1345
1346 if (F->arg_empty() && !F->isVarArg()) {
1347 O << "()";
1348 return;
1349 }
1350
1351 O << "(\n";
1352
1353 for (const Argument &Arg : F->args()) {
1354 Type *Ty = Arg.getType();
1355 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1356
1357 if (!IsFirst)
1358 O << ",\n";
1359
1360 IsFirst = false;
1361
1362 // Handle image/sampler parameters
1363 if (IsKernelFunc) {
1364 const PTXOpaqueType ArgOpaqueType = getPTXOpaqueType(Arg);
1365 if (ArgOpaqueType != PTXOpaqueType::None) {
1366 const bool EmitImgPtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1367 O << "\t.param ";
1368 if (EmitImgPtr)
1369 O << ".u64 .ptr ";
1370
1371 switch (ArgOpaqueType) {
1373 O << ".samplerref ";
1374 break;
1376 O << ".texref ";
1377 break;
1379 O << ".surfref ";
1380 break;
1382 llvm_unreachable("handled above");
1383 }
1384 O << ParamSym;
1385 continue;
1386 }
1387 }
1388
1389 auto GetOptimalAlignForParam = [&DL, F, &Arg](Type *Ty) -> Align {
1390 if (MaybeAlign StackAlign =
1391 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1392 return StackAlign.value();
1393
1394 Align TypeAlign = getFunctionParamOptimizedAlign(F, Ty, DL);
1395 MaybeAlign ParamAlign =
1396 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1397 return std::max(TypeAlign, ParamAlign.valueOrOne());
1398 };
1399
1400 if (Arg.hasByValAttr()) {
1401 // param has byVal attribute.
1402 Type *ETy = Arg.getParamByValType();
1403 assert(ETy && "Param should have byval type");
1404
1405 // Print .param .align <a> .b8 .param[size];
1406 // <a> = optimal alignment for the element type; always multiple of
1407 // PAL.getParamAlignment
1408 // size = typeallocsize of element type
1409 const Align OptimalAlign =
1410 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1412 F, ETy, Arg.getParamAlign().valueOrOne(), DL);
1413
1414 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1415 << "[" << DL.getTypeAllocSize(ETy) << "]";
1416 continue;
1417 }
1418
1419 if (shouldPassAsArray(Ty)) {
1420 // Just print .param .align <a> .b8 .param[size];
1421 // <a> = optimal alignment for the element type; always multiple of
1422 // PAL.getParamAlignment
1423 // size = typeallocsize of element type
1424 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1425
1426 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1427 << "[" << DL.getTypeAllocSize(Ty) << "]";
1428
1429 continue;
1430 }
1431 // Just a scalar
1432 auto *PTy = dyn_cast<PointerType>(Ty);
1433 unsigned PTySizeInBits = 0;
1434 if (PTy) {
1435 PTySizeInBits =
1436 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1437 assert(PTySizeInBits && "Invalid pointer size");
1438 }
1439
1440 if (IsKernelFunc) {
1441 if (PTy) {
1442 O << "\t.param .u" << PTySizeInBits << " .ptr";
1443
1444 switch (PTy->getAddressSpace()) {
1445 default:
1446 break;
1448 O << " .global";
1449 break;
1451 O << " .shared";
1452 break;
1454 O << " .const";
1455 break;
1457 O << " .local";
1458 break;
1459 }
1460
1461 O << " .align " << Arg.getParamAlign().valueOrOne().value() << " "
1462 << ParamSym;
1463 continue;
1464 }
1465
1466 // non-pointer scalar to kernel func
1467 O << "\t.param .";
1468 // Special case: predicate operands become .u8 types
1469 if (Ty->isIntegerTy(1))
1470 O << "u8";
1471 else
1472 O << getPTXFundamentalTypeStr(Ty);
1473 O << " " << ParamSym;
1474 continue;
1475 }
1476 // Non-kernel function, just print .param .b<size> for ABI
1477 // and .reg .b<size> for non-ABI
1478 unsigned Size;
1479 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
1480 Size = promoteScalarArgumentSize(ITy->getBitWidth());
1481 } else if (PTy) {
1482 assert(PTySizeInBits && "Invalid pointer size");
1483 Size = PTySizeInBits;
1484 } else
1486 O << "\t.param .b" << Size << " " << ParamSym;
1487 }
1488
1489 if (F->isVarArg()) {
1490 if (!IsFirst)
1491 O << ",\n";
1492 O << "\t.param .align " << STI.getMaxRequiredAlignment() << " .b8 "
1493 << TLI->getParamName(F, /* vararg */ -1) << "[]";
1494 }
1495
1496 O << "\n)";
1497}
1498
1499void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1500 const MachineFunction &MF) {
1501 SmallString<128> Str;
1502 raw_svector_ostream O(Str);
1503
1504 // Map the global virtual register number to a register class specific
1505 // virtual register number starting from 1 with that class.
1506 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1507
1508 // Emit the Fake Stack Object
1509 const MachineFrameInfo &MFI = MF.getFrameInfo();
1510 int64_t NumBytes = MFI.getStackSize();
1511 if (NumBytes) {
1512 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1513 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1514 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1515 O << "\t.reg .b64 \t%SP;\n"
1516 << "\t.reg .b64 \t%SPL;\n";
1517 } else {
1518 O << "\t.reg .b32 \t%SP;\n"
1519 << "\t.reg .b32 \t%SPL;\n";
1520 }
1521 }
1522
1523 // Go through all virtual registers to establish the mapping between the
1524 // global virtual
1525 // register number and the per class virtual register number.
1526 // We use the per class virtual register number in the ptx output.
1527 for (unsigned I : llvm::seq(MRI->getNumVirtRegs())) {
1529 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1530 continue;
1531 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1532 RCRegMap[VR] = RCRegMap.size() + 1;
1533 }
1534
1535 // Emit declaration of the virtual registers or 'physical' registers for
1536 // each register class
1537 for (const TargetRegisterClass *RC : TRI->regclasses()) {
1538 const unsigned N = VRegMapping[RC].size();
1539
1540 // Only declare those registers that may be used.
1541 if (N) {
1542 const StringRef RCName = getNVPTXRegClassName(RC);
1543 const StringRef RCStr = getNVPTXRegClassStr(RC);
1544 O << "\t.reg " << RCName << " \t" << RCStr << "<" << (N + 1) << ">;\n";
1545 }
1546 }
1547
1548 OutStreamer->emitRawText(O.str());
1549}
1550
1551/// Translate virtual register numbers in DebugInfo locations to their printed
1552/// encodings, as used by CUDA-GDB.
1553void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1554 const MachineFunction &MF) {
1555 const NVPTXSubtarget &STI = MF.getSubtarget<NVPTXSubtarget>();
1556 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1557
1558 // Clear the old mapping, and add the new one. This mapping is used after the
1559 // printing of the current function is complete, but before the next function
1560 // is printed.
1561 registerInfo->clearDebugRegisterMap();
1562
1563 for (auto &classMap : VRegMapping) {
1564 for (auto &registerMapping : classMap.getSecond()) {
1565 auto reg = registerMapping.getFirst();
1566 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1567 }
1568 }
1569}
1570
1571void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp,
1572 raw_ostream &O) const {
1573 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1574 bool ignored;
1575 unsigned int numHex;
1576 const char *lead;
1577
1578 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1579 numHex = 8;
1580 lead = "0f";
1582 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1583 numHex = 16;
1584 lead = "0d";
1586 } else
1587 llvm_unreachable("unsupported fp type");
1588
1589 APInt API = APF.bitcastToAPInt();
1590 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1591}
1592
1593void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1594 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1595 O << CI->getValue();
1596 return;
1597 }
1598 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1599 printFPConstant(CFP, O);
1600 return;
1601 }
1602 if (isa<ConstantPointerNull>(CPV)) {
1603 O << "0";
1604 return;
1605 }
1606 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1607 const bool IsNonGenericPointer = GVar->getAddressSpace() != 0;
1608 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1609 O << "generic(";
1610 getSymbol(GVar)->print(O, MAI);
1611 O << ")";
1612 } else {
1613 getSymbol(GVar)->print(O, MAI);
1614 }
1615 return;
1616 }
1617 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1618 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1619 printMCExpr(*E, O);
1620 return;
1621 }
1622 llvm_unreachable("Not scalar type found in printScalarConstant()");
1623}
1624
1625void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1626 AggBuffer *AggBuffer) {
1627 const DataLayout &DL = getDataLayout();
1628 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1629 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1630 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1631 // only the space allocated by CPV.
1632 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1633 return;
1634 }
1635
1636 // Helper for filling AggBuffer with APInts.
1637 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1638 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1639 SmallVector<unsigned char, 16> Buf(NumBytes);
1640 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1641 // input's bit width, and i1 arrays may not have a length that is a multiple
1642 // of 8. We handle the last byte separately, so we never request out of
1643 // bounds bits.
1644 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1645 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1646 }
1647 size_t LastBytePosition = (NumBytes - 1) * 8;
1648 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1649 Buf[NumBytes - 1] =
1650 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1651 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1652 };
1653
1654 switch (CPV->getType()->getTypeID()) {
1655 case Type::IntegerTyID:
1656 if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1657 AddIntToBuffer(CI->getValue());
1658 break;
1659 }
1660 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1661 if (const auto *CI =
1663 AddIntToBuffer(CI->getValue());
1664 break;
1665 }
1666 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1667 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1668 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1669 AggBuffer->addZeros(AllocSize);
1670 break;
1671 }
1672 }
1673 llvm_unreachable("unsupported integer const type");
1674 break;
1675
1676 case Type::HalfTyID:
1677 case Type::BFloatTyID:
1678 case Type::FloatTyID:
1679 case Type::DoubleTyID:
1680 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1681 break;
1682
1683 case Type::PointerTyID: {
1684 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1685 AggBuffer->addSymbol(GVar, GVar);
1686 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1687 const Value *v = Cexpr->stripPointerCasts();
1688 AggBuffer->addSymbol(v, Cexpr);
1689 }
1690 AggBuffer->addZeros(AllocSize);
1691 break;
1692 }
1693
1694 case Type::ArrayTyID:
1696 case Type::StructTyID: {
1698 bufferAggregateConstant(CPV, AggBuffer);
1699 if (Bytes > AllocSize)
1700 AggBuffer->addZeros(Bytes - AllocSize);
1701 } else if (isa<ConstantAggregateZero>(CPV))
1702 AggBuffer->addZeros(Bytes);
1703 else
1704 llvm_unreachable("Unexpected Constant type");
1705 break;
1706 }
1707
1708 default:
1709 llvm_unreachable("unsupported type");
1710 }
1711}
1712
1713void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1714 AggBuffer *aggBuffer) {
1715 const DataLayout &DL = getDataLayout();
1716
1717 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1718 for (unsigned I : llvm::seq(Val.getBitWidth() / 8))
1719 Buffer->addByte(Val.extractBitsAsZExtValue(8, I * 8));
1720 };
1721
1722 // Integer or floating point vector splats.
1724 if (auto *VTy = dyn_cast<FixedVectorType>(CPV->getType())) {
1725 for (unsigned I : llvm::seq(VTy->getNumElements()))
1726 bufferLEByte(CPV->getAggregateElement(I), 0, aggBuffer);
1727 return;
1728 }
1729 }
1730
1731 // Integers of arbitrary width
1732 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1733 assert(CI->getType()->isIntegerTy() && "Expected integer constant!");
1734 ExtendBuffer(CI->getValue(), aggBuffer);
1735 return;
1736 }
1737
1738 // f128
1739 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1740 assert(CFP->getType()->isFloatingPointTy() && "Expected fp constant!");
1741 if (CFP->getType()->isFP128Ty()) {
1742 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1743 return;
1744 }
1745 }
1746
1747 // Buffer arrays one element at a time.
1748 if (isa<ConstantArray>(CPV)) {
1749 for (const auto &Op : CPV->operands())
1750 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1751 return;
1752 }
1753
1754 // Constant vectors
1755 if (const auto *CVec = dyn_cast<ConstantVector>(CPV)) {
1756 bufferAggregateConstVec(CVec, aggBuffer);
1757 return;
1758 }
1759
1760 if (const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1761 for (unsigned I : llvm::seq(CDS->getNumElements()))
1762 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(I)), 0, aggBuffer);
1763 return;
1764 }
1765
1766 if (isa<ConstantStruct>(CPV)) {
1767 if (CPV->getNumOperands()) {
1768 StructType *ST = cast<StructType>(CPV->getType());
1769 for (unsigned I : llvm::seq(CPV->getNumOperands())) {
1770 int EndOffset = (I + 1 == CPV->getNumOperands())
1771 ? DL.getStructLayout(ST)->getElementOffset(0) +
1772 DL.getTypeAllocSize(ST)
1773 : DL.getStructLayout(ST)->getElementOffset(I + 1);
1774 int Bytes = EndOffset - DL.getStructLayout(ST)->getElementOffset(I);
1775 bufferLEByte(cast<Constant>(CPV->getOperand(I)), Bytes, aggBuffer);
1776 }
1777 }
1778 return;
1779 }
1780 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1781}
1782
1783void NVPTXAsmPrinter::bufferAggregateConstVec(const ConstantVector *CV,
1784 AggBuffer *aggBuffer) {
1785 unsigned NumElems = CV->getType()->getNumElements();
1786 const unsigned BuffSize = aggBuffer->getBufferSize();
1787
1788 // Buffer one element at a time if we have allocated enough buffer space.
1789 if (BuffSize >= NumElems) {
1790 for (const auto &Op : CV->operands())
1791 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1792 return;
1793 }
1794
1795 // Sub-byte datatypes will have more elements than bytes allocated for the
1796 // buffer. Merge consecutive elements to form a full byte. We expect that 8 %
1797 // sub-byte-elem-size should be 0 and current expected usage is for i4 (for
1798 // e2m1-fp4 types).
1799 Type *ElemTy = CV->getType()->getElementType();
1800 assert(ElemTy->isIntegerTy() && "Expected integer data type.");
1801 unsigned ElemTySize = ElemTy->getPrimitiveSizeInBits();
1802 assert(ElemTySize < 8 && "Expected sub-byte data type.");
1803 assert(8 % ElemTySize == 0 && "Element type size must evenly divide a byte.");
1804 // Number of elements to merge to form a full byte.
1805 unsigned NumElemsPerByte = 8 / ElemTySize;
1806 unsigned NumCompleteBytes = NumElems / NumElemsPerByte;
1807 unsigned NumTailElems = NumElems % NumElemsPerByte;
1808
1809 // Helper lambda to constant-fold sub-vector of sub-byte type elements into
1810 // i8. Start and end indices of the sub-vector is provided, along with number
1811 // of padding zeros if required.
1812 auto ConvertSubCVtoInt8 = [this, &ElemTy](const ConstantVector *CV,
1813 unsigned Start, unsigned End,
1814 unsigned NumPaddingZeros = 0) {
1815 // Collect elements to create sub-vector.
1816 SmallVector<Constant *, 8> SubCVElems;
1817 for (unsigned I : llvm::seq(Start, End))
1818 SubCVElems.push_back(CV->getAggregateElement(I));
1819
1820 // Optionally pad with zeros.
1821 if (NumPaddingZeros)
1822 SubCVElems.append(NumPaddingZeros, ConstantInt::getNullValue(ElemTy));
1823
1824 auto SubCV = ConstantVector::get(SubCVElems);
1825 Type *Int8Ty = IntegerType::get(SubCV->getContext(), 8);
1826
1827 // Merge elements of the sub-vector using ConstantFolding.
1828 ConstantInt *MergedElem =
1830 ConstantExpr::getBitCast(const_cast<Constant *>(SubCV), Int8Ty),
1831 getDataLayout()));
1832
1833 if (!MergedElem)
1835 "Cannot lower vector global with unusual element type");
1836
1837 return MergedElem;
1838 };
1839
1840 // Iterate through elements of vector one chunk at a time and buffer that
1841 // chunk.
1842 for (unsigned ByteIdx : llvm::seq(NumCompleteBytes))
1843 bufferLEByte(ConvertSubCVtoInt8(CV, ByteIdx * NumElemsPerByte,
1844 (ByteIdx + 1) * NumElemsPerByte),
1845 0, aggBuffer);
1846
1847 // For unevenly sized vectors add tail padding zeros.
1848 if (NumTailElems > 0)
1849 bufferLEByte(ConvertSubCVtoInt8(CV, NumElems - NumTailElems, NumElems,
1850 NumElemsPerByte - NumTailElems),
1851 0, aggBuffer);
1852}
1853
1854/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1855/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1856/// expressions that are representable in PTX and create
1857/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1858const MCExpr *
1859NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
1860 bool ProcessingGeneric) const {
1861 MCContext &Ctx = OutContext;
1862
1863 if (CV->isNullValue() || isa<UndefValue>(CV))
1864 return MCConstantExpr::create(0, Ctx);
1865
1866 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1867 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1868
1869 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1870 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1871 if (ProcessingGeneric)
1872 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1873 return Expr;
1874 }
1875
1876 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1877 if (!CE) {
1878 llvm_unreachable("Unknown constant value to lower!");
1879 }
1880
1881 switch (CE->getOpcode()) {
1882 default:
1883 break; // Error
1884
1885 case Instruction::AddrSpaceCast: {
1886 // Strip the addrspacecast and pass along the operand
1887 PointerType *DstTy = cast<PointerType>(CE->getType());
1888 if (DstTy->getAddressSpace() == 0)
1889 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1890
1891 break; // Error
1892 }
1893
1894 case Instruction::GetElementPtr: {
1895 const DataLayout &DL = getDataLayout();
1896
1897 // Generate a symbolic expression for the byte address
1898 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1899 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1900
1901 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1902 ProcessingGeneric);
1903 if (!OffsetAI)
1904 return Base;
1905
1906 int64_t Offset = OffsetAI.getSExtValue();
1908 Ctx);
1909 }
1910
1911 case Instruction::Trunc:
1912 // We emit the value and depend on the assembler to truncate the generated
1913 // expression properly. This is important for differences between
1914 // blockaddress labels. Since the two labels are in the same function, it
1915 // is reasonable to treat their delta as a 32-bit value.
1916 [[fallthrough]];
1917 case Instruction::BitCast:
1918 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1919
1920 case Instruction::IntToPtr: {
1921 const DataLayout &DL = getDataLayout();
1922
1923 // Handle casts to pointers by changing them into casts to the appropriate
1924 // integer type. This promotes constant folding and simplifies this code.
1925 Constant *Op = CE->getOperand(0);
1926 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1927 /*IsSigned*/ false, DL);
1928 if (Op)
1929 return lowerConstantForGV(Op, ProcessingGeneric);
1930
1931 break; // Error
1932 }
1933
1934 case Instruction::PtrToInt: {
1935 const DataLayout &DL = getDataLayout();
1936
1937 // Support only foldable casts to/from pointers that can be eliminated by
1938 // changing the pointer to the appropriately sized integer type.
1939 Constant *Op = CE->getOperand(0);
1940 Type *Ty = CE->getType();
1941
1942 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1943
1944 // We can emit the pointer value into this slot if the slot is an
1945 // integer slot equal to the size of the pointer.
1946 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1947 return OpExpr;
1948
1949 // Otherwise the pointer is smaller than the resultant integer, mask off
1950 // the high bits so we are sure to get a proper truncation if the input is
1951 // a constant expr.
1952 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1953 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1954 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1955 }
1956
1957 // The MC library also has a right-shift operator, but it isn't consistently
1958 // signed or unsigned between different targets.
1959 case Instruction::Add: {
1960 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1961 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
1962 switch (CE->getOpcode()) {
1963 default: llvm_unreachable("Unknown binary operator constant cast expr");
1964 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1965 }
1966 }
1967 }
1968
1969 // If the code isn't optimized, there may be outstanding folding
1970 // opportunities. Attempt to fold the expression using DataLayout as a
1971 // last resort before giving up.
1973 if (C != CE)
1974 return lowerConstantForGV(C, ProcessingGeneric);
1975
1976 // Otherwise report the problem to the user.
1977 std::string S;
1978 raw_string_ostream OS(S);
1979 OS << "Unsupported expression in static initializer: ";
1980 CE->printAsOperand(OS, /*PrintType=*/false,
1981 !MF ? nullptr : MF->getFunction().getParent());
1982 report_fatal_error(Twine(OS.str()));
1983}
1984
1985void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) const {
1986 OutContext.getAsmInfo().printExpr(OS, Expr);
1987}
1988
1989/// PrintAsmOperand - Print out an operand for an inline asm expression.
1990///
1991bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1992 const char *ExtraCode, raw_ostream &O) {
1993 if (ExtraCode && ExtraCode[0]) {
1994 if (ExtraCode[1] != 0)
1995 return true; // Unknown modifier.
1996
1997 switch (ExtraCode[0]) {
1998 default:
1999 // See if this is a generic print operand
2000 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2001 case 'r':
2002 break;
2003 }
2004 }
2005
2006 printOperand(MI, OpNo, O);
2007
2008 return false;
2009}
2010
2011bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2012 unsigned OpNo,
2013 const char *ExtraCode,
2014 raw_ostream &O) {
2015 if (ExtraCode && ExtraCode[0])
2016 return true; // Unknown modifier
2017
2018 O << '[';
2019 printMemOperand(MI, OpNo, O);
2020 O << ']';
2021
2022 return false;
2023}
2024
2025void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2026 raw_ostream &O) {
2027 const MachineOperand &MO = MI->getOperand(OpNum);
2028 switch (MO.getType()) {
2030 if (MO.getReg().isPhysical()) {
2031 if (MO.getReg() == NVPTX::VRDepot)
2033 else
2035 } else {
2036 emitVirtualRegister(MO.getReg(), O);
2037 }
2038 break;
2039
2041 O << MO.getImm();
2042 break;
2043
2045 printFPConstant(MO.getFPImm(), O);
2046 break;
2047
2049 PrintSymbolOperand(MO, O);
2050 break;
2051
2053 MO.getMBB()->getSymbol()->print(O, MAI);
2054 break;
2055
2056 default:
2057 llvm_unreachable("Operand type not supported.");
2058 }
2059}
2060
2061void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2062 raw_ostream &O, const char *Modifier) {
2063 printOperand(MI, OpNum, O);
2064
2065 if (Modifier && strcmp(Modifier, "add") == 0) {
2066 O << ", ";
2067 printOperand(MI, OpNum + 1, O);
2068 } else {
2069 if (MI->getOperand(OpNum + 1).isImm() &&
2070 MI->getOperand(OpNum + 1).getImm() == 0)
2071 return; // don't print ',0' or '+0'
2072 O << "+";
2073 printOperand(MI, OpNum + 1, O);
2074 }
2075}
2076
2077/// Returns true if \p Line begins with an alphabetic character or underscore,
2078/// indicating it is a PTX instruction that should receive a .loc directive.
2079static bool isPTXInstruction(StringRef Line) {
2080 StringRef Trimmed = Line.ltrim();
2081 return !Trimmed.empty() &&
2082 (std::isalpha(static_cast<unsigned char>(Trimmed[0])) ||
2083 Trimmed[0] == '_');
2084}
2085
2086/// Returns the DILocation for an inline asm MachineInstr if debug line info
2087/// should be emitted, or nullptr otherwise.
2089 if (!MI || !MI->getDebugLoc())
2090 return nullptr;
2091 const DISubprogram *SP = MI->getMF()->getFunction().getSubprogram();
2092 if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
2093 return nullptr;
2094 const DILocation *DL = MI->getDebugLoc();
2095 if (!DL->getFile() || !DL->getLine())
2096 return nullptr;
2097 return DL;
2098}
2099
2100namespace {
2101struct InlineAsmInliningContext {
2102 MCSymbol *FuncNameSym = nullptr;
2103 unsigned FileIA = 0;
2104 unsigned LineIA = 0;
2105 unsigned ColIA = 0;
2106
2107 bool hasInlinedAt() const { return FuncNameSym != nullptr; }
2108};
2109} // namespace
2110
2111/// Resolves the enhanced-lineinfo inlining context for an inline asm debug
2112/// location. Returns a default (empty) context if inlining info is unavailable.
2113static InlineAsmInliningContext
2115 NVPTXDwarfDebug *NVDD, MCStreamer &Streamer,
2116 unsigned CUID) {
2117 InlineAsmInliningContext Ctx;
2118 const DILocation *InlinedAt = DL->getInlinedAt();
2119 if (!InlinedAt || !InlinedAt->getFile() || !NVDD ||
2120 !NVDD->isEnhancedLineinfo(MF))
2121 return Ctx;
2122 const auto *SubProg = getDISubprogram(DL->getScope());
2123 if (!SubProg)
2124 return Ctx;
2125 Ctx.FuncNameSym = NVDD->getOrCreateFuncNameSymbol(SubProg->getLinkageName());
2126 Ctx.FileIA = Streamer.emitDwarfFileDirective(
2127 0, InlinedAt->getFile()->getDirectory(),
2128 InlinedAt->getFile()->getFilename(), std::nullopt, std::nullopt, CUID);
2129 Ctx.LineIA = InlinedAt->getLine();
2130 Ctx.ColIA = InlinedAt->getColumn();
2131 return Ctx;
2132}
2133
2134void NVPTXAsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
2135 const MCTargetOptions &MCOptions,
2136 const MDNode *LocMDNode,
2137 InlineAsm::AsmDialect Dialect,
2138 const MachineInstr *MI) {
2139 assert(!Str.empty() && "Can't emit empty inline asm block");
2140 if (Str.back() == 0)
2141 Str = Str.substr(0, Str.size() - 1);
2142
2143 auto emitAsmStr = [&](StringRef AsmStr) {
2145 OutStreamer->emitRawText(AsmStr);
2146 emitInlineAsmEnd(STI, nullptr, MI);
2147 };
2148
2149 const DILocation *DL = getInlineAsmDebugLoc(MI);
2150 if (!DL) {
2151 emitAsmStr(Str);
2152 return;
2153 }
2154
2155 const DIFile *File = DL->getFile();
2156 unsigned Line = DL->getLine();
2157 const unsigned Column = DL->getColumn();
2158 const unsigned CUID = OutStreamer->getContext().getDwarfCompileUnitID();
2159 const unsigned FileNumber = OutStreamer->emitDwarfFileDirective(
2160 0, File->getDirectory(), File->getFilename(), std::nullopt, std::nullopt,
2161 CUID);
2162
2163 auto *NVDD = static_cast<NVPTXDwarfDebug *>(getDwarfDebug());
2164 InlineAsmInliningContext InlineCtx =
2165 getInlineAsmInliningContext(DL, *MI->getMF(), NVDD, *OutStreamer, CUID);
2166
2167 SmallVector<StringRef, 16> Lines;
2168 Str.split(Lines, '\n');
2170 for (const StringRef &L : Lines) {
2171 StringRef RTrimmed = L.rtrim('\r');
2172 if (isPTXInstruction(L)) {
2173 if (InlineCtx.hasInlinedAt()) {
2174 OutStreamer->emitDwarfLocDirectiveWithInlinedAt(
2175 FileNumber, Line, Column, InlineCtx.FileIA, InlineCtx.LineIA,
2176 InlineCtx.ColIA, InlineCtx.FuncNameSym, DWARF2_FLAG_IS_STMT, 0, 0,
2177 File->getFilename());
2178 } else {
2179 OutStreamer->emitDwarfLocDirective(FileNumber, Line, Column,
2180 DWARF2_FLAG_IS_STMT, 0, 0,
2181 File->getFilename());
2182 }
2183 }
2184 OutStreamer->emitRawText(RTrimmed);
2185 ++Line;
2186 }
2187 emitInlineAsmEnd(STI, nullptr, MI);
2188}
2189
// Definition of the static pass identifier member, initialized to 0.
2190char NVPTXAsmPrinter::ID = 0;
2191
// Invokes the INITIALIZE_PASS macro for NVPTXAsmPrinter with the argument
// string "nvptx-asm-printer" and the name "NVPTX Assembly Printer".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only / is-analysis flags; confirm against the macro definition.
2192INITIALIZE_PASS(NVPTXAsmPrinter, "nvptx-asm-printer", "NVPTX Assembly Printer",
2193 false, false)
2194
2195// Force static initialization.
2196extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2197LLVMInitializeNVPTXAsmPrinter() {
2200}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define DWARF2_FLAG_IS_STMT
Definition MCDwarf.h:119
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static StringRef getTextureName(const Value &V)
static const DILocation * getInlineAsmDebugLoc(const MachineInstr *MI)
Returns the DILocation for an inline asm MachineInstr if debug line info should be emitted,...
#define DEPOTNAME
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool hasFullDebugInfo(Module &M)
static StringRef getSurfaceName(const Value &V)
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static StringRef getSamplerName(const Value &V)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static InlineAsmInliningContext getInlineAsmInliningContext(const DILocation *DL, const MachineFunction &MF, NVPTXDwarfDebug *NVDD, MCStreamer &Streamer, unsigned CUID)
Resolves the enhanced-lineinfo inlining context for an inline asm debug location.
static bool isPTXInstruction(StringRef Line)
Returns true if Line begins with an alphabetic character or underscore, indicating it is a PTX instru...
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static const char * name
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5890
APInt bitcastToAPInt() const
Definition APFloat.h:1426
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:520
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
DwarfDebug * getDwarfDebug()
Definition AsmPrinter.h:290
virtual void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo, const MachineInstr *MI)
Let the target do anything it needs to do after emitting inlineasm.
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition AsmPrinter.h:518
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
Definition AsmPrinter.h:625
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:128
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition AsmPrinter.h:456
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106
const MCAsmInfo & MAI
Target Asm Printer information.
Definition AsmPrinter.h:97
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
Definition AsmPrinter.h:629
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual void emitInlineAsmStart() const
Let the target do anything it needs to do before emitting inlineasm.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
const APFloat & getValueAPF() const
Definition Constants.h:463
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
FixedVectorType * getType() const
Specialize the getType() method to always return a FixedVectorType, which reduces the amount of casti...
Definition Constants.h:683
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
Subprogram description. Uses SubclassData1.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Collects and handles dwarf debug information.
Definition DwarfDebug.h:352
unsigned getNumElements() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void addOperand(const MCOperand Op)
Definition MCInst.h:215
void setOpcode(unsigned Op)
Definition MCInst.h:201
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
Streaming machine code generation interface.
Definition MCStreamer.h:222
unsigned emitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename, std::optional< MD5::MD5Result > Checksum=std::nullopt, std::optional< StringRef > Source=std::nullopt, unsigned CUID=0)
Associate a filename with a specified logical file number.
Definition MCStreamer.h:879
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition MCSymbol.cpp:59
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
DwarfDebug * createDwarfDebug() override
Create NVPTX-specific DwarfDebug handler.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
NVPTX-specific DwarfDebug implementation.
bool isEnhancedLineinfo(const MachineFunction &MF) const
Returns true if the enhanced lineinfo mode (with inlined_at) is active for the given MachineFunction.
MCSymbol * getOrCreateFuncNameSymbol(StringRef LinkageName)
Get or create an MCSymbol in .debug_str for a function's linkage name.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
iterator begin() const
Definition StringRef.h:113
StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
Definition StringRef.h:820
iterator end() const
Definition StringRef.h:115
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition Type.h:76
@ HalfTyID
16-bit floating point type
Definition Type.h:57
@ VoidTyID
type with no size
Definition Type.h:64
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ StructTyID
Structures.
Definition Type.h:75
@ IntegerTyID
Arbitrary bit width integers.
Definition Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition Type.h:77
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ PointerTyID
Pointers.
Definition Type.h:74
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
TypeID getTypeID() const
Return the type id for the type.
Definition Type.h:138
op_range operands()
Definition User.h:267
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type size() const
Definition DenseSet.h:87
bool erase(const ValueT &V)
Definition DenseSet.h:100
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
constexpr StringLiteral BlocksAreClusters("nvvm.blocksareclusters")
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
uint64_t read64le(const void *P)
Definition Endian.h:435
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2179
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MaybeAlign getAlign(const CallInst &I, unsigned Index)
std::optional< unsigned > getMaxNReg(const Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition Format.h:204
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::optional< unsigned > getMaxClusterRank(const Function &F)
Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, Align InitialAlign, const DataLayout &DL)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
PTXOpaqueType getPTXOpaqueType(const GlobalVariable &GV)
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
LLVM_ABI DISubprogram * getDISubprogram(const MDNode *Scope)
Find subprogram that is enclosing this scope.
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL)
Since function arguments are passed via .param space, we may want to increase their alignment in a wa...
Target & getTheNVPTXTarget32()
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...