LLVM 23.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXDwarfDebug.h"
21#include "NVPTXMCExpr.h"
23#include "NVPTXRegisterInfo.h"
24#include "NVPTXSubtarget.h"
25#include "NVPTXTargetMachine.h"
26#include "NVPTXUtilities.h"
27#include "NVVMProperties.h"
29#include "cl_common_defines.h"
30#include "llvm/ADT/APFloat.h"
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/ArrayRef.h"
33#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/DenseSet.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/Twine.h"
54#include "llvm/IR/Argument.h"
55#include "llvm/IR/Attributes.h"
56#include "llvm/IR/BasicBlock.h"
57#include "llvm/IR/Constant.h"
58#include "llvm/IR/Constants.h"
59#include "llvm/IR/DataLayout.h"
60#include "llvm/IR/DebugInfo.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalAlias.h"
66#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/Instruction.h"
69#include "llvm/IR/LLVMContext.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Operator.h"
72#include "llvm/IR/Type.h"
73#include "llvm/IR/User.h"
74#include "llvm/MC/MCExpr.h"
75#include "llvm/MC/MCInst.h"
76#include "llvm/MC/MCInstrDesc.h"
77#include "llvm/MC/MCStreamer.h"
78#include "llvm/MC/MCSymbol.h"
83#include "llvm/Support/Endian.h"
90#include <cassert>
91#include <cstdint>
92#include <cstring>
93#include <string>
94
95using namespace llvm;
96
97#define DEPOTNAME "__local_depot"
98
99static StringRef getTextureName(const Value &V) {
100 assert(V.hasName() && "Found texture variable with no name");
101 return V.getName();
102}
103
105 assert(V.hasName() && "Found surface variable with no name");
106 return V.getName();
107}
108
110 assert(V.hasName() && "Found sampler variable with no name");
111 return V.getName();
112}
113
114/// discoverDependentGlobals - Return a set of GlobalVariables on which \p V
115/// depends.
116static void
119 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
120 Globals.insert(GV);
121 return;
122 }
123
124 if (const User *U = dyn_cast<User>(V))
125 for (const auto &O : U->operands())
126 discoverDependentGlobals(O, Globals);
127}
128
129/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
130/// instances to be emitted, but only after any dependents have been added
131/// first.
132static void
137 // Have we already visited this one?
138 if (Visited.count(GV))
139 return;
140
141 // Do we have a circular dependency?
142 if (!Visiting.insert(GV).second)
143 report_fatal_error("Circular dependency found in global variable set");
144
145 // Make sure we visit all dependents first
147 for (const auto &O : GV->operands())
148 discoverDependentGlobals(O, Others);
149
150 for (const GlobalVariable *GV : Others)
151 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
152
153 // Now we can visit ourself
154 Order.push_back(GV);
155 Visited.insert(GV);
156 Visiting.erase(GV);
157}
158
159void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
160 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
161 getSubtargetInfo().getFeatureBits());
162
163 MCInst Inst;
164 lowerToMCInst(MI, Inst);
166}
167
168void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
169 OutMI.setOpcode(MI->getOpcode());
170 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
171 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
172 const MachineOperand &MO = MI->getOperand(0);
173 OutMI.addOperand(GetSymbolRef(
174 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
175 return;
176 }
177
178 for (const auto MO : MI->operands())
179 OutMI.addOperand(lowerOperand(MO));
180}
181
182MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
183 switch (MO.getType()) {
184 default:
185 llvm_unreachable("unknown operand type");
187 return MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
189 return MCOperand::createImm(MO.getImm());
194 return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
196 return GetSymbolRef(getSymbol(MO.getGlobal()));
198 const ConstantFP *Cnt = MO.getFPImm();
199 const APFloat &Val = Cnt->getValueAPF();
200
201 switch (Cnt->getType()->getTypeID()) {
202 default:
203 report_fatal_error("Unsupported FP type");
204 break;
205 case Type::HalfTyID:
208 case Type::BFloatTyID:
211 case Type::FloatTyID:
214 case Type::DoubleTyID:
217 }
218 break;
219 }
220 }
221}
222
223unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
225 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
226
227 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
228 unsigned RegNum = RegMap[Reg];
229
230 // Encode the register class in the upper 4 bits
231 // Must be kept in sync with NVPTXInstPrinter::printRegName
232 unsigned Ret = 0;
233 if (RC == &NVPTX::B1RegClass) {
234 Ret = (1 << 28);
235 } else if (RC == &NVPTX::B16RegClass) {
236 Ret = (2 << 28);
237 } else if (RC == &NVPTX::B32RegClass) {
238 Ret = (3 << 28);
239 } else if (RC == &NVPTX::B64RegClass) {
240 Ret = (4 << 28);
241 } else if (RC == &NVPTX::B128RegClass) {
242 Ret = (7 << 28);
243 } else {
244 report_fatal_error("Bad register class");
245 }
246
247 // Insert the vreg number
248 Ret |= (RegNum & 0x0FFFFFFF);
249 return Ret;
250 } else {
251 // Some special-use registers are actually physical registers.
252 // Encode this as the register class ID of 0 and the real register ID.
253 return Reg & 0x0FFFFFFF;
254 }
255}
256
257MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
258 const MCExpr *Expr;
259 Expr = MCSymbolRefExpr::create(Symbol, OutContext);
260 return MCOperand::createExpr(Expr);
261}
262
263void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
264 const DataLayout &DL = getDataLayout();
265 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
266 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
267
268 Type *Ty = F->getReturnType();
269 if (Ty->getTypeID() == Type::VoidTyID)
270 return;
271 O << " (";
272
273 auto PrintScalarRetVal = [&](unsigned Size) {
274 O << ".param .b" << promoteScalarArgumentSize(Size) << " func_retval0";
275 };
276 if (shouldPassAsArray(Ty)) {
277 const unsigned TotalSize = DL.getTypeAllocSize(Ty);
278 const Align RetAlignment =
279 getFunctionArgumentAlignment(F, Ty, AttributeList::ReturnIndex, DL);
280 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
281 << TotalSize << "]";
282 } else if (Ty->isFloatingPointTy()) {
283 PrintScalarRetVal(Ty->getPrimitiveSizeInBits());
284 } else if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
285 PrintScalarRetVal(ITy->getBitWidth());
286 } else if (isa<PointerType>(Ty)) {
287 PrintScalarRetVal(TLI->getPointerTy(DL).getSizeInBits());
288 } else
289 llvm_unreachable("Unknown return type");
290 O << ") ";
291}
292
293void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
294 raw_ostream &O) {
295 const Function &F = MF.getFunction();
296 printReturnValStr(&F, O);
297}
298
299// Return true if MBB is the header of a loop marked with
300// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
301bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
302 const MachineBasicBlock &MBB) const {
303 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
304 // We insert .pragma "nounroll" only to the loop header.
305 if (!LI.isLoopHeader(&MBB))
306 return false;
307
308 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
309 // we iterate through each back edge of the loop with header MBB, and check
310 // whether its metadata contains llvm.loop.unroll.disable.
311 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
312 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
313 // Edges from other loops to MBB are not back edges.
314 continue;
315 }
316 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
317 if (MDNode *LoopID =
318 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
319 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
320 return true;
321 if (MDNode *UnrollCountMD =
322 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
323 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
324 ->isOne())
325 return true;
326 }
327 }
328 }
329 }
330 return false;
331}
332
333void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
335 if (isLoopHeaderOfNoUnroll(MBB))
336 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
337}
338
340 SmallString<128> Str;
341 raw_svector_ostream O(Str);
342
343 if (!GlobalsEmitted) {
344 emitGlobals(*MF->getFunction().getParent());
345 GlobalsEmitted = true;
346 }
347
348 // Set up
349 MRI = &MF->getRegInfo();
350 F = &MF->getFunction();
351 emitLinkageDirective(F, O);
352 if (isKernelFunction(*F))
353 O << ".entry ";
354 else {
355 O << ".func ";
356 printReturnValStr(*MF, O);
357 }
358
359 CurrentFnSym->print(O, MAI);
360
361 emitFunctionParamList(F, O);
362 O << "\n";
363
364 if (isKernelFunction(*F))
365 emitKernelFunctionDirectives(*F, O);
366
368 O << ".noreturn";
369
370 OutStreamer->emitRawText(O.str());
371
372 VRegMapping.clear();
373 // Emit open brace for function body.
374 OutStreamer->emitRawText(StringRef("{\n"));
375 setAndEmitFunctionVirtualRegisters(*MF);
376 encodeDebugInfoRegisterNumbers(*MF);
377 // Emit initial .loc debug directive for correct relocation symbol data.
378 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
379 assert(SP->getUnit());
380 if (!SP->getUnit()->isDebugDirectivesOnly())
382 }
383}
384
386 bool Result = AsmPrinter::runOnMachineFunction(F);
387 // Emit closing brace for the body of function F.
388 // The closing brace must be emitted here because we need to emit additional
389 // debug labels/data after the last basic block.
390 // We need to emit the closing brace here because we don't have function that
391 // finished emission of the function body.
392 OutStreamer->emitRawText(StringRef("}\n"));
393 return Result;
394}
395
398 raw_svector_ostream O(Str);
399 emitDemotedVars(&MF->getFunction(), O);
400 OutStreamer->emitRawText(O.str());
401}
402
404 VRegMapping.clear();
405}
406
410 return OutContext.getOrCreateSymbol(Str);
411}
412
413void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
414 Register RegNo = MI->getOperand(0).getReg();
415 if (RegNo.isVirtual()) {
416 OutStreamer->AddComment(Twine("implicit-def: ") +
418 } else {
419 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
420 OutStreamer->AddComment(Twine("implicit-def: ") +
421 STI.getRegisterInfo()->getName(RegNo));
422 }
423 OutStreamer->addBlankLine();
424}
425
426void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
427 raw_ostream &O) const {
428 // If the NVVM IR has some of reqntid* specified, then output
429 // the reqntid directive, and set the unspecified ones to 1.
430 // If none of Reqntid* is specified, don't output reqntid directive.
431 const auto ReqNTID = getReqNTID(F);
432 if (!ReqNTID.empty())
433 O << formatv(".reqntid {0:$[, ]}\n",
435
436 const auto MaxNTID = getMaxNTID(F);
437 if (!MaxNTID.empty())
438 O << formatv(".maxntid {0:$[, ]}\n",
440
441 if (const auto Mincta = getMinCTASm(F))
442 O << ".minnctapersm " << *Mincta << "\n";
443
444 if (const auto Maxnreg = getMaxNReg(F))
445 O << ".maxnreg " << *Maxnreg << "\n";
446
447 // .maxclusterrank directive requires SM_90 or higher, make sure that we
448 // filter it out for lower SM versions, as it causes a hard ptxas crash.
449 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
450 const NVPTXSubtarget *STI = &NTM.getSubtarget<NVPTXSubtarget>(F);
451
452 if (STI->getSmVersion() >= 90) {
453 const auto ClusterDim = getClusterDim(F);
455
456 if (!ClusterDim.empty()) {
457
458 if (!BlocksAreClusters)
459 O << ".explicitcluster\n";
460
461 if (ClusterDim[0] != 0) {
462 assert(llvm::all_of(ClusterDim, not_equal_to(0)) &&
463 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
464 "should be non-zero as well");
465
466 O << formatv(".reqnctapercluster {0:$[, ]}\n",
468 } else {
469 assert(llvm::all_of(ClusterDim, equal_to(0)) &&
470 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
471 "should be 0 as well");
472 }
473 }
474
475 if (BlocksAreClusters) {
476 LLVMContext &Ctx = F.getContext();
477 if (ReqNTID.empty() || ClusterDim.empty())
478 Ctx.diagnose(DiagnosticInfoUnsupported(
479 F, "blocksareclusters requires reqntid and cluster_dim attributes",
480 F.getSubprogram()));
481 else if (STI->getPTXVersion() < 90)
482 Ctx.diagnose(DiagnosticInfoUnsupported(
483 F, "blocksareclusters requires PTX version >= 9.0",
484 F.getSubprogram()));
485 else
486 O << ".blocksareclusters\n";
487 }
488
489 if (const auto Maxclusterrank = getMaxClusterRank(F))
490 O << ".maxclusterrank " << *Maxclusterrank << "\n";
491 }
492}
493
494std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
495 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
496
497 std::string Name;
498 raw_string_ostream NameStr(Name);
499
500 VRegRCMap::const_iterator I = VRegMapping.find(RC);
501 assert(I != VRegMapping.end() && "Bad register class");
502 const DenseMap<unsigned, unsigned> &RegMap = I->second;
503
504 VRegMap::const_iterator VI = RegMap.find(Reg);
505 assert(VI != RegMap.end() && "Bad virtual register");
506 unsigned MappedVR = VI->second;
507
508 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
509
510 return Name;
511}
512
513void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
514 raw_ostream &O) {
515 O << getVirtualRegisterName(vr);
516}
517
518void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
519 raw_ostream &O) {
521 if (!F || isKernelFunction(*F) || F->isDeclaration())
523 "NVPTX aliasee must be a non-kernel function definition");
524
525 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
527 report_fatal_error("NVPTX aliasee must not be '.weak'");
528
529 emitDeclarationWithName(F, getSymbol(GA), O);
530}
531
532void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
533 emitDeclarationWithName(F, getSymbol(F), O);
534}
535
536void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
537 raw_ostream &O) {
538 emitLinkageDirective(F, O);
539 if (isKernelFunction(*F))
540 O << ".entry ";
541 else
542 O << ".func ";
543 printReturnValStr(F, O);
544 S->print(O, MAI);
545 O << "\n";
546 emitFunctionParamList(F, O);
547 O << "\n";
549 O << ".noreturn";
550 O << ";\n";
551}
552
553static bool usedInGlobalVarDef(const Constant *C) {
554 if (!C)
555 return false;
556
558 return GV->getName() != "llvm.used";
559
560 for (const User *U : C->users())
561 if (const Constant *C = dyn_cast<Constant>(U))
563 return true;
564
565 return false;
566}
567
568static bool usedInOneFunc(const User *U, Function const *&OneFunc) {
569 if (const GlobalVariable *OtherGV = dyn_cast<GlobalVariable>(U))
570 if (OtherGV->getName() == "llvm.used")
571 return true;
572
573 if (const Instruction *I = dyn_cast<Instruction>(U)) {
574 if (const Function *CurFunc = I->getFunction()) {
575 if (OneFunc && (CurFunc != OneFunc))
576 return false;
577 OneFunc = CurFunc;
578 return true;
579 }
580 return false;
581 }
582
583 for (const User *UU : U->users())
584 if (!usedInOneFunc(UU, OneFunc))
585 return false;
586
587 return true;
588}
589
590/* Find out if a global variable can be demoted to local scope.
591 * Currently, this is valid for CUDA shared variables, which have local
592 * scope and global lifetime. So the conditions to check are :
593 * 1. Is the global variable in shared address space?
594 * 2. Does it have local linkage?
595 * 3. Is the global variable referenced only in one function?
596 */
597static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f) {
598 if (!GV->hasLocalLinkage())
599 return false;
601 return false;
602
603 const Function *oneFunc = nullptr;
604
605 bool flag = usedInOneFunc(GV, oneFunc);
606 if (!flag)
607 return false;
608 if (!oneFunc)
609 return false;
610 f = oneFunc;
611 return true;
612}
613
614static bool useFuncSeen(const Constant *C,
615 const SmallPtrSetImpl<const Function *> &SeenSet) {
616 for (const User *U : C->users()) {
617 if (const Constant *cu = dyn_cast<Constant>(U)) {
618 if (useFuncSeen(cu, SeenSet))
619 return true;
620 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
621 if (const Function *Caller = I->getFunction())
622 if (SeenSet.contains(Caller))
623 return true;
624 }
625 }
626 return false;
627}
628
629void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
630 SmallPtrSet<const Function *, 32> SeenSet;
631 for (const Function &F : M) {
632 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
633 emitDeclaration(&F, O);
634 continue;
635 }
636
637 if (F.isDeclaration()) {
638 if (F.use_empty())
639 continue;
640 if (F.getIntrinsicID())
641 continue;
642 emitDeclaration(&F, O);
643 continue;
644 }
645 for (const User *U : F.users()) {
646 if (const Constant *C = dyn_cast<Constant>(U)) {
647 if (usedInGlobalVarDef(C)) {
648 // The use is in the initialization of a global variable
649 // that is a function pointer, so print a declaration
650 // for the original function
651 emitDeclaration(&F, O);
652 break;
653 }
654 // Emit a declaration of this function if the function that
655 // uses this constant expr has already been seen.
656 if (useFuncSeen(C, SeenSet)) {
657 emitDeclaration(&F, O);
658 break;
659 }
660 }
661
662 if (!isa<Instruction>(U))
663 continue;
664 const Function *Caller = cast<Instruction>(U)->getFunction();
665 if (!Caller)
666 continue;
667
668 // If a caller has already been seen, then the caller is
669 // appearing in the module before the callee. so print out
670 // a declaration for the callee.
671 if (SeenSet.contains(Caller)) {
672 emitDeclaration(&F, O);
673 break;
674 }
675 }
676 SeenSet.insert(&F);
677 }
678 for (const GlobalAlias &GA : M.aliases())
679 emitAliasDeclaration(&GA, O);
680}
681
682void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
683 // Construct a default subtarget off of the TargetMachine defaults. The
684 // rest of NVPTX isn't friendly to change subtargets per function and
685 // so the default TargetMachine will have all of the options.
686 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
687 const NVPTXSubtarget *STI = NTM.getSubtargetImpl();
688 SmallString<128> Str1;
689 raw_svector_ostream OS1(Str1);
690
691 // Emit header before any dwarf directives are emitted below.
692 emitHeader(M, OS1, *STI);
693 OutStreamer->emitRawText(OS1.str());
694}
695
696/// Create NVPTX-specific DwarfDebug handler.
700
702 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
703 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
704 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
705 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
706
707 // We need to call the parent's one explicitly.
708 bool Result = AsmPrinter::doInitialization(M);
709
710 GlobalsEmitted = false;
711
712 return Result;
713}
714
715void NVPTXAsmPrinter::emitGlobals(const Module &M) {
716 SmallString<128> Str2;
717 raw_svector_ostream OS2(Str2);
718
719 emitDeclarations(M, OS2);
720
721 // As ptxas does not support forward references of globals, we need to first
722 // sort the list of module-level globals in def-use order. We visit each
723 // global variable in order, and ensure that we emit it *after* its dependent
724 // globals. We use a little extra memory maintaining both a set and a list to
725 // have fast searches while maintaining a strict ordering.
729
730 // Visit each global variable, in order
731 for (const GlobalVariable &I : M.globals())
732 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
733
734 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
735 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
736
737 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
738 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
739
740 // Print out module-level global variables in proper order
741 for (const GlobalVariable *GV : Globals)
742 printModuleLevelGV(GV, OS2, /*ProcessDemoted=*/false, STI);
743
744 OS2 << '\n';
745
746 OutStreamer->emitRawText(OS2.str());
747}
748
749void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
751 raw_svector_ostream OS(Str);
752
753 MCSymbol *Name = getSymbol(&GA);
754
755 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
756 << ";\n";
757
758 OutStreamer->emitRawText(OS.str());
759}
760
761void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
762 const NVPTXSubtarget &STI) {
763 const unsigned PTXVersion = STI.getPTXVersion();
764
765 O << "//\n"
766 "// Generated by LLVM NVPTX Back-End\n"
767 "//\n"
768 "\n"
769 << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"
770 << ".target " << STI.getTargetName();
771
772 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
773 if (NTM.getDrvInterface() == NVPTX::NVCL)
774 O << ", texmode_independent";
775
776 bool HasFullDebugInfo = false;
777 for (DICompileUnit *CU : M.debug_compile_units()) {
778 switch(CU->getEmissionKind()) {
781 break;
784 HasFullDebugInfo = true;
785 break;
786 }
787 if (HasFullDebugInfo)
788 break;
789 }
790 if (HasFullDebugInfo)
791 O << ", debug";
792
793 O << "\n"
794 << ".address_size " << (NTM.is64Bit() ? "64" : "32") << "\n"
795 << "\n";
796}
797
799 // If we did not emit any functions, then the global declarations have not
800 // yet been emitted.
801 if (!GlobalsEmitted) {
802 emitGlobals(M);
803 GlobalsEmitted = true;
804 }
805
806 // call doFinalization
807 bool ret = AsmPrinter::doFinalization(M);
808
810
811 auto *TS =
812 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
813 // Close the last emitted section
814 if (hasDebugInfo()) {
815 TS->closeLastSection();
816 // Emit empty .debug_macinfo section for better support of the empty files.
817 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
818 }
819
820 // Output last DWARF .file directives, if any.
822
823 return ret;
824}
825
826// This function emits appropriate linkage directives for
827// functions and global variables.
828//
829// extern function declaration -> .extern
830// extern function definition -> .visible
831// external global variable with init -> .visible
832// external without init -> .extern
833// appending -> not allowed, assert.
834// for any linkage other than
835// internal, private, linker_private,
836// linker_private_weak, linker_private_weak_def_auto,
837// we emit -> .weak.
838
839void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
840 raw_ostream &O) {
841 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
842 if (V->hasExternalLinkage()) {
843 if (const auto *GVar = dyn_cast<GlobalVariable>(V))
844 O << (GVar->hasInitializer() ? ".visible " : ".extern ");
845 else if (V->isDeclaration())
846 O << ".extern ";
847 else
848 O << ".visible ";
849 } else if (V->hasAppendingLinkage()) {
850 report_fatal_error("Symbol '" + (V->hasName() ? V->getName() : "") +
851 "' has unsupported appending linkage type");
852 } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
853 O << ".weak ";
854 }
855 }
856}
857
858void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
859 raw_ostream &O, bool ProcessDemoted,
860 const NVPTXSubtarget &STI) {
861 // Skip meta data
862 if (GVar->hasSection())
863 if (GVar->getSection() == "llvm.metadata")
864 return;
865
866 // Skip LLVM intrinsic global variables
867 if (GVar->getName().starts_with("llvm.") ||
868 GVar->getName().starts_with("nvvm."))
869 return;
870
871 const DataLayout &DL = getDataLayout();
872
873 // GlobalVariables are always constant pointers themselves.
874 Type *ETy = GVar->getValueType();
875
876 if (GVar->hasExternalLinkage()) {
877 if (GVar->hasInitializer())
878 O << ".visible ";
879 else
880 O << ".extern ";
881 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
883 O << ".common ";
884 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
886 GVar->hasCommonLinkage()) {
887 O << ".weak ";
888 }
889
890 const PTXOpaqueType OpaqueType = getPTXOpaqueType(*GVar);
891
892 if (OpaqueType == PTXOpaqueType::Texture) {
893 O << ".global .texref " << getTextureName(*GVar) << ";\n";
894 return;
895 }
896
897 if (OpaqueType == PTXOpaqueType::Surface) {
898 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
899 return;
900 }
901
902 if (GVar->isDeclaration()) {
903 // (extern) declarations, no definition or initializer
904 // Currently the only known declaration is for an automatic __local
905 // (.shared) promoted to global.
906 emitPTXGlobalVariable(GVar, O, STI);
907 O << ";\n";
908 return;
909 }
910
911 if (OpaqueType == PTXOpaqueType::Sampler) {
912 O << ".global .samplerref " << getSamplerName(*GVar);
913
914 const Constant *Initializer = nullptr;
915 if (GVar->hasInitializer())
916 Initializer = GVar->getInitializer();
917 const ConstantInt *CI = nullptr;
918 if (Initializer)
919 CI = dyn_cast<ConstantInt>(Initializer);
920 if (CI) {
921 unsigned sample = CI->getZExtValue();
922
923 O << " = { ";
924
925 for (int i = 0,
926 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
927 i < 3; i++) {
928 O << "addr_mode_" << i << " = ";
929 switch (addr) {
930 case 0:
931 O << "wrap";
932 break;
933 case 1:
934 O << "clamp_to_border";
935 break;
936 case 2:
937 O << "clamp_to_edge";
938 break;
939 case 3:
940 O << "wrap";
941 break;
942 case 4:
943 O << "mirror";
944 break;
945 }
946 O << ", ";
947 }
948 O << "filter_mode = ";
949 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
950 case 0:
951 O << "nearest";
952 break;
953 case 1:
954 O << "linear";
955 break;
956 case 2:
957 llvm_unreachable("Anisotropic filtering is not supported");
958 default:
959 O << "nearest";
960 break;
961 }
962 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
963 O << ", force_unnormalized_coords = 1";
964 }
965 O << " }";
966 }
967
968 O << ";\n";
969 return;
970 }
971
972 if (GVar->hasPrivateLinkage()) {
973 if (GVar->getName().starts_with("unrollpragma"))
974 return;
975
976 // FIXME - need better way (e.g. Metadata) to avoid generating this global
977 if (GVar->getName().starts_with("filename"))
978 return;
979 if (GVar->use_empty())
980 return;
981 }
982
983 const Function *DemotedFunc = nullptr;
984 if (!ProcessDemoted && canDemoteGlobalVar(GVar, DemotedFunc)) {
985 O << "// " << GVar->getName() << " has been demoted\n";
986 localDecls[DemotedFunc].push_back(GVar);
987 return;
988 }
989
990 O << ".";
991 emitPTXAddressSpace(GVar->getAddressSpace(), O);
992
993 if (isManaged(*GVar)) {
994 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
996 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
997 O << " .attribute(.managed)";
998 }
999
1000 O << " .align "
1001 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1002
1003 if (ETy->isPointerTy() || ((ETy->isIntegerTy() || ETy->isFloatingPointTy()) &&
1004 ETy->getScalarSizeInBits() <= 64)) {
1005 O << " .";
1006 // Special case: ABI requires that we use .u8 for predicates
1007 if (ETy->isIntegerTy(1))
1008 O << "u8";
1009 else
1010 O << getPTXFundamentalTypeStr(ETy, false);
1011 O << " ";
1012 getSymbol(GVar)->print(O, MAI);
1013
1014 // Ptx allows variable initilization only for constant and global state
1015 // spaces.
1016 if (GVar->hasInitializer()) {
1017 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1018 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1019 const Constant *Initializer = GVar->getInitializer();
1020 // 'undef' is treated as there is no value specified.
1021 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1022 O << " = ";
1023 printScalarConstant(Initializer, O);
1024 }
1025 } else {
1026 // The frontend adds zero-initializer to device and constant variables
1027 // that don't have an initial value, and UndefValue to shared
1028 // variables, so skip warning for this case.
1029 if (!GVar->getInitializer()->isNullValue() &&
1030 !isa<UndefValue>(GVar->getInitializer())) {
1031 report_fatal_error("initial value of '" + GVar->getName() +
1032 "' is not allowed in addrspace(" +
1033 Twine(GVar->getAddressSpace()) + ")");
1034 }
1035 }
1036 }
1037 } else {
1038 // Although PTX has direct support for struct type and array type and
1039 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1040 // targets that support these high level field accesses. Structs, arrays
1041 // and vectors are lowered into arrays of bytes.
1042 switch (ETy->getTypeID()) {
1043 case Type::IntegerTyID: // Integers larger than 64 bits
1044 case Type::FP128TyID:
1045 case Type::StructTyID:
1046 case Type::ArrayTyID:
1047 case Type::FixedVectorTyID: {
1048 const uint64_t ElementSize = DL.getTypeStoreSize(ETy);
1049 // Ptx allows variable initilization only for constant and
1050 // global state spaces.
1051 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1052 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1053 GVar->hasInitializer()) {
1054 const Constant *Initializer = GVar->getInitializer();
1055 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1056 AggBuffer aggBuffer(ElementSize, *this);
1057 bufferAggregateConstant(Initializer, &aggBuffer);
1058 if (aggBuffer.numSymbols()) {
1059 const unsigned int ptrSize = MAI->getCodePointerSize();
1060 if (ElementSize % ptrSize ||
1061 !aggBuffer.allSymbolsAligned(ptrSize)) {
1062 // Print in bytes and use the mask() operator for pointers.
1063 if (!STI.hasMaskOperator())
1065 "initialized packed aggregate with pointers '" +
1066 GVar->getName() +
1067 "' requires at least PTX ISA version 7.1");
1068 O << " .u8 ";
1069 getSymbol(GVar)->print(O, MAI);
1070 O << "[" << ElementSize << "] = {";
1071 aggBuffer.printBytes(O);
1072 O << "}";
1073 } else {
1074 O << " .u" << ptrSize * 8 << " ";
1075 getSymbol(GVar)->print(O, MAI);
1076 O << "[" << ElementSize / ptrSize << "] = {";
1077 aggBuffer.printWords(O);
1078 O << "}";
1079 }
1080 } else {
1081 O << " .b8 ";
1082 getSymbol(GVar)->print(O, MAI);
1083 O << "[" << ElementSize << "] = {";
1084 aggBuffer.printBytes(O);
1085 O << "}";
1086 }
1087 } else {
1088 O << " .b8 ";
1089 getSymbol(GVar)->print(O, MAI);
1090 if (ElementSize)
1091 O << "[" << ElementSize << "]";
1092 }
1093 } else {
1094 O << " .b8 ";
1095 getSymbol(GVar)->print(O, MAI);
1096 if (ElementSize)
1097 O << "[" << ElementSize << "]";
1098 }
1099 break;
1100 }
1101 default:
1102 llvm_unreachable("type not supported yet");
1103 }
1104 }
1105 O << ";\n";
1106}
1107
1108void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1109 const Value *v = Symbols[nSym];
1110 const Value *v0 = SymbolsBeforeStripping[nSym];
1111 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1112 MCSymbol *Name = AP.getSymbol(GVar);
1114 // Is v0 a generic pointer?
1115 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1116 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1117 os << "generic(";
1118 Name->print(os, AP.MAI);
1119 os << ")";
1120 } else {
1121 Name->print(os, AP.MAI);
1122 }
1123 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1124 const MCExpr *Expr = AP.lowerConstantForGV(CExpr, false);
1125 AP.printMCExpr(*Expr, os);
1126 } else
1127 llvm_unreachable("symbol type unknown");
1128}
1129
1130void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1131 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1132 // Do not emit trailing zero initializers. They will be zero-initialized by
1133 // ptxas. This saves on both space requirements for the generated PTX and on
1134 // memory use by ptxas. (See:
1135 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1136 unsigned int InitializerCount = size;
1137 // TODO: symbols make this harder, but it would still be good to trim trailing
1138 // 0s for aggs with symbols as well.
1139 if (numSymbols() == 0)
1140 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1141 InitializerCount--;
1142
1143 symbolPosInBuffer.push_back(InitializerCount);
1144 unsigned int nSym = 0;
1145 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1146 for (unsigned int pos = 0; pos < InitializerCount;) {
1147 if (pos)
1148 os << ", ";
1149 if (pos != nextSymbolPos) {
1150 os << (unsigned int)buffer[pos];
1151 ++pos;
1152 continue;
1153 }
1154 // Generate a per-byte mask() operator for the symbol, which looks like:
1155 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1156 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1157 std::string symText;
1158 llvm::raw_string_ostream oss(symText);
1159 printSymbol(nSym, oss);
1160 for (unsigned i = 0; i < ptrSize; ++i) {
1161 if (i)
1162 os << ", ";
1163 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1164 os << "(" << symText << ")";
1165 }
1166 pos += ptrSize;
1167 nextSymbolPos = symbolPosInBuffer[++nSym];
1168 assert(nextSymbolPos >= pos);
1169 }
1170}
1171
1172void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1173 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1174 symbolPosInBuffer.push_back(size);
1175 unsigned int nSym = 0;
1176 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1177 assert(nextSymbolPos % ptrSize == 0);
1178 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1179 if (pos)
1180 os << ", ";
1181 if (pos == nextSymbolPos) {
1182 printSymbol(nSym, os);
1183 nextSymbolPos = symbolPosInBuffer[++nSym];
1184 assert(nextSymbolPos % ptrSize == 0);
1185 assert(nextSymbolPos >= pos + ptrSize);
1186 } else if (ptrSize == 4)
1187 os << support::endian::read32le(&buffer[pos]);
1188 else
1189 os << support::endian::read64le(&buffer[pos]);
1190 }
1191}
1192
1193void NVPTXAsmPrinter::emitDemotedVars(const Function *F, raw_ostream &O) {
1194 auto It = localDecls.find(F);
1195 if (It == localDecls.end())
1196 return;
1197
1198 ArrayRef<const GlobalVariable *> GVars = It->second;
1199
1200 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1201 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
1202
1203 for (const GlobalVariable *GV : GVars) {
1204 O << "\t// demoted variable\n\t";
1205 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1206 }
1207}
1208
1209void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1210 raw_ostream &O) const {
1211 switch (AddressSpace) {
1213 O << "local";
1214 break;
1216 O << "global";
1217 break;
1219 O << "const";
1220 break;
1222 O << "shared";
1223 break;
1224 default:
1225 report_fatal_error("Bad address space found while emitting PTX: " +
1226 llvm::Twine(AddressSpace));
1227 break;
1228 }
1229}
1230
1231std::string
1232NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1233 switch (Ty->getTypeID()) {
1234 case Type::IntegerTyID: {
1235 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1236 if (NumBits == 1)
1237 return "pred";
1238 if (NumBits <= 64) {
1239 std::string name = "u";
1240 return name + utostr(NumBits);
1241 }
1242 llvm_unreachable("Integer too large");
1243 break;
1244 }
1245 case Type::BFloatTyID:
1246 case Type::HalfTyID:
1247 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1248 // PTX assembly.
1249 return "b16";
1250 case Type::FloatTyID:
1251 return "f32";
1252 case Type::DoubleTyID:
1253 return "f64";
1254 case Type::PointerTyID: {
1255 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1256 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1257
1258 if (PtrSize == 64)
1259 if (useB4PTR)
1260 return "b64";
1261 else
1262 return "u64";
1263 else if (useB4PTR)
1264 return "b32";
1265 else
1266 return "u32";
1267 }
1268 default:
1269 break;
1270 }
1271 llvm_unreachable("unexpected type");
1272}
1273
1274void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1275 raw_ostream &O,
1276 const NVPTXSubtarget &STI) {
1277 const DataLayout &DL = getDataLayout();
1278
1279 // GlobalVariables are always constant pointers themselves.
1280 Type *ETy = GVar->getValueType();
1281
1282 O << ".";
1283 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1284 if (isManaged(*GVar)) {
1285 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
1287 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1288
1289 O << " .attribute(.managed)";
1290 }
1291 O << " .align "
1292 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1293
1294 // Special case for i128/fp128
1295 if (ETy->getScalarSizeInBits() == 128) {
1296 O << " .b8 ";
1297 getSymbol(GVar)->print(O, MAI);
1298 O << "[16]";
1299 return;
1300 }
1301
1302 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1303 O << " ." << getPTXFundamentalTypeStr(ETy) << " ";
1304 getSymbol(GVar)->print(O, MAI);
1305 return;
1306 }
1307
1308 int64_t ElementSize = 0;
1309
1310 // Although PTX has direct support for struct type and array type and LLVM IR
1311 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1312 // support these high level field accesses. Structs and arrays are lowered
1313 // into arrays of bytes.
1314 switch (ETy->getTypeID()) {
1315 case Type::StructTyID:
1316 case Type::ArrayTyID:
1318 ElementSize = DL.getTypeStoreSize(ETy);
1319 O << " .b8 ";
1320 getSymbol(GVar)->print(O, MAI);
1321 O << "[";
1322 if (ElementSize) {
1323 O << ElementSize;
1324 }
1325 O << "]";
1326 break;
1327 default:
1328 llvm_unreachable("type not supported yet");
1329 }
1330}
1331
/// Print the PTX parameter list of function F to O.
///
/// Prints "()" when F has no arguments and is not variadic. Otherwise prints
/// "(\n", one `.param` declaration per argument separated by ",\n", an extra
/// unsized `.b8` array parameter when F is variadic, and a closing "\n)".
///
/// NOTE(review): this listing is missing several source lines (its embedded
/// numbering skips 1368, 1371, 1374, 1377, 1407, 1443, 1446, 1449, 1452 and
/// 1481). The gaps are marked below; restore them from the upstream file
/// before building.
1332void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1333 const DataLayout &DL = getDataLayout();
1334 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1335 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
// MFI is null when there is no current machine function (MF).
1336 const NVPTXMachineFunctionInfo *MFI =
1337 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1338
// IsFirst controls whether a ",\n" separator is printed before a parameter.
1339 bool IsFirst = true;
1340 const bool IsKernelFunc = isKernelFunction(*F);
1341
1342 if (F->arg_empty() && !F->isVarArg()) {
1343 O << "()";
1344 return;
1345 }
1346
1347 O << "(\n";
1348
1349 for (const Argument &Arg : F->args()) {
1350 Type *Ty = Arg.getType();
1351 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1352
1353 if (!IsFirst)
1354 O << ",\n";
1355
1356 IsFirst = false;
1357
1358 // Handle image/sampler parameters
1359 if (IsKernelFunc) {
1360 const PTXOpaqueType ArgOpaqueType = getPTXOpaqueType(Arg);
1361 if (ArgOpaqueType != PTXOpaqueType::None) {
// The ".u64 .ptr" prefix is printed unless MFI reports ParamSym as an image
// handle symbol (or no MFI is available).
1362 const bool EmitImgPtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1363 O << "\t.param ";
1364 if (EmitImgPtr)
1365 O << ".u64 .ptr ";
1366
// NOTE(review): the `case` labels of this switch are absent from the listing;
// presumably sampler, texture, surface and none, in that order — confirm.
1367 switch (ArgOpaqueType) {
1369 O << ".samplerref ";
1370 break;
1372 O << ".texref ";
1373 break;
1375 O << ".surfref ";
1376 break;
1378 llvm_unreachable("handled above");
1379 }
1380 O << ParamSym;
1381 continue;
1382 }
1383 }
1384
// Alignment for this parameter: an alignment returned by getAlign() for the
// argument's attribute index wins; otherwise the larger of the optimized type
// alignment and (for byval arguments only) the parameter's own alignment.
1385 auto GetOptimalAlignForParam = [&DL, F, &Arg](Type *Ty) -> Align {
1386 if (MaybeAlign StackAlign =
1387 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1388 return StackAlign.value();
1389
1390 Align TypeAlign = getFunctionParamOptimizedAlign(F, Ty, DL);
1391 MaybeAlign ParamAlign =
1392 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1393 return std::max(TypeAlign, ParamAlign.valueOrOne());
1394 };
1395
1396 if (Arg.hasByValAttr()) {
1397 // param has byVal attribute.
1398 Type *ETy = Arg.getParamByValType();
1399 assert(ETy && "Param should have byval type");
1400
1401 // Print .param .align <a> .b8 .param[size];
1402 // <a> = optimal alignment for the element type; always multiple of
1403 // PAL.getParamAlignment
1404 // size = typeallocsize of element type
// NOTE(review): the line naming the callee of the non-kernel branch of this
// conditional (listing line 1407) is missing; only its arguments are visible.
1405 const Align OptimalAlign =
1406 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1408 F, ETy, Arg.getParamAlign().valueOrOne(), DL);
1409
1410 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1411 << "[" << DL.getTypeAllocSize(ETy) << "]";
1412 continue;
1413 }
1414
1415 if (shouldPassAsArray(Ty)) {
1416 // Just print .param .align <a> .b8 .param[size];
1417 // <a> = optimal alignment for the element type; always multiple of
1418 // PAL.getParamAlignment
1419 // size = typeallocsize of element type
1420 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1421
1422 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1423 << "[" << DL.getTypeAllocSize(Ty) << "]";
1424
1425 continue;
1426 }
1427 // Just a scalar
// PTySizeInBits stays 0 for non-pointer scalars.
1428 auto *PTy = dyn_cast<PointerType>(Ty);
1429 unsigned PTySizeInBits = 0;
1430 if (PTy) {
1431 PTySizeInBits =
1432 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1433 assert(PTySizeInBits && "Invalid pointer size");
1434 }
1435
1436 if (IsKernelFunc) {
1437 if (PTy) {
1438 O << "\t.param .u" << PTySizeInBits << " .ptr";
1439
// NOTE(review): the four address-space `case` labels of this switch are absent
// from the listing; the default case adds no state-space qualifier.
1440 switch (PTy->getAddressSpace()) {
1441 default:
1442 break;
1444 O << " .global";
1445 break;
1447 O << " .shared";
1448 break;
1450 O << " .const";
1451 break;
1453 O << " .local";
1454 break;
1455 }
1456
1457 O << " .align " << Arg.getParamAlign().valueOrOne().value() << " "
1458 << ParamSym;
1459 continue;
1460 }
1461
1462 // non-pointer scalar to kernel func
1463 O << "\t.param .";
1464 // Special case: predicate operands become .u8 types
1465 if (Ty->isIntegerTy(1))
1466 O << "u8";
1467 else
1468 O << getPTXFundamentalTypeStr(Ty);
1469 O << " " << ParamSym;
1470 continue;
1471 }
1472 // Non-kernel function, just print .param .b<size> for ABI
1473 // and .reg .b<size> for non-ABI
1474 unsigned Size;
1475 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
1476 Size = promoteScalarArgumentSize(ITy->getBitWidth());
1477 } else if (PTy) {
1478 assert(PTySizeInBits && "Invalid pointer size");
1479 Size = PTySizeInBits;
// NOTE(review): the statement belonging to this final `else` (listing line
// 1481) is missing, so as shown Size has no assignment on this path.
1480 } else
1482 O << "\t.param .b" << Size << " " << ParamSym;
1483 }
1484
// Variadic functions get one extra unsized byte-array parameter, aligned to
// the subtarget's maximum required alignment.
1485 if (F->isVarArg()) {
1486 if (!IsFirst)
1487 O << ",\n";
1488 O << "\t.param .align " << STI.getMaxRequiredAlignment() << " .b8 "
1489 << TLI->getParamName(F, /* vararg */ -1) << "[]";
1490 }
1491
1492 O << "\n)";
1493}
1494
1495void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1496 const MachineFunction &MF) {
1497 SmallString<128> Str;
1498 raw_svector_ostream O(Str);
1499
1500 // Map the global virtual register number to a register class specific
1501 // virtual register number starting from 1 with that class.
1502 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1503
1504 // Emit the Fake Stack Object
1505 const MachineFrameInfo &MFI = MF.getFrameInfo();
1506 int64_t NumBytes = MFI.getStackSize();
1507 if (NumBytes) {
1508 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1509 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1510 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1511 O << "\t.reg .b64 \t%SP;\n"
1512 << "\t.reg .b64 \t%SPL;\n";
1513 } else {
1514 O << "\t.reg .b32 \t%SP;\n"
1515 << "\t.reg .b32 \t%SPL;\n";
1516 }
1517 }
1518
1519 // Go through all virtual registers to establish the mapping between the
1520 // global virtual
1521 // register number and the per class virtual register number.
1522 // We use the per class virtual register number in the ptx output.
1523 for (unsigned I : llvm::seq(MRI->getNumVirtRegs())) {
1525 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1526 continue;
1527 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1528 RCRegMap[VR] = RCRegMap.size() + 1;
1529 }
1530
1531 // Emit declaration of the virtual registers or 'physical' registers for
1532 // each register class
1533 for (const TargetRegisterClass *RC : TRI->regclasses()) {
1534 const unsigned N = VRegMapping[RC].size();
1535
1536 // Only declare those registers that may be used.
1537 if (N) {
1538 const StringRef RCName = getNVPTXRegClassName(RC);
1539 const StringRef RCStr = getNVPTXRegClassStr(RC);
1540 O << "\t.reg " << RCName << " \t" << RCStr << "<" << (N + 1) << ">;\n";
1541 }
1542 }
1543
1544 OutStreamer->emitRawText(O.str());
1545}
1546
1547/// Translate virtual register numbers in DebugInfo locations to their printed
1548/// encodings, as used by CUDA-GDB.
1549void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1550 const MachineFunction &MF) {
1551 const NVPTXSubtarget &STI = MF.getSubtarget<NVPTXSubtarget>();
1552 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1553
1554 // Clear the old mapping, and add the new one. This mapping is used after the
1555 // printing of the current function is complete, but before the next function
1556 // is printed.
1557 registerInfo->clearDebugRegisterMap();
1558
1559 for (auto &classMap : VRegMapping) {
1560 for (auto &registerMapping : classMap.getSecond()) {
1561 auto reg = registerMapping.getFirst();
1562 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1563 }
1564 }
1565}
1566
1567void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp,
1568 raw_ostream &O) const {
1569 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1570 bool ignored;
1571 unsigned int numHex;
1572 const char *lead;
1573
1574 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1575 numHex = 8;
1576 lead = "0f";
1578 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1579 numHex = 16;
1580 lead = "0d";
1582 } else
1583 llvm_unreachable("unsupported fp type");
1584
1585 APInt API = APF.bitcastToAPInt();
1586 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1587}
1588
1589void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1590 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1591 O << CI->getValue();
1592 return;
1593 }
1594 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1595 printFPConstant(CFP, O);
1596 return;
1597 }
1598 if (isa<ConstantPointerNull>(CPV)) {
1599 O << "0";
1600 return;
1601 }
1602 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1603 const bool IsNonGenericPointer = GVar->getAddressSpace() != 0;
1604 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1605 O << "generic(";
1606 getSymbol(GVar)->print(O, MAI);
1607 O << ")";
1608 } else {
1609 getSymbol(GVar)->print(O, MAI);
1610 }
1611 return;
1612 }
1613 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1614 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1615 printMCExpr(*E, O);
1616 return;
1617 }
1618 llvm_unreachable("Not scalar type found in printScalarConstant()");
1619}
1620
1621void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1622 AggBuffer *AggBuffer) {
1623 const DataLayout &DL = getDataLayout();
1624 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1625 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1626 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1627 // only the space allocated by CPV.
1628 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1629 return;
1630 }
1631
1632 // Helper for filling AggBuffer with APInts.
1633 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1634 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1635 SmallVector<unsigned char, 16> Buf(NumBytes);
1636 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1637 // input's bit width, and i1 arrays may not have a length that is a multuple
1638 // of 8. We handle the last byte separately, so we never request out of
1639 // bounds bits.
1640 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1641 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1642 }
1643 size_t LastBytePosition = (NumBytes - 1) * 8;
1644 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1645 Buf[NumBytes - 1] =
1646 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1647 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1648 };
1649
1650 switch (CPV->getType()->getTypeID()) {
1651 case Type::IntegerTyID:
1652 if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1653 AddIntToBuffer(CI->getValue());
1654 break;
1655 }
1656 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1657 if (const auto *CI =
1659 AddIntToBuffer(CI->getValue());
1660 break;
1661 }
1662 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1663 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1664 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1665 AggBuffer->addZeros(AllocSize);
1666 break;
1667 }
1668 }
1669 llvm_unreachable("unsupported integer const type");
1670 break;
1671
1672 case Type::HalfTyID:
1673 case Type::BFloatTyID:
1674 case Type::FloatTyID:
1675 case Type::DoubleTyID:
1676 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1677 break;
1678
1679 case Type::PointerTyID: {
1680 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1681 AggBuffer->addSymbol(GVar, GVar);
1682 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1683 const Value *v = Cexpr->stripPointerCasts();
1684 AggBuffer->addSymbol(v, Cexpr);
1685 }
1686 AggBuffer->addZeros(AllocSize);
1687 break;
1688 }
1689
1690 case Type::ArrayTyID:
1692 case Type::StructTyID: {
1694 bufferAggregateConstant(CPV, AggBuffer);
1695 if (Bytes > AllocSize)
1696 AggBuffer->addZeros(Bytes - AllocSize);
1697 } else if (isa<ConstantAggregateZero>(CPV))
1698 AggBuffer->addZeros(Bytes);
1699 else
1700 llvm_unreachable("Unexpected Constant type");
1701 break;
1702 }
1703
1704 default:
1705 llvm_unreachable("unsupported type");
1706 }
1707}
1708
1709void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1710 AggBuffer *aggBuffer) {
1711 const DataLayout &DL = getDataLayout();
1712
1713 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1714 for (unsigned I : llvm::seq(Val.getBitWidth() / 8))
1715 Buffer->addByte(Val.extractBitsAsZExtValue(8, I * 8));
1716 };
1717
1718 // Integers of arbitrary width
1719 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1720 ExtendBuffer(CI->getValue(), aggBuffer);
1721 return;
1722 }
1723
1724 // f128
1725 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1726 if (CFP->getType()->isFP128Ty()) {
1727 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1728 return;
1729 }
1730 }
1731
1732 // Old constants
1733 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1734 for (const auto &Op : CPV->operands())
1735 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1736 return;
1737 }
1738
1739 if (const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1740 for (unsigned I : llvm::seq(CDS->getNumElements()))
1741 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(I)), 0, aggBuffer);
1742 return;
1743 }
1744
1745 if (isa<ConstantStruct>(CPV)) {
1746 if (CPV->getNumOperands()) {
1747 StructType *ST = cast<StructType>(CPV->getType());
1748 for (unsigned I : llvm::seq(CPV->getNumOperands())) {
1749 int EndOffset = (I + 1 == CPV->getNumOperands())
1750 ? DL.getStructLayout(ST)->getElementOffset(0) +
1751 DL.getTypeAllocSize(ST)
1752 : DL.getStructLayout(ST)->getElementOffset(I + 1);
1753 int Bytes = EndOffset - DL.getStructLayout(ST)->getElementOffset(I);
1754 bufferLEByte(cast<Constant>(CPV->getOperand(I)), Bytes, aggBuffer);
1755 }
1756 }
1757 return;
1758 }
1759 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1760}
1761
1762/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1763/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1764/// expressions that are representable in PTX and create
1765/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1766const MCExpr *
1767NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
1768 bool ProcessingGeneric) const {
1769 MCContext &Ctx = OutContext;
1770
1771 if (CV->isNullValue() || isa<UndefValue>(CV))
1772 return MCConstantExpr::create(0, Ctx);
1773
1774 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1775 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1776
1777 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1778 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1779 if (ProcessingGeneric)
1780 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1781 return Expr;
1782 }
1783
1784 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1785 if (!CE) {
1786 llvm_unreachable("Unknown constant value to lower!");
1787 }
1788
1789 switch (CE->getOpcode()) {
1790 default:
1791 break; // Error
1792
1793 case Instruction::AddrSpaceCast: {
1794 // Strip the addrspacecast and pass along the operand
1795 PointerType *DstTy = cast<PointerType>(CE->getType());
1796 if (DstTy->getAddressSpace() == 0)
1797 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1798
1799 break; // Error
1800 }
1801
1802 case Instruction::GetElementPtr: {
1803 const DataLayout &DL = getDataLayout();
1804
1805 // Generate a symbolic expression for the byte address
1806 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1807 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1808
1809 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1810 ProcessingGeneric);
1811 if (!OffsetAI)
1812 return Base;
1813
1814 int64_t Offset = OffsetAI.getSExtValue();
1816 Ctx);
1817 }
1818
1819 case Instruction::Trunc:
1820 // We emit the value and depend on the assembler to truncate the generated
1821 // expression properly. This is important for differences between
1822 // blockaddress labels. Since the two labels are in the same function, it
1823 // is reasonable to treat their delta as a 32-bit value.
1824 [[fallthrough]];
1825 case Instruction::BitCast:
1826 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1827
1828 case Instruction::IntToPtr: {
1829 const DataLayout &DL = getDataLayout();
1830
1831 // Handle casts to pointers by changing them into casts to the appropriate
1832 // integer type. This promotes constant folding and simplifies this code.
1833 Constant *Op = CE->getOperand(0);
1834 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1835 /*IsSigned*/ false, DL);
1836 if (Op)
1837 return lowerConstantForGV(Op, ProcessingGeneric);
1838
1839 break; // Error
1840 }
1841
1842 case Instruction::PtrToInt: {
1843 const DataLayout &DL = getDataLayout();
1844
1845 // Support only foldable casts to/from pointers that can be eliminated by
1846 // changing the pointer to the appropriately sized integer type.
1847 Constant *Op = CE->getOperand(0);
1848 Type *Ty = CE->getType();
1849
1850 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1851
1852 // We can emit the pointer value into this slot if the slot is an
1853 // integer slot equal to the size of the pointer.
1854 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1855 return OpExpr;
1856
1857 // Otherwise the pointer is smaller than the resultant integer, mask off
1858 // the high bits so we are sure to get a proper truncation if the input is
1859 // a constant expr.
1860 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1861 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1862 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1863 }
1864
1865 // The MC library also has a right-shift operator, but it isn't consistently
1866 // signed or unsigned between different targets.
1867 case Instruction::Add: {
1868 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1869 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
1870 switch (CE->getOpcode()) {
1871 default: llvm_unreachable("Unknown binary operator constant cast expr");
1872 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1873 }
1874 }
1875 }
1876
1877 // If the code isn't optimized, there may be outstanding folding
1878 // opportunities. Attempt to fold the expression using DataLayout as a
1879 // last resort before giving up.
1881 if (C != CE)
1882 return lowerConstantForGV(C, ProcessingGeneric);
1883
1884 // Otherwise report the problem to the user.
1885 std::string S;
1886 raw_string_ostream OS(S);
1887 OS << "Unsupported expression in static initializer: ";
1888 CE->printAsOperand(OS, /*PrintType=*/false,
1889 !MF ? nullptr : MF->getFunction().getParent());
1890 report_fatal_error(Twine(OS.str()));
1891}
1892
1893void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) const {
1894 OutContext.getAsmInfo()->printExpr(OS, Expr);
1895}
1896
1897/// PrintAsmOperand - Print out an operand for an inline asm expression.
1898///
1899bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1900 const char *ExtraCode, raw_ostream &O) {
1901 if (ExtraCode && ExtraCode[0]) {
1902 if (ExtraCode[1] != 0)
1903 return true; // Unknown modifier.
1904
1905 switch (ExtraCode[0]) {
1906 default:
1907 // See if this is a generic print operand
1908 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
1909 case 'r':
1910 break;
1911 }
1912 }
1913
1914 printOperand(MI, OpNo, O);
1915
1916 return false;
1917}
1918
1919bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
1920 unsigned OpNo,
1921 const char *ExtraCode,
1922 raw_ostream &O) {
1923 if (ExtraCode && ExtraCode[0])
1924 return true; // Unknown modifier
1925
1926 O << '[';
1927 printMemOperand(MI, OpNo, O);
1928 O << ']';
1929
1930 return false;
1931}
1932
1933void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
1934 raw_ostream &O) {
1935 const MachineOperand &MO = MI->getOperand(OpNum);
1936 switch (MO.getType()) {
1938 if (MO.getReg().isPhysical()) {
1939 if (MO.getReg() == NVPTX::VRDepot)
1941 else
1943 } else {
1944 emitVirtualRegister(MO.getReg(), O);
1945 }
1946 break;
1947
1949 O << MO.getImm();
1950 break;
1951
1953 printFPConstant(MO.getFPImm(), O);
1954 break;
1955
1957 PrintSymbolOperand(MO, O);
1958 break;
1959
1961 MO.getMBB()->getSymbol()->print(O, MAI);
1962 break;
1963
1964 default:
1965 llvm_unreachable("Operand type not supported.");
1966 }
1967}
1968
1969void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
1970 raw_ostream &O, const char *Modifier) {
1971 printOperand(MI, OpNum, O);
1972
1973 if (Modifier && strcmp(Modifier, "add") == 0) {
1974 O << ", ";
1975 printOperand(MI, OpNum + 1, O);
1976 } else {
1977 if (MI->getOperand(OpNum + 1).isImm() &&
1978 MI->getOperand(OpNum + 1).getImm() == 0)
1979 return; // don't print ',0' or '+0'
1980 O << "+";
1981 printOperand(MI, OpNum + 1, O);
1982 }
1983}
1984
1985char NVPTXAsmPrinter::ID = 0;
1986
1987INITIALIZE_PASS(NVPTXAsmPrinter, "nvptx-asm-printer", "NVPTX Assembly Printer",
1988 false, false)
1989
1990// Force static initialization.
1991extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
1992LLVMInitializeNVPTXAsmPrinter() {
1995}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static StringRef getTextureName(const Value &V)
#define DEPOTNAME
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static StringRef getSurfaceName(const Value &V)
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static StringRef getSamplerName(const Value &V)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static const char * name
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5890
APInt bitcastToAPInt() const
Definition APFloat.h:1408
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:520
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition AsmPrinter.h:97
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition AsmPrinter.h:520
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
Definition AsmPrinter.h:626
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:128
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition AsmPrinter.h:458
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
Definition AsmPrinter.h:630
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
const APFloat & getValueAPF() const
Definition Constants.h:463
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Collects and handles dwarf debug information.
Definition DwarfDebug.h:352
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:651
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:329
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void addOperand(const MCOperand Op)
Definition MCInst.h:215
void setOpcode(unsigned Op)
Definition MCInst.h:201
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition MCSymbol.cpp:59
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
DwarfDebug * createDwarfDebug() override
Create NVPTX-specific DwarfDebug handler.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
NVPTX-specific DwarfDebug implementation.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
iterator begin() const
Definition StringRef.h:113
iterator end() const
Definition StringRef.h:115
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition Type.h:76
@ HalfTyID
16-bit floating point type
Definition Type.h:57
@ VoidTyID
type with no size
Definition Type.h:64
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ StructTyID
Structures.
Definition Type.h:75
@ IntegerTyID
Arbitrary bit width integers.
Definition Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition Type.h:77
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ PointerTyID
Pointers.
Definition Type.h:74
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
TypeID getTypeID() const
Return the type id for the type.
Definition Type.h:138
op_range operands()
Definition User.h:267
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
iterator_range< user_iterator > users()
Definition Value.h:427
bool use_empty() const
Definition Value.h:347
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type size() const
Definition DenseSet.h:87
bool erase(const ValueT &V)
Definition DenseSet.h:100
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to a SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
constexpr StringLiteral BlocksAreClusters("nvvm.blocksareclusters")
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
uint64_t read64le(const void *P)
Definition Endian.h:435
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2180
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MaybeAlign getAlign(const CallInst &I, unsigned Index)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
std::optional< unsigned > getMaxNReg(const Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition Format.h:204
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::optional< unsigned > getMaxClusterRank(const Function &F)
Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, Align InitialAlign, const DataLayout &DL)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
PTXOpaqueType getPTXOpaqueType(const GlobalVariable &GV)
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL)
Since function arguments are passed via .param space, we may want to increase their alignment in a wa...
Target & getTheNVPTXTarget32()
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...