LLVM 22.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/DenseSet.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/ADT/Twine.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/Attributes.h"
54#include "llvm/IR/BasicBlock.h"
55#include "llvm/IR/Constant.h"
56#include "llvm/IR/Constants.h"
57#include "llvm/IR/DataLayout.h"
58#include "llvm/IR/DebugInfo.h"
60#include "llvm/IR/DebugLoc.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/GlobalAlias.h"
64#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/Instruction.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/User.h"
72#include "llvm/MC/MCExpr.h"
73#include "llvm/MC/MCInst.h"
74#include "llvm/MC/MCInstrDesc.h"
75#include "llvm/MC/MCStreamer.h"
76#include "llvm/MC/MCSymbol.h"
81#include "llvm/Support/Endian.h"
88#include <cassert>
89#include <cstdint>
90#include <cstring>
91#include <string>
92
93using namespace llvm;
94
95#define DEPOTNAME "__local_depot"
96
97/// discoverDependentGlobals - Collect into \p Globals every GlobalVariable
98/// on which \p V depends, looking through the operands of non-global users.
// NOTE(review): the embedded listing numbers jump from 99 to 102, so the
// line(s) carrying the function name and parameter list are not visible in
// this listing. The body reads a value `V` and inserts into `Globals`.
99static void
 // A GlobalVariable is itself the dependency: record it and stop, without
 // descending into its own operands.
102 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
103 Globals.insert(GV);
104 return;
105 }
106
 // Any other User: recurse into each of its operands.
107 if (const User *U = dyn_cast<User>(V))
108 for (const auto &O : U->operands())
109 discoverDependentGlobals(O, Globals);
110}
111
112/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
113/// instances to be emitted, but only after any dependents have been added
114/// first.
// The body uses three collections: `Order` (the emission list being built),
// `Visited` (globals already placed in `Order`) and `Visiting` (globals
// whose dependencies are currently being walked, used for cycle detection).
// NOTE(review): the embedded listing numbers jump from 115 to 120 (the
// function name and parameter list) and from 128 to 130 (presumably the
// declaration of `Others`); those lines are not visible in this listing.
115static void
120 // Have we already visited this one?
121 if (Visited.count(GV))
122 return;
123
124 // Do we have a circular dependency?
 // insert() reports via `.second` whether GV was newly added; if it was
 // already in `Visiting`, we have re-entered it while still walking it.
125 if (!Visiting.insert(GV).second)
126 report_fatal_error("Circular dependency found in global variable set");
127
128 // Make sure we visit all dependents first
130 for (const auto &O : GV->operands())
131 discoverDependentGlobals(O, Others);
132
133 for (const GlobalVariable *GV : Others)
134 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
135
136 // Now we can visit ourself
137 Order.push_back(GV);
138 Visited.insert(GV);
139 Visiting.erase(GV);
140}
141
// Emit a single machine instruction \p MI: verify its predicates against the
// subtarget feature bits, then lower it into an MCInst.
142void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
143 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
144 getSubtargetInfo().getFeatureBits());
145
146 MCInst Inst;
147 lowerToMCInst(MI, Inst);
 // NOTE(review): listing line 148 is not visible here; presumably it hands
 // `Inst` to the output streamer - confirm against the real file.
149}
150
151void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
152 OutMI.setOpcode(MI->getOpcode());
153 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
154 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
155 const MachineOperand &MO = MI->getOperand(0);
156 OutMI.addOperand(GetSymbolRef(
157 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
158 return;
159 }
160
161 for (const auto MO : MI->operands())
162 OutMI.addOperand(lowerOperand(MO));
163}
164
// Convert a single MachineOperand into the corresponding MCOperand, switching
// on the operand type. The visible returns produce a register operand
// (encoded through encodeVirtualRegister), an immediate, and symbol
// references for external symbols and global addresses.
// NOTE(review): the embedded listing numbers have many gaps in this function
// (169, 171, 173-176, 178, 180, 189-190, 192-193, 195-196, 198-199): the
// `case` labels and the bodies of the floating-point cases are not visible in
// this listing, so the mapping from operand kind to return statement cannot
// be read off here.
165MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
166 switch (MO.getType()) {
167 default:
168 llvm_unreachable("unknown operand type");
170 return MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
172 return MCOperand::createImm(MO.getImm());
177 return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
179 return GetSymbolRef(getSymbol(MO.getGlobal()));
 // Floating-point immediate: dispatch on the IR type of the constant.
181 const ConstantFP *Cnt = MO.getFPImm();
182 const APFloat &Val = Cnt->getValueAPF();
183
184 switch (Cnt->getType()->getTypeID()) {
185 default:
186 report_fatal_error("Unsupported FP type");
187 break;
188 case Type::HalfTyID:
191 case Type::BFloatTyID:
194 case Type::FloatTyID:
197 case Type::DoubleTyID:
200 }
201 break;
202 }
203 }
204}
205
// Pack a register into a single unsigned value: a register-class tag in the
// bits above bit 27 and a register number in the low 28 bits (mask
// 0x0FFFFFFF). For the first branch the number is the value stored for Reg in
// the per-class VRegMapping table; for the `else` branch the tag is 0 and the
// number is Reg itself.
// NOTE(review): listing line 207 is not visible here; the `} else {` at
// listing line 233 implies an `if` was opened on it, presumably testing
// whether Reg is a virtual register - confirm against the real file.
206unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
208 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
209
210 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
211 unsigned RegNum = RegMap[Reg];
212
213 // Encode the register class in the upper 4 bits
214 // Must be kept in sync with NVPTXInstPrinter::printRegName
215 unsigned Ret = 0;
216 if (RC == &NVPTX::B1RegClass) {
217 Ret = (1 << 28);
218 } else if (RC == &NVPTX::B16RegClass) {
219 Ret = (2 << 28);
220 } else if (RC == &NVPTX::B32RegClass) {
221 Ret = (3 << 28);
222 } else if (RC == &NVPTX::B64RegClass) {
223 Ret = (4 << 28);
224 } else if (RC == &NVPTX::B128RegClass) {
225 Ret = (7 << 28);
226 } else {
227 report_fatal_error("Bad register class");
228 }
229
230 // Insert the vreg number
231 Ret |= (RegNum & 0x0FFFFFFF);
232 return Ret;
233 } else {
234 // Some special-use registers are actually physical registers.
235 // Encode this as the register class ID of 0 and the real register ID.
236 return Reg & 0x0FFFFFFF;
237 }
238}
239
240MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
241 const MCExpr *Expr;
242 Expr = MCSymbolRefExpr::create(Symbol, OutContext);
243 return MCOperand::createExpr(Expr);
244}
245
246void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
247 const DataLayout &DL = getDataLayout();
248 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
249 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
250
251 Type *Ty = F->getReturnType();
252 if (Ty->getTypeID() == Type::VoidTyID)
253 return;
254 O << " (";
255
256 auto PrintScalarRetVal = [&](unsigned Size) {
257 O << ".param .b" << promoteScalarArgumentSize(Size) << " func_retval0";
258 };
259 if (shouldPassAsArray(Ty)) {
260 const unsigned TotalSize = DL.getTypeAllocSize(Ty);
261 const Align RetAlignment = TLI->getFunctionArgumentAlignment(
262 F, Ty, AttributeList::ReturnIndex, DL);
263 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
264 << TotalSize << "]";
265 } else if (Ty->isFloatingPointTy()) {
266 PrintScalarRetVal(Ty->getPrimitiveSizeInBits());
267 } else if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
268 PrintScalarRetVal(ITy->getBitWidth());
269 } else if (isa<PointerType>(Ty)) {
270 PrintScalarRetVal(TLI->getPointerTy(DL).getSizeInBits());
271 } else
272 llvm_unreachable("Unknown return type");
273 O << ") ";
274}
275
276void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
277 raw_ostream &O) {
278 const Function &F = MF.getFunction();
279 printReturnValStr(&F, O);
280}
281
282// Return true if MBB is the header of a loop marked with
283// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
284bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
285 const MachineBasicBlock &MBB) const {
286 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
287 // We insert .pragma "nounroll" only to the loop header.
288 if (!LI.isLoopHeader(&MBB))
289 return false;
290
291 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
292 // we iterate through each back edge of the loop with header MBB, and check
293 // whether its metadata contains llvm.loop.unroll.disable.
294 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
295 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
296 // Edges from other loops to MBB are not back edges.
297 continue;
298 }
299 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
300 if (MDNode *LoopID =
301 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
302 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
303 return true;
304 if (MDNode *UnrollCountMD =
305 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
306 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
307 ->isOne())
308 return true;
309 }
310 }
311 }
312 }
313 return false;
314}
315
// Emit the start of basic block \p MBB; when the block is the header of a
// loop for which isLoopHeaderOfNoUnroll() holds, a `.pragma "nounroll";`
// line is emitted as raw text.
316void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
 // NOTE(review): listing line 317 is not visible here; presumably it calls
 // the base-class implementation first - confirm against the real file.
318 if (isLoopHeaderOfNoUnroll(MBB))
319 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
320}
321
// NOTE(review): the signature line (listing line 322) is not visible in this
// listing; this is presumably the body of
// NVPTXAsmPrinter::emitFunctionEntryLabel - confirm against the real file.
// The visible body emits the module globals once (guarded by GlobalsEmitted),
// prints the function header (linkage, .entry/.func, return value, name,
// parameter list, kernel directives), emits the opening brace, and sets up
// the virtual-register tables for the function.
323 SmallString<128> Str;
324 raw_svector_ostream O(Str);
325
 // Globals are emitted lazily, right before the first function.
326 if (!GlobalsEmitted) {
327 emitGlobals(*MF->getFunction().getParent());
328 GlobalsEmitted = true;
329 }
330
331 // Set up
332 MRI = &MF->getRegInfo();
333 F = &MF->getFunction();
334 emitLinkageDirective(F, O);
335 if (isKernelFunction(*F))
336 O << ".entry ";
337 else {
338 O << ".func ";
339 printReturnValStr(*MF, O);
340 }
341
342 CurrentFnSym->print(O, MAI);
343
344 emitFunctionParamList(F, O);
345 O << "\n";
346
347 if (isKernelFunction(*F))
348 emitKernelFunctionDirectives(*F, O);
349
 // NOTE(review): listing line 350 is not visible; the `.noreturn` output
 // below is presumably conditional on it.
351 O << ".noreturn";
352
353 OutStreamer->emitRawText(O.str());
354
355 VRegMapping.clear();
356 // Emit open brace for function body.
357 OutStreamer->emitRawText(StringRef("{\n"));
358 setAndEmitFunctionVirtualRegisters(*MF);
359 encodeDebugInfoRegisterNumbers(*MF);
360 // Emit initial .loc debug directive for correct relocation symbol data.
361 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
362 assert(SP->getUnit());
 // NOTE(review): listing line 364 (the statement guarded by this `if`) is
 // not visible in this listing.
363 if (!SP->getUnit()->isDebugDirectivesOnly())
365 }
366}
367
// NOTE(review): the signature line (listing line 368) is not visible in this
// listing; this is presumably the body of
// NVPTXAsmPrinter::runOnMachineFunction - confirm against the real file.
// Runs the base-class AsmPrinter::runOnMachineFunction and then emits the
// closing brace of the function body, returning the base-class result.
369 bool Result = AsmPrinter::runOnMachineFunction(F);
370 // Emit closing brace for the body of function F.
371 // The closing brace must be emitted here because we need to emit additional
372 // debug labels/data after the last basic block.
373 // We need to emit the closing brace here because we don't have function that
374 // finished emission of the function body.
375 OutStreamer->emitRawText(StringRef("}\n"));
376 return Result;
377}
378
// NOTE(review): listing lines 379-380 (the signature and the declaration of
// `Str`) are not visible in this listing; this is presumably the body of
// NVPTXAsmPrinter::emitFunctionBodyStart - confirm against the real file.
// Prints the demoted variables of the current function into a buffer and
// emits the buffer as raw text.
381 raw_svector_ostream O(Str);
382 emitDemotedVars(&MF->getFunction(), O);
383 OutStreamer->emitRawText(O.str());
384}
385
// NOTE(review): the signature line (listing line 386) is not visible in this
// listing; this is presumably the body of
// NVPTXAsmPrinter::emitFunctionBodyEnd - confirm against the real file.
// Drops the virtual-register mapping tables.
387 VRegMapping.clear();
388}
389
// NOTE(review): listing lines 390-392 (the signature and the construction of
// `Str`) are not visible in this listing; this is presumably the tail of
// NVPTXAsmPrinter::getFunctionFrameSymbol - confirm against the real file.
// Returns the symbol named `Str`, creating it in the output context if it
// does not exist yet.
393 return OutContext.getOrCreateSymbol(Str);
394}
395
// Emit an "implicit-def: <reg>" comment for the register defined by operand 0
// of \p MI, followed by a blank line. Non-virtual registers are named through
// the subtarget's register info.
396void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
397 Register RegNo = MI->getOperand(0).getReg();
398 if (RegNo.isVirtual()) {
 // NOTE(review): listing line 400 (the right-hand side of this
 // concatenation) is not visible in this listing.
399 OutStreamer->AddComment(Twine("implicit-def: ") +
401 } else {
402 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
403 OutStreamer->AddComment(Twine("implicit-def: ") +
404 STI.getRegisterInfo()->getName(RegNo));
405 }
406 OutStreamer->addBlankLine();
407}
408
409void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
410 raw_ostream &O) const {
411 // If the NVVM IR has some of reqntid* specified, then output
412 // the reqntid directive, and set the unspecified ones to 1.
413 // If none of Reqntid* is specified, don't output reqntid directive.
414 const auto ReqNTID = getReqNTID(F);
415 if (!ReqNTID.empty())
416 O << formatv(".reqntid {0:$[, ]}\n",
417 make_range(ReqNTID.begin(), ReqNTID.end()));
418
419 const auto MaxNTID = getMaxNTID(F);
420 if (!MaxNTID.empty())
421 O << formatv(".maxntid {0:$[, ]}\n",
422 make_range(MaxNTID.begin(), MaxNTID.end()));
423
424 if (const auto Mincta = getMinCTASm(F))
425 O << ".minnctapersm " << *Mincta << "\n";
426
427 if (const auto Maxnreg = getMaxNReg(F))
428 O << ".maxnreg " << *Maxnreg << "\n";
429
430 // .maxclusterrank directive requires SM_90 or higher, make sure that we
431 // filter it out for lower SM versions, as it causes a hard ptxas crash.
432 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
433 const NVPTXSubtarget *STI = &NTM.getSubtarget<NVPTXSubtarget>(F);
434
435 if (STI->getSmVersion() >= 90) {
436 const auto ClusterDim = getClusterDim(F);
437 const bool BlocksAreClusters = hasBlocksAreClusters(F);
438
439 if (!ClusterDim.empty()) {
440
441 if (!BlocksAreClusters)
442 O << ".explicitcluster\n";
443
444 if (ClusterDim[0] != 0) {
445 assert(llvm::all_of(ClusterDim, [](unsigned D) { return D != 0; }) &&
446 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
447 "should be non-zero as well");
448
449 O << formatv(".reqnctapercluster {0:$[, ]}\n",
450 make_range(ClusterDim.begin(), ClusterDim.end()));
451 } else {
452 assert(llvm::all_of(ClusterDim, [](unsigned D) { return D == 0; }) &&
453 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
454 "should be 0 as well");
455 }
456 }
457
458 if (BlocksAreClusters) {
459 LLVMContext &Ctx = F.getContext();
460 if (ReqNTID.empty() || ClusterDim.empty())
461 Ctx.diagnose(DiagnosticInfoUnsupported(
462 F, "blocksareclusters requires reqntid and cluster_dim attributes",
463 F.getSubprogram()));
464 else if (STI->getPTXVersion() < 90)
465 Ctx.diagnose(DiagnosticInfoUnsupported(
466 F, "blocksareclusters requires PTX version >= 9.0",
467 F.getSubprogram()));
468 else
469 O << ".blocksareclusters\n";
470 }
471
472 if (const auto Maxclusterrank = getMaxClusterRank(F))
473 O << ".maxclusterrank " << *Maxclusterrank << "\n";
474 }
475}
476
477std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
478 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
479
480 std::string Name;
481 raw_string_ostream NameStr(Name);
482
483 VRegRCMap::const_iterator I = VRegMapping.find(RC);
484 assert(I != VRegMapping.end() && "Bad register class");
485 const DenseMap<unsigned, unsigned> &RegMap = I->second;
486
487 VRegMap::const_iterator VI = RegMap.find(Reg);
488 assert(VI != RegMap.end() && "Bad virtual register");
489 unsigned MappedVR = VI->second;
490
491 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
492
493 return Name;
494}
495
496void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
497 raw_ostream &O) {
498 O << getVirtualRegisterName(vr);
499}
500
// Emit a declaration for the alias \p GA under the alias's own symbol name,
// using the aliased function `F` for the signature. Two fatal-error checks
// are visible: the aliasee must be a non-kernel function definition, and it
// must not be '.weak'.
// NOTE(review): listing lines 503, 505 and 509 are not visible here - the
// definition of `F`, the opening of the first error call, and the tail of the
// linkage condition are missing from this listing.
501void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
502 raw_ostream &O) {
504 if (!F || isKernelFunction(*F) || F->isDeclaration())
506 "NVPTX aliasee must be a non-kernel function definition");
507
508 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
510 report_fatal_error("NVPTX aliasee must not be '.weak'");
511
512 emitDeclarationWithName(F, getSymbol(GA), O);
513}
514
515void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
516 emitDeclarationWithName(F, getSymbol(F), O);
517}
518
// Print a declaration of \p F to \p O using \p S as the printed name:
// linkage directive, `.entry` or `.func`, the return-value string, the
// symbol, the parameter list, and a terminating `;`.
519void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
520 raw_ostream &O) {
521 emitLinkageDirective(F, O);
522 if (isKernelFunction(*F))
523 O << ".entry ";
524 else
525 O << ".func ";
526 printReturnValStr(F, O);
527 S->print(O, MAI);
528 O << "\n";
529 emitFunctionParamList(F, O);
530 O << "\n";
 // NOTE(review): listing line 531 is not visible; the `.noreturn` output
 // below is presumably conditional on it.
532 O << ".noreturn";
533 O << ";\n";
534}
535
// Return whether the constant \p C is used in the definition of a global
// variable. A null \p C yields false; otherwise the users of \p C that are
// themselves constants are examined.
// NOTE(review): listing lines 540 and 545 are not visible here. Line 540
// presumably introduces `GV` (so the `llvm.used` comparison applies when C is
// itself a GlobalVariable) and line 545 presumably holds the recursive test
// guarding `return true` - confirm against the real file.
536static bool usedInGlobalVarDef(const Constant *C) {
537 if (!C)
538 return false;
539
541 return GV->getName() != "llvm.used";
542
543 for (const User *U : C->users())
544 if (const Constant *C = dyn_cast<Constant>(U))
546 return true;
547
548 return false;
549}
550
551static bool usedInOneFunc(const User *U, Function const *&OneFunc) {
552 if (const GlobalVariable *OtherGV = dyn_cast<GlobalVariable>(U))
553 if (OtherGV->getName() == "llvm.used")
554 return true;
555
556 if (const Instruction *I = dyn_cast<Instruction>(U)) {
557 if (const Function *CurFunc = I->getFunction()) {
558 if (OneFunc && (CurFunc != OneFunc))
559 return false;
560 OneFunc = CurFunc;
561 return true;
562 }
563 return false;
564 }
565
566 for (const User *UU : U->users())
567 if (!usedInOneFunc(UU, OneFunc))
568 return false;
569
570 return true;
571}
572
573/* Find out if a global variable can be demoted to local scope.
574 * Currently, this is valid for CUDA shared variables, which have local
575 * scope and global lifetime. So the conditions to check are :
576 * 1. Is the global variable in shared address space?
577 * 2. Does it have local linkage?
578 * 3. Is the global variable referenced only in one function?
579 *
 * On success, \p f is set to the single function that uses \p GV; on
 * failure \p f is left untouched.
 */
580static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f) {
581 if (!GV->hasLocalLinkage())
582 return false;
 // NOTE(review): listing line 583 (the condition guarding the next return,
 // presumably the shared-address-space check) is not visible in this listing.
584 return false;
585
586 const Function *oneFunc = nullptr;
587
588 bool flag = usedInOneFunc(GV, oneFunc);
589 if (!flag)
590 return false;
 // usedInOneFunc can succeed without ever finding an instruction user, in
 // which case there is no function to demote into.
591 if (!oneFunc)
592 return false;
593 f = oneFunc;
594 return true;
595}
596
597static bool useFuncSeen(const Constant *C,
598 const SmallPtrSetImpl<const Function *> &SeenSet) {
599 for (const User *U : C->users()) {
600 if (const Constant *cu = dyn_cast<Constant>(U)) {
601 if (useFuncSeen(cu, SeenSet))
602 return true;
603 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
604 if (const Function *Caller = I->getFunction())
605 if (SeenSet.contains(Caller))
606 return true;
607 }
608 }
609 return false;
610}
611
612void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
613 SmallPtrSet<const Function *, 32> SeenSet;
614 for (const Function &F : M) {
615 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
616 emitDeclaration(&F, O);
617 continue;
618 }
619
620 if (F.isDeclaration()) {
621 if (F.use_empty())
622 continue;
623 if (F.getIntrinsicID())
624 continue;
625 emitDeclaration(&F, O);
626 continue;
627 }
628 for (const User *U : F.users()) {
629 if (const Constant *C = dyn_cast<Constant>(U)) {
630 if (usedInGlobalVarDef(C)) {
631 // The use is in the initialization of a global variable
632 // that is a function pointer, so print a declaration
633 // for the original function
634 emitDeclaration(&F, O);
635 break;
636 }
637 // Emit a declaration of this function if the function that
638 // uses this constant expr has already been seen.
639 if (useFuncSeen(C, SeenSet)) {
640 emitDeclaration(&F, O);
641 break;
642 }
643 }
644
645 if (!isa<Instruction>(U))
646 continue;
647 const Function *Caller = cast<Instruction>(U)->getFunction();
648 if (!Caller)
649 continue;
650
651 // If a caller has already been seen, then the caller is
652 // appearing in the module before the callee. so print out
653 // a declaration for the callee.
654 if (SeenSet.contains(Caller)) {
655 emitDeclaration(&F, O);
656 break;
657 }
658 }
659 SeenSet.insert(&F);
660 }
661 for (const GlobalAlias &GA : M.aliases())
662 emitAliasDeclaration(&GA, O);
663}
664
665void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
666 // Construct a default subtarget off of the TargetMachine defaults. The
667 // rest of NVPTX isn't friendly to change subtargets per function and
668 // so the default TargetMachine will have all of the options.
669 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
670 const NVPTXSubtarget *STI = NTM.getSubtargetImpl();
671 SmallString<128> Str1;
672 raw_svector_ostream OS1(Str1);
673
674 // Emit header before any dwarf directives are emitted below.
675 emitHeader(M, OS1, *STI);
676 OutStreamer->emitRawText(OS1.str());
677}
678
// NOTE(review): the signature line (listing line 679) is not visible in this
// listing; this is presumably the body of NVPTXAsmPrinter::doInitialization -
// confirm against the real file.
// Rejects modules that contain aliases when the default subtarget is below
// PTX 6.3 or sm_30, runs the base-class initialization, and resets the
// GlobalsEmitted flag.
680 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
681 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
682 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
683 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
684
685 // We need to call the parent's one explicitly.
686 bool Result = AsmPrinter::doInitialization(M);
687
688 GlobalsEmitted = false;
689
690 return Result;
691}
692
// Emit the module-level declarations and global variables of \p M as one
// block of raw text: function declarations first, then the global variables
// in dependency order.
693void NVPTXAsmPrinter::emitGlobals(const Module &M) {
694 SmallString<128> Str2;
695 raw_svector_ostream OS2(Str2);
696
697 emitDeclarations(M, OS2);
698
699 // As ptxas does not support forward references of globals, we need to first
700 // sort the list of module-level globals in def-use order. We visit each
701 // global variable in order, and ensure that we emit it *after* its dependent
702 // globals. We use a little extra memory maintaining both a set and a list to
703 // have fast searches while maintaining a strict ordering.
 // NOTE(review): listing lines 704-706 are not visible; presumably they
 // declare `Globals`, `GVVisited` and `GVVisiting` used below.
707
708 // Visit each global variable, in order
709 for (const GlobalVariable &I : M.globals())
710 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
711
712 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
713 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
714
715 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
716 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
717
718 // Print out module-level global variables in proper order
719 for (const GlobalVariable *GV : Globals)
720 printModuleLevelGV(GV, OS2, /*ProcessDemoted=*/false, STI);
721
722 OS2 << '\n';
723
724 OutStreamer->emitRawText(OS2.str());
725}
726
// Emit a `.alias <alias>, <aliasee>;` line for \p GA as raw text, using the
// alias's symbol name and the name of its aliasee object.
727void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
 // NOTE(review): listing line 728 is not visible; presumably it declares the
 // buffer `Str` that backs the stream below.
729 raw_svector_ostream OS(Str);
730
731 MCSymbol *Name = getSymbol(&GA);
732
733 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
734 << ";\n";
735
736 OutStreamer->emitRawText(OS.str());
737}
738
// Print the PTX module header to \p O: a banner comment, `.version`,
// `.target` (with optional `texmode_independent` and `debug` suffixes) and
// `.address_size`.
739void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
740 const NVPTXSubtarget &STI) {
741 const unsigned PTXVersion = STI.getPTXVersion();
742
 // The PTX version is stored as major*10 + minor, hence the / 10 and % 10.
743 O << "//\n"
744 "// Generated by LLVM NVPTX Back-End\n"
745 "//\n"
746 "\n"
747 << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"
748 << ".target " << STI.getTargetName();
749
750 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
751 if (NTM.getDrvInterface() == NVPTX::NVCL)
752 O << ", texmode_independent";
753
 // Scan the compile units; stop at the first one that sets HasFullDebugInfo.
 // NOTE(review): listing lines 757-758 and 760-761 (the `case` labels of the
 // switch) are not visible, so which emission kinds count as full debug info
 // cannot be read off this listing.
754 bool HasFullDebugInfo = false;
755 for (DICompileUnit *CU : M.debug_compile_units()) {
756 switch(CU->getEmissionKind()) {
759 break;
762 HasFullDebugInfo = true;
763 break;
764 }
765 if (HasFullDebugInfo)
766 break;
767 }
768 if (HasFullDebugInfo)
769 O << ", debug";
770
771 O << "\n"
772 << ".address_size " << (NTM.is64Bit() ? "64" : "32") << "\n"
773 << "\n";
774}
775
// NOTE(review): the signature line (listing line 776) is not visible in this
// listing; this is presumably the body of NVPTXAsmPrinter::doFinalization -
// confirm against the real file. Listing lines 787 and 799 are also missing.
// Makes sure the globals have been emitted, runs the base-class finalization,
// and, when debug info is present, closes the last section and emits an empty
// .debug_macinfo section. Returns the base-class result.
777 // If we did not emit any functions, then the global declarations have not
778 // yet been emitted.
779 if (!GlobalsEmitted) {
780 emitGlobals(M);
781 GlobalsEmitted = true;
782 }
783
784 // call doFinalization
785 bool ret = AsmPrinter::doFinalization(M);
786
788
789 auto *TS =
790 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
791 // Close the last emitted section
792 if (hasDebugInfo()) {
793 TS->closeLastSection();
794 // Emit empty .debug_macinfo section for better support of the empty files.
795 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
796 }
797
798 // Output last DWARF .file directives, if any.
800
801 return ret;
802}
803
804// This function emits appropriate linkage directives for
805// functions and global variables.
806//
807// extern function declaration -> .extern
808// extern function definition -> .visible
809// external global variable with init -> .visible
810// external without init -> .extern
811// appending -> not allowed, assert.
812// for any linkage other than
813// internal, private, linker_private,
814// linker_private_weak, linker_private_weak_def_auto,
815// we emit -> .weak.
816
817void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
818 raw_ostream &O) {
819 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
820 if (V->hasExternalLinkage()) {
821 if (const auto *GVar = dyn_cast<GlobalVariable>(V))
822 O << (GVar->hasInitializer() ? ".visible " : ".extern ");
823 else if (V->isDeclaration())
824 O << ".extern ";
825 else
826 O << ".visible ";
827 } else if (V->hasAppendingLinkage()) {
828 report_fatal_error("Symbol '" + (V->hasName() ? V->getName() : "") +
829 "' has unsupported appending linkage type");
830 } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
831 O << ".weak ";
832 }
833 }
834}
835
// Print the PTX definition or declaration of the global variable \p GVar to
// \p O.
//
// Metadata-section globals and globals whose name starts with "llvm." or
// "nvvm." are skipped. Textures, surfaces and samplers get their dedicated
// `.texref` / `.surfref` / `.samplerref` forms. When \p ProcessDemoted is
// false and canDemoteGlobalVar() holds, the variable is recorded in
// localDecls for the demoting function and only a comment is printed.
// Everything else is printed as an address-space-qualified, aligned variable,
// either with a fundamental PTX type or lowered to an array of bytes/words.
//
// NOTE(review): the embedded listing numbers show gaps at 860, 863, 971, 994
// and 1040; the corresponding source lines are not visible in this listing
// and are flagged inline below.
836void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
837 raw_ostream &O, bool ProcessDemoted,
838 const NVPTXSubtarget &STI) {
839 // Skip meta data
840 if (GVar->hasSection())
841 if (GVar->getSection() == "llvm.metadata")
842 return;
843
844 // Skip LLVM intrinsic global variables
845 if (GVar->getName().starts_with("llvm.") ||
846 GVar->getName().starts_with("nvvm."))
847 return;
848
849 const DataLayout &DL = getDataLayout();
850
851 // GlobalVariables are always constant pointers themselves.
852 Type *ETy = GVar->getValueType();
853
 // Linkage directive. NOTE(review): listing lines 860 and 863 (the tails of
 // the two conditions below) are not visible in this listing.
854 if (GVar->hasExternalLinkage()) {
855 if (GVar->hasInitializer())
856 O << ".visible ";
857 else
858 O << ".extern ";
859 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
861 O << ".common ";
862 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
864 GVar->hasCommonLinkage()) {
865 O << ".weak ";
866 }
867
868 if (isTexture(*GVar)) {
869 O << ".global .texref " << getTextureName(*GVar) << ";\n";
870 return;
871 }
872
873 if (isSurface(*GVar)) {
874 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
875 return;
876 }
877
878 if (GVar->isDeclaration()) {
879 // (extern) declarations, no definition or initializer
880 // Currently the only known declaration is for an automatic __local
881 // (.shared) promoted to global.
882 emitPTXGlobalVariable(GVar, O, STI);
883 O << ";\n";
884 return;
885 }
886
887 if (isSampler(*GVar)) {
888 O << ".global .samplerref " << getSamplerName(*GVar);
889
 // A sampler initializer is only printed when it is a ConstantInt; its bits
 // are decoded with the __CLK_* masks into addressing mode, filter mode and
 // the normalized-coordinates flag.
890 const Constant *Initializer = nullptr;
891 if (GVar->hasInitializer())
892 Initializer = GVar->getInitializer();
893 const ConstantInt *CI = nullptr;
894 if (Initializer)
895 CI = dyn_cast<ConstantInt>(Initializer);
896 if (CI) {
897 unsigned sample = CI->getZExtValue();
898
899 O << " = { ";
900
 // The same decoded addressing mode is printed for addr_mode_0..2.
901 for (int i = 0,
902 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
903 i < 3; i++) {
904 O << "addr_mode_" << i << " = ";
905 switch (addr) {
906 case 0:
907 O << "wrap";
908 break;
909 case 1:
910 O << "clamp_to_border";
911 break;
912 case 2:
913 O << "clamp_to_edge";
914 break;
915 case 3:
916 O << "wrap";
917 break;
918 case 4:
919 O << "mirror";
920 break;
921 }
922 O << ", ";
923 }
924 O << "filter_mode = ";
925 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
926 case 0:
927 O << "nearest";
928 break;
929 case 1:
930 O << "linear";
931 break;
932 case 2:
933 llvm_unreachable("Anisotropic filtering is not supported");
934 default:
935 O << "nearest";
936 break;
937 }
938 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
939 O << ", force_unnormalized_coords = 1";
940 }
941 O << " }";
942 }
943
944 O << ";\n";
945 return;
946 }
947
948 if (GVar->hasPrivateLinkage()) {
949 if (GVar->getName().starts_with("unrollpragma"))
950 return;
951
952 // FIXME - need better way (e.g. Metadata) to avoid generating this global
953 if (GVar->getName().starts_with("filename"))
954 return;
955 if (GVar->use_empty())
956 return;
957 }
958
 // Demotable variables are deferred: they are queued on the function that
 // uses them instead of being printed at module level.
959 const Function *DemotedFunc = nullptr;
960 if (!ProcessDemoted && canDemoteGlobalVar(GVar, DemotedFunc)) {
961 O << "// " << GVar->getName() << " has been demoted\n";
962 localDecls[DemotedFunc].push_back(GVar);
963 return;
964 }
965
966 O << ".";
967 emitPTXAddressSpace(GVar->getAddressSpace(), O);
968
969 if (isManaged(*GVar)) {
 // NOTE(review): listing line 971 (presumably the opening of the error call
 // whose message follows) is not visible in this listing.
970 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
972 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
973 O << " .attribute(.managed)";
974 }
975
 // Explicit alignment wins; otherwise fall back to the preferred alignment
 // of the value type.
976 O << " .align "
977 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
978
 // Pointers, and integers/floats of at most 64 bits, are printed with a
 // fundamental PTX type; everything else goes through the aggregate path.
979 if (ETy->isPointerTy() || ((ETy->isIntegerTy() || ETy->isFloatingPointTy()) &&
980 ETy->getScalarSizeInBits() <= 64)) {
981 O << " .";
982 // Special case: ABI requires that we use .u8 for predicates
983 if (ETy->isIntegerTy(1))
984 O << "u8";
985 else
986 O << getPTXFundamentalTypeStr(ETy, false);
987 O << " ";
988 getSymbol(GVar)->print(O, MAI);
989
990 // Ptx allows variable initialization only for constant and global state
991 // spaces.
992 if (GVar->hasInitializer()) {
 // NOTE(review): listing line 994 (the second half of this condition) is
 // not visible in this listing.
993 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
995 const Constant *Initializer = GVar->getInitializer();
996 // 'undef' is treated as there is no value specified.
997 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
998 O << " = ";
999 printScalarConstant(Initializer, O);
1000 }
1001 } else {
1002 // The frontend adds zero-initializer to device and constant variables
1003 // that don't have an initial value, and UndefValue to shared
1004 // variables, so skip warning for this case.
1005 if (!GVar->getInitializer()->isNullValue() &&
1006 !isa<UndefValue>(GVar->getInitializer())) {
1007 report_fatal_error("initial value of '" + GVar->getName() +
1008 "' is not allowed in addrspace(" +
1009 Twine(GVar->getAddressSpace()) + ")");
1010 }
1011 }
1012 }
1013 } else {
1014 // Although PTX has direct support for struct type and array type and
1015 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1016 // targets that support these high level field accesses. Structs, arrays
1017 // and vectors are lowered into arrays of bytes.
1018 switch (ETy->getTypeID()) {
1019 case Type::IntegerTyID: // Integers larger than 64 bits
1020 case Type::FP128TyID:
1021 case Type::StructTyID:
1022 case Type::ArrayTyID:
1023 case Type::FixedVectorTyID: {
1024 const uint64_t ElementSize = DL.getTypeStoreSize(ETy);
1025 // Ptx allows variable initialization only for constant and
1026 // global state spaces.
1027 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1028 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1029 GVar->hasInitializer()) {
1030 const Constant *Initializer = GVar->getInitializer();
1031 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1032 AggBuffer aggBuffer(ElementSize, *this);
1033 bufferAggregateConstant(Initializer, &aggBuffer);
 // An initializer containing symbols is printed as pointer-sized words
 // when the total size and every symbol position are pointer-aligned,
 // and as bytes otherwise.
1034 if (aggBuffer.numSymbols()) {
1035 const unsigned int ptrSize = MAI->getCodePointerSize();
1036 if (ElementSize % ptrSize ||
1037 !aggBuffer.allSymbolsAligned(ptrSize)) {
1038 // Print in bytes and use the mask() operator for pointers.
 // NOTE(review): listing line 1040 (presumably the opening of the
 // error call whose message follows) is not visible in this listing.
1039 if (!STI.hasMaskOperator())
1041 "initialized packed aggregate with pointers '" +
1042 GVar->getName() +
1043 "' requires at least PTX ISA version 7.1");
1044 O << " .u8 ";
1045 getSymbol(GVar)->print(O, MAI);
1046 O << "[" << ElementSize << "] = {";
1047 aggBuffer.printBytes(O);
1048 O << "}";
1049 } else {
1050 O << " .u" << ptrSize * 8 << " ";
1051 getSymbol(GVar)->print(O, MAI);
1052 O << "[" << ElementSize / ptrSize << "] = {";
1053 aggBuffer.printWords(O);
1054 O << "}";
1055 }
1056 } else {
1057 O << " .b8 ";
1058 getSymbol(GVar)->print(O, MAI);
1059 O << "[" << ElementSize << "] = {";
1060 aggBuffer.printBytes(O);
1061 O << "}";
1062 }
1063 } else {
 // Undef or all-zero initializer: declare the storage without a value.
1064 O << " .b8 ";
1065 getSymbol(GVar)->print(O, MAI);
1066 if (ElementSize)
1067 O << "[" << ElementSize << "]";
1068 }
1069 } else {
1070 O << " .b8 ";
1071 getSymbol(GVar)->print(O, MAI);
1072 if (ElementSize)
1073 O << "[" << ElementSize << "]";
1074 }
1075 break;
1076 }
1077 default:
1078 llvm_unreachable("type not supported yet");
1079 }
1080 }
1081 O << ";\n";
1082}
1083
// Print the symbol with index \p nSym to \p os. A GlobalValue is printed by
// name, wrapped in `generic(...)` when EmitGeneric is set, the pre-stripping
// value is a pointer in address space 0, and the symbol is not a function.
// Otherwise, when the pre-stripping value is a ConstantExpr, it is lowered to
// an MCExpr and printed. Any other kind of symbol is unreachable.
1084void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1085 const Value *v = Symbols[nSym];
1086 const Value *v0 = SymbolsBeforeStripping[nSym];
1087 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1088 MCSymbol *Name = AP.getSymbol(GVar);
 // NOTE(review): listing line 1089 is not visible; presumably it defines
 // `PTy` from the type of v0 - confirm against the real file.
1090 // Is v0 a generic pointer?
1091 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1092 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1093 os << "generic(";
1094 Name->print(os, AP.MAI);
1095 os << ")";
1096 } else {
1097 Name->print(os, AP.MAI);
1098 }
1099 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1100 const MCExpr *Expr = AP.lowerConstantForGV(CExpr, false);
1101 AP.printMCExpr(*Expr, os);
1102 } else
1103 llvm_unreachable("symbol type unknown");
1104}
1105
1106void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1107 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1108 // Do not emit trailing zero initializers. They will be zero-initialized by
1109 // ptxas. This saves on both space requirements for the generated PTX and on
1110 // memory use by ptxas. (See:
1111 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1112 unsigned int InitializerCount = size;
1113 // TODO: symbols make this harder, but it would still be good to trim trailing
1114 // 0s for aggs with symbols as well.
1115 if (numSymbols() == 0)
1116 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1117 InitializerCount--;
1118
1119 symbolPosInBuffer.push_back(InitializerCount);
1120 unsigned int nSym = 0;
1121 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1122 for (unsigned int pos = 0; pos < InitializerCount;) {
1123 if (pos)
1124 os << ", ";
1125 if (pos != nextSymbolPos) {
1126 os << (unsigned int)buffer[pos];
1127 ++pos;
1128 continue;
1129 }
1130 // Generate a per-byte mask() operator for the symbol, which looks like:
1131 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1132 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1133 std::string symText;
1134 llvm::raw_string_ostream oss(symText);
1135 printSymbol(nSym, oss);
1136 for (unsigned i = 0; i < ptrSize; ++i) {
1137 if (i)
1138 os << ", ";
1139 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1140 os << "(" << symText << ")";
1141 }
1142 pos += ptrSize;
1143 nextSymbolPos = symbolPosInBuffer[++nSym];
1144 assert(nextSymbolPos >= pos);
1145 }
1146}
1147
1148void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1149 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1150 symbolPosInBuffer.push_back(size);
1151 unsigned int nSym = 0;
1152 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1153 assert(nextSymbolPos % ptrSize == 0);
1154 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1155 if (pos)
1156 os << ", ";
1157 if (pos == nextSymbolPos) {
1158 printSymbol(nSym, os);
1159 nextSymbolPos = symbolPosInBuffer[++nSym];
1160 assert(nextSymbolPos % ptrSize == 0);
1161 assert(nextSymbolPos >= pos + ptrSize);
1162 } else if (ptrSize == 4)
1163 os << support::endian::read32le(&buffer[pos]);
1164 else
1165 os << support::endian::read64le(&buffer[pos]);
1166 }
1167}
1168
1169void NVPTXAsmPrinter::emitDemotedVars(const Function *F, raw_ostream &O) {
1170 auto It = localDecls.find(F);
1171 if (It == localDecls.end())
1172 return;
1173
1174 ArrayRef<const GlobalVariable *> GVars = It->second;
1175
1176 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1177 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
1178
1179 for (const GlobalVariable *GV : GVars) {
1180 O << "\t// demoted variable\n\t";
1181 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1182 }
1183}
1184
1185void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1186 raw_ostream &O) const {
1187 switch (AddressSpace) {
1189 O << "local";
1190 break;
1192 O << "global";
1193 break;
1195 O << "const";
1196 break;
1198 O << "shared";
1199 break;
1200 default:
1201 report_fatal_error("Bad address space found while emitting PTX: " +
1202 llvm::Twine(AddressSpace));
1203 break;
1204 }
1205}
1206
1207std::string
1208NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1209 switch (Ty->getTypeID()) {
1210 case Type::IntegerTyID: {
1211 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1212 if (NumBits == 1)
1213 return "pred";
1214 if (NumBits <= 64) {
1215 std::string name = "u";
1216 return name + utostr(NumBits);
1217 }
1218 llvm_unreachable("Integer too large");
1219 break;
1220 }
1221 case Type::BFloatTyID:
1222 case Type::HalfTyID:
1223 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1224 // PTX assembly.
1225 return "b16";
1226 case Type::FloatTyID:
1227 return "f32";
1228 case Type::DoubleTyID:
1229 return "f64";
1230 case Type::PointerTyID: {
1231 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1232 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1233
1234 if (PtrSize == 64)
1235 if (useB4PTR)
1236 return "b64";
1237 else
1238 return "u64";
1239 else if (useB4PTR)
1240 return "b32";
1241 else
1242 return "u32";
1243 }
1244 default:
1245 break;
1246 }
1247 llvm_unreachable("unexpected type");
1248}
1249
1250void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1251 raw_ostream &O,
1252 const NVPTXSubtarget &STI) {
1253 const DataLayout &DL = getDataLayout();
1254
1255 // GlobalVariables are always constant pointers themselves.
1256 Type *ETy = GVar->getValueType();
1257
1258 O << ".";
1259 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1260 if (isManaged(*GVar)) {
1261 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
1263 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1264
1265 O << " .attribute(.managed)";
1266 }
1267 O << " .align "
1268 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1269
1270 // Special case for i128/fp128
1271 if (ETy->getScalarSizeInBits() == 128) {
1272 O << " .b8 ";
1273 getSymbol(GVar)->print(O, MAI);
1274 O << "[16]";
1275 return;
1276 }
1277
1278 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1279 O << " ." << getPTXFundamentalTypeStr(ETy) << " ";
1280 getSymbol(GVar)->print(O, MAI);
1281 return;
1282 }
1283
1284 int64_t ElementSize = 0;
1285
1286 // Although PTX has direct support for struct type and array type and LLVM IR
1287 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1288 // support these high level field accesses. Structs and arrays are lowered
1289 // into arrays of bytes.
1290 switch (ETy->getTypeID()) {
1291 case Type::StructTyID:
1292 case Type::ArrayTyID:
1294 ElementSize = DL.getTypeStoreSize(ETy);
1295 O << " .b8 ";
1296 getSymbol(GVar)->print(O, MAI);
1297 O << "[";
1298 if (ElementSize) {
1299 O << ElementSize;
1300 }
1301 O << "]";
1302 break;
1303 default:
1304 llvm_unreachable("type not supported yet");
1305 }
1306}
1307
1308void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1309 const DataLayout &DL = getDataLayout();
1310 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1311 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1312 const NVPTXMachineFunctionInfo *MFI =
1313 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1314
1315 bool IsFirst = true;
1316 const bool IsKernelFunc = isKernelFunction(*F);
1317
1318 if (F->arg_empty() && !F->isVarArg()) {
1319 O << "()";
1320 return;
1321 }
1322
1323 O << "(\n";
1324
1325 for (const Argument &Arg : F->args()) {
1326 Type *Ty = Arg.getType();
1327 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1328
1329 if (!IsFirst)
1330 O << ",\n";
1331
1332 IsFirst = false;
1333
1334 // Handle image/sampler parameters
1335 if (IsKernelFunc) {
1336 const bool IsSampler = isSampler(Arg);
1337 const bool IsTexture = !IsSampler && isImageReadOnly(Arg);
1338 const bool IsSurface = !IsSampler && !IsTexture &&
1339 (isImageReadWrite(Arg) || isImageWriteOnly(Arg));
1340 if (IsSampler || IsTexture || IsSurface) {
1341 const bool EmitImgPtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1342 O << "\t.param ";
1343 if (EmitImgPtr)
1344 O << ".u64 .ptr ";
1345
1346 if (IsSampler)
1347 O << ".samplerref ";
1348 else if (IsTexture)
1349 O << ".texref ";
1350 else // IsSurface
1351 O << ".surfref ";
1352 O << ParamSym;
1353 continue;
1354 }
1355 }
1356
1357 auto GetOptimalAlignForParam = [TLI, &DL, F, &Arg](Type *Ty) -> Align {
1358 if (MaybeAlign StackAlign =
1359 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1360 return StackAlign.value();
1361
1362 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1363 MaybeAlign ParamAlign =
1364 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1365 return std::max(TypeAlign, ParamAlign.valueOrOne());
1366 };
1367
1368 if (Arg.hasByValAttr()) {
1369 // param has byVal attribute.
1370 Type *ETy = Arg.getParamByValType();
1371 assert(ETy && "Param should have byval type");
1372
1373 // Print .param .align <a> .b8 .param[size];
1374 // <a> = optimal alignment for the element type; always multiple of
1375 // PAL.getParamAlignment
1376 // size = typeallocsize of element type
1377 const Align OptimalAlign =
1378 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1379 : TLI->getFunctionByValParamAlign(
1380 F, ETy, Arg.getParamAlign().valueOrOne(), DL);
1381
1382 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1383 << "[" << DL.getTypeAllocSize(ETy) << "]";
1384 continue;
1385 }
1386
1387 if (shouldPassAsArray(Ty)) {
1388 // Just print .param .align <a> .b8 .param[size];
1389 // <a> = optimal alignment for the element type; always multiple of
1390 // PAL.getParamAlignment
1391 // size = typeallocsize of element type
1392 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1393
1394 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1395 << "[" << DL.getTypeAllocSize(Ty) << "]";
1396
1397 continue;
1398 }
1399 // Just a scalar
1400 auto *PTy = dyn_cast<PointerType>(Ty);
1401 unsigned PTySizeInBits = 0;
1402 if (PTy) {
1403 PTySizeInBits =
1404 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1405 assert(PTySizeInBits && "Invalid pointer size");
1406 }
1407
1408 if (IsKernelFunc) {
1409 if (PTy) {
1410 O << "\t.param .u" << PTySizeInBits << " .ptr";
1411
1412 switch (PTy->getAddressSpace()) {
1413 default:
1414 break;
1416 O << " .global";
1417 break;
1419 O << " .shared";
1420 break;
1422 O << " .const";
1423 break;
1425 O << " .local";
1426 break;
1427 }
1428
1429 O << " .align " << Arg.getParamAlign().valueOrOne().value() << " "
1430 << ParamSym;
1431 continue;
1432 }
1433
1434 // non-pointer scalar to kernel func
1435 O << "\t.param .";
1436 // Special case: predicate operands become .u8 types
1437 if (Ty->isIntegerTy(1))
1438 O << "u8";
1439 else
1440 O << getPTXFundamentalTypeStr(Ty);
1441 O << " " << ParamSym;
1442 continue;
1443 }
1444 // Non-kernel function, just print .param .b<size> for ABI
1445 // and .reg .b<size> for non-ABI
1446 unsigned Size;
1447 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
1448 Size = promoteScalarArgumentSize(ITy->getBitWidth());
1449 } else if (PTy) {
1450 assert(PTySizeInBits && "Invalid pointer size");
1451 Size = PTySizeInBits;
1452 } else
1454 O << "\t.param .b" << Size << " " << ParamSym;
1455 }
1456
1457 if (F->isVarArg()) {
1458 if (!IsFirst)
1459 O << ",\n";
1460 O << "\t.param .align " << STI.getMaxRequiredAlignment() << " .b8 "
1461 << TLI->getParamName(F, /* vararg */ -1) << "[]";
1462 }
1463
1464 O << "\n)";
1465}
1466
1467void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1468 const MachineFunction &MF) {
1469 SmallString<128> Str;
1470 raw_svector_ostream O(Str);
1471
1472 // Map the global virtual register number to a register class specific
1473 // virtual register number starting from 1 with that class.
1474 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1475
1476 // Emit the Fake Stack Object
1477 const MachineFrameInfo &MFI = MF.getFrameInfo();
1478 int64_t NumBytes = MFI.getStackSize();
1479 if (NumBytes) {
1480 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1481 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1482 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1483 O << "\t.reg .b64 \t%SP;\n"
1484 << "\t.reg .b64 \t%SPL;\n";
1485 } else {
1486 O << "\t.reg .b32 \t%SP;\n"
1487 << "\t.reg .b32 \t%SPL;\n";
1488 }
1489 }
1490
1491 // Go through all virtual registers to establish the mapping between the
1492 // global virtual
1493 // register number and the per class virtual register number.
1494 // We use the per class virtual register number in the ptx output.
1495 for (unsigned I : llvm::seq(MRI->getNumVirtRegs())) {
1497 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1498 continue;
1499 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1500 RCRegMap[VR] = RCRegMap.size() + 1;
1501 }
1502
1503 // Emit declaration of the virtual registers or 'physical' registers for
1504 // each register class
1505 for (const TargetRegisterClass *RC : TRI->regclasses()) {
1506 const unsigned N = VRegMapping[RC].size();
1507
1508 // Only declare those registers that may be used.
1509 if (N) {
1510 const StringRef RCName = getNVPTXRegClassName(RC);
1511 const StringRef RCStr = getNVPTXRegClassStr(RC);
1512 O << "\t.reg " << RCName << " \t" << RCStr << "<" << (N + 1) << ">;\n";
1513 }
1514 }
1515
1516 OutStreamer->emitRawText(O.str());
1517}
1518
1519/// Translate virtual register numbers in DebugInfo locations to their printed
1520/// encodings, as used by CUDA-GDB.
1521void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1522 const MachineFunction &MF) {
1523 const NVPTXSubtarget &STI = MF.getSubtarget<NVPTXSubtarget>();
1524 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1525
1526 // Clear the old mapping, and add the new one. This mapping is used after the
1527 // printing of the current function is complete, but before the next function
1528 // is printed.
1529 registerInfo->clearDebugRegisterMap();
1530
1531 for (auto &classMap : VRegMapping) {
1532 for (auto &registerMapping : classMap.getSecond()) {
1533 auto reg = registerMapping.getFirst();
1534 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1535 }
1536 }
1537}
1538
1539void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp,
1540 raw_ostream &O) const {
1541 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1542 bool ignored;
1543 unsigned int numHex;
1544 const char *lead;
1545
1546 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1547 numHex = 8;
1548 lead = "0f";
1550 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1551 numHex = 16;
1552 lead = "0d";
1554 } else
1555 llvm_unreachable("unsupported fp type");
1556
1557 APInt API = APF.bitcastToAPInt();
1558 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1559}
1560
1561void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1562 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1563 O << CI->getValue();
1564 return;
1565 }
1566 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1567 printFPConstant(CFP, O);
1568 return;
1569 }
1570 if (isa<ConstantPointerNull>(CPV)) {
1571 O << "0";
1572 return;
1573 }
1574 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1575 const bool IsNonGenericPointer = GVar->getAddressSpace() != 0;
1576 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1577 O << "generic(";
1578 getSymbol(GVar)->print(O, MAI);
1579 O << ")";
1580 } else {
1581 getSymbol(GVar)->print(O, MAI);
1582 }
1583 return;
1584 }
1585 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1586 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1587 printMCExpr(*E, O);
1588 return;
1589 }
1590 llvm_unreachable("Not scalar type found in printScalarConstant()");
1591}
1592
1593void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1594 AggBuffer *AggBuffer) {
1595 const DataLayout &DL = getDataLayout();
1596 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1597 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1598 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1599 // only the space allocated by CPV.
1600 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1601 return;
1602 }
1603
1604 // Helper for filling AggBuffer with APInts.
1605 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1606 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1607 SmallVector<unsigned char, 16> Buf(NumBytes);
1608 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1609 // input's bit width, and i1 arrays may not have a length that is a multuple
1610 // of 8. We handle the last byte separately, so we never request out of
1611 // bounds bits.
1612 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1613 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1614 }
1615 size_t LastBytePosition = (NumBytes - 1) * 8;
1616 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1617 Buf[NumBytes - 1] =
1618 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1619 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1620 };
1621
1622 switch (CPV->getType()->getTypeID()) {
1623 case Type::IntegerTyID:
1624 if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1625 AddIntToBuffer(CI->getValue());
1626 break;
1627 }
1628 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1629 if (const auto *CI =
1631 AddIntToBuffer(CI->getValue());
1632 break;
1633 }
1634 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1635 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1636 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1637 AggBuffer->addZeros(AllocSize);
1638 break;
1639 }
1640 }
1641 llvm_unreachable("unsupported integer const type");
1642 break;
1643
1644 case Type::HalfTyID:
1645 case Type::BFloatTyID:
1646 case Type::FloatTyID:
1647 case Type::DoubleTyID:
1648 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1649 break;
1650
1651 case Type::PointerTyID: {
1652 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1653 AggBuffer->addSymbol(GVar, GVar);
1654 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1655 const Value *v = Cexpr->stripPointerCasts();
1656 AggBuffer->addSymbol(v, Cexpr);
1657 }
1658 AggBuffer->addZeros(AllocSize);
1659 break;
1660 }
1661
1662 case Type::ArrayTyID:
1664 case Type::StructTyID: {
1666 bufferAggregateConstant(CPV, AggBuffer);
1667 if (Bytes > AllocSize)
1668 AggBuffer->addZeros(Bytes - AllocSize);
1669 } else if (isa<ConstantAggregateZero>(CPV))
1670 AggBuffer->addZeros(Bytes);
1671 else
1672 llvm_unreachable("Unexpected Constant type");
1673 break;
1674 }
1675
1676 default:
1677 llvm_unreachable("unsupported type");
1678 }
1679}
1680
1681void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1682 AggBuffer *aggBuffer) {
1683 const DataLayout &DL = getDataLayout();
1684
1685 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1686 for (unsigned I : llvm::seq(Val.getBitWidth() / 8))
1687 Buffer->addByte(Val.extractBitsAsZExtValue(8, I * 8));
1688 };
1689
1690 // Integers of arbitrary width
1691 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1692 ExtendBuffer(CI->getValue(), aggBuffer);
1693 return;
1694 }
1695
1696 // f128
1697 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1698 if (CFP->getType()->isFP128Ty()) {
1699 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1700 return;
1701 }
1702 }
1703
1704 // Old constants
1705 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1706 for (const auto &Op : CPV->operands())
1707 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1708 return;
1709 }
1710
1711 if (const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1712 for (unsigned I : llvm::seq(CDS->getNumElements()))
1713 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(I)), 0, aggBuffer);
1714 return;
1715 }
1716
1717 if (isa<ConstantStruct>(CPV)) {
1718 if (CPV->getNumOperands()) {
1719 StructType *ST = cast<StructType>(CPV->getType());
1720 for (unsigned I : llvm::seq(CPV->getNumOperands())) {
1721 int EndOffset = (I + 1 == CPV->getNumOperands())
1722 ? DL.getStructLayout(ST)->getElementOffset(0) +
1723 DL.getTypeAllocSize(ST)
1724 : DL.getStructLayout(ST)->getElementOffset(I + 1);
1725 int Bytes = EndOffset - DL.getStructLayout(ST)->getElementOffset(I);
1726 bufferLEByte(cast<Constant>(CPV->getOperand(I)), Bytes, aggBuffer);
1727 }
1728 }
1729 return;
1730 }
1731 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1732}
1733
1734/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1735/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1736/// expressions that are representable in PTX and create
1737/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1738const MCExpr *
1739NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
1740 bool ProcessingGeneric) const {
1741 MCContext &Ctx = OutContext;
1742
1743 if (CV->isNullValue() || isa<UndefValue>(CV))
1744 return MCConstantExpr::create(0, Ctx);
1745
1746 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1747 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1748
1749 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1750 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1751 if (ProcessingGeneric)
1752 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1753 return Expr;
1754 }
1755
1756 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1757 if (!CE) {
1758 llvm_unreachable("Unknown constant value to lower!");
1759 }
1760
1761 switch (CE->getOpcode()) {
1762 default:
1763 break; // Error
1764
1765 case Instruction::AddrSpaceCast: {
1766 // Strip the addrspacecast and pass along the operand
1767 PointerType *DstTy = cast<PointerType>(CE->getType());
1768 if (DstTy->getAddressSpace() == 0)
1769 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1770
1771 break; // Error
1772 }
1773
1774 case Instruction::GetElementPtr: {
1775 const DataLayout &DL = getDataLayout();
1776
1777 // Generate a symbolic expression for the byte address
1778 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1779 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1780
1781 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1782 ProcessingGeneric);
1783 if (!OffsetAI)
1784 return Base;
1785
1786 int64_t Offset = OffsetAI.getSExtValue();
1788 Ctx);
1789 }
1790
1791 case Instruction::Trunc:
1792 // We emit the value and depend on the assembler to truncate the generated
1793 // expression properly. This is important for differences between
1794 // blockaddress labels. Since the two labels are in the same function, it
1795 // is reasonable to treat their delta as a 32-bit value.
1796 [[fallthrough]];
1797 case Instruction::BitCast:
1798 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1799
1800 case Instruction::IntToPtr: {
1801 const DataLayout &DL = getDataLayout();
1802
1803 // Handle casts to pointers by changing them into casts to the appropriate
1804 // integer type. This promotes constant folding and simplifies this code.
1805 Constant *Op = CE->getOperand(0);
1806 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1807 /*IsSigned*/ false, DL);
1808 if (Op)
1809 return lowerConstantForGV(Op, ProcessingGeneric);
1810
1811 break; // Error
1812 }
1813
1814 case Instruction::PtrToInt: {
1815 const DataLayout &DL = getDataLayout();
1816
1817 // Support only foldable casts to/from pointers that can be eliminated by
1818 // changing the pointer to the appropriately sized integer type.
1819 Constant *Op = CE->getOperand(0);
1820 Type *Ty = CE->getType();
1821
1822 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1823
1824 // We can emit the pointer value into this slot if the slot is an
1825 // integer slot equal to the size of the pointer.
1826 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1827 return OpExpr;
1828
1829 // Otherwise the pointer is smaller than the resultant integer, mask off
1830 // the high bits so we are sure to get a proper truncation if the input is
1831 // a constant expr.
1832 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1833 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1834 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1835 }
1836
1837 // The MC library also has a right-shift operator, but it isn't consistently
1838 // signed or unsigned between different targets.
1839 case Instruction::Add: {
1840 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1841 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
1842 switch (CE->getOpcode()) {
1843 default: llvm_unreachable("Unknown binary operator constant cast expr");
1844 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1845 }
1846 }
1847 }
1848
1849 // If the code isn't optimized, there may be outstanding folding
1850 // opportunities. Attempt to fold the expression using DataLayout as a
1851 // last resort before giving up.
1853 if (C != CE)
1854 return lowerConstantForGV(C, ProcessingGeneric);
1855
1856 // Otherwise report the problem to the user.
1857 std::string S;
1858 raw_string_ostream OS(S);
1859 OS << "Unsupported expression in static initializer: ";
1860 CE->printAsOperand(OS, /*PrintType=*/false,
1861 !MF ? nullptr : MF->getFunction().getParent());
1862 report_fatal_error(Twine(OS.str()));
1863}
1864
1865void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) const {
1866 OutContext.getAsmInfo()->printExpr(OS, Expr);
1867}
1868
1869/// PrintAsmOperand - Print out an operand for an inline asm expression.
1870///
1871bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1872 const char *ExtraCode, raw_ostream &O) {
1873 if (ExtraCode && ExtraCode[0]) {
1874 if (ExtraCode[1] != 0)
1875 return true; // Unknown modifier.
1876
1877 switch (ExtraCode[0]) {
1878 default:
1879 // See if this is a generic print operand
1880 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
1881 case 'r':
1882 break;
1883 }
1884 }
1885
1886 printOperand(MI, OpNo, O);
1887
1888 return false;
1889}
1890
1891bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
1892 unsigned OpNo,
1893 const char *ExtraCode,
1894 raw_ostream &O) {
1895 if (ExtraCode && ExtraCode[0])
1896 return true; // Unknown modifier
1897
1898 O << '[';
1899 printMemOperand(MI, OpNo, O);
1900 O << ']';
1901
1902 return false;
1903}
1904
1905void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
1906 raw_ostream &O) {
1907 const MachineOperand &MO = MI->getOperand(OpNum);
1908 switch (MO.getType()) {
1910 if (MO.getReg().isPhysical()) {
1911 if (MO.getReg() == NVPTX::VRDepot)
1913 else
1915 } else {
1916 emitVirtualRegister(MO.getReg(), O);
1917 }
1918 break;
1919
1921 O << MO.getImm();
1922 break;
1923
1925 printFPConstant(MO.getFPImm(), O);
1926 break;
1927
1929 PrintSymbolOperand(MO, O);
1930 break;
1931
1933 MO.getMBB()->getSymbol()->print(O, MAI);
1934 break;
1935
1936 default:
1937 llvm_unreachable("Operand type not supported.");
1938 }
1939}
1940
1941void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
1942 raw_ostream &O, const char *Modifier) {
1943 printOperand(MI, OpNum, O);
1944
1945 if (Modifier && strcmp(Modifier, "add") == 0) {
1946 O << ", ";
1947 printOperand(MI, OpNum + 1, O);
1948 } else {
1949 if (MI->getOperand(OpNum + 1).isImm() &&
1950 MI->getOperand(OpNum + 1).getImm() == 0)
1951 return; // don't print ',0' or '+0'
1952 O << "+";
1953 printOperand(MI, OpNum + 1, O);
1954 }
1955}
1956
1957char NVPTXAsmPrinter::ID = 0;
1958
1959INITIALIZE_PASS(NVPTXAsmPrinter, "nvptx-asm-printer", "NVPTX Assembly Printer",
1960 false, false)
1961
1962// Force static initialization.
1963extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
1964LLVMInitializeNVPTXAsmPrinter() {
1967}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define DEPOTNAME
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static const char * name
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
APInt bitcastToAPInt() const
Definition APFloat.h:1335
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:520
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:99
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition AsmPrinter.h:102
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:117
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition AsmPrinter.h:502
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
Definition AsmPrinter.h:611
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:136
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:106
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition AsmPrinter.h:449
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:111
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
Definition AsmPrinter.h:615
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
const APFloat & getValueAPF() const
Definition Constants.h:320
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:75
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void addOperand(const MCOperand Op)
Definition MCInst.h:215
void setOpcode(unsigned Op)
Definition MCInst.h:201
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition MCSymbol.cpp:59
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition Type.h:74
@ HalfTyID
16-bit floating point type
Definition Type.h:56
@ VoidTyID
type with no size
Definition Type.h:63
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ StructTyID
Structures.
Definition Type.h:73
@ IntegerTyID
Arbitrary bit width integers.
Definition Type.h:70
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition Type.h:75
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition Type.h:57
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ PointerTyID
Pointers.
Definition Type.h:72
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
TypeID getTypeID() const
Return the type id for the type.
Definition Type.h:136
op_range operands()
Definition User.h:292
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type size() const
Definition DenseSet.h:87
bool erase(const ValueT &V)
Definition DenseSet.h:100
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to a SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:667
uint64_t read64le(const void *P)
Definition Endian.h:435
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
MaybeAlign getAlign(const CallInst &I, unsigned Index)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
std::optional< unsigned > getMaxNReg(const Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
StringRef getSamplerName(const Value &V)
bool isImageReadWrite(const Value &V)
bool isImageReadOnly(const Value &V)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
bool isSampler(const Value &V)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isSurface(const Value &V)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition Format.h:204
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
std::optional< unsigned > getMaxClusterRank(const Function &F)
StringRef getTextureName(const Value &V)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
StringRef getSurfaceName(const Value &V)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool isTexture(const Value &V)
bool isImageWriteOnly(const Value &V)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...