LLVM 20.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/Twine.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/BasicBlock.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/Constants.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/DebugLoc.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/GlobalAlias.h"
61#include "llvm/IR/GlobalValue.h"
63#include "llvm/IR/Instruction.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/Module.h"
66#include "llvm/IR/Operator.h"
67#include "llvm/IR/Type.h"
68#include "llvm/IR/User.h"
69#include "llvm/MC/MCExpr.h"
70#include "llvm/MC/MCInst.h"
71#include "llvm/MC/MCInstrDesc.h"
72#include "llvm/MC/MCStreamer.h"
73#include "llvm/MC/MCSymbol.h"
78#include "llvm/Support/Endian.h"
81#include "llvm/Support/Path.h"
87#include <cassert>
88#include <cstdint>
89#include <cstring>
90#include <new>
91#include <string>
92#include <utility>
93#include <vector>
94
95using namespace llvm;
96
97static cl::opt<bool>
98 LowerCtorDtor("nvptx-lower-global-ctor-dtor",
99 cl::desc("Lower GPU ctor / dtors to globals on the device."),
100 cl::init(false), cl::Hidden);
101
102#define DEPOTNAME "__local_depot"
103
104/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
105/// depends.
106static void
109 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
110 Globals.insert(GV);
111 else {
112 if (const User *U = dyn_cast<User>(V)) {
113 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
114 DiscoverDependentGlobals(U->getOperand(i), Globals);
115 }
116 }
117 }
118}
119
/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
/// instances to be emitted, but only after any dependents have been added
/// first.
123static void
128 // Have we already visited this one?
129 if (Visited.count(GV))
130 return;
131
132 // Do we have a circular dependency?
133 if (!Visiting.insert(GV).second)
134 report_fatal_error("Circular dependency found in global variable set");
135
136 // Make sure we visit all dependents first
138 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
139 DiscoverDependentGlobals(GV->getOperand(i), Others);
140
141 for (const GlobalVariable *GV : Others)
142 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
143
144 // Now we can visit ourself
145 Order.push_back(GV);
146 Visited.insert(GV);
147 Visiting.erase(GV);
148}
149
150void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
151 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
152 getSubtargetInfo().getFeatureBits());
153
154 MCInst Inst;
155 lowerToMCInst(MI, Inst);
157}
158
159// Handle symbol backtracking for targets that do not support image handles
160bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
161 unsigned OpNo, MCOperand &MCOp) {
162 const MachineOperand &MO = MI->getOperand(OpNo);
163 const MCInstrDesc &MCID = MI->getDesc();
164
165 if (MCID.TSFlags & NVPTXII::IsTexFlag) {
166 // This is a texture fetch, so operand 4 is a texref and operand 5 is
167 // a samplerref
168 if (OpNo == 4 && MO.isImm()) {
169 lowerImageHandleSymbol(MO.getImm(), MCOp);
170 return true;
171 }
172 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
173 lowerImageHandleSymbol(MO.getImm(), MCOp);
174 return true;
175 }
176
177 return false;
178 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
179 unsigned VecSize =
180 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
181
182 // For a surface load of vector size N, the Nth operand will be the surfref
183 if (OpNo == VecSize && MO.isImm()) {
184 lowerImageHandleSymbol(MO.getImm(), MCOp);
185 return true;
186 }
187
188 return false;
189 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
190 // This is a surface store, so operand 0 is a surfref
191 if (OpNo == 0 && MO.isImm()) {
192 lowerImageHandleSymbol(MO.getImm(), MCOp);
193 return true;
194 }
195
196 return false;
197 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
198 // This is a query, so operand 1 is a surfref/texref
199 if (OpNo == 1 && MO.isImm()) {
200 lowerImageHandleSymbol(MO.getImm(), MCOp);
201 return true;
202 }
203
204 return false;
205 }
206
207 return false;
208}
209
210void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
211 // Ewwww
213 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
215 const char *Sym = MFI->getImageHandleSymbol(Index);
216 StringRef SymName = nvTM.getStrPool().save(Sym);
217 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
218}
219
220void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
221 OutMI.setOpcode(MI->getOpcode());
222 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
223 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
224 const MachineOperand &MO = MI->getOperand(0);
225 OutMI.addOperand(GetSymbolRef(
227 return;
228 }
229
230 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
231 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
232 const MachineOperand &MO = MI->getOperand(i);
233
234 MCOperand MCOp;
235 if (!STI.hasImageHandles()) {
236 if (lowerImageHandleOperand(MI, i, MCOp)) {
237 OutMI.addOperand(MCOp);
238 continue;
239 }
240 }
241
242 if (lowerOperand(MO, MCOp))
243 OutMI.addOperand(MCOp);
244 }
245}
246
247bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
248 MCOperand &MCOp) {
249 switch (MO.getType()) {
250 default: llvm_unreachable("unknown operand type");
252 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
253 break;
255 MCOp = MCOperand::createImm(MO.getImm());
256 break;
259 MO.getMBB()->getSymbol(), OutContext));
260 break;
262 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
263 break;
265 MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
266 break;
268 const ConstantFP *Cnt = MO.getFPImm();
269 const APFloat &Val = Cnt->getValueAPF();
270
271 switch (Cnt->getType()->getTypeID()) {
272 default: report_fatal_error("Unsupported FP type"); break;
273 case Type::HalfTyID:
276 break;
277 case Type::BFloatTyID:
280 break;
281 case Type::FloatTyID:
284 break;
285 case Type::DoubleTyID:
288 break;
289 }
290 break;
291 }
292 }
293 return true;
294}
295
296unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
298 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
299
300 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
301 unsigned RegNum = RegMap[Reg];
302
303 // Encode the register class in the upper 4 bits
304 // Must be kept in sync with NVPTXInstPrinter::printRegName
305 unsigned Ret = 0;
306 if (RC == &NVPTX::Int1RegsRegClass) {
307 Ret = (1 << 28);
308 } else if (RC == &NVPTX::Int16RegsRegClass) {
309 Ret = (2 << 28);
310 } else if (RC == &NVPTX::Int32RegsRegClass) {
311 Ret = (3 << 28);
312 } else if (RC == &NVPTX::Int64RegsRegClass) {
313 Ret = (4 << 28);
314 } else if (RC == &NVPTX::Float32RegsRegClass) {
315 Ret = (5 << 28);
316 } else if (RC == &NVPTX::Float64RegsRegClass) {
317 Ret = (6 << 28);
318 } else if (RC == &NVPTX::Int128RegsRegClass) {
319 Ret = (7 << 28);
320 } else {
321 report_fatal_error("Bad register class");
322 }
323
324 // Insert the vreg number
325 Ret |= (RegNum & 0x0FFFFFFF);
326 return Ret;
327 } else {
328 // Some special-use registers are actually physical registers.
329 // Encode this as the register class ID of 0 and the real register ID.
330 return Reg & 0x0FFFFFFF;
331 }
332}
333
334MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
335 const MCExpr *Expr;
337 OutContext);
338 return MCOperand::createExpr(Expr);
339}
340
341static bool ShouldPassAsArray(Type *Ty) {
342 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
343 Ty->isHalfTy() || Ty->isBFloatTy();
344}
345
346void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
347 const DataLayout &DL = getDataLayout();
349 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
350
351 Type *Ty = F->getReturnType();
352
353 bool isABI = (STI.getSmVersion() >= 20);
354
355 if (Ty->getTypeID() == Type::VoidTyID)
356 return;
357 O << " (";
358
359 if (isABI) {
360 if ((Ty->isFloatingPointTy() || Ty->isIntegerTy()) &&
361 !ShouldPassAsArray(Ty)) {
362 unsigned size = 0;
363 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
364 size = ITy->getBitWidth();
365 } else {
366 assert(Ty->isFloatingPointTy() && "Floating point type expected here");
368 }
370 O << ".param .b" << size << " func_retval0";
371 } else if (isa<PointerType>(Ty)) {
372 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
373 << " func_retval0";
374 } else if (ShouldPassAsArray(Ty)) {
375 unsigned totalsz = DL.getTypeAllocSize(Ty);
376 Align RetAlignment = TLI->getFunctionArgumentAlignment(
378 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
379 << totalsz << "]";
380 } else
381 llvm_unreachable("Unknown return type");
382 } else {
383 SmallVector<EVT, 16> vtparts;
384 ComputeValueVTs(*TLI, DL, Ty, vtparts);
385 unsigned idx = 0;
386 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
387 unsigned elems = 1;
388 EVT elemtype = vtparts[i];
389 if (vtparts[i].isVector()) {
390 elems = vtparts[i].getVectorNumElements();
391 elemtype = vtparts[i].getVectorElementType();
392 }
393
394 for (unsigned j = 0, je = elems; j != je; ++j) {
395 unsigned sz = elemtype.getSizeInBits();
396 if (elemtype.isInteger())
398 O << ".reg .b" << sz << " func_retval" << idx;
399 if (j < je - 1)
400 O << ", ";
401 ++idx;
402 }
403 if (i < e - 1)
404 O << ", ";
405 }
406 }
407 O << ") ";
408}
409
410void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
411 raw_ostream &O) {
412 const Function &F = MF.getFunction();
413 printReturnValStr(&F, O);
414}
415
416// Return true if MBB is the header of a loop marked with
417// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
418bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
419 const MachineBasicBlock &MBB) const {
420 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
421 // We insert .pragma "nounroll" only to the loop header.
422 if (!LI.isLoopHeader(&MBB))
423 return false;
424
425 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
426 // we iterate through each back edge of the loop with header MBB, and check
427 // whether its metadata contains llvm.loop.unroll.disable.
428 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
429 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
430 // Edges from other loops to MBB are not back edges.
431 continue;
432 }
433 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
434 if (MDNode *LoopID =
435 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
436 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
437 return true;
438 if (MDNode *UnrollCountMD =
439 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
440 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
441 ->isOne())
442 return true;
443 }
444 }
445 }
446 }
447 return false;
448}
449
450void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
452 if (isLoopHeaderOfNoUnroll(MBB))
453 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
454}
455
456void NVPTXAsmPrinter::emitFunctionEntryLabel() {
459
460 if (!GlobalsEmitted) {
461 emitGlobals(*MF->getFunction().getParent());
462 GlobalsEmitted = true;
463 }
464
465 // Set up
466 MRI = &MF->getRegInfo();
467 F = &MF->getFunction();
468 emitLinkageDirective(F, O);
469 if (isKernelFunction(*F))
470 O << ".entry ";
471 else {
472 O << ".func ";
473 printReturnValStr(*MF, O);
474 }
475
477
478 emitFunctionParamList(F, O);
479 O << "\n";
480
481 if (isKernelFunction(*F))
482 emitKernelFunctionDirectives(*F, O);
483
485 O << ".noreturn";
486
487 OutStreamer->emitRawText(O.str());
488
489 VRegMapping.clear();
490 // Emit open brace for function body.
491 OutStreamer->emitRawText(StringRef("{\n"));
492 setAndEmitFunctionVirtualRegisters(*MF);
493 // Emit initial .loc debug directive for correct relocation symbol data.
494 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
495 assert(SP->getUnit());
496 if (!SP->getUnit()->isDebugDirectivesOnly())
498 }
499}
500
502 bool Result = AsmPrinter::runOnMachineFunction(F);
503 // Emit closing brace for the body of function F.
504 // The closing brace must be emitted here because we need to emit additional
505 // debug labels/data after the last basic block.
506 // We need to emit the closing brace here because we don't have function that
507 // finished emission of the function body.
508 OutStreamer->emitRawText(StringRef("}\n"));
509 return Result;
510}
511
512void NVPTXAsmPrinter::emitFunctionBodyStart() {
514 raw_svector_ostream O(Str);
515 emitDemotedVars(&MF->getFunction(), O);
516 OutStreamer->emitRawText(O.str());
517}
518
519void NVPTXAsmPrinter::emitFunctionBodyEnd() {
520 VRegMapping.clear();
521}
522
526 return OutContext.getOrCreateSymbol(Str);
527}
528
529void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
530 Register RegNo = MI->getOperand(0).getReg();
531 if (RegNo.isVirtual()) {
532 OutStreamer->AddComment(Twine("implicit-def: ") +
534 } else {
535 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
536 OutStreamer->AddComment(Twine("implicit-def: ") +
537 STI.getRegisterInfo()->getName(RegNo));
538 }
539 OutStreamer->addBlankLine();
540}
541
542void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
543 raw_ostream &O) const {
544 // If the NVVM IR has some of reqntid* specified, then output
545 // the reqntid directive, and set the unspecified ones to 1.
546 // If none of Reqntid* is specified, don't output reqntid directive.
547 std::optional<unsigned> Reqntidx = getReqNTIDx(F);
548 std::optional<unsigned> Reqntidy = getReqNTIDy(F);
549 std::optional<unsigned> Reqntidz = getReqNTIDz(F);
550
551 if (Reqntidx || Reqntidy || Reqntidz)
552 O << ".reqntid " << Reqntidx.value_or(1) << ", " << Reqntidy.value_or(1)
553 << ", " << Reqntidz.value_or(1) << "\n";
554
555 // If the NVVM IR has some of maxntid* specified, then output
556 // the maxntid directive, and set the unspecified ones to 1.
557 // If none of maxntid* is specified, don't output maxntid directive.
558 std::optional<unsigned> Maxntidx = getMaxNTIDx(F);
559 std::optional<unsigned> Maxntidy = getMaxNTIDy(F);
560 std::optional<unsigned> Maxntidz = getMaxNTIDz(F);
561
562 if (Maxntidx || Maxntidy || Maxntidz)
563 O << ".maxntid " << Maxntidx.value_or(1) << ", " << Maxntidy.value_or(1)
564 << ", " << Maxntidz.value_or(1) << "\n";
565
566 unsigned Mincta = 0;
567 if (getMinCTASm(F, Mincta))
568 O << ".minnctapersm " << Mincta << "\n";
569
570 unsigned Maxnreg = 0;
571 if (getMaxNReg(F, Maxnreg))
572 O << ".maxnreg " << Maxnreg << "\n";
573
574 // .maxclusterrank directive requires SM_90 or higher, make sure that we
575 // filter it out for lower SM versions, as it causes a hard ptxas crash.
576 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
577 const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
578 unsigned Maxclusterrank = 0;
579 if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90)
580 O << ".maxclusterrank " << Maxclusterrank << "\n";
581}
582
583std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
584 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
585
586 std::string Name;
587 raw_string_ostream NameStr(Name);
588
589 VRegRCMap::const_iterator I = VRegMapping.find(RC);
590 assert(I != VRegMapping.end() && "Bad register class");
591 const DenseMap<unsigned, unsigned> &RegMap = I->second;
592
593 VRegMap::const_iterator VI = RegMap.find(Reg);
594 assert(VI != RegMap.end() && "Bad virtual register");
595 unsigned MappedVR = VI->second;
596
597 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
598
599 NameStr.flush();
600 return Name;
601}
602
603void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
604 raw_ostream &O) {
605 O << getVirtualRegisterName(vr);
606}
607
608void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
609 raw_ostream &O) {
610 const Function *F = dyn_cast_or_null<Function>(GA->getAliaseeObject());
611 if (!F || isKernelFunction(*F) || F->isDeclaration())
613 "NVPTX aliasee must be a non-kernel function definition");
614
615 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
617 report_fatal_error("NVPTX aliasee must not be '.weak'");
618
619 emitDeclarationWithName(F, getSymbol(GA), O);
620}
621
622void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
623 emitDeclarationWithName(F, getSymbol(F), O);
624}
625
626void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
627 raw_ostream &O) {
628 emitLinkageDirective(F, O);
629 if (isKernelFunction(*F))
630 O << ".entry ";
631 else
632 O << ".func ";
633 printReturnValStr(F, O);
634 S->print(O, MAI);
635 O << "\n";
636 emitFunctionParamList(F, O);
637 O << "\n";
639 O << ".noreturn";
640 O << ";\n";
641}
642
643static bool usedInGlobalVarDef(const Constant *C) {
644 if (!C)
645 return false;
646
647 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
648 return GV->getName() != "llvm.used";
649 }
650
651 for (const User *U : C->users())
652 if (const Constant *C = dyn_cast<Constant>(U))
654 return true;
655
656 return false;
657}
658
659static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
660 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
661 if (othergv->getName() == "llvm.used")
662 return true;
663 }
664
665 if (const Instruction *instr = dyn_cast<Instruction>(U)) {
666 if (instr->getParent() && instr->getParent()->getParent()) {
667 const Function *curFunc = instr->getParent()->getParent();
668 if (oneFunc && (curFunc != oneFunc))
669 return false;
670 oneFunc = curFunc;
671 return true;
672 } else
673 return false;
674 }
675
676 for (const User *UU : U->users())
677 if (!usedInOneFunc(UU, oneFunc))
678 return false;
679
680 return true;
681}
682
683/* Find out if a global variable can be demoted to local scope.
684 * Currently, this is valid for CUDA shared variables, which have local
685 * scope and global lifetime. So the conditions to check are :
686 * 1. Is the global variable in shared address space?
687 * 2. Does it have local linkage?
688 * 3. Is the global variable referenced only in one function?
689 */
690static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
691 if (!gv->hasLocalLinkage())
692 return false;
693 PointerType *Pty = gv->getType();
694 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
695 return false;
696
697 const Function *oneFunc = nullptr;
698
699 bool flag = usedInOneFunc(gv, oneFunc);
700 if (!flag)
701 return false;
702 if (!oneFunc)
703 return false;
704 f = oneFunc;
705 return true;
706}
707
708static bool useFuncSeen(const Constant *C,
710 for (const User *U : C->users()) {
711 if (const Constant *cu = dyn_cast<Constant>(U)) {
712 if (useFuncSeen(cu, seenMap))
713 return true;
714 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
715 const BasicBlock *bb = I->getParent();
716 if (!bb)
717 continue;
718 const Function *caller = bb->getParent();
719 if (!caller)
720 continue;
721 if (seenMap.contains(caller))
722 return true;
723 }
724 }
725 return false;
726}
727
728void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
730 for (const Function &F : M) {
731 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
732 emitDeclaration(&F, O);
733 continue;
734 }
735
736 if (F.isDeclaration()) {
737 if (F.use_empty())
738 continue;
739 if (F.getIntrinsicID())
740 continue;
741 emitDeclaration(&F, O);
742 continue;
743 }
744 for (const User *U : F.users()) {
745 if (const Constant *C = dyn_cast<Constant>(U)) {
746 if (usedInGlobalVarDef(C)) {
747 // The use is in the initialization of a global variable
748 // that is a function pointer, so print a declaration
749 // for the original function
750 emitDeclaration(&F, O);
751 break;
752 }
753 // Emit a declaration of this function if the function that
754 // uses this constant expr has already been seen.
755 if (useFuncSeen(C, seenMap)) {
756 emitDeclaration(&F, O);
757 break;
758 }
759 }
760
761 if (!isa<Instruction>(U))
762 continue;
763 const Instruction *instr = cast<Instruction>(U);
764 const BasicBlock *bb = instr->getParent();
765 if (!bb)
766 continue;
767 const Function *caller = bb->getParent();
768 if (!caller)
769 continue;
770
771 // If a caller has already been seen, then the caller is
772 // appearing in the module before the callee. so print out
773 // a declaration for the callee.
774 if (seenMap.contains(caller)) {
775 emitDeclaration(&F, O);
776 break;
777 }
778 }
779 seenMap[&F] = true;
780 }
781 for (const GlobalAlias &GA : M.aliases())
782 emitAliasDeclaration(&GA, O);
783}
784
786 if (!GV) return true;
787 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
788 if (!InitList) return true; // Not an array; we don't know how to parse.
789 return InitList->getNumOperands() == 0;
790}
791
792void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
793 // Construct a default subtarget off of the TargetMachine defaults. The
794 // rest of NVPTX isn't friendly to change subtargets per function and
795 // so the default TargetMachine will have all of the options.
796 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
797 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
798 SmallString<128> Str1;
799 raw_svector_ostream OS1(Str1);
800
801 // Emit header before any dwarf directives are emitted below.
802 emitHeader(M, OS1, *STI);
803 OutStreamer->emitRawText(OS1.str());
804}
805
807 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
808 const NVPTXSubtarget &STI =
809 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
810 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
811 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
812
813 // OpenMP supports NVPTX global constructors and destructors.
814 bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
815
816 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
817 !LowerCtorDtor && !IsOpenMP) {
819 "Module has a nontrivial global ctor, which NVPTX does not support.");
820 return true; // error
821 }
822 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
823 !LowerCtorDtor && !IsOpenMP) {
825 "Module has a nontrivial global dtor, which NVPTX does not support.");
826 return true; // error
827 }
828
829 // We need to call the parent's one explicitly.
830 bool Result = AsmPrinter::doInitialization(M);
831
832 GlobalsEmitted = false;
833
834 return Result;
835}
836
837void NVPTXAsmPrinter::emitGlobals(const Module &M) {
838 SmallString<128> Str2;
839 raw_svector_ostream OS2(Str2);
840
841 emitDeclarations(M, OS2);
842
843 // As ptxas does not support forward references of globals, we need to first
844 // sort the list of module-level globals in def-use order. We visit each
845 // global variable in order, and ensure that we emit it *after* its dependent
846 // globals. We use a little extra memory maintaining both a set and a list to
847 // have fast searches while maintaining a strict ordering.
851
852 // Visit each global variable, in order
853 for (const GlobalVariable &I : M.globals())
854 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
855
856 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
857 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
858
859 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
860 const NVPTXSubtarget &STI =
861 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
862
863 // Print out module-level global variables in proper order
864 for (const GlobalVariable *GV : Globals)
865 printModuleLevelGV(GV, OS2, /*processDemoted=*/false, STI);
866
867 OS2 << '\n';
868
869 OutStreamer->emitRawText(OS2.str());
870}
871
872void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
875
876 MCSymbol *Name = getSymbol(&GA);
877
878 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
879 << ";\n";
880
881 OutStreamer->emitRawText(OS.str());
882}
883
884void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
885 const NVPTXSubtarget &STI) {
886 O << "//\n";
887 O << "// Generated by LLVM NVPTX Back-End\n";
888 O << "//\n";
889 O << "\n";
890
891 unsigned PTXVersion = STI.getPTXVersion();
892 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
893
894 O << ".target ";
895 O << STI.getTargetName();
896
897 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
898 if (NTM.getDrvInterface() == NVPTX::NVCL)
899 O << ", texmode_independent";
900
901 bool HasFullDebugInfo = false;
902 for (DICompileUnit *CU : M.debug_compile_units()) {
903 switch(CU->getEmissionKind()) {
906 break;
909 HasFullDebugInfo = true;
910 break;
911 }
912 if (HasFullDebugInfo)
913 break;
914 }
915 if (HasFullDebugInfo)
916 O << ", debug";
917
918 O << "\n";
919
920 O << ".address_size ";
921 if (NTM.is64Bit())
922 O << "64";
923 else
924 O << "32";
925 O << "\n";
926
927 O << "\n";
928}
929
931 // If we did not emit any functions, then the global declarations have not
932 // yet been emitted.
933 if (!GlobalsEmitted) {
934 emitGlobals(M);
935 GlobalsEmitted = true;
936 }
937
938 // call doFinalization
939 bool ret = AsmPrinter::doFinalization(M);
940
942
943 auto *TS =
944 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
945 // Close the last emitted section
946 if (hasDebugInfo()) {
947 TS->closeLastSection();
948 // Emit empty .debug_loc section for better support of the empty files.
949 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
950 }
951
952 // Output last DWARF .file directives, if any.
953 TS->outputDwarfFileDirectives();
954
955 return ret;
956}
957
958// This function emits appropriate linkage directives for
959// functions and global variables.
960//
961// extern function declaration -> .extern
962// extern function definition -> .visible
963// external global variable with init -> .visible
964// external without init -> .extern
965// appending -> not allowed, assert.
966// for any linkage other than
967// internal, private, linker_private,
968// linker_private_weak, linker_private_weak_def_auto,
969// we emit -> .weak.
970
971void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
972 raw_ostream &O) {
973 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
974 if (V->hasExternalLinkage()) {
975 if (isa<GlobalVariable>(V)) {
976 const GlobalVariable *GVar = cast<GlobalVariable>(V);
977 if (GVar) {
978 if (GVar->hasInitializer())
979 O << ".visible ";
980 else
981 O << ".extern ";
982 }
983 } else if (V->isDeclaration())
984 O << ".extern ";
985 else
986 O << ".visible ";
987 } else if (V->hasAppendingLinkage()) {
988 std::string msg;
989 msg.append("Error: ");
990 msg.append("Symbol ");
991 if (V->hasName())
992 msg.append(std::string(V->getName()));
993 msg.append("has unsupported appending linkage type");
994 llvm_unreachable(msg.c_str());
995 } else if (!V->hasInternalLinkage() &&
996 !V->hasPrivateLinkage()) {
997 O << ".weak ";
998 }
999 }
1000}
1001
1002void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1003 raw_ostream &O, bool processDemoted,
1004 const NVPTXSubtarget &STI) {
1005 // Skip meta data
1006 if (GVar->hasSection()) {
1007 if (GVar->getSection() == "llvm.metadata")
1008 return;
1009 }
1010
1011 // Skip LLVM intrinsic global variables
1012 if (GVar->getName().starts_with("llvm.") ||
1013 GVar->getName().starts_with("nvvm."))
1014 return;
1015
1016 const DataLayout &DL = getDataLayout();
1017
1018 // GlobalVariables are always constant pointers themselves.
1019 Type *ETy = GVar->getValueType();
1020
1021 if (GVar->hasExternalLinkage()) {
1022 if (GVar->hasInitializer())
1023 O << ".visible ";
1024 else
1025 O << ".extern ";
1026 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
1028 O << ".common ";
1029 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1031 GVar->hasCommonLinkage()) {
1032 O << ".weak ";
1033 }
1034
1035 if (isTexture(*GVar)) {
1036 O << ".global .texref " << getTextureName(*GVar) << ";\n";
1037 return;
1038 }
1039
1040 if (isSurface(*GVar)) {
1041 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1042 return;
1043 }
1044
1045 if (GVar->isDeclaration()) {
1046 // (extern) declarations, no definition or initializer
1047 // Currently the only known declaration is for an automatic __local
1048 // (.shared) promoted to global.
1049 emitPTXGlobalVariable(GVar, O, STI);
1050 O << ";\n";
1051 return;
1052 }
1053
1054 if (isSampler(*GVar)) {
1055 O << ".global .samplerref " << getSamplerName(*GVar);
1056
1057 const Constant *Initializer = nullptr;
1058 if (GVar->hasInitializer())
1059 Initializer = GVar->getInitializer();
1060 const ConstantInt *CI = nullptr;
1061 if (Initializer)
1062 CI = dyn_cast<ConstantInt>(Initializer);
1063 if (CI) {
1064 unsigned sample = CI->getZExtValue();
1065
1066 O << " = { ";
1067
1068 for (int i = 0,
1069 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1070 i < 3; i++) {
1071 O << "addr_mode_" << i << " = ";
1072 switch (addr) {
1073 case 0:
1074 O << "wrap";
1075 break;
1076 case 1:
1077 O << "clamp_to_border";
1078 break;
1079 case 2:
1080 O << "clamp_to_edge";
1081 break;
1082 case 3:
1083 O << "wrap";
1084 break;
1085 case 4:
1086 O << "mirror";
1087 break;
1088 }
1089 O << ", ";
1090 }
1091 O << "filter_mode = ";
1092 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1093 case 0:
1094 O << "nearest";
1095 break;
1096 case 1:
1097 O << "linear";
1098 break;
1099 case 2:
1100 llvm_unreachable("Anisotropic filtering is not supported");
1101 default:
1102 O << "nearest";
1103 break;
1104 }
1105 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1106 O << ", force_unnormalized_coords = 1";
1107 }
1108 O << " }";
1109 }
1110
1111 O << ";\n";
1112 return;
1113 }
1114
1115 if (GVar->hasPrivateLinkage()) {
1116 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1117 return;
1118
1119 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1120 if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1121 return;
1122 if (GVar->use_empty())
1123 return;
1124 }
1125
1126 const Function *demotedFunc = nullptr;
1127 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1128 O << "// " << GVar->getName() << " has been demoted\n";
1129 if (localDecls.find(demotedFunc) != localDecls.end())
1130 localDecls[demotedFunc].push_back(GVar);
1131 else {
1132 std::vector<const GlobalVariable *> temp;
1133 temp.push_back(GVar);
1134 localDecls[demotedFunc] = temp;
1135 }
1136 return;
1137 }
1138
1139 O << ".";
1140 emitPTXAddressSpace(GVar->getAddressSpace(), O);
1141
1142 if (isManaged(*GVar)) {
1143 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1145 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1146 }
1147 O << " .attribute(.managed)";
1148 }
1149
1150 if (MaybeAlign A = GVar->getAlign())
1151 O << " .align " << A->value();
1152 else
1153 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1154
1155 if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1156 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1157 O << " .";
1158 // Special case: ABI requires that we use .u8 for predicates
1159 if (ETy->isIntegerTy(1))
1160 O << "u8";
1161 else
1162 O << getPTXFundamentalTypeStr(ETy, false);
1163 O << " ";
1164 getSymbol(GVar)->print(O, MAI);
1165
1166 // Ptx allows variable initilization only for constant and global state
1167 // spaces.
1168 if (GVar->hasInitializer()) {
1169 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1170 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1171 const Constant *Initializer = GVar->getInitializer();
1172 // 'undef' is treated as there is no value specified.
1173 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1174 O << " = ";
1175 printScalarConstant(Initializer, O);
1176 }
1177 } else {
1178 // The frontend adds zero-initializer to device and constant variables
1179 // that don't have an initial value, and UndefValue to shared
1180 // variables, so skip warning for this case.
1181 if (!GVar->getInitializer()->isNullValue() &&
1182 !isa<UndefValue>(GVar->getInitializer())) {
1183 report_fatal_error("initial value of '" + GVar->getName() +
1184 "' is not allowed in addrspace(" +
1185 Twine(GVar->getAddressSpace()) + ")");
1186 }
1187 }
1188 }
1189 } else {
1190 uint64_t ElementSize = 0;
1191
1192 // Although PTX has direct support for struct type and array type and
1193 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1194 // targets that support these high level field accesses. Structs, arrays
1195 // and vectors are lowered into arrays of bytes.
1196 switch (ETy->getTypeID()) {
1197 case Type::IntegerTyID: // Integers larger than 64 bits
1198 case Type::StructTyID:
1199 case Type::ArrayTyID:
1201 ElementSize = DL.getTypeStoreSize(ETy);
1202 // Ptx allows variable initilization only for constant and
1203 // global state spaces.
1204 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1205 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1206 GVar->hasInitializer()) {
1207 const Constant *Initializer = GVar->getInitializer();
1208 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1209 AggBuffer aggBuffer(ElementSize, *this);
1210 bufferAggregateConstant(Initializer, &aggBuffer);
1211 if (aggBuffer.numSymbols()) {
1212 unsigned int ptrSize = MAI->getCodePointerSize();
1213 if (ElementSize % ptrSize ||
1214 !aggBuffer.allSymbolsAligned(ptrSize)) {
1215 // Print in bytes and use the mask() operator for pointers.
1216 if (!STI.hasMaskOperator())
1218 "initialized packed aggregate with pointers '" +
1219 GVar->getName() +
1220 "' requires at least PTX ISA version 7.1");
1221 O << " .u8 ";
1222 getSymbol(GVar)->print(O, MAI);
1223 O << "[" << ElementSize << "] = {";
1224 aggBuffer.printBytes(O);
1225 O << "}";
1226 } else {
1227 O << " .u" << ptrSize * 8 << " ";
1228 getSymbol(GVar)->print(O, MAI);
1229 O << "[" << ElementSize / ptrSize << "] = {";
1230 aggBuffer.printWords(O);
1231 O << "}";
1232 }
1233 } else {
1234 O << " .b8 ";
1235 getSymbol(GVar)->print(O, MAI);
1236 O << "[" << ElementSize << "] = {";
1237 aggBuffer.printBytes(O);
1238 O << "}";
1239 }
1240 } else {
1241 O << " .b8 ";
1242 getSymbol(GVar)->print(O, MAI);
1243 if (ElementSize) {
1244 O << "[";
1245 O << ElementSize;
1246 O << "]";
1247 }
1248 }
1249 } else {
1250 O << " .b8 ";
1251 getSymbol(GVar)->print(O, MAI);
1252 if (ElementSize) {
1253 O << "[";
1254 O << ElementSize;
1255 O << "]";
1256 }
1257 }
1258 break;
1259 default:
1260 llvm_unreachable("type not supported yet");
1261 }
1262 }
1263 O << ";\n";
1264}
1265
1266void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1267 const Value *v = Symbols[nSym];
1268 const Value *v0 = SymbolsBeforeStripping[nSym];
1269 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1270 MCSymbol *Name = AP.getSymbol(GVar);
1271 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1272 // Is v0 a generic pointer?
1273 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1274 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1275 os << "generic(";
1276 Name->print(os, AP.MAI);
1277 os << ")";
1278 } else {
1279 Name->print(os, AP.MAI);
1280 }
1281 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1282 const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1283 AP.printMCExpr(*Expr, os);
1284 } else
1285 llvm_unreachable("symbol type unknown");
1286}
1287
1288void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1289 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1290 // Do not emit trailing zero initializers. They will be zero-initialized by
1291 // ptxas. This saves on both space requirements for the generated PTX and on
1292 // memory use by ptxas. (See:
1293 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1294 unsigned int InitializerCount = size;
1295 // TODO: symbols make this harder, but it would still be good to trim trailing
1296 // 0s for aggs with symbols as well.
1297 if (numSymbols() == 0)
1298 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1299 InitializerCount--;
1300
1301 symbolPosInBuffer.push_back(InitializerCount);
1302 unsigned int nSym = 0;
1303 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1304 for (unsigned int pos = 0; pos < InitializerCount;) {
1305 if (pos)
1306 os << ", ";
1307 if (pos != nextSymbolPos) {
1308 os << (unsigned int)buffer[pos];
1309 ++pos;
1310 continue;
1311 }
1312 // Generate a per-byte mask() operator for the symbol, which looks like:
1313 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1314 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1315 std::string symText;
1316 llvm::raw_string_ostream oss(symText);
1317 printSymbol(nSym, oss);
1318 for (unsigned i = 0; i < ptrSize; ++i) {
1319 if (i)
1320 os << ", ";
1321 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1322 os << "(" << symText << ")";
1323 }
1324 pos += ptrSize;
1325 nextSymbolPos = symbolPosInBuffer[++nSym];
1326 assert(nextSymbolPos >= pos);
1327 }
1328}
1329
1330void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1331 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1332 symbolPosInBuffer.push_back(size);
1333 unsigned int nSym = 0;
1334 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1335 assert(nextSymbolPos % ptrSize == 0);
1336 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1337 if (pos)
1338 os << ", ";
1339 if (pos == nextSymbolPos) {
1340 printSymbol(nSym, os);
1341 nextSymbolPos = symbolPosInBuffer[++nSym];
1342 assert(nextSymbolPos % ptrSize == 0);
1343 assert(nextSymbolPos >= pos + ptrSize);
1344 } else if (ptrSize == 4)
1345 os << support::endian::read32le(&buffer[pos]);
1346 else
1347 os << support::endian::read64le(&buffer[pos]);
1348 }
1349}
1350
1351void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1352 if (localDecls.find(f) == localDecls.end())
1353 return;
1354
1355 std::vector<const GlobalVariable *> &gvars = localDecls[f];
1356
1357 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1358 const NVPTXSubtarget &STI =
1359 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1360
1361 for (const GlobalVariable *GV : gvars) {
1362 O << "\t// demoted variable\n\t";
1363 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1364 }
1365}
1366
1367void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1368 raw_ostream &O) const {
1369 switch (AddressSpace) {
1371 O << "local";
1372 break;
1374 O << "global";
1375 break;
1377 O << "const";
1378 break;
1380 O << "shared";
1381 break;
1382 default:
1383 report_fatal_error("Bad address space found while emitting PTX: " +
1385 break;
1386 }
1387}
1388
1389std::string
1390NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1391 switch (Ty->getTypeID()) {
1392 case Type::IntegerTyID: {
1393 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1394 if (NumBits == 1)
1395 return "pred";
1396 else if (NumBits <= 64) {
1397 std::string name = "u";
1398 return name + utostr(NumBits);
1399 } else {
1400 llvm_unreachable("Integer too large");
1401 break;
1402 }
1403 break;
1404 }
1405 case Type::BFloatTyID:
1406 case Type::HalfTyID:
1407 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1408 // PTX assembly.
1409 return "b16";
1410 case Type::FloatTyID:
1411 return "f32";
1412 case Type::DoubleTyID:
1413 return "f64";
1414 case Type::PointerTyID: {
1415 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1416 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1417
1418 if (PtrSize == 64)
1419 if (useB4PTR)
1420 return "b64";
1421 else
1422 return "u64";
1423 else if (useB4PTR)
1424 return "b32";
1425 else
1426 return "u32";
1427 }
1428 default:
1429 break;
1430 }
1431 llvm_unreachable("unexpected type");
1432}
1433
/// Print the PTX declaration of \p GVar to \p O: state space, optional
/// ".attribute(.managed)", ".align", element type and symbol name. No
/// initializer and no terminating ';' are written by this function.
///
/// NOTE(review): the embedded listing numbers skip 1446 and 1481, so this
/// extract appears to be missing one source line at each of those points.
1434void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1435 raw_ostream &O,
1436 const NVPTXSubtarget &STI) {
1437 const DataLayout &DL = getDataLayout();
1438
1439 // GlobalVariables are always constant pointers themselves.
1440 Type *ETy = GVar->getValueType();
1441
1442 O << ".";
1443 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1444 if (isManaged(*GVar)) {
1445 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
// NOTE(review): listing line 1446 is absent here; presumably it opened the
// error-reporting call that takes the message below -- confirm against the
// real source.
1447 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1448 }
1449 O << " .attribute(.managed)";
1450 }
// Use the explicit alignment of the variable when it has one, otherwise the
// preferred alignment of its value type.
1451 if (MaybeAlign A = GVar->getAlign())
1452 O << " .align " << A->value();
1453 else
1454 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1455
1456 // Special case for i128
// i128 is declared as a 16-byte .b8 array.
1457 if (ETy->isIntegerTy(128)) {
1458 O << " .b8 ";
1459 getSymbol(GVar)->print(O, MAI);
1460 O << "[16]";
1461 return;
1462 }
1463
// Scalars (floating point, integer, pointer) use their fundamental PTX type.
1464 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1465 O << " .";
1466 O << getPTXFundamentalTypeStr(ETy);
1467 O << " ";
1468 getSymbol(GVar)->print(O, MAI);
1469 return;
1470 }
1471
1472 int64_t ElementSize = 0;
1473
1474 // Although PTX has direct support for struct type and array type and LLVM IR
1475 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1476 // support these high level field accesses. Structs and arrays are lowered
1477 // into arrays of bytes.
1478 switch (ETy->getTypeID()) {
1479 case Type::StructTyID:
1480 case Type::ArrayTyID:
// NOTE(review): listing line 1481 is absent here; possibly one more case
// label sharing this body -- confirm against the real source.
1482 ElementSize = DL.getTypeStoreSize(ETy);
1483 O << " .b8 ";
1484 getSymbol(GVar)->print(O, MAI);
// The brackets are always printed; the size is omitted when it is zero.
1485 O << "[";
1486 if (ElementSize) {
1487 O << ElementSize;
1488 }
1489 O << "]";
1490 break;
1491 default:
1492 llvm_unreachable("type not supported yet");
1493 }
1494}
1495
/// Print the parenthesized PTX parameter list of \p F to \p O.
///
/// Prints "()" when \p F has no arguments and is not variadic. Otherwise one
/// declaration is printed per argument, separated by ",\n":
///  - kernel image/sampler arguments as .surfref / .texref / .samplerref;
///  - non-byval arguments for which ShouldPassAsArray() holds as an aligned
///    .b8 array;
///  - other non-byval arguments as scalars (with extra pointer annotations for
///    kernel pointer arguments when the driver interface is not CUDA);
///  - byval arguments either as an aligned .b8 array, or split into one .reg
///    per scalar part when neither the ABI nor a kernel is involved.
/// A variadic function gets a trailing unsized .b8 array parameter.
///
/// NOTE(review): the embedded listing numbers skip 1502, 1599, 1602, 1605,
/// 1632 and 1684, so this extract appears to be missing one source line at
/// each of those points.
1496void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1497 const DataLayout &DL = getDataLayout();
1498 const AttributeList &PAL = F->getAttributes();
1499 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1500 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1501
// NOTE(review): listing line 1502 is absent; presumably it declared the
// argument iterators I and E used by the loop below -- confirm.
1503 unsigned paramIndex = 0;
1504 bool first = true;
1505 bool isKernelFunc = isKernelFunction(*F);
1506 bool isABI = (STI.getSmVersion() >= 20);
1507 bool hasImageHandles = STI.hasImageHandles();
1508
1509 if (F->arg_empty() && !F->isVarArg()) {
1510 O << "()";
1511 return;
1512 }
1513
1514 O << "(\n";
1515
1516 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1517 Type *Ty = I->getType();
1518
// Separator between consecutive parameter declarations.
1519 if (!first)
1520 O << ",\n";
1521
1522 first = false;
1523
1524 // Handle image/sampler parameters
1525 if (isKernelFunction(*F)) {
1526 if (isSampler(*I) || isImage(*I)) {
1527 if (isImage(*I)) {
1528 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1529 if (hasImageHandles)
1530 O << "\t.param .u64 .ptr .surfref ";
1531 else
1532 O << "\t.param .surfref ";
1533 O << TLI->getParamName(F, paramIndex);
1534 }
1535 else { // Default image is read_only
1536 if (hasImageHandles)
1537 O << "\t.param .u64 .ptr .texref ";
1538 else
1539 O << "\t.param .texref ";
1540 O << TLI->getParamName(F, paramIndex);
1541 }
1542 } else {
1543 if (hasImageHandles)
1544 O << "\t.param .u64 .ptr .samplerref ";
1545 else
1546 O << "\t.param .samplerref ";
1547 O << TLI->getParamName(F, paramIndex);
1548 }
1549 continue;
1550 }
1551 }
1552
// Alignment for an array-style parameter: an explicit alignment returned by
// getAlign() for this argument wins; otherwise the larger of the
// target-optimized type alignment and the parameter's alignment attribute.
// paramIndex is captured by value, i.e. as of this loop iteration.
1553 auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1554 paramIndex](Type *Ty) -> Align {
1555 if (MaybeAlign StackAlign =
1556 getAlign(*F, paramIndex + AttributeList::FirstArgIndex))
1557 return StackAlign.value();
1558
1559 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1560 MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1561 return std::max(TypeAlign, ParamAlign.valueOrOne());
1562 };
1563
1564 if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1565 if (ShouldPassAsArray(Ty)) {
1566 // Just print .param .align <a> .b8 .param[size];
1567 // <a> = optimal alignment for the element type; always multiple of
1568 // PAL.getParamAlignment
1569 // size = typeallocsize of element type
1570 Align OptimalAlign = getOptimalAlignForParam(Ty);
1571
1572 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1573 O << TLI->getParamName(F, paramIndex);
1574 O << "[" << DL.getTypeAllocSize(Ty) << "]";
1575
1576 continue;
1577 }
1578 // Just a scalar
1579 auto *PTy = dyn_cast<PointerType>(Ty);
1580 unsigned PTySizeInBits = 0;
1581 if (PTy) {
1582 PTySizeInBits =
1583 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1584 assert(PTySizeInBits && "Invalid pointer size");
1585 }
1586
1587 if (isKernelFunc) {
1588 if (PTy) {
1589 // Special handling for pointer arguments to kernel
1590 O << "\t.param .u" << PTySizeInBits << " ";
1591
// The .ptr state-space and .align annotations are only printed when the
// driver interface is not CUDA.
1592 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1593 NVPTX::CUDA) {
1594 int addrSpace = PTy->getAddressSpace();
1595 switch (addrSpace) {
1596 default:
1597 O << ".ptr ";
1598 break;
// NOTE(review): listing lines 1599, 1602 and 1605 are absent; presumably the
// case labels for the const, shared and global address spaces that select the
// three outputs below -- confirm.
1600 O << ".ptr .const ";
1601 break;
1603 O << ".ptr .shared ";
1604 break;
1606 O << ".ptr .global ";
1607 break;
1608 }
1609 Align ParamAlign = I->getParamAlign().valueOrOne();
1610 O << ".align " << ParamAlign.value() << " ";
1611 }
1612 O << TLI->getParamName(F, paramIndex);
1613 continue;
1614 }
1615
1616 // non-pointer scalar to kernel func
1617 O << "\t.param .";
1618 // Special case: predicate operands become .u8 types
1619 if (Ty->isIntegerTy(1))
1620 O << "u8";
1621 else
1622 O << getPTXFundamentalTypeStr(Ty);
1623 O << " ";
1624 O << TLI->getParamName(F, paramIndex);
1625 continue;
1626 }
1627 // Non-kernel function, just print .param .b<size> for ABI
1628 // and .reg .b<size> for non-ABI
1629 unsigned sz = 0;
1630 if (isa<IntegerType>(Ty)) {
1631 sz = cast<IntegerType>(Ty)->getBitWidth();
// NOTE(review): listing line 1632 is absent; possibly a further adjustment of
// sz for integer arguments -- confirm.
1633 } else if (PTy) {
1634 assert(PTySizeInBits && "Invalid pointer size");
1635 sz = PTySizeInBits;
1636 } else
1637 sz = Ty->getPrimitiveSizeInBits();
1638 if (isABI)
1639 O << "\t.param .b" << sz << " ";
1640 else
1641 O << "\t.reg .b" << sz << " ";
1642 O << TLI->getParamName(F, paramIndex);
1643 continue;
1644 }
1645
1646 // param has byVal attribute.
1647 Type *ETy = PAL.getParamByValType(paramIndex);
1648 assert(ETy && "Param should have byval type");
1649
1650 if (isABI || isKernelFunc) {
1651 // Just print .param .align <a> .b8 .param[size];
1652 // <a> = optimal alignment for the element type; always multiple of
1653 // PAL.getParamAlignment
1654 // size = typeallocsize of element type
1655 Align OptimalAlign =
1656 isKernelFunc
1657 ? getOptimalAlignForParam(ETy)
1658 : TLI->getFunctionByValParamAlign(
1659 F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
1660
1661 unsigned sz = DL.getTypeAllocSize(ETy);
1662 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1663 O << TLI->getParamName(F, paramIndex);
1664 O << "[" << sz << "]";
1665 continue;
1666 } else {
1667 // Split the ETy into constituent parts and
1668 // print .param .b<size> <name> for each part.
1669 // Further, if a part is vector, print the above for
1670 // each vector element.
1671 SmallVector<EVT, 16> vtparts;
1672 ComputeValueVTs(*TLI, DL, ETy, vtparts);
1673 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1674 unsigned elems = 1;
1675 EVT elemtype = vtparts[i];
1676 if (vtparts[i].isVector()) {
1677 elems = vtparts[i].getVectorNumElements();
1678 elemtype = vtparts[i].getVectorElementType();
1679 }
1680
1681 for (unsigned j = 0, je = elems; j != je; ++j) {
1682 unsigned sz = elemtype.getSizeInBits();
1683 if (elemtype.isInteger())
// NOTE(review): listing line 1684 is absent; it was presumably the statement
// controlled by the `if` above. As shown, the `if` would guard the output
// statement below instead -- confirm against the real source.
1685 O << "\t.reg .b" << sz << " ";
1686 O << TLI->getParamName(F, paramIndex);
1687 if (j < je - 1)
1688 O << ",\n";
// Every emitted scalar part consumes its own parameter index.
1689 ++paramIndex;
1690 }
1691 if (i < e - 1)
1692 O << ",\n";
1693 }
// Undo the last increment: the enclosing for loop increments paramIndex again.
1694 --paramIndex;
1695 continue;
1696 }
1697 }
1698
// Variadic functions get one extra trailing parameter: an unsized byte array
// aligned to the subtarget's maximum required alignment.
1699 if (F->isVarArg()) {
1700 if (!first)
1701 O << ",\n";
1702 O << "\t.param .align " << STI.getMaxRequiredAlignment();
1703 O << " .b8 ";
1704 O << TLI->getParamName(F, /* vararg */ -1) << "[]";
1705 }
1706
1707 O << "\n)";
1708}
1709
/// Number the virtual registers of \p MF per register class (recorded in
/// VRegMapping) and emit the function's local declarations as raw text: the
/// stack depot with %SP/%SPL when the frame has a non-zero size, followed by
/// one ".reg" declaration for every register class that has at least one
/// mapped register.
///
/// NOTE(review): the embedded listing numbers skip 1713, 1717 and 1741, so
/// this extract appears to be missing one source line at each of those points.
1710void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1711 const MachineFunction &MF) {
1712 SmallString<128> Str;
// NOTE(review): listing line 1713 is absent; presumably it declared the stream
// O (writing into Str) that is used throughout this function -- confirm.
1714
1715 // Map the global virtual register number to a register class specific
1716 // virtual register number starting from 1 with that class.
// NOTE(review): listing line 1717 is absent; presumably it declared TRI, which
// is used by the register-class loop further down -- confirm.
1718 //unsigned numRegClasses = TRI->getNumRegClasses();
1719
1720 // Emit the Fake Stack Object
1721 const MachineFrameInfo &MFI = MF.getFrameInfo();
1722 int64_t NumBytes = MFI.getStackSize();
1723 if (NumBytes) {
1724 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1725 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
// %SP and %SPL are declared with the target's pointer width.
1726 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1727 O << "\t.reg .b64 \t%SP;\n";
1728 O << "\t.reg .b64 \t%SPL;\n";
1729 } else {
1730 O << "\t.reg .b32 \t%SP;\n";
1731 O << "\t.reg .b32 \t%SPL;\n";
1732 }
1733 }
1734
1735 // Go through all virtual registers to establish the mapping between the
1736 // global virtual
1737 // register number and the per class virtual register number.
1738 // We use the per class virtual register number in the ptx output.
1739 unsigned int numVRs = MRI->getNumVirtRegs();
1740 for (unsigned i = 0; i < numVRs; i++) {
// NOTE(review): listing line 1741 is absent; presumably it defined vr as the
// virtual register with index i -- confirm.
1742 const TargetRegisterClass *RC = MRI->getRegClass(vr);
1743 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
// The next free per-class number is the current map size plus one, so
// numbering starts at 1.
1744 int n = regmap.size();
1745 regmap.insert(std::make_pair(vr, n + 1));
1746 }
1747
1748 // Emit register declarations
1749 // @TODO: Extract out the real register usage
1750 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1751 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1752 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1753 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1754 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1755 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1756 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1757
1758 // Emit declaration of the virtual registers or 'physical' registers for
1759 // each register class
1760 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1761 const TargetRegisterClass *RC = TRI->getRegClass(i);
1762 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1763 std::string rcname = getNVPTXRegClassName(RC);
1764 std::string rcStr = getNVPTXRegClassStr(RC);
1765 int n = regmap.size();
1766
1767 // Only declare those registers that may be used.
1768 if (n) {
// The declared count is n + 1 because per-class numbers run from 1 to n.
1769 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1770 << ">;\n";
1771 }
1772 }
1773
1774 OutStreamer->emitRawText(O.str());
1775}
1776
/// Print the floating-point constant \p Fp to \p O as a prefixed hexadecimal
/// bit pattern: "0f" followed by 8 upper-case hex digits for float, "0d"
/// followed by 16 upper-case hex digits for double. Other floating-point types
/// are not supported.
///
/// NOTE(review): the embedded listing numbers skip 1786 and 1790, so this
/// extract appears to be missing one source line in each branch below.
1777void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1778 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1779 bool ignored;
1780 unsigned int numHex;
1781 const char *lead;
1782
1783 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1784 numHex = 8;
1785 lead = "0f";
// NOTE(review): listing line 1786 is absent; possibly a statement operating on
// APF that uses `ignored`, which is otherwise unused in the visible code --
// confirm.
1787 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1788 numHex = 16;
1789 lead = "0d";
// NOTE(review): listing line 1790 is absent; presumably the double-precision
// counterpart of the line missing above -- confirm.
1791 } else
1792 llvm_unreachable("unsupported fp type");
1793
// Print the raw bit pattern of the value, zero-padded to numHex digits.
1794 APInt API = APF.bitcastToAPInt();
1795 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1796}
1797
1798void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1799 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1800 O << CI->getValue();
1801 return;
1802 }
1803 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1804 printFPConstant(CFP, O);
1805 return;
1806 }
1807 if (isa<ConstantPointerNull>(CPV)) {
1808 O << "0";
1809 return;
1810 }
1811 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1812 bool IsNonGenericPointer = false;
1813 if (GVar->getType()->getAddressSpace() != 0) {
1814 IsNonGenericPointer = true;
1815 }
1816 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1817 O << "generic(";
1818 getSymbol(GVar)->print(O, MAI);
1819 O << ")";
1820 } else {
1821 getSymbol(GVar)->print(O, MAI);
1822 }
1823 return;
1824 }
1825 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1826 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1827 printMCExpr(*E, O);
1828 return;
1829 }
1830 llvm_unreachable("Not scalar type found in printScalarConstant()");
1831}
1832
/// Append the byte representation of the constant \p CPV to \p AggBuffer,
/// least-significant byte first for integer and floating-point values.
///
/// Undef and null values are written as zeros. Pointer values (and integer
/// ptrtoint expressions that do not fold to a ConstantInt) are recorded as a
/// symbol and their slot is zero-filled. Arrays and structs are forwarded to
/// bufferAggregateConstant().
///
/// \p Bytes, when non-zero, is passed on as the amount of space to fill --
/// presumably the slot size including padding; confirm against the callers
/// and AggBuffer::addBytes.
///
/// NOTE(review): the embedded listing numbers skip 1903, so this extract
/// appears to be missing one source line there.
1833void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1834 AggBuffer *AggBuffer) {
1835 const DataLayout &DL = getDataLayout();
1836 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1837 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1838 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1839 // only the space allocated by CPV.
1840 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1841 return;
1842 }
1843
1844 // Helper for filling AggBuffer with APInts.
1845 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
// Number of bytes needed to hold all bits of Val, rounded up.
1846 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1847 SmallVector<unsigned char, 16> Buf(NumBytes);
1848 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1849 // input's bit width, and i1 arrays may not have a length that is a multiple
1850 // of 8. We handle the last byte separately, so we never request out of
1851 // bounds bits.
1852 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1853 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1854 }
1855 size_t LastBytePosition = (NumBytes - 1) * 8;
1856 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1857 Buf[NumBytes - 1] =
1858 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1859 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1860 };
1861
1862 switch (CPV->getType()->getTypeID()) {
1863 case Type::IntegerTyID:
1864 if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1865 AddIntToBuffer(CI->getValue());
1866 break;
1867 }
1868 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
// First try to fold the expression down to a plain integer constant.
1869 if (const auto *CI =
1870 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1871 AddIntToBuffer(CI->getValue());
1872 break;
1873 }
// A ptrtoint that did not fold: record the pointer operand as a symbol and
// zero-fill its slot.
1874 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1875 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1876 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1877 AggBuffer->addZeros(AllocSize);
1878 break;
1879 }
1880 }
1881 llvm_unreachable("unsupported integer const type");
1882 break;
1883
1884 case Type::HalfTyID:
1885 case Type::BFloatTyID:
1886 case Type::FloatTyID:
1887 case Type::DoubleTyID:
// Floating-point values are buffered as their raw bit pattern.
1888 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1889 break;
1890
1891 case Type::PointerTyID: {
1892 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1893 AggBuffer->addSymbol(GVar, GVar);
1894 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1895 const Value *v = Cexpr->stripPointerCasts();
1896 AggBuffer->addSymbol(v, Cexpr);
1897 }
// The pointer's slot is zero-filled whether or not a symbol was recorded.
1898 AggBuffer->addZeros(AllocSize);
1899 break;
1900 }
1901
1902 case Type::ArrayTyID:
// NOTE(review): listing line 1903 is absent; possibly one more case label
// sharing this body -- confirm against the real source.
1904 case Type::StructTyID: {
1905 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1906 bufferAggregateConstant(CPV, AggBuffer);
// Add zeros for any requested space beyond the aggregate's own alloc size.
1907 if (Bytes > AllocSize)
1908 AggBuffer->addZeros(Bytes - AllocSize);
1909 } else if (isa<ConstantAggregateZero>(CPV))
1910 AggBuffer->addZeros(Bytes);
1911 else
1912 llvm_unreachable("Unexpected Constant type");
1913 break;
1914 }
1915
1916 default:
1917 llvm_unreachable("unsupported type");
1918 }
1919}
1920
1921void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1922 AggBuffer *aggBuffer) {
1923 const DataLayout &DL = getDataLayout();
1924 int Bytes;
1925
1926 // Integers of arbitrary width
1927 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1928 APInt Val = CI->getValue();
1929 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1930 uint8_t Byte = Val.getLoBits(8).getZExtValue();
1931 aggBuffer->addBytes(&Byte, 1, 1);
1932 Val.lshrInPlace(8);
1933 }
1934 return;
1935 }
1936
1937 // Old constants
1938 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1939 if (CPV->getNumOperands())
1940 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1941 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1942 return;
1943 }
1944
1945 if (const ConstantDataSequential *CDS =
1946 dyn_cast<ConstantDataSequential>(CPV)) {
1947 if (CDS->getNumElements())
1948 for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1949 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1950 aggBuffer);
1951 return;
1952 }
1953
1954 if (isa<ConstantStruct>(CPV)) {
1955 if (CPV->getNumOperands()) {
1956 StructType *ST = cast<StructType>(CPV->getType());
1957 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1958 if (i == (e - 1))
1959 Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1960 DL.getTypeAllocSize(ST) -
1961 DL.getStructLayout(ST)->getElementOffset(i);
1962 else
1963 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1964 DL.getStructLayout(ST)->getElementOffset(i);
1965 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1966 }
1967 }
1968 return;
1969 }
1970 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1971}
1972
1973/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1974/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1975/// expressions that are representable in PTX and create
1976/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
///
/// \p ProcessingGeneric is set by the recursive call made for an addrspacecast
/// to address space 0; when it is set, a global value is returned wrapped in
/// an NVPTXGenericMCSymbolRefExpr instead of as the plain expression.
/// Expressions that cannot be lowered end in a fatal error.
///
/// NOTE(review): the embedded listing numbers skip 1989, 2028, 2093 and 2099,
/// so this extract appears to be missing one source line at each of those
/// points.
1977const MCExpr *
1978NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1979 MCContext &Ctx = OutContext;
1980
// Null and undef values lower to the constant 0.
1981 if (CV->isNullValue() || isa<UndefValue>(CV))
1982 return MCConstantExpr::create(0, Ctx);
1983
1984 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1985 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1986
1987 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1988 const MCSymbolRefExpr *Expr =
// NOTE(review): listing line 1989 is absent; it held the initializer of Expr,
// presumably a symbol reference to GV -- confirm.
1990 if (ProcessingGeneric) {
1991 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1992 } else {
1993 return Expr;
1994 }
1995 }
1996
1997 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1998 if (!CE) {
1999 llvm_unreachable("Unknown constant value to lower!");
2000 }
2001
// Every `break` in this switch leaves it for the fold-or-report-error code
// that follows the switch.
2002 switch (CE->getOpcode()) {
2003 default:
2004 break; // Error
2005
2006 case Instruction::AddrSpaceCast: {
2007 // Strip the addrspacecast and pass along the operand
2008 PointerType *DstTy = cast<PointerType>(CE->getType());
2009 if (DstTy->getAddressSpace() == 0)
2010 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2011
2012 break; // Error
2013 }
2014
2015 case Instruction::GetElementPtr: {
2016 const DataLayout &DL = getDataLayout();
2017
2018 // Generate a symbolic expression for the byte address
2019 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2020 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2021
2022 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2023 ProcessingGeneric);
// A zero offset needs no addition; return the base expression unchanged.
2024 if (!OffsetAI)
2025 return Base;
2026
2027 int64_t Offset = OffsetAI.getSExtValue();
// NOTE(review): listing line 2028 is absent; it began the statement that ends
// on the next line, presumably returning Base plus Offset -- confirm.
2029 Ctx);
2030 }
2031
2032 case Instruction::Trunc:
2033 // We emit the value and depend on the assembler to truncate the generated
2034 // expression properly. This is important for differences between
2035 // blockaddress labels. Since the two labels are in the same function, it
2036 // is reasonable to treat their delta as a 32-bit value.
2037 [[fallthrough]];
2038 case Instruction::BitCast:
2039 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2040
2041 case Instruction::IntToPtr: {
2042 const DataLayout &DL = getDataLayout();
2043
2044 // Handle casts to pointers by changing them into casts to the appropriate
2045 // integer type. This promotes constant folding and simplifies this code.
2046 Constant *Op = CE->getOperand(0);
2047 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2048 /*IsSigned*/ false, DL);
2049 if (Op)
2050 return lowerConstantForGV(Op, ProcessingGeneric);
2051
2052 break; // Error
2053 }
2054
2055 case Instruction::PtrToInt: {
2056 const DataLayout &DL = getDataLayout();
2057
2058 // Support only foldable casts to/from pointers that can be eliminated by
2059 // changing the pointer to the appropriately sized integer type.
2060 Constant *Op = CE->getOperand(0);
2061 Type *Ty = CE->getType();
2062
2063 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2064
2065 // We can emit the pointer value into this slot if the slot is an
2066 // integer slot equal to the size of the pointer.
2067 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2068 return OpExpr;
2069
2070 // Otherwise the pointer is smaller than the resultant integer, mask off
2071 // the high bits so we are sure to get a proper truncation if the input is
2072 // a constant expr.
2073 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2074 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2075 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2076 }
2077
2078 // The MC library also has a right-shift operator, but it isn't consistently
2079 // signed or unsigned between different targets.
2080 case Instruction::Add: {
2081 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2082 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
// Only Add reaches this inner switch, because Add is the only case label
// leading here.
2083 switch (CE->getOpcode()) {
2084 default: llvm_unreachable("Unknown binary operator constant cast expr");
2085 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2086 }
2087 }
2088 }
2089
2090 // If the code isn't optimized, there may be outstanding folding
2091 // opportunities. Attempt to fold the expression using DataLayout as a
2092 // last resort before giving up.
// NOTE(review): listing line 2093 is absent; presumably it defined C as the
// result of folding CE -- confirm.
2094 if (C != CE)
2095 return lowerConstantForGV(C, ProcessingGeneric);
2096
2097 // Otherwise report the problem to the user.
2098 std::string S;
// NOTE(review): listing line 2099 is absent; presumably it declared the stream
// OS that writes into S -- confirm.
2100 OS << "Unsupported expression in static initializer: ";
2101 CE->printAsOperand(OS, /*PrintType=*/false,
2102 !MF ? nullptr : MF->getFunction().getParent());
2103 report_fatal_error(Twine(OS.str()));
2104}
2105
2106// Copy of MCExpr::print customized for NVPTX
2107void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2108 switch (Expr.getKind()) {
2109 case MCExpr::Target:
2110 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2111 case MCExpr::Constant:
2112 OS << cast<MCConstantExpr>(Expr).getValue();
2113 return;
2114
2115 case MCExpr::SymbolRef: {
2116 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2117 const MCSymbol &Sym = SRE.getSymbol();
2118 Sym.print(OS, MAI);
2119 return;
2120 }
2121
2122 case MCExpr::Unary: {
2123 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2124 switch (UE.getOpcode()) {
2125 case MCUnaryExpr::LNot: OS << '!'; break;
2126 case MCUnaryExpr::Minus: OS << '-'; break;
2127 case MCUnaryExpr::Not: OS << '~'; break;
2128 case MCUnaryExpr::Plus: OS << '+'; break;
2129 }
2130 printMCExpr(*UE.getSubExpr(), OS);
2131 return;
2132 }
2133
2134 case MCExpr::Binary: {
2135 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2136
2137 // Only print parens around the LHS if it is non-trivial.
2138 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2139 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2140 printMCExpr(*BE.getLHS(), OS);
2141 } else {
2142 OS << '(';
2143 printMCExpr(*BE.getLHS(), OS);
2144 OS<< ')';
2145 }
2146
2147 switch (BE.getOpcode()) {
2148 case MCBinaryExpr::Add:
2149 // Print "X-42" instead of "X+-42".
2150 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2151 if (RHSC->getValue() < 0) {
2152 OS << RHSC->getValue();
2153 return;
2154 }
2155 }
2156
2157 OS << '+';
2158 break;
2159 default: llvm_unreachable("Unhandled binary operator");
2160 }
2161
2162 // Only print parens around the LHS if it is non-trivial.
2163 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2164 printMCExpr(*BE.getRHS(), OS);
2165 } else {
2166 OS << '(';
2167 printMCExpr(*BE.getRHS(), OS);
2168 OS << ')';
2169 }
2170 return;
2171 }
2172 }
2173
2174 llvm_unreachable("Invalid expression kind!");
2175}
2176
2177/// PrintAsmOperand - Print out an operand for an inline asm expression.
2178///
2179bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2180 const char *ExtraCode, raw_ostream &O) {
2181 if (ExtraCode && ExtraCode[0]) {
2182 if (ExtraCode[1] != 0)
2183 return true; // Unknown modifier.
2184
2185 switch (ExtraCode[0]) {
2186 default:
2187 // See if this is a generic print operand
2188 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2189 case 'r':
2190 break;
2191 }
2192 }
2193
2194 printOperand(MI, OpNo, O);
2195
2196 return false;
2197}
2198
2199bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2200 unsigned OpNo,
2201 const char *ExtraCode,
2202 raw_ostream &O) {
2203 if (ExtraCode && ExtraCode[0])
2204 return true; // Unknown modifier
2205
2206 O << '[';
2207 printMemOperand(MI, OpNo, O);
2208 O << ']';
2209
2210 return false;
2211}
2212
2213void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2214 raw_ostream &O) {
2215 const MachineOperand &MO = MI->getOperand(OpNum);
2216 switch (MO.getType()) {
2218 if (MO.getReg().isPhysical()) {
2219 if (MO.getReg() == NVPTX::VRDepot)
2221 else
2223 } else {
2224 emitVirtualRegister(MO.getReg(), O);
2225 }
2226 break;
2227
2229 O << MO.getImm();
2230 break;
2231
2233 printFPConstant(MO.getFPImm(), O);
2234 break;
2235
2237 PrintSymbolOperand(MO, O);
2238 break;
2239
2241 MO.getMBB()->getSymbol()->print(O, MAI);
2242 break;
2243
2244 default:
2245 llvm_unreachable("Operand type not supported.");
2246 }
2247}
2248
2249void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2250 raw_ostream &O, const char *Modifier) {
2251 printOperand(MI, OpNum, O);
2252
2253 if (Modifier && strcmp(Modifier, "add") == 0) {
2254 O << ", ";
2255 printOperand(MI, OpNum + 1, O);
2256 } else {
2257 if (MI->getOperand(OpNum + 1).isImm() &&
2258 MI->getOperand(OpNum + 1).getImm() == 0)
2259 return; // don't print ',0' or '+0'
2260 O << "+";
2261 printOperand(MI, OpNum + 1, O);
2262 }
2263}
2264
2265// Force static initialization.
2269}
static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
Symbol * Sym
Definition: ELF_riscv.cpp:479
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
static bool isEmptyXXStructor(GlobalVariable *GV)
#define DEPOTNAME
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
static bool usedInGlobalVarDef(const Constant *C)
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
static cl::opt< bool > LowerCtorDtor("nvptx-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(false), cl::Hidden)
static bool ShouldPassAsArray(Type *Ty)
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * name
Definition: SMEABIPass.cpp:50
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5337
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:676
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:403
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:89
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:92
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:104
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition: AsmPrinter.h:437
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:434
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:379
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:123
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:96
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCSymbol * GetExternalSymbolSymbol(Twine Sym) const
Return the MCSymbol for the specified ExternalSymbol.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:387
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:101
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:387
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:407
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:398
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
ConstantArray - Constant Array Declarations.
Definition: Constants.h:424
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:584
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1097
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValueAPF() const
Definition: Constants.h:312
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:42
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Subprogram description.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:146
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1837
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:588
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:118
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:110
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:515
bool hasExternalLinkage() const
Definition: GlobalValue.h:511
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:290
bool hasLocalLinkage() const
Definition: GlobalValue.h:528
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
unsigned getAddressSpace() const
Definition: GlobalValue.h:205
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
bool hasWeakLinkage() const
Definition: GlobalValue.h:522
bool hasCommonLinkage() const
Definition: GlobalValue.h:532
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:512
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:546
Binary assembler expressions.
Definition: MCExpr.h:488
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:635
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:638
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:537
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:532
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:632
@ Add
Addition.
Definition: MCExpr.h:491
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:193
Context object for machine code objects.
Definition: MCContext.h:83
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:213
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
@ Unary
Unary expressions.
Definition: MCExpr.h:40
@ Constant
Constant expressions.
Definition: MCExpr.h:38
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:39
@ Target
Target specific expression.
Definition: MCExpr.h:41
@ Binary
Binary expressions.
Definition: MCExpr.h:37
ExprKind getKind() const
Definition: MCExpr.h:78
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:188
const MCSymbol & getSymbol() const
Definition: MCExpr.h:406
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
Unary assembler expressions.
Definition: MCExpr.h:432
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:475
@ Minus
Unary minus.
Definition: MCExpr.h:436
@ Plus
Unary plus.
Definition: MCExpr.h:438
@ Not
Bitwise negation.
Definition: MCExpr.h:437
@ LNot
Logical negation.
Definition: MCExpr.h:435
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:478
Metadata node.
Definition: Metadata.h:1069
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:59
static const char * getRegisterName(MCRegister Reg)
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
bool hasImageHandles() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
UniqueStringSaver & getStrPool() const
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:679
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Class to represent struct types.
Definition: DerivedTypes.h:216
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition: Type.h:74
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:73
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:70
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:75
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:72
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:291
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:239
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef save(const char *S)
Definition: StringSaver.h:52
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ NVCL
Definition: NVPTX.h:75
@ CUDA
Definition: NVPTX.h:76
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
uint64_t read64le(const void *P)
Definition: Endian.h:428
uint32_t read32le(const void *P)
Definition: Endian.h:425
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
std::optional< unsigned > getMaxNTIDy(const Function &F)
std::string getSamplerName(const Value &val)
AddressSpace
Definition: NVPTXBaseInfo.h:21
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
bool getMinCTASm(const Function &F, unsigned &x)
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isImage(const Value &val)
std::optional< unsigned > getMaxNTIDz(const Function &F)
MaybeAlign getAlign(const Function &F, unsigned Index)
std::optional< unsigned > getMaxNTIDx(const Function &F)
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool isManaged(const Value &val)
unsigned promoteScalarArgumentSize(unsigned size)
bool isSurface(const Value &val)
void clearAnnotationCache(const Module *Mod)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
std::string getSurfaceName(const Value &val)
std::optional< unsigned > getReqNTIDy(const Function &F)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:200
bool getMaxNReg(const Function &F, unsigned &x)
bool isTexture(const Value &val)
bool isImageWriteOnly(const Value &val)
bool isImageReadWrite(const Value &val)
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
std::string getTextureName(const Value &val)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
std::optional< unsigned > getReqNTIDz(const Function &F)
std::optional< unsigned > getReqNTIDx(const Function &F)
bool isSampler(const Value &val)
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool getMaxClusterRank(const Function &F, unsigned &x)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:281
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:282
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...