LLVM 18.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/Twine.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/BasicBlock.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/Constants.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/DebugLoc.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/GlobalValue.h"
62#include "llvm/IR/Instruction.h"
63#include "llvm/IR/LLVMContext.h"
64#include "llvm/IR/Module.h"
65#include "llvm/IR/Operator.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/User.h"
68#include "llvm/MC/MCExpr.h"
69#include "llvm/MC/MCInst.h"
70#include "llvm/MC/MCInstrDesc.h"
71#include "llvm/MC/MCStreamer.h"
72#include "llvm/MC/MCSymbol.h"
76#include "llvm/Support/Endian.h"
79#include "llvm/Support/Path.h"
85#include <cassert>
86#include <cstdint>
87#include <cstring>
88#include <new>
89#include <string>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94
95static cl::opt<bool>
96 LowerCtorDtor("nvptx-lower-global-ctor-dtor",
97 cl::desc("Lower GPU ctor / dtors to globals on the device."),
98 cl::init(false), cl::Hidden);
99
100#define DEPOTNAME "__local_depot"
101
102/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
103/// depends.
104static void
107 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
108 Globals.insert(GV);
109 else {
110 if (const User *U = dyn_cast<User>(V)) {
111 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
112 DiscoverDependentGlobals(U->getOperand(i), Globals);
113 }
114 }
115 }
116}
117
118/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
119/// instances to be emitted, but only after any dependents have been added
120/// first.
121static void
126 // Have we already visited this one?
127 if (Visited.count(GV))
128 return;
129
130 // Do we have a circular dependency?
131 if (!Visiting.insert(GV).second)
132 report_fatal_error("Circular dependency found in global variable set");
133
134 // Make sure we visit all dependents first
136 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
137 DiscoverDependentGlobals(GV->getOperand(i), Others);
138
139 for (const GlobalVariable *GV : Others)
140 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
141
142 // Now we can visit ourself
143 Order.push_back(GV);
144 Visited.insert(GV);
145 Visiting.erase(GV);
146}
147
148void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
149 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
150 getSubtargetInfo().getFeatureBits());
151
152 MCInst Inst;
153 lowerToMCInst(MI, Inst);
155}
156
157// Handle symbol backtracking for targets that do not support image handles
158bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
159 unsigned OpNo, MCOperand &MCOp) {
160 const MachineOperand &MO = MI->getOperand(OpNo);
161 const MCInstrDesc &MCID = MI->getDesc();
162
163 if (MCID.TSFlags & NVPTXII::IsTexFlag) {
164 // This is a texture fetch, so operand 4 is a texref and operand 5 is
165 // a samplerref
166 if (OpNo == 4 && MO.isImm()) {
167 lowerImageHandleSymbol(MO.getImm(), MCOp);
168 return true;
169 }
170 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
171 lowerImageHandleSymbol(MO.getImm(), MCOp);
172 return true;
173 }
174
175 return false;
176 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
177 unsigned VecSize =
178 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
179
180 // For a surface load of vector size N, the Nth operand will be the surfref
181 if (OpNo == VecSize && MO.isImm()) {
182 lowerImageHandleSymbol(MO.getImm(), MCOp);
183 return true;
184 }
185
186 return false;
187 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
188 // This is a surface store, so operand 0 is a surfref
189 if (OpNo == 0 && MO.isImm()) {
190 lowerImageHandleSymbol(MO.getImm(), MCOp);
191 return true;
192 }
193
194 return false;
195 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
196 // This is a query, so operand 1 is a surfref/texref
197 if (OpNo == 1 && MO.isImm()) {
198 lowerImageHandleSymbol(MO.getImm(), MCOp);
199 return true;
200 }
201
202 return false;
203 }
204
205 return false;
206}
207
208void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
209 // Ewwww
211 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
213 const char *Sym = MFI->getImageHandleSymbol(Index);
214 StringRef SymName = nvTM.getStrPool().save(Sym);
215 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
216}
217
218void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
219 OutMI.setOpcode(MI->getOpcode());
220 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
221 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
222 const MachineOperand &MO = MI->getOperand(0);
223 OutMI.addOperand(GetSymbolRef(
225 return;
226 }
227
228 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
229 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
230 const MachineOperand &MO = MI->getOperand(i);
231
232 MCOperand MCOp;
233 if (!STI.hasImageHandles()) {
234 if (lowerImageHandleOperand(MI, i, MCOp)) {
235 OutMI.addOperand(MCOp);
236 continue;
237 }
238 }
239
240 if (lowerOperand(MO, MCOp))
241 OutMI.addOperand(MCOp);
242 }
243}
244
245bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
246 MCOperand &MCOp) {
247 switch (MO.getType()) {
248 default: llvm_unreachable("unknown operand type");
250 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
251 break;
253 MCOp = MCOperand::createImm(MO.getImm());
254 break;
257 MO.getMBB()->getSymbol(), OutContext));
258 break;
260 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
261 break;
263 MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
264 break;
266 const ConstantFP *Cnt = MO.getFPImm();
267 const APFloat &Val = Cnt->getValueAPF();
268
269 switch (Cnt->getType()->getTypeID()) {
270 default: report_fatal_error("Unsupported FP type"); break;
271 case Type::HalfTyID:
274 break;
275 case Type::BFloatTyID:
278 break;
279 case Type::FloatTyID:
282 break;
283 case Type::DoubleTyID:
286 break;
287 }
288 break;
289 }
290 }
291 return true;
292}
293
294unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
296 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
297
298 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
299 unsigned RegNum = RegMap[Reg];
300
301 // Encode the register class in the upper 4 bits
302 // Must be kept in sync with NVPTXInstPrinter::printRegName
303 unsigned Ret = 0;
304 if (RC == &NVPTX::Int1RegsRegClass) {
305 Ret = (1 << 28);
306 } else if (RC == &NVPTX::Int16RegsRegClass) {
307 Ret = (2 << 28);
308 } else if (RC == &NVPTX::Int32RegsRegClass) {
309 Ret = (3 << 28);
310 } else if (RC == &NVPTX::Int64RegsRegClass) {
311 Ret = (4 << 28);
312 } else if (RC == &NVPTX::Float32RegsRegClass) {
313 Ret = (5 << 28);
314 } else if (RC == &NVPTX::Float64RegsRegClass) {
315 Ret = (6 << 28);
316 } else {
317 report_fatal_error("Bad register class");
318 }
319
320 // Insert the vreg number
321 Ret |= (RegNum & 0x0FFFFFFF);
322 return Ret;
323 } else {
324 // Some special-use registers are actually physical registers.
325 // Encode this as the register class ID of 0 and the real register ID.
326 return Reg & 0x0FFFFFFF;
327 }
328}
329
330MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
331 const MCExpr *Expr;
333 OutContext);
334 return MCOperand::createExpr(Expr);
335}
336
337static bool ShouldPassAsArray(Type *Ty) {
338 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
339 Ty->isHalfTy() || Ty->isBFloatTy();
340}
341
342void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
343 const DataLayout &DL = getDataLayout();
345 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
346
347 Type *Ty = F->getReturnType();
348
349 bool isABI = (STI.getSmVersion() >= 20);
350
351 if (Ty->getTypeID() == Type::VoidTyID)
352 return;
353 O << " (";
354
355 if (isABI) {
356 if ((Ty->isFloatingPointTy() || Ty->isIntegerTy()) &&
357 !ShouldPassAsArray(Ty)) {
358 unsigned size = 0;
359 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
360 size = ITy->getBitWidth();
361 } else {
362 assert(Ty->isFloatingPointTy() && "Floating point type expected here");
364 }
366 O << ".param .b" << size << " func_retval0";
367 } else if (isa<PointerType>(Ty)) {
368 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
369 << " func_retval0";
370 } else if (ShouldPassAsArray(Ty)) {
371 unsigned totalsz = DL.getTypeAllocSize(Ty);
372 unsigned retAlignment = 0;
373 if (!getAlign(*F, 0, retAlignment))
374 retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
375 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
376 << "]";
377 } else
378 llvm_unreachable("Unknown return type");
379 } else {
380 SmallVector<EVT, 16> vtparts;
381 ComputeValueVTs(*TLI, DL, Ty, vtparts);
382 unsigned idx = 0;
383 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
384 unsigned elems = 1;
385 EVT elemtype = vtparts[i];
386 if (vtparts[i].isVector()) {
387 elems = vtparts[i].getVectorNumElements();
388 elemtype = vtparts[i].getVectorElementType();
389 }
390
391 for (unsigned j = 0, je = elems; j != je; ++j) {
392 unsigned sz = elemtype.getSizeInBits();
393 if (elemtype.isInteger())
395 O << ".reg .b" << sz << " func_retval" << idx;
396 if (j < je - 1)
397 O << ", ";
398 ++idx;
399 }
400 if (i < e - 1)
401 O << ", ";
402 }
403 }
404 O << ") ";
405}
406
407void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
408 raw_ostream &O) {
409 const Function &F = MF.getFunction();
410 printReturnValStr(&F, O);
411}
412
413// Return true if MBB is the header of a loop marked with
414// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
415bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
416 const MachineBasicBlock &MBB) const {
417 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
418 // We insert .pragma "nounroll" only to the loop header.
419 if (!LI.isLoopHeader(&MBB))
420 return false;
421
422 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
423 // we iterate through each back edge of the loop with header MBB, and check
424 // whether its metadata contains llvm.loop.unroll.disable.
425 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
426 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
427 // Edges from other loops to MBB are not back edges.
428 continue;
429 }
430 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
431 if (MDNode *LoopID =
432 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
433 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
434 return true;
435 if (MDNode *UnrollCountMD =
436 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
437 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
438 ->isOne())
439 return true;
440 }
441 }
442 }
443 }
444 return false;
445}
446
447void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
449 if (isLoopHeaderOfNoUnroll(MBB))
450 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
451}
452
453void NVPTXAsmPrinter::emitFunctionEntryLabel() {
456
457 if (!GlobalsEmitted) {
458 emitGlobals(*MF->getFunction().getParent());
459 GlobalsEmitted = true;
460 }
461
462 // Set up
463 MRI = &MF->getRegInfo();
464 F = &MF->getFunction();
465 emitLinkageDirective(F, O);
466 if (isKernelFunction(*F))
467 O << ".entry ";
468 else {
469 O << ".func ";
470 printReturnValStr(*MF, O);
471 }
472
474
475 emitFunctionParamList(F, O);
476 O << "\n";
477
478 if (isKernelFunction(*F))
479 emitKernelFunctionDirectives(*F, O);
480
482 O << ".noreturn";
483
484 OutStreamer->emitRawText(O.str());
485
486 VRegMapping.clear();
487 // Emit open brace for function body.
488 OutStreamer->emitRawText(StringRef("{\n"));
489 setAndEmitFunctionVirtualRegisters(*MF);
490 // Emit initial .loc debug directive for correct relocation symbol data.
491 if (MMI && MMI->hasDebugInfo())
493}
494
496 bool Result = AsmPrinter::runOnMachineFunction(F);
497 // Emit closing brace for the body of function F.
498 // The closing brace must be emitted here because we need to emit additional
499 // debug labels/data after the last basic block.
500 // We need to emit the closing brace here because we don't have function that
501 // finished emission of the function body.
502 OutStreamer->emitRawText(StringRef("}\n"));
503 return Result;
504}
505
506void NVPTXAsmPrinter::emitFunctionBodyStart() {
508 raw_svector_ostream O(Str);
509 emitDemotedVars(&MF->getFunction(), O);
510 OutStreamer->emitRawText(O.str());
511}
512
513void NVPTXAsmPrinter::emitFunctionBodyEnd() {
514 VRegMapping.clear();
515}
516
520 return OutContext.getOrCreateSymbol(Str);
521}
522
523void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
524 Register RegNo = MI->getOperand(0).getReg();
525 if (RegNo.isVirtual()) {
526 OutStreamer->AddComment(Twine("implicit-def: ") +
528 } else {
529 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
530 OutStreamer->AddComment(Twine("implicit-def: ") +
531 STI.getRegisterInfo()->getName(RegNo));
532 }
533 OutStreamer->addBlankLine();
534}
535
536void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
537 raw_ostream &O) const {
538 // If the NVVM IR has some of reqntid* specified, then output
539 // the reqntid directive, and set the unspecified ones to 1.
540 // If none of Reqntid* is specified, don't output reqntid directive.
541 unsigned Reqntidx, Reqntidy, Reqntidz;
542 Reqntidx = Reqntidy = Reqntidz = 1;
543 bool ReqSpecified = false;
544 ReqSpecified |= getReqNTIDx(F, Reqntidx);
545 ReqSpecified |= getReqNTIDy(F, Reqntidy);
546 ReqSpecified |= getReqNTIDz(F, Reqntidz);
547
548 if (ReqSpecified)
549 O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
550 << "\n";
551
552 // If the NVVM IR has some of maxntid* specified, then output
553 // the maxntid directive, and set the unspecified ones to 1.
554 // If none of maxntid* is specified, don't output maxntid directive.
555 unsigned Maxntidx, Maxntidy, Maxntidz;
556 Maxntidx = Maxntidy = Maxntidz = 1;
557 bool MaxSpecified = false;
558 MaxSpecified |= getMaxNTIDx(F, Maxntidx);
559 MaxSpecified |= getMaxNTIDy(F, Maxntidy);
560 MaxSpecified |= getMaxNTIDz(F, Maxntidz);
561
562 if (MaxSpecified)
563 O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
564 << "\n";
565
566 unsigned Mincta = 0;
567 if (getMinCTASm(F, Mincta))
568 O << ".minnctapersm " << Mincta << "\n";
569
570 unsigned Maxnreg = 0;
571 if (getMaxNReg(F, Maxnreg))
572 O << ".maxnreg " << Maxnreg << "\n";
573
574 // .maxclusterrank directive requires SM_90 or higher, make sure that we
575 // filter it out for lower SM versions, as it causes a hard ptxas crash.
576 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
577 const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
578 unsigned Maxclusterrank = 0;
579 if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90)
580 O << ".maxclusterrank " << Maxclusterrank << "\n";
581}
582
583std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
584 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
585
586 std::string Name;
587 raw_string_ostream NameStr(Name);
588
589 VRegRCMap::const_iterator I = VRegMapping.find(RC);
590 assert(I != VRegMapping.end() && "Bad register class");
591 const DenseMap<unsigned, unsigned> &RegMap = I->second;
592
593 VRegMap::const_iterator VI = RegMap.find(Reg);
594 assert(VI != RegMap.end() && "Bad virtual register");
595 unsigned MappedVR = VI->second;
596
597 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
598
599 NameStr.flush();
600 return Name;
601}
602
603void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
604 raw_ostream &O) {
605 O << getVirtualRegisterName(vr);
606}
607
608void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
609 emitLinkageDirective(F, O);
610 if (isKernelFunction(*F))
611 O << ".entry ";
612 else
613 O << ".func ";
614 printReturnValStr(F, O);
615 getSymbol(F)->print(O, MAI);
616 O << "\n";
617 emitFunctionParamList(F, O);
618 O << "\n";
620 O << ".noreturn";
621 O << ";\n";
622}
623
624static bool usedInGlobalVarDef(const Constant *C) {
625 if (!C)
626 return false;
627
628 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
629 return GV->getName() != "llvm.used";
630 }
631
632 for (const User *U : C->users())
633 if (const Constant *C = dyn_cast<Constant>(U))
635 return true;
636
637 return false;
638}
639
640static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
641 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
642 if (othergv->getName() == "llvm.used")
643 return true;
644 }
645
646 if (const Instruction *instr = dyn_cast<Instruction>(U)) {
647 if (instr->getParent() && instr->getParent()->getParent()) {
648 const Function *curFunc = instr->getParent()->getParent();
649 if (oneFunc && (curFunc != oneFunc))
650 return false;
651 oneFunc = curFunc;
652 return true;
653 } else
654 return false;
655 }
656
657 for (const User *UU : U->users())
658 if (!usedInOneFunc(UU, oneFunc))
659 return false;
660
661 return true;
662}
663
664/* Find out if a global variable can be demoted to local scope.
665 * Currently, this is valid for CUDA shared variables, which have local
666 * scope and global lifetime. So the conditions to check are :
667 * 1. Is the global variable in shared address space?
668 * 2. Does it have local linkage?
669 * 3. Is the global variable referenced only in one function?
670 */
671static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
672 if (!gv->hasLocalLinkage())
673 return false;
674 PointerType *Pty = gv->getType();
675 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
676 return false;
677
678 const Function *oneFunc = nullptr;
679
680 bool flag = usedInOneFunc(gv, oneFunc);
681 if (!flag)
682 return false;
683 if (!oneFunc)
684 return false;
685 f = oneFunc;
686 return true;
687}
688
689static bool useFuncSeen(const Constant *C,
691 for (const User *U : C->users()) {
692 if (const Constant *cu = dyn_cast<Constant>(U)) {
693 if (useFuncSeen(cu, seenMap))
694 return true;
695 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
696 const BasicBlock *bb = I->getParent();
697 if (!bb)
698 continue;
699 const Function *caller = bb->getParent();
700 if (!caller)
701 continue;
702 if (seenMap.contains(caller))
703 return true;
704 }
705 }
706 return false;
707}
708
709void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
711 for (const Function &F : M) {
712 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
713 emitDeclaration(&F, O);
714 continue;
715 }
716
717 if (F.isDeclaration()) {
718 if (F.use_empty())
719 continue;
720 if (F.getIntrinsicID())
721 continue;
722 emitDeclaration(&F, O);
723 continue;
724 }
725 for (const User *U : F.users()) {
726 if (const Constant *C = dyn_cast<Constant>(U)) {
727 if (usedInGlobalVarDef(C)) {
728 // The use is in the initialization of a global variable
729 // that is a function pointer, so print a declaration
730 // for the original function
731 emitDeclaration(&F, O);
732 break;
733 }
734 // Emit a declaration of this function if the function that
735 // uses this constant expr has already been seen.
736 if (useFuncSeen(C, seenMap)) {
737 emitDeclaration(&F, O);
738 break;
739 }
740 }
741
742 if (!isa<Instruction>(U))
743 continue;
744 const Instruction *instr = cast<Instruction>(U);
745 const BasicBlock *bb = instr->getParent();
746 if (!bb)
747 continue;
748 const Function *caller = bb->getParent();
749 if (!caller)
750 continue;
751
752 // If a caller has already been seen, then the caller is
753 // appearing in the module before the callee. so print out
754 // a declaration for the callee.
755 if (seenMap.contains(caller)) {
756 emitDeclaration(&F, O);
757 break;
758 }
759 }
760 seenMap[&F] = true;
761 }
762}
763
765 if (!GV) return true;
766 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
767 if (!InitList) return true; // Not an array; we don't know how to parse.
768 return InitList->getNumOperands() == 0;
769}
770
771void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
772 // Construct a default subtarget off of the TargetMachine defaults. The
773 // rest of NVPTX isn't friendly to change subtargets per function and
774 // so the default TargetMachine will have all of the options.
775 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
776 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
777 SmallString<128> Str1;
778 raw_svector_ostream OS1(Str1);
779
780 // Emit header before any dwarf directives are emitted below.
781 emitHeader(M, OS1, *STI);
782 OutStreamer->emitRawText(OS1.str());
783}
784
786 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
787 const NVPTXSubtarget &STI =
788 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
789 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
790 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
791
792 // OpenMP supports NVPTX global constructors and destructors.
793 bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
794
795 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
796 !LowerCtorDtor && !IsOpenMP) {
798 "Module has a nontrivial global ctor, which NVPTX does not support.");
799 return true; // error
800 }
801 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
802 !LowerCtorDtor && !IsOpenMP) {
804 "Module has a nontrivial global dtor, which NVPTX does not support.");
805 return true; // error
806 }
807
808 // We need to call the parent's one explicitly.
809 bool Result = AsmPrinter::doInitialization(M);
810
811 GlobalsEmitted = false;
812
813 return Result;
814}
815
816void NVPTXAsmPrinter::emitGlobals(const Module &M) {
817 SmallString<128> Str2;
818 raw_svector_ostream OS2(Str2);
819
820 emitDeclarations(M, OS2);
821
822 // As ptxas does not support forward references of globals, we need to first
823 // sort the list of module-level globals in def-use order. We visit each
824 // global variable in order, and ensure that we emit it *after* its dependent
825 // globals. We use a little extra memory maintaining both a set and a list to
826 // have fast searches while maintaining a strict ordering.
830
831 // Visit each global variable, in order
832 for (const GlobalVariable &I : M.globals())
833 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
834
835 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
836 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
837
838 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
839 const NVPTXSubtarget &STI =
840 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
841
842 // Print out module-level global variables in proper order
843 for (unsigned i = 0, e = Globals.size(); i != e; ++i)
844 printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
845
846 OS2 << '\n';
847
848 OutStreamer->emitRawText(OS2.str());
849}
850
851void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
854
855 MCSymbol *Name = getSymbol(&GA);
856 const Function *F = dyn_cast<Function>(GA.getAliasee());
857 if (!F || isKernelFunction(*F))
858 report_fatal_error("NVPTX aliasee must be a non-kernel function");
859
860 if (GA.hasLinkOnceLinkage() || GA.hasWeakLinkage() ||
862 report_fatal_error("NVPTX aliasee must not be '.weak'");
863
864 OS << "\n";
865 emitLinkageDirective(F, OS);
866 OS << ".func ";
867 printReturnValStr(F, OS);
868 OS << Name->getName();
869 emitFunctionParamList(F, OS);
871 OS << "\n.noreturn";
872 OS << ";\n";
873
874 OS << ".alias " << Name->getName() << ", " << F->getName() << ";\n";
875
876 OutStreamer->emitRawText(OS.str());
877}
878
879void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
880 const NVPTXSubtarget &STI) {
881 O << "//\n";
882 O << "// Generated by LLVM NVPTX Back-End\n";
883 O << "//\n";
884 O << "\n";
885
886 unsigned PTXVersion = STI.getPTXVersion();
887 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
888
889 O << ".target ";
890 O << STI.getTargetName();
891
892 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
893 if (NTM.getDrvInterface() == NVPTX::NVCL)
894 O << ", texmode_independent";
895
896 bool HasFullDebugInfo = false;
897 for (DICompileUnit *CU : M.debug_compile_units()) {
898 switch(CU->getEmissionKind()) {
901 break;
904 HasFullDebugInfo = true;
905 break;
906 }
907 if (HasFullDebugInfo)
908 break;
909 }
910 if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
911 O << ", debug";
912
913 O << "\n";
914
915 O << ".address_size ";
916 if (NTM.is64Bit())
917 O << "64";
918 else
919 O << "32";
920 O << "\n";
921
922 O << "\n";
923}
924
926 bool HasDebugInfo = MMI && MMI->hasDebugInfo();
927
928 // If we did not emit any functions, then the global declarations have not
929 // yet been emitted.
930 if (!GlobalsEmitted) {
931 emitGlobals(M);
932 GlobalsEmitted = true;
933 }
934
935 // If we have any aliases we emit them at the end.
936 SmallVector<GlobalAlias *> AliasesToRemove;
937 for (GlobalAlias &Alias : M.aliases()) {
938 emitGlobalAlias(M, Alias);
939 AliasesToRemove.push_back(&Alias);
940 }
941
942 for (GlobalAlias *A : AliasesToRemove)
943 A->eraseFromParent();
944
945 // call doFinalization
946 bool ret = AsmPrinter::doFinalization(M);
947
949
950 auto *TS =
951 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
952 // Close the last emitted section
953 if (HasDebugInfo) {
954 TS->closeLastSection();
955 // Emit empty .debug_loc section for better support of the empty files.
956 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
957 }
958
959 // Output last DWARF .file directives, if any.
960 TS->outputDwarfFileDirectives();
961
962 return ret;
963}
964
965// This function emits appropriate linkage directives for
966// functions and global variables.
967//
968// extern function declaration -> .extern
969// extern function definition -> .visible
970// external global variable with init -> .visible
971// external without init -> .extern
972// appending -> not allowed, assert.
973// for any linkage other than
974// internal, private, linker_private,
975// linker_private_weak, linker_private_weak_def_auto,
976// we emit -> .weak.
977
978void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
979 raw_ostream &O) {
980 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
981 if (V->hasExternalLinkage()) {
982 if (isa<GlobalVariable>(V)) {
983 const GlobalVariable *GVar = cast<GlobalVariable>(V);
984 if (GVar) {
985 if (GVar->hasInitializer())
986 O << ".visible ";
987 else
988 O << ".extern ";
989 }
990 } else if (V->isDeclaration())
991 O << ".extern ";
992 else
993 O << ".visible ";
994 } else if (V->hasAppendingLinkage()) {
995 std::string msg;
996 msg.append("Error: ");
997 msg.append("Symbol ");
998 if (V->hasName())
999 msg.append(std::string(V->getName()));
1000 msg.append("has unsupported appending linkage type");
1001 llvm_unreachable(msg.c_str());
1002 } else if (!V->hasInternalLinkage() &&
1003 !V->hasPrivateLinkage()) {
1004 O << ".weak ";
1005 }
1006 }
1007}
1008
1009void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1010 raw_ostream &O, bool processDemoted,
1011 const NVPTXSubtarget &STI) {
1012 // Skip meta data
1013 if (GVar->hasSection()) {
1014 if (GVar->getSection() == "llvm.metadata")
1015 return;
1016 }
1017
1018 // Skip LLVM intrinsic global variables
1019 if (GVar->getName().startswith("llvm.") ||
1020 GVar->getName().startswith("nvvm."))
1021 return;
1022
1023 const DataLayout &DL = getDataLayout();
1024
1025 // GlobalVariables are always constant pointers themselves.
1026 PointerType *PTy = GVar->getType();
1027 Type *ETy = GVar->getValueType();
1028
1029 if (GVar->hasExternalLinkage()) {
1030 if (GVar->hasInitializer())
1031 O << ".visible ";
1032 else
1033 O << ".extern ";
1034 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1036 GVar->hasCommonLinkage()) {
1037 O << ".weak ";
1038 }
1039
1040 if (isTexture(*GVar)) {
1041 O << ".global .texref " << getTextureName(*GVar) << ";\n";
1042 return;
1043 }
1044
1045 if (isSurface(*GVar)) {
1046 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1047 return;
1048 }
1049
1050 if (GVar->isDeclaration()) {
1051 // (extern) declarations, no definition or initializer
1052 // Currently the only known declaration is for an automatic __local
1053 // (.shared) promoted to global.
1054 emitPTXGlobalVariable(GVar, O, STI);
1055 O << ";\n";
1056 return;
1057 }
1058
1059 if (isSampler(*GVar)) {
1060 O << ".global .samplerref " << getSamplerName(*GVar);
1061
1062 const Constant *Initializer = nullptr;
1063 if (GVar->hasInitializer())
1064 Initializer = GVar->getInitializer();
1065 const ConstantInt *CI = nullptr;
1066 if (Initializer)
1067 CI = dyn_cast<ConstantInt>(Initializer);
1068 if (CI) {
1069 unsigned sample = CI->getZExtValue();
1070
1071 O << " = { ";
1072
1073 for (int i = 0,
1074 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1075 i < 3; i++) {
1076 O << "addr_mode_" << i << " = ";
1077 switch (addr) {
1078 case 0:
1079 O << "wrap";
1080 break;
1081 case 1:
1082 O << "clamp_to_border";
1083 break;
1084 case 2:
1085 O << "clamp_to_edge";
1086 break;
1087 case 3:
1088 O << "wrap";
1089 break;
1090 case 4:
1091 O << "mirror";
1092 break;
1093 }
1094 O << ", ";
1095 }
1096 O << "filter_mode = ";
1097 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1098 case 0:
1099 O << "nearest";
1100 break;
1101 case 1:
1102 O << "linear";
1103 break;
1104 case 2:
1105 llvm_unreachable("Anisotropic filtering is not supported");
1106 default:
1107 O << "nearest";
1108 break;
1109 }
1110 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1111 O << ", force_unnormalized_coords = 1";
1112 }
1113 O << " }";
1114 }
1115
1116 O << ";\n";
1117 return;
1118 }
1119
1120 if (GVar->hasPrivateLinkage()) {
1121 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1122 return;
1123
1124 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1125 if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1126 return;
1127 if (GVar->use_empty())
1128 return;
1129 }
1130
1131 const Function *demotedFunc = nullptr;
1132 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1133 O << "// " << GVar->getName() << " has been demoted\n";
1134 if (localDecls.find(demotedFunc) != localDecls.end())
1135 localDecls[demotedFunc].push_back(GVar);
1136 else {
1137 std::vector<const GlobalVariable *> temp;
1138 temp.push_back(GVar);
1139 localDecls[demotedFunc] = temp;
1140 }
1141 return;
1142 }
1143
1144 O << ".";
1145 emitPTXAddressSpace(PTy->getAddressSpace(), O);
1146
1147 if (isManaged(*GVar)) {
1148 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1150 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1151 }
1152 O << " .attribute(.managed)";
1153 }
1154
1155 if (MaybeAlign A = GVar->getAlign())
1156 O << " .align " << A->value();
1157 else
1158 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1159
1160 if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1161 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1162 O << " .";
1163 // Special case: ABI requires that we use .u8 for predicates
1164 if (ETy->isIntegerTy(1))
1165 O << "u8";
1166 else
1167 O << getPTXFundamentalTypeStr(ETy, false);
1168 O << " ";
1169 getSymbol(GVar)->print(O, MAI);
1170
1171 // Ptx allows variable initilization only for constant and global state
1172 // spaces.
1173 if (GVar->hasInitializer()) {
1174 if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1175 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1176 const Constant *Initializer = GVar->getInitializer();
1177 // 'undef' is treated as there is no value specified.
1178 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1179 O << " = ";
1180 printScalarConstant(Initializer, O);
1181 }
1182 } else {
1183 // The frontend adds zero-initializer to device and constant variables
1184 // that don't have an initial value, and UndefValue to shared
1185 // variables, so skip warning for this case.
1186 if (!GVar->getInitializer()->isNullValue() &&
1187 !isa<UndefValue>(GVar->getInitializer())) {
1188 report_fatal_error("initial value of '" + GVar->getName() +
1189 "' is not allowed in addrspace(" +
1190 Twine(PTy->getAddressSpace()) + ")");
1191 }
1192 }
1193 }
1194 } else {
1195 uint64_t ElementSize = 0;
1196
1197 // Although PTX has direct support for struct type and array type and
1198 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1199 // targets that support these high level field accesses. Structs, arrays
1200 // and vectors are lowered into arrays of bytes.
1201 switch (ETy->getTypeID()) {
1202 case Type::IntegerTyID: // Integers larger than 64 bits
1203 case Type::StructTyID:
1204 case Type::ArrayTyID:
1206 ElementSize = DL.getTypeStoreSize(ETy);
1207 // Ptx allows variable initilization only for constant and
1208 // global state spaces.
1209 if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1210 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1211 GVar->hasInitializer()) {
1212 const Constant *Initializer = GVar->getInitializer();
1213 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1214 AggBuffer aggBuffer(ElementSize, *this);
1215 bufferAggregateConstant(Initializer, &aggBuffer);
1216 if (aggBuffer.numSymbols()) {
1217 unsigned int ptrSize = MAI->getCodePointerSize();
1218 if (ElementSize % ptrSize ||
1219 !aggBuffer.allSymbolsAligned(ptrSize)) {
1220 // Print in bytes and use the mask() operator for pointers.
1221 if (!STI.hasMaskOperator())
1223 "initialized packed aggregate with pointers '" +
1224 GVar->getName() +
1225 "' requires at least PTX ISA version 7.1");
1226 O << " .u8 ";
1227 getSymbol(GVar)->print(O, MAI);
1228 O << "[" << ElementSize << "] = {";
1229 aggBuffer.printBytes(O);
1230 O << "}";
1231 } else {
1232 O << " .u" << ptrSize * 8 << " ";
1233 getSymbol(GVar)->print(O, MAI);
1234 O << "[" << ElementSize / ptrSize << "] = {";
1235 aggBuffer.printWords(O);
1236 O << "}";
1237 }
1238 } else {
1239 O << " .b8 ";
1240 getSymbol(GVar)->print(O, MAI);
1241 O << "[" << ElementSize << "] = {";
1242 aggBuffer.printBytes(O);
1243 O << "}";
1244 }
1245 } else {
1246 O << " .b8 ";
1247 getSymbol(GVar)->print(O, MAI);
1248 if (ElementSize) {
1249 O << "[";
1250 O << ElementSize;
1251 O << "]";
1252 }
1253 }
1254 } else {
1255 O << " .b8 ";
1256 getSymbol(GVar)->print(O, MAI);
1257 if (ElementSize) {
1258 O << "[";
1259 O << ElementSize;
1260 O << "]";
1261 }
1262 }
1263 break;
1264 default:
1265 llvm_unreachable("type not supported yet");
1266 }
1267 }
1268 O << ";\n";
1269}
1270
1271void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1272 const Value *v = Symbols[nSym];
1273 const Value *v0 = SymbolsBeforeStripping[nSym];
1274 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1275 MCSymbol *Name = AP.getSymbol(GVar);
1276 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1277 // Is v0 a generic pointer?
1278 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1279 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1280 os << "generic(";
1281 Name->print(os, AP.MAI);
1282 os << ")";
1283 } else {
1284 Name->print(os, AP.MAI);
1285 }
1286 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1287 const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1288 AP.printMCExpr(*Expr, os);
1289 } else
1290 llvm_unreachable("symbol type unknown");
1291}
1292
1293void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1294 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1295 symbolPosInBuffer.push_back(size);
1296 unsigned int nSym = 0;
1297 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1298 for (unsigned int pos = 0; pos < size;) {
1299 if (pos)
1300 os << ", ";
1301 if (pos != nextSymbolPos) {
1302 os << (unsigned int)buffer[pos];
1303 ++pos;
1304 continue;
1305 }
1306 // Generate a per-byte mask() operator for the symbol, which looks like:
1307 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1308 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1309 std::string symText;
1310 llvm::raw_string_ostream oss(symText);
1311 printSymbol(nSym, oss);
1312 for (unsigned i = 0; i < ptrSize; ++i) {
1313 if (i)
1314 os << ", ";
1315 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1316 os << "(" << symText << ")";
1317 }
1318 pos += ptrSize;
1319 nextSymbolPos = symbolPosInBuffer[++nSym];
1320 assert(nextSymbolPos >= pos);
1321 }
1322}
1323
1324void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1325 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1326 symbolPosInBuffer.push_back(size);
1327 unsigned int nSym = 0;
1328 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1329 assert(nextSymbolPos % ptrSize == 0);
1330 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1331 if (pos)
1332 os << ", ";
1333 if (pos == nextSymbolPos) {
1334 printSymbol(nSym, os);
1335 nextSymbolPos = symbolPosInBuffer[++nSym];
1336 assert(nextSymbolPos % ptrSize == 0);
1337 assert(nextSymbolPos >= pos + ptrSize);
1338 } else if (ptrSize == 4)
1339 os << support::endian::read32le(&buffer[pos]);
1340 else
1341 os << support::endian::read64le(&buffer[pos]);
1342 }
1343}
1344
1345void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1346 if (localDecls.find(f) == localDecls.end())
1347 return;
1348
1349 std::vector<const GlobalVariable *> &gvars = localDecls[f];
1350
1351 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1352 const NVPTXSubtarget &STI =
1353 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1354
1355 for (const GlobalVariable *GV : gvars) {
1356 O << "\t// demoted variable\n\t";
1357 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1358 }
1359}
1360
1361void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1362 raw_ostream &O) const {
1363 switch (AddressSpace) {
1365 O << "local";
1366 break;
1368 O << "global";
1369 break;
1371 O << "const";
1372 break;
1374 O << "shared";
1375 break;
1376 default:
1377 report_fatal_error("Bad address space found while emitting PTX: " +
1379 break;
1380 }
1381}
1382
1383std::string
1384NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1385 switch (Ty->getTypeID()) {
1386 case Type::IntegerTyID: {
1387 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1388 if (NumBits == 1)
1389 return "pred";
1390 else if (NumBits <= 64) {
1391 std::string name = "u";
1392 return name + utostr(NumBits);
1393 } else {
1394 llvm_unreachable("Integer too large");
1395 break;
1396 }
1397 break;
1398 }
1399 case Type::BFloatTyID:
1400 case Type::HalfTyID:
1401 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1402 // PTX assembly.
1403 return "b16";
1404 case Type::FloatTyID:
1405 return "f32";
1406 case Type::DoubleTyID:
1407 return "f64";
1408 case Type::PointerTyID: {
1409 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1410 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1411
1412 if (PtrSize == 64)
1413 if (useB4PTR)
1414 return "b64";
1415 else
1416 return "u64";
1417 else if (useB4PTR)
1418 return "b32";
1419 else
1420 return "u32";
1421 }
1422 default:
1423 break;
1424 }
1425 llvm_unreachable("unexpected type");
1426}
1427
1428void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1429 raw_ostream &O,
1430 const NVPTXSubtarget &STI) {
1431 const DataLayout &DL = getDataLayout();
1432
1433 // GlobalVariables are always constant pointers themselves.
1434 Type *ETy = GVar->getValueType();
1435
1436 O << ".";
1437 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1438 if (isManaged(*GVar)) {
1439 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1441 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1442 }
1443 O << " .attribute(.managed)";
1444 }
1445 if (MaybeAlign A = GVar->getAlign())
1446 O << " .align " << A->value();
1447 else
1448 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1449
1450 // Special case for i128
1451 if (ETy->isIntegerTy(128)) {
1452 O << " .b8 ";
1453 getSymbol(GVar)->print(O, MAI);
1454 O << "[16]";
1455 return;
1456 }
1457
1458 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1459 O << " .";
1460 O << getPTXFundamentalTypeStr(ETy);
1461 O << " ";
1462 getSymbol(GVar)->print(O, MAI);
1463 return;
1464 }
1465
1466 int64_t ElementSize = 0;
1467
1468 // Although PTX has direct support for struct type and array type and LLVM IR
1469 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1470 // support these high level field accesses. Structs and arrays are lowered
1471 // into arrays of bytes.
1472 switch (ETy->getTypeID()) {
1473 case Type::StructTyID:
1474 case Type::ArrayTyID:
1476 ElementSize = DL.getTypeStoreSize(ETy);
1477 O << " .b8 ";
1478 getSymbol(GVar)->print(O, MAI);
1479 O << "[";
1480 if (ElementSize) {
1481 O << ElementSize;
1482 }
1483 O << "]";
1484 break;
1485 default:
1486 llvm_unreachable("type not supported yet");
1487 }
1488}
1489
1490void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1491 const DataLayout &DL = getDataLayout();
1492 const AttributeList &PAL = F->getAttributes();
1493 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1494 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1495
1497 unsigned paramIndex = 0;
1498 bool first = true;
1499 bool isKernelFunc = isKernelFunction(*F);
1500 bool isABI = (STI.getSmVersion() >= 20);
1501 bool hasImageHandles = STI.hasImageHandles();
1502
1503 if (F->arg_empty() && !F->isVarArg()) {
1504 O << "()";
1505 return;
1506 }
1507
1508 O << "(\n";
1509
1510 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1511 Type *Ty = I->getType();
1512
1513 if (!first)
1514 O << ",\n";
1515
1516 first = false;
1517
1518 // Handle image/sampler parameters
1519 if (isKernelFunction(*F)) {
1520 if (isSampler(*I) || isImage(*I)) {
1521 if (isImage(*I)) {
1522 std::string sname = std::string(I->getName());
1523 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1524 if (hasImageHandles)
1525 O << "\t.param .u64 .ptr .surfref ";
1526 else
1527 O << "\t.param .surfref ";
1528 O << TLI->getParamName(F, paramIndex);
1529 }
1530 else { // Default image is read_only
1531 if (hasImageHandles)
1532 O << "\t.param .u64 .ptr .texref ";
1533 else
1534 O << "\t.param .texref ";
1535 O << TLI->getParamName(F, paramIndex);
1536 }
1537 } else {
1538 if (hasImageHandles)
1539 O << "\t.param .u64 .ptr .samplerref ";
1540 else
1541 O << "\t.param .samplerref ";
1542 O << TLI->getParamName(F, paramIndex);
1543 }
1544 continue;
1545 }
1546 }
1547
1548 auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1549 paramIndex](Type *Ty) -> Align {
1550 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1551 MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1552 return std::max(TypeAlign, ParamAlign.valueOrOne());
1553 };
1554
1555 if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1556 if (ShouldPassAsArray(Ty)) {
1557 // Just print .param .align <a> .b8 .param[size];
1558 // <a> = optimal alignment for the element type; always multiple of
1559 // PAL.getParamAlignment
1560 // size = typeallocsize of element type
1561 Align OptimalAlign = getOptimalAlignForParam(Ty);
1562
1563 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1564 O << TLI->getParamName(F, paramIndex);
1565 O << "[" << DL.getTypeAllocSize(Ty) << "]";
1566
1567 continue;
1568 }
1569 // Just a scalar
1570 auto *PTy = dyn_cast<PointerType>(Ty);
1571 unsigned PTySizeInBits = 0;
1572 if (PTy) {
1573 PTySizeInBits =
1574 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1575 assert(PTySizeInBits && "Invalid pointer size");
1576 }
1577
1578 if (isKernelFunc) {
1579 if (PTy) {
1580 // Special handling for pointer arguments to kernel
1581 O << "\t.param .u" << PTySizeInBits << " ";
1582
1583 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1584 NVPTX::CUDA) {
1585 int addrSpace = PTy->getAddressSpace();
1586 switch (addrSpace) {
1587 default:
1588 O << ".ptr ";
1589 break;
1591 O << ".ptr .const ";
1592 break;
1594 O << ".ptr .shared ";
1595 break;
1597 O << ".ptr .global ";
1598 break;
1599 }
1600 Align ParamAlign = I->getParamAlign().valueOrOne();
1601 O << ".align " << ParamAlign.value() << " ";
1602 }
1603 O << TLI->getParamName(F, paramIndex);
1604 continue;
1605 }
1606
1607 // non-pointer scalar to kernel func
1608 O << "\t.param .";
1609 // Special case: predicate operands become .u8 types
1610 if (Ty->isIntegerTy(1))
1611 O << "u8";
1612 else
1613 O << getPTXFundamentalTypeStr(Ty);
1614 O << " ";
1615 O << TLI->getParamName(F, paramIndex);
1616 continue;
1617 }
1618 // Non-kernel function, just print .param .b<size> for ABI
1619 // and .reg .b<size> for non-ABI
1620 unsigned sz = 0;
1621 if (isa<IntegerType>(Ty)) {
1622 sz = cast<IntegerType>(Ty)->getBitWidth();
1624 } else if (PTy) {
1625 assert(PTySizeInBits && "Invalid pointer size");
1626 sz = PTySizeInBits;
1627 } else
1628 sz = Ty->getPrimitiveSizeInBits();
1629 if (isABI)
1630 O << "\t.param .b" << sz << " ";
1631 else
1632 O << "\t.reg .b" << sz << " ";
1633 O << TLI->getParamName(F, paramIndex);
1634 continue;
1635 }
1636
1637 // param has byVal attribute.
1638 Type *ETy = PAL.getParamByValType(paramIndex);
1639 assert(ETy && "Param should have byval type");
1640
1641 if (isABI || isKernelFunc) {
1642 // Just print .param .align <a> .b8 .param[size];
1643 // <a> = optimal alignment for the element type; always multiple of
1644 // PAL.getParamAlignment
1645 // size = typeallocsize of element type
1646 Align OptimalAlign =
1647 isKernelFunc
1648 ? getOptimalAlignForParam(ETy)
1649 : TLI->getFunctionByValParamAlign(
1650 F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
1651
1652 unsigned sz = DL.getTypeAllocSize(ETy);
1653 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1654 O << TLI->getParamName(F, paramIndex);
1655 O << "[" << sz << "]";
1656 continue;
1657 } else {
1658 // Split the ETy into constituent parts and
1659 // print .param .b<size> <name> for each part.
1660 // Further, if a part is vector, print the above for
1661 // each vector element.
1662 SmallVector<EVT, 16> vtparts;
1663 ComputeValueVTs(*TLI, DL, ETy, vtparts);
1664 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1665 unsigned elems = 1;
1666 EVT elemtype = vtparts[i];
1667 if (vtparts[i].isVector()) {
1668 elems = vtparts[i].getVectorNumElements();
1669 elemtype = vtparts[i].getVectorElementType();
1670 }
1671
1672 for (unsigned j = 0, je = elems; j != je; ++j) {
1673 unsigned sz = elemtype.getSizeInBits();
1674 if (elemtype.isInteger())
1676 O << "\t.reg .b" << sz << " ";
1677 O << TLI->getParamName(F, paramIndex);
1678 if (j < je - 1)
1679 O << ",\n";
1680 ++paramIndex;
1681 }
1682 if (i < e - 1)
1683 O << ",\n";
1684 }
1685 --paramIndex;
1686 continue;
1687 }
1688 }
1689
1690 if (F->isVarArg()) {
1691 if (!first)
1692 O << ",\n";
1693 O << "\t.param .align " << STI.getMaxRequiredAlignment();
1694 O << " .b8 ";
1695 O << TLI->getParamName(F, /* vararg */ -1) << "[]";
1696 }
1697
1698 O << "\n)";
1699}
1700
1701void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1702 const MachineFunction &MF) {
1703 SmallString<128> Str;
1705
1706 // Map the global virtual register number to a register class specific
1707 // virtual register number starting from 1 with that class.
1709 //unsigned numRegClasses = TRI->getNumRegClasses();
1710
1711 // Emit the Fake Stack Object
1712 const MachineFrameInfo &MFI = MF.getFrameInfo();
1713 int NumBytes = (int) MFI.getStackSize();
1714 if (NumBytes) {
1715 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1716 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1717 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1718 O << "\t.reg .b64 \t%SP;\n";
1719 O << "\t.reg .b64 \t%SPL;\n";
1720 } else {
1721 O << "\t.reg .b32 \t%SP;\n";
1722 O << "\t.reg .b32 \t%SPL;\n";
1723 }
1724 }
1725
1726 // Go through all virtual registers to establish the mapping between the
1727 // global virtual
1728 // register number and the per class virtual register number.
1729 // We use the per class virtual register number in the ptx output.
1730 unsigned int numVRs = MRI->getNumVirtRegs();
1731 for (unsigned i = 0; i < numVRs; i++) {
1733 const TargetRegisterClass *RC = MRI->getRegClass(vr);
1734 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1735 int n = regmap.size();
1736 regmap.insert(std::make_pair(vr, n + 1));
1737 }
1738
1739 // Emit register declarations
1740 // @TODO: Extract out the real register usage
1741 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1742 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1743 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1744 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1745 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1746 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1747 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1748
1749 // Emit declaration of the virtual registers or 'physical' registers for
1750 // each register class
1751 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1752 const TargetRegisterClass *RC = TRI->getRegClass(i);
1753 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1754 std::string rcname = getNVPTXRegClassName(RC);
1755 std::string rcStr = getNVPTXRegClassStr(RC);
1756 int n = regmap.size();
1757
1758 // Only declare those registers that may be used.
1759 if (n) {
1760 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1761 << ">;\n";
1762 }
1763 }
1764
1765 OutStreamer->emitRawText(O.str());
1766}
1767
1768void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1769 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1770 bool ignored;
1771 unsigned int numHex;
1772 const char *lead;
1773
1774 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1775 numHex = 8;
1776 lead = "0f";
1778 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1779 numHex = 16;
1780 lead = "0d";
1782 } else
1783 llvm_unreachable("unsupported fp type");
1784
1785 APInt API = APF.bitcastToAPInt();
1786 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1787}
1788
1789void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1790 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1791 O << CI->getValue();
1792 return;
1793 }
1794 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1795 printFPConstant(CFP, O);
1796 return;
1797 }
1798 if (isa<ConstantPointerNull>(CPV)) {
1799 O << "0";
1800 return;
1801 }
1802 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1803 bool IsNonGenericPointer = false;
1804 if (GVar->getType()->getAddressSpace() != 0) {
1805 IsNonGenericPointer = true;
1806 }
1807 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1808 O << "generic(";
1809 getSymbol(GVar)->print(O, MAI);
1810 O << ")";
1811 } else {
1812 getSymbol(GVar)->print(O, MAI);
1813 }
1814 return;
1815 }
1816 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1817 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1818 printMCExpr(*E, O);
1819 return;
1820 }
1821 llvm_unreachable("Not scalar type found in printScalarConstant()");
1822}
1823
1824void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1825 AggBuffer *AggBuffer) {
1826 const DataLayout &DL = getDataLayout();
1827 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1828 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1829 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1830 // only the space allocated by CPV.
1831 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1832 return;
1833 }
1834
1835 // Helper for filling AggBuffer with APInts.
1836 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1837 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1838 SmallVector<unsigned char, 16> Buf(NumBytes);
1839 for (unsigned I = 0; I < NumBytes; ++I) {
1840 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1841 }
1842 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1843 };
1844
1845 switch (CPV->getType()->getTypeID()) {
1846 case Type::IntegerTyID:
1847 if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1848 AddIntToBuffer(CI->getValue());
1849 break;
1850 }
1851 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1852 if (const auto *CI =
1853 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1854 AddIntToBuffer(CI->getValue());
1855 break;
1856 }
1857 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1858 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1859 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1860 AggBuffer->addZeros(AllocSize);
1861 break;
1862 }
1863 }
1864 llvm_unreachable("unsupported integer const type");
1865 break;
1866
1867 case Type::HalfTyID:
1868 case Type::BFloatTyID:
1869 case Type::FloatTyID:
1870 case Type::DoubleTyID:
1871 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1872 break;
1873
1874 case Type::PointerTyID: {
1875 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1876 AggBuffer->addSymbol(GVar, GVar);
1877 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1878 const Value *v = Cexpr->stripPointerCasts();
1879 AggBuffer->addSymbol(v, Cexpr);
1880 }
1881 AggBuffer->addZeros(AllocSize);
1882 break;
1883 }
1884
1885 case Type::ArrayTyID:
1887 case Type::StructTyID: {
1888 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1889 bufferAggregateConstant(CPV, AggBuffer);
1890 if (Bytes > AllocSize)
1891 AggBuffer->addZeros(Bytes - AllocSize);
1892 } else if (isa<ConstantAggregateZero>(CPV))
1893 AggBuffer->addZeros(Bytes);
1894 else
1895 llvm_unreachable("Unexpected Constant type");
1896 break;
1897 }
1898
1899 default:
1900 llvm_unreachable("unsupported type");
1901 }
1902}
1903
1904void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1905 AggBuffer *aggBuffer) {
1906 const DataLayout &DL = getDataLayout();
1907 int Bytes;
1908
1909 // Integers of arbitrary width
1910 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1911 APInt Val = CI->getValue();
1912 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1913 uint8_t Byte = Val.getLoBits(8).getZExtValue();
1914 aggBuffer->addBytes(&Byte, 1, 1);
1915 Val.lshrInPlace(8);
1916 }
1917 return;
1918 }
1919
1920 // Old constants
1921 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1922 if (CPV->getNumOperands())
1923 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1924 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1925 return;
1926 }
1927
1928 if (const ConstantDataSequential *CDS =
1929 dyn_cast<ConstantDataSequential>(CPV)) {
1930 if (CDS->getNumElements())
1931 for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1932 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1933 aggBuffer);
1934 return;
1935 }
1936
1937 if (isa<ConstantStruct>(CPV)) {
1938 if (CPV->getNumOperands()) {
1939 StructType *ST = cast<StructType>(CPV->getType());
1940 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1941 if (i == (e - 1))
1942 Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1943 DL.getTypeAllocSize(ST) -
1944 DL.getStructLayout(ST)->getElementOffset(i);
1945 else
1946 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1947 DL.getStructLayout(ST)->getElementOffset(i);
1948 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1949 }
1950 }
1951 return;
1952 }
1953 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1954}
1955
1956/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1957/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1958/// expressions that are representable in PTX and create
1959/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1960const MCExpr *
1961NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1962 MCContext &Ctx = OutContext;
1963
1964 if (CV->isNullValue() || isa<UndefValue>(CV))
1965 return MCConstantExpr::create(0, Ctx);
1966
1967 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1968 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1969
1970 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1971 const MCSymbolRefExpr *Expr =
1973 if (ProcessingGeneric) {
1974 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1975 } else {
1976 return Expr;
1977 }
1978 }
1979
1980 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1981 if (!CE) {
1982 llvm_unreachable("Unknown constant value to lower!");
1983 }
1984
1985 switch (CE->getOpcode()) {
1986 default:
1987 break; // Error
1988
1989 case Instruction::AddrSpaceCast: {
1990 // Strip the addrspacecast and pass along the operand
1991 PointerType *DstTy = cast<PointerType>(CE->getType());
1992 if (DstTy->getAddressSpace() == 0)
1993 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1994
1995 break; // Error
1996 }
1997
1998 case Instruction::GetElementPtr: {
1999 const DataLayout &DL = getDataLayout();
2000
2001 // Generate a symbolic expression for the byte address
2002 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2003 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2004
2005 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2006 ProcessingGeneric);
2007 if (!OffsetAI)
2008 return Base;
2009
2010 int64_t Offset = OffsetAI.getSExtValue();
2012 Ctx);
2013 }
2014
2015 case Instruction::Trunc:
2016 // We emit the value and depend on the assembler to truncate the generated
2017 // expression properly. This is important for differences between
2018 // blockaddress labels. Since the two labels are in the same function, it
2019 // is reasonable to treat their delta as a 32-bit value.
2020 [[fallthrough]];
2021 case Instruction::BitCast:
2022 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2023
2024 case Instruction::IntToPtr: {
2025 const DataLayout &DL = getDataLayout();
2026
2027 // Handle casts to pointers by changing them into casts to the appropriate
2028 // integer type. This promotes constant folding and simplifies this code.
2029 Constant *Op = CE->getOperand(0);
2030 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2031 /*IsSigned*/ false, DL);
2032 if (Op)
2033 return lowerConstantForGV(Op, ProcessingGeneric);
2034
2035 break; // Error
2036 }
2037
2038 case Instruction::PtrToInt: {
2039 const DataLayout &DL = getDataLayout();
2040
2041 // Support only foldable casts to/from pointers that can be eliminated by
2042 // changing the pointer to the appropriately sized integer type.
2043 Constant *Op = CE->getOperand(0);
2044 Type *Ty = CE->getType();
2045
2046 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2047
2048 // We can emit the pointer value into this slot if the slot is an
2049 // integer slot equal to the size of the pointer.
2050 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2051 return OpExpr;
2052
2053 // Otherwise the pointer is smaller than the resultant integer, mask off
2054 // the high bits so we are sure to get a proper truncation if the input is
2055 // a constant expr.
2056 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2057 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2058 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2059 }
2060
2061 // The MC library also has a right-shift operator, but it isn't consistently
2062 // signed or unsigned between different targets.
2063 case Instruction::Add: {
2064 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2065 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2066 switch (CE->getOpcode()) {
2067 default: llvm_unreachable("Unknown binary operator constant cast expr");
2068 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2069 }
2070 }
2071 }
2072
2073 // If the code isn't optimized, there may be outstanding folding
2074 // opportunities. Attempt to fold the expression using DataLayout as a
2075 // last resort before giving up.
2077 if (C != CE)
2078 return lowerConstantForGV(C, ProcessingGeneric);
2079
2080 // Otherwise report the problem to the user.
2081 std::string S;
2083 OS << "Unsupported expression in static initializer: ";
2084 CE->printAsOperand(OS, /*PrintType=*/false,
2085 !MF ? nullptr : MF->getFunction().getParent());
2086 report_fatal_error(Twine(OS.str()));
2087}
2088
2089// Copy of MCExpr::print customized for NVPTX
2090void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2091 switch (Expr.getKind()) {
2092 case MCExpr::Target:
2093 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2094 case MCExpr::Constant:
2095 OS << cast<MCConstantExpr>(Expr).getValue();
2096 return;
2097
2098 case MCExpr::SymbolRef: {
2099 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2100 const MCSymbol &Sym = SRE.getSymbol();
2101 Sym.print(OS, MAI);
2102 return;
2103 }
2104
2105 case MCExpr::Unary: {
2106 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2107 switch (UE.getOpcode()) {
2108 case MCUnaryExpr::LNot: OS << '!'; break;
2109 case MCUnaryExpr::Minus: OS << '-'; break;
2110 case MCUnaryExpr::Not: OS << '~'; break;
2111 case MCUnaryExpr::Plus: OS << '+'; break;
2112 }
2113 printMCExpr(*UE.getSubExpr(), OS);
2114 return;
2115 }
2116
2117 case MCExpr::Binary: {
2118 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2119
2120 // Only print parens around the LHS if it is non-trivial.
2121 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2122 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2123 printMCExpr(*BE.getLHS(), OS);
2124 } else {
2125 OS << '(';
2126 printMCExpr(*BE.getLHS(), OS);
2127 OS<< ')';
2128 }
2129
2130 switch (BE.getOpcode()) {
2131 case MCBinaryExpr::Add:
2132 // Print "X-42" instead of "X+-42".
2133 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2134 if (RHSC->getValue() < 0) {
2135 OS << RHSC->getValue();
2136 return;
2137 }
2138 }
2139
2140 OS << '+';
2141 break;
2142 default: llvm_unreachable("Unhandled binary operator");
2143 }
2144
2145 // Only print parens around the LHS if it is non-trivial.
2146 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2147 printMCExpr(*BE.getRHS(), OS);
2148 } else {
2149 OS << '(';
2150 printMCExpr(*BE.getRHS(), OS);
2151 OS << ')';
2152 }
2153 return;
2154 }
2155 }
2156
2157 llvm_unreachable("Invalid expression kind!");
2158}
2159
2160/// PrintAsmOperand - Print out an operand for an inline asm expression.
2161///
2162bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2163 const char *ExtraCode, raw_ostream &O) {
2164 if (ExtraCode && ExtraCode[0]) {
2165 if (ExtraCode[1] != 0)
2166 return true; // Unknown modifier.
2167
2168 switch (ExtraCode[0]) {
2169 default:
2170 // See if this is a generic print operand
2171 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2172 case 'r':
2173 break;
2174 }
2175 }
2176
2177 printOperand(MI, OpNo, O);
2178
2179 return false;
2180}
2181
2182bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2183 unsigned OpNo,
2184 const char *ExtraCode,
2185 raw_ostream &O) {
2186 if (ExtraCode && ExtraCode[0])
2187 return true; // Unknown modifier
2188
2189 O << '[';
2190 printMemOperand(MI, OpNo, O);
2191 O << ']';
2192
2193 return false;
2194}
2195
2196void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2197 raw_ostream &O) {
2198 const MachineOperand &MO = MI->getOperand(OpNum);
2199 switch (MO.getType()) {
2201 if (MO.getReg().isPhysical()) {
2202 if (MO.getReg() == NVPTX::VRDepot)
2204 else
2206 } else {
2207 emitVirtualRegister(MO.getReg(), O);
2208 }
2209 break;
2210
2212 O << MO.getImm();
2213 break;
2214
2216 printFPConstant(MO.getFPImm(), O);
2217 break;
2218
2220 PrintSymbolOperand(MO, O);
2221 break;
2222
2224 MO.getMBB()->getSymbol()->print(O, MAI);
2225 break;
2226
2227 default:
2228 llvm_unreachable("Operand type not supported.");
2229 }
2230}
2231
2232void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2233 raw_ostream &O, const char *Modifier) {
2234 printOperand(MI, OpNum, O);
2235
2236 if (Modifier && strcmp(Modifier, "add") == 0) {
2237 O << ", ";
2238 printOperand(MI, OpNum + 1, O);
2239 } else {
2240 if (MI->getOperand(OpNum + 1).isImm() &&
2241 MI->getOperand(OpNum + 1).getImm() == 0)
2242 return; // don't print ',0' or '+0'
2243 O << "+";
2244 printOperand(MI, OpNum + 1, O);
2245 }
2246}
2247
2248// Force static initialization.
2252}
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
Symbol * Sym
Definition: ELF_riscv.cpp:477
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
static bool isEmptyXXStructor(GlobalVariable *GV)
#define DEPOTNAME
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
static bool usedInGlobalVarDef(const Constant *C)
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
static cl::opt< bool > LowerCtorDtor("nvptx-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(false), cl::Hidden)
static bool ShouldPassAsArray(Type *Ty)
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * name
Definition: SMEABIPass.cpp:49
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1485
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:679
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:401
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:87
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:90
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:102
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:431
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:377
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:121
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:105
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:94
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:399
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:99
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:385
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:405
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:396
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
ConstantArray - Constant Array Declarations.
Definition: Constants.h:408
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:568
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1002
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
const APFloat & getValueAPF() const
Definition: Constants.h:296
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:145
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:136
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
const Constant * getAliasee() const
Definition: GlobalAlias.h:84
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:117
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:109
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:510
bool hasExternalLinkage() const
Definition: GlobalValue.h:506
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:273
bool hasLocalLinkage() const
Definition: GlobalValue.h:523
bool hasPrivateLinkage() const
Definition: GlobalValue.h:522
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
bool hasWeakLinkage() const
Definition: GlobalValue.h:517
bool hasCommonLinkage() const
Definition: GlobalValue.h:527
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:507
Type * getValueType() const
Definition: GlobalValue.h:292
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:553
Binary assembler expressions.
Definition: MCExpr.h:484
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:631
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:634
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:533
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:528
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:628
@ Add
Addition.
Definition: MCExpr.h:487
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Context object for machine code objects.
Definition: MCContext.h:76
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:200
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
@ Unary
Unary expressions.
Definition: MCExpr.h:41
@ Constant
Constant expressions.
Definition: MCExpr.h:39
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
@ Target
Target specific expression.
Definition: MCExpr.h:42
@ Binary
Binary expressions.
Definition: MCExpr.h:38
ExprKind getKind() const
Definition: MCExpr.h:81
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
const MCSymbol & getSymbol() const
Definition: MCExpr.h:402
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:389
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:40
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
Unary assembler expressions.
Definition: MCExpr.h:428
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:471
@ Minus
Unary minus.
Definition: MCExpr.h:432
@ Plus
Unary plus.
Definition: MCExpr.h:434
@ Not
Bitwise negation.
Definition: MCExpr.h:433
@ LNot
Logical negation.
Definition: MCExpr.h:431
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:474
Metadata node.
Definition: Metadata.h:1037
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
bool hasDebugInfo() const
Returns true if valid debug info is present.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:59
static const char * getRegisterName(MCRegister Reg)
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
bool hasImageHandles() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
UniqueStringSaver & getStrPool() const
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:693
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Class to represent struct types.
Definition: DerivedTypes.h:216
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition: Type.h:75
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:74
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:73
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:295
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:243
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef save(const char *S)
Definition: StringSaver.h:52
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:672
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ NVCL
Definition: NVPTX.h:79
@ CUDA
Definition: NVPTX.h:80
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
uint64_t read64le(const void *P)
Definition: Endian.h:412
uint32_t read32le(const void *P)
Definition: Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< TypeSize > *Offsets, TypeSize StartingOffset)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:122
@ Offset
Definition: DWP.cpp:440
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1684
std::string getSamplerName(const Value &val)
AddressSpace
Definition: NVPTXBaseInfo.h:21
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool getMinCTASm(const Function &F, unsigned &x)
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isImage(const Value &val)
bool getMaxNTIDz(const Function &F, unsigned &z)
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool isManaged(const Value &val)
unsigned promoteScalarArgumentSize(unsigned size)
bool isSurface(const Value &val)
void clearAnnotationCache(const Module *Mod)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
std::string getSurfaceName(const Value &val)
bool getReqNTIDx(const Function &F, unsigned &x)
bool getReqNTIDy(const Function &F, unsigned &y)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:200
bool getMaxNReg(const Function &F, unsigned &x)
bool isTexture(const Value &val)
bool isImageWriteOnly(const Value &val)
bool isImageReadWrite(const Value &val)
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
std::string getTextureName(const Value &val)
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
bool getReqNTIDz(const Function &F, unsigned &z)
bool getMaxNTIDx(const Function &F, unsigned &x)
bool getMaxNTIDy(const Function &F, unsigned &y)
bool isSampler(const Value &val)
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool getMaxClusterRank(const Function &F, unsigned &x)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:927
Target & getTheNVPTXTarget32()
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...