LLVM 17.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/Twine.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/BasicBlock.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/Constants.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/DebugLoc.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/GlobalValue.h"
62#include "llvm/IR/Instruction.h"
63#include "llvm/IR/LLVMContext.h"
64#include "llvm/IR/Module.h"
65#include "llvm/IR/Operator.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/User.h"
68#include "llvm/MC/MCExpr.h"
69#include "llvm/MC/MCInst.h"
70#include "llvm/MC/MCInstrDesc.h"
71#include "llvm/MC/MCStreamer.h"
72#include "llvm/MC/MCSymbol.h"
76#include "llvm/Support/Endian.h"
79#include "llvm/Support/Path.h"
85#include <cassert>
86#include <cstdint>
87#include <cstring>
88#include <new>
89#include <string>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94
95static cl::opt<bool>
96 LowerCtorDtor("nvptx-lower-global-ctor-dtor",
97 cl::desc("Lower GPU ctor / dtors to globals on the device."),
98 cl::init(false), cl::Hidden);
99
100#define DEPOTNAME "__local_depot"
101
102/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
103/// depends.
104static void
107 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
108 Globals.insert(GV);
109 else {
110 if (const User *U = dyn_cast<User>(V)) {
111 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
112 DiscoverDependentGlobals(U->getOperand(i), Globals);
113 }
114 }
115 }
116}
117
118/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
119/// instances to be emitted, but only after any dependents have been added
120/// first.
121static void
126 // Have we already visited this one?
127 if (Visited.count(GV))
128 return;
129
130 // Do we have a circular dependency?
131 if (!Visiting.insert(GV).second)
132 report_fatal_error("Circular dependency found in global variable set");
133
134 // Make sure we visit all dependents first
136 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
137 DiscoverDependentGlobals(GV->getOperand(i), Others);
138
139 for (const GlobalVariable *GV : Others)
140 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
141
142 // Now we can visit ourself
143 Order.push_back(GV);
144 Visited.insert(GV);
145 Visiting.erase(GV);
146}
147
148void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
149 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
150 getSubtargetInfo().getFeatureBits());
151
152 MCInst Inst;
153 lowerToMCInst(MI, Inst);
155}
156
157// Handle symbol backtracking for targets that do not support image handles
158bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
159 unsigned OpNo, MCOperand &MCOp) {
160 const MachineOperand &MO = MI->getOperand(OpNo);
161 const MCInstrDesc &MCID = MI->getDesc();
162
163 if (MCID.TSFlags & NVPTXII::IsTexFlag) {
164 // This is a texture fetch, so operand 4 is a texref and operand 5 is
165 // a samplerref
166 if (OpNo == 4 && MO.isImm()) {
167 lowerImageHandleSymbol(MO.getImm(), MCOp);
168 return true;
169 }
170 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
171 lowerImageHandleSymbol(MO.getImm(), MCOp);
172 return true;
173 }
174
175 return false;
176 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
177 unsigned VecSize =
178 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
179
180 // For a surface load of vector size N, the Nth operand will be the surfref
181 if (OpNo == VecSize && MO.isImm()) {
182 lowerImageHandleSymbol(MO.getImm(), MCOp);
183 return true;
184 }
185
186 return false;
187 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
188 // This is a surface store, so operand 0 is a surfref
189 if (OpNo == 0 && MO.isImm()) {
190 lowerImageHandleSymbol(MO.getImm(), MCOp);
191 return true;
192 }
193
194 return false;
195 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
196 // This is a query, so operand 1 is a surfref/texref
197 if (OpNo == 1 && MO.isImm()) {
198 lowerImageHandleSymbol(MO.getImm(), MCOp);
199 return true;
200 }
201
202 return false;
203 }
204
205 return false;
206}
207
208void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
209 // Ewwww
211 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
213 const char *Sym = MFI->getImageHandleSymbol(Index);
214 StringRef SymName = nvTM.getStrPool().save(Sym);
215 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
216}
217
218void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
219 OutMI.setOpcode(MI->getOpcode());
220 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
221 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
222 const MachineOperand &MO = MI->getOperand(0);
223 OutMI.addOperand(GetSymbolRef(
225 return;
226 }
227
228 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
229 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
230 const MachineOperand &MO = MI->getOperand(i);
231
232 MCOperand MCOp;
233 if (!STI.hasImageHandles()) {
234 if (lowerImageHandleOperand(MI, i, MCOp)) {
235 OutMI.addOperand(MCOp);
236 continue;
237 }
238 }
239
240 if (lowerOperand(MO, MCOp))
241 OutMI.addOperand(MCOp);
242 }
243}
244
245bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
246 MCOperand &MCOp) {
247 switch (MO.getType()) {
248 default: llvm_unreachable("unknown operand type");
250 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
251 break;
253 MCOp = MCOperand::createImm(MO.getImm());
254 break;
257 MO.getMBB()->getSymbol(), OutContext));
258 break;
260 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
261 break;
263 MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
264 break;
266 const ConstantFP *Cnt = MO.getFPImm();
267 const APFloat &Val = Cnt->getValueAPF();
268
269 switch (Cnt->getType()->getTypeID()) {
270 default: report_fatal_error("Unsupported FP type"); break;
271 case Type::HalfTyID:
274 break;
275 case Type::FloatTyID:
278 break;
279 case Type::DoubleTyID:
282 break;
283 }
284 break;
285 }
286 }
287 return true;
288}
289
290unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
292 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
293
294 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
295 unsigned RegNum = RegMap[Reg];
296
297 // Encode the register class in the upper 4 bits
298 // Must be kept in sync with NVPTXInstPrinter::printRegName
299 unsigned Ret = 0;
300 if (RC == &NVPTX::Int1RegsRegClass) {
301 Ret = (1 << 28);
302 } else if (RC == &NVPTX::Int16RegsRegClass) {
303 Ret = (2 << 28);
304 } else if (RC == &NVPTX::Int32RegsRegClass) {
305 Ret = (3 << 28);
306 } else if (RC == &NVPTX::Int64RegsRegClass) {
307 Ret = (4 << 28);
308 } else if (RC == &NVPTX::Float32RegsRegClass) {
309 Ret = (5 << 28);
310 } else if (RC == &NVPTX::Float64RegsRegClass) {
311 Ret = (6 << 28);
312 } else if (RC == &NVPTX::Float16RegsRegClass) {
313 Ret = (7 << 28);
314 } else if (RC == &NVPTX::Float16x2RegsRegClass) {
315 Ret = (8 << 28);
316 } else {
317 report_fatal_error("Bad register class");
318 }
319
320 // Insert the vreg number
321 Ret |= (RegNum & 0x0FFFFFFF);
322 return Ret;
323 } else {
324 // Some special-use registers are actually physical registers.
325 // Encode this as the register class ID of 0 and the real register ID.
326 return Reg & 0x0FFFFFFF;
327 }
328}
329
330MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
331 const MCExpr *Expr;
333 OutContext);
334 return MCOperand::createExpr(Expr);
335}
336
337void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
338 const DataLayout &DL = getDataLayout();
340 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
341
342 Type *Ty = F->getReturnType();
343
344 bool isABI = (STI.getSmVersion() >= 20);
345
346 if (Ty->getTypeID() == Type::VoidTyID)
347 return;
348
349 O << " (";
350
351 if (isABI) {
352 if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
353 unsigned size = 0;
354 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
355 size = ITy->getBitWidth();
356 } else {
357 assert(Ty->isFloatingPointTy() && "Floating point type expected here");
359 }
360 // PTX ABI requires all scalar return values to be at least 32
361 // bits in size. fp16 normally uses .b16 as its storage type in
362 // PTX, so its size must be adjusted here, too.
364
365 O << ".param .b" << size << " func_retval0";
366 } else if (isa<PointerType>(Ty)) {
367 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
368 << " func_retval0";
369 } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
370 unsigned totalsz = DL.getTypeAllocSize(Ty);
371 unsigned retAlignment = 0;
372 if (!getAlign(*F, 0, retAlignment))
373 retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
374 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
375 << "]";
376 } else
377 llvm_unreachable("Unknown return type");
378 } else {
379 SmallVector<EVT, 16> vtparts;
380 ComputeValueVTs(*TLI, DL, Ty, vtparts);
381 unsigned idx = 0;
382 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
383 unsigned elems = 1;
384 EVT elemtype = vtparts[i];
385 if (vtparts[i].isVector()) {
386 elems = vtparts[i].getVectorNumElements();
387 elemtype = vtparts[i].getVectorElementType();
388 }
389
390 for (unsigned j = 0, je = elems; j != je; ++j) {
391 unsigned sz = elemtype.getSizeInBits();
392 if (elemtype.isInteger())
394 O << ".reg .b" << sz << " func_retval" << idx;
395 if (j < je - 1)
396 O << ", ";
397 ++idx;
398 }
399 if (i < e - 1)
400 O << ", ";
401 }
402 }
403 O << ") ";
404}
405
406void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
407 raw_ostream &O) {
408 const Function &F = MF.getFunction();
409 printReturnValStr(&F, O);
410}
411
412// Return true if MBB is the header of a loop marked with
413// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
414bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
415 const MachineBasicBlock &MBB) const {
416 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
417 // We insert .pragma "nounroll" only to the loop header.
418 if (!LI.isLoopHeader(&MBB))
419 return false;
420
421 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
422 // we iterate through each back edge of the loop with header MBB, and check
423 // whether its metadata contains llvm.loop.unroll.disable.
424 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
425 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
426 // Edges from other loops to MBB are not back edges.
427 continue;
428 }
429 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
430 if (MDNode *LoopID =
431 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
432 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
433 return true;
434 if (MDNode *UnrollCountMD =
435 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
436 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
437 ->isOne())
438 return true;
439 }
440 }
441 }
442 }
443 return false;
444}
445
446void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
448 if (isLoopHeaderOfNoUnroll(MBB))
449 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
450}
451
452void NVPTXAsmPrinter::emitFunctionEntryLabel() {
455
456 if (!GlobalsEmitted) {
457 emitGlobals(*MF->getFunction().getParent());
458 GlobalsEmitted = true;
459 }
460
461 // Set up
462 MRI = &MF->getRegInfo();
463 F = &MF->getFunction();
464 emitLinkageDirective(F, O);
465 if (isKernelFunction(*F))
466 O << ".entry ";
467 else {
468 O << ".func ";
469 printReturnValStr(*MF, O);
470 }
471
473
474 emitFunctionParamList(F, O);
475
476 if (isKernelFunction(*F))
477 emitKernelFunctionDirectives(*F, O);
478
480 O << ".noreturn";
481
482 OutStreamer->emitRawText(O.str());
483
484 VRegMapping.clear();
485 // Emit open brace for function body.
486 OutStreamer->emitRawText(StringRef("{\n"));
487 setAndEmitFunctionVirtualRegisters(*MF);
488 // Emit initial .loc debug directive for correct relocation symbol data.
489 if (MMI && MMI->hasDebugInfo())
491}
492
494 bool Result = AsmPrinter::runOnMachineFunction(F);
495 // Emit closing brace for the body of function F.
496 // The closing brace must be emitted here because we need to emit additional
497 // debug labels/data after the last basic block.
498// We need to emit the closing brace here because we don't have a function
499// that finishes emission of the function body.
500 OutStreamer->emitRawText(StringRef("}\n"));
501 return Result;
502}
503
504void NVPTXAsmPrinter::emitFunctionBodyStart() {
506 raw_svector_ostream O(Str);
507 emitDemotedVars(&MF->getFunction(), O);
508 OutStreamer->emitRawText(O.str());
509}
510
511void NVPTXAsmPrinter::emitFunctionBodyEnd() {
512 VRegMapping.clear();
513}
514
518 return OutContext.getOrCreateSymbol(Str);
519}
520
521void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
522 Register RegNo = MI->getOperand(0).getReg();
523 if (RegNo.isVirtual()) {
524 OutStreamer->AddComment(Twine("implicit-def: ") +
526 } else {
527 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
528 OutStreamer->AddComment(Twine("implicit-def: ") +
529 STI.getRegisterInfo()->getName(RegNo));
530 }
531 OutStreamer->addBlankLine();
532}
533
534void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
535 raw_ostream &O) const {
536 // If the NVVM IR has some of reqntid* specified, then output
537 // the reqntid directive, and set the unspecified ones to 1.
538 // If none of reqntid* is specified, don't output reqntid directive.
539 unsigned reqntidx, reqntidy, reqntidz;
540 bool specified = false;
541 if (!getReqNTIDx(F, reqntidx))
542 reqntidx = 1;
543 else
544 specified = true;
545 if (!getReqNTIDy(F, reqntidy))
546 reqntidy = 1;
547 else
548 specified = true;
549 if (!getReqNTIDz(F, reqntidz))
550 reqntidz = 1;
551 else
552 specified = true;
553
554 if (specified)
555 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
556 << "\n";
557
558 // If the NVVM IR has some of maxntid* specified, then output
559 // the maxntid directive, and set the unspecified ones to 1.
560 // If none of maxntid* is specified, don't output maxntid directive.
561 unsigned maxntidx, maxntidy, maxntidz;
562 specified = false;
563 if (!getMaxNTIDx(F, maxntidx))
564 maxntidx = 1;
565 else
566 specified = true;
567 if (!getMaxNTIDy(F, maxntidy))
568 maxntidy = 1;
569 else
570 specified = true;
571 if (!getMaxNTIDz(F, maxntidz))
572 maxntidz = 1;
573 else
574 specified = true;
575
576 if (specified)
577 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
578 << "\n";
579
580 unsigned mincta;
581 if (getMinCTASm(F, mincta))
582 O << ".minnctapersm " << mincta << "\n";
583
584 unsigned maxnreg;
585 if (getMaxNReg(F, maxnreg))
586 O << ".maxnreg " << maxnreg << "\n";
587}
588
589std::string
591 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
592
593 std::string Name;
594 raw_string_ostream NameStr(Name);
595
596 VRegRCMap::const_iterator I = VRegMapping.find(RC);
597 assert(I != VRegMapping.end() && "Bad register class");
598 const DenseMap<unsigned, unsigned> &RegMap = I->second;
599
600 VRegMap::const_iterator VI = RegMap.find(Reg);
601 assert(VI != RegMap.end() && "Bad virtual register");
602 unsigned MappedVR = VI->second;
603
604 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
605
606 NameStr.flush();
607 return Name;
608}
609
610void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
611 raw_ostream &O) {
612 O << getVirtualRegisterName(vr);
613}
614
615void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
616 emitLinkageDirective(F, O);
617 if (isKernelFunction(*F))
618 O << ".entry ";
619 else
620 O << ".func ";
621 printReturnValStr(F, O);
622 getSymbol(F)->print(O, MAI);
623 O << "\n";
624 emitFunctionParamList(F, O);
626 O << ".noreturn";
627 O << ";\n";
628}
629
630static bool usedInGlobalVarDef(const Constant *C) {
631 if (!C)
632 return false;
633
634 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
635 return GV->getName() != "llvm.used";
636 }
637
638 for (const User *U : C->users())
639 if (const Constant *C = dyn_cast<Constant>(U))
641 return true;
642
643 return false;
644}
645
646static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
647 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
648 if (othergv->getName() == "llvm.used")
649 return true;
650 }
651
652 if (const Instruction *instr = dyn_cast<Instruction>(U)) {
653 if (instr->getParent() && instr->getParent()->getParent()) {
654 const Function *curFunc = instr->getParent()->getParent();
655 if (oneFunc && (curFunc != oneFunc))
656 return false;
657 oneFunc = curFunc;
658 return true;
659 } else
660 return false;
661 }
662
663 for (const User *UU : U->users())
664 if (!usedInOneFunc(UU, oneFunc))
665 return false;
666
667 return true;
668}
669
670/* Find out if a global variable can be demoted to local scope.
671 * Currently, this is valid for CUDA shared variables, which have local
672 * scope and global lifetime. So the conditions to check are :
673 * 1. Is the global variable in shared address space?
674 * 2. Does it have internal linkage?
675 * 3. Is the global variable referenced only in one function?
676 */
677static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
678 if (!gv->hasInternalLinkage())
679 return false;
680 PointerType *Pty = gv->getType();
681 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
682 return false;
683
684 const Function *oneFunc = nullptr;
685
686 bool flag = usedInOneFunc(gv, oneFunc);
687 if (!flag)
688 return false;
689 if (!oneFunc)
690 return false;
691 f = oneFunc;
692 return true;
693}
694
695static bool useFuncSeen(const Constant *C,
697 for (const User *U : C->users()) {
698 if (const Constant *cu = dyn_cast<Constant>(U)) {
699 if (useFuncSeen(cu, seenMap))
700 return true;
701 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
702 const BasicBlock *bb = I->getParent();
703 if (!bb)
704 continue;
705 const Function *caller = bb->getParent();
706 if (!caller)
707 continue;
708 if (seenMap.contains(caller))
709 return true;
710 }
711 }
712 return false;
713}
714
715void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
717 for (const Function &F : M) {
718 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
719 emitDeclaration(&F, O);
720 continue;
721 }
722
723 if (F.isDeclaration()) {
724 if (F.use_empty())
725 continue;
726 if (F.getIntrinsicID())
727 continue;
728 emitDeclaration(&F, O);
729 continue;
730 }
731 for (const User *U : F.users()) {
732 if (const Constant *C = dyn_cast<Constant>(U)) {
733 if (usedInGlobalVarDef(C)) {
734 // The use is in the initialization of a global variable
735 // that is a function pointer, so print a declaration
736 // for the original function
737 emitDeclaration(&F, O);
738 break;
739 }
740 // Emit a declaration of this function if the function that
741 // uses this constant expr has already been seen.
742 if (useFuncSeen(C, seenMap)) {
743 emitDeclaration(&F, O);
744 break;
745 }
746 }
747
748 if (!isa<Instruction>(U))
749 continue;
750 const Instruction *instr = cast<Instruction>(U);
751 const BasicBlock *bb = instr->getParent();
752 if (!bb)
753 continue;
754 const Function *caller = bb->getParent();
755 if (!caller)
756 continue;
757
758 // If a caller has already been seen, then the caller is
759 // appearing in the module before the callee. so print out
760 // a declaration for the callee.
761 if (seenMap.contains(caller)) {
762 emitDeclaration(&F, O);
763 break;
764 }
765 }
766 seenMap[&F] = true;
767 }
768}
769
771 if (!GV) return true;
772 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
773 if (!InitList) return true; // Not an array; we don't know how to parse.
774 return InitList->getNumOperands() == 0;
775}
776
777void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
778 // Construct a default subtarget off of the TargetMachine defaults. The
779 // rest of NVPTX isn't friendly to change subtargets per function and
780 // so the default TargetMachine will have all of the options.
781 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
782 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
783 SmallString<128> Str1;
784 raw_svector_ostream OS1(Str1);
785
786 // Emit header before any dwarf directives are emitted below.
787 emitHeader(M, OS1, *STI);
788 OutStreamer->emitRawText(OS1.str());
789}
790
792 if (M.alias_size()) {
793 report_fatal_error("Module has aliases, which NVPTX does not support.");
794 return true; // error
795 }
796 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
797 !LowerCtorDtor) {
799 "Module has a nontrivial global ctor, which NVPTX does not support.");
800 return true; // error
801 }
802 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
803 !LowerCtorDtor) {
805 "Module has a nontrivial global dtor, which NVPTX does not support.");
806 return true; // error
807 }
808
809 // We need to call the parent's one explicitly.
810 bool Result = AsmPrinter::doInitialization(M);
811
812 GlobalsEmitted = false;
813
814 return Result;
815}
816
817void NVPTXAsmPrinter::emitGlobals(const Module &M) {
818 SmallString<128> Str2;
819 raw_svector_ostream OS2(Str2);
820
821 emitDeclarations(M, OS2);
822
823 // As ptxas does not support forward references of globals, we need to first
824 // sort the list of module-level globals in def-use order. We visit each
825 // global variable in order, and ensure that we emit it *after* its dependent
826 // globals. We use a little extra memory maintaining both a set and a list to
827 // have fast searches while maintaining a strict ordering.
831
832 // Visit each global variable, in order
833 for (const GlobalVariable &I : M.globals())
834 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
835
836 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
837 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
838
839 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
840 const NVPTXSubtarget &STI =
841 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
842
843 // Print out module-level global variables in proper order
844 for (unsigned i = 0, e = Globals.size(); i != e; ++i)
845 printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
846
847 OS2 << '\n';
848
849 OutStreamer->emitRawText(OS2.str());
850}
851
852void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
853 const NVPTXSubtarget &STI) {
854 O << "//\n";
855 O << "// Generated by LLVM NVPTX Back-End\n";
856 O << "//\n";
857 O << "\n";
858
859 unsigned PTXVersion = STI.getPTXVersion();
860 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
861
862 O << ".target ";
863 O << STI.getTargetName();
864
865 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
866 if (NTM.getDrvInterface() == NVPTX::NVCL)
867 O << ", texmode_independent";
868
869 bool HasFullDebugInfo = false;
870 for (DICompileUnit *CU : M.debug_compile_units()) {
871 switch(CU->getEmissionKind()) {
874 break;
877 HasFullDebugInfo = true;
878 break;
879 }
880 if (HasFullDebugInfo)
881 break;
882 }
883 if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
884 O << ", debug";
885
886 O << "\n";
887
888 O << ".address_size ";
889 if (NTM.is64Bit())
890 O << "64";
891 else
892 O << "32";
893 O << "\n";
894
895 O << "\n";
896}
897
899 bool HasDebugInfo = MMI && MMI->hasDebugInfo();
900
901 // If we did not emit any functions, then the global declarations have not
902 // yet been emitted.
903 if (!GlobalsEmitted) {
904 emitGlobals(M);
905 GlobalsEmitted = true;
906 }
907
908 // call doFinalization
909 bool ret = AsmPrinter::doFinalization(M);
910
912
913 auto *TS =
914 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
915 // Close the last emitted section
916 if (HasDebugInfo) {
917 TS->closeLastSection();
918 // Emit empty .debug_loc section for better support of the empty files.
919 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
920 }
921
922 // Output last DWARF .file directives, if any.
923 TS->outputDwarfFileDirectives();
924
925 return ret;
926}
927
928// This function emits appropriate linkage directives for
929// functions and global variables.
930//
931// extern function declaration -> .extern
932// extern function definition -> .visible
933// external global variable with init -> .visible
934// external without init -> .extern
935// appending -> not allowed, assert.
936// for any linkage other than
937// internal, private, linker_private,
938// linker_private_weak, linker_private_weak_def_auto,
939// we emit -> .weak.
940
941void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
942 raw_ostream &O) {
943 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
944 if (V->hasExternalLinkage()) {
945 if (isa<GlobalVariable>(V)) {
946 const GlobalVariable *GVar = cast<GlobalVariable>(V);
947 if (GVar) {
948 if (GVar->hasInitializer())
949 O << ".visible ";
950 else
951 O << ".extern ";
952 }
953 } else if (V->isDeclaration())
954 O << ".extern ";
955 else
956 O << ".visible ";
957 } else if (V->hasAppendingLinkage()) {
958 std::string msg;
959 msg.append("Error: ");
960 msg.append("Symbol ");
961 if (V->hasName())
962 msg.append(std::string(V->getName()));
963 msg.append("has unsupported appending linkage type");
964 llvm_unreachable(msg.c_str());
965 } else if (!V->hasInternalLinkage() &&
966 !V->hasPrivateLinkage()) {
967 O << ".weak ";
968 }
969 }
970}
971
972void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
973 raw_ostream &O, bool processDemoted,
974 const NVPTXSubtarget &STI) {
975 // Skip meta data
976 if (GVar->hasSection()) {
977 if (GVar->getSection() == "llvm.metadata")
978 return;
979 }
980
981 // Skip LLVM intrinsic global variables
982 if (GVar->getName().startswith("llvm.") ||
983 GVar->getName().startswith("nvvm."))
984 return;
985
986 const DataLayout &DL = getDataLayout();
987
988 // GlobalVariables are always constant pointers themselves.
989 PointerType *PTy = GVar->getType();
990 Type *ETy = GVar->getValueType();
991
992 if (GVar->hasExternalLinkage()) {
993 if (GVar->hasInitializer())
994 O << ".visible ";
995 else
996 O << ".extern ";
997 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
999 GVar->hasCommonLinkage()) {
1000 O << ".weak ";
1001 }
1002
1003 if (isTexture(*GVar)) {
1004 O << ".global .texref " << getTextureName(*GVar) << ";\n";
1005 return;
1006 }
1007
1008 if (isSurface(*GVar)) {
1009 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1010 return;
1011 }
1012
1013 if (GVar->isDeclaration()) {
1014 // (extern) declarations, no definition or initializer
1015 // Currently the only known declaration is for an automatic __local
1016 // (.shared) promoted to global.
1017 emitPTXGlobalVariable(GVar, O, STI);
1018 O << ";\n";
1019 return;
1020 }
1021
1022 if (isSampler(*GVar)) {
1023 O << ".global .samplerref " << getSamplerName(*GVar);
1024
1025 const Constant *Initializer = nullptr;
1026 if (GVar->hasInitializer())
1027 Initializer = GVar->getInitializer();
1028 const ConstantInt *CI = nullptr;
1029 if (Initializer)
1030 CI = dyn_cast<ConstantInt>(Initializer);
1031 if (CI) {
1032 unsigned sample = CI->getZExtValue();
1033
1034 O << " = { ";
1035
1036 for (int i = 0,
1037 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1038 i < 3; i++) {
1039 O << "addr_mode_" << i << " = ";
1040 switch (addr) {
1041 case 0:
1042 O << "wrap";
1043 break;
1044 case 1:
1045 O << "clamp_to_border";
1046 break;
1047 case 2:
1048 O << "clamp_to_edge";
1049 break;
1050 case 3:
1051 O << "wrap";
1052 break;
1053 case 4:
1054 O << "mirror";
1055 break;
1056 }
1057 O << ", ";
1058 }
1059 O << "filter_mode = ";
1060 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1061 case 0:
1062 O << "nearest";
1063 break;
1064 case 1:
1065 O << "linear";
1066 break;
1067 case 2:
1068 llvm_unreachable("Anisotropic filtering is not supported");
1069 default:
1070 O << "nearest";
1071 break;
1072 }
1073 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1074 O << ", force_unnormalized_coords = 1";
1075 }
1076 O << " }";
1077 }
1078
1079 O << ";\n";
1080 return;
1081 }
1082
1083 if (GVar->hasPrivateLinkage()) {
1084 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1085 return;
1086
1087 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1088 if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1089 return;
1090 if (GVar->use_empty())
1091 return;
1092 }
1093
1094 const Function *demotedFunc = nullptr;
1095 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1096 O << "// " << GVar->getName() << " has been demoted\n";
1097 if (localDecls.find(demotedFunc) != localDecls.end())
1098 localDecls[demotedFunc].push_back(GVar);
1099 else {
1100 std::vector<const GlobalVariable *> temp;
1101 temp.push_back(GVar);
1102 localDecls[demotedFunc] = temp;
1103 }
1104 return;
1105 }
1106
1107 O << ".";
1108 emitPTXAddressSpace(PTy->getAddressSpace(), O);
1109
1110 if (isManaged(*GVar)) {
1111 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1113 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1114 }
1115 O << " .attribute(.managed)";
1116 }
1117
1118 if (MaybeAlign A = GVar->getAlign())
1119 O << " .align " << A->value();
1120 else
1121 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1122
1123 if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1124 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1125 O << " .";
1126 // Special case: ABI requires that we use .u8 for predicates
1127 if (ETy->isIntegerTy(1))
1128 O << "u8";
1129 else
1130 O << getPTXFundamentalTypeStr(ETy, false);
1131 O << " ";
1132 getSymbol(GVar)->print(O, MAI);
1133
1134 // PTX allows variable initialization only for constant and global state
1135 // spaces.
1136 if (GVar->hasInitializer()) {
1137 if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1138 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1139 const Constant *Initializer = GVar->getInitializer();
1140 // 'undef' is treated as there is no value specified.
1141 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1142 O << " = ";
1143 printScalarConstant(Initializer, O);
1144 }
1145 } else {
1146 // The frontend adds zero-initializer to device and constant variables
1147 // that don't have an initial value, and UndefValue to shared
1148 // variables, so skip warning for this case.
1149 if (!GVar->getInitializer()->isNullValue() &&
1150 !isa<UndefValue>(GVar->getInitializer())) {
1151 report_fatal_error("initial value of '" + GVar->getName() +
1152 "' is not allowed in addrspace(" +
1153 Twine(PTy->getAddressSpace()) + ")");
1154 }
1155 }
1156 }
1157 } else {
1158 uint64_t ElementSize = 0;
1159
1160 // Although PTX has direct support for struct type and array type and
1161 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1162 // targets that support these high level field accesses. Structs, arrays
1163 // and vectors are lowered into arrays of bytes.
1164 switch (ETy->getTypeID()) {
1165 case Type::IntegerTyID: // Integers larger than 64 bits
1166 case Type::StructTyID:
1167 case Type::ArrayTyID:
1169 ElementSize = DL.getTypeStoreSize(ETy);
1170 // Ptx allows variable initilization only for constant and
1171 // global state spaces.
1172 if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1173 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1174 GVar->hasInitializer()) {
1175 const Constant *Initializer = GVar->getInitializer();
1176 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1177 AggBuffer aggBuffer(ElementSize, *this);
1178 bufferAggregateConstant(Initializer, &aggBuffer);
1179 if (aggBuffer.numSymbols()) {
1180 unsigned int ptrSize = MAI->getCodePointerSize();
1181 if (ElementSize % ptrSize ||
1182 !aggBuffer.allSymbolsAligned(ptrSize)) {
1183 // Print in bytes and use the mask() operator for pointers.
1184 if (!STI.hasMaskOperator())
1186 "initialized packed aggregate with pointers '" +
1187 GVar->getName() +
1188 "' requires at least PTX ISA version 7.1");
1189 O << " .u8 ";
1190 getSymbol(GVar)->print(O, MAI);
1191 O << "[" << ElementSize << "] = {";
1192 aggBuffer.printBytes(O);
1193 O << "}";
1194 } else {
1195 O << " .u" << ptrSize * 8 << " ";
1196 getSymbol(GVar)->print(O, MAI);
1197 O << "[" << ElementSize / ptrSize << "] = {";
1198 aggBuffer.printWords(O);
1199 O << "}";
1200 }
1201 } else {
1202 O << " .b8 ";
1203 getSymbol(GVar)->print(O, MAI);
1204 O << "[" << ElementSize << "] = {";
1205 aggBuffer.printBytes(O);
1206 O << "}";
1207 }
1208 } else {
1209 O << " .b8 ";
1210 getSymbol(GVar)->print(O, MAI);
1211 if (ElementSize) {
1212 O << "[";
1213 O << ElementSize;
1214 O << "]";
1215 }
1216 }
1217 } else {
1218 O << " .b8 ";
1219 getSymbol(GVar)->print(O, MAI);
1220 if (ElementSize) {
1221 O << "[";
1222 O << ElementSize;
1223 O << "]";
1224 }
1225 }
1226 break;
1227 default:
1228 llvm_unreachable("type not supported yet");
1229 }
1230 }
1231 O << ";\n";
1232}
1233
1234void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1235 const Value *v = Symbols[nSym];
1236 const Value *v0 = SymbolsBeforeStripping[nSym];
1237 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1238 MCSymbol *Name = AP.getSymbol(GVar);
1239 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1240 // Is v0 a generic pointer?
1241 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1242 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1243 os << "generic(";
1244 Name->print(os, AP.MAI);
1245 os << ")";
1246 } else {
1247 Name->print(os, AP.MAI);
1248 }
1249 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1250 const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1251 AP.printMCExpr(*Expr, os);
1252 } else
1253 llvm_unreachable("symbol type unknown");
1254}
1255
1256void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1257 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1258 symbolPosInBuffer.push_back(size);
1259 unsigned int nSym = 0;
1260 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1261 for (unsigned int pos = 0; pos < size;) {
1262 if (pos)
1263 os << ", ";
1264 if (pos != nextSymbolPos) {
1265 os << (unsigned int)buffer[pos];
1266 ++pos;
1267 continue;
1268 }
1269 // Generate a per-byte mask() operator for the symbol, which looks like:
1270 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1271 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1272 std::string symText;
1273 llvm::raw_string_ostream oss(symText);
1274 printSymbol(nSym, oss);
1275 for (unsigned i = 0; i < ptrSize; ++i) {
1276 if (i)
1277 os << ", ";
1278 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1279 os << "(" << symText << ")";
1280 }
1281 pos += ptrSize;
1282 nextSymbolPos = symbolPosInBuffer[++nSym];
1283 assert(nextSymbolPos >= pos);
1284 }
1285}
1286
1287void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1288 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1289 symbolPosInBuffer.push_back(size);
1290 unsigned int nSym = 0;
1291 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1292 assert(nextSymbolPos % ptrSize == 0);
1293 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1294 if (pos)
1295 os << ", ";
1296 if (pos == nextSymbolPos) {
1297 printSymbol(nSym, os);
1298 nextSymbolPos = symbolPosInBuffer[++nSym];
1299 assert(nextSymbolPos % ptrSize == 0);
1300 assert(nextSymbolPos >= pos + ptrSize);
1301 } else if (ptrSize == 4)
1302 os << support::endian::read32le(&buffer[pos]);
1303 else
1304 os << support::endian::read64le(&buffer[pos]);
1305 }
1306}
1307
1308void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1309 if (localDecls.find(f) == localDecls.end())
1310 return;
1311
1312 std::vector<const GlobalVariable *> &gvars = localDecls[f];
1313
1314 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1315 const NVPTXSubtarget &STI =
1316 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1317
1318 for (const GlobalVariable *GV : gvars) {
1319 O << "\t// demoted variable\n\t";
1320 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1321 }
1322}
1323
1324void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1325 raw_ostream &O) const {
1326 switch (AddressSpace) {
1328 O << "local";
1329 break;
1331 O << "global";
1332 break;
1334 O << "const";
1335 break;
1337 O << "shared";
1338 break;
1339 default:
1340 report_fatal_error("Bad address space found while emitting PTX: " +
1342 break;
1343 }
1344}
1345
1346std::string
1347NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1348 switch (Ty->getTypeID()) {
1349 case Type::IntegerTyID: {
1350 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1351 if (NumBits == 1)
1352 return "pred";
1353 else if (NumBits <= 64) {
1354 std::string name = "u";
1355 return name + utostr(NumBits);
1356 } else {
1357 llvm_unreachable("Integer too large");
1358 break;
1359 }
1360 break;
1361 }
1362 case Type::HalfTyID:
1363 // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1364 return "b16";
1365 case Type::FloatTyID:
1366 return "f32";
1367 case Type::DoubleTyID:
1368 return "f64";
1369 case Type::PointerTyID: {
1370 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1371 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1372
1373 if (PtrSize == 64)
1374 if (useB4PTR)
1375 return "b64";
1376 else
1377 return "u64";
1378 else if (useB4PTR)
1379 return "b32";
1380 else
1381 return "u32";
1382 }
1383 default:
1384 break;
1385 }
1386 llvm_unreachable("unexpected type");
1387}
1388
1389void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1390 raw_ostream &O,
1391 const NVPTXSubtarget &STI) {
1392 const DataLayout &DL = getDataLayout();
1393
1394 // GlobalVariables are always constant pointers themselves.
1395 Type *ETy = GVar->getValueType();
1396
1397 O << ".";
1398 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1399 if (isManaged(*GVar)) {
1400 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1402 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1403 }
1404 O << " .attribute(.managed)";
1405 }
1406 if (MaybeAlign A = GVar->getAlign())
1407 O << " .align " << A->value();
1408 else
1409 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1410
1411 // Special case for i128
1412 if (ETy->isIntegerTy(128)) {
1413 O << " .b8 ";
1414 getSymbol(GVar)->print(O, MAI);
1415 O << "[16]";
1416 return;
1417 }
1418
1419 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1420 O << " .";
1421 O << getPTXFundamentalTypeStr(ETy);
1422 O << " ";
1423 getSymbol(GVar)->print(O, MAI);
1424 return;
1425 }
1426
1427 int64_t ElementSize = 0;
1428
1429 // Although PTX has direct support for struct type and array type and LLVM IR
1430 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1431 // support these high level field accesses. Structs and arrays are lowered
1432 // into arrays of bytes.
1433 switch (ETy->getTypeID()) {
1434 case Type::StructTyID:
1435 case Type::ArrayTyID:
1437 ElementSize = DL.getTypeStoreSize(ETy);
1438 O << " .b8 ";
1439 getSymbol(GVar)->print(O, MAI);
1440 O << "[";
1441 if (ElementSize) {
1442 O << ElementSize;
1443 }
1444 O << "]";
1445 break;
1446 default:
1447 llvm_unreachable("type not supported yet");
1448 }
1449}
1450
1451void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1452 const DataLayout &DL = getDataLayout();
1453 const AttributeList &PAL = F->getAttributes();
1454 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1455 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1456
1458 unsigned paramIndex = 0;
1459 bool first = true;
1460 bool isKernelFunc = isKernelFunction(*F);
1461 bool isABI = (STI.getSmVersion() >= 20);
1462 bool hasImageHandles = STI.hasImageHandles();
1463
1464 if (F->arg_empty() && !F->isVarArg()) {
1465 O << "()\n";
1466 return;
1467 }
1468
1469 O << "(\n";
1470
1471 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1472 Type *Ty = I->getType();
1473
1474 if (!first)
1475 O << ",\n";
1476
1477 first = false;
1478
1479 // Handle image/sampler parameters
1480 if (isKernelFunction(*F)) {
1481 if (isSampler(*I) || isImage(*I)) {
1482 if (isImage(*I)) {
1483 std::string sname = std::string(I->getName());
1484 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1485 if (hasImageHandles)
1486 O << "\t.param .u64 .ptr .surfref ";
1487 else
1488 O << "\t.param .surfref ";
1489 O << TLI->getParamName(F, paramIndex);
1490 }
1491 else { // Default image is read_only
1492 if (hasImageHandles)
1493 O << "\t.param .u64 .ptr .texref ";
1494 else
1495 O << "\t.param .texref ";
1496 O << TLI->getParamName(F, paramIndex);
1497 }
1498 } else {
1499 if (hasImageHandles)
1500 O << "\t.param .u64 .ptr .samplerref ";
1501 else
1502 O << "\t.param .samplerref ";
1503 O << TLI->getParamName(F, paramIndex);
1504 }
1505 continue;
1506 }
1507 }
1508
1509 auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1510 paramIndex](Type *Ty) -> Align {
1511 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1512 MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1513 return std::max(TypeAlign, ParamAlign.valueOrOne());
1514 };
1515
1516 if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1517 if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1518 // Just print .param .align <a> .b8 .param[size];
1519 // <a> = optimal alignment for the element type; always multiple of
1520 // PAL.getParamAlignment
1521 // size = typeallocsize of element type
1522 Align OptimalAlign = getOptimalAlignForParam(Ty);
1523
1524 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1525 O << TLI->getParamName(F, paramIndex);
1526 O << "[" << DL.getTypeAllocSize(Ty) << "]";
1527
1528 continue;
1529 }
1530 // Just a scalar
1531 auto *PTy = dyn_cast<PointerType>(Ty);
1532 unsigned PTySizeInBits = 0;
1533 if (PTy) {
1534 PTySizeInBits =
1535 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1536 assert(PTySizeInBits && "Invalid pointer size");
1537 }
1538
1539 if (isKernelFunc) {
1540 if (PTy) {
1541 // Special handling for pointer arguments to kernel
1542 O << "\t.param .u" << PTySizeInBits << " ";
1543
1544 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1545 NVPTX::CUDA) {
1546 int addrSpace = PTy->getAddressSpace();
1547 switch (addrSpace) {
1548 default:
1549 O << ".ptr ";
1550 break;
1552 O << ".ptr .const ";
1553 break;
1555 O << ".ptr .shared ";
1556 break;
1558 O << ".ptr .global ";
1559 break;
1560 }
1561 Align ParamAlign = I->getParamAlign().valueOrOne();
1562 O << ".align " << ParamAlign.value() << " ";
1563 }
1564 O << TLI->getParamName(F, paramIndex);
1565 continue;
1566 }
1567
1568 // non-pointer scalar to kernel func
1569 O << "\t.param .";
1570 // Special case: predicate operands become .u8 types
1571 if (Ty->isIntegerTy(1))
1572 O << "u8";
1573 else
1574 O << getPTXFundamentalTypeStr(Ty);
1575 O << " ";
1576 O << TLI->getParamName(F, paramIndex);
1577 continue;
1578 }
1579 // Non-kernel function, just print .param .b<size> for ABI
1580 // and .reg .b<size> for non-ABI
1581 unsigned sz = 0;
1582 if (isa<IntegerType>(Ty)) {
1583 sz = cast<IntegerType>(Ty)->getBitWidth();
1585 } else if (PTy) {
1586 assert(PTySizeInBits && "Invalid pointer size");
1587 sz = PTySizeInBits;
1588 } else if (Ty->isHalfTy())
1589 // PTX ABI requires all scalar parameters to be at least 32
1590 // bits in size. fp16 normally uses .b16 as its storage type
1591 // in PTX, so its size must be adjusted here, too.
1592 sz = 32;
1593 else
1594 sz = Ty->getPrimitiveSizeInBits();
1595 if (isABI)
1596 O << "\t.param .b" << sz << " ";
1597 else
1598 O << "\t.reg .b" << sz << " ";
1599 O << TLI->getParamName(F, paramIndex);
1600 continue;
1601 }
1602
1603 // param has byVal attribute.
1604 Type *ETy = PAL.getParamByValType(paramIndex);
1605 assert(ETy && "Param should have byval type");
1606
1607 if (isABI || isKernelFunc) {
1608 // Just print .param .align <a> .b8 .param[size];
1609 // <a> = optimal alignment for the element type; always multiple of
1610 // PAL.getParamAlignment
1611 // size = typeallocsize of element type
1612 Align OptimalAlign =
1613 isKernelFunc
1614 ? getOptimalAlignForParam(ETy)
1615 : TLI->getFunctionByValParamAlign(
1616 F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
1617
1618 unsigned sz = DL.getTypeAllocSize(ETy);
1619 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1620 O << TLI->getParamName(F, paramIndex);
1621 O << "[" << sz << "]";
1622 continue;
1623 } else {
1624 // Split the ETy into constituent parts and
1625 // print .param .b<size> <name> for each part.
1626 // Further, if a part is vector, print the above for
1627 // each vector element.
1628 SmallVector<EVT, 16> vtparts;
1629 ComputeValueVTs(*TLI, DL, ETy, vtparts);
1630 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1631 unsigned elems = 1;
1632 EVT elemtype = vtparts[i];
1633 if (vtparts[i].isVector()) {
1634 elems = vtparts[i].getVectorNumElements();
1635 elemtype = vtparts[i].getVectorElementType();
1636 }
1637
1638 for (unsigned j = 0, je = elems; j != je; ++j) {
1639 unsigned sz = elemtype.getSizeInBits();
1640 if (elemtype.isInteger())
1642 O << "\t.reg .b" << sz << " ";
1643 O << TLI->getParamName(F, paramIndex);
1644 if (j < je - 1)
1645 O << ",\n";
1646 ++paramIndex;
1647 }
1648 if (i < e - 1)
1649 O << ",\n";
1650 }
1651 --paramIndex;
1652 continue;
1653 }
1654 }
1655
1656 if (F->isVarArg()) {
1657 if (!first)
1658 O << ",\n";
1659 O << "\t.param .align " << STI.getMaxRequiredAlignment();
1660 O << " .b8 ";
1661 O << TLI->getParamName(F, /* vararg */ -1) << "[]";
1662 }
1663
1664 O << "\n)\n";
1665}
1666
1667void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1668 const MachineFunction &MF) {
1669 SmallString<128> Str;
1671
1672 // Map the global virtual register number to a register class specific
1673 // virtual register number starting from 1 with that class.
1675 //unsigned numRegClasses = TRI->getNumRegClasses();
1676
1677 // Emit the Fake Stack Object
1678 const MachineFrameInfo &MFI = MF.getFrameInfo();
1679 int NumBytes = (int) MFI.getStackSize();
1680 if (NumBytes) {
1681 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1682 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1683 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1684 O << "\t.reg .b64 \t%SP;\n";
1685 O << "\t.reg .b64 \t%SPL;\n";
1686 } else {
1687 O << "\t.reg .b32 \t%SP;\n";
1688 O << "\t.reg .b32 \t%SPL;\n";
1689 }
1690 }
1691
1692 // Go through all virtual registers to establish the mapping between the
1693 // global virtual
1694 // register number and the per class virtual register number.
1695 // We use the per class virtual register number in the ptx output.
1696 unsigned int numVRs = MRI->getNumVirtRegs();
1697 for (unsigned i = 0; i < numVRs; i++) {
1699 const TargetRegisterClass *RC = MRI->getRegClass(vr);
1700 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1701 int n = regmap.size();
1702 regmap.insert(std::make_pair(vr, n + 1));
1703 }
1704
1705 // Emit register declarations
1706 // @TODO: Extract out the real register usage
1707 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1708 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1709 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1710 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1711 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1712 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1713 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1714
1715 // Emit declaration of the virtual registers or 'physical' registers for
1716 // each register class
1717 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1718 const TargetRegisterClass *RC = TRI->getRegClass(i);
1719 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1720 std::string rcname = getNVPTXRegClassName(RC);
1721 std::string rcStr = getNVPTXRegClassStr(RC);
1722 int n = regmap.size();
1723
1724 // Only declare those registers that may be used.
1725 if (n) {
1726 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1727 << ">;\n";
1728 }
1729 }
1730
1731 OutStreamer->emitRawText(O.str());
1732}
1733
1734void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1735 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1736 bool ignored;
1737 unsigned int numHex;
1738 const char *lead;
1739
1740 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1741 numHex = 8;
1742 lead = "0f";
1744 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1745 numHex = 16;
1746 lead = "0d";
1748 } else
1749 llvm_unreachable("unsupported fp type");
1750
1751 APInt API = APF.bitcastToAPInt();
1752 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1753}
1754
1755void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1756 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1757 O << CI->getValue();
1758 return;
1759 }
1760 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1761 printFPConstant(CFP, O);
1762 return;
1763 }
1764 if (isa<ConstantPointerNull>(CPV)) {
1765 O << "0";
1766 return;
1767 }
1768 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1769 bool IsNonGenericPointer = false;
1770 if (GVar->getType()->getAddressSpace() != 0) {
1771 IsNonGenericPointer = true;
1772 }
1773 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1774 O << "generic(";
1775 getSymbol(GVar)->print(O, MAI);
1776 O << ")";
1777 } else {
1778 getSymbol(GVar)->print(O, MAI);
1779 }
1780 return;
1781 }
1782 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1783 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1784 printMCExpr(*E, O);
1785 return;
1786 }
1787 llvm_unreachable("Not scalar type found in printScalarConstant()");
1788}
1789
1790void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1791 AggBuffer *AggBuffer) {
1792 const DataLayout &DL = getDataLayout();
1793 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1794 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1795 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1796 // only the space allocated by CPV.
1797 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1798 return;
1799 }
1800
1801 // Helper for filling AggBuffer with APInts.
1802 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1803 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1804 SmallVector<unsigned char, 16> Buf(NumBytes);
1805 for (unsigned I = 0; I < NumBytes; ++I) {
1806 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1807 }
1808 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1809 };
1810
1811 switch (CPV->getType()->getTypeID()) {
1812 case Type::IntegerTyID:
1813 if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1814 AddIntToBuffer(CI->getValue());
1815 break;
1816 }
1817 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1818 if (const auto *CI =
1819 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1820 AddIntToBuffer(CI->getValue());
1821 break;
1822 }
1823 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1824 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1825 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1826 AggBuffer->addZeros(AllocSize);
1827 break;
1828 }
1829 }
1830 llvm_unreachable("unsupported integer const type");
1831 break;
1832
1833 case Type::HalfTyID:
1834 case Type::BFloatTyID:
1835 case Type::FloatTyID:
1836 case Type::DoubleTyID:
1837 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1838 break;
1839
1840 case Type::PointerTyID: {
1841 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1842 AggBuffer->addSymbol(GVar, GVar);
1843 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1844 const Value *v = Cexpr->stripPointerCasts();
1845 AggBuffer->addSymbol(v, Cexpr);
1846 }
1847 AggBuffer->addZeros(AllocSize);
1848 break;
1849 }
1850
1851 case Type::ArrayTyID:
1853 case Type::StructTyID: {
1854 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1855 bufferAggregateConstant(CPV, AggBuffer);
1856 if (Bytes > AllocSize)
1857 AggBuffer->addZeros(Bytes - AllocSize);
1858 } else if (isa<ConstantAggregateZero>(CPV))
1859 AggBuffer->addZeros(Bytes);
1860 else
1861 llvm_unreachable("Unexpected Constant type");
1862 break;
1863 }
1864
1865 default:
1866 llvm_unreachable("unsupported type");
1867 }
1868}
1869
1870void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1871 AggBuffer *aggBuffer) {
1872 const DataLayout &DL = getDataLayout();
1873 int Bytes;
1874
1875 // Integers of arbitrary width
1876 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1877 APInt Val = CI->getValue();
1878 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1879 uint8_t Byte = Val.getLoBits(8).getZExtValue();
1880 aggBuffer->addBytes(&Byte, 1, 1);
1881 Val.lshrInPlace(8);
1882 }
1883 return;
1884 }
1885
1886 // Old constants
1887 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1888 if (CPV->getNumOperands())
1889 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1890 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1891 return;
1892 }
1893
1894 if (const ConstantDataSequential *CDS =
1895 dyn_cast<ConstantDataSequential>(CPV)) {
1896 if (CDS->getNumElements())
1897 for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1898 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1899 aggBuffer);
1900 return;
1901 }
1902
1903 if (isa<ConstantStruct>(CPV)) {
1904 if (CPV->getNumOperands()) {
1905 StructType *ST = cast<StructType>(CPV->getType());
1906 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1907 if (i == (e - 1))
1908 Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1909 DL.getTypeAllocSize(ST) -
1910 DL.getStructLayout(ST)->getElementOffset(i);
1911 else
1912 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1913 DL.getStructLayout(ST)->getElementOffset(i);
1914 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1915 }
1916 }
1917 return;
1918 }
1919 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1920}
1921
1922/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1923/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1924/// expressions that are representable in PTX and create
1925/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1926const MCExpr *
1927NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1928 MCContext &Ctx = OutContext;
1929
1930 if (CV->isNullValue() || isa<UndefValue>(CV))
1931 return MCConstantExpr::create(0, Ctx);
1932
1933 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1934 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1935
1936 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1937 const MCSymbolRefExpr *Expr =
1939 if (ProcessingGeneric) {
1940 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1941 } else {
1942 return Expr;
1943 }
1944 }
1945
1946 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1947 if (!CE) {
1948 llvm_unreachable("Unknown constant value to lower!");
1949 }
1950
1951 switch (CE->getOpcode()) {
1952 default: {
1953 // If the code isn't optimized, there may be outstanding folding
1954 // opportunities. Attempt to fold the expression using DataLayout as a
1955 // last resort before giving up.
1957 if (C != CE)
1958 return lowerConstantForGV(C, ProcessingGeneric);
1959
1960 // Otherwise report the problem to the user.
1961 std::string S;
1963 OS << "Unsupported expression in static initializer: ";
1964 CE->printAsOperand(OS, /*PrintType=*/false,
1965 !MF ? nullptr : MF->getFunction().getParent());
1966 report_fatal_error(Twine(OS.str()));
1967 }
1968
1969 case Instruction::AddrSpaceCast: {
1970 // Strip the addrspacecast and pass along the operand
1971 PointerType *DstTy = cast<PointerType>(CE->getType());
1972 if (DstTy->getAddressSpace() == 0) {
1973 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1974 }
1975 std::string S;
1977 OS << "Unsupported expression in static initializer: ";
1978 CE->printAsOperand(OS, /*PrintType=*/ false,
1979 !MF ? nullptr : MF->getFunction().getParent());
1980 report_fatal_error(Twine(OS.str()));
1981 }
1982
1983 case Instruction::GetElementPtr: {
1984 const DataLayout &DL = getDataLayout();
1985
1986 // Generate a symbolic expression for the byte address
1987 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1988 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1989
1990 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1991 ProcessingGeneric);
1992 if (!OffsetAI)
1993 return Base;
1994
1995 int64_t Offset = OffsetAI.getSExtValue();
1997 Ctx);
1998 }
1999
2000 case Instruction::Trunc:
2001 // We emit the value and depend on the assembler to truncate the generated
2002 // expression properly. This is important for differences between
2003 // blockaddress labels. Since the two labels are in the same function, it
2004 // is reasonable to treat their delta as a 32-bit value.
2005 [[fallthrough]];
2006 case Instruction::BitCast:
2007 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2008
2009 case Instruction::IntToPtr: {
2010 const DataLayout &DL = getDataLayout();
2011
2012 // Handle casts to pointers by changing them into casts to the appropriate
2013 // integer type. This promotes constant folding and simplifies this code.
2014 Constant *Op = CE->getOperand(0);
2015 Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2016 false/*ZExt*/);
2017 return lowerConstantForGV(Op, ProcessingGeneric);
2018 }
2019
2020 case Instruction::PtrToInt: {
2021 const DataLayout &DL = getDataLayout();
2022
2023 // Support only foldable casts to/from pointers that can be eliminated by
2024 // changing the pointer to the appropriately sized integer type.
2025 Constant *Op = CE->getOperand(0);
2026 Type *Ty = CE->getType();
2027
2028 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2029
2030 // We can emit the pointer value into this slot if the slot is an
2031 // integer slot equal to the size of the pointer.
2032 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2033 return OpExpr;
2034
2035 // Otherwise the pointer is smaller than the resultant integer, mask off
2036 // the high bits so we are sure to get a proper truncation if the input is
2037 // a constant expr.
2038 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2039 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2040 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2041 }
2042
2043 // The MC library also has a right-shift operator, but it isn't consistently
2044 // signed or unsigned between different targets.
2045 case Instruction::Add: {
2046 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2047 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2048 switch (CE->getOpcode()) {
2049 default: llvm_unreachable("Unknown binary operator constant cast expr");
2050 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2051 }
2052 }
2053 }
2054}
2055
2056// Copy of MCExpr::print customized for NVPTX
2057void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2058 switch (Expr.getKind()) {
2059 case MCExpr::Target:
2060 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2061 case MCExpr::Constant:
2062 OS << cast<MCConstantExpr>(Expr).getValue();
2063 return;
2064
2065 case MCExpr::SymbolRef: {
2066 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2067 const MCSymbol &Sym = SRE.getSymbol();
2068 Sym.print(OS, MAI);
2069 return;
2070 }
2071
2072 case MCExpr::Unary: {
2073 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2074 switch (UE.getOpcode()) {
2075 case MCUnaryExpr::LNot: OS << '!'; break;
2076 case MCUnaryExpr::Minus: OS << '-'; break;
2077 case MCUnaryExpr::Not: OS << '~'; break;
2078 case MCUnaryExpr::Plus: OS << '+'; break;
2079 }
2080 printMCExpr(*UE.getSubExpr(), OS);
2081 return;
2082 }
2083
2084 case MCExpr::Binary: {
2085 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2086
2087 // Only print parens around the LHS if it is non-trivial.
2088 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2089 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2090 printMCExpr(*BE.getLHS(), OS);
2091 } else {
2092 OS << '(';
2093 printMCExpr(*BE.getLHS(), OS);
2094 OS<< ')';
2095 }
2096
2097 switch (BE.getOpcode()) {
2098 case MCBinaryExpr::Add:
2099 // Print "X-42" instead of "X+-42".
2100 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2101 if (RHSC->getValue() < 0) {
2102 OS << RHSC->getValue();
2103 return;
2104 }
2105 }
2106
2107 OS << '+';
2108 break;
2109 default: llvm_unreachable("Unhandled binary operator");
2110 }
2111
2112 // Only print parens around the LHS if it is non-trivial.
2113 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2114 printMCExpr(*BE.getRHS(), OS);
2115 } else {
2116 OS << '(';
2117 printMCExpr(*BE.getRHS(), OS);
2118 OS << ')';
2119 }
2120 return;
2121 }
2122 }
2123
2124 llvm_unreachable("Invalid expression kind!");
2125}
2126
2127/// PrintAsmOperand - Print out an operand for an inline asm expression.
2128///
2129bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2130 const char *ExtraCode, raw_ostream &O) {
2131 if (ExtraCode && ExtraCode[0]) {
2132 if (ExtraCode[1] != 0)
2133 return true; // Unknown modifier.
2134
2135 switch (ExtraCode[0]) {
2136 default:
2137 // See if this is a generic print operand
2138 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2139 case 'r':
2140 break;
2141 }
2142 }
2143
2144 printOperand(MI, OpNo, O);
2145
2146 return false;
2147}
2148
2149bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2150 unsigned OpNo,
2151 const char *ExtraCode,
2152 raw_ostream &O) {
2153 if (ExtraCode && ExtraCode[0])
2154 return true; // Unknown modifier
2155
2156 O << '[';
2157 printMemOperand(MI, OpNo, O);
2158 O << ']';
2159
2160 return false;
2161}
2162
2163void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2164 raw_ostream &O) {
2165 const MachineOperand &MO = MI->getOperand(opNum);
2166 switch (MO.getType()) {
2168 if (MO.getReg().isPhysical()) {
2169 if (MO.getReg() == NVPTX::VRDepot)
2171 else
2173 } else {
2174 emitVirtualRegister(MO.getReg(), O);
2175 }
2176 break;
2177
2179 O << MO.getImm();
2180 break;
2181
2183 printFPConstant(MO.getFPImm(), O);
2184 break;
2185
2187 PrintSymbolOperand(MO, O);
2188 break;
2189
2191 MO.getMBB()->getSymbol()->print(O, MAI);
2192 break;
2193
2194 default:
2195 llvm_unreachable("Operand type not supported.");
2196 }
2197}
2198
2199void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2200 raw_ostream &O, const char *Modifier) {
2201 printOperand(MI, opNum, O);
2202
2203 if (Modifier && strcmp(Modifier, "add") == 0) {
2204 O << ", ";
2205 printOperand(MI, opNum + 1, O);
2206 } else {
2207 if (MI->getOperand(opNum + 1).isImm() &&
2208 MI->getOperand(opNum + 1).getImm() == 0)
2209 return; // don't print ',0' or '+0'
2210 O << "+";
2211 printOperand(MI, opNum + 1, O);
2212 }
2213}
2214
2215// Force static initialization.
2219}
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
Symbol * Sym
Definition: ELF_riscv.cpp:463
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
static bool isEmptyXXStructor(GlobalVariable *GV)
#define DEPOTNAME
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
static bool usedInGlobalVarDef(const Constant *C)
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
static cl::opt< bool > LowerCtorDtor("nvptx-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(false), cl::Hidden)
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * name
Definition: SMEABIPass.cpp:49
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
@ Globals
Definition: TextStubV5.cpp:115
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5137
APInt bitcastToAPInt() const
Definition: APFloat.h:1184
Class for arbitrary precision integers.
Definition: APInt.h:75
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:604
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:846
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:663
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:399
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:87
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:90
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:102
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:429
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:376
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:121
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:105
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:94
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:399
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:99
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:384
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:403
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:394
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
ConstantArray - Constant Array Declarations.
Definition: Constants.h:408
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:568
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:997
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2051
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
const APFloat & getValueAPF() const
Definition: Constants.h:296
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:145
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:136
This is an important base class in LLVM.
Definition: Constant.h:41
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:117
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:109
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:510
bool hasExternalLinkage() const
Definition: GlobalValue.h:506
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:273
bool hasPrivateLinkage() const
Definition: GlobalValue.h:522
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
bool hasInternalLinkage() const
Definition: GlobalValue.h:521
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:290
bool hasWeakLinkage() const
Definition: GlobalValue.h:517
bool hasCommonLinkage() const
Definition: GlobalValue.h:527
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:507
Type * getValueType() const
Definition: GlobalValue.h:292
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:550
Binary assembler expressions.
Definition: MCExpr.h:481
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:628
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:631
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:530
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:525
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:625
@ Add
Addition.
Definition: MCExpr.h:484
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Context object for machine code objects.
Definition: MCContext.h:76
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
@ Unary
Unary expressions.
Definition: MCExpr.h:41
@ Constant
Constant expressions.
Definition: MCExpr.h:39
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
@ Target
Target specific expression.
Definition: MCExpr.h:42
@ Binary
Binary expressions.
Definition: MCExpr.h:38
ExprKind getKind() const
Definition: MCExpr.h:81
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
const MCSymbol & getSymbol() const
Definition: MCExpr.h:399
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
Unary assembler expressions.
Definition: MCExpr.h:425
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:468
@ Minus
Unary minus.
Definition: MCExpr.h:429
@ Plus
Unary plus.
Definition: MCExpr.h:431
@ Not
Bitwise negation.
Definition: MCExpr.h:430
@ LNot
Logical negation.
Definition: MCExpr.h:428
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:471
Metadata node.
Definition: Metadata.h:950
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
bool hasDebugInfo() const
Returns true if valid debug info is present.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:53
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:54
static const char * getRegisterName(MCRegister Reg)
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
bool hasImageHandles() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
UniqueStringSaver & getStrPool() const
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:693
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
Class to represent struct types.
Definition: DerivedTypes.h:213
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:256
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition: Type.h:75
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:74
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:73
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:295
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:244
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:229
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef save(const char *S)
Definition: StringSaver.h:52
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:672
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ NVCL
Definition: NVPTX.h:77
@ CUDA
Definition: NVPTX.h:78
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
uint64_t read64le(const void *P)
Definition: Endian.h:382
uint32_t read32le(const void *P)
Definition: Endian.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< TypeSize > *Offsets, TypeSize StartingOffset)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:122
@ Offset
Definition: DWP.cpp:440
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1777
std::string getSamplerName(const Value &val)
AddressSpace
Definition: NVPTXBaseInfo.h:21
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool getMinCTASm(const Function &F, unsigned &x)
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isImage(const Value &val)
bool getMaxNTIDz(const Function &F, unsigned &z)
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool isManaged(const Value &val)
unsigned promoteScalarArgumentSize(unsigned size)
bool isSurface(const Value &val)
void clearAnnotationCache(const Module *Mod)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
std::string getSurfaceName(const Value &val)
bool getReqNTIDx(const Function &F, unsigned &x)
bool getReqNTIDy(const Function &F, unsigned &y)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
bool getMaxNReg(const Function &F, unsigned &x)
bool isTexture(const Value &val)
bool isImageWriteOnly(const Value &val)
bool isImageReadWrite(const Value &val)
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
std::string getTextureName(const Value &val)
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
bool getReqNTIDz(const Function &F, unsigned &z)
bool getMaxNTIDx(const Function &F, unsigned &x)
bool getMaxNTIDy(const Function &F, unsigned &y)
bool isSampler(const Value &val)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:901
Target & getTheNVPTXTarget32()
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:244
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:225
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:245
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...