97#define DEPOTNAME "__local_depot"
100 assert(V.hasName() &&
"Found texture variable with no name");
105 assert(V.hasName() &&
"Found surface variable with no name");
110 assert(V.hasName() &&
"Found sampler variable with no name");
125 for (
const auto &O : U->operands())
138 if (Visited.
count(GV))
142 if (!Visiting.
insert(GV).second)
147 for (
const auto &O : GV->
operands())
160 NVPTX_MC::verifyInstructionPredicates(
MI->getOpcode(),
164 lowerToMCInst(
MI, Inst);
171 if (
MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
172 const MachineOperand &MO =
MI->getOperand(0);
178 for (
const auto MO :
MI->operands())
223unsigned NVPTXAsmPrinter::encodeVirtualRegister(
unsigned Reg) {
225 const TargetRegisterClass *RC = MRI->getRegClass(
Reg);
227 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
228 unsigned RegNum = RegMap[
Reg];
233 if (RC == &NVPTX::B1RegClass) {
235 }
else if (RC == &NVPTX::B16RegClass) {
237 }
else if (RC == &NVPTX::B32RegClass) {
239 }
else if (RC == &NVPTX::B64RegClass) {
241 }
else if (RC == &NVPTX::B128RegClass) {
248 Ret |= (RegNum & 0x0FFFFFFF);
253 return Reg & 0x0FFFFFFF;
265 const NVPTXSubtarget &STI =
TM.getSubtarget<NVPTXSubtarget>(*F);
268 Type *Ty = F->getReturnType();
273 auto PrintScalarRetVal = [&](
unsigned Size) {
277 const unsigned TotalSize =
DL.getTypeAllocSize(Ty);
278 const Align RetAlignment =
280 O <<
".param .align " << RetAlignment.
value() <<
" .b8 func_retval0["
285 PrintScalarRetVal(ITy->getBitWidth());
287 PrintScalarRetVal(TLI->getPointerTy(
DL).getSizeInBits());
296 printReturnValStr(&F, O);
301bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
316 if (
const BasicBlock *PBB = PMBB->getBasicBlock()) {
318 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
321 if (MDNode *UnrollCountMD =
335 if (isLoopHeaderOfNoUnroll(
MBB))
336 OutStreamer->emitRawText(StringRef(
"\t.pragma \"nounroll\";\n"));
340 SmallString<128> Str;
341 raw_svector_ostream
O(Str);
343 if (!GlobalsEmitted) {
344 emitGlobals(*
MF->getFunction().getParent());
345 GlobalsEmitted =
true;
349 MRI = &
MF->getRegInfo();
350 F = &
MF->getFunction();
351 emitLinkageDirective(F, O);
356 printReturnValStr(*
MF, O);
361 emitFunctionParamList(F, O);
365 emitKernelFunctionDirectives(*F, O);
375 setAndEmitFunctionVirtualRegisters(*
MF);
376 encodeDebugInfoRegisterNumbers(*
MF);
378 if (
const DISubprogram *SP =
MF->getFunction().getSubprogram()) {
380 if (!
SP->getUnit()->isDebugDirectivesOnly())
413void NVPTXAsmPrinter::emitImplicitDef(
const MachineInstr *
MI)
const {
426void NVPTXAsmPrinter::emitKernelFunctionDirectives(
const Function &
F,
433 O <<
formatv(
".reqntid {0:$[, ]}\n",
438 O <<
formatv(
".maxntid {0:$[, ]}\n",
442 O <<
".minnctapersm " << *Mincta <<
"\n";
445 O <<
".maxnreg " << *Maxnreg <<
"\n";
449 const NVPTXTargetMachine &NTM =
static_cast<const NVPTXTargetMachine &
>(
TM);
450 const NVPTXSubtarget *STI = &NTM.
getSubtarget<NVPTXSubtarget>(F);
458 if (!BlocksAreClusters)
459 O <<
".explicitcluster\n";
461 if (ClusterDim[0] != 0) {
463 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
464 "should be non-zero as well");
466 O <<
formatv(
".reqnctapercluster {0:$[, ]}\n",
470 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
471 "should be 0 as well");
475 if (BlocksAreClusters) {
476 LLVMContext &Ctx = F.getContext();
478 Ctx.
diagnose(DiagnosticInfoUnsupported(
479 F,
"blocksareclusters requires reqntid and cluster_dim attributes",
482 Ctx.
diagnose(DiagnosticInfoUnsupported(
483 F,
"blocksareclusters requires PTX version >= 9.0",
486 O <<
".blocksareclusters\n";
490 O <<
".maxclusterrank " << *Maxclusterrank <<
"\n";
501 assert(
I != VRegMapping.end() &&
"Bad register class");
505 assert(VI != RegMap.
end() &&
"Bad virtual register");
506 unsigned MappedVR = VI->second;
513void NVPTXAsmPrinter::emitVirtualRegister(
unsigned int vr,
518void NVPTXAsmPrinter::emitAliasDeclaration(
const GlobalAlias *GA,
523 "NVPTX aliasee must be a non-kernel function definition");
533 emitDeclarationWithName(F,
getSymbol(F), O);
538 emitLinkageDirective(F, O);
543 printReturnValStr(F, O);
546 emitFunctionParamList(F, O);
558 return GV->getName() !=
"llvm.used";
560 for (
const User *U :
C->users())
570 if (OtherGV->getName() ==
"llvm.used")
574 if (
const Function *CurFunc =
I->getFunction()) {
575 if (OneFunc && (CurFunc != OneFunc))
616 for (
const User *U :
C->users()) {
621 if (
const Function *Caller =
I->getFunction())
630 SmallPtrSet<const Function *, 32> SeenSet;
631 for (
const Function &F : M) {
632 if (F.getAttributes().hasFnAttr(
"nvptx-libcall-callee")) {
633 emitDeclaration(&F, O);
637 if (F.isDeclaration()) {
640 if (F.getIntrinsicID())
642 emitDeclaration(&F, O);
645 for (
const User *U : F.users()) {
651 emitDeclaration(&F, O);
657 emitDeclaration(&F, O);
672 emitDeclaration(&F, O);
678 for (
const GlobalAlias &GA :
M.aliases())
679 emitAliasDeclaration(&GA, O);
682void NVPTXAsmPrinter::emitStartOfAsmFile(
Module &M) {
686 const NVPTXTargetMachine &NTM =
static_cast<const NVPTXTargetMachine &
>(
TM);
688 SmallString<128> Str1;
689 raw_svector_ostream OS1(Str1);
692 emitHeader(M, OS1, *STI);
710 GlobalsEmitted =
false;
715void NVPTXAsmPrinter::emitGlobals(
const Module &M) {
719 emitDeclarations(M, OS2);
734 assert(GVVisited.
size() == M.global_size() &&
"Missed a global variable");
735 assert(GVVisiting.
size() == 0 &&
"Did not fully process a global variable");
742 printModuleLevelGV(GV, OS2,
false, STI);
766 "// Generated by LLVM NVPTX Back-End\n"
769 <<
".version " << (PTXVersion / 10) <<
"." << (PTXVersion % 10) <<
"\n"
772 const NVPTXTargetMachine &NTM =
static_cast<const NVPTXTargetMachine &
>(
TM);
774 O <<
", texmode_independent";
776 bool HasFullDebugInfo =
false;
777 for (DICompileUnit *CU :
M.debug_compile_units()) {
778 switch(CU->getEmissionKind()) {
784 HasFullDebugInfo =
true;
787 if (HasFullDebugInfo)
790 if (HasFullDebugInfo)
794 <<
".address_size " << (NTM.
is64Bit() ?
"64" :
"32") <<
"\n"
801 if (!GlobalsEmitted) {
803 GlobalsEmitted =
true;
815 TS->closeLastSection();
817 OutStreamer->emitRawText(
"\t.section\t.debug_macinfo\t{\t}");
839void NVPTXAsmPrinter::emitLinkageDirective(
const GlobalValue *V,
842 if (V->hasExternalLinkage()) {
844 O << (GVar->hasInitializer() ?
".visible " :
".extern ");
845 else if (V->isDeclaration())
849 }
else if (V->hasAppendingLinkage()) {
851 "' has unsupported appending linkage type");
852 }
else if (!
V->hasInternalLinkage() && !
V->hasPrivateLinkage()) {
858void NVPTXAsmPrinter::printModuleLevelGV(
const GlobalVariable *GVar,
906 emitPTXGlobalVariable(GVar, O, STI);
914 const Constant *Initializer =
nullptr;
917 const ConstantInt *CI =
nullptr;
928 O <<
"addr_mode_" << i <<
" = ";
934 O <<
"clamp_to_border";
937 O <<
"clamp_to_edge";
948 O <<
"filter_mode = ";
963 O <<
", force_unnormalized_coords = 1";
983 const Function *DemotedFunc =
nullptr;
985 O <<
"// " << GVar->
getName() <<
" has been demoted\n";
986 localDecls[DemotedFunc].push_back(GVar);
996 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
997 O <<
" .attribute(.managed)";
1001 << GVar->
getAlign().value_or(
DL.getPrefTypeAlign(ETy)).value();
1010 O << getPTXFundamentalTypeStr(ETy,
false);
1023 printScalarConstant(Initializer, O);
1032 "' is not allowed in addrspace(" +
1048 const uint64_t ElementSize =
DL.getTypeStoreSize(ETy);
1056 AggBuffer aggBuffer(ElementSize, *
this);
1057 bufferAggregateConstant(Initializer, &aggBuffer);
1058 if (aggBuffer.numSymbols()) {
1059 const unsigned int ptrSize =
MAI->getCodePointerSize();
1060 if (ElementSize % ptrSize ||
1061 !aggBuffer.allSymbolsAligned(ptrSize)) {
1065 "initialized packed aggregate with pointers '" +
1067 "' requires at least PTX ISA version 7.1");
1070 O <<
"[" << ElementSize <<
"] = {";
1071 aggBuffer.printBytes(O);
1074 O <<
" .u" << ptrSize * 8 <<
" ";
1076 O <<
"[" << ElementSize / ptrSize <<
"] = {";
1077 aggBuffer.printWords(O);
1083 O <<
"[" << ElementSize <<
"] = {";
1084 aggBuffer.printBytes(O);
1091 O <<
"[" << ElementSize <<
"]";
1097 O <<
"[" << ElementSize <<
"]";
1108void NVPTXAsmPrinter::AggBuffer::printSymbol(
unsigned nSym,
raw_ostream &os) {
1109 const Value *
v = Symbols[nSym];
1110 const Value *v0 = SymbolsBeforeStripping[nSym];
1115 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1118 Name->print(os, AP.MAI);
1121 Name->print(os, AP.MAI);
1124 const MCExpr *Expr = AP.lowerConstantForGV(CExpr,
false);
1125 AP.printMCExpr(*Expr, os);
1130void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1131 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1136 unsigned int InitializerCount =
size;
1139 if (numSymbols() == 0)
1140 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1143 symbolPosInBuffer.push_back(InitializerCount);
1144 unsigned int nSym = 0;
1145 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1146 for (
unsigned int pos = 0; pos < InitializerCount;) {
1149 if (pos != nextSymbolPos) {
1150 os << (
unsigned int)buffer[pos];
1157 std::string symText;
1158 llvm::raw_string_ostream oss(symText);
1159 printSymbol(nSym, oss);
1160 for (
unsigned i = 0; i < ptrSize; ++i) {
1164 os <<
"(" << symText <<
")";
1167 nextSymbolPos = symbolPosInBuffer[++nSym];
1168 assert(nextSymbolPos >= pos);
1172void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1173 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1174 symbolPosInBuffer.push_back(
size);
1175 unsigned int nSym = 0;
1176 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1177 assert(nextSymbolPos % ptrSize == 0);
1178 for (
unsigned int pos = 0; pos <
size; pos += ptrSize) {
1181 if (pos == nextSymbolPos) {
1182 printSymbol(nSym, os);
1183 nextSymbolPos = symbolPosInBuffer[++nSym];
1184 assert(nextSymbolPos % ptrSize == 0);
1185 assert(nextSymbolPos >= pos + ptrSize);
1186 }
else if (ptrSize == 4)
1193void NVPTXAsmPrinter::emitDemotedVars(
const Function *F, raw_ostream &O) {
1194 auto It = localDecls.find(F);
1195 if (It == localDecls.end())
1200 const NVPTXTargetMachine &NTM =
static_cast<const NVPTXTargetMachine &
>(
TM);
1203 for (
const GlobalVariable *GV : GVars) {
1204 O <<
"\t// demoted variable\n\t";
1205 printModuleLevelGV(GV, O,
true, STI);
1209void NVPTXAsmPrinter::emitPTXAddressSpace(
unsigned int AddressSpace,
1210 raw_ostream &O)
const {
1232NVPTXAsmPrinter::getPTXFundamentalTypeStr(
Type *Ty,
bool useB4PTR)
const {
1238 if (NumBits <= 64) {
1239 std::string
name =
"u";
1256 assert((PtrSize == 64 || PtrSize == 32) &&
"Unexpected pointer size");
1274void NVPTXAsmPrinter::emitPTXGlobalVariable(
const GlobalVariable *GVar,
1276 const NVPTXSubtarget &STI) {
1287 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1289 O <<
" .attribute(.managed)";
1292 << GVar->
getAlign().value_or(
DL.getPrefTypeAlign(ETy)).value();
1303 O <<
" ." << getPTXFundamentalTypeStr(ETy) <<
" ";
1308 int64_t ElementSize = 0;
1318 ElementSize =
DL.getTypeStoreSize(ETy);
1332void NVPTXAsmPrinter::emitFunctionParamList(
const Function *F, raw_ostream &O) {
1334 const NVPTXSubtarget &STI =
TM.getSubtarget<NVPTXSubtarget>(*F);
1336 const NVPTXMachineFunctionInfo *MFI =
1337 MF ?
MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1339 bool IsFirst =
true;
1342 if (F->arg_empty() && !F->isVarArg()) {
1349 for (
const Argument &Arg : F->args()) {
1350 Type *Ty = Arg.getType();
1351 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1367 switch (ArgOpaqueType) {
1369 O <<
".samplerref ";
1385 auto GetOptimalAlignForParam = [&
DL, F, &Arg](
Type *Ty) -> Align {
1386 if (MaybeAlign StackAlign =
1387 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1388 return StackAlign.value();
1391 MaybeAlign ParamAlign =
1392 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1393 return std::max(TypeAlign, ParamAlign.
valueOrOne());
1396 if (Arg.hasByValAttr()) {
1398 Type *ETy = Arg.getParamByValType();
1399 assert(ETy &&
"Param should have byval type");
1405 const Align OptimalAlign =
1406 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1408 F, ETy, Arg.getParamAlign().valueOrOne(),
DL);
1410 O <<
"\t.param .align " << OptimalAlign.
value() <<
" .b8 " << ParamSym
1411 <<
"[" <<
DL.getTypeAllocSize(ETy) <<
"]";
1420 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1422 O <<
"\t.param .align " << OptimalAlign.
value() <<
" .b8 " << ParamSym
1423 <<
"[" <<
DL.getTypeAllocSize(Ty) <<
"]";
1429 unsigned PTySizeInBits = 0;
1432 TLI->getPointerTy(
DL, PTy->getAddressSpace()).getSizeInBits();
1433 assert(PTySizeInBits &&
"Invalid pointer size");
1438 O <<
"\t.param .u" << PTySizeInBits <<
" .ptr";
1440 switch (PTy->getAddressSpace()) {
1457 O <<
" .align " << Arg.getParamAlign().valueOrOne().value() <<
" "
1468 O << getPTXFundamentalTypeStr(Ty);
1469 O <<
" " << ParamSym;
1478 assert(PTySizeInBits &&
"Invalid pointer size");
1479 Size = PTySizeInBits;
1482 O <<
"\t.param .b" <<
Size <<
" " << ParamSym;
1485 if (F->isVarArg()) {
1489 << TLI->getParamName(F, -1) <<
"[]";
1495void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1496 const MachineFunction &
MF) {
1497 SmallString<128> Str;
1498 raw_svector_ostream
O(Str);
1502 const TargetRegisterInfo *
TRI =
MF.getSubtarget().getRegisterInfo();
1505 const MachineFrameInfo &MFI =
MF.getFrameInfo();
1510 if (
static_cast<const NVPTXTargetMachine &
>(
MF.getTarget()).is64Bit()) {
1511 O <<
"\t.reg .b64 \t%SP;\n"
1512 <<
"\t.reg .b64 \t%SPL;\n";
1514 O <<
"\t.reg .b32 \t%SP;\n"
1515 <<
"\t.reg .b32 \t%SPL;\n";
1523 for (
unsigned I :
llvm::seq(MRI->getNumVirtRegs())) {
1525 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1527 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1528 RCRegMap[VR] = RCRegMap.size() + 1;
1533 for (
const TargetRegisterClass *RC :
TRI->regclasses()) {
1534 const unsigned N = VRegMapping[RC].size();
1540 O <<
"\t.reg " << RCName <<
" \t" << RCStr <<
"<" << (
N + 1) <<
">;\n";
1549void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1550 const MachineFunction &
MF) {
1551 const NVPTXSubtarget &STI =
MF.getSubtarget<NVPTXSubtarget>();
1559 for (
auto &classMap : VRegMapping) {
1560 for (
auto &registerMapping : classMap.getSecond()) {
1561 auto reg = registerMapping.getFirst();
1567void NVPTXAsmPrinter::printFPConstant(
const ConstantFP *Fp,
1568 raw_ostream &O)
const {
1571 unsigned int numHex;
1589void NVPTXAsmPrinter::printScalarConstant(
const Constant *CPV, raw_ostream &O) {
1595 printFPConstant(CFP, O);
1604 if (EmitGeneric && !
isa<Function>(CPV) && !IsNonGenericPointer) {
1621void NVPTXAsmPrinter::bufferLEByte(
const Constant *CPV,
int Bytes,
1622 AggBuffer *AggBuffer) {
1624 int AllocSize =
DL.getTypeAllocSize(CPV->
getType());
1628 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1633 auto AddIntToBuffer = [AggBuffer, Bytes](
const APInt &Val) {
1634 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1640 for (
unsigned I = 0;
I < NumBytes - 1; ++
I) {
1641 Buf[
I] = Val.extractBitsAsZExtValue(8,
I * 8);
1643 size_t LastBytePosition = (NumBytes - 1) * 8;
1644 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1646 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1647 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1657 if (
const auto *CI =
1662 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1663 Value *
V = Cexpr->getOperand(0)->stripPointerCasts();
1664 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1665 AggBuffer->addZeros(AllocSize);
1681 AggBuffer->addSymbol(GVar, GVar);
1683 const Value *
v = Cexpr->stripPointerCasts();
1684 AggBuffer->addSymbol(v, Cexpr);
1686 AggBuffer->addZeros(AllocSize);
1694 bufferAggregateConstant(CPV, AggBuffer);
1695 if (Bytes > AllocSize)
1696 AggBuffer->addZeros(Bytes - AllocSize);
1698 AggBuffer->addZeros(Bytes);
1709void NVPTXAsmPrinter::bufferAggregateConstant(
const Constant *CPV,
1710 AggBuffer *aggBuffer) {
1713 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1720 ExtendBuffer(CI->
getValue(), aggBuffer);
1726 if (CFP->getType()->isFP128Ty()) {
1727 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1740 for (
unsigned I :
llvm::seq(CDS->getNumElements()))
1741 bufferLEByte(
cast<Constant>(CDS->getElementAsConstant(
I)), 0, aggBuffer);
1750 ?
DL.getStructLayout(ST)->getElementOffset(0) +
1751 DL.getTypeAllocSize(ST)
1752 :
DL.getStructLayout(ST)->getElementOffset(
I + 1);
1753 int Bytes = EndOffset -
DL.getStructLayout(ST)->getElementOffset(
I);
1767NVPTXAsmPrinter::lowerConstantForGV(
const Constant *CV,
1768 bool ProcessingGeneric)
const {
1779 if (ProcessingGeneric)
1789 switch (
CE->getOpcode()) {
1793 case Instruction::AddrSpaceCast: {
1796 if (DstTy->getAddressSpace() == 0)
1802 case Instruction::GetElementPtr: {
1806 APInt OffsetAI(
DL.getPointerTypeSizeInBits(
CE->getType()), 0);
1809 const MCExpr *
Base = lowerConstantForGV(
CE->getOperand(0),
1814 int64_t
Offset = OffsetAI.getSExtValue();
1819 case Instruction::Trunc:
1825 case Instruction::BitCast:
1826 return lowerConstantForGV(
CE->getOperand(0), ProcessingGeneric);
1828 case Instruction::IntToPtr: {
1837 return lowerConstantForGV(
Op, ProcessingGeneric);
1842 case Instruction::PtrToInt: {
1848 Type *Ty =
CE->getType();
1850 const MCExpr *OpExpr = lowerConstantForGV(
Op, ProcessingGeneric);
1854 if (
DL.getTypeAllocSize(Ty) ==
DL.getTypeAllocSize(
Op->getType()))
1860 unsigned InBits =
DL.getTypeAllocSizeInBits(
Op->getType());
1867 case Instruction::Add: {
1868 const MCExpr *
LHS = lowerConstantForGV(
CE->getOperand(0), ProcessingGeneric);
1869 const MCExpr *
RHS = lowerConstantForGV(
CE->getOperand(1), ProcessingGeneric);
1870 switch (
CE->getOpcode()) {
1882 return lowerConstantForGV(
C, ProcessingGeneric);
1886 raw_string_ostream OS(S);
1887 OS <<
"Unsupported expression in static initializer: ";
1888 CE->printAsOperand(OS,
false,
1889 !
MF ?
nullptr :
MF->getFunction().getParent());
1893void NVPTXAsmPrinter::printMCExpr(
const MCExpr &Expr, raw_ostream &OS)
const {
1894 OutContext.getAsmInfo()->printExpr(OS, Expr);
1899bool NVPTXAsmPrinter::PrintAsmOperand(
const MachineInstr *
MI,
unsigned OpNo,
1900 const char *ExtraCode, raw_ostream &O) {
1901 if (ExtraCode && ExtraCode[0]) {
1902 if (ExtraCode[1] != 0)
1905 switch (ExtraCode[0]) {
1914 printOperand(
MI, OpNo, O);
1919bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
const MachineInstr *
MI,
1921 const char *ExtraCode,
1923 if (ExtraCode && ExtraCode[0])
1927 printMemOperand(
MI, OpNo, O);
1933void NVPTXAsmPrinter::printOperand(
const MachineInstr *
MI,
unsigned OpNum,
1935 const MachineOperand &MO =
MI->getOperand(OpNum);
1939 if (MO.
getReg() == NVPTX::VRDepot)
1944 emitVirtualRegister(MO.
getReg(), O);
1969void NVPTXAsmPrinter::printMemOperand(
const MachineInstr *
MI,
unsigned OpNum,
1970 raw_ostream &O,
const char *Modifier) {
1971 printOperand(
MI, OpNum, O);
1973 if (Modifier && strcmp(Modifier,
"add") == 0) {
1975 printOperand(
MI, OpNum + 1, O);
1977 if (
MI->getOperand(OpNum + 1).isImm() &&
1978 MI->getOperand(OpNum + 1).getImm() == 0)
1981 printOperand(
MI, OpNum + 1, O);
1992LLVMInitializeNVPTXAsmPrinter() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static StringRef getTextureName(const Value &V)
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static StringRef getSurfaceName(const Value &V)
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static StringRef getSamplerName(const Value &V)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallString class.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static const fltSemantics & IEEEsingle()
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
APInt bitcastToAPInt() const
uint64_t getZExtValue() const
Get zero extended value.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getBitWidth() const
Return the number of bits in the APInt.
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
TargetMachine & TM
Target machine description.
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
MachineFunction * MF
The current machine function.
bool hasDebugInfo() const
Returns true if valid debug info is present.
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
MCContext & OutContext
This is the context for the output file that we are streaming.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
iterator find(const_arg_type_t< KeyT > Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Implements a dense probed hash-table based set.
Collects and handles dwarf debug information.
LLVM_ABI const GlobalObject * getAliaseeObject() const
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Instances of this class represent a single low-level machine instruction.
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
DwarfDebug * createDwarfDebug() override
Create NVPTX-specific DwarfDebug handler.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
NVPTX-specific DwarfDebug implementation.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
void clearDebugRegisterMap() const
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ HalfTyID
16-bit floating point type
@ VoidTyID
type with no size
@ FloatTyID
32-bit floating point type
@ IntegerTyID
Arbitrary bit width integers.
@ FixedVectorTyID
Fixed width SIMD vector type.
@ BFloatTyID
16-bit floating point type (7-bit significand)
@ DoubleTyID
64-bit floating point type
@ FP128TyID
128-bit floating point type (112-bit significand)
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeID getTypeID() const
Return the type id for the type.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to a SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
constexpr StringLiteral BlocksAreClusters("nvvm.blocksareclusters")
@ CE
Windows NT (Windows on ARM)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
uint64_t read64le(const void *P)
uint32_t read32le(const void *P)
This is an optimization pass for GlobalISel generic memory operations.
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
FunctionAddr VTableAddr Value
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MaybeAlign getAlign(const CallInst &I, unsigned Index)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
std::optional< unsigned > getMaxNReg(const Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
std::optional< unsigned > getMaxClusterRank(const Function &F)
Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, Align InitialAlign, const DataLayout &DL)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
PTXOpaqueType getPTXOpaqueType(const GlobalVariable &GV)
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, const DataLayout &DL)
Since function arguments are passed via .param space, we may want to increase their alignment in a wa...
Target & getTheNVPTXTarget32()
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...