97#define DEPOTNAME "__local_depot"
110 for (
const auto &O : U->operands())
123 if (Visited.
count(GV))
127 if (!Visiting.
insert(GV).second)
132 for (
const auto &O : GV->
operands())
145 NVPTX_MC::verifyInstructionPredicates(
MI->getOpcode(),
149 lowerToMCInst(
MI, Inst);
156 if (
MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
157 const MachineOperand &MO =
MI->getOperand(0);
163 for (
const auto MO :
MI->operands())
208unsigned NVPTXAsmPrinter::encodeVirtualRegister(
unsigned Reg) {
210 const TargetRegisterClass *RC = MRI->getRegClass(
Reg);
212 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
213 unsigned RegNum = RegMap[
Reg];
218 if (RC == &NVPTX::B1RegClass) {
220 }
else if (RC == &NVPTX::B16RegClass) {
222 }
else if (RC == &NVPTX::B32RegClass) {
224 }
else if (RC == &NVPTX::B64RegClass) {
226 }
else if (RC == &NVPTX::B128RegClass) {
233 Ret |= (RegNum & 0x0FFFFFFF);
238 return Reg & 0x0FFFFFFF;
250 const NVPTXSubtarget &STI =
TM.getSubtarget<NVPTXSubtarget>(*F);
253 Type *Ty = F->getReturnType();
258 auto PrintScalarRetVal = [&](
unsigned Size) {
262 const unsigned TotalSize =
DL.getTypeAllocSize(Ty);
263 const Align RetAlignment = TLI->getFunctionArgumentAlignment(
264 F, Ty, AttributeList::ReturnIndex,
DL);
265 O <<
".param .align " << RetAlignment.
value() <<
" .b8 func_retval0["
270 PrintScalarRetVal(ITy->getBitWidth());
272 PrintScalarRetVal(TLI->getPointerTy(
DL).getSizeInBits());
281 printReturnValStr(&F, O);
286bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
301 if (
const BasicBlock *PBB = PMBB->getBasicBlock()) {
303 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
306 if (MDNode *UnrollCountMD =
320 if (isLoopHeaderOfNoUnroll(
MBB))
321 OutStreamer->emitRawText(StringRef(
"\t.pragma \"nounroll\";\n"));
325 SmallString<128> Str;
326 raw_svector_ostream
O(Str);
328 if (!GlobalsEmitted) {
329 emitGlobals(*
MF->getFunction().getParent());
330 GlobalsEmitted =
true;
334 MRI = &
MF->getRegInfo();
335 F = &
MF->getFunction();
336 emitLinkageDirective(F, O);
341 printReturnValStr(*
MF, O);
346 emitFunctionParamList(F, O);
350 emitKernelFunctionDirectives(*F, O);
360 setAndEmitFunctionVirtualRegisters(*
MF);
361 encodeDebugInfoRegisterNumbers(*
MF);
363 if (
const DISubprogram *SP =
MF->getFunction().getSubprogram()) {
365 if (!
SP->getUnit()->isDebugDirectivesOnly())
398void NVPTXAsmPrinter::emitImplicitDef(
const MachineInstr *
MI)
const {
411void NVPTXAsmPrinter::emitKernelFunctionDirectives(
const Function &
F,
417 if (!ReqNTID.empty())
418 O <<
formatv(
".reqntid {0:$[, ]}\n",
422 if (!MaxNTID.empty())
423 O <<
formatv(
".maxntid {0:$[, ]}\n",
427 O <<
".minnctapersm " << *Mincta <<
"\n";
430 O <<
".maxnreg " << *Maxnreg <<
"\n";
434 const NVPTXTargetMachine &NTM =
static_cast<const NVPTXTargetMachine &
>(
TM);
435 const NVPTXSubtarget *STI = &NTM.
getSubtarget<NVPTXSubtarget>(F);
441 if (!ClusterDim.empty()) {
443 if (!BlocksAreClusters)
444 O <<
".explicitcluster\n";
446 if (ClusterDim[0] != 0) {
448 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
449 "should be non-zero as well");
451 O <<
formatv(
".reqnctapercluster {0:$[, ]}\n",
452 make_range(ClusterDim.begin(), ClusterDim.end()));
455 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
456 "should be 0 as well");
460 if (BlocksAreClusters) {
461 LLVMContext &Ctx = F.getContext();
462 if (ReqNTID.empty() || ClusterDim.empty())
463 Ctx.
diagnose(DiagnosticInfoUnsupported(
464 F,
"blocksareclusters requires reqntid and cluster_dim attributes",
467 Ctx.
diagnose(DiagnosticInfoUnsupported(
468 F,
"blocksareclusters requires PTX version >= 9.0",
471 O <<
".blocksareclusters\n";
475 O <<
".maxclusterrank " << *Maxclusterrank <<
"\n";
486 assert(
I != VRegMapping.end() &&
"Bad register class");
490 assert(VI != RegMap.
end() &&
"Bad virtual register");
491 unsigned MappedVR = VI->second;
498void NVPTXAsmPrinter::emitVirtualRegister(
unsigned int vr,
503void NVPTXAsmPrinter::emitAliasDeclaration(
const GlobalAlias *GA,
508 "NVPTX aliasee must be a non-kernel function definition");
518 emitDeclarationWithName(F,
getSymbol(F), O);
523 emitLinkageDirective(F, O);
528 printReturnValStr(F, O);
531 emitFunctionParamList(F, O);
543 return GV->getName() !=
"llvm.used";
545 for (
const User *U :
C->users())
555 if (OtherGV->getName() ==
"llvm.used")
559 if (
const Function *CurFunc =
I->getFunction()) {
560 if (OneFunc && (CurFunc != OneFunc))
601 for (
const User *U :
C->users()) {
606 if (
const Function *Caller =
I->getFunction())
615 SmallPtrSet<const Function *, 32> SeenSet;
616 for (
const Function &F : M) {
617 if (F.getAttributes().hasFnAttr(
"nvptx-libcall-callee")) {
618 emitDeclaration(&F, O);
622 if (F.isDeclaration()) {
625 if (F.getIntrinsicID())
627 emitDeclaration(&F, O);
630 for (
const User *U : F.users()) {
636 emitDeclaration(&F, O);
642 emitDeclaration(&F, O);
657 emitDeclaration(&F, O);
663 for (
const GlobalAlias &GA :
M.aliases())
664 emitAliasDeclaration(&GA, O);
667void NVPTXAsmPrinter::emitStartOfAsmFile(
Module &M) {
671 const NVPTXTargetMachine &NTM =
static_cast<const NVPTXTargetMachine &
>(
TM);
673 SmallString<128> Str1;
674 raw_svector_ostream OS1(Str1);
677 emitHeader(M, OS1, *STI);
690 GlobalsEmitted =
false;
695void NVPTXAsmPrinter::emitGlobals(
const Module &M) {
699 emitDeclarations(M, OS2);
714 assert(GVVisited.
size() == M.global_size() &&
"Missed a global variable");
715 assert(GVVisiting.
size() == 0 &&
"Did not fully process a global variable");
722 printModuleLevelGV(GV, OS2,
false, STI);
746 "// Generated by LLVM NVPTX Back-End\n"
749 <<
".version " << (PTXVersion / 10) <<
"." << (PTXVersion % 10) <<
"\n"
752 const NVPTXTargetMachine &NTM =
static_cast<const NVPTXTargetMachine &
>(
TM);
754 O <<
", texmode_independent";
756 bool HasFullDebugInfo =
false;
757 for (DICompileUnit *CU :
M.debug_compile_units()) {
758 switch(CU->getEmissionKind()) {
764 HasFullDebugInfo =
true;
767 if (HasFullDebugInfo)
770 if (HasFullDebugInfo)
774 <<
".address_size " << (NTM.
is64Bit() ?
"64" :
"32") <<
"\n"
781 if (!GlobalsEmitted) {
783 GlobalsEmitted =
true;
795 TS->closeLastSection();
797 OutStreamer->emitRawText(
"\t.section\t.debug_macinfo\t{\t}");
819void NVPTXAsmPrinter::emitLinkageDirective(
const GlobalValue *V,
822 if (V->hasExternalLinkage()) {
824 O << (GVar->hasInitializer() ?
".visible " :
".extern ");
825 else if (V->isDeclaration())
829 }
else if (V->hasAppendingLinkage()) {
831 "' has unsupported appending linkage type");
832 }
else if (!
V->hasInternalLinkage() && !
V->hasPrivateLinkage()) {
838void NVPTXAsmPrinter::printModuleLevelGV(
const GlobalVariable *GVar,
884 emitPTXGlobalVariable(GVar, O, STI);
892 const Constant *Initializer =
nullptr;
895 const ConstantInt *CI =
nullptr;
906 O <<
"addr_mode_" << i <<
" = ";
912 O <<
"clamp_to_border";
915 O <<
"clamp_to_edge";
926 O <<
"filter_mode = ";
941 O <<
", force_unnormalized_coords = 1";
961 const Function *DemotedFunc =
nullptr;
963 O <<
"// " << GVar->
getName() <<
" has been demoted\n";
964 localDecls[DemotedFunc].push_back(GVar);
974 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
975 O <<
" .attribute(.managed)";
979 << GVar->
getAlign().value_or(
DL.getPrefTypeAlign(ETy)).value();
988 O << getPTXFundamentalTypeStr(ETy,
false);
1001 printScalarConstant(Initializer, O);
1010 "' is not allowed in addrspace(" +
1026 const uint64_t ElementSize =
DL.getTypeStoreSize(ETy);
1034 AggBuffer aggBuffer(ElementSize, *
this);
1035 bufferAggregateConstant(Initializer, &aggBuffer);
1036 if (aggBuffer.numSymbols()) {
1037 const unsigned int ptrSize =
MAI->getCodePointerSize();
1038 if (ElementSize % ptrSize ||
1039 !aggBuffer.allSymbolsAligned(ptrSize)) {
1043 "initialized packed aggregate with pointers '" +
1045 "' requires at least PTX ISA version 7.1");
1048 O <<
"[" << ElementSize <<
"] = {";
1049 aggBuffer.printBytes(O);
1052 O <<
" .u" << ptrSize * 8 <<
" ";
1054 O <<
"[" << ElementSize / ptrSize <<
"] = {";
1055 aggBuffer.printWords(O);
1061 O <<
"[" << ElementSize <<
"] = {";
1062 aggBuffer.printBytes(O);
1069 O <<
"[" << ElementSize <<
"]";
1075 O <<
"[" << ElementSize <<
"]";
1086void NVPTXAsmPrinter::AggBuffer::printSymbol(
unsigned nSym,
raw_ostream &os) {
1087 const Value *
v = Symbols[nSym];
1088 const Value *v0 = SymbolsBeforeStripping[nSym];
1093 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1096 Name->print(os, AP.MAI);
1099 Name->print(os, AP.MAI);
1102 const MCExpr *Expr = AP.lowerConstantForGV(CExpr,
false);
1103 AP.printMCExpr(*Expr, os);
1108void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1109 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1114 unsigned int InitializerCount =
size;
1117 if (numSymbols() == 0)
1118 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1121 symbolPosInBuffer.push_back(InitializerCount);
1122 unsigned int nSym = 0;
1123 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1124 for (
unsigned int pos = 0; pos < InitializerCount;) {
1127 if (pos != nextSymbolPos) {
1128 os << (
unsigned int)buffer[pos];
1135 std::string symText;
1136 llvm::raw_string_ostream oss(symText);
1137 printSymbol(nSym, oss);
1138 for (
unsigned i = 0; i < ptrSize; ++i) {
1142 os <<
"(" << symText <<
")";
1145 nextSymbolPos = symbolPosInBuffer[++nSym];
1146 assert(nextSymbolPos >= pos);
1150void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1151 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1152 symbolPosInBuffer.push_back(
size);
1153 unsigned int nSym = 0;
1154 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1155 assert(nextSymbolPos % ptrSize == 0);
1156 for (
unsigned int pos = 0; pos <
size; pos += ptrSize) {
1159 if (pos == nextSymbolPos) {
1160 printSymbol(nSym, os);
1161 nextSymbolPos = symbolPosInBuffer[++nSym];
1162 assert(nextSymbolPos % ptrSize == 0);
1163 assert(nextSymbolPos >= pos + ptrSize);
1164 }
else if (ptrSize == 4)
1171void NVPTXAsmPrinter::emitDemotedVars(
const Function *F, raw_ostream &O) {
1172 auto It = localDecls.find(F);
1173 if (It == localDecls.end())
1178 const NVPTXTargetMachine &NTM =
static_cast<const NVPTXTargetMachine &
>(
TM);
1181 for (
const GlobalVariable *GV : GVars) {
1182 O <<
"\t// demoted variable\n\t";
1183 printModuleLevelGV(GV, O,
true, STI);
1187void NVPTXAsmPrinter::emitPTXAddressSpace(
unsigned int AddressSpace,
1188 raw_ostream &O)
const {
1210NVPTXAsmPrinter::getPTXFundamentalTypeStr(
Type *Ty,
bool useB4PTR)
const {
1216 if (NumBits <= 64) {
1217 std::string
name =
"u";
1234 assert((PtrSize == 64 || PtrSize == 32) &&
"Unexpected pointer size");
1252void NVPTXAsmPrinter::emitPTXGlobalVariable(
const GlobalVariable *GVar,
1254 const NVPTXSubtarget &STI) {
1265 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1267 O <<
" .attribute(.managed)";
1270 << GVar->
getAlign().value_or(
DL.getPrefTypeAlign(ETy)).value();
1281 O <<
" ." << getPTXFundamentalTypeStr(ETy) <<
" ";
1286 int64_t ElementSize = 0;
1296 ElementSize =
DL.getTypeStoreSize(ETy);
1310void NVPTXAsmPrinter::emitFunctionParamList(
const Function *F, raw_ostream &O) {
1312 const NVPTXSubtarget &STI =
TM.getSubtarget<NVPTXSubtarget>(*F);
1314 const NVPTXMachineFunctionInfo *MFI =
1315 MF ?
MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1317 bool IsFirst =
true;
1320 if (F->arg_empty() && !F->isVarArg()) {
1327 for (
const Argument &Arg : F->args()) {
1328 Type *Ty = Arg.getType();
1329 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1340 const bool IsSurface = !IsSampler && !IsTexture &&
1342 if (IsSampler || IsTexture || IsSurface) {
1349 O <<
".samplerref ";
1359 auto GetOptimalAlignForParam = [TLI, &
DL, F, &Arg](
Type *Ty) -> Align {
1360 if (MaybeAlign StackAlign =
1361 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1362 return StackAlign.value();
1364 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty,
DL);
1365 MaybeAlign ParamAlign =
1366 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1367 return std::max(TypeAlign, ParamAlign.
valueOrOne());
1370 if (Arg.hasByValAttr()) {
1372 Type *ETy = Arg.getParamByValType();
1373 assert(ETy &&
"Param should have byval type");
1379 const Align OptimalAlign =
1380 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1381 : TLI->getFunctionByValParamAlign(
1382 F, ETy, Arg.getParamAlign().valueOrOne(),
DL);
1384 O <<
"\t.param .align " << OptimalAlign.
value() <<
" .b8 " << ParamSym
1385 <<
"[" <<
DL.getTypeAllocSize(ETy) <<
"]";
1394 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1396 O <<
"\t.param .align " << OptimalAlign.
value() <<
" .b8 " << ParamSym
1397 <<
"[" <<
DL.getTypeAllocSize(Ty) <<
"]";
1403 unsigned PTySizeInBits = 0;
1406 TLI->getPointerTy(
DL, PTy->getAddressSpace()).getSizeInBits();
1407 assert(PTySizeInBits &&
"Invalid pointer size");
1412 O <<
"\t.param .u" << PTySizeInBits <<
" .ptr";
1414 switch (PTy->getAddressSpace()) {
1431 O <<
" .align " << Arg.getParamAlign().valueOrOne().value() <<
" "
1442 O << getPTXFundamentalTypeStr(Ty);
1443 O <<
" " << ParamSym;
1452 assert(PTySizeInBits &&
"Invalid pointer size");
1453 Size = PTySizeInBits;
1456 O <<
"\t.param .b" <<
Size <<
" " << ParamSym;
1459 if (F->isVarArg()) {
1463 << TLI->getParamName(F, -1) <<
"[]";
1469void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1470 const MachineFunction &
MF) {
1471 SmallString<128> Str;
1472 raw_svector_ostream
O(Str);
1476 const TargetRegisterInfo *
TRI =
MF.getSubtarget().getRegisterInfo();
1479 const MachineFrameInfo &MFI =
MF.getFrameInfo();
1484 if (
static_cast<const NVPTXTargetMachine &
>(
MF.getTarget()).is64Bit()) {
1485 O <<
"\t.reg .b64 \t%SP;\n"
1486 <<
"\t.reg .b64 \t%SPL;\n";
1488 O <<
"\t.reg .b32 \t%SP;\n"
1489 <<
"\t.reg .b32 \t%SPL;\n";
1497 for (
unsigned I :
llvm::seq(MRI->getNumVirtRegs())) {
1499 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1501 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1502 RCRegMap[VR] = RCRegMap.size() + 1;
1507 for (
const TargetRegisterClass *RC :
TRI->regclasses()) {
1508 const unsigned N = VRegMapping[RC].size();
1514 O <<
"\t.reg " << RCName <<
" \t" << RCStr <<
"<" << (
N + 1) <<
">;\n";
1523void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1524 const MachineFunction &
MF) {
1525 const NVPTXSubtarget &STI =
MF.getSubtarget<NVPTXSubtarget>();
1533 for (
auto &classMap : VRegMapping) {
1534 for (
auto ®isterMapping : classMap.getSecond()) {
1535 auto reg = registerMapping.getFirst();
1541void NVPTXAsmPrinter::printFPConstant(
const ConstantFP *Fp,
1542 raw_ostream &O)
const {
1545 unsigned int numHex;
1563void NVPTXAsmPrinter::printScalarConstant(
const Constant *CPV, raw_ostream &O) {
1569 printFPConstant(CFP, O);
1578 if (EmitGeneric && !
isa<Function>(CPV) && !IsNonGenericPointer) {
1595void NVPTXAsmPrinter::bufferLEByte(
const Constant *CPV,
int Bytes,
1596 AggBuffer *AggBuffer) {
1598 int AllocSize =
DL.getTypeAllocSize(CPV->
getType());
1602 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1607 auto AddIntToBuffer = [AggBuffer, Bytes](
const APInt &Val) {
1608 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1614 for (
unsigned I = 0;
I < NumBytes - 1; ++
I) {
1615 Buf[
I] = Val.extractBitsAsZExtValue(8,
I * 8);
1617 size_t LastBytePosition = (NumBytes - 1) * 8;
1618 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1620 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1621 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1631 if (
const auto *CI =
1636 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1637 Value *
V = Cexpr->getOperand(0)->stripPointerCasts();
1638 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1639 AggBuffer->addZeros(AllocSize);
1655 AggBuffer->addSymbol(GVar, GVar);
1657 const Value *
v = Cexpr->stripPointerCasts();
1658 AggBuffer->addSymbol(v, Cexpr);
1660 AggBuffer->addZeros(AllocSize);
1668 bufferAggregateConstant(CPV, AggBuffer);
1669 if (Bytes > AllocSize)
1670 AggBuffer->addZeros(Bytes - AllocSize);
1672 AggBuffer->addZeros(Bytes);
1683void NVPTXAsmPrinter::bufferAggregateConstant(
const Constant *CPV,
1684 AggBuffer *aggBuffer) {
1687 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1694 ExtendBuffer(CI->
getValue(), aggBuffer);
1700 if (CFP->getType()->isFP128Ty()) {
1701 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1714 for (
unsigned I :
llvm::seq(CDS->getNumElements()))
1715 bufferLEByte(
cast<Constant>(CDS->getElementAsConstant(
I)), 0, aggBuffer);
1724 ?
DL.getStructLayout(ST)->getElementOffset(0) +
1725 DL.getTypeAllocSize(ST)
1726 :
DL.getStructLayout(ST)->getElementOffset(
I + 1);
1727 int Bytes = EndOffset -
DL.getStructLayout(ST)->getElementOffset(
I);
1741NVPTXAsmPrinter::lowerConstantForGV(
const Constant *CV,
1742 bool ProcessingGeneric)
const {
1753 if (ProcessingGeneric)
1763 switch (
CE->getOpcode()) {
1767 case Instruction::AddrSpaceCast: {
1770 if (DstTy->getAddressSpace() == 0)
1776 case Instruction::GetElementPtr: {
1780 APInt OffsetAI(
DL.getPointerTypeSizeInBits(
CE->getType()), 0);
1783 const MCExpr *
Base = lowerConstantForGV(
CE->getOperand(0),
1788 int64_t
Offset = OffsetAI.getSExtValue();
1793 case Instruction::Trunc:
1799 case Instruction::BitCast:
1800 return lowerConstantForGV(
CE->getOperand(0), ProcessingGeneric);
1802 case Instruction::IntToPtr: {
1811 return lowerConstantForGV(
Op, ProcessingGeneric);
1816 case Instruction::PtrToInt: {
1822 Type *Ty =
CE->getType();
1824 const MCExpr *OpExpr = lowerConstantForGV(
Op, ProcessingGeneric);
1828 if (
DL.getTypeAllocSize(Ty) ==
DL.getTypeAllocSize(
Op->getType()))
1834 unsigned InBits =
DL.getTypeAllocSizeInBits(
Op->getType());
1841 case Instruction::Add: {
1842 const MCExpr *
LHS = lowerConstantForGV(
CE->getOperand(0), ProcessingGeneric);
1843 const MCExpr *
RHS = lowerConstantForGV(
CE->getOperand(1), ProcessingGeneric);
1844 switch (
CE->getOpcode()) {
1856 return lowerConstantForGV(
C, ProcessingGeneric);
1860 raw_string_ostream OS(S);
1861 OS <<
"Unsupported expression in static initializer: ";
1862 CE->printAsOperand(OS,
false,
1863 !
MF ?
nullptr :
MF->getFunction().getParent());
1867void NVPTXAsmPrinter::printMCExpr(
const MCExpr &Expr, raw_ostream &OS)
const {
1868 OutContext.getAsmInfo()->printExpr(OS, Expr);
1873bool NVPTXAsmPrinter::PrintAsmOperand(
const MachineInstr *
MI,
unsigned OpNo,
1874 const char *ExtraCode, raw_ostream &O) {
1875 if (ExtraCode && ExtraCode[0]) {
1876 if (ExtraCode[1] != 0)
1879 switch (ExtraCode[0]) {
1888 printOperand(
MI, OpNo, O);
1893bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
const MachineInstr *
MI,
1895 const char *ExtraCode,
1897 if (ExtraCode && ExtraCode[0])
1901 printMemOperand(
MI, OpNo, O);
1907void NVPTXAsmPrinter::printOperand(
const MachineInstr *
MI,
unsigned OpNum,
1909 const MachineOperand &MO =
MI->getOperand(OpNum);
1913 if (MO.
getReg() == NVPTX::VRDepot)
1918 emitVirtualRegister(MO.
getReg(), O);
1943void NVPTXAsmPrinter::printMemOperand(
const MachineInstr *
MI,
unsigned OpNum,
1944 raw_ostream &O,
const char *Modifier) {
1945 printOperand(
MI, OpNum, O);
1947 if (Modifier && strcmp(Modifier,
"add") == 0) {
1949 printOperand(
MI, OpNum + 1, O);
1951 if (
MI->getOperand(OpNum + 1).isImm() &&
1952 MI->getOperand(OpNum + 1).getImm() == 0)
1955 printOperand(
MI, OpNum + 1, O);
1966LLVMInitializeNVPTXAsmPrinter() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallString class.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
APInt bitcastToAPInt() const
uint64_t getZExtValue() const
Get zero extended value.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getBitWidth() const
Return the number of bits in the APInt.
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
TargetMachine & TM
Target machine description.
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
MachineFunction * MF
The current machine function.
bool hasDebugInfo() const
Returns true if valid debug info is present.
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
MCContext & OutContext
This is the context for the output file that we are streaming.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits inital debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
iterator find(const_arg_type_t< KeyT > Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Implements a dense probed hash-table based set.
LLVM_ABI const GlobalObject * getAliaseeObject() const
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Instances of this class represent a single low-level machine instruction.
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
void clearDebugRegisterMap() const
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implments NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ HalfTyID
16-bit floating point type
@ VoidTyID
type with no size
@ FloatTyID
32-bit floating point type
@ IntegerTyID
Arbitrary bit width integers.
@ FixedVectorTyID
Fixed width SIMD vector type.
@ BFloatTyID
16-bit floating point type (7-bit significand)
@ DoubleTyID
64-bit floating point type
@ FP128TyID
128-bit floating point type (112-bit significand)
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeID getTypeID() const
Return the type id for the type.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to a SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
uint64_t read64le(const void *P)
uint32_t read32le(const void *P)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MaybeAlign getAlign(const CallInst &I, unsigned Index)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
std::optional< unsigned > getMaxNReg(const Function &F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
StringRef getSamplerName(const Value &V)
bool isImageReadWrite(const Value &V)
bool isImageReadOnly(const Value &V)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
bool isSampler(const Value &V)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isSurface(const Value &V)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
std::optional< unsigned > getMaxClusterRank(const Function &F)
StringRef getTextureName(const Value &V)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
StringRef getSurfaceName(const Value &V)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool isTexture(const Value &V)
bool isImageWriteOnly(const Value &V)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than C.
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (for example, "llvm.loop.unroll.count").
Target & getTheNVPTXTarget32()
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
uint64_t value() const
This is a hole in the type system and should not be abused.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
RegisterAsmPrinter - Helper template for registering a target specific assembly printer, for use in the target machine initialization function.