LLVM API Documentation

NVPTXAsmPrinter.cpp
Go to the documentation of this file.
00001 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains a printer that converts from our internal representation
00011 // of machine-dependent LLVM code to NVPTX assembly language.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "NVPTXAsmPrinter.h"
00016 #include "InstPrinter/NVPTXInstPrinter.h"
00017 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
00018 #include "NVPTX.h"
00019 #include "NVPTXInstrInfo.h"
00020 #include "NVPTXMachineFunctionInfo.h"
00021 #include "NVPTXMCExpr.h"
00022 #include "NVPTXRegisterInfo.h"
00023 #include "NVPTXTargetMachine.h"
00024 #include "NVPTXUtilities.h"
00025 #include "cl_common_defines.h"
00026 #include "llvm/ADT/StringExtras.h"
00027 #include "llvm/Analysis/ConstantFolding.h"
00028 #include "llvm/CodeGen/Analysis.h"
00029 #include "llvm/CodeGen/MachineFrameInfo.h"
00030 #include "llvm/CodeGen/MachineModuleInfo.h"
00031 #include "llvm/CodeGen/MachineRegisterInfo.h"
00032 #include "llvm/IR/DebugInfo.h"
00033 #include "llvm/IR/DerivedTypes.h"
00034 #include "llvm/IR/Function.h"
00035 #include "llvm/IR/GlobalVariable.h"
00036 #include "llvm/IR/Mangler.h"
00037 #include "llvm/IR/Module.h"
00038 #include "llvm/IR/Operator.h"
00039 #include "llvm/MC/MCStreamer.h"
00040 #include "llvm/MC/MCSymbol.h"
00041 #include "llvm/Support/CommandLine.h"
00042 #include "llvm/Support/ErrorHandling.h"
00043 #include "llvm/Support/FormattedStream.h"
00044 #include "llvm/Support/Path.h"
00045 #include "llvm/Support/TargetRegistry.h"
00046 #include "llvm/Support/TimeValue.h"
00047 #include "llvm/Target/TargetLoweringObjectFile.h"
00048 #include <sstream>
00049 using namespace llvm;
00050 
00051 #define DEPOTNAME "__local_depot"
00052 
// Hidden command-line flag "nvptx-emit-line-numbers", initialised to true.
// emitLineNumberAsDotLoc() returns without emitting anything when it is false.
00053 static cl::opt<bool>
00054 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
00055                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
00056                 cl::init(true));
00057 
// Hidden command-line flag "nvptx-emit-src", initialised to false.
// When set, emitLineNumberAsDotLoc() calls emitSrcInText() for the current
// file and line before it emits the ".loc" directive.
00058 static cl::opt<bool>
00059 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
00060               cl::desc("NVPTX Specific: Emit source line in ptx file"),
00061               cl::init(false));
00062 
00063 namespace {
00064 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
00065 /// depends.
00066 void DiscoverDependentGlobals(const Value *V,
00067                               DenseSet<const GlobalVariable *> &Globals) {
00068   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
00069     Globals.insert(GV);
00070   else {
00071     if (const User *U = dyn_cast<User>(V)) {
00072       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
00073         DiscoverDependentGlobals(U->getOperand(i), Globals);
00074       }
00075     }
00076   }
00077 }
00078 
00079 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
00080 /// instances to be emitted, but only after any dependents have been added
00081 /// first.
00082 void VisitGlobalVariableForEmission(
00083     const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
00084     DenseSet<const GlobalVariable *> &Visited,
00085     DenseSet<const GlobalVariable *> &Visiting) {
00086   // Have we already visited this one?
00087   if (Visited.count(GV))
00088     return;
00089 
00090   // Do we have a circular dependency?
00091   if (!Visiting.insert(GV).second)
00092     report_fatal_error("Circular dependency found in global variable set");
00093 
00094   // Make sure we visit all dependents first
00095   DenseSet<const GlobalVariable *> Others;
00096   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
00097     DiscoverDependentGlobals(GV->getOperand(i), Others);
00098 
00099   for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
00100                                                   E = Others.end();
00101        I != E; ++I)
00102     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
00103 
00104   // Now we can visit ourself
00105   Order.push_back(GV);
00106   Visited.insert(GV);
00107   Visiting.erase(GV);
00108 }
00109 }
00110 
00111 // @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
00112 // cannot just link to the existing version.
00113 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
00114 ///
00115 using namespace nvptx;
00116 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
00117   MCContext &Ctx = AP.OutContext;
00118 
00119   if (CV->isNullValue() || isa<UndefValue>(CV))
00120     return MCConstantExpr::Create(0, Ctx);
00121 
00122   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
00123     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
00124 
00125   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
00126     return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
00127 
00128   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
00129     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
00130 
00131   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
00132   if (!CE)
00133     llvm_unreachable("Unknown constant value to lower!");
00134 
00135   switch (CE->getOpcode()) {
00136   default:
00137     // If the code isn't optimized, there may be outstanding folding
00138     // opportunities. Attempt to fold the expression using DataLayout as a
00139     // last resort before giving up.
00140     if (Constant *C = ConstantFoldConstantExpression(
00141             CE, AP.TM.getSubtargetImpl()->getDataLayout()))
00142       if (C != CE)
00143         return LowerConstant(C, AP);
00144 
00145     // Otherwise report the problem to the user.
00146     {
00147       std::string S;
00148       raw_string_ostream OS(S);
00149       OS << "Unsupported expression in static initializer: ";
00150       CE->printAsOperand(OS, /*PrintType=*/ false,
00151                          !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
00152       report_fatal_error(OS.str());
00153     }
00154   case Instruction::AddrSpaceCast: {
00155     // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
00156     // handled by the generic() logic in the MCExpr printer
00157     PointerType *DstTy            = cast<PointerType>(CE->getType());
00158     PointerType *SrcTy            = cast<PointerType>(CE->getOperand(0)->getType());
00159     if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
00160       return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
00161     }
00162     std::string S;
00163     raw_string_ostream OS(S);
00164     OS << "Unsupported expression in static initializer: ";
00165     CE->printAsOperand(OS, /*PrintType=*/ false,
00166                        !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
00167     report_fatal_error(OS.str());
00168   }
00169   case Instruction::GetElementPtr: {
00170     const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
00171     // Generate a symbolic expression for the byte address
00172     APInt OffsetAI(TD.getPointerSizeInBits(), 0);
00173     cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
00174 
00175     const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
00176     if (!OffsetAI)
00177       return Base;
00178 
00179     int64_t Offset = OffsetAI.getSExtValue();
00180     return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
00181                                    Ctx);
00182   }
00183 
00184   case Instruction::Trunc:
00185     // We emit the value and depend on the assembler to truncate the generated
00186     // expression properly.  This is important for differences between
00187     // blockaddress labels.  Since the two labels are in the same function, it
00188     // is reasonable to treat their delta as a 32-bit value.
00189   // FALL THROUGH.
00190   case Instruction::BitCast:
00191     return LowerConstant(CE->getOperand(0), AP);
00192 
00193   case Instruction::IntToPtr: {
00194     const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
00195     // Handle casts to pointers by changing them into casts to the appropriate
00196     // integer type.  This promotes constant folding and simplifies this code.
00197     Constant *Op = CE->getOperand(0);
00198     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
00199                                       false /*ZExt*/);
00200     return LowerConstant(Op, AP);
00201   }
00202 
00203   case Instruction::PtrToInt: {
00204     const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
00205     // Support only foldable casts to/from pointers that can be eliminated by
00206     // changing the pointer to the appropriately sized integer type.
00207     Constant *Op = CE->getOperand(0);
00208     Type *Ty = CE->getType();
00209 
00210     const MCExpr *OpExpr = LowerConstant(Op, AP);
00211 
00212     // We can emit the pointer value into this slot if the slot is an
00213     // integer slot equal to the size of the pointer.
00214     if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
00215       return OpExpr;
00216 
00217     // Otherwise the pointer is smaller than the resultant integer, mask off
00218     // the high bits so we are sure to get a proper truncation if the input is
00219     // a constant expr.
00220     unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
00221     const MCExpr *MaskExpr =
00222         MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
00223     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
00224   }
00225 
00226     // The MC library also has a right-shift operator, but it isn't consistently
00227   // signed or unsigned between different targets.
00228   case Instruction::Add:
00229   case Instruction::Sub:
00230   case Instruction::Mul:
00231   case Instruction::SDiv:
00232   case Instruction::SRem:
00233   case Instruction::Shl:
00234   case Instruction::And:
00235   case Instruction::Or:
00236   case Instruction::Xor: {
00237     const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
00238     const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
00239     switch (CE->getOpcode()) {
00240     default:
00241       llvm_unreachable("Unknown binary operator constant cast expr");
00242     case Instruction::Add:
00243       return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
00244     case Instruction::Sub:
00245       return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
00246     case Instruction::Mul:
00247       return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
00248     case Instruction::SDiv:
00249       return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
00250     case Instruction::SRem:
00251       return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
00252     case Instruction::Shl:
00253       return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
00254     case Instruction::And:
00255       return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
00256     case Instruction::Or:
00257       return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
00258     case Instruction::Xor:
00259       return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
00260     }
00261   }
00262   }
00263 }
00264 
00265 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
00266   if (!EmitLineNumbers)
00267     return;
00268   if (ignoreLoc(MI))
00269     return;
00270 
00271   DebugLoc curLoc = MI.getDebugLoc();
00272 
00273   if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
00274     return;
00275 
00276   if (prevDebugLoc == curLoc)
00277     return;
00278 
00279   prevDebugLoc = curLoc;
00280 
00281   if (curLoc.isUnknown())
00282     return;
00283 
00284   const MachineFunction *MF = MI.getParent()->getParent();
00285   //const TargetMachine &TM = MF->getTarget();
00286 
00287   const LLVMContext &ctx = MF->getFunction()->getContext();
00288   DIScope Scope(curLoc.getScope(ctx));
00289 
00290   assert((!Scope || Scope.isScope()) &&
00291     "Scope of a DebugLoc should be null or a DIScope.");
00292   if (!Scope)
00293      return;
00294 
00295   StringRef fileName(Scope.getFilename());
00296   StringRef dirName(Scope.getDirectory());
00297   SmallString<128> FullPathName = dirName;
00298   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
00299     sys::path::append(FullPathName, fileName);
00300     fileName = FullPathName.str();
00301   }
00302 
00303   if (filenameMap.find(fileName.str()) == filenameMap.end())
00304     return;
00305 
00306   // Emit the line from the source file.
00307   if (InterleaveSrc)
00308     this->emitSrcInText(fileName.str(), curLoc.getLine());
00309 
00310   std::stringstream temp;
00311   temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
00312        << " " << curLoc.getCol();
00313   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
00314 }
00315 
00316 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
00317   SmallString<128> Str;
00318   raw_svector_ostream OS(Str);
00319   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
00320     emitLineNumberAsDotLoc(*MI);
00321 
00322   MCInst Inst;
00323   lowerToMCInst(MI, Inst);
00324   EmitToStreamer(OutStreamer, Inst);
00325 }
00326 
00327 // Handle symbol backtracking for targets that do not support image handles
00328 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
00329                                            unsigned OpNo, MCOperand &MCOp) {
00330   const MachineOperand &MO = MI->getOperand(OpNo);
00331   const MCInstrDesc &MCID = MI->getDesc();
00332 
00333   if (MCID.TSFlags & NVPTXII::IsTexFlag) {
00334     // This is a texture fetch, so operand 4 is a texref and operand 5 is
00335     // a samplerref
00336     if (OpNo == 4 && MO.isImm()) {
00337       lowerImageHandleSymbol(MO.getImm(), MCOp);
00338       return true;
00339     }
00340     if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
00341       lowerImageHandleSymbol(MO.getImm(), MCOp);
00342       return true;
00343     }
00344 
00345     return false;
00346   } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
00347     unsigned VecSize =
00348       1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
00349 
00350     // For a surface load of vector size N, the Nth operand will be the surfref
00351     if (OpNo == VecSize && MO.isImm()) {
00352       lowerImageHandleSymbol(MO.getImm(), MCOp);
00353       return true;
00354     }
00355 
00356     return false;
00357   } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
00358     // This is a surface store, so operand 0 is a surfref
00359     if (OpNo == 0 && MO.isImm()) {
00360       lowerImageHandleSymbol(MO.getImm(), MCOp);
00361       return true;
00362     }
00363 
00364     return false;
00365   } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
00366     // This is a query, so operand 1 is a surfref/texref
00367     if (OpNo == 1 && MO.isImm()) {
00368       lowerImageHandleSymbol(MO.getImm(), MCOp);
00369       return true;
00370     }
00371 
00372     return false;
00373   }
00374 
00375   return false;
00376 }
00377 
00378 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
00379   // Ewwww
00380   TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
00381   NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
00382   const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
00383   const char *Sym = MFI->getImageHandleSymbol(Index);
00384   std::string *SymNamePtr =
00385     nvTM.getManagedStrPool()->getManagedString(Sym);
00386   MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
00387     StringRef(SymNamePtr->c_str())));
00388 }
00389 
00390 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
00391   OutMI.setOpcode(MI->getOpcode());
00392   const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
00393 
00394   // Special: Do not mangle symbol operand of CALL_PROTOTYPE
00395   if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
00396     const MachineOperand &MO = MI->getOperand(0);
00397     OutMI.addOperand(GetSymbolRef(
00398       OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
00399     return;
00400   }
00401 
00402   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00403     const MachineOperand &MO = MI->getOperand(i);
00404 
00405     MCOperand MCOp;
00406     if (!ST.hasImageHandles()) {
00407       if (lowerImageHandleOperand(MI, i, MCOp)) {
00408         OutMI.addOperand(MCOp);
00409         continue;
00410       }
00411     }
00412 
00413     if (lowerOperand(MO, MCOp))
00414       OutMI.addOperand(MCOp);
00415   }
00416 }
00417 
00418 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
00419                                    MCOperand &MCOp) {
00420   switch (MO.getType()) {
00421   default: llvm_unreachable("unknown operand type");
00422   case MachineOperand::MO_Register:
00423     MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
00424     break;
00425   case MachineOperand::MO_Immediate:
00426     MCOp = MCOperand::CreateImm(MO.getImm());
00427     break;
00428   case MachineOperand::MO_MachineBasicBlock:
00429     MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
00430         MO.getMBB()->getSymbol(), OutContext));
00431     break;
00432   case MachineOperand::MO_ExternalSymbol:
00433     MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
00434     break;
00435   case MachineOperand::MO_GlobalAddress:
00436     MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
00437     break;
00438   case MachineOperand::MO_FPImmediate: {
00439     const ConstantFP *Cnt = MO.getFPImm();
00440     APFloat Val = Cnt->getValueAPF();
00441 
00442     switch (Cnt->getType()->getTypeID()) {
00443     default: report_fatal_error("Unsupported FP type"); break;
00444     case Type::FloatTyID:
00445       MCOp = MCOperand::CreateExpr(
00446         NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
00447       break;
00448     case Type::DoubleTyID:
00449       MCOp = MCOperand::CreateExpr(
00450         NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
00451       break;
00452     }
00453     break;
00454   }
00455   }
00456   return true;
00457 }
00458 
00459 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
00460   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
00461     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00462 
00463     DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
00464     unsigned RegNum = RegMap[Reg];
00465 
00466     // Encode the register class in the upper 4 bits
00467     // Must be kept in sync with NVPTXInstPrinter::printRegName
00468     unsigned Ret = 0;
00469     if (RC == &NVPTX::Int1RegsRegClass) {
00470       Ret = (1 << 28);
00471     } else if (RC == &NVPTX::Int16RegsRegClass) {
00472       Ret = (2 << 28);
00473     } else if (RC == &NVPTX::Int32RegsRegClass) {
00474       Ret = (3 << 28);
00475     } else if (RC == &NVPTX::Int64RegsRegClass) {
00476       Ret = (4 << 28);
00477     } else if (RC == &NVPTX::Float32RegsRegClass) {
00478       Ret = (5 << 28);
00479     } else if (RC == &NVPTX::Float64RegsRegClass) {
00480       Ret = (6 << 28);
00481     } else {
00482       report_fatal_error("Bad register class");
00483     }
00484 
00485     // Insert the vreg number
00486     Ret |= (RegNum & 0x0FFFFFFF);
00487     return Ret;
00488   } else {
00489     // Some special-use registers are actually physical registers.
00490     // Encode this as the register class ID of 0 and the real register ID.
00491     return Reg & 0x0FFFFFFF;
00492   }
00493 }
00494 
00495 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
00496   const MCExpr *Expr;
00497   Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
00498                                  OutContext);
00499   return MCOperand::CreateExpr(Expr);
00500 }
00501 
00502 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
00503   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
00504   const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
00505 
00506   Type *Ty = F->getReturnType();
00507 
00508   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
00509 
00510   if (Ty->getTypeID() == Type::VoidTyID)
00511     return;
00512 
00513   O << " (";
00514 
00515   if (isABI) {
00516     if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
00517       unsigned size = 0;
00518       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
00519         size = ITy->getBitWidth();
00520         if (size < 32)
00521           size = 32;
00522       } else {
00523         assert(Ty->isFloatingPointTy() && "Floating point type expected here");
00524         size = Ty->getPrimitiveSizeInBits();
00525       }
00526 
00527       O << ".param .b" << size << " func_retval0";
00528     } else if (isa<PointerType>(Ty)) {
00529       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
00530         << " func_retval0";
00531     } else {
00532       if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
00533         unsigned totalsz = TD->getTypeAllocSize(Ty);
00534         unsigned retAlignment = 0;
00535         if (!llvm::getAlign(*F, 0, retAlignment))
00536           retAlignment = TD->getABITypeAlignment(Ty);
00537         O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
00538           << "]";
00539       } else
00540         assert(false && "Unknown return type");
00541     }
00542   } else {
00543     SmallVector<EVT, 16> vtparts;
00544     ComputeValueVTs(*TLI, Ty, vtparts);
00545     unsigned idx = 0;
00546     for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
00547       unsigned elems = 1;
00548       EVT elemtype = vtparts[i];
00549       if (vtparts[i].isVector()) {
00550         elems = vtparts[i].getVectorNumElements();
00551         elemtype = vtparts[i].getVectorElementType();
00552       }
00553 
00554       for (unsigned j = 0, je = elems; j != je; ++j) {
00555         unsigned sz = elemtype.getSizeInBits();
00556         if (elemtype.isInteger() && (sz < 32))
00557           sz = 32;
00558         O << ".reg .b" << sz << " func_retval" << idx;
00559         if (j < je - 1)
00560           O << ", ";
00561         ++idx;
00562       }
00563       if (i < e - 1)
00564         O << ", ";
00565     }
00566   }
00567   O << ") ";
00568   return;
00569 }
00570 
00571 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
00572                                         raw_ostream &O) {
00573   const Function *F = MF.getFunction();
00574   printReturnValStr(F, O);
00575 }
00576 
00577 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
00578   SmallString<128> Str;
00579   raw_svector_ostream O(Str);
00580 
00581   if (!GlobalsEmitted) {
00582     emitGlobals(*MF->getFunction()->getParent());
00583     GlobalsEmitted = true;
00584   }
00585   
00586   // Set up
00587   MRI = &MF->getRegInfo();
00588   F = MF->getFunction();
00589   emitLinkageDirective(F, O);
00590   if (llvm::isKernelFunction(*F))
00591     O << ".entry ";
00592   else {
00593     O << ".func ";
00594     printReturnValStr(*MF, O);
00595   }
00596 
00597   O << *CurrentFnSym;
00598 
00599   emitFunctionParamList(*MF, O);
00600 
00601   if (llvm::isKernelFunction(*F))
00602     emitKernelFunctionDirectives(*F, O);
00603 
00604   OutStreamer.EmitRawText(O.str());
00605 
00606   prevDebugLoc = DebugLoc();
00607 }
00608 
00609 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
00610   VRegMapping.clear();
00611   OutStreamer.EmitRawText(StringRef("{\n"));
00612   setAndEmitFunctionVirtualRegisters(*MF);
00613 
00614   SmallString<128> Str;
00615   raw_svector_ostream O(Str);
00616   emitDemotedVars(MF->getFunction(), O);
00617   OutStreamer.EmitRawText(O.str());
00618 }
00619 
00620 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
00621   OutStreamer.EmitRawText(StringRef("}\n"));
00622   VRegMapping.clear();
00623 }
00624 
00625 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
00626   unsigned RegNo = MI->getOperand(0).getReg();
00627   const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
00628   if (TRI->isVirtualRegister(RegNo)) {
00629     OutStreamer.AddComment(Twine("implicit-def: ") +
00630                            getVirtualRegisterName(RegNo));
00631   } else {
00632     OutStreamer.AddComment(
00633         Twine("implicit-def: ") +
00634         TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
00635   }
00636   OutStreamer.AddBlankLine();
00637 }
00638 
00639 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
00640                                                    raw_ostream &O) const {
00641   // If the NVVM IR has some of reqntid* specified, then output
00642   // the reqntid directive, and set the unspecified ones to 1.
00643   // If none of reqntid* is specified, don't output reqntid directive.
00644   unsigned reqntidx, reqntidy, reqntidz;
00645   bool specified = false;
00646   if (llvm::getReqNTIDx(F, reqntidx) == false)
00647     reqntidx = 1;
00648   else
00649     specified = true;
00650   if (llvm::getReqNTIDy(F, reqntidy) == false)
00651     reqntidy = 1;
00652   else
00653     specified = true;
00654   if (llvm::getReqNTIDz(F, reqntidz) == false)
00655     reqntidz = 1;
00656   else
00657     specified = true;
00658 
00659   if (specified)
00660     O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
00661       << "\n";
00662 
00663   // If the NVVM IR has some of maxntid* specified, then output
00664   // the maxntid directive, and set the unspecified ones to 1.
00665   // If none of maxntid* is specified, don't output maxntid directive.
00666   unsigned maxntidx, maxntidy, maxntidz;
00667   specified = false;
00668   if (llvm::getMaxNTIDx(F, maxntidx) == false)
00669     maxntidx = 1;
00670   else
00671     specified = true;
00672   if (llvm::getMaxNTIDy(F, maxntidy) == false)
00673     maxntidy = 1;
00674   else
00675     specified = true;
00676   if (llvm::getMaxNTIDz(F, maxntidz) == false)
00677     maxntidz = 1;
00678   else
00679     specified = true;
00680 
00681   if (specified)
00682     O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
00683       << "\n";
00684 
00685   unsigned mincta;
00686   if (llvm::getMinCTASm(F, mincta))
00687     O << ".minnctapersm " << mincta << "\n";
00688 }
00689 
00690 std::string
00691 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
00692   const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00693 
00694   std::string Name;
00695   raw_string_ostream NameStr(Name);
00696 
00697   VRegRCMap::const_iterator I = VRegMapping.find(RC);
00698   assert(I != VRegMapping.end() && "Bad register class");
00699   const DenseMap<unsigned, unsigned> &RegMap = I->second;
00700 
00701   VRegMap::const_iterator VI = RegMap.find(Reg);
00702   assert(VI != RegMap.end() && "Bad virtual register");
00703   unsigned MappedVR = VI->second;
00704 
00705   NameStr << getNVPTXRegClassStr(RC) << MappedVR;
00706 
00707   NameStr.flush();
00708   return Name;
00709 }
00710 
00711 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
00712                                           raw_ostream &O) {
00713   O << getVirtualRegisterName(vr);
00714 }
00715 
00716 void NVPTXAsmPrinter::printVecModifiedImmediate(
00717     const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
00718   static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
00719   int Imm = (int) MO.getImm();
00720   if (0 == strcmp(Modifier, "vecelem"))
00721     O << "_" << vecelem[Imm];
00722   else if (0 == strcmp(Modifier, "vecv4comm1")) {
00723     if ((Imm < 0) || (Imm > 3))
00724       O << "//";
00725   } else if (0 == strcmp(Modifier, "vecv4comm2")) {
00726     if ((Imm < 4) || (Imm > 7))
00727       O << "//";
00728   } else if (0 == strcmp(Modifier, "vecv4pos")) {
00729     if (Imm < 0)
00730       Imm = 0;
00731     O << "_" << vecelem[Imm % 4];
00732   } else if (0 == strcmp(Modifier, "vecv2comm1")) {
00733     if ((Imm < 0) || (Imm > 1))
00734       O << "//";
00735   } else if (0 == strcmp(Modifier, "vecv2comm2")) {
00736     if ((Imm < 2) || (Imm > 3))
00737       O << "//";
00738   } else if (0 == strcmp(Modifier, "vecv2pos")) {
00739     if (Imm < 0)
00740       Imm = 0;
00741     O << "_" << vecelem[Imm % 2];
00742   } else
00743     llvm_unreachable("Unknown Modifier on immediate operand");
00744 }
00745 
00746 
00747 
00748 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
00749 
00750   emitLinkageDirective(F, O);
00751   if (llvm::isKernelFunction(*F))
00752     O << ".entry ";
00753   else
00754     O << ".func ";
00755   printReturnValStr(F, O);
00756   O << *getSymbol(F) << "\n";
00757   emitFunctionParamList(F, O);
00758   O << ";\n";
00759 }
00760 
00761 static bool usedInGlobalVarDef(const Constant *C) {
00762   if (!C)
00763     return false;
00764 
00765   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
00766     if (GV->getName().str() == "llvm.used")
00767       return false;
00768     return true;
00769   }
00770 
00771   for (const User *U : C->users())
00772     if (const Constant *C = dyn_cast<Constant>(U))
00773       if (usedInGlobalVarDef(C))
00774         return true;
00775 
00776   return false;
00777 }
00778 
00779 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
00780   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
00781     if (othergv->getName().str() == "llvm.used")
00782       return true;
00783   }
00784 
00785   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
00786     if (instr->getParent() && instr->getParent()->getParent()) {
00787       const Function *curFunc = instr->getParent()->getParent();
00788       if (oneFunc && (curFunc != oneFunc))
00789         return false;
00790       oneFunc = curFunc;
00791       return true;
00792     } else
00793       return false;
00794   }
00795 
00796   if (const MDNode *md = dyn_cast<MDNode>(U))
00797     if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
00798                           (md->getName().str() == "llvm.dbg.sp")))
00799       return true;
00800 
00801   for (const User *UU : U->users())
00802     if (usedInOneFunc(UU, oneFunc) == false)
00803       return false;
00804 
00805   return true;
00806 }
00807 
00808 /* Find out if a global variable can be demoted to local scope.
00809  * Currently, this is valid for CUDA shared variables, which have local
00810  * scope and global lifetime. So the conditions to check are :
00811  * 1. Is the global variable in shared address space?
00812  * 2. Does it have internal linkage?
00813  * 3. Is the global variable referenced only in one function?
00814  */
00815 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
00816   if (gv->hasInternalLinkage() == false)
00817     return false;
00818   const PointerType *Pty = gv->getType();
00819   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
00820     return false;
00821 
00822   const Function *oneFunc = nullptr;
00823 
00824   bool flag = usedInOneFunc(gv, oneFunc);
00825   if (flag == false)
00826     return false;
00827   if (!oneFunc)
00828     return false;
00829   f = oneFunc;
00830   return true;
00831 }
00832 
00833 static bool useFuncSeen(const Constant *C,
00834                         llvm::DenseMap<const Function *, bool> &seenMap) {
00835   for (const User *U : C->users()) {
00836     if (const Constant *cu = dyn_cast<Constant>(U)) {
00837       if (useFuncSeen(cu, seenMap))
00838         return true;
00839     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
00840       const BasicBlock *bb = I->getParent();
00841       if (!bb)
00842         continue;
00843       const Function *caller = bb->getParent();
00844       if (!caller)
00845         continue;
00846       if (seenMap.find(caller) != seenMap.end())
00847         return true;
00848     }
00849   }
00850   return false;
00851 }
00852 
00853 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
00854   llvm::DenseMap<const Function *, bool> seenMap;
00855   for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
00856     const Function *F = FI;
00857 
00858     if (F->isDeclaration()) {
00859       if (F->use_empty())
00860         continue;
00861       if (F->getIntrinsicID())
00862         continue;
00863       emitDeclaration(F, O);
00864       continue;
00865     }
00866     for (const User *U : F->users()) {
00867       if (const Constant *C = dyn_cast<Constant>(U)) {
00868         if (usedInGlobalVarDef(C)) {
00869           // The use is in the initialization of a global variable
00870           // that is a function pointer, so print a declaration
00871           // for the original function
00872           emitDeclaration(F, O);
00873           break;
00874         }
00875         // Emit a declaration of this function if the function that
00876         // uses this constant expr has already been seen.
00877         if (useFuncSeen(C, seenMap)) {
00878           emitDeclaration(F, O);
00879           break;
00880         }
00881       }
00882 
00883       if (!isa<Instruction>(U))
00884         continue;
00885       const Instruction *instr = cast<Instruction>(U);
00886       const BasicBlock *bb = instr->getParent();
00887       if (!bb)
00888         continue;
00889       const Function *caller = bb->getParent();
00890       if (!caller)
00891         continue;
00892 
00893       // If a caller has already been seen, then the caller is
00894       // appearing in the module before the callee. so print out
00895       // a declaration for the callee.
00896       if (seenMap.find(caller) != seenMap.end()) {
00897         emitDeclaration(F, O);
00898         break;
00899       }
00900     }
00901     seenMap[F] = true;
00902   }
00903 }
00904 
00905 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
00906   DebugInfoFinder DbgFinder;
00907   DbgFinder.processModule(M);
00908 
00909   unsigned i = 1;
00910   for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
00911     StringRef Filename(DIUnit.getFilename());
00912     StringRef Dirname(DIUnit.getDirectory());
00913     SmallString<128> FullPathName = Dirname;
00914     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
00915       sys::path::append(FullPathName, Filename);
00916       Filename = FullPathName.str();
00917     }
00918     if (filenameMap.find(Filename.str()) != filenameMap.end())
00919       continue;
00920     filenameMap[Filename.str()] = i;
00921     OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
00922     ++i;
00923   }
00924 
00925   for (DISubprogram SP : DbgFinder.subprograms()) {
00926     StringRef Filename(SP.getFilename());
00927     StringRef Dirname(SP.getDirectory());
00928     SmallString<128> FullPathName = Dirname;
00929     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
00930       sys::path::append(FullPathName, Filename);
00931       Filename = FullPathName.str();
00932     }
00933     if (filenameMap.find(Filename.str()) != filenameMap.end())
00934       continue;
00935     filenameMap[Filename.str()] = i;
00936     ++i;
00937   }
00938 }
00939 
00940 bool NVPTXAsmPrinter::doInitialization(Module &M) {
00941 
00942   SmallString<128> Str1;
00943   raw_svector_ostream OS1(Str1);
00944 
00945   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
00946   MMI->AnalyzeModule(M);
00947 
00948   // We need to call the parent's one explicitly.
00949   //bool Result = AsmPrinter::doInitialization(M);
00950 
00951   // Initialize TargetLoweringObjectFile.
00952   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
00953       .Initialize(OutContext, TM);
00954 
00955   Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
00956 
00957   // Emit header before any dwarf directives are emitted below.
00958   emitHeader(M, OS1);
00959   OutStreamer.EmitRawText(OS1.str());
00960 
00961   // Already commented out
00962   //bool Result = AsmPrinter::doInitialization(M);
00963 
00964   // Emit module-level inline asm if it exists.
00965   if (!M.getModuleInlineAsm().empty()) {
00966     OutStreamer.AddComment("Start of file scope inline assembly");
00967     OutStreamer.AddBlankLine();
00968     OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
00969     OutStreamer.AddBlankLine();
00970     OutStreamer.AddComment("End of file scope inline assembly");
00971     OutStreamer.AddBlankLine();
00972   }
00973 
00974   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
00975     recordAndEmitFilenames(M);
00976 
00977   GlobalsEmitted = false;
00978     
00979   return false; // success
00980 }
00981 
00982 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
00983   SmallString<128> Str2;
00984   raw_svector_ostream OS2(Str2);
00985 
00986   emitDeclarations(M, OS2);
00987 
00988   // As ptxas does not support forward references of globals, we need to first
00989   // sort the list of module-level globals in def-use order. We visit each
00990   // global variable in order, and ensure that we emit it *after* its dependent
00991   // globals. We use a little extra memory maintaining both a set and a list to
00992   // have fast searches while maintaining a strict ordering.
00993   SmallVector<const GlobalVariable *, 8> Globals;
00994   DenseSet<const GlobalVariable *> GVVisited;
00995   DenseSet<const GlobalVariable *> GVVisiting;
00996 
00997   // Visit each global variable, in order
00998   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
00999        I != E; ++I)
01000     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
01001 
01002   assert(GVVisited.size() == M.getGlobalList().size() &&
01003          "Missed a global variable");
01004   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
01005 
01006   // Print out module-level global variables in proper order
01007   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
01008     printModuleLevelGV(Globals[i], OS2);
01009 
01010   OS2 << '\n';
01011 
01012   OutStreamer.EmitRawText(OS2.str());
01013 }
01014 
01015 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
01016   O << "//\n";
01017   O << "// Generated by LLVM NVPTX Back-End\n";
01018   O << "//\n";
01019   O << "\n";
01020 
01021   unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
01022   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
01023 
01024   O << ".target ";
01025   O << nvptxSubtarget.getTargetName();
01026 
01027   if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
01028     O << ", texmode_independent";
01029   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
01030     if (!nvptxSubtarget.hasDouble())
01031       O << ", map_f64_to_f32";
01032   }
01033 
01034   if (MAI->doesSupportDebugInformation())
01035     O << ", debug";
01036 
01037   O << "\n";
01038 
01039   O << ".address_size ";
01040   if (nvptxSubtarget.is64Bit())
01041     O << "64";
01042   else
01043     O << "32";
01044   O << "\n";
01045 
01046   O << "\n";
01047 }
01048 
01049 bool NVPTXAsmPrinter::doFinalization(Module &M) {
01050 
01051   // If we did not emit any functions, then the global declarations have not
01052   // yet been emitted.
01053   if (!GlobalsEmitted) {
01054     emitGlobals(M);
01055     GlobalsEmitted = true;
01056   }
01057 
01058   // XXX Temproarily remove global variables so that doFinalization() will not
01059   // emit them again (global variables are emitted at beginning).
01060 
01061   Module::GlobalListType &global_list = M.getGlobalList();
01062   int i, n = global_list.size();
01063   GlobalVariable **gv_array = new GlobalVariable *[n];
01064 
01065   // first, back-up GlobalVariable in gv_array
01066   i = 0;
01067   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
01068        I != E; ++I)
01069     gv_array[i++] = &*I;
01070 
01071   // second, empty global_list
01072   while (!global_list.empty())
01073     global_list.remove(global_list.begin());
01074 
01075   // call doFinalization
01076   bool ret = AsmPrinter::doFinalization(M);
01077 
01078   // now we restore global variables
01079   for (i = 0; i < n; i++)
01080     global_list.insert(global_list.end(), gv_array[i]);
01081 
01082   clearAnnotationCache(&M);
01083 
01084   delete[] gv_array;
01085   return ret;
01086 
01087   //bool Result = AsmPrinter::doFinalization(M);
01088   // Instead of calling the parents doFinalization, we may
01089   // clone parents doFinalization and customize here.
01090   // Currently, we if NVISA out the EmitGlobals() in
01091   // parent's doFinalization, which is too intrusive.
01092   //
01093   // Same for the doInitialization.
01094   //return Result;
01095 }
01096 
01097 // This function emits appropriate linkage directives for
01098 // functions and global variables.
01099 //
01100 // extern function declaration            -> .extern
01101 // extern function definition             -> .visible
01102 // external global variable with init     -> .visible
01103 // external without init                  -> .extern
01104 // appending                              -> not allowed, assert.
01105 // for any linkage other than
01106 // internal, private, linker_private,
01107 // linker_private_weak, linker_private_weak_def_auto,
01108 // we emit                                -> .weak.
01109 
01110 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
01111                                            raw_ostream &O) {
01112   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
01113     if (V->hasExternalLinkage()) {
01114       if (isa<GlobalVariable>(V)) {
01115         const GlobalVariable *GVar = cast<GlobalVariable>(V);
01116         if (GVar) {
01117           if (GVar->hasInitializer())
01118             O << ".visible ";
01119           else
01120             O << ".extern ";
01121         }
01122       } else if (V->isDeclaration())
01123         O << ".extern ";
01124       else
01125         O << ".visible ";
01126     } else if (V->hasAppendingLinkage()) {
01127       std::string msg;
01128       msg.append("Error: ");
01129       msg.append("Symbol ");
01130       if (V->hasName())
01131         msg.append(V->getName().str());
01132       msg.append("has unsupported appending linkage type");
01133       llvm_unreachable(msg.c_str());
01134     } else if (!V->hasInternalLinkage() &&
01135                !V->hasPrivateLinkage()) {
01136       O << ".weak ";
01137     }
01138   }
01139 }
01140 
01141 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
01142                                          raw_ostream &O,
01143                                          bool processDemoted) {
01144 
01145   // Skip meta data
01146   if (GVar->hasSection()) {
01147     if (GVar->getSection() == StringRef("llvm.metadata"))
01148       return;
01149   }
01150 
01151   // Skip LLVM intrinsic global variables
01152   if (GVar->getName().startswith("llvm.") ||
01153       GVar->getName().startswith("nvvm."))
01154     return;
01155 
01156   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
01157 
01158   // GlobalVariables are always constant pointers themselves.
01159   const PointerType *PTy = GVar->getType();
01160   Type *ETy = PTy->getElementType();
01161 
01162   if (GVar->hasExternalLinkage()) {
01163     if (GVar->hasInitializer())
01164       O << ".visible ";
01165     else
01166       O << ".extern ";
01167   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
01168              GVar->hasAvailableExternallyLinkage() ||
01169              GVar->hasCommonLinkage()) {
01170     O << ".weak ";
01171   }
01172 
01173   if (llvm::isTexture(*GVar)) {
01174     O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
01175     return;
01176   }
01177 
01178   if (llvm::isSurface(*GVar)) {
01179     O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
01180     return;
01181   }
01182 
01183   if (GVar->isDeclaration()) {
01184     // (extern) declarations, no definition or initializer
01185     // Currently the only known declaration is for an automatic __local
01186     // (.shared) promoted to global.
01187     emitPTXGlobalVariable(GVar, O);
01188     O << ";\n";
01189     return;
01190   }
01191 
01192   if (llvm::isSampler(*GVar)) {
01193     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
01194 
01195     const Constant *Initializer = nullptr;
01196     if (GVar->hasInitializer())
01197       Initializer = GVar->getInitializer();
01198     const ConstantInt *CI = nullptr;
01199     if (Initializer)
01200       CI = dyn_cast<ConstantInt>(Initializer);
01201     if (CI) {
01202       unsigned sample = CI->getZExtValue();
01203 
01204       O << " = { ";
01205 
01206       for (int i = 0,
01207                addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
01208            i < 3; i++) {
01209         O << "addr_mode_" << i << " = ";
01210         switch (addr) {
01211         case 0:
01212           O << "wrap";
01213           break;
01214         case 1:
01215           O << "clamp_to_border";
01216           break;
01217         case 2:
01218           O << "clamp_to_edge";
01219           break;
01220         case 3:
01221           O << "wrap";
01222           break;
01223         case 4:
01224           O << "mirror";
01225           break;
01226         }
01227         O << ", ";
01228       }
01229       O << "filter_mode = ";
01230       switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
01231       case 0:
01232         O << "nearest";
01233         break;
01234       case 1:
01235         O << "linear";
01236         break;
01237       case 2:
01238         llvm_unreachable("Anisotropic filtering is not supported");
01239       default:
01240         O << "nearest";
01241         break;
01242       }
01243       if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
01244         O << ", force_unnormalized_coords = 1";
01245       }
01246       O << " }";
01247     }
01248 
01249     O << ";\n";
01250     return;
01251   }
01252 
01253   if (GVar->hasPrivateLinkage()) {
01254 
01255     if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
01256       return;
01257 
01258     // FIXME - need better way (e.g. Metadata) to avoid generating this global
01259     if (!strncmp(GVar->getName().data(), "filename", 8))
01260       return;
01261     if (GVar->use_empty())
01262       return;
01263   }
01264 
01265   const Function *demotedFunc = nullptr;
01266   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
01267     O << "// " << GVar->getName().str() << " has been demoted\n";
01268     if (localDecls.find(demotedFunc) != localDecls.end())
01269       localDecls[demotedFunc].push_back(GVar);
01270     else {
01271       std::vector<const GlobalVariable *> temp;
01272       temp.push_back(GVar);
01273       localDecls[demotedFunc] = temp;
01274     }
01275     return;
01276   }
01277 
01278   O << ".";
01279   emitPTXAddressSpace(PTy->getAddressSpace(), O);
01280 
01281   if (isManaged(*GVar)) {
01282     O << " .attribute(.managed)";
01283   }
01284 
01285   if (GVar->getAlignment() == 0)
01286     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
01287   else
01288     O << " .align " << GVar->getAlignment();
01289 
01290   if (ETy->isSingleValueType()) {
01291     O << " .";
01292     // Special case: ABI requires that we use .u8 for predicates
01293     if (ETy->isIntegerTy(1))
01294       O << "u8";
01295     else
01296       O << getPTXFundamentalTypeStr(ETy, false);
01297     O << " ";
01298     O << *getSymbol(GVar);
01299 
01300     // Ptx allows variable initilization only for constant and global state
01301     // spaces.
01302     if (GVar->hasInitializer()) {
01303       if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
01304           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
01305         const Constant *Initializer = GVar->getInitializer();
01306         // 'undef' is treated as there is no value spefied.
01307         if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
01308           O << " = ";
01309           printScalarConstant(Initializer, O);
01310         }
01311       } else {
01312         // The frontend adds zero-initializer to variables that don't have an
01313         // initial value, so skip warning for this case.
01314         if (!GVar->getInitializer()->isNullValue()) {
01315           std::string warnMsg = "initial value of '" + GVar->getName().str() +
01316               "' is not allowed in addrspace(" +
01317               llvm::utostr_32(PTy->getAddressSpace()) + ")";
01318           report_fatal_error(warnMsg.c_str());
01319         }
01320       }
01321     }
01322   } else {
01323     unsigned int ElementSize = 0;
01324 
01325     // Although PTX has direct support for struct type and array type and
01326     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
01327     // targets that support these high level field accesses. Structs, arrays
01328     // and vectors are lowered into arrays of bytes.
01329     switch (ETy->getTypeID()) {
01330     case Type::StructTyID:
01331     case Type::ArrayTyID:
01332     case Type::VectorTyID:
01333       ElementSize = TD->getTypeStoreSize(ETy);
01334       // Ptx allows variable initilization only for constant and
01335       // global state spaces.
01336       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
01337            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
01338           GVar->hasInitializer()) {
01339         const Constant *Initializer = GVar->getInitializer();
01340         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
01341           AggBuffer aggBuffer(ElementSize, O, *this);
01342           bufferAggregateConstant(Initializer, &aggBuffer);
01343           if (aggBuffer.numSymbols) {
01344             if (nvptxSubtarget.is64Bit()) {
01345               O << " .u64 " << *getSymbol(GVar) << "[";
01346               O << ElementSize / 8;
01347             } else {
01348               O << " .u32 " << *getSymbol(GVar) << "[";
01349               O << ElementSize / 4;
01350             }
01351             O << "]";
01352           } else {
01353             O << " .b8 " << *getSymbol(GVar) << "[";
01354             O << ElementSize;
01355             O << "]";
01356           }
01357           O << " = {";
01358           aggBuffer.print();
01359           O << "}";
01360         } else {
01361           O << " .b8 " << *getSymbol(GVar);
01362           if (ElementSize) {
01363             O << "[";
01364             O << ElementSize;
01365             O << "]";
01366           }
01367         }
01368       } else {
01369         O << " .b8 " << *getSymbol(GVar);
01370         if (ElementSize) {
01371           O << "[";
01372           O << ElementSize;
01373           O << "]";
01374         }
01375       }
01376       break;
01377     default:
01378       llvm_unreachable("type not supported yet");
01379     }
01380 
01381   }
01382   O << ";\n";
01383 }
01384 
01385 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
01386   if (localDecls.find(f) == localDecls.end())
01387     return;
01388 
01389   std::vector<const GlobalVariable *> &gvars = localDecls[f];
01390 
01391   for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
01392     O << "\t// demoted variable\n\t";
01393     printModuleLevelGV(gvars[i], O, true);
01394   }
01395 }
01396 
01397 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
01398                                           raw_ostream &O) const {
01399   switch (AddressSpace) {
01400   case llvm::ADDRESS_SPACE_LOCAL:
01401     O << "local";
01402     break;
01403   case llvm::ADDRESS_SPACE_GLOBAL:
01404     O << "global";
01405     break;
01406   case llvm::ADDRESS_SPACE_CONST:
01407     O << "const";
01408     break;
01409   case llvm::ADDRESS_SPACE_SHARED:
01410     O << "shared";
01411     break;
01412   default:
01413     report_fatal_error("Bad address space found while emitting PTX");
01414     break;
01415   }
01416 }
01417 
01418 std::string
01419 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
01420   switch (Ty->getTypeID()) {
01421   default:
01422     llvm_unreachable("unexpected type");
01423     break;
01424   case Type::IntegerTyID: {
01425     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
01426     if (NumBits == 1)
01427       return "pred";
01428     else if (NumBits <= 64) {
01429       std::string name = "u";
01430       return name + utostr(NumBits);
01431     } else {
01432       llvm_unreachable("Integer too large");
01433       break;
01434     }
01435     break;
01436   }
01437   case Type::FloatTyID:
01438     return "f32";
01439   case Type::DoubleTyID:
01440     return "f64";
01441   case Type::PointerTyID:
01442     if (nvptxSubtarget.is64Bit())
01443       if (useB4PTR)
01444         return "b64";
01445       else
01446         return "u64";
01447     else if (useB4PTR)
01448       return "b32";
01449     else
01450       return "u32";
01451   }
01452   llvm_unreachable("unexpected type");
01453   return nullptr;
01454 }
01455 
01456 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
01457                                             raw_ostream &O) {
01458 
01459   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
01460 
01461   // GlobalVariables are always constant pointers themselves.
01462   const PointerType *PTy = GVar->getType();
01463   Type *ETy = PTy->getElementType();
01464 
01465   O << ".";
01466   emitPTXAddressSpace(PTy->getAddressSpace(), O);
01467   if (GVar->getAlignment() == 0)
01468     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
01469   else
01470     O << " .align " << GVar->getAlignment();
01471 
01472   if (ETy->isSingleValueType()) {
01473     O << " .";
01474     O << getPTXFundamentalTypeStr(ETy);
01475     O << " ";
01476     O << *getSymbol(GVar);
01477     return;
01478   }
01479 
01480   int64_t ElementSize = 0;
01481 
01482   // Although PTX has direct support for struct type and array type and LLVM IR
01483   // is very similar to PTX, the LLVM CodeGen does not support for targets that
01484   // support these high level field accesses. Structs and arrays are lowered
01485   // into arrays of bytes.
01486   switch (ETy->getTypeID()) {
01487   case Type::StructTyID:
01488   case Type::ArrayTyID:
01489   case Type::VectorTyID:
01490     ElementSize = TD->getTypeStoreSize(ETy);
01491     O << " .b8 " << *getSymbol(GVar) << "[";
01492     if (ElementSize) {
01493       O << itostr(ElementSize);
01494     }
01495     O << "]";
01496     break;
01497   default:
01498     llvm_unreachable("type not supported yet");
01499   }
01500   return;
01501 }
01502 
01503 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
01504   if (Ty->isSingleValueType())
01505     return TD->getPrefTypeAlignment(Ty);
01506 
01507   const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
01508   if (ATy)
01509     return getOpenCLAlignment(TD, ATy->getElementType());
01510 
01511   const VectorType *VTy = dyn_cast<VectorType>(Ty);
01512   if (VTy) {
01513     Type *ETy = VTy->getElementType();
01514     unsigned int numE = VTy->getNumElements();
01515     unsigned int alignE = TD->getPrefTypeAlignment(ETy);
01516     if (numE == 3)
01517       return 4 * alignE;
01518     else
01519       return numE * alignE;
01520   }
01521 
01522   const StructType *STy = dyn_cast<StructType>(Ty);
01523   if (STy) {
01524     unsigned int alignStruct = 1;
01525     // Go through each element of the struct and find the
01526     // largest alignment.
01527     for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
01528       Type *ETy = STy->getElementType(i);
01529       unsigned int align = getOpenCLAlignment(TD, ETy);
01530       if (align > alignStruct)
01531         alignStruct = align;
01532     }
01533     return alignStruct;
01534   }
01535 
01536   const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
01537   if (FTy)
01538     return TD->getPointerPrefAlignment();
01539   return TD->getPrefTypeAlignment(Ty);
01540 }
01541 
01542 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
01543                                      int paramIndex, raw_ostream &O) {
01544   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
01545       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
01546     O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
01547   else {
01548     std::string argName = I->getName();
01549     const char *p = argName.c_str();
01550     while (*p) {
01551       if (*p == '.')
01552         O << "_";
01553       else
01554         O << *p;
01555       p++;
01556     }
01557   }
01558 }
01559 
01560 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
01561   Function::const_arg_iterator I, E;
01562   int i = 0;
01563 
01564   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
01565       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
01566     O << *CurrentFnSym << "_param_" << paramIndex;
01567     return;
01568   }
01569 
01570   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
01571     if (i == paramIndex) {
01572       printParamName(I, paramIndex, O);
01573       return;
01574     }
01575   }
01576   llvm_unreachable("paramIndex out of bound");
01577 }
01578 
01579 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
01580   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
01581   const AttributeSet &PAL = F->getAttributes();
01582   const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
01583   Function::const_arg_iterator I, E;
01584   unsigned paramIndex = 0;
01585   bool first = true;
01586   bool isKernelFunc = llvm::isKernelFunction(*F);
01587   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
01588   MVT thePointerTy = TLI->getPointerTy();
01589 
01590   O << "(\n";
01591 
01592   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
01593     Type *Ty = I->getType();
01594 
01595     if (!first)
01596       O << ",\n";
01597 
01598     first = false;
01599 
01600     // Handle image/sampler parameters
01601     if (isKernelFunction(*F)) {
01602       if (isSampler(*I) || isImage(*I)) {
01603         if (isImage(*I)) {
01604           std::string sname = I->getName();
01605           if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
01606             if (nvptxSubtarget.hasImageHandles())
01607               O << "\t.param .u64 .ptr .surfref ";
01608             else
01609               O << "\t.param .surfref ";
01610             O << *CurrentFnSym << "_param_" << paramIndex;
01611           }
01612           else { // Default image is read_only
01613             if (nvptxSubtarget.hasImageHandles())
01614               O << "\t.param .u64 .ptr .texref ";
01615             else
01616               O << "\t.param .texref ";
01617             O << *CurrentFnSym << "_param_" << paramIndex;
01618           }
01619         } else {
01620           if (nvptxSubtarget.hasImageHandles())
01621             O << "\t.param .u64 .ptr .samplerref ";
01622           else
01623             O << "\t.param .samplerref ";
01624           O << *CurrentFnSym << "_param_" << paramIndex;
01625         }
01626         continue;
01627       }
01628     }
01629 
01630     if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
01631       if (Ty->isAggregateType() || Ty->isVectorTy()) {
01632         // Just print .param .align <a> .b8 .param[size];
01633         // <a> = PAL.getparamalignment
01634         // size = typeallocsize of element type
01635         unsigned align = PAL.getParamAlignment(paramIndex + 1);
01636         if (align == 0)
01637           align = TD->getABITypeAlignment(Ty);
01638 
01639         unsigned sz = TD->getTypeAllocSize(Ty);
01640         O << "\t.param .align " << align << " .b8 ";
01641         printParamName(I, paramIndex, O);
01642         O << "[" << sz << "]";
01643 
01644         continue;
01645       }
01646       // Just a scalar
01647       const PointerType *PTy = dyn_cast<PointerType>(Ty);
01648       if (isKernelFunc) {
01649         if (PTy) {
01650           // Special handling for pointer arguments to kernel
01651           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
01652 
01653           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
01654             Type *ETy = PTy->getElementType();
01655             int addrSpace = PTy->getAddressSpace();
01656             switch (addrSpace) {
01657             default:
01658               O << ".ptr ";
01659               break;
01660             case llvm::ADDRESS_SPACE_CONST:
01661               O << ".ptr .const ";
01662               break;
01663             case llvm::ADDRESS_SPACE_SHARED:
01664               O << ".ptr .shared ";
01665               break;
01666             case llvm::ADDRESS_SPACE_GLOBAL:
01667               O << ".ptr .global ";
01668               break;
01669             }
01670             O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
01671           }
01672           printParamName(I, paramIndex, O);
01673           continue;
01674         }
01675 
01676         // non-pointer scalar to kernel func
01677         O << "\t.param .";
01678         // Special case: predicate operands become .u8 types
01679         if (Ty->isIntegerTy(1))
01680           O << "u8";
01681         else
01682           O << getPTXFundamentalTypeStr(Ty);
01683         O << " ";
01684         printParamName(I, paramIndex, O);
01685         continue;
01686       }
01687       // Non-kernel function, just print .param .b<size> for ABI
01688       // and .reg .b<size> for non-ABI
01689       unsigned sz = 0;
01690       if (isa<IntegerType>(Ty)) {
01691         sz = cast<IntegerType>(Ty)->getBitWidth();
01692         if (sz < 32)
01693           sz = 32;
01694       } else if (isa<PointerType>(Ty))
01695         sz = thePointerTy.getSizeInBits();
01696       else
01697         sz = Ty->getPrimitiveSizeInBits();
01698       if (isABI)
01699         O << "\t.param .b" << sz << " ";
01700       else
01701         O << "\t.reg .b" << sz << " ";
01702       printParamName(I, paramIndex, O);
01703       continue;
01704     }
01705 
01706     // param has byVal attribute. So should be a pointer
01707     const PointerType *PTy = dyn_cast<PointerType>(Ty);
01708     assert(PTy && "Param with byval attribute should be a pointer type");
01709     Type *ETy = PTy->getElementType();
01710 
01711     if (isABI || isKernelFunc) {
01712       // Just print .param .align <a> .b8 .param[size];
01713       // <a> = PAL.getparamalignment
01714       // size = typeallocsize of element type
01715       unsigned align = PAL.getParamAlignment(paramIndex + 1);
01716       if (align == 0)
01717         align = TD->getABITypeAlignment(ETy);
01718 
01719       unsigned sz = TD->getTypeAllocSize(ETy);
01720       O << "\t.param .align " << align << " .b8 ";
01721       printParamName(I, paramIndex, O);
01722       O << "[" << sz << "]";
01723       continue;
01724     } else {
01725       // Split the ETy into constituent parts and
01726       // print .param .b<size> <name> for each part.
01727       // Further, if a part is vector, print the above for
01728       // each vector element.
01729       SmallVector<EVT, 16> vtparts;
01730       ComputeValueVTs(*TLI, ETy, vtparts);
01731       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
01732         unsigned elems = 1;
01733         EVT elemtype = vtparts[i];
01734         if (vtparts[i].isVector()) {
01735           elems = vtparts[i].getVectorNumElements();
01736           elemtype = vtparts[i].getVectorElementType();
01737         }
01738 
01739         for (unsigned j = 0, je = elems; j != je; ++j) {
01740           unsigned sz = elemtype.getSizeInBits();
01741           if (elemtype.isInteger() && (sz < 32))
01742             sz = 32;
01743           O << "\t.reg .b" << sz << " ";
01744           printParamName(I, paramIndex, O);
01745           if (j < je - 1)
01746             O << ",\n";
01747           ++paramIndex;
01748         }
01749         if (i < e - 1)
01750           O << ",\n";
01751       }
01752       --paramIndex;
01753       continue;
01754     }
01755   }
01756 
01757   O << "\n)\n";
01758 }
01759 
01760 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
01761                                             raw_ostream &O) {
01762   const Function *F = MF.getFunction();
01763   emitFunctionParamList(F, O);
01764 }
01765 
01766 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
01767     const MachineFunction &MF) {
01768   SmallString<128> Str;
01769   raw_svector_ostream O(Str);
01770 
01771   // Map the global virtual register number to a register class specific
01772   // virtual register number starting from 1 with that class.
01773   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
01774   //unsigned numRegClasses = TRI->getNumRegClasses();
01775 
01776   // Emit the Fake Stack Object
01777   const MachineFrameInfo *MFI = MF.getFrameInfo();
01778   int NumBytes = (int) MFI->getStackSize();
01779   if (NumBytes) {
01780     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
01781       << getFunctionNumber() << "[" << NumBytes << "];\n";
01782     if (nvptxSubtarget.is64Bit()) {
01783       O << "\t.reg .b64 \t%SP;\n";
01784       O << "\t.reg .b64 \t%SPL;\n";
01785     } else {
01786       O << "\t.reg .b32 \t%SP;\n";
01787       O << "\t.reg .b32 \t%SPL;\n";
01788     }
01789   }
01790 
01791   // Go through all virtual registers to establish the mapping between the
01792   // global virtual
01793   // register number and the per class virtual register number.
01794   // We use the per class virtual register number in the ptx output.
01795   unsigned int numVRs = MRI->getNumVirtRegs();
01796   for (unsigned i = 0; i < numVRs; i++) {
01797     unsigned int vr = TRI->index2VirtReg(i);
01798     const TargetRegisterClass *RC = MRI->getRegClass(vr);
01799     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
01800     int n = regmap.size();
01801     regmap.insert(std::make_pair(vr, n + 1));
01802   }
01803 
01804   // Emit register declarations
01805   // @TODO: Extract out the real register usage
01806   // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
01807   // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
01808   // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
01809   // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
01810   // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
01811   // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
01812   // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
01813 
01814   // Emit declaration of the virtual registers or 'physical' registers for
01815   // each register class
01816   for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
01817     const TargetRegisterClass *RC = TRI->getRegClass(i);
01818     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
01819     std::string rcname = getNVPTXRegClassName(RC);
01820     std::string rcStr = getNVPTXRegClassStr(RC);
01821     int n = regmap.size();
01822 
01823     // Only declare those registers that may be used.
01824     if (n) {
01825        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
01826          << ">;\n";
01827     }
01828   }
01829 
01830   OutStreamer.EmitRawText(O.str());
01831 }
01832 
01833 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
01834   APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
01835   bool ignored;
01836   unsigned int numHex;
01837   const char *lead;
01838 
01839   if (Fp->getType()->getTypeID() == Type::FloatTyID) {
01840     numHex = 8;
01841     lead = "0f";
01842     APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
01843   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
01844     numHex = 16;
01845     lead = "0d";
01846     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
01847   } else
01848     llvm_unreachable("unsupported fp type");
01849 
01850   APInt API = APF.bitcastToAPInt();
01851   std::string hexstr(utohexstr(API.getZExtValue()));
01852   O << lead;
01853   if (hexstr.length() < numHex)
01854     O << std::string(numHex - hexstr.length(), '0');
01855   O << utohexstr(API.getZExtValue());
01856 }
01857 
01858 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
01859   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
01860     O << CI->getValue();
01861     return;
01862   }
01863   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
01864     printFPConstant(CFP, O);
01865     return;
01866   }
01867   if (isa<ConstantPointerNull>(CPV)) {
01868     O << "0";
01869     return;
01870   }
01871   if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
01872     PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
01873     bool IsNonGenericPointer = false;
01874     if (PTy && PTy->getAddressSpace() != 0) {
01875       IsNonGenericPointer = true;
01876     }
01877     if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
01878       O << "generic(";
01879       O << *getSymbol(GVar);
01880       O << ")";
01881     } else {
01882       O << *getSymbol(GVar);
01883     }
01884     return;
01885   }
01886   if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01887     const Value *v = Cexpr->stripPointerCasts();
01888     PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
01889     bool IsNonGenericPointer = false;
01890     if (PTy && PTy->getAddressSpace() != 0) {
01891       IsNonGenericPointer = true;
01892     }
01893     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
01894       if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
01895         O << "generic(";
01896         O << *getSymbol(GVar);
01897         O << ")";
01898       } else {
01899         O << *getSymbol(GVar);
01900       }
01901       return;
01902     } else {
01903       O << *LowerConstant(CPV, *this);
01904       return;
01905     }
01906   }
01907   llvm_unreachable("Not scalar type found in printScalarConstant()");
01908 }
01909 
01910 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
01911                                    AggBuffer *aggBuffer) {
01912 
01913   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
01914 
01915   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
01916     int s = TD->getTypeAllocSize(CPV->getType());
01917     if (s < Bytes)
01918       s = Bytes;
01919     aggBuffer->addZeros(s);
01920     return;
01921   }
01922 
01923   unsigned char *ptr;
01924   switch (CPV->getType()->getTypeID()) {
01925 
01926   case Type::IntegerTyID: {
01927     const Type *ETy = CPV->getType();
01928     if (ETy == Type::getInt8Ty(CPV->getContext())) {
01929       unsigned char c =
01930           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
01931       ptr = &c;
01932       aggBuffer->addBytes(ptr, 1, Bytes);
01933     } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
01934       short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
01935       ptr = (unsigned char *)&int16;
01936       aggBuffer->addBytes(ptr, 2, Bytes);
01937     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
01938       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
01939         int int32 = (int)(constInt->getZExtValue());
01940         ptr = (unsigned char *)&int32;
01941         aggBuffer->addBytes(ptr, 4, Bytes);
01942         break;
01943       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01944         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
01945                 ConstantFoldConstantExpression(Cexpr, TD))) {
01946           int int32 = (int)(constInt->getZExtValue());
01947           ptr = (unsigned char *)&int32;
01948           aggBuffer->addBytes(ptr, 4, Bytes);
01949           break;
01950         }
01951         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
01952           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
01953           aggBuffer->addSymbol(v);
01954           aggBuffer->addZeros(4);
01955           break;
01956         }
01957       }
01958       llvm_unreachable("unsupported integer const type");
01959     } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
01960       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
01961         long long int64 = (long long)(constInt->getZExtValue());
01962         ptr = (unsigned char *)&int64;
01963         aggBuffer->addBytes(ptr, 8, Bytes);
01964         break;
01965       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01966         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
01967                 ConstantFoldConstantExpression(Cexpr, TD))) {
01968           long long int64 = (long long)(constInt->getZExtValue());
01969           ptr = (unsigned char *)&int64;
01970           aggBuffer->addBytes(ptr, 8, Bytes);
01971           break;
01972         }
01973         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
01974           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
01975           aggBuffer->addSymbol(v);
01976           aggBuffer->addZeros(8);
01977           break;
01978         }
01979       }
01980       llvm_unreachable("unsupported integer const type");
01981     } else
01982       llvm_unreachable("unsupported integer const type");
01983     break;
01984   }
01985   case Type::FloatTyID:
01986   case Type::DoubleTyID: {
01987     const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
01988     const Type *Ty = CFP->getType();
01989     if (Ty == Type::getFloatTy(CPV->getContext())) {
01990       float float32 = (float) CFP->getValueAPF().convertToFloat();
01991       ptr = (unsigned char *)&float32;
01992       aggBuffer->addBytes(ptr, 4, Bytes);
01993     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
01994       double float64 = CFP->getValueAPF().convertToDouble();
01995       ptr = (unsigned char *)&float64;
01996       aggBuffer->addBytes(ptr, 8, Bytes);
01997     } else {
01998       llvm_unreachable("unsupported fp const type");
01999     }
02000     break;
02001   }
02002   case Type::PointerTyID: {
02003     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
02004       aggBuffer->addSymbol(GVar);
02005     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
02006       const Value *v = Cexpr->stripPointerCasts();
02007       aggBuffer->addSymbol(v);
02008     }
02009     unsigned int s = TD->getTypeAllocSize(CPV->getType());
02010     aggBuffer->addZeros(s);
02011     break;
02012   }
02013 
02014   case Type::ArrayTyID:
02015   case Type::VectorTyID:
02016   case Type::StructTyID: {
02017     if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
02018         isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
02019       int ElementSize = TD->getTypeAllocSize(CPV->getType());
02020       bufferAggregateConstant(CPV, aggBuffer);
02021       if (Bytes > ElementSize)
02022         aggBuffer->addZeros(Bytes - ElementSize);
02023     } else if (isa<ConstantAggregateZero>(CPV))
02024       aggBuffer->addZeros(Bytes);
02025     else
02026       llvm_unreachable("Unexpected Constant type");
02027     break;
02028   }
02029 
02030   default:
02031     llvm_unreachable("unsupported type");
02032   }
02033 }
02034 
02035 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
02036                                               AggBuffer *aggBuffer) {
02037   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
02038   int Bytes;
02039 
02040   // Old constants
02041   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
02042     if (CPV->getNumOperands())
02043       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
02044         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
02045     return;
02046   }
02047 
02048   if (const ConstantDataSequential *CDS =
02049           dyn_cast<ConstantDataSequential>(CPV)) {
02050     if (CDS->getNumElements())
02051       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
02052         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
02053                      aggBuffer);
02054     return;
02055   }
02056 
02057   if (isa<ConstantStruct>(CPV)) {
02058     if (CPV->getNumOperands()) {
02059       StructType *ST = cast<StructType>(CPV->getType());
02060       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
02061         if (i == (e - 1))
02062           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
02063                   TD->getTypeAllocSize(ST) -
02064                   TD->getStructLayout(ST)->getElementOffset(i);
02065         else
02066           Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
02067                   TD->getStructLayout(ST)->getElementOffset(i);
02068         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
02069       }
02070     }
02071     return;
02072   }
02073   llvm_unreachable("unsupported constant type in printAggregateConstant()");
02074 }
02075 
02076 // buildTypeNameMap - Run through symbol table looking for type names.
02077 //
02078 
02079 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
02080 
02081   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
02082 
02083   if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
02084                                   !PI->second.compare("struct._image2d_t") ||
02085                                   !PI->second.compare("struct._image3d_t")))
02086     return true;
02087 
02088   return false;
02089 }
02090 
02091 
02092 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
02093   switch (MI.getOpcode()) {
02094   default:
02095     return false;
02096   case NVPTX::CallArgBeginInst:
02097   case NVPTX::CallArgEndInst0:
02098   case NVPTX::CallArgEndInst1:
02099   case NVPTX::CallArgF32:
02100   case NVPTX::CallArgF64:
02101   case NVPTX::CallArgI16:
02102   case NVPTX::CallArgI32:
02103   case NVPTX::CallArgI32imm:
02104   case NVPTX::CallArgI64:
02105   case NVPTX::CallArgParam:
02106   case NVPTX::CallVoidInst:
02107   case NVPTX::CallVoidInstReg:
02108   case NVPTX::Callseq_End:
02109   case NVPTX::CallVoidInstReg64:
02110   case NVPTX::DeclareParamInst:
02111   case NVPTX::DeclareRetMemInst:
02112   case NVPTX::DeclareRetRegInst:
02113   case NVPTX::DeclareRetScalarInst:
02114   case NVPTX::DeclareScalarParamInst:
02115   case NVPTX::DeclareScalarRegInst:
02116   case NVPTX::StoreParamF32:
02117   case NVPTX::StoreParamF64:
02118   case NVPTX::StoreParamI16:
02119   case NVPTX::StoreParamI32:
02120   case NVPTX::StoreParamI64:
02121   case NVPTX::StoreParamI8:
02122   case NVPTX::StoreRetvalF32:
02123   case NVPTX::StoreRetvalF64:
02124   case NVPTX::StoreRetvalI16:
02125   case NVPTX::StoreRetvalI32:
02126   case NVPTX::StoreRetvalI64:
02127   case NVPTX::StoreRetvalI8:
02128   case NVPTX::LastCallArgF32:
02129   case NVPTX::LastCallArgF64:
02130   case NVPTX::LastCallArgI16:
02131   case NVPTX::LastCallArgI32:
02132   case NVPTX::LastCallArgI32imm:
02133   case NVPTX::LastCallArgI64:
02134   case NVPTX::LastCallArgParam:
02135   case NVPTX::LoadParamMemF32:
02136   case NVPTX::LoadParamMemF64:
02137   case NVPTX::LoadParamMemI16:
02138   case NVPTX::LoadParamMemI32:
02139   case NVPTX::LoadParamMemI64:
02140   case NVPTX::LoadParamMemI8:
02141   case NVPTX::PrototypeInst:
02142   case NVPTX::DBG_VALUE:
02143     return true;
02144   }
02145   return false;
02146 }
02147 
02148 /// PrintAsmOperand - Print out an operand for an inline asm expression.
02149 ///
02150 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
02151                                       unsigned AsmVariant,
02152                                       const char *ExtraCode, raw_ostream &O) {
02153   if (ExtraCode && ExtraCode[0]) {
02154     if (ExtraCode[1] != 0)
02155       return true; // Unknown modifier.
02156 
02157     switch (ExtraCode[0]) {
02158     default:
02159       // See if this is a generic print operand
02160       return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
02161     case 'r':
02162       break;
02163     }
02164   }
02165 
02166   printOperand(MI, OpNo, O);
02167 
02168   return false;
02169 }
02170 
02171 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
02172     const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
02173     const char *ExtraCode, raw_ostream &O) {
02174   if (ExtraCode && ExtraCode[0])
02175     return true; // Unknown modifier
02176 
02177   O << '[';
02178   printMemOperand(MI, OpNo, O);
02179   O << ']';
02180 
02181   return false;
02182 }
02183 
02184 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
02185                                    raw_ostream &O, const char *Modifier) {
02186   const MachineOperand &MO = MI->getOperand(opNum);
02187   switch (MO.getType()) {
02188   case MachineOperand::MO_Register:
02189     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
02190       if (MO.getReg() == NVPTX::VRDepot)
02191         O << DEPOTNAME << getFunctionNumber();
02192       else
02193         O << NVPTXInstPrinter::getRegisterName(MO.getReg());
02194     } else {
02195       emitVirtualRegister(MO.getReg(), O);
02196     }
02197     return;
02198 
02199   case MachineOperand::MO_Immediate:
02200     if (!Modifier)
02201       O << MO.getImm();
02202     else if (strstr(Modifier, "vec") == Modifier)
02203       printVecModifiedImmediate(MO, Modifier, O);
02204     else
02205       llvm_unreachable(
02206           "Don't know how to handle modifier on immediate operand");
02207     return;
02208 
02209   case MachineOperand::MO_FPImmediate:
02210     printFPConstant(MO.getFPImm(), O);
02211     break;
02212 
02213   case MachineOperand::MO_GlobalAddress:
02214     O << *getSymbol(MO.getGlobal());
02215     break;
02216 
02217   case MachineOperand::MO_MachineBasicBlock:
02218     O << *MO.getMBB()->getSymbol();
02219     return;
02220 
02221   default:
02222     llvm_unreachable("Operand type not supported.");
02223   }
02224 }
02225 
02226 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
02227                                       raw_ostream &O, const char *Modifier) {
02228   printOperand(MI, opNum, O);
02229 
02230   if (Modifier && !strcmp(Modifier, "add")) {
02231     O << ", ";
02232     printOperand(MI, opNum + 1, O);
02233   } else {
02234     if (MI->getOperand(opNum + 1).isImm() &&
02235         MI->getOperand(opNum + 1).getImm() == 0)
02236       return; // don't print ',0' or '+0'
02237     O << "+";
02238     printOperand(MI, opNum + 1, O);
02239   }
02240 }
02241 
02242 
02243 // Force static initialization.
02244 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
02245   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
02246   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
02247 }
02248 
02249 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
02250   std::stringstream temp;
02251   LineReader *reader = this->getReader(filename.str());
02252   temp << "\n//";
02253   temp << filename.str();
02254   temp << ":";
02255   temp << line;
02256   temp << " ";
02257   temp << reader->readLine(line);
02258   temp << "\n";
02259   this->OutStreamer.EmitRawText(Twine(temp.str()));
02260 }
02261 
02262 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
02263   if (!reader) {
02264     reader = new LineReader(filename);
02265   }
02266 
02267   if (reader->fileName() != filename) {
02268     delete reader;
02269     reader = new LineReader(filename);
02270   }
02271 
02272   return reader;
02273 }
02274 
02275 std::string LineReader::readLine(unsigned lineNum) {
02276   if (lineNum < theCurLine) {
02277     theCurLine = 0;
02278     fstr.seekg(0, std::ios::beg);
02279   }
02280   while (theCurLine < lineNum) {
02281     fstr.getline(buff, 500);
02282     theCurLine++;
02283   }
02284   return buff;
02285 }
02286 
02287 // Force static initialization.
02288 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
02289   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
02290   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
02291 }