LLVM API Documentation

NVPTXAsmPrinter.cpp
Go to the documentation of this file.
00001 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains a printer that converts from our internal representation
00011 // of machine-dependent LLVM code to NVPTX assembly language.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "NVPTXAsmPrinter.h"
00016 #include "InstPrinter/NVPTXInstPrinter.h"
00017 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
00018 #include "NVPTX.h"
00019 #include "NVPTXInstrInfo.h"
00020 #include "NVPTXMachineFunctionInfo.h"
00021 #include "NVPTXMCExpr.h"
00022 #include "NVPTXRegisterInfo.h"
00023 #include "NVPTXTargetMachine.h"
00024 #include "NVPTXUtilities.h"
00025 #include "cl_common_defines.h"
00026 #include "llvm/ADT/StringExtras.h"
00027 #include "llvm/Analysis/ConstantFolding.h"
00028 #include "llvm/CodeGen/Analysis.h"
00029 #include "llvm/CodeGen/MachineFrameInfo.h"
00030 #include "llvm/CodeGen/MachineModuleInfo.h"
00031 #include "llvm/CodeGen/MachineRegisterInfo.h"
00032 #include "llvm/IR/DebugInfo.h"
00033 #include "llvm/IR/DerivedTypes.h"
00034 #include "llvm/IR/Function.h"
00035 #include "llvm/IR/GlobalVariable.h"
00036 #include "llvm/IR/Mangler.h"
00037 #include "llvm/IR/Module.h"
00038 #include "llvm/IR/Operator.h"
00039 #include "llvm/MC/MCStreamer.h"
00040 #include "llvm/MC/MCSymbol.h"
00041 #include "llvm/Support/CommandLine.h"
00042 #include "llvm/Support/ErrorHandling.h"
00043 #include "llvm/Support/FormattedStream.h"
00044 #include "llvm/Support/Path.h"
00045 #include "llvm/Support/TargetRegistry.h"
00046 #include "llvm/Support/TimeValue.h"
00047 #include "llvm/Target/TargetLoweringObjectFile.h"
00048 #include <sstream>
00049 using namespace llvm;
00050 
00051 #define DEPOTNAME "__local_depot"
00052 
00053 static cl::opt<bool>
00054 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
00055                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
00056                 cl::init(true));
00057 
00058 static cl::opt<bool>
00059 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
00060               cl::desc("NVPTX Specific: Emit source line in ptx file"),
00061               cl::init(false));
00062 
00063 namespace {
00064 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
00065 /// depends.
00066 void DiscoverDependentGlobals(const Value *V,
00067                               DenseSet<const GlobalVariable *> &Globals) {
00068   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
00069     Globals.insert(GV);
00070   else {
00071     if (const User *U = dyn_cast<User>(V)) {
00072       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
00073         DiscoverDependentGlobals(U->getOperand(i), Globals);
00074       }
00075     }
00076   }
00077 }
00078 
00079 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
00080 /// instances to be emitted, but only after any dependents have been added
00081 /// first.
00082 void VisitGlobalVariableForEmission(
00083     const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
00084     DenseSet<const GlobalVariable *> &Visited,
00085     DenseSet<const GlobalVariable *> &Visiting) {
00086   // Have we already visited this one?
00087   if (Visited.count(GV))
00088     return;
00089 
00090   // Do we have a circular dependency?
00091   if (!Visiting.insert(GV).second)
00092     report_fatal_error("Circular dependency found in global variable set");
00093 
00094   // Make sure we visit all dependents first
00095   DenseSet<const GlobalVariable *> Others;
00096   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
00097     DiscoverDependentGlobals(GV->getOperand(i), Others);
00098 
00099   for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
00100                                                   E = Others.end();
00101        I != E; ++I)
00102     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
00103 
00104   // Now we can visit ourself
00105   Order.push_back(GV);
00106   Visited.insert(GV);
00107   Visiting.erase(GV);
00108 }
00109 }
00110 
00111 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
00112   if (!EmitLineNumbers)
00113     return;
00114   if (ignoreLoc(MI))
00115     return;
00116 
00117   DebugLoc curLoc = MI.getDebugLoc();
00118 
00119   if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
00120     return;
00121 
00122   if (prevDebugLoc == curLoc)
00123     return;
00124 
00125   prevDebugLoc = curLoc;
00126 
00127   if (curLoc.isUnknown())
00128     return;
00129 
00130   const MachineFunction *MF = MI.getParent()->getParent();
00131   //const TargetMachine &TM = MF->getTarget();
00132 
00133   const LLVMContext &ctx = MF->getFunction()->getContext();
00134   DIScope Scope(curLoc.getScope(ctx));
00135 
00136   assert((!Scope || Scope.isScope()) &&
00137     "Scope of a DebugLoc should be null or a DIScope.");
00138   if (!Scope)
00139      return;
00140 
00141   StringRef fileName(Scope.getFilename());
00142   StringRef dirName(Scope.getDirectory());
00143   SmallString<128> FullPathName = dirName;
00144   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
00145     sys::path::append(FullPathName, fileName);
00146     fileName = FullPathName.str();
00147   }
00148 
00149   if (filenameMap.find(fileName.str()) == filenameMap.end())
00150     return;
00151 
00152   // Emit the line from the source file.
00153   if (InterleaveSrc)
00154     this->emitSrcInText(fileName.str(), curLoc.getLine());
00155 
00156   std::stringstream temp;
00157   temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
00158        << " " << curLoc.getCol();
00159   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
00160 }
00161 
00162 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
00163   SmallString<128> Str;
00164   raw_svector_ostream OS(Str);
00165   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
00166     emitLineNumberAsDotLoc(*MI);
00167 
00168   MCInst Inst;
00169   lowerToMCInst(MI, Inst);
00170   EmitToStreamer(OutStreamer, Inst);
00171 }
00172 
00173 // Handle symbol backtracking for targets that do not support image handles
00174 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
00175                                            unsigned OpNo, MCOperand &MCOp) {
00176   const MachineOperand &MO = MI->getOperand(OpNo);
00177   const MCInstrDesc &MCID = MI->getDesc();
00178 
00179   if (MCID.TSFlags & NVPTXII::IsTexFlag) {
00180     // This is a texture fetch, so operand 4 is a texref and operand 5 is
00181     // a samplerref
00182     if (OpNo == 4 && MO.isImm()) {
00183       lowerImageHandleSymbol(MO.getImm(), MCOp);
00184       return true;
00185     }
00186     if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
00187       lowerImageHandleSymbol(MO.getImm(), MCOp);
00188       return true;
00189     }
00190 
00191     return false;
00192   } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
00193     unsigned VecSize =
00194       1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
00195 
00196     // For a surface load of vector size N, the Nth operand will be the surfref
00197     if (OpNo == VecSize && MO.isImm()) {
00198       lowerImageHandleSymbol(MO.getImm(), MCOp);
00199       return true;
00200     }
00201 
00202     return false;
00203   } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
00204     // This is a surface store, so operand 0 is a surfref
00205     if (OpNo == 0 && MO.isImm()) {
00206       lowerImageHandleSymbol(MO.getImm(), MCOp);
00207       return true;
00208     }
00209 
00210     return false;
00211   } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
00212     // This is a query, so operand 1 is a surfref/texref
00213     if (OpNo == 1 && MO.isImm()) {
00214       lowerImageHandleSymbol(MO.getImm(), MCOp);
00215       return true;
00216     }
00217 
00218     return false;
00219   }
00220 
00221   return false;
00222 }
00223 
00224 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
00225   // Ewwww
00226   TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
00227   NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
00228   const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
00229   const char *Sym = MFI->getImageHandleSymbol(Index);
00230   std::string *SymNamePtr =
00231     nvTM.getManagedStrPool()->getManagedString(Sym);
00232   MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
00233     StringRef(SymNamePtr->c_str())));
00234 }
00235 
00236 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
00237   OutMI.setOpcode(MI->getOpcode());
00238   const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
00239 
00240   // Special: Do not mangle symbol operand of CALL_PROTOTYPE
00241   if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
00242     const MachineOperand &MO = MI->getOperand(0);
00243     OutMI.addOperand(GetSymbolRef(
00244       OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
00245     return;
00246   }
00247 
00248   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00249     const MachineOperand &MO = MI->getOperand(i);
00250 
00251     MCOperand MCOp;
00252     if (!ST.hasImageHandles()) {
00253       if (lowerImageHandleOperand(MI, i, MCOp)) {
00254         OutMI.addOperand(MCOp);
00255         continue;
00256       }
00257     }
00258 
00259     if (lowerOperand(MO, MCOp))
00260       OutMI.addOperand(MCOp);
00261   }
00262 }
00263 
00264 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
00265                                    MCOperand &MCOp) {
00266   switch (MO.getType()) {
00267   default: llvm_unreachable("unknown operand type");
00268   case MachineOperand::MO_Register:
00269     MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
00270     break;
00271   case MachineOperand::MO_Immediate:
00272     MCOp = MCOperand::CreateImm(MO.getImm());
00273     break;
00274   case MachineOperand::MO_MachineBasicBlock:
00275     MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
00276         MO.getMBB()->getSymbol(), OutContext));
00277     break;
00278   case MachineOperand::MO_ExternalSymbol:
00279     MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
00280     break;
00281   case MachineOperand::MO_GlobalAddress:
00282     MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
00283     break;
00284   case MachineOperand::MO_FPImmediate: {
00285     const ConstantFP *Cnt = MO.getFPImm();
00286     APFloat Val = Cnt->getValueAPF();
00287 
00288     switch (Cnt->getType()->getTypeID()) {
00289     default: report_fatal_error("Unsupported FP type"); break;
00290     case Type::FloatTyID:
00291       MCOp = MCOperand::CreateExpr(
00292         NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
00293       break;
00294     case Type::DoubleTyID:
00295       MCOp = MCOperand::CreateExpr(
00296         NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
00297       break;
00298     }
00299     break;
00300   }
00301   }
00302   return true;
00303 }
00304 
00305 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
00306   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
00307     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00308 
00309     DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
00310     unsigned RegNum = RegMap[Reg];
00311 
00312     // Encode the register class in the upper 4 bits
00313     // Must be kept in sync with NVPTXInstPrinter::printRegName
00314     unsigned Ret = 0;
00315     if (RC == &NVPTX::Int1RegsRegClass) {
00316       Ret = (1 << 28);
00317     } else if (RC == &NVPTX::Int16RegsRegClass) {
00318       Ret = (2 << 28);
00319     } else if (RC == &NVPTX::Int32RegsRegClass) {
00320       Ret = (3 << 28);
00321     } else if (RC == &NVPTX::Int64RegsRegClass) {
00322       Ret = (4 << 28);
00323     } else if (RC == &NVPTX::Float32RegsRegClass) {
00324       Ret = (5 << 28);
00325     } else if (RC == &NVPTX::Float64RegsRegClass) {
00326       Ret = (6 << 28);
00327     } else {
00328       report_fatal_error("Bad register class");
00329     }
00330 
00331     // Insert the vreg number
00332     Ret |= (RegNum & 0x0FFFFFFF);
00333     return Ret;
00334   } else {
00335     // Some special-use registers are actually physical registers.
00336     // Encode this as the register class ID of 0 and the real register ID.
00337     return Reg & 0x0FFFFFFF;
00338   }
00339 }
00340 
00341 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
00342   const MCExpr *Expr;
00343   Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
00344                                  OutContext);
00345   return MCOperand::CreateExpr(Expr);
00346 }
00347 
00348 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
00349   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
00350   const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
00351 
00352   Type *Ty = F->getReturnType();
00353 
00354   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
00355 
00356   if (Ty->getTypeID() == Type::VoidTyID)
00357     return;
00358 
00359   O << " (";
00360 
00361   if (isABI) {
00362     if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
00363       unsigned size = 0;
00364       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
00365         size = ITy->getBitWidth();
00366         if (size < 32)
00367           size = 32;
00368       } else {
00369         assert(Ty->isFloatingPointTy() && "Floating point type expected here");
00370         size = Ty->getPrimitiveSizeInBits();
00371       }
00372 
00373       O << ".param .b" << size << " func_retval0";
00374     } else if (isa<PointerType>(Ty)) {
00375       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
00376         << " func_retval0";
00377     } else {
00378       if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
00379         unsigned totalsz = TD->getTypeAllocSize(Ty);
00380         unsigned retAlignment = 0;
00381         if (!llvm::getAlign(*F, 0, retAlignment))
00382           retAlignment = TD->getABITypeAlignment(Ty);
00383         O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
00384           << "]";
00385       } else
00386         assert(false && "Unknown return type");
00387     }
00388   } else {
00389     SmallVector<EVT, 16> vtparts;
00390     ComputeValueVTs(*TLI, Ty, vtparts);
00391     unsigned idx = 0;
00392     for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
00393       unsigned elems = 1;
00394       EVT elemtype = vtparts[i];
00395       if (vtparts[i].isVector()) {
00396         elems = vtparts[i].getVectorNumElements();
00397         elemtype = vtparts[i].getVectorElementType();
00398       }
00399 
00400       for (unsigned j = 0, je = elems; j != je; ++j) {
00401         unsigned sz = elemtype.getSizeInBits();
00402         if (elemtype.isInteger() && (sz < 32))
00403           sz = 32;
00404         O << ".reg .b" << sz << " func_retval" << idx;
00405         if (j < je - 1)
00406           O << ", ";
00407         ++idx;
00408       }
00409       if (i < e - 1)
00410         O << ", ";
00411     }
00412   }
00413   O << ") ";
00414   return;
00415 }
00416 
00417 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
00418                                         raw_ostream &O) {
00419   const Function *F = MF.getFunction();
00420   printReturnValStr(F, O);
00421 }
00422 
00423 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
00424   SmallString<128> Str;
00425   raw_svector_ostream O(Str);
00426 
00427   if (!GlobalsEmitted) {
00428     emitGlobals(*MF->getFunction()->getParent());
00429     GlobalsEmitted = true;
00430   }
00431   
00432   // Set up
00433   MRI = &MF->getRegInfo();
00434   F = MF->getFunction();
00435   emitLinkageDirective(F, O);
00436   if (llvm::isKernelFunction(*F))
00437     O << ".entry ";
00438   else {
00439     O << ".func ";
00440     printReturnValStr(*MF, O);
00441   }
00442 
00443   O << *CurrentFnSym;
00444 
00445   emitFunctionParamList(*MF, O);
00446 
00447   if (llvm::isKernelFunction(*F))
00448     emitKernelFunctionDirectives(*F, O);
00449 
00450   OutStreamer.EmitRawText(O.str());
00451 
00452   prevDebugLoc = DebugLoc();
00453 }
00454 
00455 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
00456   VRegMapping.clear();
00457   OutStreamer.EmitRawText(StringRef("{\n"));
00458   setAndEmitFunctionVirtualRegisters(*MF);
00459 
00460   SmallString<128> Str;
00461   raw_svector_ostream O(Str);
00462   emitDemotedVars(MF->getFunction(), O);
00463   OutStreamer.EmitRawText(O.str());
00464 }
00465 
00466 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
00467   OutStreamer.EmitRawText(StringRef("}\n"));
00468   VRegMapping.clear();
00469 }
00470 
00471 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
00472   unsigned RegNo = MI->getOperand(0).getReg();
00473   const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
00474   if (TRI->isVirtualRegister(RegNo)) {
00475     OutStreamer.AddComment(Twine("implicit-def: ") +
00476                            getVirtualRegisterName(RegNo));
00477   } else {
00478     OutStreamer.AddComment(
00479         Twine("implicit-def: ") +
00480         TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
00481   }
00482   OutStreamer.AddBlankLine();
00483 }
00484 
00485 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
00486                                                    raw_ostream &O) const {
00487   // If the NVVM IR has some of reqntid* specified, then output
00488   // the reqntid directive, and set the unspecified ones to 1.
00489   // If none of reqntid* is specified, don't output reqntid directive.
00490   unsigned reqntidx, reqntidy, reqntidz;
00491   bool specified = false;
00492   if (llvm::getReqNTIDx(F, reqntidx) == false)
00493     reqntidx = 1;
00494   else
00495     specified = true;
00496   if (llvm::getReqNTIDy(F, reqntidy) == false)
00497     reqntidy = 1;
00498   else
00499     specified = true;
00500   if (llvm::getReqNTIDz(F, reqntidz) == false)
00501     reqntidz = 1;
00502   else
00503     specified = true;
00504 
00505   if (specified)
00506     O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
00507       << "\n";
00508 
00509   // If the NVVM IR has some of maxntid* specified, then output
00510   // the maxntid directive, and set the unspecified ones to 1.
00511   // If none of maxntid* is specified, don't output maxntid directive.
00512   unsigned maxntidx, maxntidy, maxntidz;
00513   specified = false;
00514   if (llvm::getMaxNTIDx(F, maxntidx) == false)
00515     maxntidx = 1;
00516   else
00517     specified = true;
00518   if (llvm::getMaxNTIDy(F, maxntidy) == false)
00519     maxntidy = 1;
00520   else
00521     specified = true;
00522   if (llvm::getMaxNTIDz(F, maxntidz) == false)
00523     maxntidz = 1;
00524   else
00525     specified = true;
00526 
00527   if (specified)
00528     O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
00529       << "\n";
00530 
00531   unsigned mincta;
00532   if (llvm::getMinCTASm(F, mincta))
00533     O << ".minnctapersm " << mincta << "\n";
00534 }
00535 
00536 std::string
00537 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
00538   const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00539 
00540   std::string Name;
00541   raw_string_ostream NameStr(Name);
00542 
00543   VRegRCMap::const_iterator I = VRegMapping.find(RC);
00544   assert(I != VRegMapping.end() && "Bad register class");
00545   const DenseMap<unsigned, unsigned> &RegMap = I->second;
00546 
00547   VRegMap::const_iterator VI = RegMap.find(Reg);
00548   assert(VI != RegMap.end() && "Bad virtual register");
00549   unsigned MappedVR = VI->second;
00550 
00551   NameStr << getNVPTXRegClassStr(RC) << MappedVR;
00552 
00553   NameStr.flush();
00554   return Name;
00555 }
00556 
00557 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
00558                                           raw_ostream &O) {
00559   O << getVirtualRegisterName(vr);
00560 }
00561 
00562 void NVPTXAsmPrinter::printVecModifiedImmediate(
00563     const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
00564   static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
00565   int Imm = (int) MO.getImm();
00566   if (0 == strcmp(Modifier, "vecelem"))
00567     O << "_" << vecelem[Imm];
00568   else if (0 == strcmp(Modifier, "vecv4comm1")) {
00569     if ((Imm < 0) || (Imm > 3))
00570       O << "//";
00571   } else if (0 == strcmp(Modifier, "vecv4comm2")) {
00572     if ((Imm < 4) || (Imm > 7))
00573       O << "//";
00574   } else if (0 == strcmp(Modifier, "vecv4pos")) {
00575     if (Imm < 0)
00576       Imm = 0;
00577     O << "_" << vecelem[Imm % 4];
00578   } else if (0 == strcmp(Modifier, "vecv2comm1")) {
00579     if ((Imm < 0) || (Imm > 1))
00580       O << "//";
00581   } else if (0 == strcmp(Modifier, "vecv2comm2")) {
00582     if ((Imm < 2) || (Imm > 3))
00583       O << "//";
00584   } else if (0 == strcmp(Modifier, "vecv2pos")) {
00585     if (Imm < 0)
00586       Imm = 0;
00587     O << "_" << vecelem[Imm % 2];
00588   } else
00589     llvm_unreachable("Unknown Modifier on immediate operand");
00590 }
00591 
00592 
00593 
00594 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
00595 
00596   emitLinkageDirective(F, O);
00597   if (llvm::isKernelFunction(*F))
00598     O << ".entry ";
00599   else
00600     O << ".func ";
00601   printReturnValStr(F, O);
00602   O << *getSymbol(F) << "\n";
00603   emitFunctionParamList(F, O);
00604   O << ";\n";
00605 }
00606 
00607 static bool usedInGlobalVarDef(const Constant *C) {
00608   if (!C)
00609     return false;
00610 
00611   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
00612     if (GV->getName().str() == "llvm.used")
00613       return false;
00614     return true;
00615   }
00616 
00617   for (const User *U : C->users())
00618     if (const Constant *C = dyn_cast<Constant>(U))
00619       if (usedInGlobalVarDef(C))
00620         return true;
00621 
00622   return false;
00623 }
00624 
00625 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
00626   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
00627     if (othergv->getName().str() == "llvm.used")
00628       return true;
00629   }
00630 
00631   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
00632     if (instr->getParent() && instr->getParent()->getParent()) {
00633       const Function *curFunc = instr->getParent()->getParent();
00634       if (oneFunc && (curFunc != oneFunc))
00635         return false;
00636       oneFunc = curFunc;
00637       return true;
00638     } else
00639       return false;
00640   }
00641 
00642   for (const User *UU : U->users())
00643     if (usedInOneFunc(UU, oneFunc) == false)
00644       return false;
00645 
00646   return true;
00647 }
00648 
00649 /* Find out if a global variable can be demoted to local scope.
00650  * Currently, this is valid for CUDA shared variables, which have local
00651  * scope and global lifetime. So the conditions to check are :
00652  * 1. Is the global variable in shared address space?
00653  * 2. Does it have internal linkage?
00654  * 3. Is the global variable referenced only in one function?
00655  */
00656 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
00657   if (gv->hasInternalLinkage() == false)
00658     return false;
00659   const PointerType *Pty = gv->getType();
00660   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
00661     return false;
00662 
00663   const Function *oneFunc = nullptr;
00664 
00665   bool flag = usedInOneFunc(gv, oneFunc);
00666   if (flag == false)
00667     return false;
00668   if (!oneFunc)
00669     return false;
00670   f = oneFunc;
00671   return true;
00672 }
00673 
00674 static bool useFuncSeen(const Constant *C,
00675                         llvm::DenseMap<const Function *, bool> &seenMap) {
00676   for (const User *U : C->users()) {
00677     if (const Constant *cu = dyn_cast<Constant>(U)) {
00678       if (useFuncSeen(cu, seenMap))
00679         return true;
00680     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
00681       const BasicBlock *bb = I->getParent();
00682       if (!bb)
00683         continue;
00684       const Function *caller = bb->getParent();
00685       if (!caller)
00686         continue;
00687       if (seenMap.find(caller) != seenMap.end())
00688         return true;
00689     }
00690   }
00691   return false;
00692 }
00693 
00694 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
00695   llvm::DenseMap<const Function *, bool> seenMap;
00696   for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
00697     const Function *F = FI;
00698 
00699     if (F->isDeclaration()) {
00700       if (F->use_empty())
00701         continue;
00702       if (F->getIntrinsicID())
00703         continue;
00704       emitDeclaration(F, O);
00705       continue;
00706     }
00707     for (const User *U : F->users()) {
00708       if (const Constant *C = dyn_cast<Constant>(U)) {
00709         if (usedInGlobalVarDef(C)) {
00710           // The use is in the initialization of a global variable
00711           // that is a function pointer, so print a declaration
00712           // for the original function
00713           emitDeclaration(F, O);
00714           break;
00715         }
00716         // Emit a declaration of this function if the function that
00717         // uses this constant expr has already been seen.
00718         if (useFuncSeen(C, seenMap)) {
00719           emitDeclaration(F, O);
00720           break;
00721         }
00722       }
00723 
00724       if (!isa<Instruction>(U))
00725         continue;
00726       const Instruction *instr = cast<Instruction>(U);
00727       const BasicBlock *bb = instr->getParent();
00728       if (!bb)
00729         continue;
00730       const Function *caller = bb->getParent();
00731       if (!caller)
00732         continue;
00733 
00734       // If a caller has already been seen, then the caller is
00735       // appearing in the module before the callee. so print out
00736       // a declaration for the callee.
00737       if (seenMap.find(caller) != seenMap.end()) {
00738         emitDeclaration(F, O);
00739         break;
00740       }
00741     }
00742     seenMap[F] = true;
00743   }
00744 }
00745 
00746 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
00747   DebugInfoFinder DbgFinder;
00748   DbgFinder.processModule(M);
00749 
00750   unsigned i = 1;
00751   for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
00752     StringRef Filename(DIUnit.getFilename());
00753     StringRef Dirname(DIUnit.getDirectory());
00754     SmallString<128> FullPathName = Dirname;
00755     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
00756       sys::path::append(FullPathName, Filename);
00757       Filename = FullPathName.str();
00758     }
00759     if (filenameMap.find(Filename.str()) != filenameMap.end())
00760       continue;
00761     filenameMap[Filename.str()] = i;
00762     OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
00763     ++i;
00764   }
00765 
00766   for (DISubprogram SP : DbgFinder.subprograms()) {
00767     StringRef Filename(SP.getFilename());
00768     StringRef Dirname(SP.getDirectory());
00769     SmallString<128> FullPathName = Dirname;
00770     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
00771       sys::path::append(FullPathName, Filename);
00772       Filename = FullPathName.str();
00773     }
00774     if (filenameMap.find(Filename.str()) != filenameMap.end())
00775       continue;
00776     filenameMap[Filename.str()] = i;
00777     ++i;
00778   }
00779 }
00780 
00781 bool NVPTXAsmPrinter::doInitialization(Module &M) {
00782 
00783   SmallString<128> Str1;
00784   raw_svector_ostream OS1(Str1);
00785 
00786   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
00787   MMI->AnalyzeModule(M);
00788 
00789   // We need to call the parent's one explicitly.
00790   //bool Result = AsmPrinter::doInitialization(M);
00791 
00792   // Initialize TargetLoweringObjectFile.
00793   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
00794       .Initialize(OutContext, TM);
00795 
00796   Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
00797 
00798   // Emit header before any dwarf directives are emitted below.
00799   emitHeader(M, OS1);
00800   OutStreamer.EmitRawText(OS1.str());
00801 
00802   // Already commented out
00803   //bool Result = AsmPrinter::doInitialization(M);
00804 
00805   // Emit module-level inline asm if it exists.
00806   if (!M.getModuleInlineAsm().empty()) {
00807     OutStreamer.AddComment("Start of file scope inline assembly");
00808     OutStreamer.AddBlankLine();
00809     OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
00810     OutStreamer.AddBlankLine();
00811     OutStreamer.AddComment("End of file scope inline assembly");
00812     OutStreamer.AddBlankLine();
00813   }
00814 
00815   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
00816     recordAndEmitFilenames(M);
00817 
00818   GlobalsEmitted = false;
00819     
00820   return false; // success
00821 }
00822 
00823 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
00824   SmallString<128> Str2;
00825   raw_svector_ostream OS2(Str2);
00826 
00827   emitDeclarations(M, OS2);
00828 
00829   // As ptxas does not support forward references of globals, we need to first
00830   // sort the list of module-level globals in def-use order. We visit each
00831   // global variable in order, and ensure that we emit it *after* its dependent
00832   // globals. We use a little extra memory maintaining both a set and a list to
00833   // have fast searches while maintaining a strict ordering.
00834   SmallVector<const GlobalVariable *, 8> Globals;
00835   DenseSet<const GlobalVariable *> GVVisited;
00836   DenseSet<const GlobalVariable *> GVVisiting;
00837 
00838   // Visit each global variable, in order
00839   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
00840        I != E; ++I)
00841     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
00842 
00843   assert(GVVisited.size() == M.getGlobalList().size() &&
00844          "Missed a global variable");
00845   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
00846 
00847   // Print out module-level global variables in proper order
00848   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
00849     printModuleLevelGV(Globals[i], OS2);
00850 
00851   OS2 << '\n';
00852 
00853   OutStreamer.EmitRawText(OS2.str());
00854 }
00855 
00856 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
00857   O << "//\n";
00858   O << "// Generated by LLVM NVPTX Back-End\n";
00859   O << "//\n";
00860   O << "\n";
00861 
00862   unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
00863   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
00864 
00865   O << ".target ";
00866   O << nvptxSubtarget.getTargetName();
00867 
00868   if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
00869     O << ", texmode_independent";
00870   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
00871     if (!nvptxSubtarget.hasDouble())
00872       O << ", map_f64_to_f32";
00873   }
00874 
00875   if (MAI->doesSupportDebugInformation())
00876     O << ", debug";
00877 
00878   O << "\n";
00879 
00880   O << ".address_size ";
00881   if (nvptxSubtarget.is64Bit())
00882     O << "64";
00883   else
00884     O << "32";
00885   O << "\n";
00886 
00887   O << "\n";
00888 }
00889 
00890 bool NVPTXAsmPrinter::doFinalization(Module &M) {
00891 
00892   // If we did not emit any functions, then the global declarations have not
00893   // yet been emitted.
00894   if (!GlobalsEmitted) {
00895     emitGlobals(M);
00896     GlobalsEmitted = true;
00897   }
00898 
00899   // XXX Temproarily remove global variables so that doFinalization() will not
00900   // emit them again (global variables are emitted at beginning).
00901 
00902   Module::GlobalListType &global_list = M.getGlobalList();
00903   int i, n = global_list.size();
00904   GlobalVariable **gv_array = new GlobalVariable *[n];
00905 
00906   // first, back-up GlobalVariable in gv_array
00907   i = 0;
00908   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
00909        I != E; ++I)
00910     gv_array[i++] = &*I;
00911 
00912   // second, empty global_list
00913   while (!global_list.empty())
00914     global_list.remove(global_list.begin());
00915 
00916   // call doFinalization
00917   bool ret = AsmPrinter::doFinalization(M);
00918 
00919   // now we restore global variables
00920   for (i = 0; i < n; i++)
00921     global_list.insert(global_list.end(), gv_array[i]);
00922 
00923   clearAnnotationCache(&M);
00924 
00925   delete[] gv_array;
00926   return ret;
00927 
00928   //bool Result = AsmPrinter::doFinalization(M);
00929   // Instead of calling the parents doFinalization, we may
00930   // clone parents doFinalization and customize here.
00931   // Currently, we if NVISA out the EmitGlobals() in
00932   // parent's doFinalization, which is too intrusive.
00933   //
00934   // Same for the doInitialization.
00935   //return Result;
00936 }
00937 
00938 // This function emits appropriate linkage directives for
00939 // functions and global variables.
00940 //
00941 // extern function declaration            -> .extern
00942 // extern function definition             -> .visible
00943 // external global variable with init     -> .visible
00944 // external without init                  -> .extern
00945 // appending                              -> not allowed, assert.
00946 // for any linkage other than
00947 // internal, private, linker_private,
00948 // linker_private_weak, linker_private_weak_def_auto,
00949 // we emit                                -> .weak.
00950 
00951 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
00952                                            raw_ostream &O) {
00953   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
00954     if (V->hasExternalLinkage()) {
00955       if (isa<GlobalVariable>(V)) {
00956         const GlobalVariable *GVar = cast<GlobalVariable>(V);
00957         if (GVar) {
00958           if (GVar->hasInitializer())
00959             O << ".visible ";
00960           else
00961             O << ".extern ";
00962         }
00963       } else if (V->isDeclaration())
00964         O << ".extern ";
00965       else
00966         O << ".visible ";
00967     } else if (V->hasAppendingLinkage()) {
00968       std::string msg;
00969       msg.append("Error: ");
00970       msg.append("Symbol ");
00971       if (V->hasName())
00972         msg.append(V->getName().str());
00973       msg.append("has unsupported appending linkage type");
00974       llvm_unreachable(msg.c_str());
00975     } else if (!V->hasInternalLinkage() &&
00976                !V->hasPrivateLinkage()) {
00977       O << ".weak ";
00978     }
00979   }
00980 }
00981 
00982 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
00983                                          raw_ostream &O,
00984                                          bool processDemoted) {
00985 
00986   // Skip meta data
00987   if (GVar->hasSection()) {
00988     if (GVar->getSection() == StringRef("llvm.metadata"))
00989       return;
00990   }
00991 
00992   // Skip LLVM intrinsic global variables
00993   if (GVar->getName().startswith("llvm.") ||
00994       GVar->getName().startswith("nvvm."))
00995     return;
00996 
00997   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
00998 
00999   // GlobalVariables are always constant pointers themselves.
01000   const PointerType *PTy = GVar->getType();
01001   Type *ETy = PTy->getElementType();
01002 
01003   if (GVar->hasExternalLinkage()) {
01004     if (GVar->hasInitializer())
01005       O << ".visible ";
01006     else
01007       O << ".extern ";
01008   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
01009              GVar->hasAvailableExternallyLinkage() ||
01010              GVar->hasCommonLinkage()) {
01011     O << ".weak ";
01012   }
01013 
01014   if (llvm::isTexture(*GVar)) {
01015     O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
01016     return;
01017   }
01018 
01019   if (llvm::isSurface(*GVar)) {
01020     O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
01021     return;
01022   }
01023 
01024   if (GVar->isDeclaration()) {
01025     // (extern) declarations, no definition or initializer
01026     // Currently the only known declaration is for an automatic __local
01027     // (.shared) promoted to global.
01028     emitPTXGlobalVariable(GVar, O);
01029     O << ";\n";
01030     return;
01031   }
01032 
01033   if (llvm::isSampler(*GVar)) {
01034     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
01035 
01036     const Constant *Initializer = nullptr;
01037     if (GVar->hasInitializer())
01038       Initializer = GVar->getInitializer();
01039     const ConstantInt *CI = nullptr;
01040     if (Initializer)
01041       CI = dyn_cast<ConstantInt>(Initializer);
01042     if (CI) {
01043       unsigned sample = CI->getZExtValue();
01044 
01045       O << " = { ";
01046 
01047       for (int i = 0,
01048                addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
01049            i < 3; i++) {
01050         O << "addr_mode_" << i << " = ";
01051         switch (addr) {
01052         case 0:
01053           O << "wrap";
01054           break;
01055         case 1:
01056           O << "clamp_to_border";
01057           break;
01058         case 2:
01059           O << "clamp_to_edge";
01060           break;
01061         case 3:
01062           O << "wrap";
01063           break;
01064         case 4:
01065           O << "mirror";
01066           break;
01067         }
01068         O << ", ";
01069       }
01070       O << "filter_mode = ";
01071       switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
01072       case 0:
01073         O << "nearest";
01074         break;
01075       case 1:
01076         O << "linear";
01077         break;
01078       case 2:
01079         llvm_unreachable("Anisotropic filtering is not supported");
01080       default:
01081         O << "nearest";
01082         break;
01083       }
01084       if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
01085         O << ", force_unnormalized_coords = 1";
01086       }
01087       O << " }";
01088     }
01089 
01090     O << ";\n";
01091     return;
01092   }
01093 
01094   if (GVar->hasPrivateLinkage()) {
01095 
01096     if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
01097       return;
01098 
01099     // FIXME - need better way (e.g. Metadata) to avoid generating this global
01100     if (!strncmp(GVar->getName().data(), "filename", 8))
01101       return;
01102     if (GVar->use_empty())
01103       return;
01104   }
01105 
01106   const Function *demotedFunc = nullptr;
01107   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
01108     O << "// " << GVar->getName().str() << " has been demoted\n";
01109     if (localDecls.find(demotedFunc) != localDecls.end())
01110       localDecls[demotedFunc].push_back(GVar);
01111     else {
01112       std::vector<const GlobalVariable *> temp;
01113       temp.push_back(GVar);
01114       localDecls[demotedFunc] = temp;
01115     }
01116     return;
01117   }
01118 
01119   O << ".";
01120   emitPTXAddressSpace(PTy->getAddressSpace(), O);
01121 
01122   if (isManaged(*GVar)) {
01123     O << " .attribute(.managed)";
01124   }
01125 
01126   if (GVar->getAlignment() == 0)
01127     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
01128   else
01129     O << " .align " << GVar->getAlignment();
01130 
01131   if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
01132     O << " .";
01133     // Special case: ABI requires that we use .u8 for predicates
01134     if (ETy->isIntegerTy(1))
01135       O << "u8";
01136     else
01137       O << getPTXFundamentalTypeStr(ETy, false);
01138     O << " ";
01139     O << *getSymbol(GVar);
01140 
01141     // Ptx allows variable initilization only for constant and global state
01142     // spaces.
01143     if (GVar->hasInitializer()) {
01144       if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
01145           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
01146         const Constant *Initializer = GVar->getInitializer();
01147         // 'undef' is treated as there is no value spefied.
01148         if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
01149           O << " = ";
01150           printScalarConstant(Initializer, O);
01151         }
01152       } else {
01153         // The frontend adds zero-initializer to variables that don't have an
01154         // initial value, so skip warning for this case.
01155         if (!GVar->getInitializer()->isNullValue()) {
01156           std::string warnMsg = "initial value of '" + GVar->getName().str() +
01157               "' is not allowed in addrspace(" +
01158               llvm::utostr_32(PTy->getAddressSpace()) + ")";
01159           report_fatal_error(warnMsg.c_str());
01160         }
01161       }
01162     }
01163   } else {
01164     unsigned int ElementSize = 0;
01165 
01166     // Although PTX has direct support for struct type and array type and
01167     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
01168     // targets that support these high level field accesses. Structs, arrays
01169     // and vectors are lowered into arrays of bytes.
01170     switch (ETy->getTypeID()) {
01171     case Type::StructTyID:
01172     case Type::ArrayTyID:
01173     case Type::VectorTyID:
01174       ElementSize = TD->getTypeStoreSize(ETy);
01175       // Ptx allows variable initilization only for constant and
01176       // global state spaces.
01177       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
01178            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
01179           GVar->hasInitializer()) {
01180         const Constant *Initializer = GVar->getInitializer();
01181         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
01182           AggBuffer aggBuffer(ElementSize, O, *this);
01183           bufferAggregateConstant(Initializer, &aggBuffer);
01184           if (aggBuffer.numSymbols) {
01185             if (nvptxSubtarget.is64Bit()) {
01186               O << " .u64 " << *getSymbol(GVar) << "[";
01187               O << ElementSize / 8;
01188             } else {
01189               O << " .u32 " << *getSymbol(GVar) << "[";
01190               O << ElementSize / 4;
01191             }
01192             O << "]";
01193           } else {
01194             O << " .b8 " << *getSymbol(GVar) << "[";
01195             O << ElementSize;
01196             O << "]";
01197           }
01198           O << " = {";
01199           aggBuffer.print();
01200           O << "}";
01201         } else {
01202           O << " .b8 " << *getSymbol(GVar);
01203           if (ElementSize) {
01204             O << "[";
01205             O << ElementSize;
01206             O << "]";
01207           }
01208         }
01209       } else {
01210         O << " .b8 " << *getSymbol(GVar);
01211         if (ElementSize) {
01212           O << "[";
01213           O << ElementSize;
01214           O << "]";
01215         }
01216       }
01217       break;
01218     default:
01219       llvm_unreachable("type not supported yet");
01220     }
01221 
01222   }
01223   O << ";\n";
01224 }
01225 
01226 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
01227   if (localDecls.find(f) == localDecls.end())
01228     return;
01229 
01230   std::vector<const GlobalVariable *> &gvars = localDecls[f];
01231 
01232   for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
01233     O << "\t// demoted variable\n\t";
01234     printModuleLevelGV(gvars[i], O, true);
01235   }
01236 }
01237 
01238 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
01239                                           raw_ostream &O) const {
01240   switch (AddressSpace) {
01241   case llvm::ADDRESS_SPACE_LOCAL:
01242     O << "local";
01243     break;
01244   case llvm::ADDRESS_SPACE_GLOBAL:
01245     O << "global";
01246     break;
01247   case llvm::ADDRESS_SPACE_CONST:
01248     O << "const";
01249     break;
01250   case llvm::ADDRESS_SPACE_SHARED:
01251     O << "shared";
01252     break;
01253   default:
01254     report_fatal_error("Bad address space found while emitting PTX");
01255     break;
01256   }
01257 }
01258 
01259 std::string
01260 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
01261   switch (Ty->getTypeID()) {
01262   default:
01263     llvm_unreachable("unexpected type");
01264     break;
01265   case Type::IntegerTyID: {
01266     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
01267     if (NumBits == 1)
01268       return "pred";
01269     else if (NumBits <= 64) {
01270       std::string name = "u";
01271       return name + utostr(NumBits);
01272     } else {
01273       llvm_unreachable("Integer too large");
01274       break;
01275     }
01276     break;
01277   }
01278   case Type::FloatTyID:
01279     return "f32";
01280   case Type::DoubleTyID:
01281     return "f64";
01282   case Type::PointerTyID:
01283     if (nvptxSubtarget.is64Bit())
01284       if (useB4PTR)
01285         return "b64";
01286       else
01287         return "u64";
01288     else if (useB4PTR)
01289       return "b32";
01290     else
01291       return "u32";
01292   }
01293   llvm_unreachable("unexpected type");
01294   return nullptr;
01295 }
01296 
01297 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
01298                                             raw_ostream &O) {
01299 
01300   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
01301 
01302   // GlobalVariables are always constant pointers themselves.
01303   const PointerType *PTy = GVar->getType();
01304   Type *ETy = PTy->getElementType();
01305 
01306   O << ".";
01307   emitPTXAddressSpace(PTy->getAddressSpace(), O);
01308   if (GVar->getAlignment() == 0)
01309     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
01310   else
01311     O << " .align " << GVar->getAlignment();
01312 
01313   if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
01314     O << " .";
01315     O << getPTXFundamentalTypeStr(ETy);
01316     O << " ";
01317     O << *getSymbol(GVar);
01318     return;
01319   }
01320 
01321   int64_t ElementSize = 0;
01322 
01323   // Although PTX has direct support for struct type and array type and LLVM IR
01324   // is very similar to PTX, the LLVM CodeGen does not support for targets that
01325   // support these high level field accesses. Structs and arrays are lowered
01326   // into arrays of bytes.
01327   switch (ETy->getTypeID()) {
01328   case Type::StructTyID:
01329   case Type::ArrayTyID:
01330   case Type::VectorTyID:
01331     ElementSize = TD->getTypeStoreSize(ETy);
01332     O << " .b8 " << *getSymbol(GVar) << "[";
01333     if (ElementSize) {
01334       O << itostr(ElementSize);
01335     }
01336     O << "]";
01337     break;
01338   default:
01339     llvm_unreachable("type not supported yet");
01340   }
01341   return;
01342 }
01343 
01344 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
01345   if (Ty->isSingleValueType())
01346     return TD->getPrefTypeAlignment(Ty);
01347 
01348   const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
01349   if (ATy)
01350     return getOpenCLAlignment(TD, ATy->getElementType());
01351 
01352   const StructType *STy = dyn_cast<StructType>(Ty);
01353   if (STy) {
01354     unsigned int alignStruct = 1;
01355     // Go through each element of the struct and find the
01356     // largest alignment.
01357     for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
01358       Type *ETy = STy->getElementType(i);
01359       unsigned int align = getOpenCLAlignment(TD, ETy);
01360       if (align > alignStruct)
01361         alignStruct = align;
01362     }
01363     return alignStruct;
01364   }
01365 
01366   const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
01367   if (FTy)
01368     return TD->getPointerPrefAlignment();
01369   return TD->getPrefTypeAlignment(Ty);
01370 }
01371 
01372 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
01373                                      int paramIndex, raw_ostream &O) {
01374   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
01375       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
01376     O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
01377   else {
01378     std::string argName = I->getName();
01379     const char *p = argName.c_str();
01380     while (*p) {
01381       if (*p == '.')
01382         O << "_";
01383       else
01384         O << *p;
01385       p++;
01386     }
01387   }
01388 }
01389 
01390 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
01391   Function::const_arg_iterator I, E;
01392   int i = 0;
01393 
01394   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
01395       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
01396     O << *CurrentFnSym << "_param_" << paramIndex;
01397     return;
01398   }
01399 
01400   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
01401     if (i == paramIndex) {
01402       printParamName(I, paramIndex, O);
01403       return;
01404     }
01405   }
01406   llvm_unreachable("paramIndex out of bound");
01407 }
01408 
01409 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
01410   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
01411   const AttributeSet &PAL = F->getAttributes();
01412   const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
01413   Function::const_arg_iterator I, E;
01414   unsigned paramIndex = 0;
01415   bool first = true;
01416   bool isKernelFunc = llvm::isKernelFunction(*F);
01417   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
01418   MVT thePointerTy = TLI->getPointerTy();
01419 
01420   O << "(\n";
01421 
01422   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
01423     Type *Ty = I->getType();
01424 
01425     if (!first)
01426       O << ",\n";
01427 
01428     first = false;
01429 
01430     // Handle image/sampler parameters
01431     if (isKernelFunction(*F)) {
01432       if (isSampler(*I) || isImage(*I)) {
01433         if (isImage(*I)) {
01434           std::string sname = I->getName();
01435           if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
01436             if (nvptxSubtarget.hasImageHandles())
01437               O << "\t.param .u64 .ptr .surfref ";
01438             else
01439               O << "\t.param .surfref ";
01440             O << *CurrentFnSym << "_param_" << paramIndex;
01441           }
01442           else { // Default image is read_only
01443             if (nvptxSubtarget.hasImageHandles())
01444               O << "\t.param .u64 .ptr .texref ";
01445             else
01446               O << "\t.param .texref ";
01447             O << *CurrentFnSym << "_param_" << paramIndex;
01448           }
01449         } else {
01450           if (nvptxSubtarget.hasImageHandles())
01451             O << "\t.param .u64 .ptr .samplerref ";
01452           else
01453             O << "\t.param .samplerref ";
01454           O << *CurrentFnSym << "_param_" << paramIndex;
01455         }
01456         continue;
01457       }
01458     }
01459 
01460     if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
01461       if (Ty->isAggregateType() || Ty->isVectorTy()) {
01462         // Just print .param .align <a> .b8 .param[size];
01463         // <a> = PAL.getparamalignment
01464         // size = typeallocsize of element type
01465         unsigned align = PAL.getParamAlignment(paramIndex + 1);
01466         if (align == 0)
01467           align = TD->getABITypeAlignment(Ty);
01468 
01469         unsigned sz = TD->getTypeAllocSize(Ty);
01470         O << "\t.param .align " << align << " .b8 ";
01471         printParamName(I, paramIndex, O);
01472         O << "[" << sz << "]";
01473 
01474         continue;
01475       }
01476       // Just a scalar
01477       const PointerType *PTy = dyn_cast<PointerType>(Ty);
01478       if (isKernelFunc) {
01479         if (PTy) {
01480           // Special handling for pointer arguments to kernel
01481           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
01482 
01483           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
01484             Type *ETy = PTy->getElementType();
01485             int addrSpace = PTy->getAddressSpace();
01486             switch (addrSpace) {
01487             default:
01488               O << ".ptr ";
01489               break;
01490             case llvm::ADDRESS_SPACE_CONST:
01491               O << ".ptr .const ";
01492               break;
01493             case llvm::ADDRESS_SPACE_SHARED:
01494               O << ".ptr .shared ";
01495               break;
01496             case llvm::ADDRESS_SPACE_GLOBAL:
01497               O << ".ptr .global ";
01498               break;
01499             }
01500             O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
01501           }
01502           printParamName(I, paramIndex, O);
01503           continue;
01504         }
01505 
01506         // non-pointer scalar to kernel func
01507         O << "\t.param .";
01508         // Special case: predicate operands become .u8 types
01509         if (Ty->isIntegerTy(1))
01510           O << "u8";
01511         else
01512           O << getPTXFundamentalTypeStr(Ty);
01513         O << " ";
01514         printParamName(I, paramIndex, O);
01515         continue;
01516       }
01517       // Non-kernel function, just print .param .b<size> for ABI
01518       // and .reg .b<size> for non-ABI
01519       unsigned sz = 0;
01520       if (isa<IntegerType>(Ty)) {
01521         sz = cast<IntegerType>(Ty)->getBitWidth();
01522         if (sz < 32)
01523           sz = 32;
01524       } else if (isa<PointerType>(Ty))
01525         sz = thePointerTy.getSizeInBits();
01526       else
01527         sz = Ty->getPrimitiveSizeInBits();
01528       if (isABI)
01529         O << "\t.param .b" << sz << " ";
01530       else
01531         O << "\t.reg .b" << sz << " ";
01532       printParamName(I, paramIndex, O);
01533       continue;
01534     }
01535 
01536     // param has byVal attribute. So should be a pointer
01537     const PointerType *PTy = dyn_cast<PointerType>(Ty);
01538     assert(PTy && "Param with byval attribute should be a pointer type");
01539     Type *ETy = PTy->getElementType();
01540 
01541     if (isABI || isKernelFunc) {
01542       // Just print .param .align <a> .b8 .param[size];
01543       // <a> = PAL.getparamalignment
01544       // size = typeallocsize of element type
01545       unsigned align = PAL.getParamAlignment(paramIndex + 1);
01546       if (align == 0)
01547         align = TD->getABITypeAlignment(ETy);
01548 
01549       unsigned sz = TD->getTypeAllocSize(ETy);
01550       O << "\t.param .align " << align << " .b8 ";
01551       printParamName(I, paramIndex, O);
01552       O << "[" << sz << "]";
01553       continue;
01554     } else {
01555       // Split the ETy into constituent parts and
01556       // print .param .b<size> <name> for each part.
01557       // Further, if a part is vector, print the above for
01558       // each vector element.
01559       SmallVector<EVT, 16> vtparts;
01560       ComputeValueVTs(*TLI, ETy, vtparts);
01561       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
01562         unsigned elems = 1;
01563         EVT elemtype = vtparts[i];
01564         if (vtparts[i].isVector()) {
01565           elems = vtparts[i].getVectorNumElements();
01566           elemtype = vtparts[i].getVectorElementType();
01567         }
01568 
01569         for (unsigned j = 0, je = elems; j != je; ++j) {
01570           unsigned sz = elemtype.getSizeInBits();
01571           if (elemtype.isInteger() && (sz < 32))
01572             sz = 32;
01573           O << "\t.reg .b" << sz << " ";
01574           printParamName(I, paramIndex, O);
01575           if (j < je - 1)
01576             O << ",\n";
01577           ++paramIndex;
01578         }
01579         if (i < e - 1)
01580           O << ",\n";
01581       }
01582       --paramIndex;
01583       continue;
01584     }
01585   }
01586 
01587   O << "\n)\n";
01588 }
01589 
01590 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
01591                                             raw_ostream &O) {
01592   const Function *F = MF.getFunction();
01593   emitFunctionParamList(F, O);
01594 }
01595 
01596 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
01597     const MachineFunction &MF) {
01598   SmallString<128> Str;
01599   raw_svector_ostream O(Str);
01600 
01601   // Map the global virtual register number to a register class specific
01602   // virtual register number starting from 1 with that class.
01603   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
01604   //unsigned numRegClasses = TRI->getNumRegClasses();
01605 
01606   // Emit the Fake Stack Object
01607   const MachineFrameInfo *MFI = MF.getFrameInfo();
01608   int NumBytes = (int) MFI->getStackSize();
01609   if (NumBytes) {
01610     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
01611       << getFunctionNumber() << "[" << NumBytes << "];\n";
01612     if (nvptxSubtarget.is64Bit()) {
01613       O << "\t.reg .b64 \t%SP;\n";
01614       O << "\t.reg .b64 \t%SPL;\n";
01615     } else {
01616       O << "\t.reg .b32 \t%SP;\n";
01617       O << "\t.reg .b32 \t%SPL;\n";
01618     }
01619   }
01620 
01621   // Go through all virtual registers to establish the mapping between the
01622   // global virtual
01623   // register number and the per class virtual register number.
01624   // We use the per class virtual register number in the ptx output.
01625   unsigned int numVRs = MRI->getNumVirtRegs();
01626   for (unsigned i = 0; i < numVRs; i++) {
01627     unsigned int vr = TRI->index2VirtReg(i);
01628     const TargetRegisterClass *RC = MRI->getRegClass(vr);
01629     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
01630     int n = regmap.size();
01631     regmap.insert(std::make_pair(vr, n + 1));
01632   }
01633 
01634   // Emit register declarations
01635   // @TODO: Extract out the real register usage
01636   // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
01637   // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
01638   // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
01639   // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
01640   // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
01641   // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
01642   // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
01643 
01644   // Emit declaration of the virtual registers or 'physical' registers for
01645   // each register class
01646   for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
01647     const TargetRegisterClass *RC = TRI->getRegClass(i);
01648     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
01649     std::string rcname = getNVPTXRegClassName(RC);
01650     std::string rcStr = getNVPTXRegClassStr(RC);
01651     int n = regmap.size();
01652 
01653     // Only declare those registers that may be used.
01654     if (n) {
01655        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
01656          << ">;\n";
01657     }
01658   }
01659 
01660   OutStreamer.EmitRawText(O.str());
01661 }
01662 
01663 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
01664   APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
01665   bool ignored;
01666   unsigned int numHex;
01667   const char *lead;
01668 
01669   if (Fp->getType()->getTypeID() == Type::FloatTyID) {
01670     numHex = 8;
01671     lead = "0f";
01672     APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
01673   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
01674     numHex = 16;
01675     lead = "0d";
01676     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
01677   } else
01678     llvm_unreachable("unsupported fp type");
01679 
01680   APInt API = APF.bitcastToAPInt();
01681   std::string hexstr(utohexstr(API.getZExtValue()));
01682   O << lead;
01683   if (hexstr.length() < numHex)
01684     O << std::string(numHex - hexstr.length(), '0');
01685   O << utohexstr(API.getZExtValue());
01686 }
01687 
01688 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
01689   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
01690     O << CI->getValue();
01691     return;
01692   }
01693   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
01694     printFPConstant(CFP, O);
01695     return;
01696   }
01697   if (isa<ConstantPointerNull>(CPV)) {
01698     O << "0";
01699     return;
01700   }
01701   if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
01702     PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
01703     bool IsNonGenericPointer = false;
01704     if (PTy && PTy->getAddressSpace() != 0) {
01705       IsNonGenericPointer = true;
01706     }
01707     if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
01708       O << "generic(";
01709       O << *getSymbol(GVar);
01710       O << ")";
01711     } else {
01712       O << *getSymbol(GVar);
01713     }
01714     return;
01715   }
01716   if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01717     const Value *v = Cexpr->stripPointerCasts();
01718     PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
01719     bool IsNonGenericPointer = false;
01720     if (PTy && PTy->getAddressSpace() != 0) {
01721       IsNonGenericPointer = true;
01722     }
01723     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
01724       if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
01725         O << "generic(";
01726         O << *getSymbol(GVar);
01727         O << ")";
01728       } else {
01729         O << *getSymbol(GVar);
01730       }
01731       return;
01732     } else {
01733       O << *lowerConstant(CPV);
01734       return;
01735     }
01736   }
01737   llvm_unreachable("Not scalar type found in printScalarConstant()");
01738 }
01739 
01740 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
01741                                    AggBuffer *aggBuffer) {
01742 
01743   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
01744 
01745   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
01746     int s = TD->getTypeAllocSize(CPV->getType());
01747     if (s < Bytes)
01748       s = Bytes;
01749     aggBuffer->addZeros(s);
01750     return;
01751   }
01752 
01753   unsigned char *ptr;
01754   switch (CPV->getType()->getTypeID()) {
01755 
01756   case Type::IntegerTyID: {
01757     const Type *ETy = CPV->getType();
01758     if (ETy == Type::getInt8Ty(CPV->getContext())) {
01759       unsigned char c =
01760           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
01761       ptr = &c;
01762       aggBuffer->addBytes(ptr, 1, Bytes);
01763     } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
01764       short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
01765       ptr = (unsigned char *)&int16;
01766       aggBuffer->addBytes(ptr, 2, Bytes);
01767     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
01768       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
01769         int int32 = (int)(constInt->getZExtValue());
01770         ptr = (unsigned char *)&int32;
01771         aggBuffer->addBytes(ptr, 4, Bytes);
01772         break;
01773       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01774         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
01775                 ConstantFoldConstantExpression(Cexpr, TD))) {
01776           int int32 = (int)(constInt->getZExtValue());
01777           ptr = (unsigned char *)&int32;
01778           aggBuffer->addBytes(ptr, 4, Bytes);
01779           break;
01780         }
01781         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
01782           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
01783           aggBuffer->addSymbol(v);
01784           aggBuffer->addZeros(4);
01785           break;
01786         }
01787       }
01788       llvm_unreachable("unsupported integer const type");
01789     } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
01790       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
01791         long long int64 = (long long)(constInt->getZExtValue());
01792         ptr = (unsigned char *)&int64;
01793         aggBuffer->addBytes(ptr, 8, Bytes);
01794         break;
01795       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01796         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
01797                 ConstantFoldConstantExpression(Cexpr, TD))) {
01798           long long int64 = (long long)(constInt->getZExtValue());
01799           ptr = (unsigned char *)&int64;
01800           aggBuffer->addBytes(ptr, 8, Bytes);
01801           break;
01802         }
01803         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
01804           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
01805           aggBuffer->addSymbol(v);
01806           aggBuffer->addZeros(8);
01807           break;
01808         }
01809       }
01810       llvm_unreachable("unsupported integer const type");
01811     } else
01812       llvm_unreachable("unsupported integer const type");
01813     break;
01814   }
01815   case Type::FloatTyID:
01816   case Type::DoubleTyID: {
01817     const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
01818     const Type *Ty = CFP->getType();
01819     if (Ty == Type::getFloatTy(CPV->getContext())) {
01820       float float32 = (float) CFP->getValueAPF().convertToFloat();
01821       ptr = (unsigned char *)&float32;
01822       aggBuffer->addBytes(ptr, 4, Bytes);
01823     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
01824       double float64 = CFP->getValueAPF().convertToDouble();
01825       ptr = (unsigned char *)&float64;
01826       aggBuffer->addBytes(ptr, 8, Bytes);
01827     } else {
01828       llvm_unreachable("unsupported fp const type");
01829     }
01830     break;
01831   }
01832   case Type::PointerTyID: {
01833     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
01834       aggBuffer->addSymbol(GVar);
01835     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01836       const Value *v = Cexpr->stripPointerCasts();
01837       aggBuffer->addSymbol(v);
01838     }
01839     unsigned int s = TD->getTypeAllocSize(CPV->getType());
01840     aggBuffer->addZeros(s);
01841     break;
01842   }
01843 
01844   case Type::ArrayTyID:
01845   case Type::VectorTyID:
01846   case Type::StructTyID: {
01847     if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
01848         isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
01849       int ElementSize = TD->getTypeAllocSize(CPV->getType());
01850       bufferAggregateConstant(CPV, aggBuffer);
01851       if (Bytes > ElementSize)
01852         aggBuffer->addZeros(Bytes - ElementSize);
01853     } else if (isa<ConstantAggregateZero>(CPV))
01854       aggBuffer->addZeros(Bytes);
01855     else
01856       llvm_unreachable("Unexpected Constant type");
01857     break;
01858   }
01859 
01860   default:
01861     llvm_unreachable("unsupported type");
01862   }
01863 }
01864 
01865 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
01866                                               AggBuffer *aggBuffer) {
01867   const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
01868   int Bytes;
01869 
01870   // Old constants
01871   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
01872     if (CPV->getNumOperands())
01873       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
01874         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
01875     return;
01876   }
01877 
01878   if (const ConstantDataSequential *CDS =
01879           dyn_cast<ConstantDataSequential>(CPV)) {
01880     if (CDS->getNumElements())
01881       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
01882         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
01883                      aggBuffer);
01884     return;
01885   }
01886 
01887   if (isa<ConstantStruct>(CPV)) {
01888     if (CPV->getNumOperands()) {
01889       StructType *ST = cast<StructType>(CPV->getType());
01890       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
01891         if (i == (e - 1))
01892           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
01893                   TD->getTypeAllocSize(ST) -
01894                   TD->getStructLayout(ST)->getElementOffset(i);
01895         else
01896           Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
01897                   TD->getStructLayout(ST)->getElementOffset(i);
01898         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
01899       }
01900     }
01901     return;
01902   }
01903   llvm_unreachable("unsupported constant type in printAggregateConstant()");
01904 }
01905 
01906 // buildTypeNameMap - Run through symbol table looking for type names.
01907 //
01908 
01909 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
01910 
01911   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
01912 
01913   if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
01914                                   !PI->second.compare("struct._image2d_t") ||
01915                                   !PI->second.compare("struct._image3d_t")))
01916     return true;
01917 
01918   return false;
01919 }
01920 
01921 
01922 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
01923   switch (MI.getOpcode()) {
01924   default:
01925     return false;
01926   case NVPTX::CallArgBeginInst:
01927   case NVPTX::CallArgEndInst0:
01928   case NVPTX::CallArgEndInst1:
01929   case NVPTX::CallArgF32:
01930   case NVPTX::CallArgF64:
01931   case NVPTX::CallArgI16:
01932   case NVPTX::CallArgI32:
01933   case NVPTX::CallArgI32imm:
01934   case NVPTX::CallArgI64:
01935   case NVPTX::CallArgParam:
01936   case NVPTX::CallVoidInst:
01937   case NVPTX::CallVoidInstReg:
01938   case NVPTX::Callseq_End:
01939   case NVPTX::CallVoidInstReg64:
01940   case NVPTX::DeclareParamInst:
01941   case NVPTX::DeclareRetMemInst:
01942   case NVPTX::DeclareRetRegInst:
01943   case NVPTX::DeclareRetScalarInst:
01944   case NVPTX::DeclareScalarParamInst:
01945   case NVPTX::DeclareScalarRegInst:
01946   case NVPTX::StoreParamF32:
01947   case NVPTX::StoreParamF64:
01948   case NVPTX::StoreParamI16:
01949   case NVPTX::StoreParamI32:
01950   case NVPTX::StoreParamI64:
01951   case NVPTX::StoreParamI8:
01952   case NVPTX::StoreRetvalF32:
01953   case NVPTX::StoreRetvalF64:
01954   case NVPTX::StoreRetvalI16:
01955   case NVPTX::StoreRetvalI32:
01956   case NVPTX::StoreRetvalI64:
01957   case NVPTX::StoreRetvalI8:
01958   case NVPTX::LastCallArgF32:
01959   case NVPTX::LastCallArgF64:
01960   case NVPTX::LastCallArgI16:
01961   case NVPTX::LastCallArgI32:
01962   case NVPTX::LastCallArgI32imm:
01963   case NVPTX::LastCallArgI64:
01964   case NVPTX::LastCallArgParam:
01965   case NVPTX::LoadParamMemF32:
01966   case NVPTX::LoadParamMemF64:
01967   case NVPTX::LoadParamMemI16:
01968   case NVPTX::LoadParamMemI32:
01969   case NVPTX::LoadParamMemI64:
01970   case NVPTX::LoadParamMemI8:
01971   case NVPTX::PrototypeInst:
01972   case NVPTX::DBG_VALUE:
01973     return true;
01974   }
01975   return false;
01976 }
01977 
01978 /// PrintAsmOperand - Print out an operand for an inline asm expression.
01979 ///
01980 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
01981                                       unsigned AsmVariant,
01982                                       const char *ExtraCode, raw_ostream &O) {
01983   if (ExtraCode && ExtraCode[0]) {
01984     if (ExtraCode[1] != 0)
01985       return true; // Unknown modifier.
01986 
01987     switch (ExtraCode[0]) {
01988     default:
01989       // See if this is a generic print operand
01990       return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
01991     case 'r':
01992       break;
01993     }
01994   }
01995 
01996   printOperand(MI, OpNo, O);
01997 
01998   return false;
01999 }
02000 
02001 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
02002     const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
02003     const char *ExtraCode, raw_ostream &O) {
02004   if (ExtraCode && ExtraCode[0])
02005     return true; // Unknown modifier
02006 
02007   O << '[';
02008   printMemOperand(MI, OpNo, O);
02009   O << ']';
02010 
02011   return false;
02012 }
02013 
02014 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
02015                                    raw_ostream &O, const char *Modifier) {
02016   const MachineOperand &MO = MI->getOperand(opNum);
02017   switch (MO.getType()) {
02018   case MachineOperand::MO_Register:
02019     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
02020       if (MO.getReg() == NVPTX::VRDepot)
02021         O << DEPOTNAME << getFunctionNumber();
02022       else
02023         O << NVPTXInstPrinter::getRegisterName(MO.getReg());
02024     } else {
02025       emitVirtualRegister(MO.getReg(), O);
02026     }
02027     return;
02028 
02029   case MachineOperand::MO_Immediate:
02030     if (!Modifier)
02031       O << MO.getImm();
02032     else if (strstr(Modifier, "vec") == Modifier)
02033       printVecModifiedImmediate(MO, Modifier, O);
02034     else
02035       llvm_unreachable(
02036           "Don't know how to handle modifier on immediate operand");
02037     return;
02038 
02039   case MachineOperand::MO_FPImmediate:
02040     printFPConstant(MO.getFPImm(), O);
02041     break;
02042 
02043   case MachineOperand::MO_GlobalAddress:
02044     O << *getSymbol(MO.getGlobal());
02045     break;
02046 
02047   case MachineOperand::MO_MachineBasicBlock:
02048     O << *MO.getMBB()->getSymbol();
02049     return;
02050 
02051   default:
02052     llvm_unreachable("Operand type not supported.");
02053   }
02054 }
02055 
02056 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
02057                                       raw_ostream &O, const char *Modifier) {
02058   printOperand(MI, opNum, O);
02059 
02060   if (Modifier && !strcmp(Modifier, "add")) {
02061     O << ", ";
02062     printOperand(MI, opNum + 1, O);
02063   } else {
02064     if (MI->getOperand(opNum + 1).isImm() &&
02065         MI->getOperand(opNum + 1).getImm() == 0)
02066       return; // don't print ',0' or '+0'
02067     O << "+";
02068     printOperand(MI, opNum + 1, O);
02069   }
02070 }
02071 
02072 
02073 // Force static initialization.
02074 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
02075   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
02076   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
02077 }
02078 
02079 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
02080   std::stringstream temp;
02081   LineReader *reader = this->getReader(filename.str());
02082   temp << "\n//";
02083   temp << filename.str();
02084   temp << ":";
02085   temp << line;
02086   temp << " ";
02087   temp << reader->readLine(line);
02088   temp << "\n";
02089   this->OutStreamer.EmitRawText(Twine(temp.str()));
02090 }
02091 
02092 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
02093   if (!reader) {
02094     reader = new LineReader(filename);
02095   }
02096 
02097   if (reader->fileName() != filename) {
02098     delete reader;
02099     reader = new LineReader(filename);
02100   }
02101 
02102   return reader;
02103 }
02104 
02105 std::string LineReader::readLine(unsigned lineNum) {
02106   if (lineNum < theCurLine) {
02107     theCurLine = 0;
02108     fstr.seekg(0, std::ios::beg);
02109   }
02110   while (theCurLine < lineNum) {
02111     fstr.getline(buff, 500);
02112     theCurLine++;
02113   }
02114   return buff;
02115 }
02116 
02117 // Force static initialization.
02118 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
02119   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
02120   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
02121 }