LLVM API Documentation

NVPTXAsmPrinter.cpp
Go to the documentation of this file.
00001 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains a printer that converts from our internal representation
00011 // of machine-dependent LLVM code to NVPTX assembly language.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "NVPTXAsmPrinter.h"
00016 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
00017 #include "NVPTX.h"
00018 #include "NVPTXInstrInfo.h"
00019 #include "NVPTXNumRegisters.h"
00020 #include "NVPTXRegisterInfo.h"
00021 #include "NVPTXTargetMachine.h"
00022 #include "NVPTXUtilities.h"
00023 #include "cl_common_defines.h"
00024 #include "llvm/ADT/StringExtras.h"
00025 #include "llvm/Analysis/ConstantFolding.h"
00026 #include "llvm/Assembly/Writer.h"
00027 #include "llvm/CodeGen/Analysis.h"
00028 #include "llvm/CodeGen/MachineFrameInfo.h"
00029 #include "llvm/CodeGen/MachineModuleInfo.h"
00030 #include "llvm/CodeGen/MachineRegisterInfo.h"
00031 #include "llvm/DebugInfo.h"
00032 #include "llvm/IR/DerivedTypes.h"
00033 #include "llvm/IR/Function.h"
00034 #include "llvm/IR/GlobalVariable.h"
00035 #include "llvm/IR/Module.h"
00036 #include "llvm/IR/Operator.h"
00037 #include "llvm/MC/MCStreamer.h"
00038 #include "llvm/MC/MCSymbol.h"
00039 #include "llvm/Support/CommandLine.h"
00040 #include "llvm/Support/ErrorHandling.h"
00041 #include "llvm/Support/FormattedStream.h"
00042 #include "llvm/Support/Path.h"
00043 #include "llvm/Support/TargetRegistry.h"
00044 #include "llvm/Support/TimeValue.h"
00045 #include "llvm/Target/Mangler.h"
00046 #include "llvm/Target/TargetLoweringObjectFile.h"
00047 #include <sstream>
00048 using namespace llvm;
00049 
00050 #include "NVPTXGenAsmWriter.inc"
00051 
00052 bool RegAllocNilUsed = true;
00053 
00054 #define DEPOTNAME "__local_depot"
00055 
00056 static cl::opt<bool>
00057 EmitLineNumbers("nvptx-emit-line-numbers",
00058                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
00059                 cl::init(true));
00060 
00061 namespace llvm { bool InterleaveSrcInPtx = false; }
00062 
00063 static cl::opt<bool, true>
00064 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
00065               cl::desc("NVPTX Specific: Emit source line in ptx file"),
00066               cl::location(llvm::InterleaveSrcInPtx));
00067 
00068 namespace {
00069 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
00070 /// depends.
00071 void DiscoverDependentGlobals(const Value *V,
00072                               DenseSet<const GlobalVariable *> &Globals) {
00073   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
00074     Globals.insert(GV);
00075   else {
00076     if (const User *U = dyn_cast<User>(V)) {
00077       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
00078         DiscoverDependentGlobals(U->getOperand(i), Globals);
00079       }
00080     }
00081   }
00082 }
00083 
00084 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
00085 /// instances to be emitted, but only after any dependents have been added
00086 /// first.
00087 void VisitGlobalVariableForEmission(
00088     const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
00089     DenseSet<const GlobalVariable *> &Visited,
00090     DenseSet<const GlobalVariable *> &Visiting) {
00091   // Have we already visited this one?
00092   if (Visited.count(GV))
00093     return;
00094 
00095   // Do we have a circular dependency?
00096   if (Visiting.count(GV))
00097     report_fatal_error("Circular dependency found in global variable set");
00098 
00099   // Start visiting this global
00100   Visiting.insert(GV);
00101 
00102   // Make sure we visit all dependents first
00103   DenseSet<const GlobalVariable *> Others;
00104   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
00105     DiscoverDependentGlobals(GV->getOperand(i), Others);
00106 
00107   for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
00108                                                   E = Others.end();
00109        I != E; ++I)
00110     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
00111 
00112   // Now we can visit ourself
00113   Order.push_back(GV);
00114   Visited.insert(GV);
00115   Visiting.erase(GV);
00116 }
00117 }
00118 
00119 // @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
00120 // cannot just link to the existing version.
00121 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
00122 ///
00123 using namespace nvptx;
00124 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
00125   MCContext &Ctx = AP.OutContext;
00126 
00127   if (CV->isNullValue() || isa<UndefValue>(CV))
00128     return MCConstantExpr::Create(0, Ctx);
00129 
00130   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
00131     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
00132 
00133   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
00134     return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
00135 
00136   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
00137     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
00138 
00139   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
00140   if (CE == 0)
00141     llvm_unreachable("Unknown constant value to lower!");
00142 
00143   switch (CE->getOpcode()) {
00144   default:
00145     // If the code isn't optimized, there may be outstanding folding
00146     // opportunities. Attempt to fold the expression using DataLayout as a
00147     // last resort before giving up.
00148     if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
00149       if (C != CE)
00150         return LowerConstant(C, AP);
00151 
00152     // Otherwise report the problem to the user.
00153     {
00154       std::string S;
00155       raw_string_ostream OS(S);
00156       OS << "Unsupported expression in static initializer: ";
00157       WriteAsOperand(OS, CE, /*PrintType=*/ false,
00158                      !AP.MF ? 0 : AP.MF->getFunction()->getParent());
00159       report_fatal_error(OS.str());
00160     }
00161   case Instruction::GetElementPtr: {
00162     const DataLayout &TD = *AP.TM.getDataLayout();
00163     // Generate a symbolic expression for the byte address
00164     APInt OffsetAI(TD.getPointerSizeInBits(), 0);
00165     cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
00166 
00167     const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
00168     if (!OffsetAI)
00169       return Base;
00170 
00171     int64_t Offset = OffsetAI.getSExtValue();
00172     return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
00173                                    Ctx);
00174   }
00175 
00176   case Instruction::Trunc:
00177     // We emit the value and depend on the assembler to truncate the generated
00178     // expression properly.  This is important for differences between
00179     // blockaddress labels.  Since the two labels are in the same function, it
00180     // is reasonable to treat their delta as a 32-bit value.
00181   // FALL THROUGH.
00182   case Instruction::BitCast:
00183     return LowerConstant(CE->getOperand(0), AP);
00184 
00185   case Instruction::IntToPtr: {
00186     const DataLayout &TD = *AP.TM.getDataLayout();
00187     // Handle casts to pointers by changing them into casts to the appropriate
00188     // integer type.  This promotes constant folding and simplifies this code.
00189     Constant *Op = CE->getOperand(0);
00190     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
00191                                       false /*ZExt*/);
00192     return LowerConstant(Op, AP);
00193   }
00194 
00195   case Instruction::PtrToInt: {
00196     const DataLayout &TD = *AP.TM.getDataLayout();
00197     // Support only foldable casts to/from pointers that can be eliminated by
00198     // changing the pointer to the appropriately sized integer type.
00199     Constant *Op = CE->getOperand(0);
00200     Type *Ty = CE->getType();
00201 
00202     const MCExpr *OpExpr = LowerConstant(Op, AP);
00203 
00204     // We can emit the pointer value into this slot if the slot is an
00205     // integer slot equal to the size of the pointer.
00206     if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
00207       return OpExpr;
00208 
00209     // Otherwise the pointer is smaller than the resultant integer, mask off
00210     // the high bits so we are sure to get a proper truncation if the input is
00211     // a constant expr.
00212     unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
00213     const MCExpr *MaskExpr =
00214         MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
00215     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
00216   }
00217 
00218     // The MC library also has a right-shift operator, but it isn't consistently
00219   // signed or unsigned between different targets.
00220   case Instruction::Add:
00221   case Instruction::Sub:
00222   case Instruction::Mul:
00223   case Instruction::SDiv:
00224   case Instruction::SRem:
00225   case Instruction::Shl:
00226   case Instruction::And:
00227   case Instruction::Or:
00228   case Instruction::Xor: {
00229     const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
00230     const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
00231     switch (CE->getOpcode()) {
00232     default:
00233       llvm_unreachable("Unknown binary operator constant cast expr");
00234     case Instruction::Add:
00235       return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
00236     case Instruction::Sub:
00237       return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
00238     case Instruction::Mul:
00239       return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
00240     case Instruction::SDiv:
00241       return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
00242     case Instruction::SRem:
00243       return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
00244     case Instruction::Shl:
00245       return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
00246     case Instruction::And:
00247       return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
00248     case Instruction::Or:
00249       return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
00250     case Instruction::Xor:
00251       return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
00252     }
00253   }
00254   }
00255 }
00256 
00257 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
00258   if (!EmitLineNumbers)
00259     return;
00260   if (ignoreLoc(MI))
00261     return;
00262 
00263   DebugLoc curLoc = MI.getDebugLoc();
00264 
00265   if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
00266     return;
00267 
00268   if (prevDebugLoc == curLoc)
00269     return;
00270 
00271   prevDebugLoc = curLoc;
00272 
00273   if (curLoc.isUnknown())
00274     return;
00275 
00276   const MachineFunction *MF = MI.getParent()->getParent();
00277   //const TargetMachine &TM = MF->getTarget();
00278 
00279   const LLVMContext &ctx = MF->getFunction()->getContext();
00280   DIScope Scope(curLoc.getScope(ctx));
00281 
00282   if (!Scope.Verify())
00283     return;
00284 
00285   StringRef fileName(Scope.getFilename());
00286   StringRef dirName(Scope.getDirectory());
00287   SmallString<128> FullPathName = dirName;
00288   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
00289     sys::path::append(FullPathName, fileName);
00290     fileName = FullPathName.str();
00291   }
00292 
00293   if (filenameMap.find(fileName.str()) == filenameMap.end())
00294     return;
00295 
00296   // Emit the line from the source file.
00297   if (llvm::InterleaveSrcInPtx)
00298     this->emitSrcInText(fileName.str(), curLoc.getLine());
00299 
00300   std::stringstream temp;
00301   temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
00302        << " " << curLoc.getCol();
00303   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
00304 }
00305 
00306 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
00307   SmallString<128> Str;
00308   raw_svector_ostream OS(Str);
00309   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
00310     emitLineNumberAsDotLoc(*MI);
00311   printInstruction(MI, OS);
00312   OutStreamer.EmitRawText(OS.str());
00313 }
00314 
00315 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
00316   const DataLayout *TD = TM.getDataLayout();
00317   const TargetLowering *TLI = TM.getTargetLowering();
00318 
00319   Type *Ty = F->getReturnType();
00320 
00321   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
00322 
00323   if (Ty->getTypeID() == Type::VoidTyID)
00324     return;
00325 
00326   O << " (";
00327 
00328   if (isABI) {
00329     if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
00330       unsigned size = 0;
00331       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
00332         size = ITy->getBitWidth();
00333         if (size < 32)
00334           size = 32;
00335       } else {
00336         assert(Ty->isFloatingPointTy() && "Floating point type expected here");
00337         size = Ty->getPrimitiveSizeInBits();
00338       }
00339 
00340       O << ".param .b" << size << " func_retval0";
00341     } else if (isa<PointerType>(Ty)) {
00342       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
00343         << " func_retval0";
00344     } else {
00345       if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
00346         SmallVector<EVT, 16> vtparts;
00347         ComputeValueVTs(*TLI, Ty, vtparts);
00348         unsigned totalsz = 0;
00349         for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
00350           unsigned elems = 1;
00351           EVT elemtype = vtparts[i];
00352           if (vtparts[i].isVector()) {
00353             elems = vtparts[i].getVectorNumElements();
00354             elemtype = vtparts[i].getVectorElementType();
00355           }
00356           for (unsigned j = 0, je = elems; j != je; ++j) {
00357             unsigned sz = elemtype.getSizeInBits();
00358             if (elemtype.isInteger() && (sz < 8))
00359               sz = 8;
00360             totalsz += sz / 8;
00361           }
00362         }
00363         unsigned retAlignment = 0;
00364         if (!llvm::getAlign(*F, 0, retAlignment))
00365           retAlignment = TD->getABITypeAlignment(Ty);
00366         O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
00367           << "]";
00368       } else
00369         assert(false && "Unknown return type");
00370     }
00371   } else {
00372     SmallVector<EVT, 16> vtparts;
00373     ComputeValueVTs(*TLI, Ty, vtparts);
00374     unsigned idx = 0;
00375     for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
00376       unsigned elems = 1;
00377       EVT elemtype = vtparts[i];
00378       if (vtparts[i].isVector()) {
00379         elems = vtparts[i].getVectorNumElements();
00380         elemtype = vtparts[i].getVectorElementType();
00381       }
00382 
00383       for (unsigned j = 0, je = elems; j != je; ++j) {
00384         unsigned sz = elemtype.getSizeInBits();
00385         if (elemtype.isInteger() && (sz < 32))
00386           sz = 32;
00387         O << ".reg .b" << sz << " func_retval" << idx;
00388         if (j < je - 1)
00389           O << ", ";
00390         ++idx;
00391       }
00392       if (i < e - 1)
00393         O << ", ";
00394     }
00395   }
00396   O << ") ";
00397   return;
00398 }
00399 
00400 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
00401                                         raw_ostream &O) {
00402   const Function *F = MF.getFunction();
00403   printReturnValStr(F, O);
00404 }
00405 
00406 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
00407   SmallString<128> Str;
00408   raw_svector_ostream O(Str);
00409 
00410   if (!GlobalsEmitted) {
00411     emitGlobals(*MF->getFunction()->getParent());
00412     GlobalsEmitted = true;
00413   }
00414   
00415   // Set up
00416   MRI = &MF->getRegInfo();
00417   F = MF->getFunction();
00418   emitLinkageDirective(F, O);
00419   if (llvm::isKernelFunction(*F))
00420     O << ".entry ";
00421   else {
00422     O << ".func ";
00423     printReturnValStr(*MF, O);
00424   }
00425 
00426   O << *CurrentFnSym;
00427 
00428   emitFunctionParamList(*MF, O);
00429 
00430   if (llvm::isKernelFunction(*F))
00431     emitKernelFunctionDirectives(*F, O);
00432 
00433   OutStreamer.EmitRawText(O.str());
00434 
00435   prevDebugLoc = DebugLoc();
00436 }
00437 
00438 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
00439   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
00440   unsigned numRegClasses = TRI.getNumRegClasses();
00441   VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
00442   OutStreamer.EmitRawText(StringRef("{\n"));
00443   setAndEmitFunctionVirtualRegisters(*MF);
00444 
00445   SmallString<128> Str;
00446   raw_svector_ostream O(Str);
00447   emitDemotedVars(MF->getFunction(), O);
00448   OutStreamer.EmitRawText(O.str());
00449 }
00450 
00451 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
00452   OutStreamer.EmitRawText(StringRef("}\n"));
00453   delete[] VRidGlobal2LocalMap;
00454 }
00455 
00456 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
00457                                                    raw_ostream &O) const {
00458   // If the NVVM IR has some of reqntid* specified, then output
00459   // the reqntid directive, and set the unspecified ones to 1.
00460   // If none of reqntid* is specified, don't output reqntid directive.
00461   unsigned reqntidx, reqntidy, reqntidz;
00462   bool specified = false;
00463   if (llvm::getReqNTIDx(F, reqntidx) == false)
00464     reqntidx = 1;
00465   else
00466     specified = true;
00467   if (llvm::getReqNTIDy(F, reqntidy) == false)
00468     reqntidy = 1;
00469   else
00470     specified = true;
00471   if (llvm::getReqNTIDz(F, reqntidz) == false)
00472     reqntidz = 1;
00473   else
00474     specified = true;
00475 
00476   if (specified)
00477     O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
00478       << "\n";
00479 
00480   // If the NVVM IR has some of maxntid* specified, then output
00481   // the maxntid directive, and set the unspecified ones to 1.
00482   // If none of maxntid* is specified, don't output maxntid directive.
00483   unsigned maxntidx, maxntidy, maxntidz;
00484   specified = false;
00485   if (llvm::getMaxNTIDx(F, maxntidx) == false)
00486     maxntidx = 1;
00487   else
00488     specified = true;
00489   if (llvm::getMaxNTIDy(F, maxntidy) == false)
00490     maxntidy = 1;
00491   else
00492     specified = true;
00493   if (llvm::getMaxNTIDz(F, maxntidz) == false)
00494     maxntidz = 1;
00495   else
00496     specified = true;
00497 
00498   if (specified)
00499     O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
00500       << "\n";
00501 
00502   unsigned mincta;
00503   if (llvm::getMinCTASm(F, mincta))
00504     O << ".minnctapersm " << mincta << "\n";
00505 }
00506 
00507 void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
00508                                              raw_ostream &O) {
00509   const TargetRegisterClass *RC = MRI->getRegClass(vr);
00510   unsigned id = RC->getID();
00511 
00512   std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
00513   unsigned mapped_vr = regmap[vr];
00514 
00515   if (!isVec) {
00516     O << getNVPTXRegClassStr(RC) << mapped_vr;
00517     return;
00518   }
00519   report_fatal_error("Bad register!");
00520 }
00521 
00522 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
00523                                           raw_ostream &O) {
00524   getVirtualRegisterName(vr, isVec, O);
00525 }
00526 
00527 void NVPTXAsmPrinter::printVecModifiedImmediate(
00528     const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
00529   static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
00530   int Imm = (int) MO.getImm();
00531   if (0 == strcmp(Modifier, "vecelem"))
00532     O << "_" << vecelem[Imm];
00533   else if (0 == strcmp(Modifier, "vecv4comm1")) {
00534     if ((Imm < 0) || (Imm > 3))
00535       O << "//";
00536   } else if (0 == strcmp(Modifier, "vecv4comm2")) {
00537     if ((Imm < 4) || (Imm > 7))
00538       O << "//";
00539   } else if (0 == strcmp(Modifier, "vecv4pos")) {
00540     if (Imm < 0)
00541       Imm = 0;
00542     O << "_" << vecelem[Imm % 4];
00543   } else if (0 == strcmp(Modifier, "vecv2comm1")) {
00544     if ((Imm < 0) || (Imm > 1))
00545       O << "//";
00546   } else if (0 == strcmp(Modifier, "vecv2comm2")) {
00547     if ((Imm < 2) || (Imm > 3))
00548       O << "//";
00549   } else if (0 == strcmp(Modifier, "vecv2pos")) {
00550     if (Imm < 0)
00551       Imm = 0;
00552     O << "_" << vecelem[Imm % 2];
00553   } else
00554     llvm_unreachable("Unknown Modifier on immediate operand");
00555 }
00556 
00557 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
00558                                    raw_ostream &O, const char *Modifier) {
00559   const MachineOperand &MO = MI->getOperand(opNum);
00560   switch (MO.getType()) {
00561   case MachineOperand::MO_Register:
00562     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
00563       if (MO.getReg() == NVPTX::VRDepot)
00564         O << DEPOTNAME << getFunctionNumber();
00565       else
00566         O << getRegisterName(MO.getReg());
00567     } else {
00568       if (!Modifier)
00569         emitVirtualRegister(MO.getReg(), false, O);
00570       else {
00571         if (strcmp(Modifier, "vecfull") == 0)
00572           emitVirtualRegister(MO.getReg(), true, O);
00573         else
00574           llvm_unreachable(
00575               "Don't know how to handle the modifier on virtual register.");
00576       }
00577     }
00578     return;
00579 
00580   case MachineOperand::MO_Immediate:
00581     if (!Modifier)
00582       O << MO.getImm();
00583     else if (strstr(Modifier, "vec") == Modifier)
00584       printVecModifiedImmediate(MO, Modifier, O);
00585     else
00586       llvm_unreachable(
00587           "Don't know how to handle modifier on immediate operand");
00588     return;
00589 
00590   case MachineOperand::MO_FPImmediate:
00591     printFPConstant(MO.getFPImm(), O);
00592     break;
00593 
00594   case MachineOperand::MO_GlobalAddress:
00595     O << *Mang->getSymbol(MO.getGlobal());
00596     break;
00597 
00598   case MachineOperand::MO_ExternalSymbol: {
00599     const char *symbname = MO.getSymbolName();
00600     if (strstr(symbname, ".PARAM") == symbname) {
00601       unsigned index;
00602       sscanf(symbname + 6, "%u[];", &index);
00603       printParamName(index, O);
00604     } else if (strstr(symbname, ".HLPPARAM") == symbname) {
00605       unsigned index;
00606       sscanf(symbname + 9, "%u[];", &index);
00607       O << *CurrentFnSym << "_param_" << index << "_offset";
00608     } else
00609       O << symbname;
00610     break;
00611   }
00612 
00613   case MachineOperand::MO_MachineBasicBlock:
00614     O << *MO.getMBB()->getSymbol();
00615     return;
00616 
00617   default:
00618     llvm_unreachable("Operand type not supported.");
00619   }
00620 }
00621 
00622 void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI,
00623                                        raw_ostream &O) const {
00624 #ifndef __OPTIMIZE__
00625   O << "\t// Implicit def :";
00626   //printOperand(MI, 0);
00627   O << "\n";
00628 #endif
00629 }
00630 
00631 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
00632                                       raw_ostream &O, const char *Modifier) {
00633   printOperand(MI, opNum, O);
00634 
00635   if (Modifier && !strcmp(Modifier, "add")) {
00636     O << ", ";
00637     printOperand(MI, opNum + 1, O);
00638   } else {
00639     if (MI->getOperand(opNum + 1).isImm() &&
00640         MI->getOperand(opNum + 1).getImm() == 0)
00641       return; // don't print ',0' or '+0'
00642     O << "+";
00643     printOperand(MI, opNum + 1, O);
00644   }
00645 }
00646 
00647 void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
00648                                     raw_ostream &O, const char *Modifier) {
00649   if (Modifier) {
00650     const MachineOperand &MO = MI->getOperand(opNum);
00651     int Imm = (int) MO.getImm();
00652     if (!strcmp(Modifier, "volatile")) {
00653       if (Imm)
00654         O << ".volatile";
00655     } else if (!strcmp(Modifier, "addsp")) {
00656       switch (Imm) {
00657       case NVPTX::PTXLdStInstCode::GLOBAL:
00658         O << ".global";
00659         break;
00660       case NVPTX::PTXLdStInstCode::SHARED:
00661         O << ".shared";
00662         break;
00663       case NVPTX::PTXLdStInstCode::LOCAL:
00664         O << ".local";
00665         break;
00666       case NVPTX::PTXLdStInstCode::PARAM:
00667         O << ".param";
00668         break;
00669       case NVPTX::PTXLdStInstCode::CONSTANT:
00670         O << ".const";
00671         break;
00672       case NVPTX::PTXLdStInstCode::GENERIC:
00673         if (!nvptxSubtarget.hasGenericLdSt())
00674           O << ".global";
00675         break;
00676       default:
00677         llvm_unreachable("Wrong Address Space");
00678       }
00679     } else if (!strcmp(Modifier, "sign")) {
00680       if (Imm == NVPTX::PTXLdStInstCode::Signed)
00681         O << "s";
00682       else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
00683         O << "u";
00684       else
00685         O << "f";
00686     } else if (!strcmp(Modifier, "vec")) {
00687       if (Imm == NVPTX::PTXLdStInstCode::V2)
00688         O << ".v2";
00689       else if (Imm == NVPTX::PTXLdStInstCode::V4)
00690         O << ".v4";
00691     } else
00692       llvm_unreachable("Unknown Modifier");
00693   } else
00694     llvm_unreachable("Empty Modifier");
00695 }
00696 
00697 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
00698 
00699   emitLinkageDirective(F, O);
00700   if (llvm::isKernelFunction(*F))
00701     O << ".entry ";
00702   else
00703     O << ".func ";
00704   printReturnValStr(F, O);
00705   O << *Mang->getSymbol(F) << "\n";
00706   emitFunctionParamList(F, O);
00707   O << ";\n";
00708 }
00709 
00710 static bool usedInGlobalVarDef(const Constant *C) {
00711   if (!C)
00712     return false;
00713 
00714   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
00715     if (GV->getName().str() == "llvm.used")
00716       return false;
00717     return true;
00718   }
00719 
00720   for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
00721        ui != ue; ++ui) {
00722     const Constant *C = dyn_cast<Constant>(*ui);
00723     if (usedInGlobalVarDef(C))
00724       return true;
00725   }
00726   return false;
00727 }
00728 
00729 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
00730   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
00731     if (othergv->getName().str() == "llvm.used")
00732       return true;
00733   }
00734 
00735   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
00736     if (instr->getParent() && instr->getParent()->getParent()) {
00737       const Function *curFunc = instr->getParent()->getParent();
00738       if (oneFunc && (curFunc != oneFunc))
00739         return false;
00740       oneFunc = curFunc;
00741       return true;
00742     } else
00743       return false;
00744   }
00745 
00746   if (const MDNode *md = dyn_cast<MDNode>(U))
00747     if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
00748                           (md->getName().str() == "llvm.dbg.sp")))
00749       return true;
00750 
00751   for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
00752        ui != ue; ++ui) {
00753     if (usedInOneFunc(*ui, oneFunc) == false)
00754       return false;
00755   }
00756   return true;
00757 }
00758 
00759 /* Find out if a global variable can be demoted to local scope.
00760  * Currently, this is valid for CUDA shared variables, which have local
00761  * scope and global lifetime. So the conditions to check are :
00762  * 1. Is the global variable in shared address space?
00763  * 2. Does it have internal linkage?
00764  * 3. Is the global variable referenced only in one function?
00765  */
00766 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
00767   if (gv->hasInternalLinkage() == false)
00768     return false;
00769   const PointerType *Pty = gv->getType();
00770   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
00771     return false;
00772 
00773   const Function *oneFunc = 0;
00774 
00775   bool flag = usedInOneFunc(gv, oneFunc);
00776   if (flag == false)
00777     return false;
00778   if (!oneFunc)
00779     return false;
00780   f = oneFunc;
00781   return true;
00782 }
00783 
00784 static bool useFuncSeen(const Constant *C,
00785                         llvm::DenseMap<const Function *, bool> &seenMap) {
00786   for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
00787        ui != ue; ++ui) {
00788     if (const Constant *cu = dyn_cast<Constant>(*ui)) {
00789       if (useFuncSeen(cu, seenMap))
00790         return true;
00791     } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
00792       const BasicBlock *bb = I->getParent();
00793       if (!bb)
00794         continue;
00795       const Function *caller = bb->getParent();
00796       if (!caller)
00797         continue;
00798       if (seenMap.find(caller) != seenMap.end())
00799         return true;
00800     }
00801   }
00802   return false;
00803 }
00804 
00805 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
00806   llvm::DenseMap<const Function *, bool> seenMap;
00807   for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
00808     const Function *F = FI;
00809 
00810     if (F->isDeclaration()) {
00811       if (F->use_empty())
00812         continue;
00813       if (F->getIntrinsicID())
00814         continue;
00815       emitDeclaration(F, O);
00816       continue;
00817     }
00818     for (Value::const_use_iterator iter = F->use_begin(),
00819                                    iterEnd = F->use_end();
00820          iter != iterEnd; ++iter) {
00821       if (const Constant *C = dyn_cast<Constant>(*iter)) {
00822         if (usedInGlobalVarDef(C)) {
00823           // The use is in the initialization of a global variable
00824           // that is a function pointer, so print a declaration
00825           // for the original function
00826           emitDeclaration(F, O);
00827           break;
00828         }
00829         // Emit a declaration of this function if the function that
00830         // uses this constant expr has already been seen.
00831         if (useFuncSeen(C, seenMap)) {
00832           emitDeclaration(F, O);
00833           break;
00834         }
00835       }
00836 
00837       if (!isa<Instruction>(*iter))
00838         continue;
00839       const Instruction *instr = cast<Instruction>(*iter);
00840       const BasicBlock *bb = instr->getParent();
00841       if (!bb)
00842         continue;
00843       const Function *caller = bb->getParent();
00844       if (!caller)
00845         continue;
00846 
00847       // If a caller has already been seen, then the caller is
00848       // appearing in the module before the callee. so print out
00849       // a declaration for the callee.
00850       if (seenMap.find(caller) != seenMap.end()) {
00851         emitDeclaration(F, O);
00852         break;
00853       }
00854     }
00855     seenMap[F] = true;
00856   }
00857 }
00858 
00859 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
00860   DebugInfoFinder DbgFinder;
00861   DbgFinder.processModule(M);
00862 
00863   unsigned i = 1;
00864   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
00865                                  E = DbgFinder.compile_unit_end();
00866        I != E; ++I) {
00867     DICompileUnit DIUnit(*I);
00868     StringRef Filename(DIUnit.getFilename());
00869     StringRef Dirname(DIUnit.getDirectory());
00870     SmallString<128> FullPathName = Dirname;
00871     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
00872       sys::path::append(FullPathName, Filename);
00873       Filename = FullPathName.str();
00874     }
00875     if (filenameMap.find(Filename.str()) != filenameMap.end())
00876       continue;
00877     filenameMap[Filename.str()] = i;
00878     OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
00879     ++i;
00880   }
00881 
00882   for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
00883                                  E = DbgFinder.subprogram_end();
00884        I != E; ++I) {
00885     DISubprogram SP(*I);
00886     StringRef Filename(SP.getFilename());
00887     StringRef Dirname(SP.getDirectory());
00888     SmallString<128> FullPathName = Dirname;
00889     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
00890       sys::path::append(FullPathName, Filename);
00891       Filename = FullPathName.str();
00892     }
00893     if (filenameMap.find(Filename.str()) != filenameMap.end())
00894       continue;
00895     filenameMap[Filename.str()] = i;
00896     ++i;
00897   }
00898 }
00899 
00900 bool NVPTXAsmPrinter::doInitialization(Module &M) {
00901 
00902   SmallString<128> Str1;
00903   raw_svector_ostream OS1(Str1);
00904 
00905   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
00906   MMI->AnalyzeModule(M);
00907 
00908   // We need to call the parent's one explicitly.
00909   //bool Result = AsmPrinter::doInitialization(M);
00910 
00911   // Initialize TargetLoweringObjectFile.
00912   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
00913       .Initialize(OutContext, TM);
00914 
00915   Mang = new Mangler(OutContext, *TM.getDataLayout());
00916 
00917   // Emit header before any dwarf directives are emitted below.
00918   emitHeader(M, OS1);
00919   OutStreamer.EmitRawText(OS1.str());
00920 
00921   // Already commented out
00922   //bool Result = AsmPrinter::doInitialization(M);
00923 
00924   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
00925     recordAndEmitFilenames(M);
00926 
00927   GlobalsEmitted = false;
00928     
00929   return false; // success
00930 }
00931 
00932 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
00933   SmallString<128> Str2;
00934   raw_svector_ostream OS2(Str2);
00935 
00936   emitDeclarations(M, OS2);
00937 
00938   // As ptxas does not support forward references of globals, we need to first
00939   // sort the list of module-level globals in def-use order. We visit each
00940   // global variable in order, and ensure that we emit it *after* its dependent
00941   // globals. We use a little extra memory maintaining both a set and a list to
00942   // have fast searches while maintaining a strict ordering.
00943   SmallVector<const GlobalVariable *, 8> Globals;
00944   DenseSet<const GlobalVariable *> GVVisited;
00945   DenseSet<const GlobalVariable *> GVVisiting;
00946 
00947   // Visit each global variable, in order
00948   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
00949        I != E; ++I)
00950     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
00951 
00952   assert(GVVisited.size() == M.getGlobalList().size() &&
00953          "Missed a global variable");
00954   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
00955 
00956   // Print out module-level global variables in proper order
00957   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
00958     printModuleLevelGV(Globals[i], OS2);
00959 
00960   OS2 << '\n';
00961 
00962   OutStreamer.EmitRawText(OS2.str());
00963 }
00964 
00965 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
00966   O << "//\n";
00967   O << "// Generated by LLVM NVPTX Back-End\n";
00968   O << "//\n";
00969   O << "\n";
00970 
00971   unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
00972   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
00973 
00974   O << ".target ";
00975   O << nvptxSubtarget.getTargetName();
00976 
00977   if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
00978     O << ", texmode_independent";
00979   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
00980     if (!nvptxSubtarget.hasDouble())
00981       O << ", map_f64_to_f32";
00982   }
00983 
00984   if (MAI->doesSupportDebugInformation())
00985     O << ", debug";
00986 
00987   O << "\n";
00988 
00989   O << ".address_size ";
00990   if (nvptxSubtarget.is64Bit())
00991     O << "64";
00992   else
00993     O << "32";
00994   O << "\n";
00995 
00996   O << "\n";
00997 }
00998 
00999 bool NVPTXAsmPrinter::doFinalization(Module &M) {
01000 
01001   // If we did not emit any functions, then the global declarations have not
01002   // yet been emitted.
01003   if (!GlobalsEmitted) {
01004     emitGlobals(M);
01005     GlobalsEmitted = true;
01006   }
01007 
01008   // XXX Temproarily remove global variables so that doFinalization() will not
01009   // emit them again (global variables are emitted at beginning).
01010 
01011   Module::GlobalListType &global_list = M.getGlobalList();
01012   int i, n = global_list.size();
01013   GlobalVariable **gv_array = new GlobalVariable *[n];
01014 
01015   // first, back-up GlobalVariable in gv_array
01016   i = 0;
01017   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
01018        I != E; ++I)
01019     gv_array[i++] = &*I;
01020 
01021   // second, empty global_list
01022   while (!global_list.empty())
01023     global_list.remove(global_list.begin());
01024 
01025   // call doFinalization
01026   bool ret = AsmPrinter::doFinalization(M);
01027 
01028   // now we restore global variables
01029   for (i = 0; i < n; i++)
01030     global_list.insert(global_list.end(), gv_array[i]);
01031 
01032   delete[] gv_array;
01033   return ret;
01034 
01035   //bool Result = AsmPrinter::doFinalization(M);
01036   // Instead of calling the parents doFinalization, we may
01037   // clone parents doFinalization and customize here.
01038   // Currently, we if NVISA out the EmitGlobals() in
01039   // parent's doFinalization, which is too intrusive.
01040   //
01041   // Same for the doInitialization.
01042   //return Result;
01043 }
01044 
01045 // This function emits appropriate linkage directives for
01046 // functions and global variables.
01047 //
01048 // extern function declaration            -> .extern
01049 // extern function definition             -> .visible
01050 // external global variable with init     -> .visible
01051 // external without init                  -> .extern
01052 // appending                              -> not allowed, assert.
01053 
01054 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
01055                                            raw_ostream &O) {
01056   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
01057     if (V->hasExternalLinkage()) {
01058       if (isa<GlobalVariable>(V)) {
01059         const GlobalVariable *GVar = cast<GlobalVariable>(V);
01060         if (GVar) {
01061           if (GVar->hasInitializer())
01062             O << ".visible ";
01063           else
01064             O << ".extern ";
01065         }
01066       } else if (V->isDeclaration())
01067         O << ".extern ";
01068       else
01069         O << ".visible ";
01070     } else if (V->hasAppendingLinkage()) {
01071       std::string msg;
01072       msg.append("Error: ");
01073       msg.append("Symbol ");
01074       if (V->hasName())
01075         msg.append(V->getName().str());
01076       msg.append("has unsupported appending linkage type");
01077       llvm_unreachable(msg.c_str());
01078     }
01079   }
01080 }
01081 
01082 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
01083                                          raw_ostream &O,
01084                                          bool processDemoted) {
01085 
01086   // Skip meta data
01087   if (GVar->hasSection()) {
01088     if (GVar->getSection() == "llvm.metadata")
01089       return;
01090   }
01091 
01092   const DataLayout *TD = TM.getDataLayout();
01093 
01094   // GlobalVariables are always constant pointers themselves.
01095   const PointerType *PTy = GVar->getType();
01096   Type *ETy = PTy->getElementType();
01097 
01098   if (GVar->hasExternalLinkage()) {
01099     if (GVar->hasInitializer())
01100       O << ".visible ";
01101     else
01102       O << ".extern ";
01103   }
01104 
01105   if (llvm::isTexture(*GVar)) {
01106     O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
01107     return;
01108   }
01109 
01110   if (llvm::isSurface(*GVar)) {
01111     O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
01112     return;
01113   }
01114 
01115   if (GVar->isDeclaration()) {
01116     // (extern) declarations, no definition or initializer
01117     // Currently the only known declaration is for an automatic __local
01118     // (.shared) promoted to global.
01119     emitPTXGlobalVariable(GVar, O);
01120     O << ";\n";
01121     return;
01122   }
01123 
01124   if (llvm::isSampler(*GVar)) {
01125     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
01126 
01127     const Constant *Initializer = NULL;
01128     if (GVar->hasInitializer())
01129       Initializer = GVar->getInitializer();
01130     const ConstantInt *CI = NULL;
01131     if (Initializer)
01132       CI = dyn_cast<ConstantInt>(Initializer);
01133     if (CI) {
01134       unsigned sample = CI->getZExtValue();
01135 
01136       O << " = { ";
01137 
01138       for (int i = 0,
01139                addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
01140            i < 3; i++) {
01141         O << "addr_mode_" << i << " = ";
01142         switch (addr) {
01143         case 0:
01144           O << "wrap";
01145           break;
01146         case 1:
01147           O << "clamp_to_border";
01148           break;
01149         case 2:
01150           O << "clamp_to_edge";
01151           break;
01152         case 3:
01153           O << "wrap";
01154           break;
01155         case 4:
01156           O << "mirror";
01157           break;
01158         }
01159         O << ", ";
01160       }
01161       O << "filter_mode = ";
01162       switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
01163       case 0:
01164         O << "nearest";
01165         break;
01166       case 1:
01167         O << "linear";
01168         break;
01169       case 2:
01170         assert(0 && "Anisotropic filtering is not supported");
01171       default:
01172         O << "nearest";
01173         break;
01174       }
01175       if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
01176         O << ", force_unnormalized_coords = 1";
01177       }
01178       O << " }";
01179     }
01180 
01181     O << ";\n";
01182     return;
01183   }
01184 
01185   if (GVar->hasPrivateLinkage()) {
01186 
01187     if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
01188       return;
01189 
01190     // FIXME - need better way (e.g. Metadata) to avoid generating this global
01191     if (!strncmp(GVar->getName().data(), "filename", 8))
01192       return;
01193     if (GVar->use_empty())
01194       return;
01195   }
01196 
01197   const Function *demotedFunc = 0;
01198   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
01199     O << "// " << GVar->getName().str() << " has been demoted\n";
01200     if (localDecls.find(demotedFunc) != localDecls.end())
01201       localDecls[demotedFunc].push_back(GVar);
01202     else {
01203       std::vector<const GlobalVariable *> temp;
01204       temp.push_back(GVar);
01205       localDecls[demotedFunc] = temp;
01206     }
01207     return;
01208   }
01209 
01210   O << ".";
01211   emitPTXAddressSpace(PTy->getAddressSpace(), O);
01212   if (GVar->getAlignment() == 0)
01213     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
01214   else
01215     O << " .align " << GVar->getAlignment();
01216 
01217   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
01218     O << " .";
01219     // Special case: ABI requires that we use .u8 for predicates
01220     if (ETy->isIntegerTy(1))
01221       O << "u8";
01222     else
01223       O << getPTXFundamentalTypeStr(ETy, false);
01224     O << " ";
01225     O << *Mang->getSymbol(GVar);
01226 
01227     // Ptx allows variable initilization only for constant and global state
01228     // spaces.
01229     if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
01230          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
01231          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
01232         GVar->hasInitializer()) {
01233       const Constant *Initializer = GVar->getInitializer();
01234       if (!Initializer->isNullValue()) {
01235         O << " = ";
01236         printScalarConstant(Initializer, O);
01237       }
01238     }
01239   } else {
01240     unsigned int ElementSize = 0;
01241 
01242     // Although PTX has direct support for struct type and array type and
01243     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
01244     // targets that support these high level field accesses. Structs, arrays
01245     // and vectors are lowered into arrays of bytes.
01246     switch (ETy->getTypeID()) {
01247     case Type::StructTyID:
01248     case Type::ArrayTyID:
01249     case Type::VectorTyID:
01250       ElementSize = TD->getTypeStoreSize(ETy);
01251       // Ptx allows variable initilization only for constant and
01252       // global state spaces.
01253       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
01254            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
01255            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
01256           GVar->hasInitializer()) {
01257         const Constant *Initializer = GVar->getInitializer();
01258         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
01259           AggBuffer aggBuffer(ElementSize, O, *this);
01260           bufferAggregateConstant(Initializer, &aggBuffer);
01261           if (aggBuffer.numSymbols) {
01262             if (nvptxSubtarget.is64Bit()) {
01263               O << " .u64 " << *Mang->getSymbol(GVar) << "[";
01264               O << ElementSize / 8;
01265             } else {
01266               O << " .u32 " << *Mang->getSymbol(GVar) << "[";
01267               O << ElementSize / 4;
01268             }
01269             O << "]";
01270           } else {
01271             O << " .b8 " << *Mang->getSymbol(GVar) << "[";
01272             O << ElementSize;
01273             O << "]";
01274           }
01275           O << " = {";
01276           aggBuffer.print();
01277           O << "}";
01278         } else {
01279           O << " .b8 " << *Mang->getSymbol(GVar);
01280           if (ElementSize) {
01281             O << "[";
01282             O << ElementSize;
01283             O << "]";
01284           }
01285         }
01286       } else {
01287         O << " .b8 " << *Mang->getSymbol(GVar);
01288         if (ElementSize) {
01289           O << "[";
01290           O << ElementSize;
01291           O << "]";
01292         }
01293       }
01294       break;
01295     default:
01296       assert(0 && "type not supported yet");
01297     }
01298 
01299   }
01300   O << ";\n";
01301 }
01302 
01303 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
01304   if (localDecls.find(f) == localDecls.end())
01305     return;
01306 
01307   std::vector<const GlobalVariable *> &gvars = localDecls[f];
01308 
01309   for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
01310     O << "\t// demoted variable\n\t";
01311     printModuleLevelGV(gvars[i], O, true);
01312   }
01313 }
01314 
01315 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
01316                                           raw_ostream &O) const {
01317   switch (AddressSpace) {
01318   case llvm::ADDRESS_SPACE_LOCAL:
01319     O << "local";
01320     break;
01321   case llvm::ADDRESS_SPACE_GLOBAL:
01322     O << "global";
01323     break;
01324   case llvm::ADDRESS_SPACE_CONST:
01325     // This logic should be consistent with that in
01326     // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
01327     if (nvptxSubtarget.hasGenericLdSt())
01328       O << "global";
01329     else
01330       O << "const";
01331     break;
01332   case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
01333     O << "const";
01334     break;
01335   case llvm::ADDRESS_SPACE_SHARED:
01336     O << "shared";
01337     break;
01338   default:
01339     report_fatal_error("Bad address space found while emitting PTX");
01340     break;
01341   }
01342 }
01343 
01344 std::string
01345 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
01346   switch (Ty->getTypeID()) {
01347   default:
01348     llvm_unreachable("unexpected type");
01349     break;
01350   case Type::IntegerTyID: {
01351     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
01352     if (NumBits == 1)
01353       return "pred";
01354     else if (NumBits <= 64) {
01355       std::string name = "u";
01356       return name + utostr(NumBits);
01357     } else {
01358       llvm_unreachable("Integer too large");
01359       break;
01360     }
01361     break;
01362   }
01363   case Type::FloatTyID:
01364     return "f32";
01365   case Type::DoubleTyID:
01366     return "f64";
01367   case Type::PointerTyID:
01368     if (nvptxSubtarget.is64Bit())
01369       if (useB4PTR)
01370         return "b64";
01371       else
01372         return "u64";
01373     else if (useB4PTR)
01374       return "b32";
01375     else
01376       return "u32";
01377   }
01378   llvm_unreachable("unexpected type");
01379   return NULL;
01380 }
01381 
01382 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
01383                                             raw_ostream &O) {
01384 
01385   const DataLayout *TD = TM.getDataLayout();
01386 
01387   // GlobalVariables are always constant pointers themselves.
01388   const PointerType *PTy = GVar->getType();
01389   Type *ETy = PTy->getElementType();
01390 
01391   O << ".";
01392   emitPTXAddressSpace(PTy->getAddressSpace(), O);
01393   if (GVar->getAlignment() == 0)
01394     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
01395   else
01396     O << " .align " << GVar->getAlignment();
01397 
01398   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
01399     O << " .";
01400     O << getPTXFundamentalTypeStr(ETy);
01401     O << " ";
01402     O << *Mang->getSymbol(GVar);
01403     return;
01404   }
01405 
01406   int64_t ElementSize = 0;
01407 
01408   // Although PTX has direct support for struct type and array type and LLVM IR
01409   // is very similar to PTX, the LLVM CodeGen does not support for targets that
01410   // support these high level field accesses. Structs and arrays are lowered
01411   // into arrays of bytes.
01412   switch (ETy->getTypeID()) {
01413   case Type::StructTyID:
01414   case Type::ArrayTyID:
01415   case Type::VectorTyID:
01416     ElementSize = TD->getTypeStoreSize(ETy);
01417     O << " .b8 " << *Mang->getSymbol(GVar) << "[";
01418     if (ElementSize) {
01419       O << itostr(ElementSize);
01420     }
01421     O << "]";
01422     break;
01423   default:
01424     assert(0 && "type not supported yet");
01425   }
01426   return;
01427 }
01428 
01429 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
01430   if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
01431     return TD->getPrefTypeAlignment(Ty);
01432 
01433   const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
01434   if (ATy)
01435     return getOpenCLAlignment(TD, ATy->getElementType());
01436 
01437   const VectorType *VTy = dyn_cast<VectorType>(Ty);
01438   if (VTy) {
01439     Type *ETy = VTy->getElementType();
01440     unsigned int numE = VTy->getNumElements();
01441     unsigned int alignE = TD->getPrefTypeAlignment(ETy);
01442     if (numE == 3)
01443       return 4 * alignE;
01444     else
01445       return numE * alignE;
01446   }
01447 
01448   const StructType *STy = dyn_cast<StructType>(Ty);
01449   if (STy) {
01450     unsigned int alignStruct = 1;
01451     // Go through each element of the struct and find the
01452     // largest alignment.
01453     for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
01454       Type *ETy = STy->getElementType(i);
01455       unsigned int align = getOpenCLAlignment(TD, ETy);
01456       if (align > alignStruct)
01457         alignStruct = align;
01458     }
01459     return alignStruct;
01460   }
01461 
01462   const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
01463   if (FTy)
01464     return TD->getPointerPrefAlignment();
01465   return TD->getPrefTypeAlignment(Ty);
01466 }
01467 
01468 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
01469                                      int paramIndex, raw_ostream &O) {
01470   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
01471       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
01472     O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex;
01473   else {
01474     std::string argName = I->getName();
01475     const char *p = argName.c_str();
01476     while (*p) {
01477       if (*p == '.')
01478         O << "_";
01479       else
01480         O << *p;
01481       p++;
01482     }
01483   }
01484 }
01485 
01486 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
01487   Function::const_arg_iterator I, E;
01488   int i = 0;
01489 
01490   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
01491       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
01492     O << *CurrentFnSym << "_param_" << paramIndex;
01493     return;
01494   }
01495 
01496   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
01497     if (i == paramIndex) {
01498       printParamName(I, paramIndex, O);
01499       return;
01500     }
01501   }
01502   llvm_unreachable("paramIndex out of bound");
01503 }
01504 
01505 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
01506   const DataLayout *TD = TM.getDataLayout();
01507   const AttributeSet &PAL = F->getAttributes();
01508   const TargetLowering *TLI = TM.getTargetLowering();
01509   Function::const_arg_iterator I, E;
01510   unsigned paramIndex = 0;
01511   bool first = true;
01512   bool isKernelFunc = llvm::isKernelFunction(*F);
01513   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
01514   MVT thePointerTy = TLI->getPointerTy();
01515 
01516   O << "(\n";
01517 
01518   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
01519     Type *Ty = I->getType();
01520 
01521     if (!first)
01522       O << ",\n";
01523 
01524     first = false;
01525 
01526     // Handle image/sampler parameters
01527     if (llvm::isSampler(*I) || llvm::isImage(*I)) {
01528       if (llvm::isImage(*I)) {
01529         std::string sname = I->getName();
01530         if (llvm::isImageWriteOnly(*I))
01531           O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_"
01532             << paramIndex;
01533         else // Default image is read_only
01534           O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_"
01535             << paramIndex;
01536       } else // Should be llvm::isSampler(*I)
01537         O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_"
01538           << paramIndex;
01539       continue;
01540     }
01541 
01542     if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
01543       if (Ty->isVectorTy()) {
01544         // Just print .param .b8 .align <a> .param[size];
01545         // <a> = PAL.getparamalignment
01546         // size = typeallocsize of element type
01547         unsigned align = PAL.getParamAlignment(paramIndex + 1);
01548         if (align == 0)
01549           align = TD->getABITypeAlignment(Ty);
01550 
01551         unsigned sz = TD->getTypeAllocSize(Ty);
01552         O << "\t.param .align " << align << " .b8 ";
01553         printParamName(I, paramIndex, O);
01554         O << "[" << sz << "]";
01555 
01556         continue;
01557       }
01558       // Just a scalar
01559       const PointerType *PTy = dyn_cast<PointerType>(Ty);
01560       if (isKernelFunc) {
01561         if (PTy) {
01562           // Special handling for pointer arguments to kernel
01563           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
01564 
01565           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
01566             Type *ETy = PTy->getElementType();
01567             int addrSpace = PTy->getAddressSpace();
01568             switch (addrSpace) {
01569             default:
01570               O << ".ptr ";
01571               break;
01572             case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
01573               O << ".ptr .const ";
01574               break;
01575             case llvm::ADDRESS_SPACE_SHARED:
01576               O << ".ptr .shared ";
01577               break;
01578             case llvm::ADDRESS_SPACE_GLOBAL:
01579             case llvm::ADDRESS_SPACE_CONST:
01580               O << ".ptr .global ";
01581               break;
01582             }
01583             O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
01584           }
01585           printParamName(I, paramIndex, O);
01586           continue;
01587         }
01588 
01589         // non-pointer scalar to kernel func
01590         O << "\t.param .";
01591         // Special case: predicate operands become .u8 types
01592         if (Ty->isIntegerTy(1))
01593           O << "u8";
01594         else
01595           O << getPTXFundamentalTypeStr(Ty);
01596         O << " ";
01597         printParamName(I, paramIndex, O);
01598         continue;
01599       }
01600       // Non-kernel function, just print .param .b<size> for ABI
01601       // and .reg .b<size> for non ABY
01602       unsigned sz = 0;
01603       if (isa<IntegerType>(Ty)) {
01604         sz = cast<IntegerType>(Ty)->getBitWidth();
01605         if (sz < 32)
01606           sz = 32;
01607       } else if (isa<PointerType>(Ty))
01608         sz = thePointerTy.getSizeInBits();
01609       else
01610         sz = Ty->getPrimitiveSizeInBits();
01611       if (isABI)
01612         O << "\t.param .b" << sz << " ";
01613       else
01614         O << "\t.reg .b" << sz << " ";
01615       printParamName(I, paramIndex, O);
01616       continue;
01617     }
01618 
01619     // param has byVal attribute. So should be a pointer
01620     const PointerType *PTy = dyn_cast<PointerType>(Ty);
01621     assert(PTy && "Param with byval attribute should be a pointer type");
01622     Type *ETy = PTy->getElementType();
01623 
01624     if (isABI || isKernelFunc) {
01625       // Just print .param .b8 .align <a> .param[size];
01626       // <a> = PAL.getparamalignment
01627       // size = typeallocsize of element type
01628       unsigned align = PAL.getParamAlignment(paramIndex + 1);
01629       if (align == 0)
01630         align = TD->getABITypeAlignment(ETy);
01631 
01632       unsigned sz = TD->getTypeAllocSize(ETy);
01633       O << "\t.param .align " << align << " .b8 ";
01634       printParamName(I, paramIndex, O);
01635       O << "[" << sz << "]";
01636       continue;
01637     } else {
01638       // Split the ETy into constituent parts and
01639       // print .param .b<size> <name> for each part.
01640       // Further, if a part is vector, print the above for
01641       // each vector element.
01642       SmallVector<EVT, 16> vtparts;
01643       ComputeValueVTs(*TLI, ETy, vtparts);
01644       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
01645         unsigned elems = 1;
01646         EVT elemtype = vtparts[i];
01647         if (vtparts[i].isVector()) {
01648           elems = vtparts[i].getVectorNumElements();
01649           elemtype = vtparts[i].getVectorElementType();
01650         }
01651 
01652         for (unsigned j = 0, je = elems; j != je; ++j) {
01653           unsigned sz = elemtype.getSizeInBits();
01654           if (elemtype.isInteger() && (sz < 32))
01655             sz = 32;
01656           O << "\t.reg .b" << sz << " ";
01657           printParamName(I, paramIndex, O);
01658           if (j < je - 1)
01659             O << ",\n";
01660           ++paramIndex;
01661         }
01662         if (i < e - 1)
01663           O << ",\n";
01664       }
01665       --paramIndex;
01666       continue;
01667     }
01668   }
01669 
01670   O << "\n)\n";
01671 }
01672 
01673 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
01674                                             raw_ostream &O) {
01675   const Function *F = MF.getFunction();
01676   emitFunctionParamList(F, O);
01677 }
01678 
01679 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
01680     const MachineFunction &MF) {
01681   SmallString<128> Str;
01682   raw_svector_ostream O(Str);
01683 
01684   // Map the global virtual register number to a register class specific
01685   // virtual register number starting from 1 with that class.
01686   const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
01687   //unsigned numRegClasses = TRI->getNumRegClasses();
01688 
01689   // Emit the Fake Stack Object
01690   const MachineFrameInfo *MFI = MF.getFrameInfo();
01691   int NumBytes = (int) MFI->getStackSize();
01692   if (NumBytes) {
01693     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
01694       << getFunctionNumber() << "[" << NumBytes << "];\n";
01695     if (nvptxSubtarget.is64Bit()) {
01696       O << "\t.reg .b64 \t%SP;\n";
01697       O << "\t.reg .b64 \t%SPL;\n";
01698     } else {
01699       O << "\t.reg .b32 \t%SP;\n";
01700       O << "\t.reg .b32 \t%SPL;\n";
01701     }
01702   }
01703 
01704   // Go through all virtual registers to establish the mapping between the
01705   // global virtual
01706   // register number and the per class virtual register number.
01707   // We use the per class virtual register number in the ptx output.
01708   unsigned int numVRs = MRI->getNumVirtRegs();
01709   for (unsigned i = 0; i < numVRs; i++) {
01710     unsigned int vr = TRI->index2VirtReg(i);
01711     const TargetRegisterClass *RC = MRI->getRegClass(vr);
01712     std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
01713     int n = regmap.size();
01714     regmap.insert(std::make_pair(vr, n + 1));
01715   }
01716 
01717   // Emit register declarations
01718   // @TODO: Extract out the real register usage
01719   O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
01720   O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
01721   O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
01722   O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
01723   O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
01724   O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
01725   O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
01726 
01727   // Emit declaration of the virtual registers or 'physical' registers for
01728   // each register class
01729   //for (unsigned i=0; i< numRegClasses; i++) {
01730   //    std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[i];
01731   //    const TargetRegisterClass *RC = TRI->getRegClass(i);
01732   //    std::string rcname = getNVPTXRegClassName(RC);
01733   //    std::string rcStr = getNVPTXRegClassStr(RC);
01734   //    //int n = regmap.size();
01735   //    if (!isNVPTXVectorRegClass(RC)) {
01736   //      O << "\t.reg " << rcname << " \t" << rcStr << "<"
01737   //        << NVPTXNumRegisters << ">;\n";
01738   //    }
01739 
01740   // Only declare those registers that may be used. And do not emit vector
01741   // registers as
01742   // they are all elementized to scalar registers.
01743   //if (n && !isNVPTXVectorRegClass(RC)) {
01744   //    if (RegAllocNilUsed) {
01745   //        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
01746   //          << ">;\n";
01747   //    }
01748   //    else {
01749   //        O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
01750   //          << "<" << 32 << ">;\n";
01751   //    }
01752   //}
01753   //}
01754 
01755   OutStreamer.EmitRawText(O.str());
01756 }
01757 
01758 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
01759   APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
01760   bool ignored;
01761   unsigned int numHex;
01762   const char *lead;
01763 
01764   if (Fp->getType()->getTypeID() == Type::FloatTyID) {
01765     numHex = 8;
01766     lead = "0f";
01767     APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
01768   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
01769     numHex = 16;
01770     lead = "0d";
01771     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
01772   } else
01773     llvm_unreachable("unsupported fp type");
01774 
01775   APInt API = APF.bitcastToAPInt();
01776   std::string hexstr(utohexstr(API.getZExtValue()));
01777   O << lead;
01778   if (hexstr.length() < numHex)
01779     O << std::string(numHex - hexstr.length(), '0');
01780   O << utohexstr(API.getZExtValue());
01781 }
01782 
01783 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
01784   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
01785     O << CI->getValue();
01786     return;
01787   }
01788   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
01789     printFPConstant(CFP, O);
01790     return;
01791   }
01792   if (isa<ConstantPointerNull>(CPV)) {
01793     O << "0";
01794     return;
01795   }
01796   if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
01797     O << *Mang->getSymbol(GVar);
01798     return;
01799   }
01800   if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01801     const Value *v = Cexpr->stripPointerCasts();
01802     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
01803       O << *Mang->getSymbol(GVar);
01804       return;
01805     } else {
01806       O << *LowerConstant(CPV, *this);
01807       return;
01808     }
01809   }
01810   llvm_unreachable("Not scalar type found in printScalarConstant()");
01811 }
01812 
01813 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
01814                                    AggBuffer *aggBuffer) {
01815 
01816   const DataLayout *TD = TM.getDataLayout();
01817 
01818   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
01819     int s = TD->getTypeAllocSize(CPV->getType());
01820     if (s < Bytes)
01821       s = Bytes;
01822     aggBuffer->addZeros(s);
01823     return;
01824   }
01825 
01826   unsigned char *ptr;
01827   switch (CPV->getType()->getTypeID()) {
01828 
01829   case Type::IntegerTyID: {
01830     const Type *ETy = CPV->getType();
01831     if (ETy == Type::getInt8Ty(CPV->getContext())) {
01832       unsigned char c =
01833           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
01834       ptr = &c;
01835       aggBuffer->addBytes(ptr, 1, Bytes);
01836     } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
01837       short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
01838       ptr = (unsigned char *)&int16;
01839       aggBuffer->addBytes(ptr, 2, Bytes);
01840     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
01841       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
01842         int int32 = (int)(constInt->getZExtValue());
01843         ptr = (unsigned char *)&int32;
01844         aggBuffer->addBytes(ptr, 4, Bytes);
01845         break;
01846       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01847         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
01848                 ConstantFoldConstantExpression(Cexpr, TD))) {
01849           int int32 = (int)(constInt->getZExtValue());
01850           ptr = (unsigned char *)&int32;
01851           aggBuffer->addBytes(ptr, 4, Bytes);
01852           break;
01853         }
01854         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
01855           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
01856           aggBuffer->addSymbol(v);
01857           aggBuffer->addZeros(4);
01858           break;
01859         }
01860       }
01861       llvm_unreachable("unsupported integer const type");
01862     } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
01863       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
01864         long long int64 = (long long)(constInt->getZExtValue());
01865         ptr = (unsigned char *)&int64;
01866         aggBuffer->addBytes(ptr, 8, Bytes);
01867         break;
01868       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01869         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
01870                 ConstantFoldConstantExpression(Cexpr, TD))) {
01871           long long int64 = (long long)(constInt->getZExtValue());
01872           ptr = (unsigned char *)&int64;
01873           aggBuffer->addBytes(ptr, 8, Bytes);
01874           break;
01875         }
01876         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
01877           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
01878           aggBuffer->addSymbol(v);
01879           aggBuffer->addZeros(8);
01880           break;
01881         }
01882       }
01883       llvm_unreachable("unsupported integer const type");
01884     } else
01885       llvm_unreachable("unsupported integer const type");
01886     break;
01887   }
01888   case Type::FloatTyID:
01889   case Type::DoubleTyID: {
01890     const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
01891     const Type *Ty = CFP->getType();
01892     if (Ty == Type::getFloatTy(CPV->getContext())) {
01893       float float32 = (float) CFP->getValueAPF().convertToFloat();
01894       ptr = (unsigned char *)&float32;
01895       aggBuffer->addBytes(ptr, 4, Bytes);
01896     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
01897       double float64 = CFP->getValueAPF().convertToDouble();
01898       ptr = (unsigned char *)&float64;
01899       aggBuffer->addBytes(ptr, 8, Bytes);
01900     } else {
01901       llvm_unreachable("unsupported fp const type");
01902     }
01903     break;
01904   }
01905   case Type::PointerTyID: {
01906     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
01907       aggBuffer->addSymbol(GVar);
01908     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01909       const Value *v = Cexpr->stripPointerCasts();
01910       aggBuffer->addSymbol(v);
01911     }
01912     unsigned int s = TD->getTypeAllocSize(CPV->getType());
01913     aggBuffer->addZeros(s);
01914     break;
01915   }
01916 
01917   case Type::ArrayTyID:
01918   case Type::VectorTyID:
01919   case Type::StructTyID: {
01920     if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
01921         isa<ConstantStruct>(CPV)) {
01922       int ElementSize = TD->getTypeAllocSize(CPV->getType());
01923       bufferAggregateConstant(CPV, aggBuffer);
01924       if (Bytes > ElementSize)
01925         aggBuffer->addZeros(Bytes - ElementSize);
01926     } else if (isa<ConstantAggregateZero>(CPV))
01927       aggBuffer->addZeros(Bytes);
01928     else
01929       llvm_unreachable("Unexpected Constant type");
01930     break;
01931   }
01932 
01933   default:
01934     llvm_unreachable("unsupported type");
01935   }
01936 }
01937 
01938 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
01939                                               AggBuffer *aggBuffer) {
01940   const DataLayout *TD = TM.getDataLayout();
01941   int Bytes;
01942 
01943   // Old constants
01944   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
01945     if (CPV->getNumOperands())
01946       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
01947         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
01948     return;
01949   }
01950 
01951   if (const ConstantDataSequential *CDS =
01952           dyn_cast<ConstantDataSequential>(CPV)) {
01953     if (CDS->getNumElements())
01954       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
01955         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
01956                      aggBuffer);
01957     return;
01958   }
01959 
01960   if (isa<ConstantStruct>(CPV)) {
01961     if (CPV->getNumOperands()) {
01962       StructType *ST = cast<StructType>(CPV->getType());
01963       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
01964         if (i == (e - 1))
01965           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
01966                   TD->getTypeAllocSize(ST) -
01967                   TD->getStructLayout(ST)->getElementOffset(i);
01968         else
01969           Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
01970                   TD->getStructLayout(ST)->getElementOffset(i);
01971         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
01972       }
01973     }
01974     return;
01975   }
01976   llvm_unreachable("unsupported constant type in printAggregateConstant()");
01977 }
01978 
01979 // buildTypeNameMap - Run through symbol table looking for type names.
01980 //
01981 
01982 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
01983 
01984   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
01985 
01986   if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
01987                                   !PI->second.compare("struct._image2d_t") ||
01988                                   !PI->second.compare("struct._image3d_t")))
01989     return true;
01990 
01991   return false;
01992 }
01993 
01994 /// PrintAsmOperand - Print out an operand for an inline asm expression.
01995 ///
01996 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
01997                                       unsigned AsmVariant,
01998                                       const char *ExtraCode, raw_ostream &O) {
01999   if (ExtraCode && ExtraCode[0]) {
02000     if (ExtraCode[1] != 0)
02001       return true; // Unknown modifier.
02002 
02003     switch (ExtraCode[0]) {
02004     default:
02005       // See if this is a generic print operand
02006       return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
02007     case 'r':
02008       break;
02009     }
02010   }
02011 
02012   printOperand(MI, OpNo, O);
02013 
02014   return false;
02015 }
02016 
02017 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
02018     const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
02019     const char *ExtraCode, raw_ostream &O) {
02020   if (ExtraCode && ExtraCode[0])
02021     return true; // Unknown modifier
02022 
02023   O << '[';
02024   printMemOperand(MI, OpNo, O);
02025   O << ']';
02026 
02027   return false;
02028 }
02029 
02030 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
02031   switch (MI.getOpcode()) {
02032   default:
02033     return false;
02034   case NVPTX::CallArgBeginInst:
02035   case NVPTX::CallArgEndInst0:
02036   case NVPTX::CallArgEndInst1:
02037   case NVPTX::CallArgF32:
02038   case NVPTX::CallArgF64:
02039   case NVPTX::CallArgI16:
02040   case NVPTX::CallArgI32:
02041   case NVPTX::CallArgI32imm:
02042   case NVPTX::CallArgI64:
02043   case NVPTX::CallArgI8:
02044   case NVPTX::CallArgParam:
02045   case NVPTX::CallVoidInst:
02046   case NVPTX::CallVoidInstReg:
02047   case NVPTX::Callseq_End:
02048   case NVPTX::CallVoidInstReg64:
02049   case NVPTX::DeclareParamInst:
02050   case NVPTX::DeclareRetMemInst:
02051   case NVPTX::DeclareRetRegInst:
02052   case NVPTX::DeclareRetScalarInst:
02053   case NVPTX::DeclareScalarParamInst:
02054   case NVPTX::DeclareScalarRegInst:
02055   case NVPTX::StoreParamF32:
02056   case NVPTX::StoreParamF64:
02057   case NVPTX::StoreParamI16:
02058   case NVPTX::StoreParamI32:
02059   case NVPTX::StoreParamI64:
02060   case NVPTX::StoreParamI8:
02061   case NVPTX::StoreParamS32I8:
02062   case NVPTX::StoreParamU32I8:
02063   case NVPTX::StoreParamS32I16:
02064   case NVPTX::StoreParamU32I16:
02065   case NVPTX::StoreRetvalF32:
02066   case NVPTX::StoreRetvalF64:
02067   case NVPTX::StoreRetvalI16:
02068   case NVPTX::StoreRetvalI32:
02069   case NVPTX::StoreRetvalI64:
02070   case NVPTX::StoreRetvalI8:
02071   case NVPTX::LastCallArgF32:
02072   case NVPTX::LastCallArgF64:
02073   case NVPTX::LastCallArgI16:
02074   case NVPTX::LastCallArgI32:
02075   case NVPTX::LastCallArgI32imm:
02076   case NVPTX::LastCallArgI64:
02077   case NVPTX::LastCallArgI8:
02078   case NVPTX::LastCallArgParam:
02079   case NVPTX::LoadParamMemF32:
02080   case NVPTX::LoadParamMemF64:
02081   case NVPTX::LoadParamMemI16:
02082   case NVPTX::LoadParamMemI32:
02083   case NVPTX::LoadParamMemI64:
02084   case NVPTX::LoadParamMemI8:
02085   case NVPTX::LoadParamRegF32:
02086   case NVPTX::LoadParamRegF64:
02087   case NVPTX::LoadParamRegI16:
02088   case NVPTX::LoadParamRegI32:
02089   case NVPTX::LoadParamRegI64:
02090   case NVPTX::LoadParamRegI8:
02091   case NVPTX::PrototypeInst:
02092   case NVPTX::DBG_VALUE:
02093     return true;
02094   }
02095   return false;
02096 }
02097 
02098 // Force static initialization.
02099 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
02100   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
02101   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
02102 }
02103 
02104 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
02105   std::stringstream temp;
02106   LineReader *reader = this->getReader(filename.str());
02107   temp << "\n//";
02108   temp << filename.str();
02109   temp << ":";
02110   temp << line;
02111   temp << " ";
02112   temp << reader->readLine(line);
02113   temp << "\n";
02114   this->OutStreamer.EmitRawText(Twine(temp.str()));
02115 }
02116 
02117 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
02118   if (reader == NULL) {
02119     reader = new LineReader(filename);
02120   }
02121 
02122   if (reader->fileName() != filename) {
02123     delete reader;
02124     reader = new LineReader(filename);
02125   }
02126 
02127   return reader;
02128 }
02129 
02130 std::string LineReader::readLine(unsigned lineNum) {
02131   if (lineNum < theCurLine) {
02132     theCurLine = 0;
02133     fstr.seekg(0, std::ios::beg);
02134   }
02135   while (theCurLine < lineNum) {
02136     fstr.getline(buff, 500);
02137     theCurLine++;
02138   }
02139   return buff;
02140 }
02141 
02142 // Force static initialization.
02143 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
02144   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
02145   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
02146 }