LLVM API Documentation

NVPTXAsmPrinter.cpp
Go to the documentation of this file.
00001 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains a printer that converts from our internal representation
00011 // of machine-dependent LLVM code to NVPTX assembly language.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "NVPTXAsmPrinter.h"
00016 #include "InstPrinter/NVPTXInstPrinter.h"
00017 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
00018 #include "NVPTX.h"
00019 #include "NVPTXInstrInfo.h"
00020 #include "NVPTXMachineFunctionInfo.h"
00021 #include "NVPTXMCExpr.h"
00022 #include "NVPTXRegisterInfo.h"
00023 #include "NVPTXTargetMachine.h"
00024 #include "NVPTXUtilities.h"
00025 #include "cl_common_defines.h"
00026 #include "llvm/ADT/StringExtras.h"
00027 #include "llvm/Analysis/ConstantFolding.h"
00028 #include "llvm/CodeGen/Analysis.h"
00029 #include "llvm/CodeGen/MachineFrameInfo.h"
00030 #include "llvm/CodeGen/MachineModuleInfo.h"
00031 #include "llvm/CodeGen/MachineRegisterInfo.h"
00032 #include "llvm/IR/DebugInfo.h"
00033 #include "llvm/IR/DerivedTypes.h"
00034 #include "llvm/IR/Function.h"
00035 #include "llvm/IR/GlobalVariable.h"
00036 #include "llvm/IR/Mangler.h"
00037 #include "llvm/IR/Module.h"
00038 #include "llvm/IR/Operator.h"
00039 #include "llvm/MC/MCStreamer.h"
00040 #include "llvm/MC/MCSymbol.h"
00041 #include "llvm/Support/CommandLine.h"
00042 #include "llvm/Support/ErrorHandling.h"
00043 #include "llvm/Support/FormattedStream.h"
00044 #include "llvm/Support/Path.h"
00045 #include "llvm/Support/TargetRegistry.h"
00046 #include "llvm/Support/TimeValue.h"
00047 #include "llvm/Target/TargetLoweringObjectFile.h"
00048 #include <sstream>
00049 using namespace llvm;
00050 
00051 #define DEPOTNAME "__local_depot"
00052 
00053 static cl::opt<bool>
00054 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
00055                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
00056                 cl::init(true));
00057 
00058 static cl::opt<bool>
00059 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
00060               cl::desc("NVPTX Specific: Emit source line in ptx file"),
00061               cl::init(false));
00062 
00063 namespace {
00064 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
00065 /// depends.
00066 void DiscoverDependentGlobals(const Value *V,
00067                               DenseSet<const GlobalVariable *> &Globals) {
00068   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
00069     Globals.insert(GV);
00070   else {
00071     if (const User *U = dyn_cast<User>(V)) {
00072       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
00073         DiscoverDependentGlobals(U->getOperand(i), Globals);
00074       }
00075     }
00076   }
00077 }
00078 
00079 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
00080 /// instances to be emitted, but only after any dependents have been added
00081 /// first.
00082 void VisitGlobalVariableForEmission(
00083     const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
00084     DenseSet<const GlobalVariable *> &Visited,
00085     DenseSet<const GlobalVariable *> &Visiting) {
00086   // Have we already visited this one?
00087   if (Visited.count(GV))
00088     return;
00089 
00090   // Do we have a circular dependency?
00091   if (Visiting.count(GV))
00092     report_fatal_error("Circular dependency found in global variable set");
00093 
00094   // Start visiting this global
00095   Visiting.insert(GV);
00096 
00097   // Make sure we visit all dependents first
00098   DenseSet<const GlobalVariable *> Others;
00099   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
00100     DiscoverDependentGlobals(GV->getOperand(i), Others);
00101 
00102   for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
00103                                                   E = Others.end();
00104        I != E; ++I)
00105     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
00106 
00107   // Now we can visit ourself
00108   Order.push_back(GV);
00109   Visited.insert(GV);
00110   Visiting.erase(GV);
00111 }
00112 }
00113 
00114 // @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
00115 // cannot just link to the existing version.
00116 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
00117 ///
00118 using namespace nvptx;
00119 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
00120   MCContext &Ctx = AP.OutContext;
00121 
00122   if (CV->isNullValue() || isa<UndefValue>(CV))
00123     return MCConstantExpr::Create(0, Ctx);
00124 
00125   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
00126     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
00127 
00128   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
00129     return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
00130 
00131   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
00132     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
00133 
00134   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
00135   if (!CE)
00136     llvm_unreachable("Unknown constant value to lower!");
00137 
00138   switch (CE->getOpcode()) {
00139   default:
00140     // If the code isn't optimized, there may be outstanding folding
00141     // opportunities. Attempt to fold the expression using DataLayout as a
00142     // last resort before giving up.
00143     if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
00144       if (C != CE)
00145         return LowerConstant(C, AP);
00146 
00147     // Otherwise report the problem to the user.
00148     {
00149       std::string S;
00150       raw_string_ostream OS(S);
00151       OS << "Unsupported expression in static initializer: ";
00152       CE->printAsOperand(OS, /*PrintType=*/ false,
00153                          !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
00154       report_fatal_error(OS.str());
00155     }
00156   case Instruction::AddrSpaceCast: {
00157     // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
00158     // handled by the generic() logic in the MCExpr printer
00159     PointerType *DstTy            = cast<PointerType>(CE->getType());
00160     PointerType *SrcTy            = cast<PointerType>(CE->getOperand(0)->getType());
00161     if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
00162       return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
00163     }
00164     std::string S;
00165     raw_string_ostream OS(S);
00166     OS << "Unsupported expression in static initializer: ";
00167     CE->printAsOperand(OS, /*PrintType=*/ false,
00168                        !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
00169     report_fatal_error(OS.str());
00170   }
00171   case Instruction::GetElementPtr: {
00172     const DataLayout &TD = *AP.TM.getDataLayout();
00173     // Generate a symbolic expression for the byte address
00174     APInt OffsetAI(TD.getPointerSizeInBits(), 0);
00175     cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
00176 
00177     const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
00178     if (!OffsetAI)
00179       return Base;
00180 
00181     int64_t Offset = OffsetAI.getSExtValue();
00182     return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
00183                                    Ctx);
00184   }
00185 
00186   case Instruction::Trunc:
00187     // We emit the value and depend on the assembler to truncate the generated
00188     // expression properly.  This is important for differences between
00189     // blockaddress labels.  Since the two labels are in the same function, it
00190     // is reasonable to treat their delta as a 32-bit value.
00191   // FALL THROUGH.
00192   case Instruction::BitCast:
00193     return LowerConstant(CE->getOperand(0), AP);
00194 
00195   case Instruction::IntToPtr: {
00196     const DataLayout &TD = *AP.TM.getDataLayout();
00197     // Handle casts to pointers by changing them into casts to the appropriate
00198     // integer type.  This promotes constant folding and simplifies this code.
00199     Constant *Op = CE->getOperand(0);
00200     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
00201                                       false /*ZExt*/);
00202     return LowerConstant(Op, AP);
00203   }
00204 
00205   case Instruction::PtrToInt: {
00206     const DataLayout &TD = *AP.TM.getDataLayout();
00207     // Support only foldable casts to/from pointers that can be eliminated by
00208     // changing the pointer to the appropriately sized integer type.
00209     Constant *Op = CE->getOperand(0);
00210     Type *Ty = CE->getType();
00211 
00212     const MCExpr *OpExpr = LowerConstant(Op, AP);
00213 
00214     // We can emit the pointer value into this slot if the slot is an
00215     // integer slot equal to the size of the pointer.
00216     if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
00217       return OpExpr;
00218 
00219     // Otherwise the pointer is smaller than the resultant integer, mask off
00220     // the high bits so we are sure to get a proper truncation if the input is
00221     // a constant expr.
00222     unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
00223     const MCExpr *MaskExpr =
00224         MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
00225     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
00226   }
00227 
00228     // The MC library also has a right-shift operator, but it isn't consistently
00229   // signed or unsigned between different targets.
00230   case Instruction::Add:
00231   case Instruction::Sub:
00232   case Instruction::Mul:
00233   case Instruction::SDiv:
00234   case Instruction::SRem:
00235   case Instruction::Shl:
00236   case Instruction::And:
00237   case Instruction::Or:
00238   case Instruction::Xor: {
00239     const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
00240     const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
00241     switch (CE->getOpcode()) {
00242     default:
00243       llvm_unreachable("Unknown binary operator constant cast expr");
00244     case Instruction::Add:
00245       return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
00246     case Instruction::Sub:
00247       return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
00248     case Instruction::Mul:
00249       return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
00250     case Instruction::SDiv:
00251       return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
00252     case Instruction::SRem:
00253       return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
00254     case Instruction::Shl:
00255       return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
00256     case Instruction::And:
00257       return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
00258     case Instruction::Or:
00259       return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
00260     case Instruction::Xor:
00261       return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
00262     }
00263   }
00264   }
00265 }
00266 
00267 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
00268   if (!EmitLineNumbers)
00269     return;
00270   if (ignoreLoc(MI))
00271     return;
00272 
00273   DebugLoc curLoc = MI.getDebugLoc();
00274 
00275   if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
00276     return;
00277 
00278   if (prevDebugLoc == curLoc)
00279     return;
00280 
00281   prevDebugLoc = curLoc;
00282 
00283   if (curLoc.isUnknown())
00284     return;
00285 
00286   const MachineFunction *MF = MI.getParent()->getParent();
00287   //const TargetMachine &TM = MF->getTarget();
00288 
00289   const LLVMContext &ctx = MF->getFunction()->getContext();
00290   DIScope Scope(curLoc.getScope(ctx));
00291 
00292   assert((!Scope || Scope.isScope()) &&
00293     "Scope of a DebugLoc should be null or a DIScope.");
00294   if (!Scope)
00295      return;
00296 
00297   StringRef fileName(Scope.getFilename());
00298   StringRef dirName(Scope.getDirectory());
00299   SmallString<128> FullPathName = dirName;
00300   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
00301     sys::path::append(FullPathName, fileName);
00302     fileName = FullPathName.str();
00303   }
00304 
00305   if (filenameMap.find(fileName.str()) == filenameMap.end())
00306     return;
00307 
00308   // Emit the line from the source file.
00309   if (InterleaveSrc)
00310     this->emitSrcInText(fileName.str(), curLoc.getLine());
00311 
00312   std::stringstream temp;
00313   temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
00314        << " " << curLoc.getCol();
00315   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
00316 }
00317 
00318 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
00319   SmallString<128> Str;
00320   raw_svector_ostream OS(Str);
00321   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
00322     emitLineNumberAsDotLoc(*MI);
00323 
00324   MCInst Inst;
00325   lowerToMCInst(MI, Inst);
00326   EmitToStreamer(OutStreamer, Inst);
00327 }
00328 
00329 // Handle symbol backtracking for targets that do not support image handles
00330 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
00331                                            unsigned OpNo, MCOperand &MCOp) {
00332   const MachineOperand &MO = MI->getOperand(OpNo);
00333   const MCInstrDesc &MCID = MI->getDesc();
00334 
00335   if (MCID.TSFlags & NVPTXII::IsTexFlag) {
00336     // This is a texture fetch, so operand 4 is a texref and operand 5 is
00337     // a samplerref
00338     if (OpNo == 4 && MO.isImm()) {
00339       lowerImageHandleSymbol(MO.getImm(), MCOp);
00340       return true;
00341     }
00342     if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
00343       lowerImageHandleSymbol(MO.getImm(), MCOp);
00344       return true;
00345     }
00346 
00347     return false;
00348   } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
00349     unsigned VecSize =
00350       1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
00351 
00352     // For a surface load of vector size N, the Nth operand will be the surfref
00353     if (OpNo == VecSize && MO.isImm()) {
00354       lowerImageHandleSymbol(MO.getImm(), MCOp);
00355       return true;
00356     }
00357 
00358     return false;
00359   } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
00360     // This is a surface store, so operand 0 is a surfref
00361     if (OpNo == 0 && MO.isImm()) {
00362       lowerImageHandleSymbol(MO.getImm(), MCOp);
00363       return true;
00364     }
00365 
00366     return false;
00367   } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
00368     // This is a query, so operand 1 is a surfref/texref
00369     if (OpNo == 1 && MO.isImm()) {
00370       lowerImageHandleSymbol(MO.getImm(), MCOp);
00371       return true;
00372     }
00373 
00374     return false;
00375   }
00376 
00377   return false;
00378 }
00379 
00380 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
00381   // Ewwww
00382   TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
00383   NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
00384   const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
00385   const char *Sym = MFI->getImageHandleSymbol(Index);
00386   std::string *SymNamePtr =
00387     nvTM.getManagedStrPool()->getManagedString(Sym);
00388   MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
00389     StringRef(SymNamePtr->c_str())));
00390 }
00391 
00392 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
00393   OutMI.setOpcode(MI->getOpcode());
00394   const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
00395 
00396   // Special: Do not mangle symbol operand of CALL_PROTOTYPE
00397   if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
00398     const MachineOperand &MO = MI->getOperand(0);
00399     OutMI.addOperand(GetSymbolRef(
00400       OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
00401     return;
00402   }
00403 
00404   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
00405     const MachineOperand &MO = MI->getOperand(i);
00406 
00407     MCOperand MCOp;
00408     if (!ST.hasImageHandles()) {
00409       if (lowerImageHandleOperand(MI, i, MCOp)) {
00410         OutMI.addOperand(MCOp);
00411         continue;
00412       }
00413     }
00414 
00415     if (lowerOperand(MO, MCOp))
00416       OutMI.addOperand(MCOp);
00417   }
00418 }
00419 
00420 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
00421                                    MCOperand &MCOp) {
00422   switch (MO.getType()) {
00423   default: llvm_unreachable("unknown operand type");
00424   case MachineOperand::MO_Register:
00425     MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
00426     break;
00427   case MachineOperand::MO_Immediate:
00428     MCOp = MCOperand::CreateImm(MO.getImm());
00429     break;
00430   case MachineOperand::MO_MachineBasicBlock:
00431     MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
00432         MO.getMBB()->getSymbol(), OutContext));
00433     break;
00434   case MachineOperand::MO_ExternalSymbol:
00435     MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
00436     break;
00437   case MachineOperand::MO_GlobalAddress:
00438     MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
00439     break;
00440   case MachineOperand::MO_FPImmediate: {
00441     const ConstantFP *Cnt = MO.getFPImm();
00442     APFloat Val = Cnt->getValueAPF();
00443 
00444     switch (Cnt->getType()->getTypeID()) {
00445     default: report_fatal_error("Unsupported FP type"); break;
00446     case Type::FloatTyID:
00447       MCOp = MCOperand::CreateExpr(
00448         NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
00449       break;
00450     case Type::DoubleTyID:
00451       MCOp = MCOperand::CreateExpr(
00452         NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
00453       break;
00454     }
00455     break;
00456   }
00457   }
00458   return true;
00459 }
00460 
00461 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
00462   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
00463     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00464 
00465     DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
00466     unsigned RegNum = RegMap[Reg];
00467 
00468     // Encode the register class in the upper 4 bits
00469     // Must be kept in sync with NVPTXInstPrinter::printRegName
00470     unsigned Ret = 0;
00471     if (RC == &NVPTX::Int1RegsRegClass) {
00472       Ret = (1 << 28);
00473     } else if (RC == &NVPTX::Int16RegsRegClass) {
00474       Ret = (2 << 28);
00475     } else if (RC == &NVPTX::Int32RegsRegClass) {
00476       Ret = (3 << 28);
00477     } else if (RC == &NVPTX::Int64RegsRegClass) {
00478       Ret = (4 << 28);
00479     } else if (RC == &NVPTX::Float32RegsRegClass) {
00480       Ret = (5 << 28);
00481     } else if (RC == &NVPTX::Float64RegsRegClass) {
00482       Ret = (6 << 28);
00483     } else {
00484       report_fatal_error("Bad register class");
00485     }
00486 
00487     // Insert the vreg number
00488     Ret |= (RegNum & 0x0FFFFFFF);
00489     return Ret;
00490   } else {
00491     // Some special-use registers are actually physical registers.
00492     // Encode this as the register class ID of 0 and the real register ID.
00493     return Reg & 0x0FFFFFFF;
00494   }
00495 }
00496 
00497 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
00498   const MCExpr *Expr;
00499   Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
00500                                  OutContext);
00501   return MCOperand::CreateExpr(Expr);
00502 }
00503 
00504 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
00505   const DataLayout *TD = TM.getDataLayout();
00506   const TargetLowering *TLI = TM.getTargetLowering();
00507 
00508   Type *Ty = F->getReturnType();
00509 
00510   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
00511 
00512   if (Ty->getTypeID() == Type::VoidTyID)
00513     return;
00514 
00515   O << " (";
00516 
00517   if (isABI) {
00518     if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
00519       unsigned size = 0;
00520       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
00521         size = ITy->getBitWidth();
00522         if (size < 32)
00523           size = 32;
00524       } else {
00525         assert(Ty->isFloatingPointTy() && "Floating point type expected here");
00526         size = Ty->getPrimitiveSizeInBits();
00527       }
00528 
00529       O << ".param .b" << size << " func_retval0";
00530     } else if (isa<PointerType>(Ty)) {
00531       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
00532         << " func_retval0";
00533     } else {
00534       if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
00535         unsigned totalsz = TD->getTypeAllocSize(Ty);
00536         unsigned retAlignment = 0;
00537         if (!llvm::getAlign(*F, 0, retAlignment))
00538           retAlignment = TD->getABITypeAlignment(Ty);
00539         O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
00540           << "]";
00541       } else
00542         assert(false && "Unknown return type");
00543     }
00544   } else {
00545     SmallVector<EVT, 16> vtparts;
00546     ComputeValueVTs(*TLI, Ty, vtparts);
00547     unsigned idx = 0;
00548     for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
00549       unsigned elems = 1;
00550       EVT elemtype = vtparts[i];
00551       if (vtparts[i].isVector()) {
00552         elems = vtparts[i].getVectorNumElements();
00553         elemtype = vtparts[i].getVectorElementType();
00554       }
00555 
00556       for (unsigned j = 0, je = elems; j != je; ++j) {
00557         unsigned sz = elemtype.getSizeInBits();
00558         if (elemtype.isInteger() && (sz < 32))
00559           sz = 32;
00560         O << ".reg .b" << sz << " func_retval" << idx;
00561         if (j < je - 1)
00562           O << ", ";
00563         ++idx;
00564       }
00565       if (i < e - 1)
00566         O << ", ";
00567     }
00568   }
00569   O << ") ";
00570   return;
00571 }
00572 
00573 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
00574                                         raw_ostream &O) {
00575   const Function *F = MF.getFunction();
00576   printReturnValStr(F, O);
00577 }
00578 
00579 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
00580   SmallString<128> Str;
00581   raw_svector_ostream O(Str);
00582 
00583   if (!GlobalsEmitted) {
00584     emitGlobals(*MF->getFunction()->getParent());
00585     GlobalsEmitted = true;
00586   }
00587   
00588   // Set up
00589   MRI = &MF->getRegInfo();
00590   F = MF->getFunction();
00591   emitLinkageDirective(F, O);
00592   if (llvm::isKernelFunction(*F))
00593     O << ".entry ";
00594   else {
00595     O << ".func ";
00596     printReturnValStr(*MF, O);
00597   }
00598 
00599   O << *CurrentFnSym;
00600 
00601   emitFunctionParamList(*MF, O);
00602 
00603   if (llvm::isKernelFunction(*F))
00604     emitKernelFunctionDirectives(*F, O);
00605 
00606   OutStreamer.EmitRawText(O.str());
00607 
00608   prevDebugLoc = DebugLoc();
00609 }
00610 
00611 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
00612   VRegMapping.clear();
00613   OutStreamer.EmitRawText(StringRef("{\n"));
00614   setAndEmitFunctionVirtualRegisters(*MF);
00615 
00616   SmallString<128> Str;
00617   raw_svector_ostream O(Str);
00618   emitDemotedVars(MF->getFunction(), O);
00619   OutStreamer.EmitRawText(O.str());
00620 }
00621 
00622 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
00623   OutStreamer.EmitRawText(StringRef("}\n"));
00624   VRegMapping.clear();
00625 }
00626 
00627 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
00628   unsigned RegNo = MI->getOperand(0).getReg();
00629   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
00630   if (TRI->isVirtualRegister(RegNo)) {
00631     OutStreamer.AddComment(Twine("implicit-def: ") +
00632                            getVirtualRegisterName(RegNo));
00633   } else {
00634     OutStreamer.AddComment(Twine("implicit-def: ") +
00635                            TM.getRegisterInfo()->getName(RegNo));
00636   }
00637   OutStreamer.AddBlankLine();
00638 }
00639 
00640 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
00641                                                    raw_ostream &O) const {
00642   // If the NVVM IR has some of reqntid* specified, then output
00643   // the reqntid directive, and set the unspecified ones to 1.
00644   // If none of reqntid* is specified, don't output reqntid directive.
00645   unsigned reqntidx, reqntidy, reqntidz;
00646   bool specified = false;
00647   if (llvm::getReqNTIDx(F, reqntidx) == false)
00648     reqntidx = 1;
00649   else
00650     specified = true;
00651   if (llvm::getReqNTIDy(F, reqntidy) == false)
00652     reqntidy = 1;
00653   else
00654     specified = true;
00655   if (llvm::getReqNTIDz(F, reqntidz) == false)
00656     reqntidz = 1;
00657   else
00658     specified = true;
00659 
00660   if (specified)
00661     O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
00662       << "\n";
00663 
00664   // If the NVVM IR has some of maxntid* specified, then output
00665   // the maxntid directive, and set the unspecified ones to 1.
00666   // If none of maxntid* is specified, don't output maxntid directive.
00667   unsigned maxntidx, maxntidy, maxntidz;
00668   specified = false;
00669   if (llvm::getMaxNTIDx(F, maxntidx) == false)
00670     maxntidx = 1;
00671   else
00672     specified = true;
00673   if (llvm::getMaxNTIDy(F, maxntidy) == false)
00674     maxntidy = 1;
00675   else
00676     specified = true;
00677   if (llvm::getMaxNTIDz(F, maxntidz) == false)
00678     maxntidz = 1;
00679   else
00680     specified = true;
00681 
00682   if (specified)
00683     O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
00684       << "\n";
00685 
00686   unsigned mincta;
00687   if (llvm::getMinCTASm(F, mincta))
00688     O << ".minnctapersm " << mincta << "\n";
00689 }
00690 
00691 std::string
00692 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
00693   const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00694 
00695   std::string Name;
00696   raw_string_ostream NameStr(Name);
00697 
00698   VRegRCMap::const_iterator I = VRegMapping.find(RC);
00699   assert(I != VRegMapping.end() && "Bad register class");
00700   const DenseMap<unsigned, unsigned> &RegMap = I->second;
00701 
00702   VRegMap::const_iterator VI = RegMap.find(Reg);
00703   assert(VI != RegMap.end() && "Bad virtual register");
00704   unsigned MappedVR = VI->second;
00705 
00706   NameStr << getNVPTXRegClassStr(RC) << MappedVR;
00707 
00708   NameStr.flush();
00709   return Name;
00710 }
00711 
00712 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
00713                                           raw_ostream &O) {
00714   O << getVirtualRegisterName(vr);
00715 }
00716 
00717 void NVPTXAsmPrinter::printVecModifiedImmediate(
00718     const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
00719   static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
00720   int Imm = (int) MO.getImm();
00721   if (0 == strcmp(Modifier, "vecelem"))
00722     O << "_" << vecelem[Imm];
00723   else if (0 == strcmp(Modifier, "vecv4comm1")) {
00724     if ((Imm < 0) || (Imm > 3))
00725       O << "//";
00726   } else if (0 == strcmp(Modifier, "vecv4comm2")) {
00727     if ((Imm < 4) || (Imm > 7))
00728       O << "//";
00729   } else if (0 == strcmp(Modifier, "vecv4pos")) {
00730     if (Imm < 0)
00731       Imm = 0;
00732     O << "_" << vecelem[Imm % 4];
00733   } else if (0 == strcmp(Modifier, "vecv2comm1")) {
00734     if ((Imm < 0) || (Imm > 1))
00735       O << "//";
00736   } else if (0 == strcmp(Modifier, "vecv2comm2")) {
00737     if ((Imm < 2) || (Imm > 3))
00738       O << "//";
00739   } else if (0 == strcmp(Modifier, "vecv2pos")) {
00740     if (Imm < 0)
00741       Imm = 0;
00742     O << "_" << vecelem[Imm % 2];
00743   } else
00744     llvm_unreachable("Unknown Modifier on immediate operand");
00745 }
00746 
00747 
00748 
00749 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
00750 
00751   emitLinkageDirective(F, O);
00752   if (llvm::isKernelFunction(*F))
00753     O << ".entry ";
00754   else
00755     O << ".func ";
00756   printReturnValStr(F, O);
00757   O << *getSymbol(F) << "\n";
00758   emitFunctionParamList(F, O);
00759   O << ";\n";
00760 }
00761 
00762 static bool usedInGlobalVarDef(const Constant *C) {
00763   if (!C)
00764     return false;
00765 
00766   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
00767     if (GV->getName().str() == "llvm.used")
00768       return false;
00769     return true;
00770   }
00771 
00772   for (const User *U : C->users())
00773     if (const Constant *C = dyn_cast<Constant>(U))
00774       if (usedInGlobalVarDef(C))
00775         return true;
00776 
00777   return false;
00778 }
00779 
00780 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
00781   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
00782     if (othergv->getName().str() == "llvm.used")
00783       return true;
00784   }
00785 
00786   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
00787     if (instr->getParent() && instr->getParent()->getParent()) {
00788       const Function *curFunc = instr->getParent()->getParent();
00789       if (oneFunc && (curFunc != oneFunc))
00790         return false;
00791       oneFunc = curFunc;
00792       return true;
00793     } else
00794       return false;
00795   }
00796 
00797   if (const MDNode *md = dyn_cast<MDNode>(U))
00798     if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
00799                           (md->getName().str() == "llvm.dbg.sp")))
00800       return true;
00801 
00802   for (const User *UU : U->users())
00803     if (usedInOneFunc(UU, oneFunc) == false)
00804       return false;
00805 
00806   return true;
00807 }
00808 
00809 /* Find out if a global variable can be demoted to local scope.
00810  * Currently, this is valid for CUDA shared variables, which have local
00811  * scope and global lifetime. So the conditions to check are :
00812  * 1. Is the global variable in shared address space?
00813  * 2. Does it have internal linkage?
00814  * 3. Is the global variable referenced only in one function?
00815  */
00816 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
00817   if (gv->hasInternalLinkage() == false)
00818     return false;
00819   const PointerType *Pty = gv->getType();
00820   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
00821     return false;
00822 
00823   const Function *oneFunc = nullptr;
00824 
00825   bool flag = usedInOneFunc(gv, oneFunc);
00826   if (flag == false)
00827     return false;
00828   if (!oneFunc)
00829     return false;
00830   f = oneFunc;
00831   return true;
00832 }
00833 
00834 static bool useFuncSeen(const Constant *C,
00835                         llvm::DenseMap<const Function *, bool> &seenMap) {
00836   for (const User *U : C->users()) {
00837     if (const Constant *cu = dyn_cast<Constant>(U)) {
00838       if (useFuncSeen(cu, seenMap))
00839         return true;
00840     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
00841       const BasicBlock *bb = I->getParent();
00842       if (!bb)
00843         continue;
00844       const Function *caller = bb->getParent();
00845       if (!caller)
00846         continue;
00847       if (seenMap.find(caller) != seenMap.end())
00848         return true;
00849     }
00850   }
00851   return false;
00852 }
00853 
00854 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
00855   llvm::DenseMap<const Function *, bool> seenMap;
00856   for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
00857     const Function *F = FI;
00858 
00859     if (F->isDeclaration()) {
00860       if (F->use_empty())
00861         continue;
00862       if (F->getIntrinsicID())
00863         continue;
00864       emitDeclaration(F, O);
00865       continue;
00866     }
00867     for (const User *U : F->users()) {
00868       if (const Constant *C = dyn_cast<Constant>(U)) {
00869         if (usedInGlobalVarDef(C)) {
00870           // The use is in the initialization of a global variable
00871           // that is a function pointer, so print a declaration
00872           // for the original function
00873           emitDeclaration(F, O);
00874           break;
00875         }
00876         // Emit a declaration of this function if the function that
00877         // uses this constant expr has already been seen.
00878         if (useFuncSeen(C, seenMap)) {
00879           emitDeclaration(F, O);
00880           break;
00881         }
00882       }
00883 
00884       if (!isa<Instruction>(U))
00885         continue;
00886       const Instruction *instr = cast<Instruction>(U);
00887       const BasicBlock *bb = instr->getParent();
00888       if (!bb)
00889         continue;
00890       const Function *caller = bb->getParent();
00891       if (!caller)
00892         continue;
00893 
00894       // If a caller has already been seen, then the caller is
00895       // appearing in the module before the callee. so print out
00896       // a declaration for the callee.
00897       if (seenMap.find(caller) != seenMap.end()) {
00898         emitDeclaration(F, O);
00899         break;
00900       }
00901     }
00902     seenMap[F] = true;
00903   }
00904 }
00905 
00906 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
00907   DebugInfoFinder DbgFinder;
00908   DbgFinder.processModule(M);
00909 
00910   unsigned i = 1;
00911   for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
00912     StringRef Filename(DIUnit.getFilename());
00913     StringRef Dirname(DIUnit.getDirectory());
00914     SmallString<128> FullPathName = Dirname;
00915     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
00916       sys::path::append(FullPathName, Filename);
00917       Filename = FullPathName.str();
00918     }
00919     if (filenameMap.find(Filename.str()) != filenameMap.end())
00920       continue;
00921     filenameMap[Filename.str()] = i;
00922     OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
00923     ++i;
00924   }
00925 
00926   for (DISubprogram SP : DbgFinder.subprograms()) {
00927     StringRef Filename(SP.getFilename());
00928     StringRef Dirname(SP.getDirectory());
00929     SmallString<128> FullPathName = Dirname;
00930     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
00931       sys::path::append(FullPathName, Filename);
00932       Filename = FullPathName.str();
00933     }
00934     if (filenameMap.find(Filename.str()) != filenameMap.end())
00935       continue;
00936     filenameMap[Filename.str()] = i;
00937     ++i;
00938   }
00939 }
00940 
00941 bool NVPTXAsmPrinter::doInitialization(Module &M) {
00942 
00943   SmallString<128> Str1;
00944   raw_svector_ostream OS1(Str1);
00945 
00946   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
00947   MMI->AnalyzeModule(M);
00948 
00949   // We need to call the parent's one explicitly.
00950   //bool Result = AsmPrinter::doInitialization(M);
00951 
00952   // Initialize TargetLoweringObjectFile.
00953   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
00954       .Initialize(OutContext, TM);
00955 
00956   Mang = new Mangler(TM.getDataLayout());
00957 
00958   // Emit header before any dwarf directives are emitted below.
00959   emitHeader(M, OS1);
00960   OutStreamer.EmitRawText(OS1.str());
00961 
00962   // Already commented out
00963   //bool Result = AsmPrinter::doInitialization(M);
00964 
00965   // Emit module-level inline asm if it exists.
00966   if (!M.getModuleInlineAsm().empty()) {
00967     OutStreamer.AddComment("Start of file scope inline assembly");
00968     OutStreamer.AddBlankLine();
00969     OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
00970     OutStreamer.AddBlankLine();
00971     OutStreamer.AddComment("End of file scope inline assembly");
00972     OutStreamer.AddBlankLine();
00973   }
00974 
00975   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
00976     recordAndEmitFilenames(M);
00977 
00978   GlobalsEmitted = false;
00979     
00980   return false; // success
00981 }
00982 
00983 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
00984   SmallString<128> Str2;
00985   raw_svector_ostream OS2(Str2);
00986 
00987   emitDeclarations(M, OS2);
00988 
00989   // As ptxas does not support forward references of globals, we need to first
00990   // sort the list of module-level globals in def-use order. We visit each
00991   // global variable in order, and ensure that we emit it *after* its dependent
00992   // globals. We use a little extra memory maintaining both a set and a list to
00993   // have fast searches while maintaining a strict ordering.
00994   SmallVector<const GlobalVariable *, 8> Globals;
00995   DenseSet<const GlobalVariable *> GVVisited;
00996   DenseSet<const GlobalVariable *> GVVisiting;
00997 
00998   // Visit each global variable, in order
00999   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
01000        I != E; ++I)
01001     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
01002 
01003   assert(GVVisited.size() == M.getGlobalList().size() &&
01004          "Missed a global variable");
01005   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
01006 
01007   // Print out module-level global variables in proper order
01008   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
01009     printModuleLevelGV(Globals[i], OS2);
01010 
01011   OS2 << '\n';
01012 
01013   OutStreamer.EmitRawText(OS2.str());
01014 }
01015 
01016 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
01017   O << "//\n";
01018   O << "// Generated by LLVM NVPTX Back-End\n";
01019   O << "//\n";
01020   O << "\n";
01021 
01022   unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
01023   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
01024 
01025   O << ".target ";
01026   O << nvptxSubtarget.getTargetName();
01027 
01028   if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
01029     O << ", texmode_independent";
01030   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
01031     if (!nvptxSubtarget.hasDouble())
01032       O << ", map_f64_to_f32";
01033   }
01034 
01035   if (MAI->doesSupportDebugInformation())
01036     O << ", debug";
01037 
01038   O << "\n";
01039 
01040   O << ".address_size ";
01041   if (nvptxSubtarget.is64Bit())
01042     O << "64";
01043   else
01044     O << "32";
01045   O << "\n";
01046 
01047   O << "\n";
01048 }
01049 
01050 bool NVPTXAsmPrinter::doFinalization(Module &M) {
01051 
01052   // If we did not emit any functions, then the global declarations have not
01053   // yet been emitted.
01054   if (!GlobalsEmitted) {
01055     emitGlobals(M);
01056     GlobalsEmitted = true;
01057   }
01058 
01059   // XXX Temproarily remove global variables so that doFinalization() will not
01060   // emit them again (global variables are emitted at beginning).
01061 
01062   Module::GlobalListType &global_list = M.getGlobalList();
01063   int i, n = global_list.size();
01064   GlobalVariable **gv_array = new GlobalVariable *[n];
01065 
01066   // first, back-up GlobalVariable in gv_array
01067   i = 0;
01068   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
01069        I != E; ++I)
01070     gv_array[i++] = &*I;
01071 
01072   // second, empty global_list
01073   while (!global_list.empty())
01074     global_list.remove(global_list.begin());
01075 
01076   // call doFinalization
01077   bool ret = AsmPrinter::doFinalization(M);
01078 
01079   // now we restore global variables
01080   for (i = 0; i < n; i++)
01081     global_list.insert(global_list.end(), gv_array[i]);
01082 
01083   clearAnnotationCache(&M);
01084 
01085   delete[] gv_array;
01086   return ret;
01087 
01088   //bool Result = AsmPrinter::doFinalization(M);
01089   // Instead of calling the parents doFinalization, we may
01090   // clone parents doFinalization and customize here.
01091   // Currently, we if NVISA out the EmitGlobals() in
01092   // parent's doFinalization, which is too intrusive.
01093   //
01094   // Same for the doInitialization.
01095   //return Result;
01096 }
01097 
01098 // This function emits appropriate linkage directives for
01099 // functions and global variables.
01100 //
01101 // extern function declaration            -> .extern
01102 // extern function definition             -> .visible
01103 // external global variable with init     -> .visible
01104 // external without init                  -> .extern
01105 // appending                              -> not allowed, assert.
01106 // for any linkage other than
01107 // internal, private, linker_private,
01108 // linker_private_weak, linker_private_weak_def_auto,
01109 // we emit                                -> .weak.
01110 
01111 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
01112                                            raw_ostream &O) {
01113   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
01114     if (V->hasExternalLinkage()) {
01115       if (isa<GlobalVariable>(V)) {
01116         const GlobalVariable *GVar = cast<GlobalVariable>(V);
01117         if (GVar) {
01118           if (GVar->hasInitializer())
01119             O << ".visible ";
01120           else
01121             O << ".extern ";
01122         }
01123       } else if (V->isDeclaration())
01124         O << ".extern ";
01125       else
01126         O << ".visible ";
01127     } else if (V->hasAppendingLinkage()) {
01128       std::string msg;
01129       msg.append("Error: ");
01130       msg.append("Symbol ");
01131       if (V->hasName())
01132         msg.append(V->getName().str());
01133       msg.append("has unsupported appending linkage type");
01134       llvm_unreachable(msg.c_str());
01135     } else if (!V->hasInternalLinkage() &&
01136                !V->hasPrivateLinkage()) {
01137       O << ".weak ";
01138     }
01139   }
01140 }
01141 
01142 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
01143                                          raw_ostream &O,
01144                                          bool processDemoted) {
01145 
01146   // Skip meta data
01147   if (GVar->hasSection()) {
01148     if (GVar->getSection() == StringRef("llvm.metadata"))
01149       return;
01150   }
01151 
01152   // Skip LLVM intrinsic global variables
01153   if (GVar->getName().startswith("llvm.") ||
01154       GVar->getName().startswith("nvvm."))
01155     return;
01156 
01157   const DataLayout *TD = TM.getDataLayout();
01158 
01159   // GlobalVariables are always constant pointers themselves.
01160   const PointerType *PTy = GVar->getType();
01161   Type *ETy = PTy->getElementType();
01162 
01163   if (GVar->hasExternalLinkage()) {
01164     if (GVar->hasInitializer())
01165       O << ".visible ";
01166     else
01167       O << ".extern ";
01168   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
01169              GVar->hasAvailableExternallyLinkage() ||
01170              GVar->hasCommonLinkage()) {
01171     O << ".weak ";
01172   }
01173 
01174   if (llvm::isTexture(*GVar)) {
01175     O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
01176     return;
01177   }
01178 
01179   if (llvm::isSurface(*GVar)) {
01180     O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
01181     return;
01182   }
01183 
01184   if (GVar->isDeclaration()) {
01185     // (extern) declarations, no definition or initializer
01186     // Currently the only known declaration is for an automatic __local
01187     // (.shared) promoted to global.
01188     emitPTXGlobalVariable(GVar, O);
01189     O << ";\n";
01190     return;
01191   }
01192 
01193   if (llvm::isSampler(*GVar)) {
01194     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
01195 
01196     const Constant *Initializer = nullptr;
01197     if (GVar->hasInitializer())
01198       Initializer = GVar->getInitializer();
01199     const ConstantInt *CI = nullptr;
01200     if (Initializer)
01201       CI = dyn_cast<ConstantInt>(Initializer);
01202     if (CI) {
01203       unsigned sample = CI->getZExtValue();
01204 
01205       O << " = { ";
01206 
01207       for (int i = 0,
01208                addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
01209            i < 3; i++) {
01210         O << "addr_mode_" << i << " = ";
01211         switch (addr) {
01212         case 0:
01213           O << "wrap";
01214           break;
01215         case 1:
01216           O << "clamp_to_border";
01217           break;
01218         case 2:
01219           O << "clamp_to_edge";
01220           break;
01221         case 3:
01222           O << "wrap";
01223           break;
01224         case 4:
01225           O << "mirror";
01226           break;
01227         }
01228         O << ", ";
01229       }
01230       O << "filter_mode = ";
01231       switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
01232       case 0:
01233         O << "nearest";
01234         break;
01235       case 1:
01236         O << "linear";
01237         break;
01238       case 2:
01239         llvm_unreachable("Anisotropic filtering is not supported");
01240       default:
01241         O << "nearest";
01242         break;
01243       }
01244       if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
01245         O << ", force_unnormalized_coords = 1";
01246       }
01247       O << " }";
01248     }
01249 
01250     O << ";\n";
01251     return;
01252   }
01253 
01254   if (GVar->hasPrivateLinkage()) {
01255 
01256     if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
01257       return;
01258 
01259     // FIXME - need better way (e.g. Metadata) to avoid generating this global
01260     if (!strncmp(GVar->getName().data(), "filename", 8))
01261       return;
01262     if (GVar->use_empty())
01263       return;
01264   }
01265 
01266   const Function *demotedFunc = nullptr;
01267   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
01268     O << "// " << GVar->getName().str() << " has been demoted\n";
01269     if (localDecls.find(demotedFunc) != localDecls.end())
01270       localDecls[demotedFunc].push_back(GVar);
01271     else {
01272       std::vector<const GlobalVariable *> temp;
01273       temp.push_back(GVar);
01274       localDecls[demotedFunc] = temp;
01275     }
01276     return;
01277   }
01278 
01279   O << ".";
01280   emitPTXAddressSpace(PTy->getAddressSpace(), O);
01281 
01282   if (isManaged(*GVar)) {
01283     O << " .attribute(.managed)";
01284   }
01285 
01286   if (GVar->getAlignment() == 0)
01287     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
01288   else
01289     O << " .align " << GVar->getAlignment();
01290 
01291   if (ETy->isSingleValueType()) {
01292     O << " .";
01293     // Special case: ABI requires that we use .u8 for predicates
01294     if (ETy->isIntegerTy(1))
01295       O << "u8";
01296     else
01297       O << getPTXFundamentalTypeStr(ETy, false);
01298     O << " ";
01299     O << *getSymbol(GVar);
01300 
01301     // Ptx allows variable initilization only for constant and global state
01302     // spaces.
01303     if (GVar->hasInitializer()) {
01304       if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
01305           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
01306         const Constant *Initializer = GVar->getInitializer();
01307         // 'undef' is treated as there is no value spefied.
01308         if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
01309           O << " = ";
01310           printScalarConstant(Initializer, O);
01311         }
01312       } else {
01313         // The frontend adds zero-initializer to variables that don't have an
01314         // initial value, so skip warning for this case.
01315         if (!GVar->getInitializer()->isNullValue()) {
01316           std::string warnMsg = "initial value of '" + GVar->getName().str() +
01317               "' is not allowed in addrspace(" +
01318               llvm::utostr_32(PTy->getAddressSpace()) + ")";
01319           report_fatal_error(warnMsg.c_str());
01320         }
01321       }
01322     }
01323   } else {
01324     unsigned int ElementSize = 0;
01325 
01326     // Although PTX has direct support for struct type and array type and
01327     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
01328     // targets that support these high level field accesses. Structs, arrays
01329     // and vectors are lowered into arrays of bytes.
01330     switch (ETy->getTypeID()) {
01331     case Type::StructTyID:
01332     case Type::ArrayTyID:
01333     case Type::VectorTyID:
01334       ElementSize = TD->getTypeStoreSize(ETy);
01335       // Ptx allows variable initilization only for constant and
01336       // global state spaces.
01337       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
01338            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
01339           GVar->hasInitializer()) {
01340         const Constant *Initializer = GVar->getInitializer();
01341         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
01342           AggBuffer aggBuffer(ElementSize, O, *this);
01343           bufferAggregateConstant(Initializer, &aggBuffer);
01344           if (aggBuffer.numSymbols) {
01345             if (nvptxSubtarget.is64Bit()) {
01346               O << " .u64 " << *getSymbol(GVar) << "[";
01347               O << ElementSize / 8;
01348             } else {
01349               O << " .u32 " << *getSymbol(GVar) << "[";
01350               O << ElementSize / 4;
01351             }
01352             O << "]";
01353           } else {
01354             O << " .b8 " << *getSymbol(GVar) << "[";
01355             O << ElementSize;
01356             O << "]";
01357           }
01358           O << " = {";
01359           aggBuffer.print();
01360           O << "}";
01361         } else {
01362           O << " .b8 " << *getSymbol(GVar);
01363           if (ElementSize) {
01364             O << "[";
01365             O << ElementSize;
01366             O << "]";
01367           }
01368         }
01369       } else {
01370         O << " .b8 " << *getSymbol(GVar);
01371         if (ElementSize) {
01372           O << "[";
01373           O << ElementSize;
01374           O << "]";
01375         }
01376       }
01377       break;
01378     default:
01379       llvm_unreachable("type not supported yet");
01380     }
01381 
01382   }
01383   O << ";\n";
01384 }
01385 
01386 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
01387   if (localDecls.find(f) == localDecls.end())
01388     return;
01389 
01390   std::vector<const GlobalVariable *> &gvars = localDecls[f];
01391 
01392   for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
01393     O << "\t// demoted variable\n\t";
01394     printModuleLevelGV(gvars[i], O, true);
01395   }
01396 }
01397 
01398 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
01399                                           raw_ostream &O) const {
01400   switch (AddressSpace) {
01401   case llvm::ADDRESS_SPACE_LOCAL:
01402     O << "local";
01403     break;
01404   case llvm::ADDRESS_SPACE_GLOBAL:
01405     O << "global";
01406     break;
01407   case llvm::ADDRESS_SPACE_CONST:
01408     O << "const";
01409     break;
01410   case llvm::ADDRESS_SPACE_SHARED:
01411     O << "shared";
01412     break;
01413   default:
01414     report_fatal_error("Bad address space found while emitting PTX");
01415     break;
01416   }
01417 }
01418 
01419 std::string
01420 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
01421   switch (Ty->getTypeID()) {
01422   default:
01423     llvm_unreachable("unexpected type");
01424     break;
01425   case Type::IntegerTyID: {
01426     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
01427     if (NumBits == 1)
01428       return "pred";
01429     else if (NumBits <= 64) {
01430       std::string name = "u";
01431       return name + utostr(NumBits);
01432     } else {
01433       llvm_unreachable("Integer too large");
01434       break;
01435     }
01436     break;
01437   }
01438   case Type::FloatTyID:
01439     return "f32";
01440   case Type::DoubleTyID:
01441     return "f64";
01442   case Type::PointerTyID:
01443     if (nvptxSubtarget.is64Bit())
01444       if (useB4PTR)
01445         return "b64";
01446       else
01447         return "u64";
01448     else if (useB4PTR)
01449       return "b32";
01450     else
01451       return "u32";
01452   }
01453   llvm_unreachable("unexpected type");
01454   return nullptr;
01455 }
01456 
01457 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
01458                                             raw_ostream &O) {
01459 
01460   const DataLayout *TD = TM.getDataLayout();
01461 
01462   // GlobalVariables are always constant pointers themselves.
01463   const PointerType *PTy = GVar->getType();
01464   Type *ETy = PTy->getElementType();
01465 
01466   O << ".";
01467   emitPTXAddressSpace(PTy->getAddressSpace(), O);
01468   if (GVar->getAlignment() == 0)
01469     O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
01470   else
01471     O << " .align " << GVar->getAlignment();
01472 
01473   if (ETy->isSingleValueType()) {
01474     O << " .";
01475     O << getPTXFundamentalTypeStr(ETy);
01476     O << " ";
01477     O << *getSymbol(GVar);
01478     return;
01479   }
01480 
01481   int64_t ElementSize = 0;
01482 
01483   // Although PTX has direct support for struct type and array type and LLVM IR
01484   // is very similar to PTX, the LLVM CodeGen does not support for targets that
01485   // support these high level field accesses. Structs and arrays are lowered
01486   // into arrays of bytes.
01487   switch (ETy->getTypeID()) {
01488   case Type::StructTyID:
01489   case Type::ArrayTyID:
01490   case Type::VectorTyID:
01491     ElementSize = TD->getTypeStoreSize(ETy);
01492     O << " .b8 " << *getSymbol(GVar) << "[";
01493     if (ElementSize) {
01494       O << itostr(ElementSize);
01495     }
01496     O << "]";
01497     break;
01498   default:
01499     llvm_unreachable("type not supported yet");
01500   }
01501   return;
01502 }
01503 
01504 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
01505   if (Ty->isSingleValueType())
01506     return TD->getPrefTypeAlignment(Ty);
01507 
01508   const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
01509   if (ATy)
01510     return getOpenCLAlignment(TD, ATy->getElementType());
01511 
01512   const VectorType *VTy = dyn_cast<VectorType>(Ty);
01513   if (VTy) {
01514     Type *ETy = VTy->getElementType();
01515     unsigned int numE = VTy->getNumElements();
01516     unsigned int alignE = TD->getPrefTypeAlignment(ETy);
01517     if (numE == 3)
01518       return 4 * alignE;
01519     else
01520       return numE * alignE;
01521   }
01522 
01523   const StructType *STy = dyn_cast<StructType>(Ty);
01524   if (STy) {
01525     unsigned int alignStruct = 1;
01526     // Go through each element of the struct and find the
01527     // largest alignment.
01528     for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
01529       Type *ETy = STy->getElementType(i);
01530       unsigned int align = getOpenCLAlignment(TD, ETy);
01531       if (align > alignStruct)
01532         alignStruct = align;
01533     }
01534     return alignStruct;
01535   }
01536 
01537   const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
01538   if (FTy)
01539     return TD->getPointerPrefAlignment();
01540   return TD->getPrefTypeAlignment(Ty);
01541 }
01542 
01543 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
01544                                      int paramIndex, raw_ostream &O) {
01545   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
01546       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
01547     O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
01548   else {
01549     std::string argName = I->getName();
01550     const char *p = argName.c_str();
01551     while (*p) {
01552       if (*p == '.')
01553         O << "_";
01554       else
01555         O << *p;
01556       p++;
01557     }
01558   }
01559 }
01560 
01561 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
01562   Function::const_arg_iterator I, E;
01563   int i = 0;
01564 
01565   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
01566       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
01567     O << *CurrentFnSym << "_param_" << paramIndex;
01568     return;
01569   }
01570 
01571   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
01572     if (i == paramIndex) {
01573       printParamName(I, paramIndex, O);
01574       return;
01575     }
01576   }
01577   llvm_unreachable("paramIndex out of bound");
01578 }
01579 
01580 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
01581   const DataLayout *TD = TM.getDataLayout();
01582   const AttributeSet &PAL = F->getAttributes();
01583   const TargetLowering *TLI = TM.getTargetLowering();
01584   Function::const_arg_iterator I, E;
01585   unsigned paramIndex = 0;
01586   bool first = true;
01587   bool isKernelFunc = llvm::isKernelFunction(*F);
01588   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
01589   MVT thePointerTy = TLI->getPointerTy();
01590 
01591   O << "(\n";
01592 
01593   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
01594     Type *Ty = I->getType();
01595 
01596     if (!first)
01597       O << ",\n";
01598 
01599     first = false;
01600 
01601     // Handle image/sampler parameters
01602     if (isKernelFunction(*F)) {
01603       if (isSampler(*I) || isImage(*I)) {
01604         if (isImage(*I)) {
01605           std::string sname = I->getName();
01606           if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
01607             if (nvptxSubtarget.hasImageHandles())
01608               O << "\t.param .u64 .ptr .surfref ";
01609             else
01610               O << "\t.param .surfref ";
01611             O << *CurrentFnSym << "_param_" << paramIndex;
01612           }
01613           else { // Default image is read_only
01614             if (nvptxSubtarget.hasImageHandles())
01615               O << "\t.param .u64 .ptr .texref ";
01616             else
01617               O << "\t.param .texref ";
01618             O << *CurrentFnSym << "_param_" << paramIndex;
01619           }
01620         } else {
01621           if (nvptxSubtarget.hasImageHandles())
01622             O << "\t.param .u64 .ptr .samplerref ";
01623           else
01624             O << "\t.param .samplerref ";
01625           O << *CurrentFnSym << "_param_" << paramIndex;
01626         }
01627         continue;
01628       }
01629     }
01630 
01631     if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
01632       if (Ty->isAggregateType() || Ty->isVectorTy()) {
01633         // Just print .param .align <a> .b8 .param[size];
01634         // <a> = PAL.getparamalignment
01635         // size = typeallocsize of element type
01636         unsigned align = PAL.getParamAlignment(paramIndex + 1);
01637         if (align == 0)
01638           align = TD->getABITypeAlignment(Ty);
01639 
01640         unsigned sz = TD->getTypeAllocSize(Ty);
01641         O << "\t.param .align " << align << " .b8 ";
01642         printParamName(I, paramIndex, O);
01643         O << "[" << sz << "]";
01644 
01645         continue;
01646       }
01647       // Just a scalar
01648       const PointerType *PTy = dyn_cast<PointerType>(Ty);
01649       if (isKernelFunc) {
01650         if (PTy) {
01651           // Special handling for pointer arguments to kernel
01652           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
01653 
01654           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
01655             Type *ETy = PTy->getElementType();
01656             int addrSpace = PTy->getAddressSpace();
01657             switch (addrSpace) {
01658             default:
01659               O << ".ptr ";
01660               break;
01661             case llvm::ADDRESS_SPACE_CONST:
01662               O << ".ptr .const ";
01663               break;
01664             case llvm::ADDRESS_SPACE_SHARED:
01665               O << ".ptr .shared ";
01666               break;
01667             case llvm::ADDRESS_SPACE_GLOBAL:
01668               O << ".ptr .global ";
01669               break;
01670             }
01671             O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
01672           }
01673           printParamName(I, paramIndex, O);
01674           continue;
01675         }
01676 
01677         // non-pointer scalar to kernel func
01678         O << "\t.param .";
01679         // Special case: predicate operands become .u8 types
01680         if (Ty->isIntegerTy(1))
01681           O << "u8";
01682         else
01683           O << getPTXFundamentalTypeStr(Ty);
01684         O << " ";
01685         printParamName(I, paramIndex, O);
01686         continue;
01687       }
01688       // Non-kernel function, just print .param .b<size> for ABI
01689       // and .reg .b<size> for non-ABI
01690       unsigned sz = 0;
01691       if (isa<IntegerType>(Ty)) {
01692         sz = cast<IntegerType>(Ty)->getBitWidth();
01693         if (sz < 32)
01694           sz = 32;
01695       } else if (isa<PointerType>(Ty))
01696         sz = thePointerTy.getSizeInBits();
01697       else
01698         sz = Ty->getPrimitiveSizeInBits();
01699       if (isABI)
01700         O << "\t.param .b" << sz << " ";
01701       else
01702         O << "\t.reg .b" << sz << " ";
01703       printParamName(I, paramIndex, O);
01704       continue;
01705     }
01706 
01707     // param has byVal attribute. So should be a pointer
01708     const PointerType *PTy = dyn_cast<PointerType>(Ty);
01709     assert(PTy && "Param with byval attribute should be a pointer type");
01710     Type *ETy = PTy->getElementType();
01711 
01712     if (isABI || isKernelFunc) {
01713       // Just print .param .align <a> .b8 .param[size];
01714       // <a> = PAL.getparamalignment
01715       // size = typeallocsize of element type
01716       unsigned align = PAL.getParamAlignment(paramIndex + 1);
01717       if (align == 0)
01718         align = TD->getABITypeAlignment(ETy);
01719 
01720       unsigned sz = TD->getTypeAllocSize(ETy);
01721       O << "\t.param .align " << align << " .b8 ";
01722       printParamName(I, paramIndex, O);
01723       O << "[" << sz << "]";
01724       continue;
01725     } else {
01726       // Split the ETy into constituent parts and
01727       // print .param .b<size> <name> for each part.
01728       // Further, if a part is vector, print the above for
01729       // each vector element.
01730       SmallVector<EVT, 16> vtparts;
01731       ComputeValueVTs(*TLI, ETy, vtparts);
01732       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
01733         unsigned elems = 1;
01734         EVT elemtype = vtparts[i];
01735         if (vtparts[i].isVector()) {
01736           elems = vtparts[i].getVectorNumElements();
01737           elemtype = vtparts[i].getVectorElementType();
01738         }
01739 
01740         for (unsigned j = 0, je = elems; j != je; ++j) {
01741           unsigned sz = elemtype.getSizeInBits();
01742           if (elemtype.isInteger() && (sz < 32))
01743             sz = 32;
01744           O << "\t.reg .b" << sz << " ";
01745           printParamName(I, paramIndex, O);
01746           if (j < je - 1)
01747             O << ",\n";
01748           ++paramIndex;
01749         }
01750         if (i < e - 1)
01751           O << ",\n";
01752       }
01753       --paramIndex;
01754       continue;
01755     }
01756   }
01757 
01758   O << "\n)\n";
01759 }
01760 
01761 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
01762                                             raw_ostream &O) {
01763   const Function *F = MF.getFunction();
01764   emitFunctionParamList(F, O);
01765 }
01766 
01767 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
01768     const MachineFunction &MF) {
01769   SmallString<128> Str;
01770   raw_svector_ostream O(Str);
01771 
01772   // Map the global virtual register number to a register class specific
01773   // virtual register number starting from 1 with that class.
01774   const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
01775   //unsigned numRegClasses = TRI->getNumRegClasses();
01776 
01777   // Emit the Fake Stack Object
01778   const MachineFrameInfo *MFI = MF.getFrameInfo();
01779   int NumBytes = (int) MFI->getStackSize();
01780   if (NumBytes) {
01781     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
01782       << getFunctionNumber() << "[" << NumBytes << "];\n";
01783     if (nvptxSubtarget.is64Bit()) {
01784       O << "\t.reg .b64 \t%SP;\n";
01785       O << "\t.reg .b64 \t%SPL;\n";
01786     } else {
01787       O << "\t.reg .b32 \t%SP;\n";
01788       O << "\t.reg .b32 \t%SPL;\n";
01789     }
01790   }
01791 
01792   // Go through all virtual registers to establish the mapping between the
01793   // global virtual
01794   // register number and the per class virtual register number.
01795   // We use the per class virtual register number in the ptx output.
01796   unsigned int numVRs = MRI->getNumVirtRegs();
01797   for (unsigned i = 0; i < numVRs; i++) {
01798     unsigned int vr = TRI->index2VirtReg(i);
01799     const TargetRegisterClass *RC = MRI->getRegClass(vr);
01800     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
01801     int n = regmap.size();
01802     regmap.insert(std::make_pair(vr, n + 1));
01803   }
01804 
01805   // Emit register declarations
01806   // @TODO: Extract out the real register usage
01807   // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
01808   // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
01809   // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
01810   // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
01811   // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
01812   // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
01813   // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
01814 
01815   // Emit declaration of the virtual registers or 'physical' registers for
01816   // each register class
01817   for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
01818     const TargetRegisterClass *RC = TRI->getRegClass(i);
01819     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
01820     std::string rcname = getNVPTXRegClassName(RC);
01821     std::string rcStr = getNVPTXRegClassStr(RC);
01822     int n = regmap.size();
01823 
01824     // Only declare those registers that may be used.
01825     if (n) {
01826        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
01827          << ">;\n";
01828     }
01829   }
01830 
01831   OutStreamer.EmitRawText(O.str());
01832 }
01833 
01834 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
01835   APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
01836   bool ignored;
01837   unsigned int numHex;
01838   const char *lead;
01839 
01840   if (Fp->getType()->getTypeID() == Type::FloatTyID) {
01841     numHex = 8;
01842     lead = "0f";
01843     APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
01844   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
01845     numHex = 16;
01846     lead = "0d";
01847     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
01848   } else
01849     llvm_unreachable("unsupported fp type");
01850 
01851   APInt API = APF.bitcastToAPInt();
01852   std::string hexstr(utohexstr(API.getZExtValue()));
01853   O << lead;
01854   if (hexstr.length() < numHex)
01855     O << std::string(numHex - hexstr.length(), '0');
01856   O << utohexstr(API.getZExtValue());
01857 }
01858 
01859 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
01860   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
01861     O << CI->getValue();
01862     return;
01863   }
01864   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
01865     printFPConstant(CFP, O);
01866     return;
01867   }
01868   if (isa<ConstantPointerNull>(CPV)) {
01869     O << "0";
01870     return;
01871   }
01872   if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
01873     PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
01874     bool IsNonGenericPointer = false;
01875     if (PTy && PTy->getAddressSpace() != 0) {
01876       IsNonGenericPointer = true;
01877     }
01878     if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
01879       O << "generic(";
01880       O << *getSymbol(GVar);
01881       O << ")";
01882     } else {
01883       O << *getSymbol(GVar);
01884     }
01885     return;
01886   }
01887   if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01888     const Value *v = Cexpr->stripPointerCasts();
01889     PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
01890     bool IsNonGenericPointer = false;
01891     if (PTy && PTy->getAddressSpace() != 0) {
01892       IsNonGenericPointer = true;
01893     }
01894     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
01895       if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
01896         O << "generic(";
01897         O << *getSymbol(GVar);
01898         O << ")";
01899       } else {
01900         O << *getSymbol(GVar);
01901       }
01902       return;
01903     } else {
01904       O << *LowerConstant(CPV, *this);
01905       return;
01906     }
01907   }
01908   llvm_unreachable("Not scalar type found in printScalarConstant()");
01909 }
01910 
01911 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
01912                                    AggBuffer *aggBuffer) {
01913 
01914   const DataLayout *TD = TM.getDataLayout();
01915 
01916   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
01917     int s = TD->getTypeAllocSize(CPV->getType());
01918     if (s < Bytes)
01919       s = Bytes;
01920     aggBuffer->addZeros(s);
01921     return;
01922   }
01923 
01924   unsigned char *ptr;
01925   switch (CPV->getType()->getTypeID()) {
01926 
01927   case Type::IntegerTyID: {
01928     const Type *ETy = CPV->getType();
01929     if (ETy == Type::getInt8Ty(CPV->getContext())) {
01930       unsigned char c =
01931           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
01932       ptr = &c;
01933       aggBuffer->addBytes(ptr, 1, Bytes);
01934     } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
01935       short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
01936       ptr = (unsigned char *)&int16;
01937       aggBuffer->addBytes(ptr, 2, Bytes);
01938     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
01939       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
01940         int int32 = (int)(constInt->getZExtValue());
01941         ptr = (unsigned char *)&int32;
01942         aggBuffer->addBytes(ptr, 4, Bytes);
01943         break;
01944       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01945         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
01946                 ConstantFoldConstantExpression(Cexpr, TD))) {
01947           int int32 = (int)(constInt->getZExtValue());
01948           ptr = (unsigned char *)&int32;
01949           aggBuffer->addBytes(ptr, 4, Bytes);
01950           break;
01951         }
01952         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
01953           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
01954           aggBuffer->addSymbol(v);
01955           aggBuffer->addZeros(4);
01956           break;
01957         }
01958       }
01959       llvm_unreachable("unsupported integer const type");
01960     } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
01961       if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
01962         long long int64 = (long long)(constInt->getZExtValue());
01963         ptr = (unsigned char *)&int64;
01964         aggBuffer->addBytes(ptr, 8, Bytes);
01965         break;
01966       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
01967         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
01968                 ConstantFoldConstantExpression(Cexpr, TD))) {
01969           long long int64 = (long long)(constInt->getZExtValue());
01970           ptr = (unsigned char *)&int64;
01971           aggBuffer->addBytes(ptr, 8, Bytes);
01972           break;
01973         }
01974         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
01975           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
01976           aggBuffer->addSymbol(v);
01977           aggBuffer->addZeros(8);
01978           break;
01979         }
01980       }
01981       llvm_unreachable("unsupported integer const type");
01982     } else
01983       llvm_unreachable("unsupported integer const type");
01984     break;
01985   }
01986   case Type::FloatTyID:
01987   case Type::DoubleTyID: {
01988     const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
01989     const Type *Ty = CFP->getType();
01990     if (Ty == Type::getFloatTy(CPV->getContext())) {
01991       float float32 = (float) CFP->getValueAPF().convertToFloat();
01992       ptr = (unsigned char *)&float32;
01993       aggBuffer->addBytes(ptr, 4, Bytes);
01994     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
01995       double float64 = CFP->getValueAPF().convertToDouble();
01996       ptr = (unsigned char *)&float64;
01997       aggBuffer->addBytes(ptr, 8, Bytes);
01998     } else {
01999       llvm_unreachable("unsupported fp const type");
02000     }
02001     break;
02002   }
02003   case Type::PointerTyID: {
02004     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
02005       aggBuffer->addSymbol(GVar);
02006     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
02007       const Value *v = Cexpr->stripPointerCasts();
02008       aggBuffer->addSymbol(v);
02009     }
02010     unsigned int s = TD->getTypeAllocSize(CPV->getType());
02011     aggBuffer->addZeros(s);
02012     break;
02013   }
02014 
02015   case Type::ArrayTyID:
02016   case Type::VectorTyID:
02017   case Type::StructTyID: {
02018     if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
02019         isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
02020       int ElementSize = TD->getTypeAllocSize(CPV->getType());
02021       bufferAggregateConstant(CPV, aggBuffer);
02022       if (Bytes > ElementSize)
02023         aggBuffer->addZeros(Bytes - ElementSize);
02024     } else if (isa<ConstantAggregateZero>(CPV))
02025       aggBuffer->addZeros(Bytes);
02026     else
02027       llvm_unreachable("Unexpected Constant type");
02028     break;
02029   }
02030 
02031   default:
02032     llvm_unreachable("unsupported type");
02033   }
02034 }
02035 
02036 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
02037                                               AggBuffer *aggBuffer) {
02038   const DataLayout *TD = TM.getDataLayout();
02039   int Bytes;
02040 
02041   // Old constants
02042   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
02043     if (CPV->getNumOperands())
02044       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
02045         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
02046     return;
02047   }
02048 
02049   if (const ConstantDataSequential *CDS =
02050           dyn_cast<ConstantDataSequential>(CPV)) {
02051     if (CDS->getNumElements())
02052       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
02053         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
02054                      aggBuffer);
02055     return;
02056   }
02057 
02058   if (isa<ConstantStruct>(CPV)) {
02059     if (CPV->getNumOperands()) {
02060       StructType *ST = cast<StructType>(CPV->getType());
02061       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
02062         if (i == (e - 1))
02063           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
02064                   TD->getTypeAllocSize(ST) -
02065                   TD->getStructLayout(ST)->getElementOffset(i);
02066         else
02067           Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
02068                   TD->getStructLayout(ST)->getElementOffset(i);
02069         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
02070       }
02071     }
02072     return;
02073   }
02074   llvm_unreachable("unsupported constant type in printAggregateConstant()");
02075 }
02076 
02077 // buildTypeNameMap - Run through symbol table looking for type names.
02078 //
02079 
02080 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
02081 
02082   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
02083 
02084   if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
02085                                   !PI->second.compare("struct._image2d_t") ||
02086                                   !PI->second.compare("struct._image3d_t")))
02087     return true;
02088 
02089   return false;
02090 }
02091 
02092 
02093 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
02094   switch (MI.getOpcode()) {
02095   default:
02096     return false;
02097   case NVPTX::CallArgBeginInst:
02098   case NVPTX::CallArgEndInst0:
02099   case NVPTX::CallArgEndInst1:
02100   case NVPTX::CallArgF32:
02101   case NVPTX::CallArgF64:
02102   case NVPTX::CallArgI16:
02103   case NVPTX::CallArgI32:
02104   case NVPTX::CallArgI32imm:
02105   case NVPTX::CallArgI64:
02106   case NVPTX::CallArgParam:
02107   case NVPTX::CallVoidInst:
02108   case NVPTX::CallVoidInstReg:
02109   case NVPTX::Callseq_End:
02110   case NVPTX::CallVoidInstReg64:
02111   case NVPTX::DeclareParamInst:
02112   case NVPTX::DeclareRetMemInst:
02113   case NVPTX::DeclareRetRegInst:
02114   case NVPTX::DeclareRetScalarInst:
02115   case NVPTX::DeclareScalarParamInst:
02116   case NVPTX::DeclareScalarRegInst:
02117   case NVPTX::StoreParamF32:
02118   case NVPTX::StoreParamF64:
02119   case NVPTX::StoreParamI16:
02120   case NVPTX::StoreParamI32:
02121   case NVPTX::StoreParamI64:
02122   case NVPTX::StoreParamI8:
02123   case NVPTX::StoreRetvalF32:
02124   case NVPTX::StoreRetvalF64:
02125   case NVPTX::StoreRetvalI16:
02126   case NVPTX::StoreRetvalI32:
02127   case NVPTX::StoreRetvalI64:
02128   case NVPTX::StoreRetvalI8:
02129   case NVPTX::LastCallArgF32:
02130   case NVPTX::LastCallArgF64:
02131   case NVPTX::LastCallArgI16:
02132   case NVPTX::LastCallArgI32:
02133   case NVPTX::LastCallArgI32imm:
02134   case NVPTX::LastCallArgI64:
02135   case NVPTX::LastCallArgParam:
02136   case NVPTX::LoadParamMemF32:
02137   case NVPTX::LoadParamMemF64:
02138   case NVPTX::LoadParamMemI16:
02139   case NVPTX::LoadParamMemI32:
02140   case NVPTX::LoadParamMemI64:
02141   case NVPTX::LoadParamMemI8:
02142   case NVPTX::PrototypeInst:
02143   case NVPTX::DBG_VALUE:
02144     return true;
02145   }
02146   return false;
02147 }
02148 
02149 /// PrintAsmOperand - Print out an operand for an inline asm expression.
02150 ///
02151 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
02152                                       unsigned AsmVariant,
02153                                       const char *ExtraCode, raw_ostream &O) {
02154   if (ExtraCode && ExtraCode[0]) {
02155     if (ExtraCode[1] != 0)
02156       return true; // Unknown modifier.
02157 
02158     switch (ExtraCode[0]) {
02159     default:
02160       // See if this is a generic print operand
02161       return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
02162     case 'r':
02163       break;
02164     }
02165   }
02166 
02167   printOperand(MI, OpNo, O);
02168 
02169   return false;
02170 }
02171 
02172 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
02173     const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
02174     const char *ExtraCode, raw_ostream &O) {
02175   if (ExtraCode && ExtraCode[0])
02176     return true; // Unknown modifier
02177 
02178   O << '[';
02179   printMemOperand(MI, OpNo, O);
02180   O << ']';
02181 
02182   return false;
02183 }
02184 
02185 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
02186                                    raw_ostream &O, const char *Modifier) {
02187   const MachineOperand &MO = MI->getOperand(opNum);
02188   switch (MO.getType()) {
02189   case MachineOperand::MO_Register:
02190     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
02191       if (MO.getReg() == NVPTX::VRDepot)
02192         O << DEPOTNAME << getFunctionNumber();
02193       else
02194         O << NVPTXInstPrinter::getRegisterName(MO.getReg());
02195     } else {
02196       emitVirtualRegister(MO.getReg(), O);
02197     }
02198     return;
02199 
02200   case MachineOperand::MO_Immediate:
02201     if (!Modifier)
02202       O << MO.getImm();
02203     else if (strstr(Modifier, "vec") == Modifier)
02204       printVecModifiedImmediate(MO, Modifier, O);
02205     else
02206       llvm_unreachable(
02207           "Don't know how to handle modifier on immediate operand");
02208     return;
02209 
02210   case MachineOperand::MO_FPImmediate:
02211     printFPConstant(MO.getFPImm(), O);
02212     break;
02213 
02214   case MachineOperand::MO_GlobalAddress:
02215     O << *getSymbol(MO.getGlobal());
02216     break;
02217 
02218   case MachineOperand::MO_MachineBasicBlock:
02219     O << *MO.getMBB()->getSymbol();
02220     return;
02221 
02222   default:
02223     llvm_unreachable("Operand type not supported.");
02224   }
02225 }
02226 
02227 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
02228                                       raw_ostream &O, const char *Modifier) {
02229   printOperand(MI, opNum, O);
02230 
02231   if (Modifier && !strcmp(Modifier, "add")) {
02232     O << ", ";
02233     printOperand(MI, opNum + 1, O);
02234   } else {
02235     if (MI->getOperand(opNum + 1).isImm() &&
02236         MI->getOperand(opNum + 1).getImm() == 0)
02237       return; // don't print ',0' or '+0'
02238     O << "+";
02239     printOperand(MI, opNum + 1, O);
02240   }
02241 }
02242 
02243 
02244 // Force static initialization.
02245 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
02246   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
02247   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
02248 }
02249 
02250 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
02251   std::stringstream temp;
02252   LineReader *reader = this->getReader(filename.str());
02253   temp << "\n//";
02254   temp << filename.str();
02255   temp << ":";
02256   temp << line;
02257   temp << " ";
02258   temp << reader->readLine(line);
02259   temp << "\n";
02260   this->OutStreamer.EmitRawText(Twine(temp.str()));
02261 }
02262 
02263 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
02264   if (!reader) {
02265     reader = new LineReader(filename);
02266   }
02267 
02268   if (reader->fileName() != filename) {
02269     delete reader;
02270     reader = new LineReader(filename);
02271   }
02272 
02273   return reader;
02274 }
02275 
02276 std::string LineReader::readLine(unsigned lineNum) {
02277   if (lineNum < theCurLine) {
02278     theCurLine = 0;
02279     fstr.seekg(0, std::ios::beg);
02280   }
02281   while (theCurLine < lineNum) {
02282     fstr.getline(buff, 500);
02283     theCurLine++;
02284   }
02285   return buff;
02286 }
02287 
02288 // Force static initialization.
02289 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
02290   RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
02291   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
02292 }