LLVM API Documentation
00001 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file contains a printer that converts from our internal representation 00011 // of machine-dependent LLVM code to NVPTX assembly language. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "NVPTXAsmPrinter.h" 00016 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 00017 #include "NVPTX.h" 00018 #include "NVPTXInstrInfo.h" 00019 #include "NVPTXNumRegisters.h" 00020 #include "NVPTXRegisterInfo.h" 00021 #include "NVPTXTargetMachine.h" 00022 #include "NVPTXUtilities.h" 00023 #include "cl_common_defines.h" 00024 #include "llvm/ADT/StringExtras.h" 00025 #include "llvm/Analysis/ConstantFolding.h" 00026 #include "llvm/Assembly/Writer.h" 00027 #include "llvm/CodeGen/Analysis.h" 00028 #include "llvm/CodeGen/MachineFrameInfo.h" 00029 #include "llvm/CodeGen/MachineModuleInfo.h" 00030 #include "llvm/CodeGen/MachineRegisterInfo.h" 00031 #include "llvm/DebugInfo.h" 00032 #include "llvm/IR/DerivedTypes.h" 00033 #include "llvm/IR/Function.h" 00034 #include "llvm/IR/GlobalVariable.h" 00035 #include "llvm/IR/Module.h" 00036 #include "llvm/IR/Operator.h" 00037 #include "llvm/MC/MCStreamer.h" 00038 #include "llvm/MC/MCSymbol.h" 00039 #include "llvm/Support/CommandLine.h" 00040 #include "llvm/Support/ErrorHandling.h" 00041 #include "llvm/Support/FormattedStream.h" 00042 #include "llvm/Support/Path.h" 00043 #include "llvm/Support/TargetRegistry.h" 00044 #include "llvm/Support/TimeValue.h" 00045 #include "llvm/Target/Mangler.h" 00046 #include "llvm/Target/TargetLoweringObjectFile.h" 00047 #include <sstream> 00048 using namespace llvm; 00049 00050 
#include "NVPTXGenAsmWriter.inc"

bool RegAllocNilUsed = true;

// Name prefix printed for the NVPTX::VRDepot register; printOperand appends
// the function number to it.
#define DEPOTNAME "__local_depot"

// When true (the default), emitLineNumberAsDotLoc emits ".loc" directives.
static cl::opt<bool>
EmitLineNumbers("nvptx-emit-line-numbers",
                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
                cl::init(true));

namespace llvm { bool InterleaveSrcInPtx = false; }

// Command-line flag whose storage is llvm::InterleaveSrcInPtx (see
// cl::location below).
static cl::opt<bool, true>
InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
              cl::desc("NVPTX Specific: Emit source line in ptx file"),
              cl::location(llvm::InterleaveSrcInPtx));

namespace {
/// DiscoverDependentGlobals - Collect into \p Globals every GlobalVariable
/// reachable from \p V by recursively walking operands. A GlobalVariable is
/// recorded but its own operands are not walked.
void DiscoverDependentGlobals(const Value *V,
                              DenseSet<const GlobalVariable *> &Globals) {
  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    Globals.insert(GV);
  else {
    if (const User *U = dyn_cast<User>(V)) {
      for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
        DiscoverDependentGlobals(U->getOperand(i), Globals);
      }
    }
  }
}

/// VisitGlobalVariableForEmission - Append \p GV to \p Order, but only after
/// every global variable referenced from its operands has been appended
/// first (depth-first post-order).
///
/// \p Visited holds globals already appended; \p Visiting holds globals whose
/// dependencies are currently being walked. Meeting a global that is still in
/// \p Visiting means the dependencies form a cycle, which is a fatal error.
void VisitGlobalVariableForEmission(
    const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
    DenseSet<const GlobalVariable *> &Visited,
    DenseSet<const GlobalVariable *> &Visiting) {
  // Have we already visited this one?
  if (Visited.count(GV))
    return;

  // Do we have a circular dependency?
  if (Visiting.count(GV))
    report_fatal_error("Circular dependency found in global variable set");

  // Start visiting this global
  Visiting.insert(GV);

  // Make sure we visit everything this global depends on first
  DenseSet<const GlobalVariable *> Others;
  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
    DiscoverDependentGlobals(GV->getOperand(i), Others);

  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
                                                  E = Others.end();
       I != E; ++I)
    VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);

  // Now we can visit ourself
  Order.push_back(GV);
  Visited.insert(GV);
  Visiting.erase(GV);
}
}

// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
// cannot just link to the existing version.
/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
///
/// Null and undef values lower to the constant 0, integers to their
/// zero-extended value, globals and block addresses to symbol references.
/// Constant expressions are lowered per opcode; any opcode not listed in the
/// switch is constant-folded once and retried, and is a fatal error if
/// folding does not produce a different constant.
using namespace nvptx;
const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
  MCContext &Ctx = AP.OutContext;

  if (CV->isNullValue() || isa<UndefValue>(CV))
    return MCConstantExpr::Create(0, Ctx);

  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
    return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);

  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);

  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
  if (CE == 0)
    llvm_unreachable("Unknown constant value to lower!");

  switch (CE->getOpcode()) {
  default:
    // If the code isn't optimized, there may be outstanding folding
    // opportunities. Attempt to fold the expression using DataLayout as a
    // last resort before giving up.
    if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
      if (C != CE)
        return LowerConstant(C, AP);

    // Otherwise report the problem to the user.
    {
      std::string S;
      raw_string_ostream OS(S);
      OS << "Unsupported expression in static initializer: ";
      WriteAsOperand(OS, CE, /*PrintType=*/ false,
                     !AP.MF ? 0 : AP.MF->getFunction()->getParent());
      report_fatal_error(OS.str());
    }
  case Instruction::GetElementPtr: {
    const DataLayout &TD = *AP.TM.getDataLayout();
    // Generate a symbolic expression for the byte address
    APInt OffsetAI(TD.getPointerSizeInBits(), 0);
    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);

    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
    // A zero offset needs no addition; return the base expression as-is.
    if (!OffsetAI)
      return Base;

    int64_t Offset = OffsetAI.getSExtValue();
    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
                                   Ctx);
  }

  case Instruction::Trunc:
  // We emit the value and depend on the assembler to truncate the generated
  // expression properly. This is important for differences between
  // blockaddress labels. Since the two labels are in the same function, it
  // is reasonable to treat their delta as a 32-bit value.
  // FALL THROUGH.
  case Instruction::BitCast:
    return LowerConstant(CE->getOperand(0), AP);

  case Instruction::IntToPtr: {
    const DataLayout &TD = *AP.TM.getDataLayout();
    // Handle casts to pointers by changing them into casts to the appropriate
    // integer type. This promotes constant folding and simplifies this code.
    Constant *Op = CE->getOperand(0);
    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
                                      false /*ZExt*/);
    return LowerConstant(Op, AP);
  }

  case Instruction::PtrToInt: {
    const DataLayout &TD = *AP.TM.getDataLayout();
    // Support only foldable casts to/from pointers that can be eliminated by
    // changing the pointer to the appropriately sized integer type.
    Constant *Op = CE->getOperand(0);
    Type *Ty = CE->getType();

    const MCExpr *OpExpr = LowerConstant(Op, AP);

    // We can emit the pointer value into this slot if the slot is an
    // integer slot equal to the size of the pointer.
    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
      return OpExpr;

    // Otherwise the pointer is smaller than the resultant integer, mask off
    // the high bits so we are sure to get a proper truncation if the input is
    // a constant expr.
    // NOTE(review): the shift below assumes 0 < InBits <= 64 -- confirm.
    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
    const MCExpr *MaskExpr =
        MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
  }

  // The MC library also has a right-shift operator, but it isn't consistently
  // signed or unsigned between different targets.
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::Shl:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Lower both operands, then map the IR opcode onto the matching MC
    // binary expression.
    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
    switch (CE->getOpcode()) {
    default:
      llvm_unreachable("Unknown binary operator constant cast expr");
    case Instruction::Add:
      return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
    case Instruction::Sub:
      return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
    case Instruction::Mul:
      return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
    case Instruction::SDiv:
      return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
    case Instruction::SRem:
      return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
    case Instruction::Shl:
      return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
    case Instruction::And:
      return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
    case Instruction::Or:
      return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
    case Instruction::Xor:
      return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
    }
  }
  }
}

/// Emit a "\t.loc <file-id> <line> <col>" directive for \p MI when its debug
/// location differs from the one recorded in prevDebugLoc.
///
/// Nothing is emitted when EmitLineNumbers is off, ignoreLoc(MI) is true, the
/// location is unknown or unchanged, the scope fails Verify(), or the file is
/// not present in filenameMap. When llvm::InterleaveSrcInPtx is set, the
/// source line is emitted before the directive.
void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
  if (!EmitLineNumbers)
    return;
  if (ignoreLoc(MI))
    return;

  DebugLoc curLoc = MI.getDebugLoc();

  if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
    return;

  if (prevDebugLoc == curLoc)
    return;

  // Record the new location even if it turns out to be unknown.
  prevDebugLoc = curLoc;

  if (curLoc.isUnknown())
    return;

  const MachineFunction *MF = MI.getParent()->getParent();
  //const TargetMachine &TM = MF->getTarget();

  const LLVMContext &ctx = MF->getFunction()->getContext();
  DIScope Scope(curLoc.getScope(ctx));

  if (!Scope.Verify())
    return;

  // Build the lookup key: directory + file name unless the file name is
  // already absolute or the directory is empty.
  StringRef fileName(Scope.getFilename());
  StringRef dirName(Scope.getDirectory());
  SmallString<128> FullPathName = dirName;
  if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
    sys::path::append(FullPathName, fileName);
    fileName = FullPathName.str();
  }

  if (filenameMap.find(fileName.str()) == filenameMap.end())
    return;

  // Emit the line from the source file.
  if (llvm::InterleaveSrcInPtx)
    this->emitSrcInText(fileName.str(), curLoc.getLine());

  std::stringstream temp;
  temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
       << " " << curLoc.getCol();
  OutStreamer.EmitRawText(Twine(temp.str().c_str()));
}

/// Print \p MI as raw text. For the CUDA driver interface a ".loc" directive
/// is emitted first via emitLineNumberAsDotLoc.
void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
    emitLineNumberAsDotLoc(*MI);
  printInstruction(MI, OS);
  OutStreamer.EmitRawText(OS.str());
}

/// Print the parenthesised return-value declaration of \p F to \p O.
///
/// Nothing is printed for a void return type. When the SM version is at
/// least 20 a single ".param" named func_retval0 is declared: integers are
/// widened to at least 32 bits, pointers use the target pointer width, and
/// struct/vector types become an aligned ".b8" byte array. Otherwise one
/// ".reg" named func_retval<N> is declared per scalar element, with integers
/// widened to at least 32 bits.
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
  const DataLayout *TD = TM.getDataLayout();
  const TargetLowering *TLI = TM.getTargetLowering();

  Type *Ty = F->getReturnType();

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  if (Ty->getTypeID() == Type::VoidTyID)
    return;

  O << " (";

  if (isABI) {
    if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
      unsigned size = 0;
      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
        size = Ty->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " func_retval0";
    } else if (isa<PointerType>(Ty)) {
      O << ".param .b" << TLI->getPointerTy().getSizeInBits()
        << " func_retval0";
    } else {
      if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*TLI, Ty, vtparts);
        // Sum the size in bytes of every scalar element; integer elements
        // narrower than 8 bits count as one byte.
        unsigned totalsz = 0;
        for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
          unsigned elems = 1;
          EVT elemtype = vtparts[i];
          if (vtparts[i].isVector()) {
            elems = vtparts[i].getVectorNumElements();
            elemtype = vtparts[i].getVectorElementType();
          }
          for (unsigned j = 0, je = elems; j != je; ++j) {
            unsigned sz = elemtype.getSizeInBits();
            if (elemtype.isInteger() && (sz < 8))
              sz = 8;
            totalsz += sz / 8;
          }
        }
        // Use the alignment reported by llvm::getAlign for index 0 when
        // there is one, else the ABI alignment of the type.
        unsigned retAlignment = 0;
        if (!llvm::getAlign(*F, 0, retAlignment))
          retAlignment = TD->getABITypeAlignment(Ty);
        O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
          << "]";
      } else
        assert(false && "Unknown return type");
    }
  } else {
    SmallVector<EVT, 16> vtparts;
    ComputeValueVTs(*TLI, Ty, vtparts);
    // idx numbers the func_retval registers across all parts.
    unsigned idx = 0;
    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
      unsigned elems = 1;
      EVT elemtype = vtparts[i];
      if (vtparts[i].isVector()) {
        elems = vtparts[i].getVectorNumElements();
        elemtype = vtparts[i].getVectorElementType();
      }

      for (unsigned j = 0, je = elems; j != je; ++j) {
        unsigned sz = elemtype.getSizeInBits();
        if (elemtype.isInteger() && (sz < 32))
          sz = 32;
        O << ".reg .b" << sz << " func_retval" << idx;
        if (j < je - 1)
          O << ", ";
        ++idx;
      }
      if (i < e - 1)
        O << ", ";
    }
  }
  O << ") ";
  return;
}

/// Convenience overload: print the return-value declaration of the IR
/// function behind \p MF.
void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
                                        raw_ostream &O) {
  const Function *F = MF.getFunction();
  printReturnValStr(F, O);
}

void
NVPTXAsmPrinter::EmitFunctionEntryLabel() { 00407 SmallString<128> Str; 00408 raw_svector_ostream O(Str); 00409 00410 if (!GlobalsEmitted) { 00411 emitGlobals(*MF->getFunction()->getParent()); 00412 GlobalsEmitted = true; 00413 } 00414 00415 // Set up 00416 MRI = &MF->getRegInfo(); 00417 F = MF->getFunction(); 00418 emitLinkageDirective(F, O); 00419 if (llvm::isKernelFunction(*F)) 00420 O << ".entry "; 00421 else { 00422 O << ".func "; 00423 printReturnValStr(*MF, O); 00424 } 00425 00426 O << *CurrentFnSym; 00427 00428 emitFunctionParamList(*MF, O); 00429 00430 if (llvm::isKernelFunction(*F)) 00431 emitKernelFunctionDirectives(*F, O); 00432 00433 OutStreamer.EmitRawText(O.str()); 00434 00435 prevDebugLoc = DebugLoc(); 00436 } 00437 00438 void NVPTXAsmPrinter::EmitFunctionBodyStart() { 00439 const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); 00440 unsigned numRegClasses = TRI.getNumRegClasses(); 00441 VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1]; 00442 OutStreamer.EmitRawText(StringRef("{\n")); 00443 setAndEmitFunctionVirtualRegisters(*MF); 00444 00445 SmallString<128> Str; 00446 raw_svector_ostream O(Str); 00447 emitDemotedVars(MF->getFunction(), O); 00448 OutStreamer.EmitRawText(O.str()); 00449 } 00450 00451 void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 00452 OutStreamer.EmitRawText(StringRef("}\n")); 00453 delete[] VRidGlobal2LocalMap; 00454 } 00455 00456 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 00457 raw_ostream &O) const { 00458 // If the NVVM IR has some of reqntid* specified, then output 00459 // the reqntid directive, and set the unspecified ones to 1. 00460 // If none of reqntid* is specified, don't output reqntid directive. 
00461 unsigned reqntidx, reqntidy, reqntidz; 00462 bool specified = false; 00463 if (llvm::getReqNTIDx(F, reqntidx) == false) 00464 reqntidx = 1; 00465 else 00466 specified = true; 00467 if (llvm::getReqNTIDy(F, reqntidy) == false) 00468 reqntidy = 1; 00469 else 00470 specified = true; 00471 if (llvm::getReqNTIDz(F, reqntidz) == false) 00472 reqntidz = 1; 00473 else 00474 specified = true; 00475 00476 if (specified) 00477 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 00478 << "\n"; 00479 00480 // If the NVVM IR has some of maxntid* specified, then output 00481 // the maxntid directive, and set the unspecified ones to 1. 00482 // If none of maxntid* is specified, don't output maxntid directive. 00483 unsigned maxntidx, maxntidy, maxntidz; 00484 specified = false; 00485 if (llvm::getMaxNTIDx(F, maxntidx) == false) 00486 maxntidx = 1; 00487 else 00488 specified = true; 00489 if (llvm::getMaxNTIDy(F, maxntidy) == false) 00490 maxntidy = 1; 00491 else 00492 specified = true; 00493 if (llvm::getMaxNTIDz(F, maxntidz) == false) 00494 maxntidz = 1; 00495 else 00496 specified = true; 00497 00498 if (specified) 00499 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 00500 << "\n"; 00501 00502 unsigned mincta; 00503 if (llvm::getMinCTASm(F, mincta)) 00504 O << ".minnctapersm " << mincta << "\n"; 00505 } 00506 00507 void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, 00508 raw_ostream &O) { 00509 const TargetRegisterClass *RC = MRI->getRegClass(vr); 00510 unsigned id = RC->getID(); 00511 00512 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; 00513 unsigned mapped_vr = regmap[vr]; 00514 00515 if (!isVec) { 00516 O << getNVPTXRegClassStr(RC) << mapped_vr; 00517 return; 00518 } 00519 report_fatal_error("Bad register!"); 00520 } 00521 00522 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, 00523 raw_ostream &O) { 00524 getVirtualRegisterName(vr, isVec, O); 00525 } 00526 00527 void 
NVPTXAsmPrinter::printVecModifiedImmediate( 00528 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 00529 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 00530 int Imm = (int) MO.getImm(); 00531 if (0 == strcmp(Modifier, "vecelem")) 00532 O << "_" << vecelem[Imm]; 00533 else if (0 == strcmp(Modifier, "vecv4comm1")) { 00534 if ((Imm < 0) || (Imm > 3)) 00535 O << "//"; 00536 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 00537 if ((Imm < 4) || (Imm > 7)) 00538 O << "//"; 00539 } else if (0 == strcmp(Modifier, "vecv4pos")) { 00540 if (Imm < 0) 00541 Imm = 0; 00542 O << "_" << vecelem[Imm % 4]; 00543 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 00544 if ((Imm < 0) || (Imm > 1)) 00545 O << "//"; 00546 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 00547 if ((Imm < 2) || (Imm > 3)) 00548 O << "//"; 00549 } else if (0 == strcmp(Modifier, "vecv2pos")) { 00550 if (Imm < 0) 00551 Imm = 0; 00552 O << "_" << vecelem[Imm % 2]; 00553 } else 00554 llvm_unreachable("Unknown Modifier on immediate operand"); 00555 } 00556 00557 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 00558 raw_ostream &O, const char *Modifier) { 00559 const MachineOperand &MO = MI->getOperand(opNum); 00560 switch (MO.getType()) { 00561 case MachineOperand::MO_Register: 00562 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 00563 if (MO.getReg() == NVPTX::VRDepot) 00564 O << DEPOTNAME << getFunctionNumber(); 00565 else 00566 O << getRegisterName(MO.getReg()); 00567 } else { 00568 if (!Modifier) 00569 emitVirtualRegister(MO.getReg(), false, O); 00570 else { 00571 if (strcmp(Modifier, "vecfull") == 0) 00572 emitVirtualRegister(MO.getReg(), true, O); 00573 else 00574 llvm_unreachable( 00575 "Don't know how to handle the modifier on virtual register."); 00576 } 00577 } 00578 return; 00579 00580 case MachineOperand::MO_Immediate: 00581 if (!Modifier) 00582 O << MO.getImm(); 00583 else if (strstr(Modifier, "vec") == Modifier) 00584 
printVecModifiedImmediate(MO, Modifier, O); 00585 else 00586 llvm_unreachable( 00587 "Don't know how to handle modifier on immediate operand"); 00588 return; 00589 00590 case MachineOperand::MO_FPImmediate: 00591 printFPConstant(MO.getFPImm(), O); 00592 break; 00593 00594 case MachineOperand::MO_GlobalAddress: 00595 O << *Mang->getSymbol(MO.getGlobal()); 00596 break; 00597 00598 case MachineOperand::MO_ExternalSymbol: { 00599 const char *symbname = MO.getSymbolName(); 00600 if (strstr(symbname, ".PARAM") == symbname) { 00601 unsigned index; 00602 sscanf(symbname + 6, "%u[];", &index); 00603 printParamName(index, O); 00604 } else if (strstr(symbname, ".HLPPARAM") == symbname) { 00605 unsigned index; 00606 sscanf(symbname + 9, "%u[];", &index); 00607 O << *CurrentFnSym << "_param_" << index << "_offset"; 00608 } else 00609 O << symbname; 00610 break; 00611 } 00612 00613 case MachineOperand::MO_MachineBasicBlock: 00614 O << *MO.getMBB()->getSymbol(); 00615 return; 00616 00617 default: 00618 llvm_unreachable("Operand type not supported."); 00619 } 00620 } 00621 00622 void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI, 00623 raw_ostream &O) const { 00624 #ifndef __OPTIMIZE__ 00625 O << "\t// Implicit def :"; 00626 //printOperand(MI, 0); 00627 O << "\n"; 00628 #endif 00629 } 00630 00631 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 00632 raw_ostream &O, const char *Modifier) { 00633 printOperand(MI, opNum, O); 00634 00635 if (Modifier && !strcmp(Modifier, "add")) { 00636 O << ", "; 00637 printOperand(MI, opNum + 1, O); 00638 } else { 00639 if (MI->getOperand(opNum + 1).isImm() && 00640 MI->getOperand(opNum + 1).getImm() == 0) 00641 return; // don't print ',0' or '+0' 00642 O << "+"; 00643 printOperand(MI, opNum + 1, O); 00644 } 00645 } 00646 00647 void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, 00648 raw_ostream &O, const char *Modifier) { 00649 if (Modifier) { 00650 const MachineOperand &MO = 
MI->getOperand(opNum); 00651 int Imm = (int) MO.getImm(); 00652 if (!strcmp(Modifier, "volatile")) { 00653 if (Imm) 00654 O << ".volatile"; 00655 } else if (!strcmp(Modifier, "addsp")) { 00656 switch (Imm) { 00657 case NVPTX::PTXLdStInstCode::GLOBAL: 00658 O << ".global"; 00659 break; 00660 case NVPTX::PTXLdStInstCode::SHARED: 00661 O << ".shared"; 00662 break; 00663 case NVPTX::PTXLdStInstCode::LOCAL: 00664 O << ".local"; 00665 break; 00666 case NVPTX::PTXLdStInstCode::PARAM: 00667 O << ".param"; 00668 break; 00669 case NVPTX::PTXLdStInstCode::CONSTANT: 00670 O << ".const"; 00671 break; 00672 case NVPTX::PTXLdStInstCode::GENERIC: 00673 if (!nvptxSubtarget.hasGenericLdSt()) 00674 O << ".global"; 00675 break; 00676 default: 00677 llvm_unreachable("Wrong Address Space"); 00678 } 00679 } else if (!strcmp(Modifier, "sign")) { 00680 if (Imm == NVPTX::PTXLdStInstCode::Signed) 00681 O << "s"; 00682 else if (Imm == NVPTX::PTXLdStInstCode::Unsigned) 00683 O << "u"; 00684 else 00685 O << "f"; 00686 } else if (!strcmp(Modifier, "vec")) { 00687 if (Imm == NVPTX::PTXLdStInstCode::V2) 00688 O << ".v2"; 00689 else if (Imm == NVPTX::PTXLdStInstCode::V4) 00690 O << ".v4"; 00691 } else 00692 llvm_unreachable("Unknown Modifier"); 00693 } else 00694 llvm_unreachable("Empty Modifier"); 00695 } 00696 00697 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 00698 00699 emitLinkageDirective(F, O); 00700 if (llvm::isKernelFunction(*F)) 00701 O << ".entry "; 00702 else 00703 O << ".func "; 00704 printReturnValStr(F, O); 00705 O << *Mang->getSymbol(F) << "\n"; 00706 emitFunctionParamList(F, O); 00707 O << ";\n"; 00708 } 00709 00710 static bool usedInGlobalVarDef(const Constant *C) { 00711 if (!C) 00712 return false; 00713 00714 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 00715 if (GV->getName().str() == "llvm.used") 00716 return false; 00717 return true; 00718 } 00719 00720 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 00721 
ui != ue; ++ui) { 00722 const Constant *C = dyn_cast<Constant>(*ui); 00723 if (usedInGlobalVarDef(C)) 00724 return true; 00725 } 00726 return false; 00727 } 00728 00729 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 00730 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 00731 if (othergv->getName().str() == "llvm.used") 00732 return true; 00733 } 00734 00735 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 00736 if (instr->getParent() && instr->getParent()->getParent()) { 00737 const Function *curFunc = instr->getParent()->getParent(); 00738 if (oneFunc && (curFunc != oneFunc)) 00739 return false; 00740 oneFunc = curFunc; 00741 return true; 00742 } else 00743 return false; 00744 } 00745 00746 if (const MDNode *md = dyn_cast<MDNode>(U)) 00747 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 00748 (md->getName().str() == "llvm.dbg.sp"))) 00749 return true; 00750 00751 for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); 00752 ui != ue; ++ui) { 00753 if (usedInOneFunc(*ui, oneFunc) == false) 00754 return false; 00755 } 00756 return true; 00757 } 00758 00759 /* Find out if a global variable can be demoted to local scope. 00760 * Currently, this is valid for CUDA shared variables, which have local 00761 * scope and global lifetime. So the conditions to check are : 00762 * 1. Is the global variable in shared address space? 00763 * 2. Does it have internal linkage? 00764 * 3. Is the global variable referenced only in one function? 
00765 */ 00766 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 00767 if (gv->hasInternalLinkage() == false) 00768 return false; 00769 const PointerType *Pty = gv->getType(); 00770 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 00771 return false; 00772 00773 const Function *oneFunc = 0; 00774 00775 bool flag = usedInOneFunc(gv, oneFunc); 00776 if (flag == false) 00777 return false; 00778 if (!oneFunc) 00779 return false; 00780 f = oneFunc; 00781 return true; 00782 } 00783 00784 static bool useFuncSeen(const Constant *C, 00785 llvm::DenseMap<const Function *, bool> &seenMap) { 00786 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 00787 ui != ue; ++ui) { 00788 if (const Constant *cu = dyn_cast<Constant>(*ui)) { 00789 if (useFuncSeen(cu, seenMap)) 00790 return true; 00791 } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { 00792 const BasicBlock *bb = I->getParent(); 00793 if (!bb) 00794 continue; 00795 const Function *caller = bb->getParent(); 00796 if (!caller) 00797 continue; 00798 if (seenMap.find(caller) != seenMap.end()) 00799 return true; 00800 } 00801 } 00802 return false; 00803 } 00804 00805 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 00806 llvm::DenseMap<const Function *, bool> seenMap; 00807 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 00808 const Function *F = FI; 00809 00810 if (F->isDeclaration()) { 00811 if (F->use_empty()) 00812 continue; 00813 if (F->getIntrinsicID()) 00814 continue; 00815 emitDeclaration(F, O); 00816 continue; 00817 } 00818 for (Value::const_use_iterator iter = F->use_begin(), 00819 iterEnd = F->use_end(); 00820 iter != iterEnd; ++iter) { 00821 if (const Constant *C = dyn_cast<Constant>(*iter)) { 00822 if (usedInGlobalVarDef(C)) { 00823 // The use is in the initialization of a global variable 00824 // that is a function pointer, so print a declaration 00825 // for the original function 00826 
emitDeclaration(F, O); 00827 break; 00828 } 00829 // Emit a declaration of this function if the function that 00830 // uses this constant expr has already been seen. 00831 if (useFuncSeen(C, seenMap)) { 00832 emitDeclaration(F, O); 00833 break; 00834 } 00835 } 00836 00837 if (!isa<Instruction>(*iter)) 00838 continue; 00839 const Instruction *instr = cast<Instruction>(*iter); 00840 const BasicBlock *bb = instr->getParent(); 00841 if (!bb) 00842 continue; 00843 const Function *caller = bb->getParent(); 00844 if (!caller) 00845 continue; 00846 00847 // If a caller has already been seen, then the caller is 00848 // appearing in the module before the callee. so print out 00849 // a declaration for the callee. 00850 if (seenMap.find(caller) != seenMap.end()) { 00851 emitDeclaration(F, O); 00852 break; 00853 } 00854 } 00855 seenMap[F] = true; 00856 } 00857 } 00858 00859 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 00860 DebugInfoFinder DbgFinder; 00861 DbgFinder.processModule(M); 00862 00863 unsigned i = 1; 00864 for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), 00865 E = DbgFinder.compile_unit_end(); 00866 I != E; ++I) { 00867 DICompileUnit DIUnit(*I); 00868 StringRef Filename(DIUnit.getFilename()); 00869 StringRef Dirname(DIUnit.getDirectory()); 00870 SmallString<128> FullPathName = Dirname; 00871 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 00872 sys::path::append(FullPathName, Filename); 00873 Filename = FullPathName.str(); 00874 } 00875 if (filenameMap.find(Filename.str()) != filenameMap.end()) 00876 continue; 00877 filenameMap[Filename.str()] = i; 00878 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 00879 ++i; 00880 } 00881 00882 for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), 00883 E = DbgFinder.subprogram_end(); 00884 I != E; ++I) { 00885 DISubprogram SP(*I); 00886 StringRef Filename(SP.getFilename()); 00887 StringRef Dirname(SP.getDirectory()); 00888 SmallString<128> FullPathName = 
Dirname; 00889 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 00890 sys::path::append(FullPathName, Filename); 00891 Filename = FullPathName.str(); 00892 } 00893 if (filenameMap.find(Filename.str()) != filenameMap.end()) 00894 continue; 00895 filenameMap[Filename.str()] = i; 00896 ++i; 00897 } 00898 } 00899 00900 bool NVPTXAsmPrinter::doInitialization(Module &M) { 00901 00902 SmallString<128> Str1; 00903 raw_svector_ostream OS1(Str1); 00904 00905 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 00906 MMI->AnalyzeModule(M); 00907 00908 // We need to call the parent's one explicitly. 00909 //bool Result = AsmPrinter::doInitialization(M); 00910 00911 // Initialize TargetLoweringObjectFile. 00912 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 00913 .Initialize(OutContext, TM); 00914 00915 Mang = new Mangler(OutContext, *TM.getDataLayout()); 00916 00917 // Emit header before any dwarf directives are emitted below. 00918 emitHeader(M, OS1); 00919 OutStreamer.EmitRawText(OS1.str()); 00920 00921 // Already commented out 00922 //bool Result = AsmPrinter::doInitialization(M); 00923 00924 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 00925 recordAndEmitFilenames(M); 00926 00927 GlobalsEmitted = false; 00928 00929 return false; // success 00930 } 00931 00932 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 00933 SmallString<128> Str2; 00934 raw_svector_ostream OS2(Str2); 00935 00936 emitDeclarations(M, OS2); 00937 00938 // As ptxas does not support forward references of globals, we need to first 00939 // sort the list of module-level globals in def-use order. We visit each 00940 // global variable in order, and ensure that we emit it *after* its dependent 00941 // globals. We use a little extra memory maintaining both a set and a list to 00942 // have fast searches while maintaining a strict ordering. 
00943 SmallVector<const GlobalVariable *, 8> Globals; 00944 DenseSet<const GlobalVariable *> GVVisited; 00945 DenseSet<const GlobalVariable *> GVVisiting; 00946 00947 // Visit each global variable, in order 00948 for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 00949 I != E; ++I) 00950 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 00951 00952 assert(GVVisited.size() == M.getGlobalList().size() && 00953 "Missed a global variable"); 00954 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 00955 00956 // Print out module-level global variables in proper order 00957 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 00958 printModuleLevelGV(Globals[i], OS2); 00959 00960 OS2 << '\n'; 00961 00962 OutStreamer.EmitRawText(OS2.str()); 00963 } 00964 00965 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 00966 O << "//\n"; 00967 O << "// Generated by LLVM NVPTX Back-End\n"; 00968 O << "//\n"; 00969 O << "\n"; 00970 00971 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 00972 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 00973 00974 O << ".target "; 00975 O << nvptxSubtarget.getTargetName(); 00976 00977 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 00978 O << ", texmode_independent"; 00979 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 00980 if (!nvptxSubtarget.hasDouble()) 00981 O << ", map_f64_to_f32"; 00982 } 00983 00984 if (MAI->doesSupportDebugInformation()) 00985 O << ", debug"; 00986 00987 O << "\n"; 00988 00989 O << ".address_size "; 00990 if (nvptxSubtarget.is64Bit()) 00991 O << "64"; 00992 else 00993 O << "32"; 00994 O << "\n"; 00995 00996 O << "\n"; 00997 } 00998 00999 bool NVPTXAsmPrinter::doFinalization(Module &M) { 01000 01001 // If we did not emit any functions, then the global declarations have not 01002 // yet been emitted. 
01003 if (!GlobalsEmitted) { 01004 emitGlobals(M); 01005 GlobalsEmitted = true; 01006 } 01007 01008 // XXX Temproarily remove global variables so that doFinalization() will not 01009 // emit them again (global variables are emitted at beginning). 01010 01011 Module::GlobalListType &global_list = M.getGlobalList(); 01012 int i, n = global_list.size(); 01013 GlobalVariable **gv_array = new GlobalVariable *[n]; 01014 01015 // first, back-up GlobalVariable in gv_array 01016 i = 0; 01017 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 01018 I != E; ++I) 01019 gv_array[i++] = &*I; 01020 01021 // second, empty global_list 01022 while (!global_list.empty()) 01023 global_list.remove(global_list.begin()); 01024 01025 // call doFinalization 01026 bool ret = AsmPrinter::doFinalization(M); 01027 01028 // now we restore global variables 01029 for (i = 0; i < n; i++) 01030 global_list.insert(global_list.end(), gv_array[i]); 01031 01032 delete[] gv_array; 01033 return ret; 01034 01035 //bool Result = AsmPrinter::doFinalization(M); 01036 // Instead of calling the parents doFinalization, we may 01037 // clone parents doFinalization and customize here. 01038 // Currently, we if NVISA out the EmitGlobals() in 01039 // parent's doFinalization, which is too intrusive. 01040 // 01041 // Same for the doInitialization. 01042 //return Result; 01043 } 01044 01045 // This function emits appropriate linkage directives for 01046 // functions and global variables. 01047 // 01048 // extern function declaration -> .extern 01049 // extern function definition -> .visible 01050 // external global variable with init -> .visible 01051 // external without init -> .extern 01052 // appending -> not allowed, assert. 
01053 01054 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 01055 raw_ostream &O) { 01056 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 01057 if (V->hasExternalLinkage()) { 01058 if (isa<GlobalVariable>(V)) { 01059 const GlobalVariable *GVar = cast<GlobalVariable>(V); 01060 if (GVar) { 01061 if (GVar->hasInitializer()) 01062 O << ".visible "; 01063 else 01064 O << ".extern "; 01065 } 01066 } else if (V->isDeclaration()) 01067 O << ".extern "; 01068 else 01069 O << ".visible "; 01070 } else if (V->hasAppendingLinkage()) { 01071 std::string msg; 01072 msg.append("Error: "); 01073 msg.append("Symbol "); 01074 if (V->hasName()) 01075 msg.append(V->getName().str()); 01076 msg.append("has unsupported appending linkage type"); 01077 llvm_unreachable(msg.c_str()); 01078 } 01079 } 01080 } 01081 01082 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 01083 raw_ostream &O, 01084 bool processDemoted) { 01085 01086 // Skip meta data 01087 if (GVar->hasSection()) { 01088 if (GVar->getSection() == "llvm.metadata") 01089 return; 01090 } 01091 01092 const DataLayout *TD = TM.getDataLayout(); 01093 01094 // GlobalVariables are always constant pointers themselves. 01095 const PointerType *PTy = GVar->getType(); 01096 Type *ETy = PTy->getElementType(); 01097 01098 if (GVar->hasExternalLinkage()) { 01099 if (GVar->hasInitializer()) 01100 O << ".visible "; 01101 else 01102 O << ".extern "; 01103 } 01104 01105 if (llvm::isTexture(*GVar)) { 01106 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 01107 return; 01108 } 01109 01110 if (llvm::isSurface(*GVar)) { 01111 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 01112 return; 01113 } 01114 01115 if (GVar->isDeclaration()) { 01116 // (extern) declarations, no definition or initializer 01117 // Currently the only known declaration is for an automatic __local 01118 // (.shared) promoted to global. 
01119 emitPTXGlobalVariable(GVar, O); 01120 O << ";\n"; 01121 return; 01122 } 01123 01124 if (llvm::isSampler(*GVar)) { 01125 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 01126 01127 const Constant *Initializer = NULL; 01128 if (GVar->hasInitializer()) 01129 Initializer = GVar->getInitializer(); 01130 const ConstantInt *CI = NULL; 01131 if (Initializer) 01132 CI = dyn_cast<ConstantInt>(Initializer); 01133 if (CI) { 01134 unsigned sample = CI->getZExtValue(); 01135 01136 O << " = { "; 01137 01138 for (int i = 0, 01139 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 01140 i < 3; i++) { 01141 O << "addr_mode_" << i << " = "; 01142 switch (addr) { 01143 case 0: 01144 O << "wrap"; 01145 break; 01146 case 1: 01147 O << "clamp_to_border"; 01148 break; 01149 case 2: 01150 O << "clamp_to_edge"; 01151 break; 01152 case 3: 01153 O << "wrap"; 01154 break; 01155 case 4: 01156 O << "mirror"; 01157 break; 01158 } 01159 O << ", "; 01160 } 01161 O << "filter_mode = "; 01162 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 01163 case 0: 01164 O << "nearest"; 01165 break; 01166 case 1: 01167 O << "linear"; 01168 break; 01169 case 2: 01170 assert(0 && "Anisotropic filtering is not supported"); 01171 default: 01172 O << "nearest"; 01173 break; 01174 } 01175 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 01176 O << ", force_unnormalized_coords = 1"; 01177 } 01178 O << " }"; 01179 } 01180 01181 O << ";\n"; 01182 return; 01183 } 01184 01185 if (GVar->hasPrivateLinkage()) { 01186 01187 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 01188 return; 01189 01190 // FIXME - need better way (e.g. 
Metadata) to avoid generating this global 01191 if (!strncmp(GVar->getName().data(), "filename", 8)) 01192 return; 01193 if (GVar->use_empty()) 01194 return; 01195 } 01196 01197 const Function *demotedFunc = 0; 01198 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 01199 O << "// " << GVar->getName().str() << " has been demoted\n"; 01200 if (localDecls.find(demotedFunc) != localDecls.end()) 01201 localDecls[demotedFunc].push_back(GVar); 01202 else { 01203 std::vector<const GlobalVariable *> temp; 01204 temp.push_back(GVar); 01205 localDecls[demotedFunc] = temp; 01206 } 01207 return; 01208 } 01209 01210 O << "."; 01211 emitPTXAddressSpace(PTy->getAddressSpace(), O); 01212 if (GVar->getAlignment() == 0) 01213 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 01214 else 01215 O << " .align " << GVar->getAlignment(); 01216 01217 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 01218 O << " ."; 01219 // Special case: ABI requires that we use .u8 for predicates 01220 if (ETy->isIntegerTy(1)) 01221 O << "u8"; 01222 else 01223 O << getPTXFundamentalTypeStr(ETy, false); 01224 O << " "; 01225 O << *Mang->getSymbol(GVar); 01226 01227 // Ptx allows variable initilization only for constant and global state 01228 // spaces. 01229 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 01230 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 01231 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 01232 GVar->hasInitializer()) { 01233 const Constant *Initializer = GVar->getInitializer(); 01234 if (!Initializer->isNullValue()) { 01235 O << " = "; 01236 printScalarConstant(Initializer, O); 01237 } 01238 } 01239 } else { 01240 unsigned int ElementSize = 0; 01241 01242 // Although PTX has direct support for struct type and array type and 01243 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 01244 // targets that support these high level field accesses. 
Structs, arrays 01245 // and vectors are lowered into arrays of bytes. 01246 switch (ETy->getTypeID()) { 01247 case Type::StructTyID: 01248 case Type::ArrayTyID: 01249 case Type::VectorTyID: 01250 ElementSize = TD->getTypeStoreSize(ETy); 01251 // Ptx allows variable initilization only for constant and 01252 // global state spaces. 01253 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 01254 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 01255 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 01256 GVar->hasInitializer()) { 01257 const Constant *Initializer = GVar->getInitializer(); 01258 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 01259 AggBuffer aggBuffer(ElementSize, O, *this); 01260 bufferAggregateConstant(Initializer, &aggBuffer); 01261 if (aggBuffer.numSymbols) { 01262 if (nvptxSubtarget.is64Bit()) { 01263 O << " .u64 " << *Mang->getSymbol(GVar) << "["; 01264 O << ElementSize / 8; 01265 } else { 01266 O << " .u32 " << *Mang->getSymbol(GVar) << "["; 01267 O << ElementSize / 4; 01268 } 01269 O << "]"; 01270 } else { 01271 O << " .b8 " << *Mang->getSymbol(GVar) << "["; 01272 O << ElementSize; 01273 O << "]"; 01274 } 01275 O << " = {"; 01276 aggBuffer.print(); 01277 O << "}"; 01278 } else { 01279 O << " .b8 " << *Mang->getSymbol(GVar); 01280 if (ElementSize) { 01281 O << "["; 01282 O << ElementSize; 01283 O << "]"; 01284 } 01285 } 01286 } else { 01287 O << " .b8 " << *Mang->getSymbol(GVar); 01288 if (ElementSize) { 01289 O << "["; 01290 O << ElementSize; 01291 O << "]"; 01292 } 01293 } 01294 break; 01295 default: 01296 assert(0 && "type not supported yet"); 01297 } 01298 01299 } 01300 O << ";\n"; 01301 } 01302 01303 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 01304 if (localDecls.find(f) == localDecls.end()) 01305 return; 01306 01307 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 01308 01309 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 01310 O << 
"\t// demoted variable\n\t"; 01311 printModuleLevelGV(gvars[i], O, true); 01312 } 01313 } 01314 01315 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 01316 raw_ostream &O) const { 01317 switch (AddressSpace) { 01318 case llvm::ADDRESS_SPACE_LOCAL: 01319 O << "local"; 01320 break; 01321 case llvm::ADDRESS_SPACE_GLOBAL: 01322 O << "global"; 01323 break; 01324 case llvm::ADDRESS_SPACE_CONST: 01325 // This logic should be consistent with that in 01326 // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) 01327 if (nvptxSubtarget.hasGenericLdSt()) 01328 O << "global"; 01329 else 01330 O << "const"; 01331 break; 01332 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 01333 O << "const"; 01334 break; 01335 case llvm::ADDRESS_SPACE_SHARED: 01336 O << "shared"; 01337 break; 01338 default: 01339 report_fatal_error("Bad address space found while emitting PTX"); 01340 break; 01341 } 01342 } 01343 01344 std::string 01345 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 01346 switch (Ty->getTypeID()) { 01347 default: 01348 llvm_unreachable("unexpected type"); 01349 break; 01350 case Type::IntegerTyID: { 01351 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 01352 if (NumBits == 1) 01353 return "pred"; 01354 else if (NumBits <= 64) { 01355 std::string name = "u"; 01356 return name + utostr(NumBits); 01357 } else { 01358 llvm_unreachable("Integer too large"); 01359 break; 01360 } 01361 break; 01362 } 01363 case Type::FloatTyID: 01364 return "f32"; 01365 case Type::DoubleTyID: 01366 return "f64"; 01367 case Type::PointerTyID: 01368 if (nvptxSubtarget.is64Bit()) 01369 if (useB4PTR) 01370 return "b64"; 01371 else 01372 return "u64"; 01373 else if (useB4PTR) 01374 return "b32"; 01375 else 01376 return "u32"; 01377 } 01378 llvm_unreachable("unexpected type"); 01379 return NULL; 01380 } 01381 01382 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 01383 raw_ostream &O) { 01384 01385 const DataLayout *TD = 
TM.getDataLayout(); 01386 01387 // GlobalVariables are always constant pointers themselves. 01388 const PointerType *PTy = GVar->getType(); 01389 Type *ETy = PTy->getElementType(); 01390 01391 O << "."; 01392 emitPTXAddressSpace(PTy->getAddressSpace(), O); 01393 if (GVar->getAlignment() == 0) 01394 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 01395 else 01396 O << " .align " << GVar->getAlignment(); 01397 01398 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 01399 O << " ."; 01400 O << getPTXFundamentalTypeStr(ETy); 01401 O << " "; 01402 O << *Mang->getSymbol(GVar); 01403 return; 01404 } 01405 01406 int64_t ElementSize = 0; 01407 01408 // Although PTX has direct support for struct type and array type and LLVM IR 01409 // is very similar to PTX, the LLVM CodeGen does not support for targets that 01410 // support these high level field accesses. Structs and arrays are lowered 01411 // into arrays of bytes. 01412 switch (ETy->getTypeID()) { 01413 case Type::StructTyID: 01414 case Type::ArrayTyID: 01415 case Type::VectorTyID: 01416 ElementSize = TD->getTypeStoreSize(ETy); 01417 O << " .b8 " << *Mang->getSymbol(GVar) << "["; 01418 if (ElementSize) { 01419 O << itostr(ElementSize); 01420 } 01421 O << "]"; 01422 break; 01423 default: 01424 assert(0 && "type not supported yet"); 01425 } 01426 return; 01427 } 01428 01429 static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 01430 if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) 01431 return TD->getPrefTypeAlignment(Ty); 01432 01433 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 01434 if (ATy) 01435 return getOpenCLAlignment(TD, ATy->getElementType()); 01436 01437 const VectorType *VTy = dyn_cast<VectorType>(Ty); 01438 if (VTy) { 01439 Type *ETy = VTy->getElementType(); 01440 unsigned int numE = VTy->getNumElements(); 01441 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 01442 if (numE == 3) 01443 return 4 * alignE; 01444 else 01445 
return numE * alignE; 01446 } 01447 01448 const StructType *STy = dyn_cast<StructType>(Ty); 01449 if (STy) { 01450 unsigned int alignStruct = 1; 01451 // Go through each element of the struct and find the 01452 // largest alignment. 01453 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 01454 Type *ETy = STy->getElementType(i); 01455 unsigned int align = getOpenCLAlignment(TD, ETy); 01456 if (align > alignStruct) 01457 alignStruct = align; 01458 } 01459 return alignStruct; 01460 } 01461 01462 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 01463 if (FTy) 01464 return TD->getPointerPrefAlignment(); 01465 return TD->getPrefTypeAlignment(Ty); 01466 } 01467 01468 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 01469 int paramIndex, raw_ostream &O) { 01470 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 01471 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 01472 O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex; 01473 else { 01474 std::string argName = I->getName(); 01475 const char *p = argName.c_str(); 01476 while (*p) { 01477 if (*p == '.') 01478 O << "_"; 01479 else 01480 O << *p; 01481 p++; 01482 } 01483 } 01484 } 01485 01486 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 01487 Function::const_arg_iterator I, E; 01488 int i = 0; 01489 01490 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 01491 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 01492 O << *CurrentFnSym << "_param_" << paramIndex; 01493 return; 01494 } 01495 01496 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 01497 if (i == paramIndex) { 01498 printParamName(I, paramIndex, O); 01499 return; 01500 } 01501 } 01502 llvm_unreachable("paramIndex out of bound"); 01503 } 01504 01505 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 01506 const DataLayout *TD = TM.getDataLayout(); 01507 const AttributeSet &PAL = F->getAttributes(); 01508 const TargetLowering 
*TLI = TM.getTargetLowering(); 01509 Function::const_arg_iterator I, E; 01510 unsigned paramIndex = 0; 01511 bool first = true; 01512 bool isKernelFunc = llvm::isKernelFunction(*F); 01513 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 01514 MVT thePointerTy = TLI->getPointerTy(); 01515 01516 O << "(\n"; 01517 01518 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 01519 Type *Ty = I->getType(); 01520 01521 if (!first) 01522 O << ",\n"; 01523 01524 first = false; 01525 01526 // Handle image/sampler parameters 01527 if (llvm::isSampler(*I) || llvm::isImage(*I)) { 01528 if (llvm::isImage(*I)) { 01529 std::string sname = I->getName(); 01530 if (llvm::isImageWriteOnly(*I)) 01531 O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_" 01532 << paramIndex; 01533 else // Default image is read_only 01534 O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_" 01535 << paramIndex; 01536 } else // Should be llvm::isSampler(*I) 01537 O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_" 01538 << paramIndex; 01539 continue; 01540 } 01541 01542 if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 01543 if (Ty->isVectorTy()) { 01544 // Just print .param .b8 .align <a> .param[size]; 01545 // <a> = PAL.getparamalignment 01546 // size = typeallocsize of element type 01547 unsigned align = PAL.getParamAlignment(paramIndex + 1); 01548 if (align == 0) 01549 align = TD->getABITypeAlignment(Ty); 01550 01551 unsigned sz = TD->getTypeAllocSize(Ty); 01552 O << "\t.param .align " << align << " .b8 "; 01553 printParamName(I, paramIndex, O); 01554 O << "[" << sz << "]"; 01555 01556 continue; 01557 } 01558 // Just a scalar 01559 const PointerType *PTy = dyn_cast<PointerType>(Ty); 01560 if (isKernelFunc) { 01561 if (PTy) { 01562 // Special handling for pointer arguments to kernel 01563 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 01564 01565 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 01566 Type *ETy = 
PTy->getElementType(); 01567 int addrSpace = PTy->getAddressSpace(); 01568 switch (addrSpace) { 01569 default: 01570 O << ".ptr "; 01571 break; 01572 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 01573 O << ".ptr .const "; 01574 break; 01575 case llvm::ADDRESS_SPACE_SHARED: 01576 O << ".ptr .shared "; 01577 break; 01578 case llvm::ADDRESS_SPACE_GLOBAL: 01579 case llvm::ADDRESS_SPACE_CONST: 01580 O << ".ptr .global "; 01581 break; 01582 } 01583 O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 01584 } 01585 printParamName(I, paramIndex, O); 01586 continue; 01587 } 01588 01589 // non-pointer scalar to kernel func 01590 O << "\t.param ."; 01591 // Special case: predicate operands become .u8 types 01592 if (Ty->isIntegerTy(1)) 01593 O << "u8"; 01594 else 01595 O << getPTXFundamentalTypeStr(Ty); 01596 O << " "; 01597 printParamName(I, paramIndex, O); 01598 continue; 01599 } 01600 // Non-kernel function, just print .param .b<size> for ABI 01601 // and .reg .b<size> for non ABY 01602 unsigned sz = 0; 01603 if (isa<IntegerType>(Ty)) { 01604 sz = cast<IntegerType>(Ty)->getBitWidth(); 01605 if (sz < 32) 01606 sz = 32; 01607 } else if (isa<PointerType>(Ty)) 01608 sz = thePointerTy.getSizeInBits(); 01609 else 01610 sz = Ty->getPrimitiveSizeInBits(); 01611 if (isABI) 01612 O << "\t.param .b" << sz << " "; 01613 else 01614 O << "\t.reg .b" << sz << " "; 01615 printParamName(I, paramIndex, O); 01616 continue; 01617 } 01618 01619 // param has byVal attribute. 
So should be a pointer 01620 const PointerType *PTy = dyn_cast<PointerType>(Ty); 01621 assert(PTy && "Param with byval attribute should be a pointer type"); 01622 Type *ETy = PTy->getElementType(); 01623 01624 if (isABI || isKernelFunc) { 01625 // Just print .param .b8 .align <a> .param[size]; 01626 // <a> = PAL.getparamalignment 01627 // size = typeallocsize of element type 01628 unsigned align = PAL.getParamAlignment(paramIndex + 1); 01629 if (align == 0) 01630 align = TD->getABITypeAlignment(ETy); 01631 01632 unsigned sz = TD->getTypeAllocSize(ETy); 01633 O << "\t.param .align " << align << " .b8 "; 01634 printParamName(I, paramIndex, O); 01635 O << "[" << sz << "]"; 01636 continue; 01637 } else { 01638 // Split the ETy into constituent parts and 01639 // print .param .b<size> <name> for each part. 01640 // Further, if a part is vector, print the above for 01641 // each vector element. 01642 SmallVector<EVT, 16> vtparts; 01643 ComputeValueVTs(*TLI, ETy, vtparts); 01644 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 01645 unsigned elems = 1; 01646 EVT elemtype = vtparts[i]; 01647 if (vtparts[i].isVector()) { 01648 elems = vtparts[i].getVectorNumElements(); 01649 elemtype = vtparts[i].getVectorElementType(); 01650 } 01651 01652 for (unsigned j = 0, je = elems; j != je; ++j) { 01653 unsigned sz = elemtype.getSizeInBits(); 01654 if (elemtype.isInteger() && (sz < 32)) 01655 sz = 32; 01656 O << "\t.reg .b" << sz << " "; 01657 printParamName(I, paramIndex, O); 01658 if (j < je - 1) 01659 O << ",\n"; 01660 ++paramIndex; 01661 } 01662 if (i < e - 1) 01663 O << ",\n"; 01664 } 01665 --paramIndex; 01666 continue; 01667 } 01668 } 01669 01670 O << "\n)\n"; 01671 } 01672 01673 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 01674 raw_ostream &O) { 01675 const Function *F = MF.getFunction(); 01676 emitFunctionParamList(F, O); 01677 } 01678 01679 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 01680 const MachineFunction &MF) { 01681 
SmallString<128> Str; 01682 raw_svector_ostream O(Str); 01683 01684 // Map the global virtual register number to a register class specific 01685 // virtual register number starting from 1 with that class. 01686 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 01687 //unsigned numRegClasses = TRI->getNumRegClasses(); 01688 01689 // Emit the Fake Stack Object 01690 const MachineFrameInfo *MFI = MF.getFrameInfo(); 01691 int NumBytes = (int) MFI->getStackSize(); 01692 if (NumBytes) { 01693 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 01694 << getFunctionNumber() << "[" << NumBytes << "];\n"; 01695 if (nvptxSubtarget.is64Bit()) { 01696 O << "\t.reg .b64 \t%SP;\n"; 01697 O << "\t.reg .b64 \t%SPL;\n"; 01698 } else { 01699 O << "\t.reg .b32 \t%SP;\n"; 01700 O << "\t.reg .b32 \t%SPL;\n"; 01701 } 01702 } 01703 01704 // Go through all virtual registers to establish the mapping between the 01705 // global virtual 01706 // register number and the per class virtual register number. 01707 // We use the per class virtual register number in the ptx output. 
01708 unsigned int numVRs = MRI->getNumVirtRegs(); 01709 for (unsigned i = 0; i < numVRs; i++) { 01710 unsigned int vr = TRI->index2VirtReg(i); 01711 const TargetRegisterClass *RC = MRI->getRegClass(vr); 01712 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; 01713 int n = regmap.size(); 01714 regmap.insert(std::make_pair(vr, n + 1)); 01715 } 01716 01717 // Emit register declarations 01718 // @TODO: Extract out the real register usage 01719 O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 01720 O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 01721 O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 01722 O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 01723 O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 01724 O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 01725 O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 01726 01727 // Emit declaration of the virtual registers or 'physical' registers for 01728 // each register class 01729 //for (unsigned i=0; i< numRegClasses; i++) { 01730 // std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[i]; 01731 // const TargetRegisterClass *RC = TRI->getRegClass(i); 01732 // std::string rcname = getNVPTXRegClassName(RC); 01733 // std::string rcStr = getNVPTXRegClassStr(RC); 01734 // //int n = regmap.size(); 01735 // if (!isNVPTXVectorRegClass(RC)) { 01736 // O << "\t.reg " << rcname << " \t" << rcStr << "<" 01737 // << NVPTXNumRegisters << ">;\n"; 01738 // } 01739 01740 // Only declare those registers that may be used. And do not emit vector 01741 // registers as 01742 // they are all elementized to scalar registers. 
01743 //if (n && !isNVPTXVectorRegClass(RC)) { 01744 // if (RegAllocNilUsed) { 01745 // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 01746 // << ">;\n"; 01747 // } 01748 // else { 01749 // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr) 01750 // << "<" << 32 << ">;\n"; 01751 // } 01752 //} 01753 //} 01754 01755 OutStreamer.EmitRawText(O.str()); 01756 } 01757 01758 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 01759 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 01760 bool ignored; 01761 unsigned int numHex; 01762 const char *lead; 01763 01764 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 01765 numHex = 8; 01766 lead = "0f"; 01767 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 01768 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 01769 numHex = 16; 01770 lead = "0d"; 01771 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 01772 } else 01773 llvm_unreachable("unsupported fp type"); 01774 01775 APInt API = APF.bitcastToAPInt(); 01776 std::string hexstr(utohexstr(API.getZExtValue())); 01777 O << lead; 01778 if (hexstr.length() < numHex) 01779 O << std::string(numHex - hexstr.length(), '0'); 01780 O << utohexstr(API.getZExtValue()); 01781 } 01782 01783 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 01784 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 01785 O << CI->getValue(); 01786 return; 01787 } 01788 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 01789 printFPConstant(CFP, O); 01790 return; 01791 } 01792 if (isa<ConstantPointerNull>(CPV)) { 01793 O << "0"; 01794 return; 01795 } 01796 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 01797 O << *Mang->getSymbol(GVar); 01798 return; 01799 } 01800 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 01801 const Value *v = Cexpr->stripPointerCasts(); 01802 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 
01803 O << *Mang->getSymbol(GVar); 01804 return; 01805 } else { 01806 O << *LowerConstant(CPV, *this); 01807 return; 01808 } 01809 } 01810 llvm_unreachable("Not scalar type found in printScalarConstant()"); 01811 } 01812 01813 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 01814 AggBuffer *aggBuffer) { 01815 01816 const DataLayout *TD = TM.getDataLayout(); 01817 01818 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 01819 int s = TD->getTypeAllocSize(CPV->getType()); 01820 if (s < Bytes) 01821 s = Bytes; 01822 aggBuffer->addZeros(s); 01823 return; 01824 } 01825 01826 unsigned char *ptr; 01827 switch (CPV->getType()->getTypeID()) { 01828 01829 case Type::IntegerTyID: { 01830 const Type *ETy = CPV->getType(); 01831 if (ETy == Type::getInt8Ty(CPV->getContext())) { 01832 unsigned char c = 01833 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 01834 ptr = &c; 01835 aggBuffer->addBytes(ptr, 1, Bytes); 01836 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 01837 short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 01838 ptr = (unsigned char *)&int16; 01839 aggBuffer->addBytes(ptr, 2, Bytes); 01840 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 01841 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 01842 int int32 = (int)(constInt->getZExtValue()); 01843 ptr = (unsigned char *)&int32; 01844 aggBuffer->addBytes(ptr, 4, Bytes); 01845 break; 01846 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 01847 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 01848 ConstantFoldConstantExpression(Cexpr, TD))) { 01849 int int32 = (int)(constInt->getZExtValue()); 01850 ptr = (unsigned char *)&int32; 01851 aggBuffer->addBytes(ptr, 4, Bytes); 01852 break; 01853 } 01854 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 01855 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 01856 aggBuffer->addSymbol(v); 01857 aggBuffer->addZeros(4); 01858 break; 01859 } 01860 } 01861 
llvm_unreachable("unsupported integer const type"); 01862 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 01863 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 01864 long long int64 = (long long)(constInt->getZExtValue()); 01865 ptr = (unsigned char *)&int64; 01866 aggBuffer->addBytes(ptr, 8, Bytes); 01867 break; 01868 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 01869 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 01870 ConstantFoldConstantExpression(Cexpr, TD))) { 01871 long long int64 = (long long)(constInt->getZExtValue()); 01872 ptr = (unsigned char *)&int64; 01873 aggBuffer->addBytes(ptr, 8, Bytes); 01874 break; 01875 } 01876 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 01877 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 01878 aggBuffer->addSymbol(v); 01879 aggBuffer->addZeros(8); 01880 break; 01881 } 01882 } 01883 llvm_unreachable("unsupported integer const type"); 01884 } else 01885 llvm_unreachable("unsupported integer const type"); 01886 break; 01887 } 01888 case Type::FloatTyID: 01889 case Type::DoubleTyID: { 01890 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 01891 const Type *Ty = CFP->getType(); 01892 if (Ty == Type::getFloatTy(CPV->getContext())) { 01893 float float32 = (float) CFP->getValueAPF().convertToFloat(); 01894 ptr = (unsigned char *)&float32; 01895 aggBuffer->addBytes(ptr, 4, Bytes); 01896 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 01897 double float64 = CFP->getValueAPF().convertToDouble(); 01898 ptr = (unsigned char *)&float64; 01899 aggBuffer->addBytes(ptr, 8, Bytes); 01900 } else { 01901 llvm_unreachable("unsupported fp const type"); 01902 } 01903 break; 01904 } 01905 case Type::PointerTyID: { 01906 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 01907 aggBuffer->addSymbol(GVar); 01908 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 01909 const Value *v = Cexpr->stripPointerCasts(); 01910 
aggBuffer->addSymbol(v); 01911 } 01912 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 01913 aggBuffer->addZeros(s); 01914 break; 01915 } 01916 01917 case Type::ArrayTyID: 01918 case Type::VectorTyID: 01919 case Type::StructTyID: { 01920 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 01921 isa<ConstantStruct>(CPV)) { 01922 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 01923 bufferAggregateConstant(CPV, aggBuffer); 01924 if (Bytes > ElementSize) 01925 aggBuffer->addZeros(Bytes - ElementSize); 01926 } else if (isa<ConstantAggregateZero>(CPV)) 01927 aggBuffer->addZeros(Bytes); 01928 else 01929 llvm_unreachable("Unexpected Constant type"); 01930 break; 01931 } 01932 01933 default: 01934 llvm_unreachable("unsupported type"); 01935 } 01936 } 01937 01938 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 01939 AggBuffer *aggBuffer) { 01940 const DataLayout *TD = TM.getDataLayout(); 01941 int Bytes; 01942 01943 // Old constants 01944 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 01945 if (CPV->getNumOperands()) 01946 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 01947 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 01948 return; 01949 } 01950 01951 if (const ConstantDataSequential *CDS = 01952 dyn_cast<ConstantDataSequential>(CPV)) { 01953 if (CDS->getNumElements()) 01954 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 01955 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 01956 aggBuffer); 01957 return; 01958 } 01959 01960 if (isa<ConstantStruct>(CPV)) { 01961 if (CPV->getNumOperands()) { 01962 StructType *ST = cast<StructType>(CPV->getType()); 01963 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 01964 if (i == (e - 1)) 01965 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 01966 TD->getTypeAllocSize(ST) - 01967 TD->getStructLayout(ST)->getElementOffset(i); 01968 else 01969 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 01970 
TD->getStructLayout(ST)->getElementOffset(i); 01971 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 01972 } 01973 } 01974 return; 01975 } 01976 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 01977 } 01978 01979 // buildTypeNameMap - Run through symbol table looking for type names. 01980 // 01981 01982 bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 01983 01984 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 01985 01986 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 01987 !PI->second.compare("struct._image2d_t") || 01988 !PI->second.compare("struct._image3d_t"))) 01989 return true; 01990 01991 return false; 01992 } 01993 01994 /// PrintAsmOperand - Print out an operand for an inline asm expression. 01995 /// 01996 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 01997 unsigned AsmVariant, 01998 const char *ExtraCode, raw_ostream &O) { 01999 if (ExtraCode && ExtraCode[0]) { 02000 if (ExtraCode[1] != 0) 02001 return true; // Unknown modifier. 
02002 02003 switch (ExtraCode[0]) { 02004 default: 02005 // See if this is a generic print operand 02006 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 02007 case 'r': 02008 break; 02009 } 02010 } 02011 02012 printOperand(MI, OpNo, O); 02013 02014 return false; 02015 } 02016 02017 bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 02018 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 02019 const char *ExtraCode, raw_ostream &O) { 02020 if (ExtraCode && ExtraCode[0]) 02021 return true; // Unknown modifier 02022 02023 O << '['; 02024 printMemOperand(MI, OpNo, O); 02025 O << ']'; 02026 02027 return false; 02028 } 02029 02030 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 02031 switch (MI.getOpcode()) { 02032 default: 02033 return false; 02034 case NVPTX::CallArgBeginInst: 02035 case NVPTX::CallArgEndInst0: 02036 case NVPTX::CallArgEndInst1: 02037 case NVPTX::CallArgF32: 02038 case NVPTX::CallArgF64: 02039 case NVPTX::CallArgI16: 02040 case NVPTX::CallArgI32: 02041 case NVPTX::CallArgI32imm: 02042 case NVPTX::CallArgI64: 02043 case NVPTX::CallArgI8: 02044 case NVPTX::CallArgParam: 02045 case NVPTX::CallVoidInst: 02046 case NVPTX::CallVoidInstReg: 02047 case NVPTX::Callseq_End: 02048 case NVPTX::CallVoidInstReg64: 02049 case NVPTX::DeclareParamInst: 02050 case NVPTX::DeclareRetMemInst: 02051 case NVPTX::DeclareRetRegInst: 02052 case NVPTX::DeclareRetScalarInst: 02053 case NVPTX::DeclareScalarParamInst: 02054 case NVPTX::DeclareScalarRegInst: 02055 case NVPTX::StoreParamF32: 02056 case NVPTX::StoreParamF64: 02057 case NVPTX::StoreParamI16: 02058 case NVPTX::StoreParamI32: 02059 case NVPTX::StoreParamI64: 02060 case NVPTX::StoreParamI8: 02061 case NVPTX::StoreParamS32I8: 02062 case NVPTX::StoreParamU32I8: 02063 case NVPTX::StoreParamS32I16: 02064 case NVPTX::StoreParamU32I16: 02065 case NVPTX::StoreRetvalF32: 02066 case NVPTX::StoreRetvalF64: 02067 case NVPTX::StoreRetvalI16: 02068 case NVPTX::StoreRetvalI32: 02069 case 
NVPTX::StoreRetvalI64: 02070 case NVPTX::StoreRetvalI8: 02071 case NVPTX::LastCallArgF32: 02072 case NVPTX::LastCallArgF64: 02073 case NVPTX::LastCallArgI16: 02074 case NVPTX::LastCallArgI32: 02075 case NVPTX::LastCallArgI32imm: 02076 case NVPTX::LastCallArgI64: 02077 case NVPTX::LastCallArgI8: 02078 case NVPTX::LastCallArgParam: 02079 case NVPTX::LoadParamMemF32: 02080 case NVPTX::LoadParamMemF64: 02081 case NVPTX::LoadParamMemI16: 02082 case NVPTX::LoadParamMemI32: 02083 case NVPTX::LoadParamMemI64: 02084 case NVPTX::LoadParamMemI8: 02085 case NVPTX::LoadParamRegF32: 02086 case NVPTX::LoadParamRegF64: 02087 case NVPTX::LoadParamRegI16: 02088 case NVPTX::LoadParamRegI32: 02089 case NVPTX::LoadParamRegI64: 02090 case NVPTX::LoadParamRegI8: 02091 case NVPTX::PrototypeInst: 02092 case NVPTX::DBG_VALUE: 02093 return true; 02094 } 02095 return false; 02096 } 02097 02098 // Force static initialization. 02099 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 02100 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 02101 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 02102 } 02103 02104 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 02105 std::stringstream temp; 02106 LineReader *reader = this->getReader(filename.str()); 02107 temp << "\n//"; 02108 temp << filename.str(); 02109 temp << ":"; 02110 temp << line; 02111 temp << " "; 02112 temp << reader->readLine(line); 02113 temp << "\n"; 02114 this->OutStreamer.EmitRawText(Twine(temp.str())); 02115 } 02116 02117 LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 02118 if (reader == NULL) { 02119 reader = new LineReader(filename); 02120 } 02121 02122 if (reader->fileName() != filename) { 02123 delete reader; 02124 reader = new LineReader(filename); 02125 } 02126 02127 return reader; 02128 } 02129 02130 std::string LineReader::readLine(unsigned lineNum) { 02131 if (lineNum < theCurLine) { 02132 theCurLine = 0; 02133 fstr.seekg(0, std::ios::beg); 02134 } 02135 while 
(theCurLine < lineNum) { 02136 fstr.getline(buff, 500); 02137 theCurLine++; 02138 } 02139 return buff; 02140 } 02141 02142 // Force static initialization. 02143 extern "C" void LLVMInitializeNVPTXAsmPrinter() { 02144 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 02145 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 02146 }