LLVM  4.0.0
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a printer that converts from our internal representation
11 // of machine-dependent LLVM code to NVPTX assembly language.
12 //
13 //===----------------------------------------------------------------------===//
14 
18 #include "NVPTX.h"
19 #include "NVPTXAsmPrinter.h"
20 #include "NVPTXMCExpr.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
26 #include "cl_common_defines.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DenseSet.h"
31 #include "llvm/ADT/SmallString.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Triple.h"
36 #include "llvm/ADT/Twine.h"
38 #include "llvm/CodeGen/Analysis.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/BasicBlock.h"
51 #include "llvm/IR/Constant.h"
52 #include "llvm/IR/Constants.h"
53 #include "llvm/IR/DataLayout.h"
54 #include "llvm/IR/DebugInfo.h"
56 #include "llvm/IR/DebugLoc.h"
57 #include "llvm/IR/DerivedTypes.h"
58 #include "llvm/IR/Function.h"
59 #include "llvm/IR/GlobalValue.h"
60 #include "llvm/IR/GlobalVariable.h"
61 #include "llvm/IR/Instruction.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/Module.h"
64 #include "llvm/IR/Operator.h"
65 #include "llvm/IR/Type.h"
66 #include "llvm/IR/User.h"
67 #include "llvm/MC/MCExpr.h"
68 #include "llvm/MC/MCInst.h"
69 #include "llvm/MC/MCInstrDesc.h"
70 #include "llvm/MC/MCStreamer.h"
71 #include "llvm/MC/MCSymbol.h"
72 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/Path.h"
83 #include <cassert>
84 #include <cstdint>
85 #include <cstring>
86 #include <new>
87 #include <sstream>
88 #include <string>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 
94 #define DEPOTNAME "__local_depot"
95 
96 static cl::opt<bool>
97 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
98  cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
99  cl::init(true));
100 
101 static cl::opt<bool>
102 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
103  cl::desc("NVPTX Specific: Emit source line in ptx file"),
104  cl::init(false));
105 
106 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
107 /// depends.
108 static void
111  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
112  Globals.insert(GV);
113  else {
114  if (const User *U = dyn_cast<User>(V)) {
115  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
116  DiscoverDependentGlobals(U->getOperand(i), Globals);
117  }
118  }
119  }
120 }
121 
122 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
123 /// instances to be emitted, but only after any dependents have been added
124 /// first.
125 static void
130  // Have we already visited this one?
131  if (Visited.count(GV))
132  return;
133 
134  // Do we have a circular dependency?
135  if (!Visiting.insert(GV).second)
136  report_fatal_error("Circular dependency found in global variable set");
137 
138  // Make sure we visit all dependents first
140  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
141  DiscoverDependentGlobals(GV->getOperand(i), Others);
142 
144  E = Others.end();
145  I != E; ++I)
146  VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
147 
148  // Now we can visit ourself
149  Order.push_back(GV);
150  Visited.insert(GV);
151  Visiting.erase(GV);
152 }
153 
155  if (!EmitLineNumbers)
156  return;
157  if (ignoreLoc(MI))
158  return;
159 
160  const DebugLoc &curLoc = MI.getDebugLoc();
161 
162  if (!prevDebugLoc && !curLoc)
163  return;
164 
165  if (prevDebugLoc == curLoc)
166  return;
167 
168  prevDebugLoc = curLoc;
169 
170  if (!curLoc)
171  return;
172 
173  auto *Scope = cast_or_null<DIScope>(curLoc.getScope());
174  if (!Scope)
175  return;
176 
177  StringRef fileName(Scope->getFilename());
178  StringRef dirName(Scope->getDirectory());
179  SmallString<128> FullPathName = dirName;
180  if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
181  sys::path::append(FullPathName, fileName);
182  fileName = FullPathName;
183  }
184 
185  if (filenameMap.find(fileName) == filenameMap.end())
186  return;
187 
188  // Emit the line from the source file.
189  if (InterleaveSrc)
190  this->emitSrcInText(fileName, curLoc.getLine());
191 
192  std::stringstream temp;
193  temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
194  << " " << curLoc.getCol();
195  OutStreamer->EmitRawText(temp.str());
196 }
197 
198 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
199  SmallString<128> Str;
200  raw_svector_ostream OS(Str);
201  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
203 
204  MCInst Inst;
205  lowerToMCInst(MI, Inst);
206  EmitToStreamer(*OutStreamer, Inst);
207 }
208 
209 // Handle symbol backtracking for targets that do not support image handles
210 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
211  unsigned OpNo, MCOperand &MCOp) {
212  const MachineOperand &MO = MI->getOperand(OpNo);
213  const MCInstrDesc &MCID = MI->getDesc();
214 
215  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
216  // This is a texture fetch, so operand 4 is a texref and operand 5 is
217  // a samplerref
218  if (OpNo == 4 && MO.isImm()) {
219  lowerImageHandleSymbol(MO.getImm(), MCOp);
220  return true;
221  }
222  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
223  lowerImageHandleSymbol(MO.getImm(), MCOp);
224  return true;
225  }
226 
227  return false;
228  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
229  unsigned VecSize =
230  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
231 
232  // For a surface load of vector size N, the Nth operand will be the surfref
233  if (OpNo == VecSize && MO.isImm()) {
234  lowerImageHandleSymbol(MO.getImm(), MCOp);
235  return true;
236  }
237 
238  return false;
239  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
240  // This is a surface store, so operand 0 is a surfref
241  if (OpNo == 0 && MO.isImm()) {
242  lowerImageHandleSymbol(MO.getImm(), MCOp);
243  return true;
244  }
245 
246  return false;
247  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
248  // This is a query, so operand 1 is a surfref/texref
249  if (OpNo == 1 && MO.isImm()) {
250  lowerImageHandleSymbol(MO.getImm(), MCOp);
251  return true;
252  }
253 
254  return false;
255  }
256 
257  return false;
258 }
259 
260 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
261  // Ewwww
262  TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
263  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
265  const char *Sym = MFI->getImageHandleSymbol(Index);
266  std::string *SymNamePtr =
267  nvTM.getManagedStrPool()->getManagedString(Sym);
268  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
269 }
270 
271 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
272  OutMI.setOpcode(MI->getOpcode());
273  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
274  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
275  const MachineOperand &MO = MI->getOperand(0);
276  OutMI.addOperand(GetSymbolRef(
278  return;
279  }
280 
281  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
282  const MachineOperand &MO = MI->getOperand(i);
283 
284  MCOperand MCOp;
285  if (!nvptxSubtarget->hasImageHandles()) {
286  if (lowerImageHandleOperand(MI, i, MCOp)) {
287  OutMI.addOperand(MCOp);
288  continue;
289  }
290  }
291 
292  if (lowerOperand(MO, MCOp))
293  OutMI.addOperand(MCOp);
294  }
295 }
296 
297 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
298  MCOperand &MCOp) {
299  switch (MO.getType()) {
300  default: llvm_unreachable("unknown operand type");
302  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
303  break;
305  MCOp = MCOperand::createImm(MO.getImm());
306  break;
309  MO.getMBB()->getSymbol(), OutContext));
310  break;
313  break;
315  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
316  break;
318  const ConstantFP *Cnt = MO.getFPImm();
319  const APFloat &Val = Cnt->getValueAPF();
320 
321  switch (Cnt->getType()->getTypeID()) {
322  default: report_fatal_error("Unsupported FP type"); break;
323  case Type::FloatTyID:
324  MCOp = MCOperand::createExpr(
326  break;
327  case Type::DoubleTyID:
328  MCOp = MCOperand::createExpr(
330  break;
331  }
332  break;
333  }
334  }
335  return true;
336 }
337 
338 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
340  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
341 
342  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
343  unsigned RegNum = RegMap[Reg];
344 
345  // Encode the register class in the upper 4 bits
346  // Must be kept in sync with NVPTXInstPrinter::printRegName
347  unsigned Ret = 0;
348  if (RC == &NVPTX::Int1RegsRegClass) {
349  Ret = (1 << 28);
350  } else if (RC == &NVPTX::Int16RegsRegClass) {
351  Ret = (2 << 28);
352  } else if (RC == &NVPTX::Int32RegsRegClass) {
353  Ret = (3 << 28);
354  } else if (RC == &NVPTX::Int64RegsRegClass) {
355  Ret = (4 << 28);
356  } else if (RC == &NVPTX::Float32RegsRegClass) {
357  Ret = (5 << 28);
358  } else if (RC == &NVPTX::Float64RegsRegClass) {
359  Ret = (6 << 28);
360  } else {
361  report_fatal_error("Bad register class");
362  }
363 
364  // Insert the vreg number
365  Ret |= (RegNum & 0x0FFFFFFF);
366  return Ret;
367  } else {
368  // Some special-use registers are actually physical registers.
369  // Encode this as the register class ID of 0 and the real register ID.
370  return Reg & 0x0FFFFFFF;
371  }
372 }
373 
375  const MCExpr *Expr;
377  OutContext);
378  return MCOperand::createExpr(Expr);
379 }
380 
381 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
382  const DataLayout &DL = getDataLayout();
383  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
384 
385  Type *Ty = F->getReturnType();
386 
387  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
388 
389  if (Ty->getTypeID() == Type::VoidTyID)
390  return;
391 
392  O << " (";
393 
394  if (isABI) {
395  if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
396  unsigned size = 0;
397  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
398  size = ITy->getBitWidth();
399  if (size < 32)
400  size = 32;
401  } else {
402  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
403  size = Ty->getPrimitiveSizeInBits();
404  }
405 
406  O << ".param .b" << size << " func_retval0";
407  } else if (isa<PointerType>(Ty)) {
408  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
409  << " func_retval0";
410  } else if (Ty->isAggregateType() || Ty->isVectorTy()) {
411  unsigned totalsz = DL.getTypeAllocSize(Ty);
412  unsigned retAlignment = 0;
413  if (!getAlign(*F, 0, retAlignment))
414  retAlignment = DL.getABITypeAlignment(Ty);
415  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
416  << "]";
417  } else
418  llvm_unreachable("Unknown return type");
419  } else {
420  SmallVector<EVT, 16> vtparts;
421  ComputeValueVTs(*TLI, DL, Ty, vtparts);
422  unsigned idx = 0;
423  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
424  unsigned elems = 1;
425  EVT elemtype = vtparts[i];
426  if (vtparts[i].isVector()) {
427  elems = vtparts[i].getVectorNumElements();
428  elemtype = vtparts[i].getVectorElementType();
429  }
430 
431  for (unsigned j = 0, je = elems; j != je; ++j) {
432  unsigned sz = elemtype.getSizeInBits();
433  if (elemtype.isInteger() && (sz < 32))
434  sz = 32;
435  O << ".reg .b" << sz << " func_retval" << idx;
436  if (j < je - 1)
437  O << ", ";
438  ++idx;
439  }
440  if (i < e - 1)
441  O << ", ";
442  }
443  }
444  O << ") ";
445 }
446 
447 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
448  raw_ostream &O) {
449  const Function *F = MF.getFunction();
450  printReturnValStr(F, O);
451 }
452 
453 // Return true if MBB is the header of a loop marked with
454 // llvm.loop.unroll.disable.
455 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
456 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
457  const MachineBasicBlock &MBB) const {
458  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
459  // We insert .pragma "nounroll" only to the loop header.
460  if (!LI.isLoopHeader(&MBB))
461  return false;
462 
463  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
464  // we iterate through each back edge of the loop with header MBB, and check
465  // whether its metadata contains llvm.loop.unroll.disable.
466  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
467  const MachineBasicBlock *PMBB = *I;
468  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
469  // Edges from other loops to MBB are not back edges.
470  continue;
471  }
472  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
473  if (MDNode *LoopID =
474  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
475  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
476  return true;
477  }
478  }
479  }
480  return false;
481 }
482 
483 void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
485  if (isLoopHeaderOfNoUnroll(MBB))
486  OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
487 }
488 
489 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
490  SmallString<128> Str;
491  raw_svector_ostream O(Str);
492 
493  if (!GlobalsEmitted) {
494  emitGlobals(*MF->getFunction()->getParent());
495  GlobalsEmitted = true;
496  }
497 
498  // Set up
499  MRI = &MF->getRegInfo();
500  F = MF->getFunction();
501  emitLinkageDirective(F, O);
502  if (isKernelFunction(*F))
503  O << ".entry ";
504  else {
505  O << ".func ";
506  printReturnValStr(*MF, O);
507  }
508 
509  CurrentFnSym->print(O, MAI);
510 
511  emitFunctionParamList(*MF, O);
512 
513  if (isKernelFunction(*F))
514  emitKernelFunctionDirectives(*F, O);
515 
516  OutStreamer->EmitRawText(O.str());
517 
519 }
520 
521 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
522  VRegMapping.clear();
523  OutStreamer->EmitRawText(StringRef("{\n"));
524  setAndEmitFunctionVirtualRegisters(*MF);
525 
526  SmallString<128> Str;
527  raw_svector_ostream O(Str);
528  emitDemotedVars(MF->getFunction(), O);
529  OutStreamer->EmitRawText(O.str());
530 }
531 
532 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
533  OutStreamer->EmitRawText(StringRef("}\n"));
534  VRegMapping.clear();
535 }
536 
537 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
538  unsigned RegNo = MI->getOperand(0).getReg();
540  OutStreamer->AddComment(Twine("implicit-def: ") +
541  getVirtualRegisterName(RegNo));
542  } else {
543  OutStreamer->AddComment(Twine("implicit-def: ") +
544  nvptxSubtarget->getRegisterInfo()->getName(RegNo));
545  }
546  OutStreamer->AddBlankLine();
547 }
548 
549 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
550  raw_ostream &O) const {
551  // If the NVVM IR has some of reqntid* specified, then output
552  // the reqntid directive, and set the unspecified ones to 1.
553  // If none of reqntid* is specified, don't output reqntid directive.
554  unsigned reqntidx, reqntidy, reqntidz;
555  bool specified = false;
556  if (!getReqNTIDx(F, reqntidx))
557  reqntidx = 1;
558  else
559  specified = true;
560  if (!getReqNTIDy(F, reqntidy))
561  reqntidy = 1;
562  else
563  specified = true;
564  if (!getReqNTIDz(F, reqntidz))
565  reqntidz = 1;
566  else
567  specified = true;
568 
569  if (specified)
570  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
571  << "\n";
572 
573  // If the NVVM IR has some of maxntid* specified, then output
574  // the maxntid directive, and set the unspecified ones to 1.
575  // If none of maxntid* is specified, don't output maxntid directive.
576  unsigned maxntidx, maxntidy, maxntidz;
577  specified = false;
578  if (!getMaxNTIDx(F, maxntidx))
579  maxntidx = 1;
580  else
581  specified = true;
582  if (!getMaxNTIDy(F, maxntidy))
583  maxntidy = 1;
584  else
585  specified = true;
586  if (!getMaxNTIDz(F, maxntidz))
587  maxntidz = 1;
588  else
589  specified = true;
590 
591  if (specified)
592  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
593  << "\n";
594 
595  unsigned mincta;
596  if (getMinCTASm(F, mincta))
597  O << ".minnctapersm " << mincta << "\n";
598 
599  unsigned maxnreg;
600  if (getMaxNReg(F, maxnreg))
601  O << ".maxnreg " << maxnreg << "\n";
602 }
603 
604 std::string
606  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
607 
608  std::string Name;
609  raw_string_ostream NameStr(Name);
610 
611  VRegRCMap::const_iterator I = VRegMapping.find(RC);
612  assert(I != VRegMapping.end() && "Bad register class");
613  const DenseMap<unsigned, unsigned> &RegMap = I->second;
614 
615  VRegMap::const_iterator VI = RegMap.find(Reg);
616  assert(VI != RegMap.end() && "Bad virtual register");
617  unsigned MappedVR = VI->second;
618 
619  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
620 
621  NameStr.flush();
622  return Name;
623 }
624 
625 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
626  raw_ostream &O) {
627  O << getVirtualRegisterName(vr);
628 }
629 
630 void NVPTXAsmPrinter::printVecModifiedImmediate(
631  const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
632  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
633  int Imm = (int) MO.getImm();
634  if (0 == strcmp(Modifier, "vecelem"))
635  O << "_" << vecelem[Imm];
636  else if (0 == strcmp(Modifier, "vecv4comm1")) {
637  if ((Imm < 0) || (Imm > 3))
638  O << "//";
639  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
640  if ((Imm < 4) || (Imm > 7))
641  O << "//";
642  } else if (0 == strcmp(Modifier, "vecv4pos")) {
643  if (Imm < 0)
644  Imm = 0;
645  O << "_" << vecelem[Imm % 4];
646  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
647  if ((Imm < 0) || (Imm > 1))
648  O << "//";
649  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
650  if ((Imm < 2) || (Imm > 3))
651  O << "//";
652  } else if (0 == strcmp(Modifier, "vecv2pos")) {
653  if (Imm < 0)
654  Imm = 0;
655  O << "_" << vecelem[Imm % 2];
656  } else
657  llvm_unreachable("Unknown Modifier on immediate operand");
658 }
659 
660 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
661  emitLinkageDirective(F, O);
662  if (isKernelFunction(*F))
663  O << ".entry ";
664  else
665  O << ".func ";
666  printReturnValStr(F, O);
667  getSymbol(F)->print(O, MAI);
668  O << "\n";
669  emitFunctionParamList(F, O);
670  O << ";\n";
671 }
672 
673 static bool usedInGlobalVarDef(const Constant *C) {
674  if (!C)
675  return false;
676 
677  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
678  return GV->getName() != "llvm.used";
679  }
680 
681  for (const User *U : C->users())
682  if (const Constant *C = dyn_cast<Constant>(U))
683  if (usedInGlobalVarDef(C))
684  return true;
685 
686  return false;
687 }
688 
689 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
690  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
691  if (othergv->getName() == "llvm.used")
692  return true;
693  }
694 
695  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
696  if (instr->getParent() && instr->getParent()->getParent()) {
697  const Function *curFunc = instr->getParent()->getParent();
698  if (oneFunc && (curFunc != oneFunc))
699  return false;
700  oneFunc = curFunc;
701  return true;
702  } else
703  return false;
704  }
705 
706  for (const User *UU : U->users())
707  if (!usedInOneFunc(UU, oneFunc))
708  return false;
709 
710  return true;
711 }
712 
713 /* Find out if a global variable can be demoted to local scope.
714  * Currently, this is valid for CUDA shared variables, which have local
715  * scope and global lifetime. So the conditions to check are :
716  * 1. Is the global variable in shared address space?
717  * 2. Does it have internal linkage?
718  * 3. Is the global variable referenced only in one function?
719  */
720 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
721  if (!gv->hasInternalLinkage())
722  return false;
723  PointerType *Pty = gv->getType();
725  return false;
726 
727  const Function *oneFunc = nullptr;
728 
729  bool flag = usedInOneFunc(gv, oneFunc);
730  if (!flag)
731  return false;
732  if (!oneFunc)
733  return false;
734  f = oneFunc;
735  return true;
736 }
737 
738 static bool useFuncSeen(const Constant *C,
740  for (const User *U : C->users()) {
741  if (const Constant *cu = dyn_cast<Constant>(U)) {
742  if (useFuncSeen(cu, seenMap))
743  return true;
744  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
745  const BasicBlock *bb = I->getParent();
746  if (!bb)
747  continue;
748  const Function *caller = bb->getParent();
749  if (!caller)
750  continue;
751  if (seenMap.find(caller) != seenMap.end())
752  return true;
753  }
754  }
755  return false;
756 }
757 
758 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
760  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
761  const Function *F = &*FI;
762 
763  if (F->isDeclaration()) {
764  if (F->use_empty())
765  continue;
766  if (F->getIntrinsicID())
767  continue;
768  emitDeclaration(F, O);
769  continue;
770  }
771  for (const User *U : F->users()) {
772  if (const Constant *C = dyn_cast<Constant>(U)) {
773  if (usedInGlobalVarDef(C)) {
774  // The use is in the initialization of a global variable
775  // that is a function pointer, so print a declaration
776  // for the original function
777  emitDeclaration(F, O);
778  break;
779  }
780  // Emit a declaration of this function if the function that
781  // uses this constant expr has already been seen.
782  if (useFuncSeen(C, seenMap)) {
783  emitDeclaration(F, O);
784  break;
785  }
786  }
787 
788  if (!isa<Instruction>(U))
789  continue;
790  const Instruction *instr = cast<Instruction>(U);
791  const BasicBlock *bb = instr->getParent();
792  if (!bb)
793  continue;
794  const Function *caller = bb->getParent();
795  if (!caller)
796  continue;
797 
798  // If a caller has already been seen, then the caller is
799  // appearing in the module before the callee. so print out
800  // a declaration for the callee.
801  if (seenMap.find(caller) != seenMap.end()) {
802  emitDeclaration(F, O);
803  break;
804  }
805  }
806  seenMap[F] = true;
807  }
808 }
809 
810 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
811  DebugInfoFinder DbgFinder;
812  DbgFinder.processModule(M);
813 
814  unsigned i = 1;
815  for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) {
816  StringRef Filename = DIUnit->getFilename();
817  StringRef Dirname = DIUnit->getDirectory();
818  SmallString<128> FullPathName = Dirname;
819  if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
820  sys::path::append(FullPathName, Filename);
821  Filename = FullPathName;
822  }
823  if (filenameMap.find(Filename) != filenameMap.end())
824  continue;
825  filenameMap[Filename] = i;
826  OutStreamer->EmitDwarfFileDirective(i, "", Filename);
827  ++i;
828  }
829 
830  for (DISubprogram *SP : DbgFinder.subprograms()) {
831  StringRef Filename = SP->getFilename();
832  StringRef Dirname = SP->getDirectory();
833  SmallString<128> FullPathName = Dirname;
834  if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
835  sys::path::append(FullPathName, Filename);
836  Filename = FullPathName;
837  }
838  if (filenameMap.find(Filename) != filenameMap.end())
839  continue;
840  filenameMap[Filename] = i;
841  OutStreamer->EmitDwarfFileDirective(i, "", Filename);
842  ++i;
843  }
844 }
845 
847  if (!GV) return true;
848  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
849  if (!InitList) return true; // Not an array; we don't know how to parse.
850  return InitList->getNumOperands() == 0;
851 }
852 
854  // Construct a default subtarget off of the TargetMachine defaults. The
855  // rest of NVPTX isn't friendly to change subtargets per function and
856  // so the default TargetMachine will have all of the options.
857  const Triple &TT = TM.getTargetTriple();
858  StringRef CPU = TM.getTargetCPU();
860  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
861  const NVPTXSubtarget STI(TT, CPU, FS, NTM);
862 
863  if (M.alias_size()) {
864  report_fatal_error("Module has aliases, which NVPTX does not support.");
865  return true; // error
866  }
867  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
869  "Module has a nontrivial global ctor, which NVPTX does not support.");
870  return true; // error
871  }
872  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
874  "Module has a nontrivial global dtor, which NVPTX does not support.");
875  return true; // error
876  }
877 
878  SmallString<128> Str1;
879  raw_svector_ostream OS1(Str1);
880 
881  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
882 
883  // We need to call the parent's one explicitly.
884  //bool Result = AsmPrinter::doInitialization(M);
885 
886  // Initialize TargetLoweringObjectFile since we didn't do in
887  // AsmPrinter::doInitialization either right above or where it's commented out
888  // below.
890  .Initialize(OutContext, TM);
891 
892  // Emit header before any dwarf directives are emitted below.
893  emitHeader(M, OS1, STI);
894  OutStreamer->EmitRawText(OS1.str());
895 
896  // Already commented out
897  //bool Result = AsmPrinter::doInitialization(M);
898 
899  // Emit module-level inline asm if it exists.
900  if (!M.getModuleInlineAsm().empty()) {
901  OutStreamer->AddComment("Start of file scope inline assembly");
902  OutStreamer->AddBlankLine();
903  OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
904  OutStreamer->AddBlankLine();
905  OutStreamer->AddComment("End of file scope inline assembly");
906  OutStreamer->AddBlankLine();
907  }
908 
909  // If we're not NVCL we're CUDA, go ahead and emit filenames.
910  if (TM.getTargetTriple().getOS() != Triple::NVCL)
911  recordAndEmitFilenames(M);
912 
913  GlobalsEmitted = false;
914 
915  return false; // success
916 }
917 
918 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
919  SmallString<128> Str2;
920  raw_svector_ostream OS2(Str2);
921 
922  emitDeclarations(M, OS2);
923 
924  // As ptxas does not support forward references of globals, we need to first
925  // sort the list of module-level globals in def-use order. We visit each
926  // global variable in order, and ensure that we emit it *after* its dependent
927  // globals. We use a little extra memory maintaining both a set and a list to
928  // have fast searches while maintaining a strict ordering.
932 
933  // Visit each global variable, in order
934  for (const GlobalVariable &I : M.globals())
935  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
936 
937  assert(GVVisited.size() == M.getGlobalList().size() &&
938  "Missed a global variable");
939  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
940 
941  // Print out module-level global variables in proper order
942  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
943  printModuleLevelGV(Globals[i], OS2);
944 
945  OS2 << '\n';
946 
947  OutStreamer->EmitRawText(OS2.str());
948 }
949 
950 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
951  const NVPTXSubtarget &STI) {
952  O << "//\n";
953  O << "// Generated by LLVM NVPTX Back-End\n";
954  O << "//\n";
955  O << "\n";
956 
957  unsigned PTXVersion = STI.getPTXVersion();
958  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
959 
960  O << ".target ";
961  O << STI.getTargetName();
962 
963  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
964  if (NTM.getDrvInterface() == NVPTX::NVCL)
965  O << ", texmode_independent";
966  else {
967  if (!STI.hasDouble())
968  O << ", map_f64_to_f32";
969  }
970 
972  O << ", debug";
973 
974  O << "\n";
975 
976  O << ".address_size ";
977  if (NTM.is64Bit())
978  O << "64";
979  else
980  O << "32";
981  O << "\n";
982 
983  O << "\n";
984 }
985 
987  // If we did not emit any functions, then the global declarations have not
988  // yet been emitted.
989  if (!GlobalsEmitted) {
990  emitGlobals(M);
991  GlobalsEmitted = true;
992  }
993 
994  // XXX Temporarily remove global variables so that doFinalization() will not
995  // emit them again (global variables are emitted at beginning).
996 
997  Module::GlobalListType &global_list = M.getGlobalList();
998  int i, n = global_list.size();
999  GlobalVariable **gv_array = new GlobalVariable *[n];
1000 
1001  // first, back-up GlobalVariable in gv_array
1002  i = 0;
1003  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
1004  I != E; ++I)
1005  gv_array[i++] = &*I;
1006 
1007  // second, empty global_list
1008  while (!global_list.empty())
1009  global_list.remove(global_list.begin());
1010 
1011  // call doFinalization
1012  bool ret = AsmPrinter::doFinalization(M);
1013 
1014  // now we restore global variables
1015  for (i = 0; i < n; i++)
1016  global_list.insert(global_list.end(), gv_array[i]);
1017 
1019 
1020  delete[] gv_array;
1021  return ret;
1022 
1023  //bool Result = AsmPrinter::doFinalization(M);
1024  // Instead of calling the parents doFinalization, we may
1025  // clone parents doFinalization and customize here.
1026  // Currently, we if NVISA out the EmitGlobals() in
1027  // parent's doFinalization, which is too intrusive.
1028  //
1029  // Same for the doInitialization.
1030  //return Result;
1031 }
1032 
1033 // This function emits appropriate linkage directives for
1034 // functions and global variables.
1035 //
1036 // extern function declaration -> .extern
1037 // extern function definition -> .visible
1038 // external global variable with init -> .visible
1039 // external without init -> .extern
1040 // appending -> not allowed, assert.
1041 // for any linkage other than
1042 // internal, private, linker_private,
1043 // linker_private_weak, linker_private_weak_def_auto,
1044 // we emit -> .weak.
1045 
1046 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
1047  raw_ostream &O) {
1048  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
1049  if (V->hasExternalLinkage()) {
1050  if (isa<GlobalVariable>(V)) {
1051  const GlobalVariable *GVar = cast<GlobalVariable>(V);
1052  if (GVar) {
1053  if (GVar->hasInitializer())
1054  O << ".visible ";
1055  else
1056  O << ".extern ";
1057  }
1058  } else if (V->isDeclaration())
1059  O << ".extern ";
1060  else
1061  O << ".visible ";
1062  } else if (V->hasAppendingLinkage()) {
1063  std::string msg;
1064  msg.append("Error: ");
1065  msg.append("Symbol ");
1066  if (V->hasName())
1067  msg.append(V->getName());
1068  msg.append("has unsupported appending linkage type");
1069  llvm_unreachable(msg.c_str());
1070  } else if (!V->hasInternalLinkage() &&
1071  !V->hasPrivateLinkage()) {
1072  O << ".weak ";
1073  }
1074  }
1075 }
1076 
1077 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1078  raw_ostream &O,
1079  bool processDemoted) {
1080  // Skip meta data
1081  if (GVar->hasSection()) {
1082  if (GVar->getSection() == "llvm.metadata")
1083  return;
1084  }
1085 
1086  // Skip LLVM intrinsic global variables
1087  if (GVar->getName().startswith("llvm.") ||
1088  GVar->getName().startswith("nvvm."))
1089  return;
1090 
1091  const DataLayout &DL = getDataLayout();
1092 
1093  // GlobalVariables are always constant pointers themselves.
1094  PointerType *PTy = GVar->getType();
1095  Type *ETy = GVar->getValueType();
1096 
1097  if (GVar->hasExternalLinkage()) {
1098  if (GVar->hasInitializer())
1099  O << ".visible ";
1100  else
1101  O << ".extern ";
1102  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1104  GVar->hasCommonLinkage()) {
1105  O << ".weak ";
1106  }
1107 
1108  if (isTexture(*GVar)) {
1109  O << ".global .texref " << getTextureName(*GVar) << ";\n";
1110  return;
1111  }
1112 
1113  if (isSurface(*GVar)) {
1114  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1115  return;
1116  }
1117 
1118  if (GVar->isDeclaration()) {
1119  // (extern) declarations, no definition or initializer
1120  // Currently the only known declaration is for an automatic __local
1121  // (.shared) promoted to global.
1122  emitPTXGlobalVariable(GVar, O);
1123  O << ";\n";
1124  return;
1125  }
1126 
1127  if (isSampler(*GVar)) {
1128  O << ".global .samplerref " << getSamplerName(*GVar);
1129 
1130  const Constant *Initializer = nullptr;
1131  if (GVar->hasInitializer())
1132  Initializer = GVar->getInitializer();
1133  const ConstantInt *CI = nullptr;
1134  if (Initializer)
1135  CI = dyn_cast<ConstantInt>(Initializer);
1136  if (CI) {
1137  unsigned sample = CI->getZExtValue();
1138 
1139  O << " = { ";
1140 
1141  for (int i = 0,
1142  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1143  i < 3; i++) {
1144  O << "addr_mode_" << i << " = ";
1145  switch (addr) {
1146  case 0:
1147  O << "wrap";
1148  break;
1149  case 1:
1150  O << "clamp_to_border";
1151  break;
1152  case 2:
1153  O << "clamp_to_edge";
1154  break;
1155  case 3:
1156  O << "wrap";
1157  break;
1158  case 4:
1159  O << "mirror";
1160  break;
1161  }
1162  O << ", ";
1163  }
1164  O << "filter_mode = ";
1165  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1166  case 0:
1167  O << "nearest";
1168  break;
1169  case 1:
1170  O << "linear";
1171  break;
1172  case 2:
1173  llvm_unreachable("Anisotropic filtering is not supported");
1174  default:
1175  O << "nearest";
1176  break;
1177  }
1178  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1179  O << ", force_unnormalized_coords = 1";
1180  }
1181  O << " }";
1182  }
1183 
1184  O << ";\n";
1185  return;
1186  }
1187 
1188  if (GVar->hasPrivateLinkage()) {
1189  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1190  return;
1191 
1192  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1193  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1194  return;
1195  if (GVar->use_empty())
1196  return;
1197  }
1198 
1199  const Function *demotedFunc = nullptr;
1200  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1201  O << "// " << GVar->getName() << " has been demoted\n";
1202  if (localDecls.find(demotedFunc) != localDecls.end())
1203  localDecls[demotedFunc].push_back(GVar);
1204  else {
1205  std::vector<const GlobalVariable *> temp;
1206  temp.push_back(GVar);
1207  localDecls[demotedFunc] = temp;
1208  }
1209  return;
1210  }
1211 
1212  O << ".";
1213  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1214 
1215  if (isManaged(*GVar)) {
1216  O << " .attribute(.managed)";
1217  }
1218 
1219  if (GVar->getAlignment() == 0)
1220  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1221  else
1222  O << " .align " << GVar->getAlignment();
1223 
1224  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
1225  O << " .";
1226  // Special case: ABI requires that we use .u8 for predicates
1227  if (ETy->isIntegerTy(1))
1228  O << "u8";
1229  else
1230  O << getPTXFundamentalTypeStr(ETy, false);
1231  O << " ";
1232  getSymbol(GVar)->print(O, MAI);
1233 
1234  // Ptx allows variable initilization only for constant and global state
1235  // spaces.
1236  if (GVar->hasInitializer()) {
1237  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1238  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1239  const Constant *Initializer = GVar->getInitializer();
1240  // 'undef' is treated as there is no value specified.
1241  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1242  O << " = ";
1243  printScalarConstant(Initializer, O);
1244  }
1245  } else {
1246  // The frontend adds zero-initializer to device and constant variables
1247  // that don't have an initial value, and UndefValue to shared
1248  // variables, so skip warning for this case.
1249  if (!GVar->getInitializer()->isNullValue() &&
1250  !isa<UndefValue>(GVar->getInitializer())) {
1251  report_fatal_error("initial value of '" + GVar->getName() +
1252  "' is not allowed in addrspace(" +
1253  Twine(PTy->getAddressSpace()) + ")");
1254  }
1255  }
1256  }
1257  } else {
1258  unsigned int ElementSize = 0;
1259 
1260  // Although PTX has direct support for struct type and array type and
1261  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1262  // targets that support these high level field accesses. Structs, arrays
1263  // and vectors are lowered into arrays of bytes.
1264  switch (ETy->getTypeID()) {
1265  case Type::StructTyID:
1266  case Type::ArrayTyID:
1267  case Type::VectorTyID:
1268  ElementSize = DL.getTypeStoreSize(ETy);
1269  // Ptx allows variable initilization only for constant and
1270  // global state spaces.
1271  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1272  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1273  GVar->hasInitializer()) {
1274  const Constant *Initializer = GVar->getInitializer();
1275  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1276  AggBuffer aggBuffer(ElementSize, O, *this);
1277  bufferAggregateConstant(Initializer, &aggBuffer);
1278  if (aggBuffer.numSymbols) {
1279  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1280  O << " .u64 ";
1281  getSymbol(GVar)->print(O, MAI);
1282  O << "[";
1283  O << ElementSize / 8;
1284  } else {
1285  O << " .u32 ";
1286  getSymbol(GVar)->print(O, MAI);
1287  O << "[";
1288  O << ElementSize / 4;
1289  }
1290  O << "]";
1291  } else {
1292  O << " .b8 ";
1293  getSymbol(GVar)->print(O, MAI);
1294  O << "[";
1295  O << ElementSize;
1296  O << "]";
1297  }
1298  O << " = {";
1299  aggBuffer.print();
1300  O << "}";
1301  } else {
1302  O << " .b8 ";
1303  getSymbol(GVar)->print(O, MAI);
1304  if (ElementSize) {
1305  O << "[";
1306  O << ElementSize;
1307  O << "]";
1308  }
1309  }
1310  } else {
1311  O << " .b8 ";
1312  getSymbol(GVar)->print(O, MAI);
1313  if (ElementSize) {
1314  O << "[";
1315  O << ElementSize;
1316  O << "]";
1317  }
1318  }
1319  break;
1320  default:
1321  llvm_unreachable("type not supported yet");
1322  }
1323  }
1324  O << ";\n";
1325 }
1326 
1327 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1328  if (localDecls.find(f) == localDecls.end())
1329  return;
1330 
1331  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1332 
1333  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1334  O << "\t// demoted variable\n\t";
1335  printModuleLevelGV(gvars[i], O, true);
1336  }
1337 }
1338 
1339 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1340  raw_ostream &O) const {
1341  switch (AddressSpace) {
1342  case ADDRESS_SPACE_LOCAL:
1343  O << "local";
1344  break;
1345  case ADDRESS_SPACE_GLOBAL:
1346  O << "global";
1347  break;
1348  case ADDRESS_SPACE_CONST:
1349  O << "const";
1350  break;
1351  case ADDRESS_SPACE_SHARED:
1352  O << "shared";
1353  break;
1354  default:
1355  report_fatal_error("Bad address space found while emitting PTX");
1356  break;
1357  }
1358 }
1359 
1360 std::string
1361 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1362  switch (Ty->getTypeID()) {
1363  default:
1364  llvm_unreachable("unexpected type");
1365  break;
1366  case Type::IntegerTyID: {
1367  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1368  if (NumBits == 1)
1369  return "pred";
1370  else if (NumBits <= 64) {
1371  std::string name = "u";
1372  return name + utostr(NumBits);
1373  } else {
1374  llvm_unreachable("Integer too large");
1375  break;
1376  }
1377  break;
1378  }
1379  case Type::FloatTyID:
1380  return "f32";
1381  case Type::DoubleTyID:
1382  return "f64";
1383  case Type::PointerTyID:
1384  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1385  if (useB4PTR)
1386  return "b64";
1387  else
1388  return "u64";
1389  else if (useB4PTR)
1390  return "b32";
1391  else
1392  return "u32";
1393  }
1394  llvm_unreachable("unexpected type");
1395  return nullptr;
1396 }
1397 
1398 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1399  raw_ostream &O) {
1400  const DataLayout &DL = getDataLayout();
1401 
1402  // GlobalVariables are always constant pointers themselves.
1403  Type *ETy = GVar->getValueType();
1404 
1405  O << ".";
1406  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1407  if (GVar->getAlignment() == 0)
1408  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1409  else
1410  O << " .align " << GVar->getAlignment();
1411 
1412  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
1413  O << " .";
1414  O << getPTXFundamentalTypeStr(ETy);
1415  O << " ";
1416  getSymbol(GVar)->print(O, MAI);
1417  return;
1418  }
1419 
1420  int64_t ElementSize = 0;
1421 
1422  // Although PTX has direct support for struct type and array type and LLVM IR
1423  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1424  // support these high level field accesses. Structs and arrays are lowered
1425  // into arrays of bytes.
1426  switch (ETy->getTypeID()) {
1427  case Type::StructTyID:
1428  case Type::ArrayTyID:
1429  case Type::VectorTyID:
1430  ElementSize = DL.getTypeStoreSize(ETy);
1431  O << " .b8 ";
1432  getSymbol(GVar)->print(O, MAI);
1433  O << "[";
1434  if (ElementSize) {
1435  O << ElementSize;
1436  }
1437  O << "]";
1438  break;
1439  default:
1440  llvm_unreachable("type not supported yet");
1441  }
1442 }
1443 
1444 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1445  if (Ty->isSingleValueType())
1446  return DL.getPrefTypeAlignment(Ty);
1447 
1448  auto *ATy = dyn_cast<ArrayType>(Ty);
1449  if (ATy)
1450  return getOpenCLAlignment(DL, ATy->getElementType());
1451 
1452  auto *STy = dyn_cast<StructType>(Ty);
1453  if (STy) {
1454  unsigned int alignStruct = 1;
1455  // Go through each element of the struct and find the
1456  // largest alignment.
1457  for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1458  Type *ETy = STy->getElementType(i);
1459  unsigned int align = getOpenCLAlignment(DL, ETy);
1460  if (align > alignStruct)
1461  alignStruct = align;
1462  }
1463  return alignStruct;
1464  }
1465 
1466  auto *FTy = dyn_cast<FunctionType>(Ty);
1467  if (FTy)
1468  return DL.getPointerPrefAlignment();
1469  return DL.getPrefTypeAlignment(Ty);
1470 }
1471 
1472 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1473  int paramIndex, raw_ostream &O) {
1474  getSymbol(I->getParent())->print(O, MAI);
1475  O << "_param_" << paramIndex;
1476 }
1477 
1478 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1479  const DataLayout &DL = getDataLayout();
1480  const AttributeSet &PAL = F->getAttributes();
1481  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
1483  unsigned paramIndex = 0;
1484  bool first = true;
1485  bool isKernelFunc = isKernelFunction(*F);
1486  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
1487  MVT thePointerTy = TLI->getPointerTy(DL);
1488 
1489  if (F->arg_empty()) {
1490  O << "()\n";
1491  return;
1492  }
1493 
1494  O << "(\n";
1495 
1496  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1497  Type *Ty = I->getType();
1498 
1499  if (!first)
1500  O << ",\n";
1501 
1502  first = false;
1503 
1504  // Handle image/sampler parameters
1505  if (isKernelFunction(*F)) {
1506  if (isSampler(*I) || isImage(*I)) {
1507  if (isImage(*I)) {
1508  std::string sname = I->getName();
1509  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1510  if (nvptxSubtarget->hasImageHandles())
1511  O << "\t.param .u64 .ptr .surfref ";
1512  else
1513  O << "\t.param .surfref ";
1514  CurrentFnSym->print(O, MAI);
1515  O << "_param_" << paramIndex;
1516  }
1517  else { // Default image is read_only
1518  if (nvptxSubtarget->hasImageHandles())
1519  O << "\t.param .u64 .ptr .texref ";
1520  else
1521  O << "\t.param .texref ";
1522  CurrentFnSym->print(O, MAI);
1523  O << "_param_" << paramIndex;
1524  }
1525  } else {
1526  if (nvptxSubtarget->hasImageHandles())
1527  O << "\t.param .u64 .ptr .samplerref ";
1528  else
1529  O << "\t.param .samplerref ";
1530  CurrentFnSym->print(O, MAI);
1531  O << "_param_" << paramIndex;
1532  }
1533  continue;
1534  }
1535  }
1536 
1537  if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
1538  if (Ty->isAggregateType() || Ty->isVectorTy()) {
1539  // Just print .param .align <a> .b8 .param[size];
1540  // <a> = PAL.getparamalignment
1541  // size = typeallocsize of element type
1542  unsigned align = PAL.getParamAlignment(paramIndex + 1);
1543  if (align == 0)
1544  align = DL.getABITypeAlignment(Ty);
1545 
1546  unsigned sz = DL.getTypeAllocSize(Ty);
1547  O << "\t.param .align " << align << " .b8 ";
1548  printParamName(I, paramIndex, O);
1549  O << "[" << sz << "]";
1550 
1551  continue;
1552  }
1553  // Just a scalar
1554  auto *PTy = dyn_cast<PointerType>(Ty);
1555  if (isKernelFunc) {
1556  if (PTy) {
1557  // Special handling for pointer arguments to kernel
1558  O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1559 
1560  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1561  NVPTX::CUDA) {
1562  Type *ETy = PTy->getElementType();
1563  int addrSpace = PTy->getAddressSpace();
1564  switch (addrSpace) {
1565  default:
1566  O << ".ptr ";
1567  break;
1568  case ADDRESS_SPACE_CONST:
1569  O << ".ptr .const ";
1570  break;
1571  case ADDRESS_SPACE_SHARED:
1572  O << ".ptr .shared ";
1573  break;
1574  case ADDRESS_SPACE_GLOBAL:
1575  O << ".ptr .global ";
1576  break;
1577  }
1578  O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1579  }
1580  printParamName(I, paramIndex, O);
1581  continue;
1582  }
1583 
1584  // non-pointer scalar to kernel func
1585  O << "\t.param .";
1586  // Special case: predicate operands become .u8 types
1587  if (Ty->isIntegerTy(1))
1588  O << "u8";
1589  else
1590  O << getPTXFundamentalTypeStr(Ty);
1591  O << " ";
1592  printParamName(I, paramIndex, O);
1593  continue;
1594  }
1595  // Non-kernel function, just print .param .b<size> for ABI
1596  // and .reg .b<size> for non-ABI
1597  unsigned sz = 0;
1598  if (isa<IntegerType>(Ty)) {
1599  sz = cast<IntegerType>(Ty)->getBitWidth();
1600  if (sz < 32)
1601  sz = 32;
1602  } else if (isa<PointerType>(Ty))
1603  sz = thePointerTy.getSizeInBits();
1604  else
1605  sz = Ty->getPrimitiveSizeInBits();
1606  if (isABI)
1607  O << "\t.param .b" << sz << " ";
1608  else
1609  O << "\t.reg .b" << sz << " ";
1610  printParamName(I, paramIndex, O);
1611  continue;
1612  }
1613 
1614  // param has byVal attribute. So should be a pointer
1615  auto *PTy = dyn_cast<PointerType>(Ty);
1616  assert(PTy && "Param with byval attribute should be a pointer type");
1617  Type *ETy = PTy->getElementType();
1618 
1619  if (isABI || isKernelFunc) {
1620  // Just print .param .align <a> .b8 .param[size];
1621  // <a> = PAL.getparamalignment
1622  // size = typeallocsize of element type
1623  unsigned align = PAL.getParamAlignment(paramIndex + 1);
1624  if (align == 0)
1625  align = DL.getABITypeAlignment(ETy);
1626  // Work around a bug in ptxas. When PTX code takes address of
1627  // byval parameter with alignment < 4, ptxas generates code to
1628  // spill argument into memory. Alas on sm_50+ ptxas generates
1629  // SASS code that fails with misaligned access. To work around
1630  // the problem, make sure that we align byval parameters by at
1631  // least 4. Matching change must be made in LowerCall() where we
1632  // prepare parameters for the call.
1633  //
1634  // TODO: this will need to be undone when we get to support multi-TU
1635  // device-side compilation as it breaks ABI compatibility with nvcc.
1636  // Hopefully ptxas bug is fixed by then.
1637  if (!isKernelFunc && align < 4)
1638  align = 4;
1639  unsigned sz = DL.getTypeAllocSize(ETy);
1640  O << "\t.param .align " << align << " .b8 ";
1641  printParamName(I, paramIndex, O);
1642  O << "[" << sz << "]";
1643  continue;
1644  } else {
1645  // Split the ETy into constituent parts and
1646  // print .param .b<size> <name> for each part.
1647  // Further, if a part is vector, print the above for
1648  // each vector element.
1649  SmallVector<EVT, 16> vtparts;
1650  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1651  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1652  unsigned elems = 1;
1653  EVT elemtype = vtparts[i];
1654  if (vtparts[i].isVector()) {
1655  elems = vtparts[i].getVectorNumElements();
1656  elemtype = vtparts[i].getVectorElementType();
1657  }
1658 
1659  for (unsigned j = 0, je = elems; j != je; ++j) {
1660  unsigned sz = elemtype.getSizeInBits();
1661  if (elemtype.isInteger() && (sz < 32))
1662  sz = 32;
1663  O << "\t.reg .b" << sz << " ";
1664  printParamName(I, paramIndex, O);
1665  if (j < je - 1)
1666  O << ",\n";
1667  ++paramIndex;
1668  }
1669  if (i < e - 1)
1670  O << ",\n";
1671  }
1672  --paramIndex;
1673  continue;
1674  }
1675  }
1676 
1677  O << "\n)\n";
1678 }
1679 
1680 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1681  raw_ostream &O) {
1682  const Function *F = MF.getFunction();
1683  emitFunctionParamList(F, O);
1684 }
1685 
1686 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1687  const MachineFunction &MF) {
1688  SmallString<128> Str;
1689  raw_svector_ostream O(Str);
1690 
1691  // Map the global virtual register number to a register class specific
1692  // virtual register number starting from 1 with that class.
1693  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1694  //unsigned numRegClasses = TRI->getNumRegClasses();
1695 
1696  // Emit the Fake Stack Object
1697  const MachineFrameInfo &MFI = MF.getFrameInfo();
1698  int NumBytes = (int) MFI.getStackSize();
1699  if (NumBytes) {
1700  O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME
1701  << getFunctionNumber() << "[" << NumBytes << "];\n";
1702  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1703  O << "\t.reg .b64 \t%SP;\n";
1704  O << "\t.reg .b64 \t%SPL;\n";
1705  } else {
1706  O << "\t.reg .b32 \t%SP;\n";
1707  O << "\t.reg .b32 \t%SPL;\n";
1708  }
1709  }
1710 
1711  // Go through all virtual registers to establish the mapping between the
1712  // global virtual
1713  // register number and the per class virtual register number.
1714  // We use the per class virtual register number in the ptx output.
1715  unsigned int numVRs = MRI->getNumVirtRegs();
1716  for (unsigned i = 0; i < numVRs; i++) {
1717  unsigned int vr = TRI->index2VirtReg(i);
1718  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1719  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1720  int n = regmap.size();
1721  regmap.insert(std::make_pair(vr, n + 1));
1722  }
1723 
1724  // Emit register declarations
1725  // @TODO: Extract out the real register usage
1726  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1727  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1728  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1729  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1730  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1731  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1732  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1733 
1734  // Emit declaration of the virtual registers or 'physical' registers for
1735  // each register class
1736  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1737  const TargetRegisterClass *RC = TRI->getRegClass(i);
1738  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1739  std::string rcname = getNVPTXRegClassName(RC);
1740  std::string rcStr = getNVPTXRegClassStr(RC);
1741  int n = regmap.size();
1742 
1743  // Only declare those registers that may be used.
1744  if (n) {
1745  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1746  << ">;\n";
1747  }
1748  }
1749 
1750  OutStreamer->EmitRawText(O.str());
1751 }
1752 
1753 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1754  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1755  bool ignored;
1756  unsigned int numHex;
1757  const char *lead;
1758 
1759  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1760  numHex = 8;
1761  lead = "0f";
1763  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1764  numHex = 16;
1765  lead = "0d";
1767  } else
1768  llvm_unreachable("unsupported fp type");
1769 
1770  APInt API = APF.bitcastToAPInt();
1771  std::string hexstr(utohexstr(API.getZExtValue()));
1772  O << lead;
1773  if (hexstr.length() < numHex)
1774  O << std::string(numHex - hexstr.length(), '0');
1775  O << utohexstr(API.getZExtValue());
1776 }
1777 
1778 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1779  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1780  O << CI->getValue();
1781  return;
1782  }
1783  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1784  printFPConstant(CFP, O);
1785  return;
1786  }
1787  if (isa<ConstantPointerNull>(CPV)) {
1788  O << "0";
1789  return;
1790  }
1791  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1792  bool IsNonGenericPointer = false;
1793  if (GVar->getType()->getAddressSpace() != 0) {
1794  IsNonGenericPointer = true;
1795  }
1796  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1797  O << "generic(";
1798  getSymbol(GVar)->print(O, MAI);
1799  O << ")";
1800  } else {
1801  getSymbol(GVar)->print(O, MAI);
1802  }
1803  return;
1804  }
1805  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1806  const Value *v = Cexpr->stripPointerCasts();
1807  PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1808  bool IsNonGenericPointer = false;
1809  if (PTy && PTy->getAddressSpace() != 0) {
1810  IsNonGenericPointer = true;
1811  }
1812  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1813  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1814  O << "generic(";
1815  getSymbol(GVar)->print(O, MAI);
1816  O << ")";
1817  } else {
1818  getSymbol(GVar)->print(O, MAI);
1819  }
1820  return;
1821  } else {
1822  lowerConstant(CPV)->print(O, MAI);
1823  return;
1824  }
1825  }
1826  llvm_unreachable("Not scalar type found in printScalarConstant()");
1827 }
1828 
// These utility functions assure we get the right sequence of bytes for a given
// type even for big-endian machines

/// Store the sizeof(T) low-order bytes of \p val into \p p, least significant
/// byte first. \p p must have room for sizeof(T) bytes.
template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
  // Work on an unsigned 64-bit copy: the conversion is well defined for every
  // integer T (negative values wrap modulo 2^64, keeping their low bytes) and
  // right-shifting an unsigned value never depends on the implementation.
  uint64_t bits = static_cast<uint64_t>(val);
  for (unsigned i = 0; i < sizeof(T); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
/// Store the 4 bytes of the object representation of \p val into \p p,
/// least significant byte of its 32-bit pattern first.
static void ConvertFloatToBytes(unsigned char *p, float val) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float is expected to be 32 bits wide");
  // Copy the bits out with memcpy: reading a float through an int32_t
  // pointer breaks the strict-aliasing rule, and shifting a negative signed
  // value right is implementation-defined.
  uint32_t bits;
  memcpy(&bits, &val, sizeof(bits));
  for (unsigned i = 0; i < sizeof(bits); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
/// Store the 8 bytes of the object representation of \p val into \p p,
/// least significant byte of its 64-bit pattern first.
static void ConvertDoubleToBytes(unsigned char *p, double val) {
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double is expected to be 64 bits wide");
  // Copy the bits out with memcpy: reading a double through an int64_t
  // pointer breaks the strict-aliasing rule, and shifting a negative signed
  // value right is implementation-defined.
  uint64_t bits;
  memcpy(&bits, &val, sizeof(bits));
  for (unsigned i = 0; i < sizeof(bits); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
1852 
1853 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1854  AggBuffer *aggBuffer) {
1855  const DataLayout &DL = getDataLayout();
1856 
1857  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1858  int s = DL.getTypeAllocSize(CPV->getType());
1859  if (s < Bytes)
1860  s = Bytes;
1861  aggBuffer->addZeros(s);
1862  return;
1863  }
1864 
1865  unsigned char ptr[8];
1866  switch (CPV->getType()->getTypeID()) {
1867 
1868  case Type::IntegerTyID: {
1869  Type *ETy = CPV->getType();
1870  if (ETy == Type::getInt8Ty(CPV->getContext())) {
1871  unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
1872  ConvertIntToBytes<>(ptr, c);
1873  aggBuffer->addBytes(ptr, 1, Bytes);
1874  } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1875  short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
1876  ConvertIntToBytes<>(ptr, int16);
1877  aggBuffer->addBytes(ptr, 2, Bytes);
1878  } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1879  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1880  int int32 = (int)(constInt->getZExtValue());
1881  ConvertIntToBytes<>(ptr, int32);
1882  aggBuffer->addBytes(ptr, 4, Bytes);
1883  break;
1884  } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1885  if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1886  ConstantFoldConstant(Cexpr, DL))) {
1887  int int32 = (int)(constInt->getZExtValue());
1888  ConvertIntToBytes<>(ptr, int32);
1889  aggBuffer->addBytes(ptr, 4, Bytes);
1890  break;
1891  }
1892  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1893  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1894  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1895  aggBuffer->addZeros(4);
1896  break;
1897  }
1898  }
1899  llvm_unreachable("unsupported integer const type");
1900  } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1901  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1902  long long int64 = (long long)(constInt->getZExtValue());
1903  ConvertIntToBytes<>(ptr, int64);
1904  aggBuffer->addBytes(ptr, 8, Bytes);
1905  break;
1906  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1907  if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1908  ConstantFoldConstant(Cexpr, DL))) {
1909  long long int64 = (long long)(constInt->getZExtValue());
1910  ConvertIntToBytes<>(ptr, int64);
1911  aggBuffer->addBytes(ptr, 8, Bytes);
1912  break;
1913  }
1914  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1915  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1916  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1917  aggBuffer->addZeros(8);
1918  break;
1919  }
1920  }
1921  llvm_unreachable("unsupported integer const type");
1922  } else
1923  llvm_unreachable("unsupported integer const type");
1924  break;
1925  }
1926  case Type::FloatTyID:
1927  case Type::DoubleTyID: {
1928  const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1929  Type *Ty = CFP->getType();
1930  if (Ty == Type::getFloatTy(CPV->getContext())) {
1931  float float32 = (float) CFP->getValueAPF().convertToFloat();
1932  ConvertFloatToBytes(ptr, float32);
1933  aggBuffer->addBytes(ptr, 4, Bytes);
1934  } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1935  double float64 = CFP->getValueAPF().convertToDouble();
1936  ConvertDoubleToBytes(ptr, float64);
1937  aggBuffer->addBytes(ptr, 8, Bytes);
1938  } else {
1939  llvm_unreachable("unsupported fp const type");
1940  }
1941  break;
1942  }
1943  case Type::PointerTyID: {
1944  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1945  aggBuffer->addSymbol(GVar, GVar);
1946  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1947  const Value *v = Cexpr->stripPointerCasts();
1948  aggBuffer->addSymbol(v, Cexpr);
1949  }
1950  unsigned int s = DL.getTypeAllocSize(CPV->getType());
1951  aggBuffer->addZeros(s);
1952  break;
1953  }
1954 
1955  case Type::ArrayTyID:
1956  case Type::VectorTyID:
1957  case Type::StructTyID: {
1958  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1959  int ElementSize = DL.getTypeAllocSize(CPV->getType());
1960  bufferAggregateConstant(CPV, aggBuffer);
1961  if (Bytes > ElementSize)
1962  aggBuffer->addZeros(Bytes - ElementSize);
1963  } else if (isa<ConstantAggregateZero>(CPV))
1964  aggBuffer->addZeros(Bytes);
1965  else
1966  llvm_unreachable("Unexpected Constant type");
1967  break;
1968  }
1969 
1970  default:
1971  llvm_unreachable("unsupported type");
1972  }
1973 }
1974 
1975 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1976  AggBuffer *aggBuffer) {
1977  const DataLayout &DL = getDataLayout();
1978  int Bytes;
1979 
1980  // Old constants
1981  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1982  if (CPV->getNumOperands())
1983  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1984  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1985  return;
1986  }
1987 
1988  if (const ConstantDataSequential *CDS =
1989  dyn_cast<ConstantDataSequential>(CPV)) {
1990  if (CDS->getNumElements())
1991  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1992  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1993  aggBuffer);
1994  return;
1995  }
1996 
1997  if (isa<ConstantStruct>(CPV)) {
1998  if (CPV->getNumOperands()) {
1999  StructType *ST = cast<StructType>(CPV->getType());
2000  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
2001  if (i == (e - 1))
2002  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
2003  DL.getTypeAllocSize(ST) -
2004  DL.getStructLayout(ST)->getElementOffset(i);
2005  else
2006  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
2007  DL.getStructLayout(ST)->getElementOffset(i);
2008  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
2009  }
2010  }
2011  return;
2012  }
2013  llvm_unreachable("unsupported constant type in printAggregateConstant()");
2014 }
2015 
2016 // buildTypeNameMap - Run through symbol table looking for type names.
2017 //
2018 
2020  switch (MI.getOpcode()) {
2021  default:
2022  return false;
2023  case NVPTX::CallArgBeginInst:
2024  case NVPTX::CallArgEndInst0:
2025  case NVPTX::CallArgEndInst1:
2026  case NVPTX::CallArgF32:
2027  case NVPTX::CallArgF64:
2028  case NVPTX::CallArgI16:
2029  case NVPTX::CallArgI32:
2030  case NVPTX::CallArgI32imm:
2031  case NVPTX::CallArgI64:
2032  case NVPTX::CallArgParam:
2033  case NVPTX::CallVoidInst:
2034  case NVPTX::CallVoidInstReg:
2035  case NVPTX::Callseq_End:
2036  case NVPTX::CallVoidInstReg64:
2037  case NVPTX::DeclareParamInst:
2038  case NVPTX::DeclareRetMemInst:
2039  case NVPTX::DeclareRetRegInst:
2040  case NVPTX::DeclareRetScalarInst:
2041  case NVPTX::DeclareScalarParamInst:
2042  case NVPTX::DeclareScalarRegInst:
2043  case NVPTX::StoreParamF32:
2044  case NVPTX::StoreParamF64:
2045  case NVPTX::StoreParamI16:
2046  case NVPTX::StoreParamI32:
2047  case NVPTX::StoreParamI64:
2048  case NVPTX::StoreParamI8:
2049  case NVPTX::StoreRetvalF32:
2050  case NVPTX::StoreRetvalF64:
2051  case NVPTX::StoreRetvalI16:
2052  case NVPTX::StoreRetvalI32:
2053  case NVPTX::StoreRetvalI64:
2054  case NVPTX::StoreRetvalI8:
2055  case NVPTX::LastCallArgF32:
2056  case NVPTX::LastCallArgF64:
2057  case NVPTX::LastCallArgI16:
2058  case NVPTX::LastCallArgI32:
2059  case NVPTX::LastCallArgI32imm:
2060  case NVPTX::LastCallArgI64:
2061  case NVPTX::LastCallArgParam:
2062  case NVPTX::LoadParamMemF32:
2063  case NVPTX::LoadParamMemF64:
2064  case NVPTX::LoadParamMemI16:
2065  case NVPTX::LoadParamMemI32:
2066  case NVPTX::LoadParamMemI64:
2067  case NVPTX::LoadParamMemI8:
2068  case NVPTX::PrototypeInst:
2069  case NVPTX::DBG_VALUE:
2070  return true;
2071  }
2072  return false;
2073 }
2074 
2075 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
2076 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
2077 /// expressions that are representable in PTX and create
2078 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
2079 const MCExpr *
2080 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
2081  MCContext &Ctx = OutContext;
2082 
2083  if (CV->isNullValue() || isa<UndefValue>(CV))
2084  return MCConstantExpr::create(0, Ctx);
2085 
2086  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
2087  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
2088 
2089  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
2090  const MCSymbolRefExpr *Expr =
2092  if (ProcessingGeneric) {
2093  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
2094  } else {
2095  return Expr;
2096  }
2097  }
2098 
2099  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
2100  if (!CE) {
2101  llvm_unreachable("Unknown constant value to lower!");
2102  }
2103 
2104  switch (CE->getOpcode()) {
2105  default:
2106  // If the code isn't optimized, there may be outstanding folding
2107  // opportunities. Attempt to fold the expression using DataLayout as a
2108  // last resort before giving up.
2110  if (C && C != CE)
2111  return lowerConstantForGV(C, ProcessingGeneric);
2112 
2113  // Otherwise report the problem to the user.
2114  {
2115  std::string S;
2116  raw_string_ostream OS(S);
2117  OS << "Unsupported expression in static initializer: ";
2118  CE->printAsOperand(OS, /*PrintType=*/false,
2119  !MF ? nullptr : MF->getFunction()->getParent());
2120  report_fatal_error(OS.str());
2121  }
2122 
2123  case Instruction::AddrSpaceCast: {
2124  // Strip the addrspacecast and pass along the operand
2125  PointerType *DstTy = cast<PointerType>(CE->getType());
2126  if (DstTy->getAddressSpace() == 0) {
2127  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2128  }
2129  std::string S;
2130  raw_string_ostream OS(S);
2131  OS << "Unsupported expression in static initializer: ";
2132  CE->printAsOperand(OS, /*PrintType=*/ false,
2133  !MF ? nullptr : MF->getFunction()->getParent());
2134  report_fatal_error(OS.str());
2135  }
2136 
2137  case Instruction::GetElementPtr: {
2138  const DataLayout &DL = getDataLayout();
2139 
2140  // Generate a symbolic expression for the byte address
2141  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2142  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2143 
2144  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2145  ProcessingGeneric);
2146  if (!OffsetAI)
2147  return Base;
2148 
2149  int64_t Offset = OffsetAI.getSExtValue();
2150  return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
2151  Ctx);
2152  }
2153 
2154  case Instruction::Trunc:
2155  // We emit the value and depend on the assembler to truncate the generated
2156  // expression properly. This is important for differences between
2157  // blockaddress labels. Since the two labels are in the same function, it
2158  // is reasonable to treat their delta as a 32-bit value.
2160  case Instruction::BitCast:
2161  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2162 
2163  case Instruction::IntToPtr: {
2164  const DataLayout &DL = getDataLayout();
2165 
2166  // Handle casts to pointers by changing them into casts to the appropriate
2167  // integer type. This promotes constant folding and simplifies this code.
2168  Constant *Op = CE->getOperand(0);
2170  false/*ZExt*/);
2171  return lowerConstantForGV(Op, ProcessingGeneric);
2172  }
2173 
2174  case Instruction::PtrToInt: {
2175  const DataLayout &DL = getDataLayout();
2176 
2177  // Support only foldable casts to/from pointers that can be eliminated by
2178  // changing the pointer to the appropriately sized integer type.
2179  Constant *Op = CE->getOperand(0);
2180  Type *Ty = CE->getType();
2181 
2182  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2183 
2184  // We can emit the pointer value into this slot if the slot is an
2185  // integer slot equal to the size of the pointer.
2186  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2187  return OpExpr;
2188 
2189  // Otherwise the pointer is smaller than the resultant integer, mask off
2190  // the high bits so we are sure to get a proper truncation if the input is
2191  // a constant expr.
2192  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2193  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2194  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2195  }
2196 
2197  // The MC library also has a right-shift operator, but it isn't consistently
2198  // signed or unsigned between different targets.
2199  case Instruction::Add: {
2200  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2201  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2202  switch (CE->getOpcode()) {
2203  default: llvm_unreachable("Unknown binary operator constant cast expr");
2204  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2205  }
2206  }
2207  }
2208 }
2209 
2210 // Copy of MCExpr::print customized for NVPTX
2211 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2212  switch (Expr.getKind()) {
2213  case MCExpr::Target:
2214  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2215  case MCExpr::Constant:
2216  OS << cast<MCConstantExpr>(Expr).getValue();
2217  return;
2218 
2219  case MCExpr::SymbolRef: {
2220  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2221  const MCSymbol &Sym = SRE.getSymbol();
2222  Sym.print(OS, MAI);
2223  return;
2224  }
2225 
2226  case MCExpr::Unary: {
2227  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2228  switch (UE.getOpcode()) {
2229  case MCUnaryExpr::LNot: OS << '!'; break;
2230  case MCUnaryExpr::Minus: OS << '-'; break;
2231  case MCUnaryExpr::Not: OS << '~'; break;
2232  case MCUnaryExpr::Plus: OS << '+'; break;
2233  }
2234  printMCExpr(*UE.getSubExpr(), OS);
2235  return;
2236  }
2237 
2238  case MCExpr::Binary: {
2239  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2240 
2241  // Only print parens around the LHS if it is non-trivial.
2242  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2243  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2244  printMCExpr(*BE.getLHS(), OS);
2245  } else {
2246  OS << '(';
2247  printMCExpr(*BE.getLHS(), OS);
2248  OS<< ')';
2249  }
2250 
2251  switch (BE.getOpcode()) {
2252  case MCBinaryExpr::Add:
2253  // Print "X-42" instead of "X+-42".
2254  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2255  if (RHSC->getValue() < 0) {
2256  OS << RHSC->getValue();
2257  return;
2258  }
2259  }
2260 
2261  OS << '+';
2262  break;
2263  default: llvm_unreachable("Unhandled binary operator");
2264  }
2265 
2266  // Only print parens around the LHS if it is non-trivial.
2267  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2268  printMCExpr(*BE.getRHS(), OS);
2269  } else {
2270  OS << '(';
2271  printMCExpr(*BE.getRHS(), OS);
2272  OS << ')';
2273  }
2274  return;
2275  }
2276  }
2277 
2278  llvm_unreachable("Invalid expression kind!");
2279 }
2280 
2281 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2282 ///
2283 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2284  unsigned AsmVariant,
2285  const char *ExtraCode, raw_ostream &O) {
2286  if (ExtraCode && ExtraCode[0]) {
2287  if (ExtraCode[1] != 0)
2288  return true; // Unknown modifier.
2289 
2290  switch (ExtraCode[0]) {
2291  default:
2292  // See if this is a generic print operand
2293  return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
2294  case 'r':
2295  break;
2296  }
2297  }
2298 
2299  printOperand(MI, OpNo, O);
2300 
2301  return false;
2302 }
2303 
2304 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2305  const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2306  const char *ExtraCode, raw_ostream &O) {
2307  if (ExtraCode && ExtraCode[0])
2308  return true; // Unknown modifier
2309 
2310  O << '[';
2311  printMemOperand(MI, OpNo, O);
2312  O << ']';
2313 
2314  return false;
2315 }
2316 
2317 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2318  raw_ostream &O, const char *Modifier) {
2319  const MachineOperand &MO = MI->getOperand(opNum);
2320  switch (MO.getType()) {
2323  if (MO.getReg() == NVPTX::VRDepot)
2324  O << DEPOTNAME << getFunctionNumber();
2325  else
2327  } else {
2328  emitVirtualRegister(MO.getReg(), O);
2329  }
2330  return;
2331 
2333  if (!Modifier)
2334  O << MO.getImm();
2335  else if (strstr(Modifier, "vec") == Modifier)
2336  printVecModifiedImmediate(MO, Modifier, O);
2337  else
2339  "Don't know how to handle modifier on immediate operand");
2340  return;
2341 
2343  printFPConstant(MO.getFPImm(), O);
2344  break;
2345 
2347  getSymbol(MO.getGlobal())->print(O, MAI);
2348  break;
2349 
2351  MO.getMBB()->getSymbol()->print(O, MAI);
2352  return;
2353 
2354  default:
2355  llvm_unreachable("Operand type not supported.");
2356  }
2357 }
2358 
2359 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2360  raw_ostream &O, const char *Modifier) {
2361  printOperand(MI, opNum, O);
2362 
2363  if (Modifier && strcmp(Modifier, "add") == 0) {
2364  O << ", ";
2365  printOperand(MI, opNum + 1, O);
2366  } else {
2367  if (MI->getOperand(opNum + 1).isImm() &&
2368  MI->getOperand(opNum + 1).getImm() == 0)
2369  return; // don't print ',0' or '+0'
2370  O << "+";
2371  printOperand(MI, opNum + 1, O);
2372  }
2373 }
2374 
2375 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
2376  std::stringstream temp;
2377  LineReader *reader = this->getReader(filename);
2378  temp << "\n//";
2379  temp << filename.str();
2380  temp << ":";
2381  temp << line;
2382  temp << " ";
2383  temp << reader->readLine(line);
2384  temp << "\n";
2385  this->OutStreamer->EmitRawText(temp.str());
2386 }
2387 
2388 LineReader *NVPTXAsmPrinter::getReader(const std::string &filename) {
2389  if (!reader) {
2390  reader = new LineReader(filename);
2391  }
2392 
2393  if (reader->fileName() != filename) {
2394  delete reader;
2395  reader = new LineReader(filename);
2396  }
2397 
2398  return reader;
2399 }
2400 
2401 std::string LineReader::readLine(unsigned lineNum) {
2402  if (lineNum < theCurLine) {
2403  theCurLine = 0;
2404  fstr.seekg(0, std::ios::beg);
2405  }
2406  while (theCurLine < lineNum) {
2407  fstr.getline(buff, 500);
2408  theCurLine++;
2409  }
2410  return buff;
2411 }
2412 
2413 // Force static initialization.
2417 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type (if unknown returns 0).
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:279
bool doesSupportDebugInformation() const
Definition: MCAsmInfo.h:534
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
static cl::opt< bool > InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Emit source line in ptx file"), cl::init(false))
StringRef getTargetCPU() const
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:158
const GlobalValue * getGlobal() const
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
Bitwise negation.
Definition: MCExpr.h:340
const MCSymbol & getSymbol() const
Definition: MCExpr.h:311
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:84
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:371
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:298
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool hasName() const
Definition: Value.h:236
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:53
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
size_t i
MDNode * getScope() const
Definition: DebugLoc.cpp:35
bool getMaxNReg(const Function &F, unsigned &x)
const ConstantFP * getFPImm() const
MachineBasicBlock * getMBB() const
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
float convertToFloat() const
Definition: APFloat.h:1014
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:148
2: 32-bit floating point type
Definition: Type.h:58
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:79
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:129
Implements a dense probed hash-table based set.
Definition: DenseSet.h:202
const GlobalListType & getGlobalList() const
Get the Module's list of global variables (constant).
Definition: Module.h:471
unsigned getNumOperands() const
Definition: User.h:167
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
Definition: DataLayout.cpp:617
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:699
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool isTexture(const Value &val)
Type * getValueType() const
Definition: GlobalValue.h:261
bool hasAppendingLinkage() const
Definition: GlobalValue.h:412
unsigned getPointerPrefAlignment(unsigned AS=0) const
Return target's alignment for stack-based pointers FIXME: The defaults need to be removed once all of...
Definition: DataLayout.cpp:599
const MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:87
ExprKind getKind() const
Definition: MCExpr.h:70
unsigned getSizeInBits() const
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:402
Unary plus.
Definition: MCExpr.h:341
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:490
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:270
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.cpp:238
MachineBasicBlock reference.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:100
arg_iterator arg_end()
Definition: Function.h:559
13: Structures
Definition: Type.h:72
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:140
const char * getSymbolName() const
unsigned getParamAlignment(unsigned Index) const
Return the alignment for the specified function parameter.
A debug info location.
Definition: DebugLoc.h:34
Metadata node.
Definition: Metadata.h:830
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:433
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:471
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:170
15: Pointers
Definition: Type.h:74
Type * getElementType() const
Definition: DerivedTypes.h:462
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:168
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
Definition: Attributes.cpp:994
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with strcmp
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1182
std::string getVirtualRegisterName(unsigned) const
void processModule(const Module &M)
Process entire module and collect debug info anchors.
Definition: DebugInfo.cpp:53
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
Definition: DataLayout.h:418
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const Triple & getTargetTriple() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:239
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
std::string readLine(unsigned line)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:172
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition: Path.cpp:448
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1535
bool hasCommonLinkage() const
Definition: GlobalValue.h:419
unsigned getNumRegClasses() const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:111
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:566
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:157
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:143
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
Class to represent struct types.
Definition: DerivedTypes.h:199
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
bool hasInternalLinkage() const
Definition: GlobalValue.h:413
void clearAnnotationCache(const Module *Mod)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Name of external global symbol.
Utility to find all debug info in a module.
Definition: DebugInfo.h:70
Reg
All possible values of the reg field in the ModR/M byte.
This file contains the simple types necessary to represent the attributes associated with functions a...
APInt bitcastToAPInt() const
Definition: APFloat.h:1012
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:161
bool is_absolute(const Twine &path)
Is path absolute?
Definition: Path.cpp:686
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
Windows NT (Windows on ARM)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:123
This file implements a class to represent arbitrary precision integral constant values and operations...
bool hasPrivateLinkage() const
Definition: GlobalValue.h:414
Target & getTheNVPTXTarget64()
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
bool getMaxNTIDz(const Function &F, unsigned &z)
bool ignoreLoc(const MachineInstr &)
Context object for machine code objects.
Definition: MCContext.h:51
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
Subprogram description.
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:873
Class to represent function types.
Definition: DerivedTypes.h:102
bool isKernelFunction(const Function &F)
#define F(x, y, z)
Definition: MD5.cpp:51
bool isSurface(const Value &val)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:264
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4139
unsigned int getSmVersion() const
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:564
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:3473
MachineBasicBlock * MBB
#define T
unsigned getAlignment() const
Definition: GlobalObject.h:59
Unary assembler expressions.
Definition: MCExpr.h:335
Class to represent array types.
Definition: DerivedTypes.h:345
bool getMaxNTIDx(const Function &F, unsigned &x)
std::string getTextureName(const Value &val)
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:81
static std::string utostr(uint64_t X, bool isNeg=false)
Definition: StringExtras.h:79
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:160
iterator_range< compile_unit_iterator > compile_units() const
Definition: DebugInfo.h:106
int64_t getImm() const
#define DEPOTNAME
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:429
bool isSampler(const Value &val)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Unary expressions.
Definition: MCExpr.h:40
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:90
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:150
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:50
Class to represent pointers.
Definition: DerivedTypes.h:443
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator_range< subprogram_iterator > subprograms() const
Definition: DebugInfo.h:110
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
bool hasDouble() const
static const fltSemantics & IEEEsingle()
Definition: APFloat.cpp:100
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:517
11: Arbitrary bit width integers
Definition: Type.h:70
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:73
Target & getTheNVPTXTarget32()
unsigned getLine() const
Definition: DebugLoc.cpp:25
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:514
0: type with no size
Definition: Type.h:56
StringRef filename(StringRef path)
Get filename.
Definition: Path.cpp:584
Address of a global value.
static bool isEmptyXXStructor(GlobalVariable *GV)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
unsigned const MachineRegisterInfo * MRI
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:95
MVT - Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:75
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:219
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
This is an important base class in LLVM.
Definition: Constant.h:42
static bool is64Bit(const char *name)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
const NVPTXTargetLowering * getTargetLowering() const override
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:71
This file declares a class to represent arbitrary precision floating point values and provide a varie...
uint32_t Offset
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
static const char * getRegisterName(unsigned RegNo)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
ManagedStringPool * getManagedStrPool() const
Value * getOperand(unsigned i) const
Definition: User.h:145
arg_iterator arg_begin()
Definition: Function.h:550
Logical negation.
Definition: MCExpr.h:338
void emitLineNumberAsDotLoc(const MachineInstr &)
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:225
EVT - Extended Value Type.
Definition: ValueTypes.h:31
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:213
bool hasWeakLinkage() const
Definition: GlobalValue.h:409
Binary assembler expressions.
Definition: MCExpr.h:388
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:654
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool getMaxNTIDy(const Function &F, unsigned &y)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool getReqNTIDx(const Function &F, unsigned &x)
const std::string & getModuleInlineAsm() const
Get any module-scope inline assembly blocks.
Definition: Module.h:226
unsigned getCol() const
Definition: DebugLoc.cpp:30
bool isImage(const Value &val)
static cl::opt< bool > EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, cl::desc("NVPTX Specific: Emit Line numbers even without -G"), cl::init(true))
14: Arrays
Definition: Type.h:73
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:709
bool getReqNTIDz(const Function &F, unsigned &z)
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:689
bool hasExternalLinkage() const
Definition: GlobalValue.h:401
Iterator for intrusive lists based on ilist_node.
void setOpcode(unsigned Op)
Definition: MCInst.h:158
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
static void ConvertFloatToBytes(unsigned char *p, float val)
std::string * getManagedString(const char *S)
StringRef getTargetFeatureString() const
16: SIMD 'packed' format, or other vector type
Definition: Type.h:75
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:408
virtual const MCExpr * lowerConstant(const Constant *CV)
Lower the specified LLVM Constant to an MCExpr.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
bool isVector(MCInstrInfo const &MCII, MCInst const &MCI)
AddressSpace
Definition: NVPTXBaseInfo.h:22
bool isImageReadWrite(const Value &val)
GlobalVariable * getNamedGlobal(StringRef Name)
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:357
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:161
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
std::string getTargetName() const
Value * stripPointerCasts()
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:490
StringRef str()
Return a StringRef for the vector contents.
Definition: raw_ostream.h:515
std::string getSurfaceName(const Value &val)
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intrinsic if the function is not an intrinsic.
Definition: Function.h:146
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
AttributeSet getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:176
std::string getSamplerName(const Value &val)
bool arg_empty() const
Definition: Function.cpp:330
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const
Targets can override this to emit stuff at the start of a basic block.
Class for arbitrary precision integers.
Definition: APInt.h:77
bool hasInitializer() const
Definitions have initializers, declarations don't.
ConstantArray - Constant Array Declarations.
Definition: Constants.h:411
bool isManaged(const Value &val)
bool getReqNTIDy(const Function &F, unsigned &y)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
iterator_range< user_iterator > users()
Definition: Value.h:370
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:130
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (for example, "llvm.loop.unroll.count").
Definition: LoopUnroll.cpp:760
double convertToDouble() const
Definition: APFloat.h:1013
static const fltSemantics & IEEEdouble()
Definition: APFloat.cpp:103
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:517
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:405
Representation of each machine instruction.
Definition: MachineInstr.h:52
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
pointer remove(iterator &IT)
Definition: ilist.h:264
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:259
NVPTXTargetMachine.
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:378
iterator insert(iterator where, pointer New)
Definition: ilist.h:241
unsigned size() const
Definition: DenseMap.h:83
iterator end()
Definition: Module.h:537
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:247
size_type count(const ValueT &V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:81
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:169
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:114
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
Definition: Globals.cpp:188
static bool usedInGlobalVarDef(const Constant *C)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
#define I(x, y, z)
Definition: MD5.cpp:54
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:511
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
iterator begin()
Definition: Module.h:535
iterator end()
Definition: DenseMap.h:69
size_t alias_size() const
Definition: Module.h:561
iterator find(const KeyT &Val)
Definition: DenseMap.h:127
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
References to labels and assigned expressions.
Definition: MCExpr.h:39
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
Unary minus.
Definition: MCExpr.h:339
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:391
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
static std::string utohexstr(uint64_t X, bool LowerCase=false)
Definition: StringExtras.h:48
const APFloat & getValueAPF() const
Definition: Constants.h:300
void print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens=false) const
Definition: MCExpr.cpp:33
3: 64-bit floating point type
Definition: Type.h:59
unsigned getReg() const
getReg - Returns the register number.
bool use_empty() const
Definition: Value.h:299
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:144
static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
bool hasImageHandles() const
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:463
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layout.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:108
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
LLVM Value Representation.
Definition: Value.h:71
static const char * name
Floating-point immediate operand.
RegisterAsmPrinter - Helper template for registering a target specific assembly printer, for use in the target machine initialization function.
Constant expressions.
Definition: MCExpr.h:38
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
Binary expressions.
Definition: MCExpr.h:37
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:44
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:125
Primary interface to the complete machine description for the target machine.
static void ConvertDoubleToBytes(unsigned char *p, double val)
iterator_range< global_iterator > globals()
Definition: Module.h:524
IRTranslator LLVM IR MI
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void addOperand(const MCOperand &Op)
Definition: MCInst.h:168
virtual void print(raw_ostream &O, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:117
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
bool getMinCTASm(const Function &F, unsigned &x)
Target specific expression.
Definition: MCExpr.h:41
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
static void ConvertIntToBytes(unsigned char *p, T val)
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:33
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:117
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:167
const BasicBlock * getParent() const
Definition: Instruction.h:62
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx)
Definition: MCExpr.cpp:149
bool isImageWriteOnly(const Value &val)
NVPTX::DrvInterface getDrvInterface() const
void LLVMInitializeNVPTXAsmPrinter()
bool doFinalization(Module &M) override
Shut down the asmprinter.
unsigned getPTXVersion() const
This file describes how to lower LLVM code to machine code.
std::string fileName()
const NVPTXRegisterInfo * getRegisterInfo() const override
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:375
const char * getName(unsigned RegNo) const