LLVM  7.0.0svn
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a printer that converts from our internal representation
11 // of machine-dependent LLVM code to NVPTX assembly language.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "NVPTXAsmPrinter.h"
19 #include "NVPTX.h"
20 #include "NVPTXMCExpr.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
26 #include "cl_common_defines.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DenseSet.h"
31 #include "llvm/ADT/SmallString.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Triple.h"
36 #include "llvm/ADT/Twine.h"
38 #include "llvm/CodeGen/Analysis.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/Constant.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/DataLayout.h"
55 #include "llvm/IR/DebugInfo.h"
57 #include "llvm/IR/DebugLoc.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Function.h"
60 #include "llvm/IR/GlobalValue.h"
61 #include "llvm/IR/GlobalVariable.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/IR/Operator.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/MC/MCExpr.h"
69 #include "llvm/MC/MCInst.h"
70 #include "llvm/MC/MCInstrDesc.h"
71 #include "llvm/MC/MCStreamer.h"
72 #include "llvm/MC/MCSymbol.h"
73 #include "llvm/Support/Casting.h"
77 #include "llvm/Support/Path.h"
83 #include <cassert>
84 #include <cstdint>
85 #include <cstring>
86 #include <new>
87 #include <string>
88 #include <utility>
89 #include <vector>
90 
91 using namespace llvm;
92 
93 #define DEPOTNAME "__local_depot"
94 
95 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
96 /// depends.
97 static void
100  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
101  Globals.insert(GV);
102  else {
103  if (const User *U = dyn_cast<User>(V)) {
104  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
105  DiscoverDependentGlobals(U->getOperand(i), Globals);
106  }
107  }
108  }
109 }
110 
111 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
112 /// instances to be emitted, but only after any dependents have been added
113 /// first.
114 static void
119  // Have we already visited this one?
120  if (Visited.count(GV))
121  return;
122 
123  // Do we have a circular dependency?
124  if (!Visiting.insert(GV).second)
125  report_fatal_error("Circular dependency found in global variable set");
126 
127  // Make sure we visit all dependents first
129  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
130  DiscoverDependentGlobals(GV->getOperand(i), Others);
131 
133  E = Others.end();
134  I != E; ++I)
135  VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
136 
137  // Now we can visit ourself
138  Order.push_back(GV);
139  Visited.insert(GV);
140  Visiting.erase(GV);
141 }
142 
143 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
144  MCInst Inst;
145  lowerToMCInst(MI, Inst);
146  EmitToStreamer(*OutStreamer, Inst);
147 }
148 
149 // Handle symbol backtracking for targets that do not support image handles
150 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
151  unsigned OpNo, MCOperand &MCOp) {
152  const MachineOperand &MO = MI->getOperand(OpNo);
153  const MCInstrDesc &MCID = MI->getDesc();
154 
155  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
156  // This is a texture fetch, so operand 4 is a texref and operand 5 is
157  // a samplerref
158  if (OpNo == 4 && MO.isImm()) {
159  lowerImageHandleSymbol(MO.getImm(), MCOp);
160  return true;
161  }
162  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
163  lowerImageHandleSymbol(MO.getImm(), MCOp);
164  return true;
165  }
166 
167  return false;
168  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
169  unsigned VecSize =
170  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
171 
172  // For a surface load of vector size N, the Nth operand will be the surfref
173  if (OpNo == VecSize && MO.isImm()) {
174  lowerImageHandleSymbol(MO.getImm(), MCOp);
175  return true;
176  }
177 
178  return false;
179  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
180  // This is a surface store, so operand 0 is a surfref
181  if (OpNo == 0 && MO.isImm()) {
182  lowerImageHandleSymbol(MO.getImm(), MCOp);
183  return true;
184  }
185 
186  return false;
187  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
188  // This is a query, so operand 1 is a surfref/texref
189  if (OpNo == 1 && MO.isImm()) {
190  lowerImageHandleSymbol(MO.getImm(), MCOp);
191  return true;
192  }
193 
194  return false;
195  }
196 
197  return false;
198 }
199 
200 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
201  // Ewwww
202  TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
203  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
205  const char *Sym = MFI->getImageHandleSymbol(Index);
206  std::string *SymNamePtr =
207  nvTM.getManagedStrPool()->getManagedString(Sym);
208  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
209 }
210 
211 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
212  OutMI.setOpcode(MI->getOpcode());
213  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
214  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
215  const MachineOperand &MO = MI->getOperand(0);
216  OutMI.addOperand(GetSymbolRef(
218  return;
219  }
220 
221  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
222  const MachineOperand &MO = MI->getOperand(i);
223 
224  MCOperand MCOp;
225  if (!nvptxSubtarget->hasImageHandles()) {
226  if (lowerImageHandleOperand(MI, i, MCOp)) {
227  OutMI.addOperand(MCOp);
228  continue;
229  }
230  }
231 
232  if (lowerOperand(MO, MCOp))
233  OutMI.addOperand(MCOp);
234  }
235 }
236 
237 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
238  MCOperand &MCOp) {
239  switch (MO.getType()) {
240  default: llvm_unreachable("unknown operand type");
242  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
243  break;
245  MCOp = MCOperand::createImm(MO.getImm());
246  break;
249  MO.getMBB()->getSymbol(), OutContext));
250  break;
253  break;
255  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
256  break;
258  const ConstantFP *Cnt = MO.getFPImm();
259  const APFloat &Val = Cnt->getValueAPF();
260 
261  switch (Cnt->getType()->getTypeID()) {
262  default: report_fatal_error("Unsupported FP type"); break;
263  case Type::HalfTyID:
264  MCOp = MCOperand::createExpr(
266  break;
267  case Type::FloatTyID:
268  MCOp = MCOperand::createExpr(
270  break;
271  case Type::DoubleTyID:
272  MCOp = MCOperand::createExpr(
274  break;
275  }
276  break;
277  }
278  }
279  return true;
280 }
281 
282 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
284  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
285 
286  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
287  unsigned RegNum = RegMap[Reg];
288 
289  // Encode the register class in the upper 4 bits
290  // Must be kept in sync with NVPTXInstPrinter::printRegName
291  unsigned Ret = 0;
292  if (RC == &NVPTX::Int1RegsRegClass) {
293  Ret = (1 << 28);
294  } else if (RC == &NVPTX::Int16RegsRegClass) {
295  Ret = (2 << 28);
296  } else if (RC == &NVPTX::Int32RegsRegClass) {
297  Ret = (3 << 28);
298  } else if (RC == &NVPTX::Int64RegsRegClass) {
299  Ret = (4 << 28);
300  } else if (RC == &NVPTX::Float32RegsRegClass) {
301  Ret = (5 << 28);
302  } else if (RC == &NVPTX::Float64RegsRegClass) {
303  Ret = (6 << 28);
304  } else if (RC == &NVPTX::Float16RegsRegClass) {
305  Ret = (7 << 28);
306  } else if (RC == &NVPTX::Float16x2RegsRegClass) {
307  Ret = (8 << 28);
308  } else {
309  report_fatal_error("Bad register class");
310  }
311 
312  // Insert the vreg number
313  Ret |= (RegNum & 0x0FFFFFFF);
314  return Ret;
315  } else {
316  // Some special-use registers are actually physical registers.
317  // Encode this as the register class ID of 0 and the real register ID.
318  return Reg & 0x0FFFFFFF;
319  }
320 }
321 
323  const MCExpr *Expr;
325  OutContext);
326  return MCOperand::createExpr(Expr);
327 }
328 
329 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
330  const DataLayout &DL = getDataLayout();
331  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
332 
333  Type *Ty = F->getReturnType();
334 
335  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
336 
337  if (Ty->getTypeID() == Type::VoidTyID)
338  return;
339 
340  O << " (";
341 
342  if (isABI) {
343  if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
344  unsigned size = 0;
345  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
346  size = ITy->getBitWidth();
347  } else {
348  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
349  size = Ty->getPrimitiveSizeInBits();
350  }
351  // PTX ABI requires all scalar return values to be at least 32
352  // bits in size. fp16 normally uses .b16 as its storage type in
353  // PTX, so its size must be adjusted here, too.
354  if (size < 32)
355  size = 32;
356 
357  O << ".param .b" << size << " func_retval0";
358  } else if (isa<PointerType>(Ty)) {
359  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
360  << " func_retval0";
361  } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
362  unsigned totalsz = DL.getTypeAllocSize(Ty);
363  unsigned retAlignment = 0;
364  if (!getAlign(*F, 0, retAlignment))
365  retAlignment = DL.getABITypeAlignment(Ty);
366  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
367  << "]";
368  } else
369  llvm_unreachable("Unknown return type");
370  } else {
371  SmallVector<EVT, 16> vtparts;
372  ComputeValueVTs(*TLI, DL, Ty, vtparts);
373  unsigned idx = 0;
374  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
375  unsigned elems = 1;
376  EVT elemtype = vtparts[i];
377  if (vtparts[i].isVector()) {
378  elems = vtparts[i].getVectorNumElements();
379  elemtype = vtparts[i].getVectorElementType();
380  }
381 
382  for (unsigned j = 0, je = elems; j != je; ++j) {
383  unsigned sz = elemtype.getSizeInBits();
384  if (elemtype.isInteger() && (sz < 32))
385  sz = 32;
386  O << ".reg .b" << sz << " func_retval" << idx;
387  if (j < je - 1)
388  O << ", ";
389  ++idx;
390  }
391  if (i < e - 1)
392  O << ", ";
393  }
394  }
395  O << ") ";
396 }
397 
398 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
399  raw_ostream &O) {
400  const Function &F = MF.getFunction();
401  printReturnValStr(&F, O);
402 }
403 
404 // Return true if MBB is the header of a loop marked with
405 // llvm.loop.unroll.disable.
406 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
407 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
408  const MachineBasicBlock &MBB) const {
409  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
410  // We insert .pragma "nounroll" only to the loop header.
411  if (!LI.isLoopHeader(&MBB))
412  return false;
413 
414  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
415  // we iterate through each back edge of the loop with header MBB, and check
416  // whether its metadata contains llvm.loop.unroll.disable.
417  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
418  const MachineBasicBlock *PMBB = *I;
419  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
420  // Edges from other loops to MBB are not back edges.
421  continue;
422  }
423  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
424  if (MDNode *LoopID =
425  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
426  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
427  return true;
428  }
429  }
430  }
431  return false;
432 }
433 
434 void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
436  if (isLoopHeaderOfNoUnroll(MBB))
437  OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
438 }
439 
440 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
441  SmallString<128> Str;
442  raw_svector_ostream O(Str);
443 
444  if (!GlobalsEmitted) {
445  emitGlobals(*MF->getFunction().getParent());
446  GlobalsEmitted = true;
447  }
448 
449  // Set up
450  MRI = &MF->getRegInfo();
451  F = &MF->getFunction();
452  emitLinkageDirective(F, O);
453  if (isKernelFunction(*F))
454  O << ".entry ";
455  else {
456  O << ".func ";
457  printReturnValStr(*MF, O);
458  }
459 
460  CurrentFnSym->print(O, MAI);
461 
462  emitFunctionParamList(*MF, O);
463 
464  if (isKernelFunction(*F))
465  emitKernelFunctionDirectives(*F, O);
466 
467  OutStreamer->EmitRawText(O.str());
468 
469  VRegMapping.clear();
470  // Emit open brace for function body.
471  OutStreamer->EmitRawText(StringRef("{\n"));
472  setAndEmitFunctionVirtualRegisters(*MF);
473 }
474 
476  nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
477  bool Result = AsmPrinter::runOnMachineFunction(F);
478  // Emit closing brace for the body of function F.
479  // The closing brace must be emitted here because we need to emit additional
480  // debug labels/data after the last basic block.
481  // We need to emit the closing brace here because we don't have function that
482  // finished emission of the function body.
483  OutStreamer->EmitRawText(StringRef("}\n"));
484  return Result;
485 }
486 
487 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
488  SmallString<128> Str;
489  raw_svector_ostream O(Str);
490  emitDemotedVars(&MF->getFunction(), O);
491  OutStreamer->EmitRawText(O.str());
492 }
493 
494 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
495  VRegMapping.clear();
496 }
497 
498 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
499  unsigned RegNo = MI->getOperand(0).getReg();
501  OutStreamer->AddComment(Twine("implicit-def: ") +
502  getVirtualRegisterName(RegNo));
503  } else {
504  OutStreamer->AddComment(Twine("implicit-def: ") +
505  nvptxSubtarget->getRegisterInfo()->getName(RegNo));
506  }
507  OutStreamer->AddBlankLine();
508 }
509 
510 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
511  raw_ostream &O) const {
512  // If the NVVM IR has some of reqntid* specified, then output
513  // the reqntid directive, and set the unspecified ones to 1.
514  // If none of reqntid* is specified, don't output reqntid directive.
515  unsigned reqntidx, reqntidy, reqntidz;
516  bool specified = false;
517  if (!getReqNTIDx(F, reqntidx))
518  reqntidx = 1;
519  else
520  specified = true;
521  if (!getReqNTIDy(F, reqntidy))
522  reqntidy = 1;
523  else
524  specified = true;
525  if (!getReqNTIDz(F, reqntidz))
526  reqntidz = 1;
527  else
528  specified = true;
529 
530  if (specified)
531  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
532  << "\n";
533 
534  // If the NVVM IR has some of maxntid* specified, then output
535  // the maxntid directive, and set the unspecified ones to 1.
536  // If none of maxntid* is specified, don't output maxntid directive.
537  unsigned maxntidx, maxntidy, maxntidz;
538  specified = false;
539  if (!getMaxNTIDx(F, maxntidx))
540  maxntidx = 1;
541  else
542  specified = true;
543  if (!getMaxNTIDy(F, maxntidy))
544  maxntidy = 1;
545  else
546  specified = true;
547  if (!getMaxNTIDz(F, maxntidz))
548  maxntidz = 1;
549  else
550  specified = true;
551 
552  if (specified)
553  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
554  << "\n";
555 
556  unsigned mincta;
557  if (getMinCTASm(F, mincta))
558  O << ".minnctapersm " << mincta << "\n";
559 
560  unsigned maxnreg;
561  if (getMaxNReg(F, maxnreg))
562  O << ".maxnreg " << maxnreg << "\n";
563 }
564 
565 std::string
567  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
568 
569  std::string Name;
570  raw_string_ostream NameStr(Name);
571 
572  VRegRCMap::const_iterator I = VRegMapping.find(RC);
573  assert(I != VRegMapping.end() && "Bad register class");
574  const DenseMap<unsigned, unsigned> &RegMap = I->second;
575 
576  VRegMap::const_iterator VI = RegMap.find(Reg);
577  assert(VI != RegMap.end() && "Bad virtual register");
578  unsigned MappedVR = VI->second;
579 
580  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
581 
582  NameStr.flush();
583  return Name;
584 }
585 
586 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
587  raw_ostream &O) {
588  O << getVirtualRegisterName(vr);
589 }
590 
591 void NVPTXAsmPrinter::printVecModifiedImmediate(
592  const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
593  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
594  int Imm = (int) MO.getImm();
595  if (0 == strcmp(Modifier, "vecelem"))
596  O << "_" << vecelem[Imm];
597  else if (0 == strcmp(Modifier, "vecv4comm1")) {
598  if ((Imm < 0) || (Imm > 3))
599  O << "//";
600  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
601  if ((Imm < 4) || (Imm > 7))
602  O << "//";
603  } else if (0 == strcmp(Modifier, "vecv4pos")) {
604  if (Imm < 0)
605  Imm = 0;
606  O << "_" << vecelem[Imm % 4];
607  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
608  if ((Imm < 0) || (Imm > 1))
609  O << "//";
610  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
611  if ((Imm < 2) || (Imm > 3))
612  O << "//";
613  } else if (0 == strcmp(Modifier, "vecv2pos")) {
614  if (Imm < 0)
615  Imm = 0;
616  O << "_" << vecelem[Imm % 2];
617  } else
618  llvm_unreachable("Unknown Modifier on immediate operand");
619 }
620 
621 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
622  emitLinkageDirective(F, O);
623  if (isKernelFunction(*F))
624  O << ".entry ";
625  else
626  O << ".func ";
627  printReturnValStr(F, O);
628  getSymbol(F)->print(O, MAI);
629  O << "\n";
630  emitFunctionParamList(F, O);
631  O << ";\n";
632 }
633 
634 static bool usedInGlobalVarDef(const Constant *C) {
635  if (!C)
636  return false;
637 
638  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
639  return GV->getName() != "llvm.used";
640  }
641 
642  for (const User *U : C->users())
643  if (const Constant *C = dyn_cast<Constant>(U))
644  if (usedInGlobalVarDef(C))
645  return true;
646 
647  return false;
648 }
649 
650 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
651  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
652  if (othergv->getName() == "llvm.used")
653  return true;
654  }
655 
656  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
657  if (instr->getParent() && instr->getParent()->getParent()) {
658  const Function *curFunc = instr->getParent()->getParent();
659  if (oneFunc && (curFunc != oneFunc))
660  return false;
661  oneFunc = curFunc;
662  return true;
663  } else
664  return false;
665  }
666 
667  for (const User *UU : U->users())
668  if (!usedInOneFunc(UU, oneFunc))
669  return false;
670 
671  return true;
672 }
673 
674 /* Find out if a global variable can be demoted to local scope.
675  * Currently, this is valid for CUDA shared variables, which have local
676  * scope and global lifetime. So the conditions to check are :
677  * 1. Is the global variable in shared address space?
678  * 2. Does it have internal linkage?
679  * 3. Is the global variable referenced only in one function?
680  */
681 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
682  if (!gv->hasInternalLinkage())
683  return false;
684  PointerType *Pty = gv->getType();
686  return false;
687 
688  const Function *oneFunc = nullptr;
689 
690  bool flag = usedInOneFunc(gv, oneFunc);
691  if (!flag)
692  return false;
693  if (!oneFunc)
694  return false;
695  f = oneFunc;
696  return true;
697 }
698 
699 static bool useFuncSeen(const Constant *C,
701  for (const User *U : C->users()) {
702  if (const Constant *cu = dyn_cast<Constant>(U)) {
703  if (useFuncSeen(cu, seenMap))
704  return true;
705  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
706  const BasicBlock *bb = I->getParent();
707  if (!bb)
708  continue;
709  const Function *caller = bb->getParent();
710  if (!caller)
711  continue;
712  if (seenMap.find(caller) != seenMap.end())
713  return true;
714  }
715  }
716  return false;
717 }
718 
719 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
721  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
722  const Function *F = &*FI;
723 
724  if (F->isDeclaration()) {
725  if (F->use_empty())
726  continue;
727  if (F->getIntrinsicID())
728  continue;
729  emitDeclaration(F, O);
730  continue;
731  }
732  for (const User *U : F->users()) {
733  if (const Constant *C = dyn_cast<Constant>(U)) {
734  if (usedInGlobalVarDef(C)) {
735  // The use is in the initialization of a global variable
736  // that is a function pointer, so print a declaration
737  // for the original function
738  emitDeclaration(F, O);
739  break;
740  }
741  // Emit a declaration of this function if the function that
742  // uses this constant expr has already been seen.
743  if (useFuncSeen(C, seenMap)) {
744  emitDeclaration(F, O);
745  break;
746  }
747  }
748 
749  if (!isa<Instruction>(U))
750  continue;
751  const Instruction *instr = cast<Instruction>(U);
752  const BasicBlock *bb = instr->getParent();
753  if (!bb)
754  continue;
755  const Function *caller = bb->getParent();
756  if (!caller)
757  continue;
758 
759  // If a caller has already been seen, then the caller is
760  // appearing in the module before the callee. so print out
761  // a declaration for the callee.
762  if (seenMap.find(caller) != seenMap.end()) {
763  emitDeclaration(F, O);
764  break;
765  }
766  }
767  seenMap[F] = true;
768  }
769 }
770 
772  if (!GV) return true;
773  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
774  if (!InitList) return true; // Not an array; we don't know how to parse.
775  return InitList->getNumOperands() == 0;
776 }
777 
779  // Construct a default subtarget off of the TargetMachine defaults. The
780  // rest of NVPTX isn't friendly to change subtargets per function and
781  // so the default TargetMachine will have all of the options.
782  const Triple &TT = TM.getTargetTriple();
783  StringRef CPU = TM.getTargetCPU();
785  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
786  const NVPTXSubtarget STI(TT, CPU, FS, NTM);
787 
788  if (M.alias_size()) {
789  report_fatal_error("Module has aliases, which NVPTX does not support.");
790  return true; // error
791  }
792  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
794  "Module has a nontrivial global ctor, which NVPTX does not support.");
795  return true; // error
796  }
797  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
799  "Module has a nontrivial global dtor, which NVPTX does not support.");
800  return true; // error
801  }
802 
803  SmallString<128> Str1;
804  raw_svector_ostream OS1(Str1);
805 
806  // We need to call the parent's one explicitly.
807  bool Result = AsmPrinter::doInitialization(M);
808 
809  // Emit header before any dwarf directives are emitted below.
810  emitHeader(M, OS1, STI);
811  OutStreamer->EmitRawText(OS1.str());
812 
813  // Emit module-level inline asm if it exists.
814  if (!M.getModuleInlineAsm().empty()) {
815  OutStreamer->AddComment("Start of file scope inline assembly");
816  OutStreamer->AddBlankLine();
817  OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
818  OutStreamer->AddBlankLine();
819  OutStreamer->AddComment("End of file scope inline assembly");
820  OutStreamer->AddBlankLine();
821  }
822 
823  GlobalsEmitted = false;
824 
825  return Result;
826 }
827 
828 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
829  SmallString<128> Str2;
830  raw_svector_ostream OS2(Str2);
831 
832  emitDeclarations(M, OS2);
833 
834  // As ptxas does not support forward references of globals, we need to first
835  // sort the list of module-level globals in def-use order. We visit each
836  // global variable in order, and ensure that we emit it *after* its dependent
837  // globals. We use a little extra memory maintaining both a set and a list to
838  // have fast searches while maintaining a strict ordering.
842 
843  // Visit each global variable, in order
844  for (const GlobalVariable &I : M.globals())
845  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
846 
847  assert(GVVisited.size() == M.getGlobalList().size() &&
848  "Missed a global variable");
849  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
850 
851  // Print out module-level global variables in proper order
852  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
853  printModuleLevelGV(Globals[i], OS2);
854 
855  OS2 << '\n';
856 
857  OutStreamer->EmitRawText(OS2.str());
858 }
859 
860 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
861  const NVPTXSubtarget &STI) {
862  O << "//\n";
863  O << "// Generated by LLVM NVPTX Back-End\n";
864  O << "//\n";
865  O << "\n";
866 
867  unsigned PTXVersion = STI.getPTXVersion();
868  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
869 
870  O << ".target ";
871  O << STI.getTargetName();
872 
873  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
874  if (NTM.getDrvInterface() == NVPTX::NVCL)
875  O << ", texmode_independent";
876 
877  // FIXME: remove comment once debug info is properly supported.
878  if (MMI && MMI->hasDebugInfo())
879  O << "//, debug";
880 
881  O << "\n";
882 
883  O << ".address_size ";
884  if (NTM.is64Bit())
885  O << "64";
886  else
887  O << "32";
888  O << "\n";
889 
890  O << "\n";
891 }
892 
894  bool HasDebugInfo = MMI && MMI->hasDebugInfo();
895 
896  // If we did not emit any functions, then the global declarations have not
897  // yet been emitted.
898  if (!GlobalsEmitted) {
899  emitGlobals(M);
900  GlobalsEmitted = true;
901  }
902 
903  // XXX Temproarily remove global variables so that doFinalization() will not
904  // emit them again (global variables are emitted at beginning).
905 
906  Module::GlobalListType &global_list = M.getGlobalList();
907  int i, n = global_list.size();
908  GlobalVariable **gv_array = new GlobalVariable *[n];
909 
910  // first, back-up GlobalVariable in gv_array
911  i = 0;
912  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
913  I != E; ++I)
914  gv_array[i++] = &*I;
915 
916  // second, empty global_list
917  while (!global_list.empty())
918  global_list.remove(global_list.begin());
919 
920  // call doFinalization
921  bool ret = AsmPrinter::doFinalization(M);
922 
923  // now we restore global variables
924  for (i = 0; i < n; i++)
925  global_list.insert(global_list.end(), gv_array[i]);
926 
928 
929  delete[] gv_array;
930  // FIXME: remove comment once debug info is properly supported.
931  // Close the last emitted section
932  if (HasDebugInfo)
933  OutStreamer->EmitRawText("//\t}");
934 
935  return ret;
936 
937  //bool Result = AsmPrinter::doFinalization(M);
938  // Instead of calling the parents doFinalization, we may
939  // clone parents doFinalization and customize here.
940  // Currently, we if NVISA out the EmitGlobals() in
941  // parent's doFinalization, which is too intrusive.
942  //
943  // Same for the doInitialization.
944  //return Result;
945 }
946 
947 // This function emits appropriate linkage directives for
948 // functions and global variables.
949 //
950 // extern function declaration -> .extern
951 // extern function definition -> .visible
952 // external global variable with init -> .visible
953 // external without init -> .extern
954 // appending -> not allowed, assert.
955 // for any linkage other than
956 // internal, private, linker_private,
957 // linker_private_weak, linker_private_weak_def_auto,
958 // we emit -> .weak.
959 
960 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
961  raw_ostream &O) {
962  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
963  if (V->hasExternalLinkage()) {
964  if (isa<GlobalVariable>(V)) {
965  const GlobalVariable *GVar = cast<GlobalVariable>(V);
966  if (GVar) {
967  if (GVar->hasInitializer())
968  O << ".visible ";
969  else
970  O << ".extern ";
971  }
972  } else if (V->isDeclaration())
973  O << ".extern ";
974  else
975  O << ".visible ";
976  } else if (V->hasAppendingLinkage()) {
977  std::string msg;
978  msg.append("Error: ");
979  msg.append("Symbol ");
980  if (V->hasName())
981  msg.append(V->getName());
982  msg.append("has unsupported appending linkage type");
983  llvm_unreachable(msg.c_str());
984  } else if (!V->hasInternalLinkage() &&
985  !V->hasPrivateLinkage()) {
986  O << ".weak ";
987  }
988  }
989 }
990 
991 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
992  raw_ostream &O,
993  bool processDemoted) {
994  // Skip meta data
995  if (GVar->hasSection()) {
996  if (GVar->getSection() == "llvm.metadata")
997  return;
998  }
999 
1000  // Skip LLVM intrinsic global variables
1001  if (GVar->getName().startswith("llvm.") ||
1002  GVar->getName().startswith("nvvm."))
1003  return;
1004 
1005  const DataLayout &DL = getDataLayout();
1006 
1007  // GlobalVariables are always constant pointers themselves.
1008  PointerType *PTy = GVar->getType();
1009  Type *ETy = GVar->getValueType();
1010 
1011  if (GVar->hasExternalLinkage()) {
1012  if (GVar->hasInitializer())
1013  O << ".visible ";
1014  else
1015  O << ".extern ";
1016  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1018  GVar->hasCommonLinkage()) {
1019  O << ".weak ";
1020  }
1021 
1022  if (isTexture(*GVar)) {
1023  O << ".global .texref " << getTextureName(*GVar) << ";\n";
1024  return;
1025  }
1026 
1027  if (isSurface(*GVar)) {
1028  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1029  return;
1030  }
1031 
1032  if (GVar->isDeclaration()) {
1033  // (extern) declarations, no definition or initializer
1034  // Currently the only known declaration is for an automatic __local
1035  // (.shared) promoted to global.
1036  emitPTXGlobalVariable(GVar, O);
1037  O << ";\n";
1038  return;
1039  }
1040 
1041  if (isSampler(*GVar)) {
1042  O << ".global .samplerref " << getSamplerName(*GVar);
1043 
1044  const Constant *Initializer = nullptr;
1045  if (GVar->hasInitializer())
1046  Initializer = GVar->getInitializer();
1047  const ConstantInt *CI = nullptr;
1048  if (Initializer)
1049  CI = dyn_cast<ConstantInt>(Initializer);
1050  if (CI) {
1051  unsigned sample = CI->getZExtValue();
1052 
1053  O << " = { ";
1054 
1055  for (int i = 0,
1056  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1057  i < 3; i++) {
1058  O << "addr_mode_" << i << " = ";
1059  switch (addr) {
1060  case 0:
1061  O << "wrap";
1062  break;
1063  case 1:
1064  O << "clamp_to_border";
1065  break;
1066  case 2:
1067  O << "clamp_to_edge";
1068  break;
1069  case 3:
1070  O << "wrap";
1071  break;
1072  case 4:
1073  O << "mirror";
1074  break;
1075  }
1076  O << ", ";
1077  }
1078  O << "filter_mode = ";
1079  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1080  case 0:
1081  O << "nearest";
1082  break;
1083  case 1:
1084  O << "linear";
1085  break;
1086  case 2:
1087  llvm_unreachable("Anisotropic filtering is not supported");
1088  default:
1089  O << "nearest";
1090  break;
1091  }
1092  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1093  O << ", force_unnormalized_coords = 1";
1094  }
1095  O << " }";
1096  }
1097 
1098  O << ";\n";
1099  return;
1100  }
1101 
1102  if (GVar->hasPrivateLinkage()) {
1103  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1104  return;
1105 
1106  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1107  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1108  return;
1109  if (GVar->use_empty())
1110  return;
1111  }
1112 
1113  const Function *demotedFunc = nullptr;
1114  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1115  O << "// " << GVar->getName() << " has been demoted\n";
1116  if (localDecls.find(demotedFunc) != localDecls.end())
1117  localDecls[demotedFunc].push_back(GVar);
1118  else {
1119  std::vector<const GlobalVariable *> temp;
1120  temp.push_back(GVar);
1121  localDecls[demotedFunc] = temp;
1122  }
1123  return;
1124  }
1125 
1126  O << ".";
1127  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1128 
1129  if (isManaged(*GVar)) {
1130  O << " .attribute(.managed)";
1131  }
1132 
1133  if (GVar->getAlignment() == 0)
1134  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1135  else
1136  O << " .align " << GVar->getAlignment();
1137 
1138  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1139  (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1140  O << " .";
1141  // Special case: ABI requires that we use .u8 for predicates
1142  if (ETy->isIntegerTy(1))
1143  O << "u8";
1144  else
1145  O << getPTXFundamentalTypeStr(ETy, false);
1146  O << " ";
1147  getSymbol(GVar)->print(O, MAI);
1148 
1149  // PTX allows variable initialization only for constant and global state
1150  // spaces.
1151  if (GVar->hasInitializer()) {
1152  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1153  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1154  const Constant *Initializer = GVar->getInitializer();
1155  // 'undef' is treated as there is no value specified.
1156  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1157  O << " = ";
1158  printScalarConstant(Initializer, O);
1159  }
1160  } else {
1161  // The frontend adds zero-initializer to device and constant variables
1162  // that don't have an initial value, and UndefValue to shared
1163  // variables, so skip warning for this case.
1164  if (!GVar->getInitializer()->isNullValue() &&
1165  !isa<UndefValue>(GVar->getInitializer())) {
1166  report_fatal_error("initial value of '" + GVar->getName() +
1167  "' is not allowed in addrspace(" +
1168  Twine(PTy->getAddressSpace()) + ")");
1169  }
1170  }
1171  }
1172  } else {
1173  unsigned int ElementSize = 0;
1174 
1175  // Although PTX has direct support for struct type and array type and
1176  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1177  // targets that support these high level field accesses. Structs, arrays
1178  // and vectors are lowered into arrays of bytes.
1179  switch (ETy->getTypeID()) {
1180  case Type::IntegerTyID: // Integers larger than 64 bits
1181  case Type::StructTyID:
1182  case Type::ArrayTyID:
1183  case Type::VectorTyID:
1184  ElementSize = DL.getTypeStoreSize(ETy);
1185  // PTX allows variable initialization only for constant and
1186  // global state spaces.
1187  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1188  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1189  GVar->hasInitializer()) {
1190  const Constant *Initializer = GVar->getInitializer();
1191  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1192  AggBuffer aggBuffer(ElementSize, O, *this);
1193  bufferAggregateConstant(Initializer, &aggBuffer);
1194  if (aggBuffer.numSymbols) {
1195  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1196  O << " .u64 ";
1197  getSymbol(GVar)->print(O, MAI);
1198  O << "[";
1199  O << ElementSize / 8;
1200  } else {
1201  O << " .u32 ";
1202  getSymbol(GVar)->print(O, MAI);
1203  O << "[";
1204  O << ElementSize / 4;
1205  }
1206  O << "]";
1207  } else {
1208  O << " .b8 ";
1209  getSymbol(GVar)->print(O, MAI);
1210  O << "[";
1211  O << ElementSize;
1212  O << "]";
1213  }
1214  O << " = {";
1215  aggBuffer.print();
1216  O << "}";
1217  } else {
1218  O << " .b8 ";
1219  getSymbol(GVar)->print(O, MAI);
1220  if (ElementSize) {
1221  O << "[";
1222  O << ElementSize;
1223  O << "]";
1224  }
1225  }
1226  } else {
1227  O << " .b8 ";
1228  getSymbol(GVar)->print(O, MAI);
1229  if (ElementSize) {
1230  O << "[";
1231  O << ElementSize;
1232  O << "]";
1233  }
1234  }
1235  break;
1236  default:
1237  llvm_unreachable("type not supported yet");
1238  }
1239  }
1240  O << ";\n";
1241 }
1242 
1243 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1244  if (localDecls.find(f) == localDecls.end())
1245  return;
1246 
1247  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1248 
1249  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1250  O << "\t// demoted variable\n\t";
1251  printModuleLevelGV(gvars[i], O, true);
1252  }
1253 }
1254 
1255 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1256  raw_ostream &O) const {
1257  switch (AddressSpace) {
1258  case ADDRESS_SPACE_LOCAL:
1259  O << "local";
1260  break;
1261  case ADDRESS_SPACE_GLOBAL:
1262  O << "global";
1263  break;
1264  case ADDRESS_SPACE_CONST:
1265  O << "const";
1266  break;
1267  case ADDRESS_SPACE_SHARED:
1268  O << "shared";
1269  break;
1270  default:
1271  report_fatal_error("Bad address space found while emitting PTX: " +
1273  break;
1274  }
1275 }
1276 
1277 std::string
1278 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1279  switch (Ty->getTypeID()) {
1280  default:
1281  llvm_unreachable("unexpected type");
1282  break;
1283  case Type::IntegerTyID: {
1284  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1285  if (NumBits == 1)
1286  return "pred";
1287  else if (NumBits <= 64) {
1288  std::string name = "u";
1289  return name + utostr(NumBits);
1290  } else {
1291  llvm_unreachable("Integer too large");
1292  break;
1293  }
1294  break;
1295  }
1296  case Type::HalfTyID:
1297  // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1298  return "b16";
1299  case Type::FloatTyID:
1300  return "f32";
1301  case Type::DoubleTyID:
1302  return "f64";
1303  case Type::PointerTyID:
1304  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1305  if (useB4PTR)
1306  return "b64";
1307  else
1308  return "u64";
1309  else if (useB4PTR)
1310  return "b32";
1311  else
1312  return "u32";
1313  }
1314  llvm_unreachable("unexpected type");
1315  return nullptr;
1316 }
1317 
1318 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1319  raw_ostream &O) {
1320  const DataLayout &DL = getDataLayout();
1321 
1322  // GlobalVariables are always constant pointers themselves.
1323  Type *ETy = GVar->getValueType();
1324 
1325  O << ".";
1326  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1327  if (GVar->getAlignment() == 0)
1328  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1329  else
1330  O << " .align " << GVar->getAlignment();
1331 
1332  // Special case for i128
1333  if (ETy->isIntegerTy(128)) {
1334  O << " .b8 ";
1335  getSymbol(GVar)->print(O, MAI);
1336  O << "[16]";
1337  return;
1338  }
1339 
1340  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
1341  O << " .";
1342  O << getPTXFundamentalTypeStr(ETy);
1343  O << " ";
1344  getSymbol(GVar)->print(O, MAI);
1345  return;
1346  }
1347 
1348  int64_t ElementSize = 0;
1349 
1350  // Although PTX has direct support for struct type and array type and LLVM IR
1351  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1352  // support these high level field accesses. Structs and arrays are lowered
1353  // into arrays of bytes.
1354  switch (ETy->getTypeID()) {
1355  case Type::StructTyID:
1356  case Type::ArrayTyID:
1357  case Type::VectorTyID:
1358  ElementSize = DL.getTypeStoreSize(ETy);
1359  O << " .b8 ";
1360  getSymbol(GVar)->print(O, MAI);
1361  O << "[";
1362  if (ElementSize) {
1363  O << ElementSize;
1364  }
1365  O << "]";
1366  break;
1367  default:
1368  llvm_unreachable("type not supported yet");
1369  }
1370 }
1371 
1372 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1373  if (Ty->isSingleValueType())
1374  return DL.getPrefTypeAlignment(Ty);
1375 
1376  auto *ATy = dyn_cast<ArrayType>(Ty);
1377  if (ATy)
1378  return getOpenCLAlignment(DL, ATy->getElementType());
1379 
1380  auto *STy = dyn_cast<StructType>(Ty);
1381  if (STy) {
1382  unsigned int alignStruct = 1;
1383  // Go through each element of the struct and find the
1384  // largest alignment.
1385  for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1386  Type *ETy = STy->getElementType(i);
1387  unsigned int align = getOpenCLAlignment(DL, ETy);
1388  if (align > alignStruct)
1389  alignStruct = align;
1390  }
1391  return alignStruct;
1392  }
1393 
1394  auto *FTy = dyn_cast<FunctionType>(Ty);
1395  if (FTy)
1396  return DL.getPointerPrefAlignment();
1397  return DL.getPrefTypeAlignment(Ty);
1398 }
1399 
1400 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1401  int paramIndex, raw_ostream &O) {
1402  getSymbol(I->getParent())->print(O, MAI);
1403  O << "_param_" << paramIndex;
1404 }
1405 
1406 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1407  const DataLayout &DL = getDataLayout();
1408  const AttributeList &PAL = F->getAttributes();
1409  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
1411  unsigned paramIndex = 0;
1412  bool first = true;
1413  bool isKernelFunc = isKernelFunction(*F);
1414  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
1415  MVT thePointerTy = TLI->getPointerTy(DL);
1416 
1417  if (F->arg_empty()) {
1418  O << "()\n";
1419  return;
1420  }
1421 
1422  O << "(\n";
1423 
1424  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1425  Type *Ty = I->getType();
1426 
1427  if (!first)
1428  O << ",\n";
1429 
1430  first = false;
1431 
1432  // Handle image/sampler parameters
1433  if (isKernelFunction(*F)) {
1434  if (isSampler(*I) || isImage(*I)) {
1435  if (isImage(*I)) {
1436  std::string sname = I->getName();
1437  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1438  if (nvptxSubtarget->hasImageHandles())
1439  O << "\t.param .u64 .ptr .surfref ";
1440  else
1441  O << "\t.param .surfref ";
1442  CurrentFnSym->print(O, MAI);
1443  O << "_param_" << paramIndex;
1444  }
1445  else { // Default image is read_only
1446  if (nvptxSubtarget->hasImageHandles())
1447  O << "\t.param .u64 .ptr .texref ";
1448  else
1449  O << "\t.param .texref ";
1450  CurrentFnSym->print(O, MAI);
1451  O << "_param_" << paramIndex;
1452  }
1453  } else {
1454  if (nvptxSubtarget->hasImageHandles())
1455  O << "\t.param .u64 .ptr .samplerref ";
1456  else
1457  O << "\t.param .samplerref ";
1458  CurrentFnSym->print(O, MAI);
1459  O << "_param_" << paramIndex;
1460  }
1461  continue;
1462  }
1463  }
1464 
1465  if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
1466  if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1467  // Just print .param .align <a> .b8 .param[size];
1468  // <a> = PAL.getparamalignment
1469  // size = typeallocsize of element type
1470  unsigned align = PAL.getParamAlignment(paramIndex);
1471  if (align == 0)
1472  align = DL.getABITypeAlignment(Ty);
1473 
1474  unsigned sz = DL.getTypeAllocSize(Ty);
1475  O << "\t.param .align " << align << " .b8 ";
1476  printParamName(I, paramIndex, O);
1477  O << "[" << sz << "]";
1478 
1479  continue;
1480  }
1481  // Just a scalar
1482  auto *PTy = dyn_cast<PointerType>(Ty);
1483  if (isKernelFunc) {
1484  if (PTy) {
1485  // Special handling for pointer arguments to kernel
1486  O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1487 
1488  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1489  NVPTX::CUDA) {
1490  Type *ETy = PTy->getElementType();
1491  int addrSpace = PTy->getAddressSpace();
1492  switch (addrSpace) {
1493  default:
1494  O << ".ptr ";
1495  break;
1496  case ADDRESS_SPACE_CONST:
1497  O << ".ptr .const ";
1498  break;
1499  case ADDRESS_SPACE_SHARED:
1500  O << ".ptr .shared ";
1501  break;
1502  case ADDRESS_SPACE_GLOBAL:
1503  O << ".ptr .global ";
1504  break;
1505  }
1506  O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1507  }
1508  printParamName(I, paramIndex, O);
1509  continue;
1510  }
1511 
1512  // non-pointer scalar to kernel func
1513  O << "\t.param .";
1514  // Special case: predicate operands become .u8 types
1515  if (Ty->isIntegerTy(1))
1516  O << "u8";
1517  else
1518  O << getPTXFundamentalTypeStr(Ty);
1519  O << " ";
1520  printParamName(I, paramIndex, O);
1521  continue;
1522  }
1523  // Non-kernel function, just print .param .b<size> for ABI
1524  // and .reg .b<size> for non-ABI
1525  unsigned sz = 0;
1526  if (isa<IntegerType>(Ty)) {
1527  sz = cast<IntegerType>(Ty)->getBitWidth();
1528  if (sz < 32)
1529  sz = 32;
1530  } else if (isa<PointerType>(Ty))
1531  sz = thePointerTy.getSizeInBits();
1532  else if (Ty->isHalfTy())
1533  // PTX ABI requires all scalar parameters to be at least 32
1534  // bits in size. fp16 normally uses .b16 as its storage type
1535  // in PTX, so its size must be adjusted here, too.
1536  sz = 32;
1537  else
1538  sz = Ty->getPrimitiveSizeInBits();
1539  if (isABI)
1540  O << "\t.param .b" << sz << " ";
1541  else
1542  O << "\t.reg .b" << sz << " ";
1543  printParamName(I, paramIndex, O);
1544  continue;
1545  }
1546 
1547  // param has byVal attribute. So should be a pointer
1548  auto *PTy = dyn_cast<PointerType>(Ty);
1549  assert(PTy && "Param with byval attribute should be a pointer type");
1550  Type *ETy = PTy->getElementType();
1551 
1552  if (isABI || isKernelFunc) {
1553  // Just print .param .align <a> .b8 .param[size];
1554  // <a> = PAL.getparamalignment
1555  // size = typeallocsize of element type
1556  unsigned align = PAL.getParamAlignment(paramIndex);
1557  if (align == 0)
1558  align = DL.getABITypeAlignment(ETy);
1559  // Work around a bug in ptxas. When PTX code takes address of
1560  // byval parameter with alignment < 4, ptxas generates code to
1561  // spill argument into memory. Alas on sm_50+ ptxas generates
1562  // SASS code that fails with misaligned access. To work around
1563  // the problem, make sure that we align byval parameters by at
1564  // least 4. Matching change must be made in LowerCall() where we
1565  // prepare parameters for the call.
1566  //
1567  // TODO: this will need to be undone when we get to support multi-TU
1568  // device-side compilation as it breaks ABI compatibility with nvcc.
1569  // Hopefully ptxas bug is fixed by then.
1570  if (!isKernelFunc && align < 4)
1571  align = 4;
1572  unsigned sz = DL.getTypeAllocSize(ETy);
1573  O << "\t.param .align " << align << " .b8 ";
1574  printParamName(I, paramIndex, O);
1575  O << "[" << sz << "]";
1576  continue;
1577  } else {
1578  // Split the ETy into constituent parts and
1579  // print .param .b<size> <name> for each part.
1580  // Further, if a part is vector, print the above for
1581  // each vector element.
1582  SmallVector<EVT, 16> vtparts;
1583  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1584  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1585  unsigned elems = 1;
1586  EVT elemtype = vtparts[i];
1587  if (vtparts[i].isVector()) {
1588  elems = vtparts[i].getVectorNumElements();
1589  elemtype = vtparts[i].getVectorElementType();
1590  }
1591 
1592  for (unsigned j = 0, je = elems; j != je; ++j) {
1593  unsigned sz = elemtype.getSizeInBits();
1594  if (elemtype.isInteger() && (sz < 32))
1595  sz = 32;
1596  O << "\t.reg .b" << sz << " ";
1597  printParamName(I, paramIndex, O);
1598  if (j < je - 1)
1599  O << ",\n";
1600  ++paramIndex;
1601  }
1602  if (i < e - 1)
1603  O << ",\n";
1604  }
1605  --paramIndex;
1606  continue;
1607  }
1608  }
1609 
1610  O << "\n)\n";
1611 }
1612 
1613 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1614  raw_ostream &O) {
1615  const Function &F = MF.getFunction();
1616  emitFunctionParamList(&F, O);
1617 }
1618 
1619 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1620  const MachineFunction &MF) {
1621  SmallString<128> Str;
1622  raw_svector_ostream O(Str);
1623 
1624  // Map the global virtual register number to a register class specific
1625  // virtual register number starting from 1 with that class.
1627  //unsigned numRegClasses = TRI->getNumRegClasses();
1628 
1629  // Emit the Fake Stack Object
1630  const MachineFrameInfo &MFI = MF.getFrameInfo();
1631  int NumBytes = (int) MFI.getStackSize();
1632  if (NumBytes) {
1633  O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME
1634  << getFunctionNumber() << "[" << NumBytes << "];\n";
1635  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1636  O << "\t.reg .b64 \t%SP;\n";
1637  O << "\t.reg .b64 \t%SPL;\n";
1638  } else {
1639  O << "\t.reg .b32 \t%SP;\n";
1640  O << "\t.reg .b32 \t%SPL;\n";
1641  }
1642  }
1643 
1644  // Go through all virtual registers to establish the mapping between the
1645  // global virtual
1646  // register number and the per class virtual register number.
1647  // We use the per class virtual register number in the ptx output.
1648  unsigned int numVRs = MRI->getNumVirtRegs();
1649  for (unsigned i = 0; i < numVRs; i++) {
1650  unsigned int vr = TRI->index2VirtReg(i);
1651  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1652  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1653  int n = regmap.size();
1654  regmap.insert(std::make_pair(vr, n + 1));
1655  }
1656 
1657  // Emit register declarations
1658  // @TODO: Extract out the real register usage
1659  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1660  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1661  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1662  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1663  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1664  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1665  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1666 
1667  // Emit declaration of the virtual registers or 'physical' registers for
1668  // each register class
1669  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1670  const TargetRegisterClass *RC = TRI->getRegClass(i);
1671  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1672  std::string rcname = getNVPTXRegClassName(RC);
1673  std::string rcStr = getNVPTXRegClassStr(RC);
1674  int n = regmap.size();
1675 
1676  // Only declare those registers that may be used.
1677  if (n) {
1678  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1679  << ">;\n";
1680  }
1681  }
1682 
1683  OutStreamer->EmitRawText(O.str());
1684 }
1685 
1686 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1687  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1688  bool ignored;
1689  unsigned int numHex;
1690  const char *lead;
1691 
1692  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1693  numHex = 8;
1694  lead = "0f";
1696  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1697  numHex = 16;
1698  lead = "0d";
1700  } else
1701  llvm_unreachable("unsupported fp type");
1702 
1703  APInt API = APF.bitcastToAPInt();
1704  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1705 }
1706 
1707 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1708  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1709  O << CI->getValue();
1710  return;
1711  }
1712  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1713  printFPConstant(CFP, O);
1714  return;
1715  }
1716  if (isa<ConstantPointerNull>(CPV)) {
1717  O << "0";
1718  return;
1719  }
1720  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1721  bool IsNonGenericPointer = false;
1722  if (GVar->getType()->getAddressSpace() != 0) {
1723  IsNonGenericPointer = true;
1724  }
1725  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1726  O << "generic(";
1727  getSymbol(GVar)->print(O, MAI);
1728  O << ")";
1729  } else {
1730  getSymbol(GVar)->print(O, MAI);
1731  }
1732  return;
1733  }
1734  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1735  const Value *v = Cexpr->stripPointerCasts();
1736  PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1737  bool IsNonGenericPointer = false;
1738  if (PTy && PTy->getAddressSpace() != 0) {
1739  IsNonGenericPointer = true;
1740  }
1741  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1742  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1743  O << "generic(";
1744  getSymbol(GVar)->print(O, MAI);
1745  O << ")";
1746  } else {
1747  getSymbol(GVar)->print(O, MAI);
1748  }
1749  return;
1750  } else {
1751  lowerConstant(CPV)->print(O, MAI);
1752  return;
1753  }
1754  }
1755  llvm_unreachable("Not scalar type found in printScalarConstant()");
1756 }
1757 
// These utility functions assure we get the right sequence of bytes for a given
// type even for big-endian machines

/// Store the sizeof(T) bytes of the integer \p val into \p p, least
/// significant byte first, regardless of host endianness.
///
/// \p p must have room for sizeof(T) bytes; T is expected to be an integer
/// type of at most 64 bits. The value is widened to an unsigned 64-bit
/// integer first so that the shifts below are well defined for negative
/// inputs as well.
template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
  uint64_t bits = static_cast<uint64_t>(val);
  for (unsigned i = 0; i < sizeof(T); ++i) {
    p[i] = static_cast<unsigned char>(bits & 0xFF);
    bits >>= 8;
  }
}
/// Store the 4-byte bit pattern of \p val into \p p, least significant byte
/// first, regardless of host endianness. \p p must have room for 4 bytes.
static void ConvertFloatToBytes(unsigned char *p, float val) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float is expected to be 32 bits wide");
  // Copy the object representation instead of reading the float through an
  // integer pointer, which would violate the strict-aliasing rules.
  uint32_t bits;
  memcpy(&bits, &val, sizeof(bits));
  for (unsigned i = 0; i < sizeof(bits); ++i) {
    p[i] = static_cast<unsigned char>(bits & 0xFF);
    bits >>= 8;
  }
}
/// Store the 8-byte bit pattern of \p val into \p p, least significant byte
/// first, regardless of host endianness. \p p must have room for 8 bytes.
static void ConvertDoubleToBytes(unsigned char *p, double val) {
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double is expected to be 64 bits wide");
  // Copy the object representation instead of reading the double through an
  // integer pointer, which would violate the strict-aliasing rules.
  uint64_t bits;
  memcpy(&bits, &val, sizeof(bits));
  for (unsigned i = 0; i < sizeof(bits); ++i) {
    p[i] = static_cast<unsigned char>(bits & 0xFF);
    bits >>= 8;
  }
}
1781 
1782 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1783  AggBuffer *aggBuffer) {
1784  const DataLayout &DL = getDataLayout();
1785 
1786  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1787  int s = DL.getTypeAllocSize(CPV->getType());
1788  if (s < Bytes)
1789  s = Bytes;
1790  aggBuffer->addZeros(s);
1791  return;
1792  }
1793 
1794  unsigned char ptr[8];
1795  switch (CPV->getType()->getTypeID()) {
1796 
1797  case Type::IntegerTyID: {
1798  Type *ETy = CPV->getType();
1799  if (ETy == Type::getInt8Ty(CPV->getContext())) {
1800  unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
1801  ConvertIntToBytes<>(ptr, c);
1802  aggBuffer->addBytes(ptr, 1, Bytes);
1803  } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1804  short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
1805  ConvertIntToBytes<>(ptr, int16);
1806  aggBuffer->addBytes(ptr, 2, Bytes);
1807  } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1808  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1809  int int32 = (int)(constInt->getZExtValue());
1810  ConvertIntToBytes<>(ptr, int32);
1811  aggBuffer->addBytes(ptr, 4, Bytes);
1812  break;
1813  } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1814  if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1815  ConstantFoldConstant(Cexpr, DL))) {
1816  int int32 = (int)(constInt->getZExtValue());
1817  ConvertIntToBytes<>(ptr, int32);
1818  aggBuffer->addBytes(ptr, 4, Bytes);
1819  break;
1820  }
1821  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1822  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1823  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1824  aggBuffer->addZeros(4);
1825  break;
1826  }
1827  }
1828  llvm_unreachable("unsupported integer const type");
1829  } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1830  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1831  long long int64 = (long long)(constInt->getZExtValue());
1832  ConvertIntToBytes<>(ptr, int64);
1833  aggBuffer->addBytes(ptr, 8, Bytes);
1834  break;
1835  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1836  if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1837  ConstantFoldConstant(Cexpr, DL))) {
1838  long long int64 = (long long)(constInt->getZExtValue());
1839  ConvertIntToBytes<>(ptr, int64);
1840  aggBuffer->addBytes(ptr, 8, Bytes);
1841  break;
1842  }
1843  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1844  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1845  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1846  aggBuffer->addZeros(8);
1847  break;
1848  }
1849  }
1850  llvm_unreachable("unsupported integer const type");
1851  } else
1852  llvm_unreachable("unsupported integer const type");
1853  break;
1854  }
1855  case Type::HalfTyID:
1856  case Type::FloatTyID:
1857  case Type::DoubleTyID: {
1858  const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1859  Type *Ty = CFP->getType();
1860  if (Ty == Type::getHalfTy(CPV->getContext())) {
1861  APInt API = CFP->getValueAPF().bitcastToAPInt();
1862  uint16_t float16 = API.getLoBits(16).getZExtValue();
1863  ConvertIntToBytes<>(ptr, float16);
1864  aggBuffer->addBytes(ptr, 2, Bytes);
1865  } else if (Ty == Type::getFloatTy(CPV->getContext())) {
1866  float float32 = (float) CFP->getValueAPF().convertToFloat();
1867  ConvertFloatToBytes(ptr, float32);
1868  aggBuffer->addBytes(ptr, 4, Bytes);
1869  } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1870  double float64 = CFP->getValueAPF().convertToDouble();
1871  ConvertDoubleToBytes(ptr, float64);
1872  aggBuffer->addBytes(ptr, 8, Bytes);
1873  } else {
1874  llvm_unreachable("unsupported fp const type");
1875  }
1876  break;
1877  }
1878  case Type::PointerTyID: {
1879  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1880  aggBuffer->addSymbol(GVar, GVar);
1881  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1882  const Value *v = Cexpr->stripPointerCasts();
1883  aggBuffer->addSymbol(v, Cexpr);
1884  }
1885  unsigned int s = DL.getTypeAllocSize(CPV->getType());
1886  aggBuffer->addZeros(s);
1887  break;
1888  }
1889 
1890  case Type::ArrayTyID:
1891  case Type::VectorTyID:
1892  case Type::StructTyID: {
1893  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1894  int ElementSize = DL.getTypeAllocSize(CPV->getType());
1895  bufferAggregateConstant(CPV, aggBuffer);
1896  if (Bytes > ElementSize)
1897  aggBuffer->addZeros(Bytes - ElementSize);
1898  } else if (isa<ConstantAggregateZero>(CPV))
1899  aggBuffer->addZeros(Bytes);
1900  else
1901  llvm_unreachable("Unexpected Constant type");
1902  break;
1903  }
1904 
1905  default:
1906  llvm_unreachable("unsupported type");
1907  }
1908 }
1909 
1910 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1911  AggBuffer *aggBuffer) {
1912  const DataLayout &DL = getDataLayout();
1913  int Bytes;
1914 
1915  // Integers of arbitrary width
1916  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1917  APInt Val = CI->getValue();
1918  for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1919  uint8_t Byte = Val.getLoBits(8).getZExtValue();
1920  aggBuffer->addBytes(&Byte, 1, 1);
1921  Val.lshrInPlace(8);
1922  }
1923  return;
1924  }
1925 
1926  // Old constants
1927  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1928  if (CPV->getNumOperands())
1929  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1930  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1931  return;
1932  }
1933 
1934  if (const ConstantDataSequential *CDS =
1935  dyn_cast<ConstantDataSequential>(CPV)) {
1936  if (CDS->getNumElements())
1937  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1938  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1939  aggBuffer);
1940  return;
1941  }
1942 
1943  if (isa<ConstantStruct>(CPV)) {
1944  if (CPV->getNumOperands()) {
1945  StructType *ST = cast<StructType>(CPV->getType());
1946  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1947  if (i == (e - 1))
1948  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1949  DL.getTypeAllocSize(ST) -
1950  DL.getStructLayout(ST)->getElementOffset(i);
1951  else
1952  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1953  DL.getStructLayout(ST)->getElementOffset(i);
1954  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1955  }
1956  }
1957  return;
1958  }
1959  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1960 }
1961 
1962 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1963 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1964 /// expressions that are representable in PTX and create
1965 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1966 const MCExpr *
1967 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1968  MCContext &Ctx = OutContext;
1969 
1970  if (CV->isNullValue() || isa<UndefValue>(CV))
1971  return MCConstantExpr::create(0, Ctx);
1972 
1973  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1974  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1975 
1976  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1977  const MCSymbolRefExpr *Expr =
1979  if (ProcessingGeneric) {
1980  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1981  } else {
1982  return Expr;
1983  }
1984  }
1985 
1986  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1987  if (!CE) {
1988  llvm_unreachable("Unknown constant value to lower!");
1989  }
1990 
1991  switch (CE->getOpcode()) {
1992  default:
1993  // If the code isn't optimized, there may be outstanding folding
1994  // opportunities. Attempt to fold the expression using DataLayout as a
1995  // last resort before giving up.
1997  if (C && C != CE)
1998  return lowerConstantForGV(C, ProcessingGeneric);
1999 
2000  // Otherwise report the problem to the user.
2001  {
2002  std::string S;
2003  raw_string_ostream OS(S);
2004  OS << "Unsupported expression in static initializer: ";
2005  CE->printAsOperand(OS, /*PrintType=*/false,
2006  !MF ? nullptr : MF->getFunction().getParent());
2007  report_fatal_error(OS.str());
2008  }
2009 
2010  case Instruction::AddrSpaceCast: {
2011  // Strip the addrspacecast and pass along the operand
2012  PointerType *DstTy = cast<PointerType>(CE->getType());
2013  if (DstTy->getAddressSpace() == 0) {
2014  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2015  }
2016  std::string S;
2017  raw_string_ostream OS(S);
2018  OS << "Unsupported expression in static initializer: ";
2019  CE->printAsOperand(OS, /*PrintType=*/ false,
2020  !MF ? nullptr : MF->getFunction().getParent());
2021  report_fatal_error(OS.str());
2022  }
2023 
2024  case Instruction::GetElementPtr: {
2025  const DataLayout &DL = getDataLayout();
2026 
2027  // Generate a symbolic expression for the byte address
2028  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2029  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2030 
2031  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2032  ProcessingGeneric);
2033  if (!OffsetAI)
2034  return Base;
2035 
2036  int64_t Offset = OffsetAI.getSExtValue();
2037  return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
2038  Ctx);
2039  }
2040 
2041  case Instruction::Trunc:
2042  // We emit the value and depend on the assembler to truncate the generated
2043  // expression properly. This is important for differences between
2044  // blockaddress labels. Since the two labels are in the same function, it
2045  // is reasonable to treat their delta as a 32-bit value.
2047  case Instruction::BitCast:
2048  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2049 
2050  case Instruction::IntToPtr: {
2051  const DataLayout &DL = getDataLayout();
2052 
2053  // Handle casts to pointers by changing them into casts to the appropriate
2054  // integer type. This promotes constant folding and simplifies this code.
2055  Constant *Op = CE->getOperand(0);
2057  false/*ZExt*/);
2058  return lowerConstantForGV(Op, ProcessingGeneric);
2059  }
2060 
2061  case Instruction::PtrToInt: {
2062  const DataLayout &DL = getDataLayout();
2063 
2064  // Support only foldable casts to/from pointers that can be eliminated by
2065  // changing the pointer to the appropriately sized integer type.
2066  Constant *Op = CE->getOperand(0);
2067  Type *Ty = CE->getType();
2068 
2069  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2070 
2071  // We can emit the pointer value into this slot if the slot is an
2072  // integer slot equal to the size of the pointer.
2073  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2074  return OpExpr;
2075 
2076  // Otherwise the pointer is smaller than the resultant integer, mask off
2077  // the high bits so we are sure to get a proper truncation if the input is
2078  // a constant expr.
2079  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2080  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2081  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2082  }
2083 
2084  // The MC library also has a right-shift operator, but it isn't consistently
2085  // signed or unsigned between different targets.
2086  case Instruction::Add: {
2087  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2088  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2089  switch (CE->getOpcode()) {
2090  default: llvm_unreachable("Unknown binary operator constant cast expr");
2091  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2092  }
2093  }
2094  }
2095 }
2096 
2097 // Copy of MCExpr::print customized for NVPTX
2098 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2099  switch (Expr.getKind()) {
2100  case MCExpr::Target:
2101  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2102  case MCExpr::Constant:
2103  OS << cast<MCConstantExpr>(Expr).getValue();
2104  return;
2105 
2106  case MCExpr::SymbolRef: {
2107  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2108  const MCSymbol &Sym = SRE.getSymbol();
2109  Sym.print(OS, MAI);
2110  return;
2111  }
2112 
2113  case MCExpr::Unary: {
2114  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2115  switch (UE.getOpcode()) {
2116  case MCUnaryExpr::LNot: OS << '!'; break;
2117  case MCUnaryExpr::Minus: OS << '-'; break;
2118  case MCUnaryExpr::Not: OS << '~'; break;
2119  case MCUnaryExpr::Plus: OS << '+'; break;
2120  }
2121  printMCExpr(*UE.getSubExpr(), OS);
2122  return;
2123  }
2124 
2125  case MCExpr::Binary: {
2126  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2127 
2128  // Only print parens around the LHS if it is non-trivial.
2129  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2130  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2131  printMCExpr(*BE.getLHS(), OS);
2132  } else {
2133  OS << '(';
2134  printMCExpr(*BE.getLHS(), OS);
2135  OS<< ')';
2136  }
2137 
2138  switch (BE.getOpcode()) {
2139  case MCBinaryExpr::Add:
2140  // Print "X-42" instead of "X+-42".
2141  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2142  if (RHSC->getValue() < 0) {
2143  OS << RHSC->getValue();
2144  return;
2145  }
2146  }
2147 
2148  OS << '+';
2149  break;
2150  default: llvm_unreachable("Unhandled binary operator");
2151  }
2152 
2153  // Only print parens around the LHS if it is non-trivial.
2154  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2155  printMCExpr(*BE.getRHS(), OS);
2156  } else {
2157  OS << '(';
2158  printMCExpr(*BE.getRHS(), OS);
2159  OS << ')';
2160  }
2161  return;
2162  }
2163  }
2164 
2165  llvm_unreachable("Invalid expression kind!");
2166 }
2167 
2168 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2169 ///
2170 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2171  unsigned AsmVariant,
2172  const char *ExtraCode, raw_ostream &O) {
2173  if (ExtraCode && ExtraCode[0]) {
2174  if (ExtraCode[1] != 0)
2175  return true; // Unknown modifier.
2176 
2177  switch (ExtraCode[0]) {
2178  default:
2179  // See if this is a generic print operand
2180  return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
2181  case 'r':
2182  break;
2183  }
2184  }
2185 
2186  printOperand(MI, OpNo, O);
2187 
2188  return false;
2189 }
2190 
2191 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2192  const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2193  const char *ExtraCode, raw_ostream &O) {
2194  if (ExtraCode && ExtraCode[0])
2195  return true; // Unknown modifier
2196 
2197  O << '[';
2198  printMemOperand(MI, OpNo, O);
2199  O << ']';
2200 
2201  return false;
2202 }
2203 
// Prints operand opNum of MI to O, dispatching on the operand's type.
// In the lines visible here, Modifier is consulted only on the immediate
// path.
//
// NOTE(review): this listing has gaps. The embedded line numbers skip
// 2208-2209, 2213, 2219, 2225, 2229, 2233 and 2237, so the case labels and a
// few statements are not visible. Restore them from the upstream file before
// relying on or compiling this block.
2204 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2205  raw_ostream &O, const char *Modifier) {
2206  const MachineOperand &MO = MI->getOperand(opNum);
2207  switch (MO.getType()) {
// NOTE(review): lines 2208-2209 are missing here; presumably the case label
// for register operands and the opening of the if whose else appears at 2214.
// Confirm against upstream.
// The VRDepot register is printed as DEPOTNAME followed by the current
// function number rather than by a register name.
2210  if (MO.getReg() == NVPTX::VRDepot)
2211  O << DEPOTNAME << getFunctionNumber();
2212  else
// NOTE(review): line 2213 (the statement governed by the else above) is
// missing from this listing.
2214  } else {
2215  emitVirtualRegister(MO.getReg(), O);
2216  }
2217  return;
2218 
// NOTE(review): line 2219 is missing; presumably the immediate-operand case
// label. Confirm against upstream.
// Without a modifier the immediate is printed as-is.
2220  if (!Modifier)
2221  O << MO.getImm();
// strstr(...) == Modifier holds only when "vec" occurs at the very start of
// the modifier string, i.e. this is a prefix test.
2222  else if (strstr(Modifier, "vec") == Modifier)
2223  printVecModifiedImmediate(MO, Modifier, O);
2224  else
// NOTE(review): line 2225 is missing; the string below is the argument of a
// call whose callee is not visible here.
2226  "Don't know how to handle modifier on immediate operand");
2227  return;
2228 
// NOTE(review): line 2229 is missing; presumably the floating-point immediate
// case label.
2230  printFPConstant(MO.getFPImm(), O);
2231  break;
2232 
// NOTE(review): line 2233 is missing; presumably the global-address case
// label.
// The global is printed through its MCSymbol.
2234  getSymbol(MO.getGlobal())->print(O, MAI);
2235  break;
2236 
// NOTE(review): line 2237 is missing; presumably the basic-block case label.
// The block is printed through its MCSymbol.
2238  MO.getMBB()->getSymbol()->print(O, MAI);
2239  return;
2240 
// Any operand type not handled above is treated as a programming error.
2241  default:
2242  llvm_unreachable("Operand type not supported.");
2243  }
2244 }
2245 
2246 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2247  raw_ostream &O, const char *Modifier) {
2248  printOperand(MI, opNum, O);
2249 
2250  if (Modifier && strcmp(Modifier, "add") == 0) {
2251  O << ", ";
2252  printOperand(MI, opNum + 1, O);
2253  } else {
2254  if (MI->getOperand(opNum + 1).isImm() &&
2255  MI->getOperand(opNum + 1).getImm() == 0)
2256  return; // don't print ',0' or '+0'
2257  O << "+";
2258  printOperand(MI, opNum + 1, O);
2259  }
2260 }
2261 
2262 // Force static initialization.
2266 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
uint64_t CallInst * C
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:90
unsigned getAlignment() const
Definition: GlobalObject.h:59
unsigned getPTXVersion() const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
StringRef getTargetFeatureString() const
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:165
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1194
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1547
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
Bitwise negation.
Definition: MCExpr.h:363
MachineBasicBlock * getMBB() const
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:93
bool hasDebugInfo() const
Returns true if valid debug info is present.
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:321
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool hasPrivateLinkage() const
Definition: GlobalValue.h:434
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:115
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool getMaxNReg(const Function &F, unsigned &x)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
unsigned getPointerPrefAlignment(unsigned AS=0) const
Return target's alignment for stack-based pointers FIXME: The defaults need to be removed once all of...
Definition: DataLayout.cpp:620
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:137
2: 32-bit floating point type
Definition: Type.h:59
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:88
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:137
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:588
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:161
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool isTexture(const Value &val)
unsigned Reg
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:422
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:402
float convertToFloat() const
Definition: APFloat.h:1098
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:388
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:562
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:713
Unary plus.
Definition: MCExpr.h:364
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:504
MachineBasicBlock reference.
arg_iterator arg_end()
Definition: Function.h:666
13: Structures
Definition: Type.h:73
unsigned const TargetRegisterInfo * TRI
Metadata node.
Definition: Metadata.h:862
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:464
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:96
1: 16-bit floating point type
Definition: Type.h:58
const GlobalListType & getGlobalList() const
Get the Module's list of global variables (constant).
Definition: Module.h:503
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
15: Pointers
Definition: Type.h:75
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:175
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
bool hasImageHandles() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable *> &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1580
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:514
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:116
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:164
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
Definition: DataLayout.cpp:638
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:138
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:314
Class to represent struct types.
Definition: DerivedTypes.h:201
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void clearAnnotationCache(const Module *Mod)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:36
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
Name of external global symbol.
This file contains the simple types necessary to represent the attributes associated with functions a...
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:166
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:311
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
const char * getSymbolName() const
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:971
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This file implements a class to represent arbitrary precision integral constant values and operations...
bool hasCommonLinkage() const
Definition: GlobalValue.h:439
Target & getTheNVPTXTarget64()
unsigned getNumRegClasses() const
bool getMaxNTIDz(const Function &F, unsigned &z)
unsigned getSizeInBits() const
Context object for machine code objects.
Definition: MCContext.h:63
bool hasExternalLinkage() const
Definition: GlobalValue.h:421
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:85
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:885
Class to represent function types.
Definition: DerivedTypes.h:103
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1559
bool isKernelFunction(const Function &F)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:308
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isSurface(const Value &val)
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:288
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4444
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:570
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:565
bool arg_empty() const
Definition: Function.h:685
#define T
Unary assembler expressions.
Definition: MCExpr.h:358
Class to represent array types.
Definition: DerivedTypes.h:369
bool getMaxNTIDx(const Function &F, unsigned &x)
std::string getTextureName(const Value &val)
ManagedStringPool * getManagedStrPool() const
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:210
RegisterAsmPrinter - Helper template for registering a target specific assembly printer, for use in the target machine initialization function.
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:123
StringRef getTargetCPU() const
#define DEPOTNAME
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:459
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:425
bool isSampler(const Value &val)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:84
Unary expressions.
Definition: MCExpr.h:42
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:99
Value * getOperand(unsigned i) const
Definition: User.h:170
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:161
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:55
Class to represent pointers.
Definition: DerivedTypes.h:467
bool hasAppendingLinkage() const
Definition: GlobalValue.h:432
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:146
11: Arbitrary bit width integers
Definition: Type.h:71
Target & getTheNVPTXTarget32()
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:742
0: type with no size
Definition: Type.h:57
Address of a global value.
static bool isEmptyXXStructor(GlobalVariable *GV)
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:155
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
void print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens=false) const
Definition: MCExpr.cpp:42
unsigned const MachineRegisterInfo * MRI
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:112
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Machine Value Type.
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:84
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
static bool is64Bit(const char *name)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
const GlobalValue * getGlobal() const
const NVPTXTargetLowering * getTargetLowering() const override
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
size_t alias_size() const
Definition: Module.h:603
double convertToDouble() const
Definition: APFloat.h:1097
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:81
std::string getTargetName() const
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:732
This file declares a class to represent arbitrary precision floating point values and provide a varie...
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasInternalLinkage() const
Definition: GlobalValue.h:433
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:144
static const char * getRegisterName(unsigned RegNo)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const Triple & getTargetTriple() const
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:495
arg_iterator arg_begin()
Definition: Function.h:657
NVPTX::DrvInterface getDrvInterface() const
unsigned size() const
Definition: DenseMap.h:96
Logical negation.
Definition: MCExpr.h:361
Extended Value Type.
Definition: ValueTypes.h:34
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:538
Binary assembler expressions.
Definition: MCExpr.h:415
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
std::string getVirtualRegisterName(unsigned) const
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4112
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:493
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool getMaxNTIDy(const Function &F, unsigned &y)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool getReqNTIDx(const Function &F, unsigned &x)
unsigned first
bool hasWeakLinkage() const
Definition: GlobalValue.h:429
const APFloat & getValueAPF() const
Definition: Constants.h:299
bool isImage(const Value &val)
14: Arrays
Definition: Type.h:74
bool getReqNTIDz(const Function &F, unsigned &z)
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:204
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:163
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:120
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
void setOpcode(unsigned Op)
Definition: MCInst.h:173
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1032
static void ConvertFloatToBytes(unsigned char *p, float val)
std::string * getManagedString(const char *S)
16: SIMD 'packed' format, or other vector type
Definition: Type.h:76
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:426
const MCSymbol & getSymbol() const
Definition: MCExpr.h:334
ExprKind getKind() const
Definition: MCExpr.h:73
virtual const MCExpr * lowerConstant(const Constant *CV)
Lower the specified LLVM Constant to an MCExpr.
MachineOperand class - Representation of each machine instruction operand.
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:82
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
bool isVector(MCInstrInfo const &MCII, MCInst const &MCI)
AddressSpace
Definition: NVPTXBaseInfo.h:22
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:722
bool isImageReadWrite(const Value &val)
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:255
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:227
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
StringRef str()
Return a StringRef for the vector contents.
Definition: raw_ostream.h:529
std::string getSurfaceName(const Value &val)
const char * getName(unsigned RegNo) const
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable *> &Order, DenseSet< const GlobalVariable *> &Visited, DenseSet< const GlobalVariable *> &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted...
std::string utostr(uint64_t X, bool isNeg=false)
Definition: StringExtras.h:203
int64_t getImm() const
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:180
const Function & getFunction() const
Return the LLVM function that this machine code represents.
std::string getSamplerName(const Value &val)
Class for arbitrary precision integers.
Definition: APInt.h:69
ConstantArray - Constant Array Declarations.
Definition: Constants.h:410
bool isManaged(const Value &val)
bool getReqNTIDy(const Function &F, unsigned &y)
iterator_range< user_iterator > users()
Definition: Value.h:399
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:887
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:428
Representation of each machine instruction.
Definition: MachineInstr.h:60
pointer remove(iterator &IT)
Definition: ilist.h:251
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
unsigned getParamAlignment(unsigned ArgNo) const
Return the alignment for the specified function parameter.
NVPTXTargetMachine.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:244
iterator insert(iterator where, pointer New)
Definition: ilist.h:228
iterator end()
Definition: Module.h:579
const Function * getParent() const
Definition: Argument.h:42
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:543
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:123
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const
Targets can override this to emit stuff at the start of a basic block.
static bool usedInGlobalVarDef(const Constant *C)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
#define I(x, y, z)
Definition: MD5.cpp:58
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
iterator begin()
Definition: Module.h:577
iterator end()
Definition: DenseMap.h:79
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:124
References to labels and assigned expressions.
Definition: MCExpr.h:41
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:275
Unary minus.
Definition: MCExpr.h:362
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
const std::string & getModuleInlineAsm() const
Get any module-scope inline assembly blocks.
Definition: Module.h:246
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:559
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:201
3: 64-bit floating point type
Definition: Type.h:60
static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:477
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:405
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:247
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:73
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
static const char * name
Floating-point immediate operand.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:411
bool hasInitializer() const
Definitions have initializers, declarations don't.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Constant expressions.
Definition: MCExpr.h:40
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
Binary expressions.
Definition: MCExpr.h:39
unsigned int getSmVersion() const
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
Definition: DataLayout.h:438
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:212
static void ConvertDoubleToBytes(unsigned char *p, double val)
iterator_range< global_iterator > globals()
Definition: Module.h:566
IRTranslator LLVM IR MI
void addOperand(const MCOperand &Op)
Definition: MCInst.h:186
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
APInt bitcastToAPInt() const
Definition: APFloat.h:1094
bool getMinCTASm(const Function &F, unsigned &x)
Target specific expression.
Definition: MCExpr.h:43
static void ConvertIntToBytes(unsigned char *p, T val)
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:35
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:123
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
bool use_empty() const
Definition: Value.h:322
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx)
Definition: MCExpr.cpp:164
bool isImageWriteOnly(const Value &val)
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:273
void LLVMInitializeNVPTXAsmPrinter()
bool doFinalization(Module &M) override
Shut down the asmprinter.
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
const NVPTXRegisterInfo * getRegisterInfo() const override
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:60