LLVM  16.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXAsmPrinter.h"
19 #include "NVPTX.h"
20 #include "NVPTXMCExpr.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
27 #include "cl_common_defines.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/SmallString.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/StringExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/Triple.h"
37 #include "llvm/ADT/Twine.h"
39 #include "llvm/CodeGen/Analysis.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/Constant.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/DataLayout.h"
55 #include "llvm/IR/DebugInfo.h"
57 #include "llvm/IR/DebugLoc.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Function.h"
60 #include "llvm/IR/GlobalValue.h"
61 #include "llvm/IR/GlobalVariable.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/IR/Operator.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/MC/MCExpr.h"
69 #include "llvm/MC/MCInst.h"
70 #include "llvm/MC/MCInstrDesc.h"
71 #include "llvm/MC/MCStreamer.h"
72 #include "llvm/MC/MCSymbol.h"
73 #include "llvm/MC/TargetRegistry.h"
74 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/Endian.h"
80 #include "llvm/Support/Path.h"
85 #include <cassert>
86 #include <cstdint>
87 #include <cstring>
88 #include <new>
89 #include <string>
90 #include <utility>
91 #include <vector>
92 
93 using namespace llvm;
94 
95 #define DEPOTNAME "__local_depot"
96 
97 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
98 /// depends.
99 static void
102  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
103  Globals.insert(GV);
104  else {
105  if (const User *U = dyn_cast<User>(V)) {
106  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
107  DiscoverDependentGlobals(U->getOperand(i), Globals);
108  }
109  }
110  }
111 }
112 
113 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
114 /// instances to be emitted, but only after any dependents have been added
115 /// first.s
116 static void
121  // Have we already visited this one?
122  if (Visited.count(GV))
123  return;
124 
125  // Do we have a circular dependency?
126  if (!Visiting.insert(GV).second)
127  report_fatal_error("Circular dependency found in global variable set");
128 
129  // Make sure we visit all dependents first
131  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
132  DiscoverDependentGlobals(GV->getOperand(i), Others);
133 
134  for (const GlobalVariable *GV : Others)
135  VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
136 
137  // Now we can visit ourself
138  Order.push_back(GV);
139  Visited.insert(GV);
140  Visiting.erase(GV);
141 }
142 
143 void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
144  NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
145  getSubtargetInfo().getFeatureBits());
146 
147  MCInst Inst;
148  lowerToMCInst(MI, Inst);
149  EmitToStreamer(*OutStreamer, Inst);
150 }
151 
152 // Handle symbol backtracking for targets that do not support image handles
153 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
154  unsigned OpNo, MCOperand &MCOp) {
155  const MachineOperand &MO = MI->getOperand(OpNo);
156  const MCInstrDesc &MCID = MI->getDesc();
157 
158  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
159  // This is a texture fetch, so operand 4 is a texref and operand 5 is
160  // a samplerref
161  if (OpNo == 4 && MO.isImm()) {
162  lowerImageHandleSymbol(MO.getImm(), MCOp);
163  return true;
164  }
165  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
166  lowerImageHandleSymbol(MO.getImm(), MCOp);
167  return true;
168  }
169 
170  return false;
171  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
172  unsigned VecSize =
173  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
174 
175  // For a surface load of vector size N, the Nth operand will be the surfref
176  if (OpNo == VecSize && MO.isImm()) {
177  lowerImageHandleSymbol(MO.getImm(), MCOp);
178  return true;
179  }
180 
181  return false;
182  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
183  // This is a surface store, so operand 0 is a surfref
184  if (OpNo == 0 && MO.isImm()) {
185  lowerImageHandleSymbol(MO.getImm(), MCOp);
186  return true;
187  }
188 
189  return false;
190  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
191  // This is a query, so operand 1 is a surfref/texref
192  if (OpNo == 1 && MO.isImm()) {
193  lowerImageHandleSymbol(MO.getImm(), MCOp);
194  return true;
195  }
196 
197  return false;
198  }
199 
200  return false;
201 }
202 
203 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
204  // Ewwww
205  LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
206  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
208  const char *Sym = MFI->getImageHandleSymbol(Index);
209  std::string *SymNamePtr =
210  nvTM.getManagedStrPool()->getManagedString(Sym);
211  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
212 }
213 
214 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
215  OutMI.setOpcode(MI->getOpcode());
216  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
217  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
218  const MachineOperand &MO = MI->getOperand(0);
219  OutMI.addOperand(GetSymbolRef(
221  return;
222  }
223 
224  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
225  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
226  const MachineOperand &MO = MI->getOperand(i);
227 
228  MCOperand MCOp;
229  if (!STI.hasImageHandles()) {
230  if (lowerImageHandleOperand(MI, i, MCOp)) {
231  OutMI.addOperand(MCOp);
232  continue;
233  }
234  }
235 
236  if (lowerOperand(MO, MCOp))
237  OutMI.addOperand(MCOp);
238  }
239 }
240 
241 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
242  MCOperand &MCOp) {
243  switch (MO.getType()) {
244  default: llvm_unreachable("unknown operand type");
246  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
247  break;
249  MCOp = MCOperand::createImm(MO.getImm());
250  break;
253  MO.getMBB()->getSymbol(), OutContext));
254  break;
257  break;
259  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
260  break;
262  const ConstantFP *Cnt = MO.getFPImm();
263  const APFloat &Val = Cnt->getValueAPF();
264 
265  switch (Cnt->getType()->getTypeID()) {
266  default: report_fatal_error("Unsupported FP type"); break;
267  case Type::HalfTyID:
268  MCOp = MCOperand::createExpr(
270  break;
271  case Type::FloatTyID:
272  MCOp = MCOperand::createExpr(
274  break;
275  case Type::DoubleTyID:
276  MCOp = MCOperand::createExpr(
278  break;
279  }
280  break;
281  }
282  }
283  return true;
284 }
285 
286 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
288  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
289 
290  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
291  unsigned RegNum = RegMap[Reg];
292 
293  // Encode the register class in the upper 4 bits
294  // Must be kept in sync with NVPTXInstPrinter::printRegName
295  unsigned Ret = 0;
296  if (RC == &NVPTX::Int1RegsRegClass) {
297  Ret = (1 << 28);
298  } else if (RC == &NVPTX::Int16RegsRegClass) {
299  Ret = (2 << 28);
300  } else if (RC == &NVPTX::Int32RegsRegClass) {
301  Ret = (3 << 28);
302  } else if (RC == &NVPTX::Int64RegsRegClass) {
303  Ret = (4 << 28);
304  } else if (RC == &NVPTX::Float32RegsRegClass) {
305  Ret = (5 << 28);
306  } else if (RC == &NVPTX::Float64RegsRegClass) {
307  Ret = (6 << 28);
308  } else if (RC == &NVPTX::Float16RegsRegClass) {
309  Ret = (7 << 28);
310  } else if (RC == &NVPTX::Float16x2RegsRegClass) {
311  Ret = (8 << 28);
312  } else {
313  report_fatal_error("Bad register class");
314  }
315 
316  // Insert the vreg number
317  Ret |= (RegNum & 0x0FFFFFFF);
318  return Ret;
319  } else {
320  // Some special-use registers are actually physical registers.
321  // Encode this as the register class ID of 0 and the real register ID.
322  return Reg & 0x0FFFFFFF;
323  }
324 }
325 
327  const MCExpr *Expr;
329  OutContext);
330  return MCOperand::createExpr(Expr);
331 }
332 
333 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
334  const DataLayout &DL = getDataLayout();
335  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
336  const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
337 
338  Type *Ty = F->getReturnType();
339 
340  bool isABI = (STI.getSmVersion() >= 20);
341 
342  if (Ty->getTypeID() == Type::VoidTyID)
343  return;
344 
345  O << " (";
346 
347  if (isABI) {
348  if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
349  unsigned size = 0;
350  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
351  size = ITy->getBitWidth();
352  } else {
353  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
355  }
356  // PTX ABI requires all scalar return values to be at least 32
357  // bits in size. fp16 normally uses .b16 as its storage type in
358  // PTX, so its size must be adjusted here, too.
360 
361  O << ".param .b" << size << " func_retval0";
362  } else if (isa<PointerType>(Ty)) {
363  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
364  << " func_retval0";
365  } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
366  unsigned totalsz = DL.getTypeAllocSize(Ty);
367  unsigned retAlignment = 0;
368  if (!getAlign(*F, 0, retAlignment))
369  retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
370  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
371  << "]";
372  } else
373  llvm_unreachable("Unknown return type");
374  } else {
375  SmallVector<EVT, 16> vtparts;
376  ComputeValueVTs(*TLI, DL, Ty, vtparts);
377  unsigned idx = 0;
378  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
379  unsigned elems = 1;
380  EVT elemtype = vtparts[i];
381  if (vtparts[i].isVector()) {
382  elems = vtparts[i].getVectorNumElements();
383  elemtype = vtparts[i].getVectorElementType();
384  }
385 
386  for (unsigned j = 0, je = elems; j != je; ++j) {
387  unsigned sz = elemtype.getSizeInBits();
388  if (elemtype.isInteger())
389  sz = promoteScalarArgumentSize(sz);
390  O << ".reg .b" << sz << " func_retval" << idx;
391  if (j < je - 1)
392  O << ", ";
393  ++idx;
394  }
395  if (i < e - 1)
396  O << ", ";
397  }
398  }
399  O << ") ";
400 }
401 
402 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
403  raw_ostream &O) {
404  const Function &F = MF.getFunction();
405  printReturnValStr(&F, O);
406 }
407 
408 // Return true if MBB is the header of a loop marked with
409 // llvm.loop.unroll.disable.
410 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
411 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
412  const MachineBasicBlock &MBB) const {
413  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
414  // We insert .pragma "nounroll" only to the loop header.
415  if (!LI.isLoopHeader(&MBB))
416  return false;
417 
418  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
419  // we iterate through each back edge of the loop with header MBB, and check
420  // whether its metadata contains llvm.loop.unroll.disable.
421  for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
422  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
423  // Edges from other loops to MBB are not back edges.
424  continue;
425  }
426  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
427  if (MDNode *LoopID =
428  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
429  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
430  return true;
431  }
432  }
433  }
434  return false;
435 }
436 
437 void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
439  if (isLoopHeaderOfNoUnroll(MBB))
440  OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
441 }
442 
443 void NVPTXAsmPrinter::emitFunctionEntryLabel() {
444  SmallString<128> Str;
445  raw_svector_ostream O(Str);
446 
447  if (!GlobalsEmitted) {
448  emitGlobals(*MF->getFunction().getParent());
449  GlobalsEmitted = true;
450  }
451 
452  // Set up
453  MRI = &MF->getRegInfo();
454  F = &MF->getFunction();
455  emitLinkageDirective(F, O);
456  if (isKernelFunction(*F))
457  O << ".entry ";
458  else {
459  O << ".func ";
460  printReturnValStr(*MF, O);
461  }
462 
463  CurrentFnSym->print(O, MAI);
464 
465  emitFunctionParamList(*MF, O);
466 
467  if (isKernelFunction(*F))
468  emitKernelFunctionDirectives(*F, O);
469 
470  OutStreamer->emitRawText(O.str());
471 
472  VRegMapping.clear();
473  // Emit open brace for function body.
474  OutStreamer->emitRawText(StringRef("{\n"));
475  setAndEmitFunctionVirtualRegisters(*MF);
476  // Emit initial .loc debug directive for correct relocation symbol data.
477  if (MMI && MMI->hasDebugInfo())
479 }
480 
482  bool Result = AsmPrinter::runOnMachineFunction(F);
483  // Emit closing brace for the body of function F.
484  // The closing brace must be emitted here because we need to emit additional
485  // debug labels/data after the last basic block.
486  // We need to emit the closing brace here because we don't have function that
487  // finished emission of the function body.
488  OutStreamer->emitRawText(StringRef("}\n"));
489  return Result;
490 }
491 
492 void NVPTXAsmPrinter::emitFunctionBodyStart() {
493  SmallString<128> Str;
494  raw_svector_ostream O(Str);
495  emitDemotedVars(&MF->getFunction(), O);
496  OutStreamer->emitRawText(O.str());
497 }
498 
499 void NVPTXAsmPrinter::emitFunctionBodyEnd() {
500  VRegMapping.clear();
501 }
502 
504  SmallString<128> Str;
506  return OutContext.getOrCreateSymbol(Str);
507 }
508 
509 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
510  Register RegNo = MI->getOperand(0).getReg();
511  if (Register::isVirtualRegister(RegNo)) {
512  OutStreamer->AddComment(Twine("implicit-def: ") +
513  getVirtualRegisterName(RegNo));
514  } else {
515  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
516  OutStreamer->AddComment(Twine("implicit-def: ") +
517  STI.getRegisterInfo()->getName(RegNo));
518  }
519  OutStreamer->addBlankLine();
520 }
521 
522 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
523  raw_ostream &O) const {
524  // If the NVVM IR has some of reqntid* specified, then output
525  // the reqntid directive, and set the unspecified ones to 1.
526  // If none of reqntid* is specified, don't output reqntid directive.
527  unsigned reqntidx, reqntidy, reqntidz;
528  bool specified = false;
529  if (!getReqNTIDx(F, reqntidx))
530  reqntidx = 1;
531  else
532  specified = true;
533  if (!getReqNTIDy(F, reqntidy))
534  reqntidy = 1;
535  else
536  specified = true;
537  if (!getReqNTIDz(F, reqntidz))
538  reqntidz = 1;
539  else
540  specified = true;
541 
542  if (specified)
543  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
544  << "\n";
545 
546  // If the NVVM IR has some of maxntid* specified, then output
547  // the maxntid directive, and set the unspecified ones to 1.
548  // If none of maxntid* is specified, don't output maxntid directive.
549  unsigned maxntidx, maxntidy, maxntidz;
550  specified = false;
551  if (!getMaxNTIDx(F, maxntidx))
552  maxntidx = 1;
553  else
554  specified = true;
555  if (!getMaxNTIDy(F, maxntidy))
556  maxntidy = 1;
557  else
558  specified = true;
559  if (!getMaxNTIDz(F, maxntidz))
560  maxntidz = 1;
561  else
562  specified = true;
563 
564  if (specified)
565  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
566  << "\n";
567 
568  unsigned mincta;
569  if (getMinCTASm(F, mincta))
570  O << ".minnctapersm " << mincta << "\n";
571 
572  unsigned maxnreg;
573  if (getMaxNReg(F, maxnreg))
574  O << ".maxnreg " << maxnreg << "\n";
575 }
576 
577 std::string
579  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
580 
581  std::string Name;
582  raw_string_ostream NameStr(Name);
583 
584  VRegRCMap::const_iterator I = VRegMapping.find(RC);
585  assert(I != VRegMapping.end() && "Bad register class");
586  const DenseMap<unsigned, unsigned> &RegMap = I->second;
587 
589  assert(VI != RegMap.end() && "Bad virtual register");
590  unsigned MappedVR = VI->second;
591 
592  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
593 
594  NameStr.flush();
595  return Name;
596 }
597 
598 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
599  raw_ostream &O) {
600  O << getVirtualRegisterName(vr);
601 }
602 
603 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
604  emitLinkageDirective(F, O);
605  if (isKernelFunction(*F))
606  O << ".entry ";
607  else
608  O << ".func ";
609  printReturnValStr(F, O);
610  getSymbol(F)->print(O, MAI);
611  O << "\n";
612  emitFunctionParamList(F, O);
613  O << ";\n";
614 }
615 
616 static bool usedInGlobalVarDef(const Constant *C) {
617  if (!C)
618  return false;
619 
620  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
621  return GV->getName() != "llvm.used";
622  }
623 
624  for (const User *U : C->users())
625  if (const Constant *C = dyn_cast<Constant>(U))
626  if (usedInGlobalVarDef(C))
627  return true;
628 
629  return false;
630 }
631 
632 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
633  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
634  if (othergv->getName() == "llvm.used")
635  return true;
636  }
637 
638  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
639  if (instr->getParent() && instr->getParent()->getParent()) {
640  const Function *curFunc = instr->getParent()->getParent();
641  if (oneFunc && (curFunc != oneFunc))
642  return false;
643  oneFunc = curFunc;
644  return true;
645  } else
646  return false;
647  }
648 
649  for (const User *UU : U->users())
650  if (!usedInOneFunc(UU, oneFunc))
651  return false;
652 
653  return true;
654 }
655 
656 /* Find out if a global variable can be demoted to local scope.
657  * Currently, this is valid for CUDA shared variables, which have local
658  * scope and global lifetime. So the conditions to check are :
659  * 1. Is the global variable in shared address space?
660  * 2. Does it have internal linkage?
661  * 3. Is the global variable referenced only in one function?
662  */
663 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
664  if (!gv->hasInternalLinkage())
665  return false;
666  PointerType *Pty = gv->getType();
668  return false;
669 
670  const Function *oneFunc = nullptr;
671 
672  bool flag = usedInOneFunc(gv, oneFunc);
673  if (!flag)
674  return false;
675  if (!oneFunc)
676  return false;
677  f = oneFunc;
678  return true;
679 }
680 
681 static bool useFuncSeen(const Constant *C,
683  for (const User *U : C->users()) {
684  if (const Constant *cu = dyn_cast<Constant>(U)) {
685  if (useFuncSeen(cu, seenMap))
686  return true;
687  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
688  const BasicBlock *bb = I->getParent();
689  if (!bb)
690  continue;
691  const Function *caller = bb->getParent();
692  if (!caller)
693  continue;
694  if (seenMap.find(caller) != seenMap.end())
695  return true;
696  }
697  }
698  return false;
699 }
700 
701 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
703  for (const Function &F : M) {
704  if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
705  emitDeclaration(&F, O);
706  continue;
707  }
708 
709  if (F.isDeclaration()) {
710  if (F.use_empty())
711  continue;
712  if (F.getIntrinsicID())
713  continue;
714  emitDeclaration(&F, O);
715  continue;
716  }
717  for (const User *U : F.users()) {
718  if (const Constant *C = dyn_cast<Constant>(U)) {
719  if (usedInGlobalVarDef(C)) {
720  // The use is in the initialization of a global variable
721  // that is a function pointer, so print a declaration
722  // for the original function
723  emitDeclaration(&F, O);
724  break;
725  }
726  // Emit a declaration of this function if the function that
727  // uses this constant expr has already been seen.
728  if (useFuncSeen(C, seenMap)) {
729  emitDeclaration(&F, O);
730  break;
731  }
732  }
733 
734  if (!isa<Instruction>(U))
735  continue;
736  const Instruction *instr = cast<Instruction>(U);
737  const BasicBlock *bb = instr->getParent();
738  if (!bb)
739  continue;
740  const Function *caller = bb->getParent();
741  if (!caller)
742  continue;
743 
744  // If a caller has already been seen, then the caller is
745  // appearing in the module before the callee. so print out
746  // a declaration for the callee.
747  if (seenMap.find(caller) != seenMap.end()) {
748  emitDeclaration(&F, O);
749  break;
750  }
751  }
752  seenMap[&F] = true;
753  }
754 }
755 
757  if (!GV) return true;
758  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
759  if (!InitList) return true; // Not an array; we don't know how to parse.
760  return InitList->getNumOperands() == 0;
761 }
762 
763 void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
764  // Construct a default subtarget off of the TargetMachine defaults. The
765  // rest of NVPTX isn't friendly to change subtargets per function and
766  // so the default TargetMachine will have all of the options.
767  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
768  const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
769  SmallString<128> Str1;
770  raw_svector_ostream OS1(Str1);
771 
772  // Emit header before any dwarf directives are emitted below.
773  emitHeader(M, OS1, *STI);
774  OutStreamer->emitRawText(OS1.str());
775 }
776 
778  if (M.alias_size()) {
779  report_fatal_error("Module has aliases, which NVPTX does not support.");
780  return true; // error
781  }
782  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
784  "Module has a nontrivial global ctor, which NVPTX does not support.");
785  return true; // error
786  }
787  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
789  "Module has a nontrivial global dtor, which NVPTX does not support.");
790  return true; // error
791  }
792 
793  // We need to call the parent's one explicitly.
794  bool Result = AsmPrinter::doInitialization(M);
795 
796  GlobalsEmitted = false;
797 
798  return Result;
799 }
800 
801 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
802  SmallString<128> Str2;
803  raw_svector_ostream OS2(Str2);
804 
805  emitDeclarations(M, OS2);
806 
807  // As ptxas does not support forward references of globals, we need to first
808  // sort the list of module-level globals in def-use order. We visit each
809  // global variable in order, and ensure that we emit it *after* its dependent
810  // globals. We use a little extra memory maintaining both a set and a list to
811  // have fast searches while maintaining a strict ordering.
815 
816  // Visit each global variable, in order
817  for (const GlobalVariable &I : M.globals())
818  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
819 
820  assert(GVVisited.size() == M.getGlobalList().size() &&
821  "Missed a global variable");
822  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
823 
824  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
825  const NVPTXSubtarget &STI =
826  *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
827 
828  // Print out module-level global variables in proper order
829  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
830  printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
831 
832  OS2 << '\n';
833 
834  OutStreamer->emitRawText(OS2.str());
835 }
836 
837 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
838  const NVPTXSubtarget &STI) {
839  O << "//\n";
840  O << "// Generated by LLVM NVPTX Back-End\n";
841  O << "//\n";
842  O << "\n";
843 
844  unsigned PTXVersion = STI.getPTXVersion();
845  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
846 
847  O << ".target ";
848  O << STI.getTargetName();
849 
850  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
851  if (NTM.getDrvInterface() == NVPTX::NVCL)
852  O << ", texmode_independent";
853 
854  bool HasFullDebugInfo = false;
855  for (DICompileUnit *CU : M.debug_compile_units()) {
856  switch(CU->getEmissionKind()) {
859  break;
862  HasFullDebugInfo = true;
863  break;
864  }
865  if (HasFullDebugInfo)
866  break;
867  }
868  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
869  O << ", debug";
870 
871  O << "\n";
872 
873  O << ".address_size ";
874  if (NTM.is64Bit())
875  O << "64";
876  else
877  O << "32";
878  O << "\n";
879 
880  O << "\n";
881 }
882 
884  bool HasDebugInfo = MMI && MMI->hasDebugInfo();
885 
886  // If we did not emit any functions, then the global declarations have not
887  // yet been emitted.
888  if (!GlobalsEmitted) {
889  emitGlobals(M);
890  GlobalsEmitted = true;
891  }
892 
893  // call doFinalization
895 
897 
898  if (auto *TS = static_cast<NVPTXTargetStreamer *>(
899  OutStreamer->getTargetStreamer())) {
900  // Close the last emitted section
901  if (HasDebugInfo) {
902  TS->closeLastSection();
903  // Emit empty .debug_loc section for better support of the empty files.
904  OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
905  }
906 
907  // Output last DWARF .file directives, if any.
908  TS->outputDwarfFileDirectives();
909  }
910 
911  return ret;
912 
913  //bool Result = AsmPrinter::doFinalization(M);
914  // Instead of calling the parents doFinalization, we may
915  // clone parents doFinalization and customize here.
916  // Currently, we if NVISA out the EmitGlobals() in
917  // parent's doFinalization, which is too intrusive.
918  //
919  // Same for the doInitialization.
920  //return Result;
921 }
922 
923 // This function emits appropriate linkage directives for
924 // functions and global variables.
925 //
926 // extern function declaration -> .extern
927 // extern function definition -> .visible
928 // external global variable with init -> .visible
929 // external without init -> .extern
930 // appending -> not allowed, assert.
931 // for any linkage other than
932 // internal, private, linker_private,
933 // linker_private_weak, linker_private_weak_def_auto,
934 // we emit -> .weak.
935 
936 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
937  raw_ostream &O) {
938  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
939  if (V->hasExternalLinkage()) {
940  if (isa<GlobalVariable>(V)) {
941  const GlobalVariable *GVar = cast<GlobalVariable>(V);
942  if (GVar) {
943  if (GVar->hasInitializer())
944  O << ".visible ";
945  else
946  O << ".extern ";
947  }
948  } else if (V->isDeclaration())
949  O << ".extern ";
950  else
951  O << ".visible ";
952  } else if (V->hasAppendingLinkage()) {
953  std::string msg;
954  msg.append("Error: ");
955  msg.append("Symbol ");
956  if (V->hasName())
957  msg.append(std::string(V->getName()));
958  msg.append("has unsupported appending linkage type");
959  llvm_unreachable(msg.c_str());
960  } else if (!V->hasInternalLinkage() &&
961  !V->hasPrivateLinkage()) {
962  O << ".weak ";
963  }
964  }
965 }
966 
967 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
968  raw_ostream &O, bool processDemoted,
969  const NVPTXSubtarget &STI) {
970  // Skip meta data
971  if (GVar->hasSection()) {
972  if (GVar->getSection() == "llvm.metadata")
973  return;
974  }
975 
976  // Skip LLVM intrinsic global variables
977  if (GVar->getName().startswith("llvm.") ||
978  GVar->getName().startswith("nvvm."))
979  return;
980 
981  const DataLayout &DL = getDataLayout();
982 
983  // GlobalVariables are always constant pointers themselves.
984  PointerType *PTy = GVar->getType();
985  Type *ETy = GVar->getValueType();
986 
987  if (GVar->hasExternalLinkage()) {
988  if (GVar->hasInitializer())
989  O << ".visible ";
990  else
991  O << ".extern ";
992  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
994  GVar->hasCommonLinkage()) {
995  O << ".weak ";
996  }
997 
998  if (isTexture(*GVar)) {
999  O << ".global .texref " << getTextureName(*GVar) << ";\n";
1000  return;
1001  }
1002 
1003  if (isSurface(*GVar)) {
1004  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1005  return;
1006  }
1007 
1008  if (GVar->isDeclaration()) {
1009  // (extern) declarations, no definition or initializer
1010  // Currently the only known declaration is for an automatic __local
1011  // (.shared) promoted to global.
1012  emitPTXGlobalVariable(GVar, O, STI);
1013  O << ";\n";
1014  return;
1015  }
1016 
1017  if (isSampler(*GVar)) {
1018  O << ".global .samplerref " << getSamplerName(*GVar);
1019 
1020  const Constant *Initializer = nullptr;
1021  if (GVar->hasInitializer())
1022  Initializer = GVar->getInitializer();
1023  const ConstantInt *CI = nullptr;
1024  if (Initializer)
1025  CI = dyn_cast<ConstantInt>(Initializer);
1026  if (CI) {
1027  unsigned sample = CI->getZExtValue();
1028 
1029  O << " = { ";
1030 
1031  for (int i = 0,
1032  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1033  i < 3; i++) {
1034  O << "addr_mode_" << i << " = ";
1035  switch (addr) {
1036  case 0:
1037  O << "wrap";
1038  break;
1039  case 1:
1040  O << "clamp_to_border";
1041  break;
1042  case 2:
1043  O << "clamp_to_edge";
1044  break;
1045  case 3:
1046  O << "wrap";
1047  break;
1048  case 4:
1049  O << "mirror";
1050  break;
1051  }
1052  O << ", ";
1053  }
1054  O << "filter_mode = ";
1055  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1056  case 0:
1057  O << "nearest";
1058  break;
1059  case 1:
1060  O << "linear";
1061  break;
1062  case 2:
1063  llvm_unreachable("Anisotropic filtering is not supported");
1064  default:
1065  O << "nearest";
1066  break;
1067  }
1068  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1069  O << ", force_unnormalized_coords = 1";
1070  }
1071  O << " }";
1072  }
1073 
1074  O << ";\n";
1075  return;
1076  }
1077 
1078  if (GVar->hasPrivateLinkage()) {
1079  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1080  return;
1081 
1082  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1083  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1084  return;
1085  if (GVar->use_empty())
1086  return;
1087  }
1088 
1089  const Function *demotedFunc = nullptr;
1090  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1091  O << "// " << GVar->getName() << " has been demoted\n";
1092  if (localDecls.find(demotedFunc) != localDecls.end())
1093  localDecls[demotedFunc].push_back(GVar);
1094  else {
1095  std::vector<const GlobalVariable *> temp;
1096  temp.push_back(GVar);
1097  localDecls[demotedFunc] = temp;
1098  }
1099  return;
1100  }
1101 
1102  O << ".";
1103  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1104 
1105  if (isManaged(*GVar)) {
1106  if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1108  ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1109  }
1110  O << " .attribute(.managed)";
1111  }
1112 
1113  if (MaybeAlign A = GVar->getAlign())
1114  O << " .align " << A->value();
1115  else
1116  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1117 
1118  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1119  (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1120  O << " .";
1121  // Special case: ABI requires that we use .u8 for predicates
1122  if (ETy->isIntegerTy(1))
1123  O << "u8";
1124  else
1125  O << getPTXFundamentalTypeStr(ETy, false);
1126  O << " ";
1127  getSymbol(GVar)->print(O, MAI);
1128 
1129  // Ptx allows variable initilization only for constant and global state
1130  // spaces.
1131  if (GVar->hasInitializer()) {
1132  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1133  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1134  const Constant *Initializer = GVar->getInitializer();
1135  // 'undef' is treated as there is no value specified.
1136  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1137  O << " = ";
1138  printScalarConstant(Initializer, O);
1139  }
1140  } else {
1141  // The frontend adds zero-initializer to device and constant variables
1142  // that don't have an initial value, and UndefValue to shared
1143  // variables, so skip warning for this case.
1144  if (!GVar->getInitializer()->isNullValue() &&
1145  !isa<UndefValue>(GVar->getInitializer())) {
1146  report_fatal_error("initial value of '" + GVar->getName() +
1147  "' is not allowed in addrspace(" +
1148  Twine(PTy->getAddressSpace()) + ")");
1149  }
1150  }
1151  }
1152  } else {
1153  unsigned int ElementSize = 0;
1154 
1155  // Although PTX has direct support for struct type and array type and
1156  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1157  // targets that support these high level field accesses. Structs, arrays
1158  // and vectors are lowered into arrays of bytes.
1159  switch (ETy->getTypeID()) {
1160  case Type::IntegerTyID: // Integers larger than 64 bits
1161  case Type::StructTyID:
1162  case Type::ArrayTyID:
1163  case Type::FixedVectorTyID:
1164  ElementSize = DL.getTypeStoreSize(ETy);
1165  // Ptx allows variable initilization only for constant and
1166  // global state spaces.
1167  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1168  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1169  GVar->hasInitializer()) {
1170  const Constant *Initializer = GVar->getInitializer();
1171  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1172  AggBuffer aggBuffer(ElementSize, *this);
1173  bufferAggregateConstant(Initializer, &aggBuffer);
1174  if (aggBuffer.numSymbols()) {
1175  unsigned int ptrSize = MAI->getCodePointerSize();
1176  if (ElementSize % ptrSize ||
1177  !aggBuffer.allSymbolsAligned(ptrSize)) {
1178  // Print in bytes and use the mask() operator for pointers.
1179  if (!STI.hasMaskOperator())
1181  "initialized packed aggregate with pointers '" +
1182  GVar->getName() +
1183  "' requires at least PTX ISA version 7.1");
1184  O << " .u8 ";
1185  getSymbol(GVar)->print(O, MAI);
1186  O << "[" << ElementSize << "] = {";
1187  aggBuffer.printBytes(O);
1188  O << "}";
1189  } else {
1190  O << " .u" << ptrSize * 8 << " ";
1191  getSymbol(GVar)->print(O, MAI);
1192  O << "[" << ElementSize / ptrSize << "] = {";
1193  aggBuffer.printWords(O);
1194  O << "}";
1195  }
1196  } else {
1197  O << " .b8 ";
1198  getSymbol(GVar)->print(O, MAI);
1199  O << "[" << ElementSize << "] = {";
1200  aggBuffer.printBytes(O);
1201  O << "}";
1202  }
1203  } else {
1204  O << " .b8 ";
1205  getSymbol(GVar)->print(O, MAI);
1206  if (ElementSize) {
1207  O << "[";
1208  O << ElementSize;
1209  O << "]";
1210  }
1211  }
1212  } else {
1213  O << " .b8 ";
1214  getSymbol(GVar)->print(O, MAI);
1215  if (ElementSize) {
1216  O << "[";
1217  O << ElementSize;
1218  O << "]";
1219  }
1220  }
1221  break;
1222  default:
1223  llvm_unreachable("type not supported yet");
1224  }
1225  }
1226  O << ";\n";
1227 }
1228 
1229 void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1230  const Value *v = Symbols[nSym];
1231  const Value *v0 = SymbolsBeforeStripping[nSym];
1232  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1233  MCSymbol *Name = AP.getSymbol(GVar);
1234  PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1235  // Is v0 a generic pointer?
1236  bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1237  if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1238  os << "generic(";
1239  Name->print(os, AP.MAI);
1240  os << ")";
1241  } else {
1242  Name->print(os, AP.MAI);
1243  }
1244  } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1245  const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1246  AP.printMCExpr(*Expr, os);
1247  } else
1248  llvm_unreachable("symbol type unknown");
1249 }
1250 
1251 void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1252  unsigned int ptrSize = AP.MAI->getCodePointerSize();
1253  symbolPosInBuffer.push_back(size);
1254  unsigned int nSym = 0;
1255  unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1256  for (unsigned int pos = 0; pos < size;) {
1257  if (pos)
1258  os << ", ";
1259  if (pos != nextSymbolPos) {
1260  os << (unsigned int)buffer[pos];
1261  ++pos;
1262  continue;
1263  }
1264  // Generate a per-byte mask() operator for the symbol, which looks like:
1265  // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1266  // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1267  std::string symText;
1268  llvm::raw_string_ostream oss(symText);
1269  printSymbol(nSym, oss);
1270  for (unsigned i = 0; i < ptrSize; ++i) {
1271  if (i)
1272  os << ", ";
1273  llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1274  os << "(" << symText << ")";
1275  }
1276  pos += ptrSize;
1277  nextSymbolPos = symbolPosInBuffer[++nSym];
1278  assert(nextSymbolPos >= pos);
1279  }
1280 }
1281 
1282 void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1283  unsigned int ptrSize = AP.MAI->getCodePointerSize();
1284  symbolPosInBuffer.push_back(size);
1285  unsigned int nSym = 0;
1286  unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1287  assert(nextSymbolPos % ptrSize == 0);
1288  for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1289  if (pos)
1290  os << ", ";
1291  if (pos == nextSymbolPos) {
1292  printSymbol(nSym, os);
1293  nextSymbolPos = symbolPosInBuffer[++nSym];
1294  assert(nextSymbolPos % ptrSize == 0);
1295  assert(nextSymbolPos >= pos + ptrSize);
1296  } else if (ptrSize == 4)
1297  os << support::endian::read32le(&buffer[pos]);
1298  else
1299  os << support::endian::read64le(&buffer[pos]);
1300  }
1301 }
1302 
1303 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1304  if (localDecls.find(f) == localDecls.end())
1305  return;
1306 
1307  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1308 
1309  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1310  const NVPTXSubtarget &STI =
1311  *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1312 
1313  for (const GlobalVariable *GV : gvars) {
1314  O << "\t// demoted variable\n\t";
1315  printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1316  }
1317 }
1318 
1319 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1320  raw_ostream &O) const {
1321  switch (AddressSpace) {
1322  case ADDRESS_SPACE_LOCAL:
1323  O << "local";
1324  break;
1325  case ADDRESS_SPACE_GLOBAL:
1326  O << "global";
1327  break;
1328  case ADDRESS_SPACE_CONST:
1329  O << "const";
1330  break;
1331  case ADDRESS_SPACE_SHARED:
1332  O << "shared";
1333  break;
1334  default:
1335  report_fatal_error("Bad address space found while emitting PTX: " +
1337  break;
1338  }
1339 }
1340 
1341 std::string
1342 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1343  switch (Ty->getTypeID()) {
1344  case Type::IntegerTyID: {
1345  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1346  if (NumBits == 1)
1347  return "pred";
1348  else if (NumBits <= 64) {
1349  std::string name = "u";
1350  return name + utostr(NumBits);
1351  } else {
1352  llvm_unreachable("Integer too large");
1353  break;
1354  }
1355  break;
1356  }
1357  case Type::HalfTyID:
1358  // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1359  return "b16";
1360  case Type::FloatTyID:
1361  return "f32";
1362  case Type::DoubleTyID:
1363  return "f64";
1364  case Type::PointerTyID:
1365  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1366  if (useB4PTR)
1367  return "b64";
1368  else
1369  return "u64";
1370  else if (useB4PTR)
1371  return "b32";
1372  else
1373  return "u32";
1374  default:
1375  break;
1376  }
1377  llvm_unreachable("unexpected type");
1378 }
1379 
1380 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1381  raw_ostream &O,
1382  const NVPTXSubtarget &STI) {
1383  const DataLayout &DL = getDataLayout();
1384 
1385  // GlobalVariables are always constant pointers themselves.
1386  Type *ETy = GVar->getValueType();
1387 
1388  O << ".";
1389  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1390  if (isManaged(*GVar)) {
1391  if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1393  ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1394  }
1395  O << " .attribute(.managed)";
1396  }
1397  if (MaybeAlign A = GVar->getAlign())
1398  O << " .align " << A->value();
1399  else
1400  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1401 
1402  // Special case for i128
1403  if (ETy->isIntegerTy(128)) {
1404  O << " .b8 ";
1405  getSymbol(GVar)->print(O, MAI);
1406  O << "[16]";
1407  return;
1408  }
1409 
1410  if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1411  O << " .";
1412  O << getPTXFundamentalTypeStr(ETy);
1413  O << " ";
1414  getSymbol(GVar)->print(O, MAI);
1415  return;
1416  }
1417 
1418  int64_t ElementSize = 0;
1419 
1420  // Although PTX has direct support for struct type and array type and LLVM IR
1421  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1422  // support these high level field accesses. Structs and arrays are lowered
1423  // into arrays of bytes.
1424  switch (ETy->getTypeID()) {
1425  case Type::StructTyID:
1426  case Type::ArrayTyID:
1427  case Type::FixedVectorTyID:
1428  ElementSize = DL.getTypeStoreSize(ETy);
1429  O << " .b8 ";
1430  getSymbol(GVar)->print(O, MAI);
1431  O << "[";
1432  if (ElementSize) {
1433  O << ElementSize;
1434  }
1435  O << "]";
1436  break;
1437  default:
1438  llvm_unreachable("type not supported yet");
1439  }
1440 }
1441 
1442 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1443  int paramIndex, raw_ostream &O) {
1444  getSymbol(I->getParent())->print(O, MAI);
1445  O << "_param_" << paramIndex;
1446 }
1447 
1448 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1449  const DataLayout &DL = getDataLayout();
1450  const AttributeList &PAL = F->getAttributes();
1451  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1452  const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1453 
1455  unsigned paramIndex = 0;
1456  bool first = true;
1457  bool isKernelFunc = isKernelFunction(*F);
1458  bool isABI = (STI.getSmVersion() >= 20);
1459  bool hasImageHandles = STI.hasImageHandles();
1460  MVT thePointerTy = TLI->getPointerTy(DL);
1461 
1462  if (F->arg_empty()) {
1463  O << "()\n";
1464  return;
1465  }
1466 
1467  O << "(\n";
1468 
1469  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1470  Type *Ty = I->getType();
1471 
1472  if (!first)
1473  O << ",\n";
1474 
1475  first = false;
1476 
1477  // Handle image/sampler parameters
1478  if (isKernelFunction(*F)) {
1479  if (isSampler(*I) || isImage(*I)) {
1480  if (isImage(*I)) {
1481  std::string sname = std::string(I->getName());
1482  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1483  if (hasImageHandles)
1484  O << "\t.param .u64 .ptr .surfref ";
1485  else
1486  O << "\t.param .surfref ";
1487  CurrentFnSym->print(O, MAI);
1488  O << "_param_" << paramIndex;
1489  }
1490  else { // Default image is read_only
1491  if (hasImageHandles)
1492  O << "\t.param .u64 .ptr .texref ";
1493  else
1494  O << "\t.param .texref ";
1495  CurrentFnSym->print(O, MAI);
1496  O << "_param_" << paramIndex;
1497  }
1498  } else {
1499  if (hasImageHandles)
1500  O << "\t.param .u64 .ptr .samplerref ";
1501  else
1502  O << "\t.param .samplerref ";
1503  CurrentFnSym->print(O, MAI);
1504  O << "_param_" << paramIndex;
1505  }
1506  continue;
1507  }
1508  }
1509 
1510  auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1511  paramIndex](Type *Ty) -> Align {
1512  Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1513  MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1514  return std::max(TypeAlign, ParamAlign.valueOrOne());
1515  };
1516 
1517  if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1518  if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1519  // Just print .param .align <a> .b8 .param[size];
1520  // <a> = optimal alignment for the element type; always multiple of
1521  // PAL.getParamAlignment
1522  // size = typeallocsize of element type
1523  Align OptimalAlign = getOptimalAlignForParam(Ty);
1524 
1525  O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1526  printParamName(I, paramIndex, O);
1527  O << "[" << DL.getTypeAllocSize(Ty) << "]";
1528 
1529  continue;
1530  }
1531  // Just a scalar
1532  auto *PTy = dyn_cast<PointerType>(Ty);
1533  if (isKernelFunc) {
1534  if (PTy) {
1535  // Special handling for pointer arguments to kernel
1536  O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1537 
1538  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1539  NVPTX::CUDA) {
1540  int addrSpace = PTy->getAddressSpace();
1541  switch (addrSpace) {
1542  default:
1543  O << ".ptr ";
1544  break;
1545  case ADDRESS_SPACE_CONST:
1546  O << ".ptr .const ";
1547  break;
1548  case ADDRESS_SPACE_SHARED:
1549  O << ".ptr .shared ";
1550  break;
1551  case ADDRESS_SPACE_GLOBAL:
1552  O << ".ptr .global ";
1553  break;
1554  }
1555  Align ParamAlign = I->getParamAlign().valueOrOne();
1556  O << ".align " << ParamAlign.value() << " ";
1557  }
1558  printParamName(I, paramIndex, O);
1559  continue;
1560  }
1561 
1562  // non-pointer scalar to kernel func
1563  O << "\t.param .";
1564  // Special case: predicate operands become .u8 types
1565  if (Ty->isIntegerTy(1))
1566  O << "u8";
1567  else
1568  O << getPTXFundamentalTypeStr(Ty);
1569  O << " ";
1570  printParamName(I, paramIndex, O);
1571  continue;
1572  }
1573  // Non-kernel function, just print .param .b<size> for ABI
1574  // and .reg .b<size> for non-ABI
1575  unsigned sz = 0;
1576  if (isa<IntegerType>(Ty)) {
1577  sz = cast<IntegerType>(Ty)->getBitWidth();
1578  sz = promoteScalarArgumentSize(sz);
1579  } else if (isa<PointerType>(Ty))
1580  sz = thePointerTy.getSizeInBits();
1581  else if (Ty->isHalfTy())
1582  // PTX ABI requires all scalar parameters to be at least 32
1583  // bits in size. fp16 normally uses .b16 as its storage type
1584  // in PTX, so its size must be adjusted here, too.
1585  sz = 32;
1586  else
1587  sz = Ty->getPrimitiveSizeInBits();
1588  if (isABI)
1589  O << "\t.param .b" << sz << " ";
1590  else
1591  O << "\t.reg .b" << sz << " ";
1592  printParamName(I, paramIndex, O);
1593  continue;
1594  }
1595 
1596  // param has byVal attribute.
1597  Type *ETy = PAL.getParamByValType(paramIndex);
1598  assert(ETy && "Param should have byval type");
1599 
1600  if (isABI || isKernelFunc) {
1601  // Just print .param .align <a> .b8 .param[size];
1602  // <a> = optimal alignment for the element type; always multiple of
1603  // PAL.getParamAlignment
1604  // size = typeallocsize of element type
1605  Align OptimalAlign = getOptimalAlignForParam(ETy);
1606 
1607  // Work around a bug in ptxas. When PTX code takes address of
1608  // byval parameter with alignment < 4, ptxas generates code to
1609  // spill argument into memory. Alas on sm_50+ ptxas generates
1610  // SASS code that fails with misaligned access. To work around
1611  // the problem, make sure that we align byval parameters by at
1612  // least 4. Matching change must be made in LowerCall() where we
1613  // prepare parameters for the call.
1614  //
1615  // TODO: this will need to be undone when we get to support multi-TU
1616  // device-side compilation as it breaks ABI compatibility with nvcc.
1617  // Hopefully ptxas bug is fixed by then.
1618  if (!isKernelFunc && OptimalAlign < Align(4))
1619  OptimalAlign = Align(4);
1620  unsigned sz = DL.getTypeAllocSize(ETy);
1621  O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1622  printParamName(I, paramIndex, O);
1623  O << "[" << sz << "]";
1624  continue;
1625  } else {
1626  // Split the ETy into constituent parts and
1627  // print .param .b<size> <name> for each part.
1628  // Further, if a part is vector, print the above for
1629  // each vector element.
1630  SmallVector<EVT, 16> vtparts;
1631  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1632  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1633  unsigned elems = 1;
1634  EVT elemtype = vtparts[i];
1635  if (vtparts[i].isVector()) {
1636  elems = vtparts[i].getVectorNumElements();
1637  elemtype = vtparts[i].getVectorElementType();
1638  }
1639 
1640  for (unsigned j = 0, je = elems; j != je; ++j) {
1641  unsigned sz = elemtype.getSizeInBits();
1642  if (elemtype.isInteger())
1643  sz = promoteScalarArgumentSize(sz);
1644  O << "\t.reg .b" << sz << " ";
1645  printParamName(I, paramIndex, O);
1646  if (j < je - 1)
1647  O << ",\n";
1648  ++paramIndex;
1649  }
1650  if (i < e - 1)
1651  O << ",\n";
1652  }
1653  --paramIndex;
1654  continue;
1655  }
1656  }
1657 
1658  O << "\n)\n";
1659 }
1660 
1661 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1662  raw_ostream &O) {
1663  const Function &F = MF.getFunction();
1664  emitFunctionParamList(&F, O);
1665 }
1666 
1667 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1668  const MachineFunction &MF) {
1669  SmallString<128> Str;
1670  raw_svector_ostream O(Str);
1671 
1672  // Map the global virtual register number to a register class specific
1673  // virtual register number starting from 1 with that class.
1675  //unsigned numRegClasses = TRI->getNumRegClasses();
1676 
1677  // Emit the Fake Stack Object
1678  const MachineFrameInfo &MFI = MF.getFrameInfo();
1679  int NumBytes = (int) MFI.getStackSize();
1680  if (NumBytes) {
1681  O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1682  << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1683  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1684  O << "\t.reg .b64 \t%SP;\n";
1685  O << "\t.reg .b64 \t%SPL;\n";
1686  } else {
1687  O << "\t.reg .b32 \t%SP;\n";
1688  O << "\t.reg .b32 \t%SPL;\n";
1689  }
1690  }
1691 
1692  // Go through all virtual registers to establish the mapping between the
1693  // global virtual
1694  // register number and the per class virtual register number.
1695  // We use the per class virtual register number in the ptx output.
1696  unsigned int numVRs = MRI->getNumVirtRegs();
1697  for (unsigned i = 0; i < numVRs; i++) {
1699  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1700  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1701  int n = regmap.size();
1702  regmap.insert(std::make_pair(vr, n + 1));
1703  }
1704 
1705  // Emit register declarations
1706  // @TODO: Extract out the real register usage
1707  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1708  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1709  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1710  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1711  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1712  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1713  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1714 
1715  // Emit declaration of the virtual registers or 'physical' registers for
1716  // each register class
1717  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1718  const TargetRegisterClass *RC = TRI->getRegClass(i);
1719  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1720  std::string rcname = getNVPTXRegClassName(RC);
1721  std::string rcStr = getNVPTXRegClassStr(RC);
1722  int n = regmap.size();
1723 
1724  // Only declare those registers that may be used.
1725  if (n) {
1726  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1727  << ">;\n";
1728  }
1729  }
1730 
1731  OutStreamer->emitRawText(O.str());
1732 }
1733 
1734 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1735  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1736  bool ignored;
1737  unsigned int numHex;
1738  const char *lead;
1739 
1740  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1741  numHex = 8;
1742  lead = "0f";
1744  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1745  numHex = 16;
1746  lead = "0d";
1748  } else
1749  llvm_unreachable("unsupported fp type");
1750 
1751  APInt API = APF.bitcastToAPInt();
1752  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1753 }
1754 
1755 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1756  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1757  O << CI->getValue();
1758  return;
1759  }
1760  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1761  printFPConstant(CFP, O);
1762  return;
1763  }
1764  if (isa<ConstantPointerNull>(CPV)) {
1765  O << "0";
1766  return;
1767  }
1768  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1769  bool IsNonGenericPointer = false;
1770  if (GVar->getType()->getAddressSpace() != 0) {
1771  IsNonGenericPointer = true;
1772  }
1773  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1774  O << "generic(";
1775  getSymbol(GVar)->print(O, MAI);
1776  O << ")";
1777  } else {
1778  getSymbol(GVar)->print(O, MAI);
1779  }
1780  return;
1781  }
1782  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1783  const Value *v = Cexpr->stripPointerCasts();
1784  PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1785  bool IsNonGenericPointer = false;
1786  if (PTy && PTy->getAddressSpace() != 0) {
1787  IsNonGenericPointer = true;
1788  }
1789  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1790  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1791  O << "generic(";
1792  getSymbol(GVar)->print(O, MAI);
1793  O << ")";
1794  } else {
1795  getSymbol(GVar)->print(O, MAI);
1796  }
1797  return;
1798  } else {
1799  lowerConstant(CPV)->print(O, MAI);
1800  return;
1801  }
1802  }
1803  llvm_unreachable("Not scalar type found in printScalarConstant()");
1804 }
1805 
1806 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1807  AggBuffer *AggBuffer) {
1808  const DataLayout &DL = getDataLayout();
1809  int AllocSize = DL.getTypeAllocSize(CPV->getType());
1810  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1811  // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1812  // only the space allocated by CPV.
1813  AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1814  return;
1815  }
1816 
1817  // Helper for filling AggBuffer with APInts.
1818  auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1819  size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1820  SmallVector<unsigned char, 16> Buf(NumBytes);
1821  for (unsigned I = 0; I < NumBytes; ++I) {
1822  Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1823  }
1824  AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1825  };
1826 
1827  switch (CPV->getType()->getTypeID()) {
1828  case Type::IntegerTyID:
1829  if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1830  AddIntToBuffer(CI->getValue());
1831  break;
1832  }
1833  if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1834  if (const auto *CI =
1835  dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1836  AddIntToBuffer(CI->getValue());
1837  break;
1838  }
1839  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1840  Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1841  AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1842  AggBuffer->addZeros(AllocSize);
1843  break;
1844  }
1845  }
1846  llvm_unreachable("unsupported integer const type");
1847  break;
1848 
1849  case Type::HalfTyID:
1850  case Type::FloatTyID:
1851  case Type::DoubleTyID:
1852  AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1853  break;
1854 
1855  case Type::PointerTyID: {
1856  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1857  AggBuffer->addSymbol(GVar, GVar);
1858  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1859  const Value *v = Cexpr->stripPointerCasts();
1860  AggBuffer->addSymbol(v, Cexpr);
1861  }
1862  AggBuffer->addZeros(AllocSize);
1863  break;
1864  }
1865 
1866  case Type::ArrayTyID:
1867  case Type::FixedVectorTyID:
1868  case Type::StructTyID: {
1869  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1870  bufferAggregateConstant(CPV, AggBuffer);
1871  if (Bytes > AllocSize)
1872  AggBuffer->addZeros(Bytes - AllocSize);
1873  } else if (isa<ConstantAggregateZero>(CPV))
1874  AggBuffer->addZeros(Bytes);
1875  else
1876  llvm_unreachable("Unexpected Constant type");
1877  break;
1878  }
1879 
1880  default:
1881  llvm_unreachable("unsupported type");
1882  }
1883 }
1884 
1885 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1886  AggBuffer *aggBuffer) {
1887  const DataLayout &DL = getDataLayout();
1888  int Bytes;
1889 
1890  // Integers of arbitrary width
1891  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1892  APInt Val = CI->getValue();
1893  for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1894  uint8_t Byte = Val.getLoBits(8).getZExtValue();
1895  aggBuffer->addBytes(&Byte, 1, 1);
1896  Val.lshrInPlace(8);
1897  }
1898  return;
1899  }
1900 
1901  // Old constants
1902  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1903  if (CPV->getNumOperands())
1904  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1905  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1906  return;
1907  }
1908 
1909  if (const ConstantDataSequential *CDS =
1910  dyn_cast<ConstantDataSequential>(CPV)) {
1911  if (CDS->getNumElements())
1912  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1913  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1914  aggBuffer);
1915  return;
1916  }
1917 
1918  if (isa<ConstantStruct>(CPV)) {
1919  if (CPV->getNumOperands()) {
1920  StructType *ST = cast<StructType>(CPV->getType());
1921  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1922  if (i == (e - 1))
1923  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1924  DL.getTypeAllocSize(ST) -
1925  DL.getStructLayout(ST)->getElementOffset(i);
1926  else
1927  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1928  DL.getStructLayout(ST)->getElementOffset(i);
1929  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1930  }
1931  }
1932  return;
1933  }
1934  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1935 }
1936 
1937 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1938 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1939 /// expressions that are representable in PTX and create
1940 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1941 const MCExpr *
1942 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1943  MCContext &Ctx = OutContext;
1944 
1945  if (CV->isNullValue() || isa<UndefValue>(CV))
1946  return MCConstantExpr::create(0, Ctx);
1947 
1948  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1949  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1950 
1951  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1952  const MCSymbolRefExpr *Expr =
1954  if (ProcessingGeneric) {
1955  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1956  } else {
1957  return Expr;
1958  }
1959  }
1960 
1961  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1962  if (!CE) {
1963  llvm_unreachable("Unknown constant value to lower!");
1964  }
1965 
1966  switch (CE->getOpcode()) {
1967  default: {
1968  // If the code isn't optimized, there may be outstanding folding
1969  // opportunities. Attempt to fold the expression using DataLayout as a
1970  // last resort before giving up.
1972  if (C != CE)
1973  return lowerConstantForGV(C, ProcessingGeneric);
1974 
1975  // Otherwise report the problem to the user.
1976  std::string S;
1977  raw_string_ostream OS(S);
1978  OS << "Unsupported expression in static initializer: ";
1979  CE->printAsOperand(OS, /*PrintType=*/false,
1980  !MF ? nullptr : MF->getFunction().getParent());
1981  report_fatal_error(Twine(OS.str()));
1982  }
1983 
1984  case Instruction::AddrSpaceCast: {
1985  // Strip the addrspacecast and pass along the operand
1986  PointerType *DstTy = cast<PointerType>(CE->getType());
1987  if (DstTy->getAddressSpace() == 0) {
1988  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1989  }
1990  std::string S;
1991  raw_string_ostream OS(S);
1992  OS << "Unsupported expression in static initializer: ";
1993  CE->printAsOperand(OS, /*PrintType=*/ false,
1994  !MF ? nullptr : MF->getFunction().getParent());
1995  report_fatal_error(Twine(OS.str()));
1996  }
1997 
1998  case Instruction::GetElementPtr: {
1999  const DataLayout &DL = getDataLayout();
2000 
2001  // Generate a symbolic expression for the byte address
2002  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2003  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2004 
2005  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2006  ProcessingGeneric);
2007  if (!OffsetAI)
2008  return Base;
2009 
2010  int64_t Offset = OffsetAI.getSExtValue();
2012  Ctx);
2013  }
2014 
2015  case Instruction::Trunc:
2016  // We emit the value and depend on the assembler to truncate the generated
2017  // expression properly. This is important for differences between
2018  // blockaddress labels. Since the two labels are in the same function, it
2019  // is reasonable to treat their delta as a 32-bit value.
2020  [[fallthrough]];
2021  case Instruction::BitCast:
2022  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2023 
2024  case Instruction::IntToPtr: {
2025  const DataLayout &DL = getDataLayout();
2026 
2027  // Handle casts to pointers by changing them into casts to the appropriate
2028  // integer type. This promotes constant folding and simplifies this code.
2029  Constant *Op = CE->getOperand(0);
2030  Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2031  false/*ZExt*/);
2032  return lowerConstantForGV(Op, ProcessingGeneric);
2033  }
2034 
2035  case Instruction::PtrToInt: {
2036  const DataLayout &DL = getDataLayout();
2037 
2038  // Support only foldable casts to/from pointers that can be eliminated by
2039  // changing the pointer to the appropriately sized integer type.
2040  Constant *Op = CE->getOperand(0);
2041  Type *Ty = CE->getType();
2042 
2043  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2044 
2045  // We can emit the pointer value into this slot if the slot is an
2046  // integer slot equal to the size of the pointer.
2047  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2048  return OpExpr;
2049 
2050  // Otherwise the pointer is smaller than the resultant integer, mask off
2051  // the high bits so we are sure to get a proper truncation if the input is
2052  // a constant expr.
2053  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2054  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2055  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2056  }
2057 
2058  // The MC library also has a right-shift operator, but it isn't consistently
2059  // signed or unsigned between different targets.
2060  case Instruction::Add: {
2061  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2062  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2063  switch (CE->getOpcode()) {
2064  default: llvm_unreachable("Unknown binary operator constant cast expr");
2065  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2066  }
2067  }
2068  }
2069 }
2070 
2071 // Copy of MCExpr::print customized for NVPTX
2072 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2073  switch (Expr.getKind()) {
2074  case MCExpr::Target:
2075  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2076  case MCExpr::Constant:
2077  OS << cast<MCConstantExpr>(Expr).getValue();
2078  return;
2079 
2080  case MCExpr::SymbolRef: {
2081  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2082  const MCSymbol &Sym = SRE.getSymbol();
2083  Sym.print(OS, MAI);
2084  return;
2085  }
2086 
2087  case MCExpr::Unary: {
2088  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2089  switch (UE.getOpcode()) {
2090  case MCUnaryExpr::LNot: OS << '!'; break;
2091  case MCUnaryExpr::Minus: OS << '-'; break;
2092  case MCUnaryExpr::Not: OS << '~'; break;
2093  case MCUnaryExpr::Plus: OS << '+'; break;
2094  }
2095  printMCExpr(*UE.getSubExpr(), OS);
2096  return;
2097  }
2098 
2099  case MCExpr::Binary: {
2100  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2101 
2102  // Only print parens around the LHS if it is non-trivial.
2103  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2104  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2105  printMCExpr(*BE.getLHS(), OS);
2106  } else {
2107  OS << '(';
2108  printMCExpr(*BE.getLHS(), OS);
2109  OS<< ')';
2110  }
2111 
2112  switch (BE.getOpcode()) {
2113  case MCBinaryExpr::Add:
2114  // Print "X-42" instead of "X+-42".
2115  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2116  if (RHSC->getValue() < 0) {
2117  OS << RHSC->getValue();
2118  return;
2119  }
2120  }
2121 
2122  OS << '+';
2123  break;
2124  default: llvm_unreachable("Unhandled binary operator");
2125  }
2126 
2127  // Only print parens around the LHS if it is non-trivial.
2128  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2129  printMCExpr(*BE.getRHS(), OS);
2130  } else {
2131  OS << '(';
2132  printMCExpr(*BE.getRHS(), OS);
2133  OS << ')';
2134  }
2135  return;
2136  }
2137  }
2138 
2139  llvm_unreachable("Invalid expression kind!");
2140 }
2141 
2142 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2143 ///
2144 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2145  const char *ExtraCode, raw_ostream &O) {
2146  if (ExtraCode && ExtraCode[0]) {
2147  if (ExtraCode[1] != 0)
2148  return true; // Unknown modifier.
2149 
2150  switch (ExtraCode[0]) {
2151  default:
2152  // See if this is a generic print operand
2153  return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2154  case 'r':
2155  break;
2156  }
2157  }
2158 
2159  printOperand(MI, OpNo, O);
2160 
2161  return false;
2162 }
2163 
2164 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2165  unsigned OpNo,
2166  const char *ExtraCode,
2167  raw_ostream &O) {
2168  if (ExtraCode && ExtraCode[0])
2169  return true; // Unknown modifier
2170 
2171  O << '[';
2172  printMemOperand(MI, OpNo, O);
2173  O << ']';
2174 
2175  return false;
2176 }
2177 
2178 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2179  raw_ostream &O) {
2180  const MachineOperand &MO = MI->getOperand(opNum);
2181  switch (MO.getType()) {
2184  if (MO.getReg() == NVPTX::VRDepot)
2185  O << DEPOTNAME << getFunctionNumber();
2186  else
2188  } else {
2189  emitVirtualRegister(MO.getReg(), O);
2190  }
2191  break;
2192 
2194  O << MO.getImm();
2195  break;
2196 
2198  printFPConstant(MO.getFPImm(), O);
2199  break;
2200 
2202  PrintSymbolOperand(MO, O);
2203  break;
2204 
2206  MO.getMBB()->getSymbol()->print(O, MAI);
2207  break;
2208 
2209  default:
2210  llvm_unreachable("Operand type not supported.");
2211  }
2212 }
2213 
2214 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2215  raw_ostream &O, const char *Modifier) {
2216  printOperand(MI, opNum, O);
2217 
2218  if (Modifier && strcmp(Modifier, "add") == 0) {
2219  O << ", ";
2220  printOperand(MI, opNum + 1, O);
2221  } else {
2222  if (MI->getOperand(opNum + 1).isImm() &&
2223  MI->getOperand(opNum + 1).getImm() == 0)
2224  return; // don't print ',0' or '+0'
2225  O << "+";
2226  printOperand(MI, opNum + 1, O);
2227  }
2228 }
2229 
2230 // Force static initialization.
2234 }
llvm::NVPTXAsmPrinter::AggBuffer
friend class AggBuffer
Definition: NVPTXAsmPrinter.h:151
llvm::codeview::SimpleTypeKind::Byte
@ Byte
i
i
Definition: README.txt:29
llvm::NVPTXAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: NVPTXAsmPrinter.cpp:883
NVPTXInstPrinter.h
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:438
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:75
ValueTypes.h
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
llvm::getReqNTIDx
bool getReqNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:264
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MachineInstr.h
llvm::MachineOperand::MO_Immediate
@ MO_Immediate
Immediate operand.
Definition: MachineOperand.h:52
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::GlobalValue::hasCommonLinkage
bool hasCommonLinkage() const
Definition: GlobalValue.h:521
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
__CLK_ADDRESS_BASE
@ __CLK_ADDRESS_BASE
Definition: cl_common_defines.h:69
llvm::MachineLoopInfo::getLoopFor
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Definition: MachineLoopInfo.h:126
llvm::NVPTXInstPrinter::getRegisterName
static const char * getRegisterName(unsigned RegNo)
llvm::MCOperand::createExpr
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
llvm::MCUnaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:468
llvm::AsmPrinter::lowerConstant
virtual const MCExpr * lowerConstant(const Constant *CV)
Lower the specified LLVM Constant to an MCExpr.
Definition: AsmPrinter.cpp:2833
llvm::GlobalValue::hasExternalLinkage
bool hasExternalLinkage() const
Definition: GlobalValue.h:500
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
llvm::APFloatBase::IEEEsingle
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:170
DebugInfoMetadata.h
MCInstrDesc.h
llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition: MachineOperand.h:572
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:76
llvm::Function
Definition: Function.h:60
llvm::Type::VoidTyID
@ VoidTyID
type with no size
Definition: Type.h:63
StringRef.h
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1018
llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:64
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:628
llvm::isImageReadWrite
bool isImageReadWrite(const Value &val)
Definition: NVPTXUtilities.cpp:210
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:47
NativeFormatting.h
llvm::GlobalObject::getSection
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:111
llvm::AsmPrinter::MAI
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:90
llvm::MCConstantExpr::create
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
llvm::GlobalValue::hasWeakLinkage
bool hasWeakLinkage() const
Definition: GlobalValue.h:511
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
usedInGlobalVarDef
static bool usedInGlobalVarDef(const Constant *C)
Definition: NVPTXAsmPrinter.cpp:616
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1182
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
Path.h
llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
llvm::Value::hasName
bool hasName() const
Definition: Value.h:261
ErrorHandling.h
llvm::NVPTXGenericMCSymbolRefExpr::create
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:54
DiscoverDependentGlobals
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
Definition: NVPTXAsmPrinter.cpp:100
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
usedInOneFunc
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
Definition: NVPTXAsmPrinter.cpp:632
llvm::MCUnaryExpr::Plus
@ Plus
Unary plus.
Definition: MCExpr.h:431
llvm::MCBinaryExpr::createAnd
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:530
llvm::MCBinaryExpr::Add
@ Add
Addition.
Definition: MCExpr.h:484
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
MachineBasicBlock.h
llvm::NVPTXSubtarget::getSmVersion
unsigned int getSmVersion() const
Definition: NVPTXSubtarget.h:81
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:2056
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
llvm::MachineModuleInfo::hasDebugInfo
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition: MachineModuleInfo.h:182
APInt.h
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
llvm::getMinCTASm
bool getMinCTASm(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:276
llvm::DenseMapIterator
Definition: DenseMap.h:57
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AsmPrinter::PrintSymbolOperand
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
Definition: AsmPrinterInlineAsm.cpp:465
DenseMap.h
Module.h
llvm::AttributeList
Definition: Attributes.h:425
llvm::MaybeAlign::valueOrOne
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:142
llvm::NVPTXFloatMCExpr::createConstantFPDouble
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:53
printOperand
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
Definition: SelectionDAGDumper.cpp:958
llvm::isImage
bool isImage(const Value &val)
Definition: NVPTXUtilities.cpp:222
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
NVPTXMCAsmInfo.h
llvm::MCAsmInfo::getCodePointerSize
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:548
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:79
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:770
llvm::NVPTXII::IsTexFlag
@ IsTexFlag
Definition: NVPTXBaseInfo.h:35
Operator.h
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
NVPTXTargetStreamer.h
llvm::MCBinaryExpr
Binary assembler expressions.
Definition: MCExpr.h:481
caller
int caller(int32 arg1, int32 arg2)
Definition: README.txt:681
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:168
llvm::format_hex_no_prefix
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MCUnaryExpr
Unary assembler expressions.
Definition: MCExpr.h:425
NVPTXRegisterInfo.h
ConstantFolding.h
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:25
llvm::isTexture
bool isTexture(const Value &val)
Definition: NVPTXUtilities.cpp:143
llvm::MachineOperand::MO_Register
@ MO_Register
Register operand.
Definition: MachineOperand.h:51
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
NVPTX.h
MachineRegisterInfo.h
llvm::AsmPrinter::EmitToStreamer
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:402
llvm::isSampler
bool isSampler(const Value &val)
Definition: NVPTXUtilities.cpp:165
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:121
NVPTXUtilities.h
MachineValueType.h
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
llvm::NVPTXII::IsSuldMask
@ IsSuldMask
Definition: NVPTXBaseInfo.h:36
cl_common_defines.h
NVPTXTargetInfo.h
Instruction.h
llvm::MachineBasicBlock::getSymbol
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
Definition: MachineBasicBlock.cpp:57
CommandLine.h
llvm::NVPTXFloatMCExpr::createConstantFPSingle
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
llvm::NVPTXAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
Definition: NVPTXAsmPrinter.cpp:481
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::getReqNTIDy
bool getReqNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:268
printMCExpr
static void printMCExpr(const MCExpr *E, raw_ostream &OS)
Definition: SystemZAsmParser.cpp:703
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:205
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:207
GlobalValue.h
MachineLoopInfo.h
llvm::ConstantDataSequential
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:570
TargetMachine.h
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:271
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:849
llvm::APInt::lshrInPlace
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:839
llvm::StringRef::startswith
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:256
llvm::GlobalValue::hasAppendingLinkage
bool hasAppendingLinkage() const
Definition: GlobalValue.h:514
Constants.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
SmallString.h
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::MachineOperand::MO_GlobalAddress
@ MO_GlobalAddress
Address of a global value.
Definition: MachineOperand.h:61
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:754
llvm::User
Definition: User.h:44
llvm::MCExpr::Target
@ Target
Target specific expression.
Definition: MCExpr.h:42
llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:99
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::DICompileUnit::LineTablesOnly
@ LineTablesOnly
Definition: DebugInfoMetadata.h:1330
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
MCSymbol.h
llvm::GlobalValue::hasPrivateLinkage
bool hasPrivateLinkage() const
Definition: GlobalValue.h:516
llvm::NVPTXSubtarget::hasMaskOperator
bool hasMaskOperator() const
Definition: NVPTXSubtarget.h:80
llvm::NVPTXSubtarget::getPTXVersion
unsigned getPTXVersion() const
Definition: NVPTXSubtarget.h:84
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:232
MCInst.h
DenseSet.h
llvm::MCBinaryExpr::getRHS
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:631
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::getSamplerName
std::string getSamplerName(const Value &val)
Definition: NVPTXUtilities.cpp:247
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:410
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::NVPTXTargetMachine::getManagedStrPool
ManagedStringPool * getManagedStrPool() const
Definition: NVPTXTargetMachine.h:50
llvm::getReqNTIDz
bool getReqNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:272
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::NVPTXSubtarget::getRegisterInfo
const NVPTXRegisterInfo * getRegisterInfo() const override
Definition: NVPTXSubtarget.h:61
llvm::ADDRESS_SPACE_CONST
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
llvm::AsmPrinter::CurrentFnSym
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:121
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1466
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
APFloat.h
llvm::MCSymbolRefExpr::getSymbol
const MCSymbol & getSymbol() const
Definition: MCExpr.h:399
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:185
llvm::StringRef::data
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:577
DebugLoc.h
llvm::promoteScalarArgumentSize
unsigned promoteScalarArgumentSize(unsigned size)
Definition: NVPTXUtilities.h:64
llvm::GlobalValue::hasInternalLinkage
bool hasInternalLinkage() const
Definition: GlobalValue.h:515
llvm::Type::PointerTyID
@ PointerTyID
Pointers.
Definition: Type.h:73
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
__CLK_FILTER_MASK
@ __CLK_FILTER_MASK
Definition: cl_common_defines.h:97
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1130
llvm::MCExpr::getKind
ExprKind getKind() const
Definition: MCExpr.h:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::DICompileUnit::DebugDirectivesOnly
@ DebugDirectivesOnly
Definition: DebugInfoMetadata.h:1331
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MCSymbol::print
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
llvm::MCUnaryExpr::Minus
@ Minus
Unary minus.
Definition: MCExpr.h:429
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3649
llvm::MachineLoopInfo::isLoopHeader
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
Definition: MachineLoopInfo.h:141
Type.h
llvm::MCExpr::Binary
@ Binary
Binary expressions.
Definition: MCExpr.h:38
llvm::DICompileUnit::FullDebug
@ FullDebug
Definition: DebugInfoMetadata.h:1329
NVPTXMachineFunctionInfo.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::getNVPTXRegClassStr
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:73
llvm::NVPTX::NVCL
@ NVCL
Definition: NVPTX.h:71
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
llvm::SmallString< 128 >
llvm::ManagedStringPool::getManagedString
std::string * getManagedString(const char *S)
Definition: ManagedStringPool.h:39
llvm::getTheNVPTXTarget64
Target & getTheNVPTXTarget64()
Definition: NVPTXTargetInfo.cpp:17
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::MCConstantExpr
Definition: MCExpr.h:144
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::NVPTXFloatMCExpr::createConstantFPHalf
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:284
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::NVPTXRegisterInfo::getName
const char * getName(unsigned RegNo) const
Definition: NVPTXRegisterInfo.h:52
BasicBlock.h
llvm::GlobalObject::hasSection
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:103
llvm::APFloat
Definition: APFloat.h:701
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:501
LLVMInitializeNVPTXAsmPrinter
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
Definition: NVPTXAsmPrinter.cpp:2231
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::GlobalValue
Definition: GlobalValue.h:44
NVPTXAsmPrinter.h
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
VI
@ VI
Definition: SIInstrInfo.cpp:7877
llvm::NVPTXSubtarget::hasImageHandles
bool hasImageHandles() const
Definition: NVPTXSubtarget.cpp:55
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:771
DEPOTNAME
#define DEPOTNAME
Definition: NVPTXAsmPrinter.cpp:95
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:110
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:644
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::Type::isIntOrPtrTy
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:211
llvm::NVPTXII::IsSuldShift
@ IsSuldShift
Definition: NVPTXBaseInfo.h:37
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:54
llvm::NVPTXAsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: NVPTXAsmPrinter.cpp:777
llvm::DenseMap< unsigned, unsigned >
DebugInfo.h
NVPTXBaseInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
Analysis.h
StringExtras.h
isEmptyXXStructor
static bool isEmptyXXStructor(GlobalVariable *GV)
Definition: NVPTXAsmPrinter.cpp:756
llvm::DICompileUnit
Compile unit.
Definition: DebugInfoMetadata.h:1322
llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
useFuncSeen
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
Definition: NVPTXAsmPrinter.cpp:681
llvm::MachineOperand::getType
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Definition: MachineOperand.h:218
printMemOperand
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Definition: SelectionDAGDumper.cpp:523
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::MachineOperand::getFPImm
const ConstantFP * getFPImm() const
Definition: MachineOperand.h:556
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::NVPTXII::IsSustFlag
@ IsSustFlag
Definition: NVPTXBaseInfo.h:38
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:655
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:890
llvm::NVPTXMachineFunctionInfo::getImageHandleSymbol
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
Definition: NVPTXMachineFunctionInfo.h:50
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:672
__CLK_FILTER_BASE
@ __CLK_FILTER_BASE
Definition: cl_common_defines.h:93
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:102
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:94
MachineModuleInfo.h
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::MachineOperand::MO_FPImmediate
@ MO_FPImmediate
Floating-point immediate operand.
Definition: MachineOperand.h:54
__CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_BASE
Definition: cl_common_defines.h:85
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
VisitGlobalVariableForEmission
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
Definition: NVPTXAsmPrinter.cpp:117
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:386
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::MCSymbolRefExpr
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:257
Triple.h
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1571
llvm::AsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:387
llvm::write_hex
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, Optional< size_t > Width=None)
Definition: NativeFormatting.cpp:136
llvm::MCBinaryExpr::createAdd
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:525
llvm::NVPTXAsmPrinter::getVirtualRegisterName
std::string getVirtualRegisterName(unsigned) const
Definition: NVPTXAsmPrinter.cpp:578
llvm::DICompileUnit::NoDebug
@ NoDebug
Definition: DebugInfoMetadata.h:1328
llvm::AsmPrinter::GetExternalSymbolSymbol
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
Definition: AsmPrinter.cpp:3577
llvm::MCUnaryExpr::getSubExpr
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:471
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:173
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
GetSymbolRef
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
Definition: HexagonMCInstLower.cpp:41
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
TargetLoweringObjectFile.h
A
* A
Definition: README_ALTIVEC.txt:89
llvm::NVPTXII::IsSurfTexQueryFlag
@ IsSurfTexQueryFlag
Definition: NVPTXBaseInfo.h:39
llvm::MachineOperand::MO_MachineBasicBlock
@ MO_MachineBasicBlock
MachineBasicBlock reference.
Definition: MachineOperand.h:55
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2035
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::HexPrintStyle::PrefixUpper
@ PrefixUpper
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:593
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::AsmPrinter::MMI
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:105
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::Type::IntegerTyID
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
canDemoteGlobalVar
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
Definition: NVPTXAsmPrinter.cpp:663
llvm::NVPTXTargetStreamer
Implements NVPTX-specific streamer.
Definition: NVPTXTargetStreamer.h:18
name
static const char * name
Definition: SVEIntrinsicOpts.cpp:74
j
return j(j<< 16)
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::MCBinaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:625
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::getMaxNTIDz
bool getMaxNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:260
llvm::NVPTX::CUDA
@ CUDA
Definition: NVPTX.h:72
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:652
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::AsmPrinter::getSubtargetInfo
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:397
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::FixedVectorTyID
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
llvm::MachineOperand::MO_ExternalSymbol
@ MO_ExternalSymbol
Name of external global symbol.
Definition: MachineOperand.h:60
GlobalVariable.h
llvm::isSurface
bool isSurface(const Value &val)
Definition: NVPTXUtilities.cpp:154
llvm::MCUnaryExpr::LNot
@ LNot
Logical negation.
Definition: MCExpr.h:428
Casting.h
llvm::NVPTXTargetMachine::getSubtargetImpl
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: NVPTXTargetMachine.h:43
NVPTXTargetMachine.h
Function.h
__CLK_ADDRESS_MASK
@ __CLK_ADDRESS_MASK
Definition: cl_common_defines.h:79
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:99
llvm::NVPTXII::IsTexModeUnifiedFlag
@ IsTexModeUnifiedFlag
Definition: NVPTXBaseInfo.h:40
llvm::APInt::getLoBits
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:605
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:414
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
llvm::TargetRegisterInfo::getNumRegClasses
unsigned getNumRegClasses() const
Definition: TargetRegisterInfo.h:765
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::getAlign
bool getAlign(const Function &F, unsigned index, unsigned &align)
Definition: NVPTXUtilities.cpp:294
llvm::getSurfaceName
std::string getSurfaceName(const Value &val)
Definition: NVPTXUtilities.cpp:242
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:83
llvm::getMaxNTIDy
bool getMaxNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:256
llvm::isImageWriteOnly
bool isImageWriteOnly(const Value &val)
Definition: NVPTXUtilities.cpp:198
instr
@ instr
Definition: HWAddressSanitizer.cpp:192
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:74
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
llvm::clearAnnotationCache
void clearAnnotationCache(const Module *Mod)
Definition: NVPTXUtilities.cpp:46
llvm::MCExpr::print
void print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens=false) const
Definition: MCExpr.cpp:41
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4843
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:87
llvm::support::endian::read64le
uint64_t read64le(const void *P)
Definition: Endian.h:382
SmallVector.h
llvm::NVPTXSubtarget::getTargetName
std::string getTargetName() const
Definition: NVPTXSubtarget.h:82
llvm::support::endian::read32le
uint32_t read32le(const void *P)
Definition: Endian.h:381
User.h
llvm::MCExpr::SymbolRef
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
__CLK_NORMALIZED_MASK
@ __CLK_NORMALIZED_MASK
Definition: cl_common_defines.h:88
llvm::MachineOperand::getSymbolName
const char * getSymbolName() const
Definition: MachineOperand.h:617
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:164
llvm::MCExpr::Unary
@ Unary
Unary expressions.
Definition: MCExpr.h:41
MCStreamer.h
UnrollLoop.h
llvm::MCExpr::Constant
@ Constant
Constant expressions.
Definition: MCExpr.h:39
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:658
NVPTXSubtarget.h
llvm::NVPTXMachineFunctionInfo
Definition: NVPTXMachineFunctionInfo.h:20
llvm::AsmPrinter::getDataLayout
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:387
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::erase
bool erase(const ValueT &V)
Definition: DenseSet.h:101
llvm::GlobalValue::hasLinkOnceLinkage
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:504
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::ConstantFoldConstant
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Definition: ConstantFolding.cpp:1207
MachineOperand.h
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:284
DerivedTypes.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:286
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
llvm::NVPTXAsmPrinter::getFunctionFrameSymbol
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
Definition: NVPTXAsmPrinter.cpp:503
llvm::AsmPrinter::emitInitialRawDwarfLocDirective
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:406
llvm::MCSymbolRefExpr::VK_None
@ VK_None
Definition: MCExpr.h:195
llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:262
LLVMContext.h
llvm::NVPTXTargetMachine::getDrvInterface
NVPTX::DrvInterface getDrvInterface() const
Definition: NVPTXTargetMachine.h:49
llvm::RegisterAsmPrinter
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...
Definition: TargetRegistry.h:1381
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::getMaxNTIDx
bool getMaxNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:252
llvm::getTextureName
std::string getTextureName(const Value &val)
Definition: NVPTXUtilities.cpp:237
raw_ostream.h
llvm::getMaxNReg
bool getMaxNReg(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:280
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
Endian.h
llvm::getNVPTXRegClassName
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:29
TargetRegistry.h
MCExpr.h
llvm::isManaged
bool isManaged(const Value &val)
Definition: NVPTXUtilities.cpp:226
CU
Definition: AArch64AsmBackend.cpp:504
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:379
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
TargetRegisterInfo.h
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
NVPTXMCExpr.h
llvm::Type::HalfTyID
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:475
llvm::MCUnaryExpr::Not
@ Not
Bitwise negation.
Definition: MCExpr.h:430
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
llvm::MCBinaryExpr::getLHS
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:628
llvm::AsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:431
llvm::getTheNVPTXTarget32
Target & getTheNVPTXTarget32()
Definition: NVPTXTargetInfo.cpp:13