LLVM  13.0.0git
OMPIRBuilder.cpp
Go to the documentation of this file.
1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/IR/CFG.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/MDBuilder.h"
24 #include "llvm/Support/Error.h"
27 
28 #include <sstream>
29 
30 #define DEBUG_TYPE "openmp-ir-builder"
31 
32 using namespace llvm;
33 using namespace omp;
34 
// Hidden boolean command-line option "openmp-ir-builder-optimistic-attributes".
// It defaults to false (cl::init(false)); the code that reads it is not
// visible in this listing.
35 static cl::opt<bool>
36  OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
37  cl::desc("Use optimistic attributes describing "
38  "'as-if' properties of runtime calls."),
39  cl::init(false));
40 
// NOTE(review): the signature line of this definition is absent from this
// listing. The body reads a runtime-function id `FnID` and a Function
// reference `Fn`; it is presumably the helper invoked elsewhere in this file
// as addAttributes(FnID, *Fn) -- confirm.
//
// Replaces the attribute list of Fn with the one described for FnID by the
// OMP_RTL_ATTRS entries of OMPKinds.def. Ids without such an entry hit the
// default case and leave Fn unchanged.
42  LLVMContext &Ctx = Fn.getContext();
43 
 // Each OMP_ATTRS_SET entry of OMPKinds.def becomes a local AttributeSet
 // variable; presumably these are the names the OMP_RTL_ATTRS entries below
 // pass as FnAttrSet/RetAttrSet/ArgAttrSets.
44 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
45 #include "llvm/Frontend/OpenMP/OMPKinds.def"
46 
47  // Add attributes to the new declaration.
48  switch (FnID) {
49 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
50  case Enum: \
51  Fn.setAttributes( \
52  AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets)); \
53  break;
54 #include "llvm/Frontend/OpenMP/OMPKinds.def"
55  default:
56  // Attributes are optional.
57  break;
58  }
59 }
60 
// NOTE(review): the signature line(s) of this definition are absent from this
// listing. The body reads a module `M` and a runtime-function id `FnID`; it is
// presumably the function called elsewhere in this file as
// getOrCreateRuntimeFunction(M, FnID) -- confirm.
//
// Looks up the declaration of the OpenMP runtime function FnID in M and
// creates it (external linkage, plus callback metadata and attributes) when it
// is not present yet. The expected function type FnTy is always computed from
// the OMP_RTL entry in OMPKinds.def, whether or not a declaration was found.
63  FunctionType *FnTy = nullptr;
64  Function *Fn = nullptr;
65 
66  // Try to find the declaration in the module first.
67  switch (FnID) {
68 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
69  case Enum: \
70  FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
71  IsVarArg); \
72  Fn = M.getFunction(Str); \
73  break;
74 #include "llvm/Frontend/OpenMP/OMPKinds.def"
75  }
76 
77  if (!Fn) {
78  // Create a new declaration if we need one.
79  switch (FnID) {
80 #define OMP_RTL(Enum, Str, ...) \
81  case Enum: \
82  Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
83  break;
84 #include "llvm/Frontend/OpenMP/OMPKinds.def"
85  }
86 
87  // Add information if the runtime function takes a callback function
88  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
89  if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
90  LLVMContext &Ctx = Fn->getContext();
91  MDBuilder MDB(Ctx);
92  // Annotate the callback behavior of the runtime function:
93  // - The callback callee is argument number 2 (microtask).
94  // - The first two arguments of the callback callee are unknown (-1).
95  // - All variadic arguments to the runtime function are passed to the
96  // callback callee.
 // NOTE(review): listing line 99 (the start of the metadata operand
 // expression) is absent from this listing, so the call below is
 // syntactically incomplete as shown.
97  Fn->addMetadata(
98  LLVMContext::MD_callback,
100  2, {-1, -1}, /* VarArgsArePassed */ true)}));
101  }
102  }
103 
104  LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
105  << " with type " << *Fn->getFunctionType() << "\n");
 // Attributes are only applied to declarations created here, never to ones
 // that already existed in the module.
106  addAttributes(FnID, *Fn);
107 
108  } else {
109  LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
110  << " with type " << *Fn->getFunctionType() << "\n");
111  }
112 
113  assert(Fn && "Failed to create OpenMP runtime function");
114 
115  // Cast the function to the expected type if necessary
 // NOTE(review): listing line 116, which must define `C`, is absent from this
 // listing; the returned pair is the expected type FnTy and that value.
117  return {FnTy, C};
118 }
119 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads `M` and a runtime-function id `FnID`. It is
// presumably the helper called elsewhere in this file as
// getOrCreateRuntimeFunctionPtr(<id>) -- confirm.
//
// Resolves the runtime function via getOrCreateRuntimeFunction and returns its
// callee as a Function pointer. The assert fires if the callee is not a
// Function; with assertions compiled out the dyn_cast result (possibly null)
// is returned as is.
121  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
122  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
123  assert(Fn && "Failed to create OpenMP runtime function pointer");
124  return Fn;
125 }
126 
/// Initializes the builder by calling initializeTypes on the module M.
127 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
128 
/// Performs the outlining recorded in OutlineInfos.
///
/// For every processed entry the region blocks are extracted into a new
/// function with the ".omp_par" suffix, that function is placed directly after
/// the function it was extracted from, the entry block created by the
/// extractor is removed, and the entry's PostOutlineCB (if set) is run.
///
/// \param Fn If non-null, only entries whose OI.getFunction() equals Fn are
///           processed; all other entries are kept in OutlineInfos for a
///           later call. If null, every entry is processed.
/// \param AllowExtractorSinking If true, the non-terminator instructions of
///           the extractor's entry block are moved into the region entry
///           block before the extractor's block is erased.
///
/// NOTE(review): listing line 131 is absent from this listing; `Blocks` is
/// used below without a visible declaration.
129 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
130  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
132  SmallVector<OutlineInfo, 16> DeferredOutlines;
133  for (OutlineInfo &OI : OutlineInfos) {
134  // Skip functions that have not finalized yet; may happen with nested
135  // function generation.
136  if (Fn && OI.getFunction() != Fn) {
137  DeferredOutlines.push_back(OI);
138  continue;
139  }
140 
 // The two containers are reused across iterations, so reset them before
 // collecting the blocks of this region.
141  ParallelRegionBlockSet.clear();
142  Blocks.clear();
143  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
144 
145  Function *OuterFn = OI.getFunction();
146  CodeExtractorAnalysisCache CEAC(*OuterFn);
147  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
148  /* AggregateArgs */ false,
149  /* BlockFrequencyInfo */ nullptr,
150  /* BranchProbabilityInfo */ nullptr,
151  /* AssumptionCache */ nullptr,
152  /* AllowVarArgs */ true,
153  /* AllowAlloca */ true,
154  /* Suffix */ ".omp_par");
155 
156  LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
157  LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
158  << " Exit: " << OI.ExitBB->getName() << "\n");
159  assert(Extractor.isEligible() &&
160  "Expected OpenMP outlining to be possible!");
161 
162  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
163 
164  LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
165  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
166  assert(OutlinedFn->getReturnType()->isVoidTy() &&
167  "OpenMP outlined functions should not return a value!");
168 
169  // For compatibility with the clang CG we move the outlined function after the
170  // one with the parallel region.
171  OutlinedFn->removeFromParent();
172  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
173 
174  // Remove the artificial entry introduced by the extractor right away, we
175  // made our own entry block after all.
176  {
177  BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
178  assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
179  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
180  if (AllowExtractorSinking) {
181  // Move instructions from the to-be-deleted ArtificialEntry to the entry
182  // basic block of the parallel region. CodeExtractor may have sunk
183  // allocas/bitcasts for values that are solely used in the outlined
184  // region and do not escape.
185  assert(!ArtificialEntry.empty() &&
186  "Expected instructions to sink in the outlined region");
 // The iterator is advanced before the instruction is moved so that
 // moving it out of ArtificialEntry does not disturb the traversal.
187  for (BasicBlock::iterator It = ArtificialEntry.begin(),
188  End = ArtificialEntry.end();
189  It != End;) {
190  Instruction &I = *It;
191  It++;
192 
 // The terminator stays behind and is deleted with the block.
193  if (I.isTerminator())
194  continue;
195 
196  I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
197  }
198  }
 // Make the region entry the first block of the outlined function, then
 // drop the extractor's block.
199  OI.EntryBB->moveBefore(&ArtificialEntry);
200  ArtificialEntry.eraseFromParent();
201  }
202  assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
203  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
204 
205  // Run a user callback, e.g. to add attributes.
206  if (OI.PostOutlineCB)
207  OI.PostOutlineCB(*OutlinedFn);
208  }
209 
210  // Remove work items that have been completed.
211  OutlineInfos = std::move(DeferredOutlines);
212 }
213 
// NOTE(review): the signature line of this definition is absent from this
// listing (presumably the class destructor -- confirm).
//
// Checks that no outlining work is still pending, i.e. that every entry of
// OutlineInfos has been consumed.
215  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
216 }
217 
// NOTE(review): the first signature line of this definition is absent from
// this listing; besides the two parameters shown, the body reads a constant
// `SrcLocStr`. It is presumably the function called elsewhere in this file as
// getOrCreateIdent(SrcLocStr[, Flags]) -- confirm.
//
// Returns a pointer to a global holding the five-field location struct
// {0, LocFlags | OMP_IDENT_FLAG_KMPC, Reserve2Flags, 0, SrcLocStr}. Results are
// cached in IdentMap. On a cache miss an existing module global with the same
// initializer is reused if there is one; otherwise a new private constant
// global is created.
219  IdentFlag LocFlags,
220  unsigned Reserve2Flags) {
221  // Enable "C-mode".
222  LocFlags |= OMP_IDENT_FLAG_KMPC;
223 
 // NOTE(review): the flags are shifted by 31 rather than 32, so bit 31 of
 // Reserve2Flags shares a key bit with bit 0 of LocFlags -- confirm intended.
224  Value *&Ident =
225  IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
226  if (!Ident) {
227  Constant *I32Null = ConstantInt::getNullValue(Int32);
228  Constant *IdentData[] = {
229  I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
230  ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
231  Constant *Initializer = ConstantStruct::get(
232  cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
233 
234  // Look for existing encoding of the location + flags, not needed but
235  // minimizes the difference to the existing solution while we transition.
 // A match already has type IdentPtr, so it is cached and returned directly
 // without the pointer cast used at the end of this function.
236  for (GlobalVariable &GV : M.getGlobalList())
237  if (GV.getType() == IdentPtr && GV.hasInitializer())
238  if (GV.getInitializer() == Initializer)
239  return Ident = &GV;
240 
241  auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
242  /* isConstant = */ true,
243  GlobalValue::PrivateLinkage, Initializer);
244  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
245  GV->setAlignment(Align(8));
246  Ident = GV;
247  }
248  return Builder.CreatePointerCast(Ident, IdentPtr);
249 }
250 
// NOTE(review): the signature line of this definition is absent from this
// listing; judging by the trailing comment it selects a lane-mask integer
// type -- confirm the name and return type.
//
// Returns the 64-bit integer type when the module's target triple is AMDGCN
// and the 32-bit integer type for every other target.
252  LLVMContext &Ctx = M.getContext();
253  Triple triple(M.getTargetTriple());
254 
255  // This test is adequate until deviceRTL has finer grained lane widths
256  return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
257 }
258 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads a string `LocStr`. It is presumably the
// getOrCreateSrcLocStr overload taking the location string -- confirm.
//
// Returns a constant pointer to a global string holding LocStr. Results are
// cached in SrcLocStrMap. On a cache miss an existing constant global with the
// same initializer is reused (cast to Int8Ptr); otherwise a new global string
// is created in M.
260  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
261  if (!SrcLocStr) {
262  Constant *Initializer =
263  ConstantDataArray::getString(M.getContext(), LocStr);
264 
265  // Look for existing encoding of the location, not needed but minimizes the
266  // difference to the existing solution while we transition.
267  for (GlobalVariable &GV : M.getGlobalList())
268  if (GV.isConstant() && GV.hasInitializer() &&
269  GV.getInitializer() == Initializer)
270  return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
271 
272  SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
273  /* AddressSpace */ 0, &M);
274  }
275  return SrcLocStr;
276 }
277 
// NOTE(review): the first signature line of this definition is absent from
// this listing; besides the parameters shown, the body reads a string
// `FunctionName`.
//
// Builds the location string ";<FileName>;<FunctionName>;<Line>;<Column>;;"
// and returns the (cached) global string constant for it.
279  StringRef FileName,
280  unsigned Line,
281  unsigned Column) {
282  SmallString<128> Buffer;
283  Buffer.push_back(';');
284  Buffer.append(FileName);
285  Buffer.push_back(';');
286  Buffer.append(FunctionName);
287  Buffer.push_back(';');
288  Buffer.append(std::to_string(Line));
289  Buffer.push_back(';');
290  Buffer.append(std::to_string(Column));
 // The string ends with two consecutive semicolons.
291  Buffer.push_back(';');
292  Buffer.push_back(';');
293  return getOrCreateSrcLocStr(Buffer.str());
294 }
295 
// NOTE(review): the signature line of this definition is absent from this
// listing; it is presumably the function called elsewhere in this file as
// getOrCreateDefaultSrcLocStr() -- confirm.
//
// Returns the location string used when no source location is known:
// file and function are "unknown", line and column are 0.
297  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
298 }
299 
// NOTE(review): the line carrying the function name and parameters is absent
// from this listing; the body reads a location description `Loc` with a debug
// location `Loc.DL` and an insertion point `Loc.IP`.
//
// Derives the location string from the debug location of Loc. Without a debug
// location the default string is returned. The file name defaults to the
// module name; the function name falls back to the name of the function that
// contains the insertion block when the subprogram name is empty.
300 Constant *
302  DILocation *DIL = Loc.DL.get();
303  if (!DIL)
304  return getOrCreateDefaultSrcLocStr();
305  StringRef FileName = M.getName();
 // NOTE(review): the value of DIF->getSource() is used as the file name
 // here -- confirm this is the intended field of the DIFile.
306  if (DIFile *DIF = DIL->getFile())
307  if (Optional<StringRef> Source = DIF->getSource())
308  FileName = *Source;
 // The scope and its subprogram are dereferenced without a null check, so
 // this assumes both exist whenever a debug location is present.
309  StringRef Function = DIL->getScope()->getSubprogram()->getName();
310  Function =
311  !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
312  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
313  DIL->getColumn());
314 }
315 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads a value `Ident`. It is presumably the function
// called elsewhere in this file as getOrCreateThreadID(Ident) -- confirm.
//
// Emits a call to __kmpc_global_thread_num(Ident) at the current insertion
// point and returns the call, named "omp_global_thread_num". A new call is
// emitted on every invocation; nothing is cached here.
317  return Builder.CreateCall(
318  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
319  "omp_global_thread_num");
320 }
321 
// NOTE(review): the leading signature lines of this definition are absent from
// this listing; besides the parameters shown, the body reads a location `Loc`
// and a directive kind `DK`.
//
// Public entry point for emitting a barrier: moves the builder to Loc and
// forwards to emitBarrierImpl. If updateToLocation(Loc) fails, nothing is
// emitted and Loc.IP is returned unchanged.
324  bool ForceSimpleCall, bool CheckCancelFlag) {
325  if (!updateToLocation(Loc))
326  return Loc.IP;
327  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
328 }
329 
// NOTE(review): the leading signature lines of this definition are absent from
// this listing; besides the parameters shown, the body reads a location `Loc`
// and a directive `Kind`. It is presumably the function called elsewhere in
// this file as emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag).
//
// Emits a barrier runtime call at the current insertion point and returns the
// insertion point after it.
//  - ForceSimpleCall: when true, __kmpc_barrier is always used.
//  - CheckCancelFlag: when a cancel barrier is used and this is true, the
//    call result is checked and cancellation control flow is emitted.
332  bool ForceSimpleCall, bool CheckCancelFlag) {
333  // Build call __kmpc_cancel_barrier(loc, thread_id) or
334  // __kmpc_barrier(loc, thread_id);
335 
 // Pick the ident flag that records which construct the barrier belongs to;
 // kinds not listed use the generic implicit-barrier flag.
336  IdentFlag BarrierLocFlags;
337  switch (Kind) {
338  case OMPD_for:
339  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
340  break;
341  case OMPD_sections:
342  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
343  break;
344  case OMPD_single:
345  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
346  break;
347  case OMPD_barrier:
348  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
349  break;
350  default:
351  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
352  break;
353  }
354 
 // The barrier call receives the ident with the barrier flags; the thread id
 // is queried with a separate ident created without those flags.
355  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
356  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
357  getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
358 
359  // If we are in a cancellable parallel region, barriers are cancellation
360  // points.
361  // TODO: Check why we would force simple calls or to ignore the cancel flag.
362  bool UseCancelBarrier =
363  !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
364 
365  Value *Result =
366  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
367  UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
368  : OMPRTL___kmpc_barrier),
369  Args);
370 
371  if (UseCancelBarrier && CheckCancelFlag)
372  emitCancelationCheckImpl(Result, OMPD_parallel);
373 
374  return Builder.saveIP();
375 }
376 
// NOTE(review): the leading signature lines of this definition are absent from
// this listing; besides the parameters shown, the body reads a location `Loc`.
//
// Emits a call to __kmpc_cancel for CanceledDirective, followed by the shared
// cancellation check on its result. When IfCondition is non-null the call is
// placed in the "then" branch of a conditional split on it. Returns the
// insertion point at which code generation continues, or Loc.IP unchanged if
// updateToLocation(Loc) fails.
379  Value *IfCondition,
380  omp::Directive CanceledDirective) {
381  if (!updateToLocation(Loc))
382  return Loc.IP;
383 
384  // LLVM utilities like blocks with terminators.
 // This placeholder terminator is erased again at the end of the function.
385  auto *UI = Builder.CreateUnreachable();
386 
 // Without a condition the cancel call is emitted right before the
 // placeholder; with one it goes before the terminator of the "then" block.
387  Instruction *ThenTI = UI, *ElseTI = nullptr;
388  if (IfCondition)
389  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
390  Builder.SetInsertPoint(ThenTI);
391 
 // Map the directive to the integer cancel kind listed in OMPKinds.def;
 // directives without an OMP_CANCEL_KIND entry are not expected here.
392  Value *CancelKind = nullptr;
393  switch (CanceledDirective) {
394 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
395  case DirectiveEnum: \
396  CancelKind = Builder.getInt32(Value); \
397  break;
398 #include "llvm/Frontend/OpenMP/OMPKinds.def"
399  default:
400  llvm_unreachable("Unknown cancel kind!");
401  }
402 
403  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
404  Value *Ident = getOrCreateIdent(SrcLocStr);
405  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
406  Value *Result = Builder.CreateCall(
407  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
408 
409  // The actual cancel logic is shared with others, e.g., cancel_barriers.
410  emitCancelationCheckImpl(Result, CanceledDirective);
411 
412  // Update the insertion point and remove the terminator we introduced.
413  Builder.SetInsertPoint(UI->getParent());
414  UI->eraseFromParent();
415 
416  return Builder.saveIP();
417 }
418 
// NOTE(review): the first signature line of this definition is absent from
// this listing; it is presumably the function called elsewhere in this file as
// emitCancelationCheckImpl(Result, Directive) -- confirm.
//
// Emits a conditional branch on CancelFlag at the current insertion point: a
// null flag continues in a ".cont" block, anything else enters a new ".cncl"
// block in which the innermost finalization callback is run. On return the
// builder is positioned at the start of the continuation block.
420  Value *CancelFlag, omp::Directive CanceledDirective) {
421  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
422  "Unexpected cancellation!");
423 
424  // For a cancel barrier we create two new blocks.
425  BasicBlock *BB = Builder.GetInsertBlock();
426  BasicBlock *NonCancellationBlock;
427  if (Builder.GetInsertPoint() == BB->end()) {
428  // TODO: This branch will not be needed once we moved to the
429  // OpenMPIRBuilder codegen completely.
430  NonCancellationBlock = BasicBlock::Create(
431  BB->getContext(), BB->getName() + ".cont", BB->getParent());
432  } else {
 // Split at the insertion point and drop the terminator the split added to
 // BB, so the conditional branch below can terminate BB instead.
433  NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
434  BB->getTerminator()->eraseFromParent();
435  Builder.SetInsertPoint(BB);
436  }
437  BasicBlock *CancellationBlock = BasicBlock::Create(
438  BB->getContext(), BB->getName() + ".cncl", BB->getParent());
439 
440  // Jump to them based on the return value.
441  Value *Cmp = Builder.CreateIsNull(CancelFlag);
442  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
443  /* TODO weight */ nullptr, nullptr);
444 
445  // From the cancellation block we finalize all variables and go to the
446  // post finalization block that is known to the FiniCB callback.
447  Builder.SetInsertPoint(CancellationBlock);
448  auto &FI = FinalizationStack.back();
449  FI.FiniCB(Builder.saveIP());
450 
451  // The continuation block is where code generation continues.
452  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
453 }
454 
// NOTE(review): the first signature line of this definition is absent from
// this listing, as are listing lines 535, 598, 693, 729 and 743. Consequently
// `Blocks` and `Uses` are used below without a visible declaration and one
// metadata expression is syntactically incomplete as shown.
//
// Emits the control-flow skeleton of a parallel region, lets the callbacks
// fill it in, and registers the region for outlining. The runtime fork call
// itself is emitted later, from the PostOutlineCB stored in the OutlineInfo.
//
//  - Loc:           where to emit; if updateToLocation(Loc) fails, Loc.IP is
//                   returned and nothing is emitted.
//  - OuterAllocaIP: insertion point for allocas created outside the region.
//  - BodyGenCB:     generates the region body (must be set).
//  - PrivCB:        invoked for each value captured by the region, except the
//                   thread-id call, to provide its replacement inside.
//  - FiniCB:        finalization callback, run for the normal exit and
//                   (through a wrapper) for cancellation exits.
//  - IfCondition:   optional; when set, a serialized version is emitted in
//                   the "else" branch.
//  - NumThreads:    optional; when set, __kmpc_push_num_threads is called.
//  - ProcBind:      unless default, __kmpc_push_proc_bind is called.
//  - IsCancellable: recorded on the finalization stack for this region.
//
// Returns the insertion point at the end of the block that held the
// placeholder terminator.
456  const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
457  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
458  FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
459  omp::ProcBindKind ProcBind, bool IsCancellable) {
460  if (!updateToLocation(Loc))
461  return Loc.IP;
462 
463  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
464  Value *Ident = getOrCreateIdent(SrcLocStr);
465  Value *ThreadID = getOrCreateThreadID(Ident);
466 
467  if (NumThreads) {
468  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
469  Value *Args[] = {
470  Ident, ThreadID,
471  Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
472  Builder.CreateCall(
473  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
474  }
475 
476  if (ProcBind != OMP_PROC_BIND_default) {
477  // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
478  Value *Args[] = {
479  Ident, ThreadID,
480  ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
481  Builder.CreateCall(
482  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
483  }
484 
485  BasicBlock *InsertBB = Builder.GetInsertBlock();
486  Function *OuterFn = InsertBB->getParent();
487 
488  // Save the outer alloca block because the insertion iterator may get
489  // invalidated and we still need this later.
490  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
491 
492  // Vector to remember instructions we used only during the modeling but which
493  // we want to delete at the end.
494  SmallVector<Instruction *, 4> ToBeDeleted;
495 
496  // Change the location to the outer alloca insertion point to create and
497  // initialize the allocas we pass into the parallel region.
498  Builder.restoreIP(OuterAllocaIP);
499  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
500  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
501 
502  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
503  // program, otherwise we only need them for modeling purposes to get the
504  // associated arguments in the outlined function. In the former case,
505  // initialize the allocas properly, in the latter case, delete them later.
506  if (IfCondition) {
507  Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
508  Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
509  } else {
510  ToBeDeleted.push_back(TIDAddr);
511  ToBeDeleted.push_back(ZeroAddr);
512  }
513 
514  // Create an artificial insertion point that will also ensure the blocks we
515  // are about to split are not degenerated.
516  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
517 
518  Instruction *ThenTI = UI, *ElseTI = nullptr;
519  if (IfCondition)
520  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
521 
 // Split four times at the same instruction to obtain the chain
 // entry -> region -> pre_finalize -> exit shown in the diagram below.
522  BasicBlock *ThenBB = ThenTI->getParent();
523  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
524  BasicBlock *PRegBodyBB =
525  PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
526  BasicBlock *PRegPreFiniBB =
527  PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
528  BasicBlock *PRegExitBB =
529  PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
530 
 // Wrapper pushed on the finalization stack; it is what cancellation checks
 // emitted inside the region invoke.
531  auto FiniCBWrapper = [&](InsertPointTy IP) {
532  // Hide "open-ended" blocks from the given FiniCB by setting the right jump
533  // target to the region exit block.
534  if (IP.getBlock()->end() == IP.getPoint()) {
536  Builder.restoreIP(IP);
537  Instruction *I = Builder.CreateBr(PRegExitBB);
538  IP = InsertPointTy(I->getParent(), I->getIterator());
539  }
540  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
541  IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
542  "Unexpected insertion point for finalization call!");
543  return FiniCB(IP);
544  };
545 
546  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
547 
548  // Generate the privatization allocas in the block that will become the entry
549  // of the outlined function.
550  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
551  InsertPointTy InnerAllocaIP = Builder.saveIP();
552 
 // Local copy of the thread id; the store that initializes it is added by
 // the post-outline callback below, right before the PrivTID load.
553  AllocaInst *PrivTIDAddr =
554  Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
555  Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
556 
557  // Add some fake uses for OpenMP provided arguments.
558  ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
559  Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use");
560  ToBeDeleted.push_back(ZeroAddrUse);
561 
562  // ThenBB
563  // |
564  // V
565  // PRegionEntryBB <- Privatization allocas are placed here.
566  // |
567  // V
568  // PRegionBodyBB <- BodyGen is invoked here.
569  // |
570  // V
571  // PRegPreFiniBB <- The block we will start finalization from.
572  // |
573  // V
574  // PRegionExitBB <- A common exit to simplify block collection.
575  //
576 
577  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
578 
579  // Let the caller create the body.
580  assert(BodyGenCB && "Expected body generation callback!");
581  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
582  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
583 
584  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
585 
 // Make sure __kmpc_fork_call carries callback metadata; this is a no-op if
 // the declaration already has it.
586  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
587  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
588  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
589  llvm::LLVMContext &Ctx = F->getContext();
590  MDBuilder MDB(Ctx);
591  // Annotate the callback behavior of the __kmpc_fork_call:
592  // - The callback callee is argument number 2 (microtask).
593  // - The first two arguments of the callback callee are unknown (-1).
594  // - All variadic arguments to the __kmpc_fork_call are passed to the
595  // callback callee.
596  F->addMetadata(
597  llvm::LLVMContext::MD_callback,
599  Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
600  /* VarArgsArePassed */ true)}));
601  }
602  }
603 
 // The callback below runs once the region has been outlined. It captures by
 // copy, so it sees the values these locals have at this point.
604  OutlineInfo OI;
605  OI.PostOutlineCB = [=](Function &OutlinedFn) {
606  // Add some known attributes.
607  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
608  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
609  OutlinedFn.addFnAttr(Attribute::NoUnwind);
610  OutlinedFn.addFnAttr(Attribute::NoRecurse);
611 
612  assert(OutlinedFn.arg_size() >= 2 &&
613  "Expected at least tid and bounded tid as arguments");
614  unsigned NumCapturedVars =
615  OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
616 
 // The call to the outlined function that the extractor left in the outer
 // function; the fork call is inserted right before it.
617  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
618  CI->getParent()->setName("omp_parallel");
619  Builder.SetInsertPoint(CI);
620 
621  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
622  Value *ForkCallArgs[] = {
623  Ident, Builder.getInt32(NumCapturedVars),
624  Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
625 
626  SmallVector<Value *, 16> RealArgs;
627  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
628  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
629 
630  Builder.CreateCall(RTLFn, RealArgs);
631 
632  LLVM_DEBUG(dbgs() << "With fork_call placed: "
633  << *Builder.GetInsertBlock()->getParent() << "\n");
634 
 // NOTE(review): ExitIP is not referenced again in the visible code.
635  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
636 
637  // Initialize the local TID stack location with the argument value.
638  Builder.SetInsertPoint(PrivTID);
639  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
640  Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
641 
642  // If no "if" clause was present we do not need the call created during
643  // outlining, otherwise we reuse it in the serialized parallel region.
644  if (!ElseTI) {
645  CI->eraseFromParent();
646  } else {
647 
648  // If an "if" clause was present we are now generating the serialized
649  // version into the "else" branch.
650  Builder.SetInsertPoint(ElseTI);
651 
652  // Build calls __kmpc_serialized_parallel(&Ident, GTid);
653  Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
654  Builder.CreateCall(
655  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
656  SerializedParallelCallArgs);
657 
658  // OutlinedFn(&GTid, &zero, CapturedStruct);
659  CI->removeFromParent();
660  Builder.Insert(CI);
661 
662  // __kmpc_end_serialized_parallel(&Ident, GTid);
663  Value *EndArgs[] = {Ident, ThreadID};
664  Builder.CreateCall(
665  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
666  EndArgs);
667 
668  LLVM_DEBUG(dbgs() << "With serialized parallel region: "
669  << *Builder.GetInsertBlock()->getParent() << "\n");
670  }
671 
 // Drop the instructions that only existed to model the arguments.
672  for (Instruction *I : ToBeDeleted)
673  I->eraseFromParent();
674  };
675 
676  // Adjust the finalization stack, verify the adjustment, and call the
677  // finalize function a last time to finalize values between the pre-fini
678  // block and the exit block if we left the parallel "the normal way".
679  auto FiniInfo = FinalizationStack.pop_back_val();
680  (void)FiniInfo;
681  assert(FiniInfo.DK == OMPD_parallel &&
682  "Unexpected finalization stack state!");
683 
684  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
685 
 // The normal exit calls the caller's FiniCB directly, not the wrapper.
686  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
687  FiniCB(PreFiniIP);
688 
689  OI.EntryBB = PRegEntryBB;
690  OI.ExitBB = PRegExitBB;
691 
692  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
694  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
695 
696  // Ensure a single exit node for the outlined region by creating one.
697  // We might have multiple incoming edges to the exit now due to finalizations,
698  // e.g., cancel calls that cause the control flow to leave the region.
 // After the split the old exit block is added to the region's block list
 // under a new name, and PRegExitBB names the new block that follows it.
699  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
700  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
701  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
702  Blocks.push_back(PRegOutlinedExitBB);
703 
 // This extractor is only used to query the region's inputs and outputs;
 // no extraction is performed in this function.
704  CodeExtractorAnalysisCache CEAC(*OuterFn);
705  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
706  /* AggregateArgs */ false,
707  /* BlockFrequencyInfo */ nullptr,
708  /* BranchProbabilityInfo */ nullptr,
709  /* AssumptionCache */ nullptr,
710  /* AllowVarArgs */ true,
711  /* AllowAlloca */ true,
712  /* Suffix */ ".omp_par");
713 
714  // Find inputs to, outputs from the code region.
715  BasicBlock *CommonExit = nullptr;
716  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
717  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
718  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
719 
720  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
721 
722  FunctionCallee TIDRTLFn =
723  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
724 
 // Rewrites the uses of one captured value V inside the region.
725  auto PrivHelper = [&](Value &V) {
 // The two modeling allocas are passed through untouched.
726  if (&V == TIDAddr || &V == ZeroAddr)
727  return;
728 
 // Collect the uses up front; they are rewritten at the end of the helper.
730  for (Use &U : V.uses())
731  if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
732  if (ParallelRegionBlockSet.count(UserI->getParent()))
733  Uses.insert(&U);
734 
735  // __kmpc_fork_call expects extra arguments as pointers. If the input
736  // already has a pointer type, everything is fine. Otherwise, store the
737  // value onto stack and load it back inside the to-be-outlined region. This
738  // will ensure only the pointer will be passed to the function.
739  // FIXME: if there are more than 15 trailing arguments, they must be
740  // additionally packed in a struct.
741  Value *Inner = &V;
742  if (!V.getType()->isPointerTy()) {
744  LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
745 
746  Builder.restoreIP(OuterAllocaIP);
747  Value *Ptr =
748  Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
749 
750  // Store to stack at end of the block that currently branches to the entry
751  // block of the to-be-outlined region.
752  Builder.SetInsertPoint(InsertBB,
753  InsertBB->getTerminator()->getIterator());
754  Builder.CreateStore(&V, Ptr);
755 
756  // Load back next to allocations in the to-be-outlined region.
757  Builder.restoreIP(InnerAllocaIP);
758  Inner = Builder.CreateLoad(Ptr);
759  }
760 
 // A captured thread-id call is replaced by the region-local thread id;
 // every other value is handed to the privatization callback.
761  Value *ReplacementValue = nullptr;
762  CallInst *CI = dyn_cast<CallInst>(&V);
763  if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
764  ReplacementValue = PrivTID;
765  } else {
766  Builder.restoreIP(
767  PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
768  assert(ReplacementValue &&
769  "Expected copy/create callback to set replacement value!");
 // The callback chose to keep V itself, so there is nothing to rewrite.
770  if (ReplacementValue == &V)
771  return;
772  }
773 
774  for (Use *UPtr : Uses)
775  UPtr->set(ReplacementValue);
776  };
777 
778  // Reset the inner alloca insertion as it will be used for loading the values
779  // wrapped into pointers before passing them into the to-be-outlined region.
780  // Configure it to insert immediately after the fake use of zero address so
781  // that they are available in the generated body and so that the
782  // OpenMP-related values (thread ID and zero address pointers) remain leading
783  // in the argument list.
784  InnerAllocaIP = IRBuilder<>::InsertPoint(
785  ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
786 
787  // Reset the outer alloca insertion point to the entry of the relevant block
788  // in case it was invalidated.
789  OuterAllocaIP = IRBuilder<>::InsertPoint(
790  OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
791 
792  for (Value *Input : Inputs) {
793  LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
794  PrivHelper(*Input);
795  }
796  LLVM_DEBUG({
797  for (Value *Output : Outputs)
798  LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
799  });
800  assert(Outputs.empty() &&
801  "OpenMP outlining should not produce live-out values!");
802 
803  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
804  LLVM_DEBUG({
805  for (auto *BB : Blocks)
806  dbgs() << " PBR: " << BB->getName() << "\n";
807  });
808 
809  // Register the outlined info.
810  addOutlineInfo(std::move(OI));
811 
 // Compute the continuation point before erasing the placeholder terminator.
812  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
813  UI->eraseFromParent();
814 
815  return AfterIP;
816 }
817 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads a location `Loc`. It is presumably the function
// called elsewhere in this file as emitFlush(Loc) -- confirm.
//
// Emits a call to __kmpc_flush at the current insertion point, passing the
// ident built from Loc.
819  // Build call void __kmpc_flush(ident_t *loc)
820  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
821  Value *Args[] = {getOrCreateIdent(SrcLocStr)};
822 
823  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
824 }
825 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads a location `Loc`.
//
// Entry point for emitting a flush: moves the builder to Loc and forwards to
// emitFlush. Nothing is emitted if updateToLocation(Loc) fails.
827  if (!updateToLocation(Loc))
828  return;
829  emitFlush(Loc);
830 }
831 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads a location `Loc`. It is presumably the function
// called elsewhere in this file as emitTaskwaitImpl(Loc) -- confirm.
//
// Emits a call to __kmpc_omp_taskwait at the current insertion point with the
// ident built from Loc and the current thread id. The call result is unused.
833  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
834  // global_tid);
835  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
836  Value *Ident = getOrCreateIdent(SrcLocStr);
837  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
838 
839  // Ignore return result until untied tasks are supported.
840  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
841  Args);
842 }
843 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads a location `Loc`.
//
// Entry point for emitting a taskwait: moves the builder to Loc and forwards
// to emitTaskwaitImpl. Nothing is emitted if updateToLocation(Loc) fails.
845  if (!updateToLocation(Loc))
846  return;
847  emitTaskwaitImpl(Loc);
848 }
849 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads a location `Loc`. It is presumably the function
// called elsewhere in this file as emitTaskyieldImpl(Loc) -- confirm.
//
// Emits a call to __kmpc_omp_taskyield at the current insertion point with the
// ident built from Loc, the current thread id, and a constant 0 as the third
// argument.
851  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
852  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
853  Value *Ident = getOrCreateIdent(SrcLocStr);
854  Constant *I32Null = ConstantInt::getNullValue(Int32);
855  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
856 
857  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
858  Args);
859 }
860 
// NOTE(review): the signature line of this definition is absent from this
// listing; the body reads a location `Loc`.
//
// Entry point for emitting a taskyield: moves the builder to Loc and forwards
// to emitTaskyieldImpl. Nothing is emitted if updateToLocation(Loc) fails.
862  if (!updateToLocation(Loc))
863  return;
864  emitTaskyieldImpl(Loc);
865 }
866 
// NOTE(review): the leading signature lines of this definition are absent from
// this listing; besides the parameters shown, the body reads a location `Loc`.
//
// Emits a master region: creates the __kmpc_master and __kmpc_end_master calls
// (both taking the ident and the thread id) and hands them, together with the
// body and finalization callbacks, to EmitOMPInlinedRegion as a conditional
// region with finalization. Returns Loc.IP unchanged if updateToLocation(Loc)
// fails, otherwise whatever EmitOMPInlinedRegion returns.
869  BodyGenCallbackTy BodyGenCB,
870  FinalizeCallbackTy FiniCB) {
871 
872  if (!updateToLocation(Loc))
873  return Loc.IP;
874 
875  Directive OMPD = Directive::OMPD_master;
876  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
877  Value *Ident = getOrCreateIdent(SrcLocStr);
878  Value *ThreadId = getOrCreateThreadID(Ident);
879  Value *Args[] = {Ident, ThreadId};
880 
 // Both calls are created back to back at the current insertion point;
 // arranging them around the body is left to EmitOMPInlinedRegion, which is
 // not visible in this listing.
881  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
882  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
883 
884  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
885  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
886 
887  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
888  /*Conditional*/ true, /*hasFinalize*/ true);
889 }
890 
891 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
892  DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
893  BasicBlock *PostInsertBefore, const Twine &Name) {
894  Module *M = F->getParent();
895  LLVMContext &Ctx = M->getContext();
896  Type *IndVarTy = TripCount->getType();
897 
898  // Create the basic block structure.
899  BasicBlock *Preheader =
900  BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
901  BasicBlock *Header =
902  BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
903  BasicBlock *Cond =
904  BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
905  BasicBlock *Body =
906  BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
907  BasicBlock *Latch =
908  BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
909  BasicBlock *Exit =
910  BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
911  BasicBlock *After =
912  BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
913 
914  // Use specified DebugLoc for new instructions.
915  Builder.SetCurrentDebugLocation(DL);
916 
917  Builder.SetInsertPoint(Preheader);
918  Builder.CreateBr(Header);
919 
920  Builder.SetInsertPoint(Header);
921  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
922  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
923  Builder.CreateBr(Cond);
924 
925  Builder.SetInsertPoint(Cond);
926  Value *Cmp =
927  Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
928  Builder.CreateCondBr(Cmp, Body, Exit);
929 
930  Builder.SetInsertPoint(Body);
931  Builder.CreateBr(Latch);
932 
933  Builder.SetInsertPoint(Latch);
934  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
935  "omp_" + Name + ".next", /*HasNUW=*/true);
936  Builder.CreateBr(Header);
937  IndVarPHI->addIncoming(Next, Latch);
938 
939  Builder.SetInsertPoint(Exit);
940  Builder.CreateBr(After);
941 
942  // Remember and return the canonical control flow.
943  LoopInfos.emplace_front();
944  CanonicalLoopInfo *CL = &LoopInfos.front();
945 
946  CL->Preheader = Preheader;
947  CL->Header = Header;
948  CL->Cond = Cond;
949  CL->Body = Body;
950  CL->Latch = Latch;
951  CL->Exit = Exit;
952  CL->After = After;
953 
954  CL->IsValid = true;
955 
956 #ifndef NDEBUG
957  CL->assertOK();
958 #endif
959  return CL;
960 }
961 
964  LoopBodyGenCallbackTy BodyGenCB,
965  Value *TripCount, const Twine &Name) {
966  BasicBlock *BB = Loc.IP.getBlock();
967  BasicBlock *NextBB = BB->getNextNode();
968 
969  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
970  NextBB, NextBB, Name);
971  BasicBlock *After = CL->getAfter();
972 
973  // If location is not set, don't connect the loop.
974  if (updateToLocation(Loc)) {
975  // Split the loop at the insertion point: Branch to the preheader and move
976  // every following instruction to after the loop (the After BB). Also, the
977  // new successor is the loop's after block.
978  Builder.CreateBr(CL->Preheader);
979  After->getInstList().splice(After->begin(), BB->getInstList(),
980  Builder.GetInsertPoint(), BB->end());
981  After->replaceSuccessorsPhiUsesWith(BB, After);
982  }
983 
984  // Emit the body content. We do it after connecting the loop to the CFG to
985  // avoid that the callback encounters degenerate BBs.
986  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
987 
988 #ifndef NDEBUG
989  CL->assertOK();
990 #endif
991  return CL;
992 }
993 
995  const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
996  Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
997  InsertPointTy ComputeIP, const Twine &Name) {
998 
999  // Consider the following difficulties (assuming 8-bit signed integers):
1000  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1001  // DO I = 1, 100, 50
1002  /// * A \p Step of INT_MIN cannot not be normalized to a positive direction:
1003  // DO I = 100, 0, -128
1004 
1005  // Start, Stop and Step must be of the same integer type.
1006  auto *IndVarTy = cast<IntegerType>(Start->getType());
1007  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
1008  assert(IndVarTy == Step->getType() && "Step type mismatch");
1009 
1010  LocationDescription ComputeLoc =
1011  ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
1012  updateToLocation(ComputeLoc);
1013 
1014  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1015  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1016 
1017  // Like Step, but always positive.
1018  Value *Incr = Step;
1019 
1020  // Distance between Start and Stop; always positive.
1021  Value *Span;
1022 
1023  // Condition whether there are no iterations are executed at all, e.g. because
1024  // UB < LB.
1025  Value *ZeroCmp;
1026 
1027  if (IsSigned) {
1028  // Ensure that increment is positive. If not, negate and invert LB and UB.
1029  Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1030  Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1031  Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1032  Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
1033  Span = Builder.CreateSub(UB, LB, "", false, true);
1034  ZeroCmp = Builder.CreateICmp(
1035  InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1036  } else {
1037  Span = Builder.CreateSub(Stop, Start, "", true);
1038  ZeroCmp = Builder.CreateICmp(
1039  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1040  }
1041 
1042  Value *CountIfLooping;
1043  if (InclusiveStop) {
1044  CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1045  } else {
1046  // Avoid incrementing past stop since it could overflow.
1047  Value *CountIfTwo = Builder.CreateAdd(
1048  Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
1049  Value *OneCmp = Builder.CreateICmp(
1050  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1051  CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1052  }
1053  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1054  "omp_" + Name + ".tripcount");
1055 
1056  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1057  Builder.restoreIP(CodeGenIP);
1058  Value *Span = Builder.CreateMul(IV, Step);
1059  Value *IndVar = Builder.CreateAdd(Span, Start);
1060  BodyGenCB(Builder.saveIP(), IndVar);
1061  };
1062  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1063  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1064 }
1065 
1066 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1067 // static scheduling depending on `type`. Only i32 and i64 are supported by the
1068 // runtime. Always interpret integers as unsigned similarly to
1069 // CanonicalLoopInfo.
1071  OpenMPIRBuilder &OMPBuilder) {
1072  unsigned Bitwidth = Ty->getIntegerBitWidth();
1073  if (Bitwidth == 32)
1074  return OMPBuilder.getOrCreateRuntimeFunction(
1075  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1076  if (Bitwidth == 64)
1077  return OMPBuilder.getOrCreateRuntimeFunction(
1078  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1079  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1080 }
1081 
1082 // Sets the number of loop iterations to the given value. This value must be
1083 // valid in the condition block (i.e., defined in the preheader) and is
1084 // interpreted as an unsigned integer.
1086  Instruction *CmpI = &CLI->getCond()->front();
1087  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1088  CmpI->setOperand(1, TripCount);
1089  CLI->assertOK();
1090 }
1091 
1093  const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1094  InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
1095  // Set up the source location value for OpenMP runtime.
1096  if (!updateToLocation(Loc))
1097  return nullptr;
1098 
1099  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1100  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1101 
1102  // Declare useful OpenMP runtime functions.
1103  Value *IV = CLI->getIndVar();
1104  Type *IVTy = IV->getType();
1105  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1106  FunctionCallee StaticFini =
1107  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1108 
1109  // Allocate space for computed loop bounds as expected by the "init" function.
1110  Builder.restoreIP(AllocaIP);
1111  Type *I32Type = Type::getInt32Ty(M.getContext());
1112  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1113  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1114  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1115  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1116 
1117  // At the end of the preheader, prepare for calling the "init" function by
1118  // storing the current loop bounds into the allocated space. A canonical loop
1119  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1120  // and produces an inclusive upper bound.
1121  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1122  Constant *Zero = ConstantInt::get(IVTy, 0);
1123  Constant *One = ConstantInt::get(IVTy, 1);
1124  Builder.CreateStore(Zero, PLowerBound);
1125  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1126  Builder.CreateStore(UpperBound, PUpperBound);
1127  Builder.CreateStore(One, PStride);
1128 
1129  if (!Chunk)
1130  Chunk = One;
1131 
1132  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1133 
1134  // TODO: extract scheduling type and map it to OMP constant. This is curently
1135  // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first.
1136  constexpr int StaticSchedType = 34;
1137  Constant *SchedulingType = ConstantInt::get(I32Type, StaticSchedType);
1138 
1139  // Call the "init" function and update the trip count of the loop with the
1140  // value it produced.
1141  Builder.CreateCall(StaticInit,
1142  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1143  PUpperBound, PStride, One, Chunk});
1144  Value *LowerBound = Builder.CreateLoad(PLowerBound);
1145  Value *InclusiveUpperBound = Builder.CreateLoad(PUpperBound);
1146  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1147  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1148  setCanonicalLoopTripCount(CLI, TripCount);
1149 
1150  // Update all uses of the induction variable except the one in the condition
1151  // block that compares it with the actual upper bound, and the increment in
1152  // the latch block.
1153  // TODO: this can eventually move to CanonicalLoopInfo or to a new
1154  // CanonicalLoopInfoUpdater interface.
1155  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
1156  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
1157  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
1158  auto *Instr = dyn_cast<Instruction>(U.getUser());
1159  return !Instr ||
1160  (Instr->getParent() != CLI->getCond() &&
1161  Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
1162  });
1163 
1164  // In the "exit" block, call the "fini" function.
1165  Builder.SetInsertPoint(CLI->getExit(),
1166  CLI->getExit()->getTerminator()->getIterator());
1167  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1168 
1169  // Add the barrier if requested.
1170  if (NeedsBarrier)
1171  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
1172  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1173  /* CheckCancelFlag */ false);
1174 
1175  CLI->assertOK();
1176  return CLI;
1177 }
1178 
1180  const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1181  InsertPointTy AllocaIP, bool NeedsBarrier) {
1182  // Currently only supports static schedules.
1183  return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
1184 }
1185 
1186 /// Make \p Source branch to \p Target.
1187 ///
1188 /// Handles two situations:
1189 /// * \p Source already has an unconditional branch.
1190 /// * \p Source is a degenerate block (no terminator because the BB is
1191 /// the current head of the IR construction).
1193  if (Instruction *Term = Source->getTerminator()) {
1194  auto *Br = cast<BranchInst>(Term);
1195  assert(!Br->isConditional() &&
1196  "BB's terminator must be an unconditional branch (or degenerate)");
1197  BasicBlock *Succ = Br->getSuccessor(0);
1198  Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
1199  Br->setSuccessor(0, Target);
1200  return;
1201  }
1202 
1203  auto *NewBr = BranchInst::Create(Target, Source);
1204  NewBr->setDebugLoc(DL);
1205 }
1206 
1207 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
1208 /// after this \p OldTarget will be orphaned.
1209 static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
1210  BasicBlock *NewTarget, DebugLoc DL) {
1211  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
1212  redirectTo(Pred, NewTarget, DL);
1213 }
1214 
1215 /// Determine which blocks in \p BBs are reachable from outside and remove the
1216 /// ones that are not reachable from the function.
1218  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
1219  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
1220  for (Use &U : BB->uses()) {
1221  auto *UseInst = dyn_cast<Instruction>(U.getUser());
1222  if (!UseInst)
1223  continue;
1224  if (BBsToErase.count(UseInst->getParent()))
1225  continue;
1226  return true;
1227  }
1228  return false;
1229  };
1230 
1231  while (true) {
1232  bool Changed = false;
1233  for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
1234  if (HasRemainingUses(BB)) {
1235  BBsToErase.erase(BB);
1236  Changed = true;
1237  }
1238  }
1239  if (!Changed)
1240  break;
1241  }
1242 
1243  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
1244  DeleteDeadBlocks(BBVec);
1245 }
1246 
1249  InsertPointTy ComputeIP) {
1250  assert(Loops.size() >= 1 && "At least one loop required");
1251  size_t NumLoops = Loops.size();
1252 
1253  // Nothing to do if there is already just one loop.
1254  if (NumLoops == 1)
1255  return Loops.front();
1256 
1257  CanonicalLoopInfo *Outermost = Loops.front();
1258  CanonicalLoopInfo *Innermost = Loops.back();
1259  BasicBlock *OrigPreheader = Outermost->getPreheader();
1260  BasicBlock *OrigAfter = Outermost->getAfter();
1261  Function *F = OrigPreheader->getParent();
1262 
1263  // Setup the IRBuilder for inserting the trip count computation.
1264  Builder.SetCurrentDebugLocation(DL);
1265  if (ComputeIP.isSet())
1266  Builder.restoreIP(ComputeIP);
1267  else
1268  Builder.restoreIP(Outermost->getPreheaderIP());
1269 
1270  // Derive the collapsed' loop trip count.
1271  // TODO: Find common/largest indvar type.
1272  Value *CollapsedTripCount = nullptr;
1273  for (CanonicalLoopInfo *L : Loops) {
1274  Value *OrigTripCount = L->getTripCount();
1275  if (!CollapsedTripCount) {
1276  CollapsedTripCount = OrigTripCount;
1277  continue;
1278  }
1279 
1280  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
1281  CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
1282  {}, /*HasNUW=*/true);
1283  }
1284 
1285  // Create the collapsed loop control flow.
1286  CanonicalLoopInfo *Result =
1287  createLoopSkeleton(DL, CollapsedTripCount, F,
1288  OrigPreheader->getNextNode(), OrigAfter, "collapsed");
1289 
1290  // Build the collapsed loop body code.
1291  // Start with deriving the input loop induction variables from the collapsed
1292  // one, using a divmod scheme. To preserve the original loops' order, the
1293  // innermost loop use the least significant bits.
1294  Builder.restoreIP(Result->getBodyIP());
1295 
1296  Value *Leftover = Result->getIndVar();
1297  SmallVector<Value *> NewIndVars;
1298  NewIndVars.set_size(NumLoops);
1299  for (int i = NumLoops - 1; i >= 1; --i) {
1300  Value *OrigTripCount = Loops[i]->getTripCount();
1301 
1302  Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
1303  NewIndVars[i] = NewIndVar;
1304 
1305  Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
1306  }
1307  // Outermost loop gets all the remaining bits.
1308  NewIndVars[0] = Leftover;
1309 
1310  // Construct the loop body control flow.
1311  // We progressively construct the branch structure following in direction of
1312  // the control flow, from the leading in-between code, the loop nest body, the
1313  // trailing in-between code, and rejoining the collapsed loop's latch.
1314  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
1315  // the ContinueBlock is set, continue with that block. If ContinuePred, use
1316  // its predecessors as sources.
1317  BasicBlock *ContinueBlock = Result->getBody();
1318  BasicBlock *ContinuePred = nullptr;
1319  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
1320  BasicBlock *NextSrc) {
1321  if (ContinueBlock)
1322  redirectTo(ContinueBlock, Dest, DL);
1323  else
1324  redirectAllPredecessorsTo(ContinuePred, Dest, DL);
1325 
1326  ContinueBlock = nullptr;
1327  ContinuePred = NextSrc;
1328  };
1329 
1330  // The code before the nested loop of each level.
1331  // Because we are sinking it into the nest, it will be executed more often
1332  // that the original loop. More sophisticated schemes could keep track of what
1333  // the in-between code is and instantiate it only once per thread.
1334  for (size_t i = 0; i < NumLoops - 1; ++i)
1335  ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
1336 
1337  // Connect the loop nest body.
1338  ContinueWith(Innermost->getBody(), Innermost->getLatch());
1339 
1340  // The code after the nested loop at each level.
1341  for (size_t i = NumLoops - 1; i > 0; --i)
1342  ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
1343 
1344  // Connect the finished loop to the collapsed loop latch.
1345  ContinueWith(Result->getLatch(), nullptr);
1346 
1347  // Replace the input loops with the new collapsed loop.
1348  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
1349  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
1350 
1351  // Replace the input loop indvars with the derived ones.
1352  for (size_t i = 0; i < NumLoops; ++i)
1353  Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
1354 
1355  // Remove unused parts of the input loops.
1356  SmallVector<BasicBlock *, 12> OldControlBBs;
1357  OldControlBBs.reserve(6 * Loops.size());
1358  for (CanonicalLoopInfo *Loop : Loops)
1359  Loop->collectControlBlocks(OldControlBBs);
1360  removeUnusedBlocksFromParent(OldControlBBs);
1361 
1362 #ifndef NDEBUG
1363  Result->assertOK();
1364 #endif
1365  return Result;
1366 }
1367 
1368 std::vector<CanonicalLoopInfo *>
1370  ArrayRef<Value *> TileSizes) {
1371  assert(TileSizes.size() == Loops.size() &&
1372  "Must pass as many tile sizes as there are loops");
1373  int NumLoops = Loops.size();
1374  assert(NumLoops >= 1 && "At least one loop to tile required");
1375 
1376  CanonicalLoopInfo *OutermostLoop = Loops.front();
1377  CanonicalLoopInfo *InnermostLoop = Loops.back();
1378  Function *F = OutermostLoop->getBody()->getParent();
1379  BasicBlock *InnerEnter = InnermostLoop->getBody();
1380  BasicBlock *InnerLatch = InnermostLoop->getLatch();
1381 
1382  // Collect original trip counts and induction variable to be accessible by
1383  // index. Also, the structure of the original loops is not preserved during
1384  // the construction of the tiled loops, so do it before we scavenge the BBs of
1385  // any original CanonicalLoopInfo.
1386  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
1387  for (CanonicalLoopInfo *L : Loops) {
1388  OrigTripCounts.push_back(L->getTripCount());
1389  OrigIndVars.push_back(L->getIndVar());
1390  }
1391 
1392  // Collect the code between loop headers. These may contain SSA definitions
1393  // that are used in the loop nest body. To be usable with in the innermost
1394  // body, these BasicBlocks will be sunk into the loop nest body. That is,
1395  // these instructions may be executed more often than before the tiling.
1396  // TODO: It would be sufficient to only sink them into body of the
1397  // corresponding tile loop.
1399  for (int i = 0; i < NumLoops - 1; ++i) {
1400  CanonicalLoopInfo *Surrounding = Loops[i];
1401  CanonicalLoopInfo *Nested = Loops[i + 1];
1402 
1403  BasicBlock *EnterBB = Surrounding->getBody();
1404  BasicBlock *ExitBB = Nested->getHeader();
1405  InbetweenCode.emplace_back(EnterBB, ExitBB);
1406  }
1407 
1408  // Compute the trip counts of the floor loops.
1409  Builder.SetCurrentDebugLocation(DL);
1410  Builder.restoreIP(OutermostLoop->getPreheaderIP());
1411  SmallVector<Value *, 4> FloorCount, FloorRems;
1412  for (int i = 0; i < NumLoops; ++i) {
1413  Value *TileSize = TileSizes[i];
1414  Value *OrigTripCount = OrigTripCounts[i];
1415  Type *IVType = OrigTripCount->getType();
1416 
1417  Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
1418  Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
1419 
1420  // 0 if tripcount divides the tilesize, 1 otherwise.
1421  // 1 means we need an additional iteration for a partial tile.
1422  //
1423  // Unfortunately we cannot just use the roundup-formula
1424  // (tripcount + tilesize - 1)/tilesize
1425  // because the summation might overflow. We do not want introduce undefined
1426  // behavior when the untiled loop nest did not.
1427  Value *FloorTripOverflow =
1428  Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
1429 
1430  FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
1431  FloorTripCount =
1432  Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
1433  "omp_floor" + Twine(i) + ".tripcount", true);
1434 
1435  // Remember some values for later use.
1436  FloorCount.push_back(FloorTripCount);
1437  FloorRems.push_back(FloorTripRem);
1438  }
1439 
1440  // Generate the new loop nest, from the outermost to the innermost.
1441  std::vector<CanonicalLoopInfo *> Result;
1442  Result.reserve(NumLoops * 2);
1443 
1444  // The basic block of the surrounding loop that enters the nest generated
1445  // loop.
1446  BasicBlock *Enter = OutermostLoop->getPreheader();
1447 
1448  // The basic block of the surrounding loop where the inner code should
1449  // continue.
1450  BasicBlock *Continue = OutermostLoop->getAfter();
1451 
1452  // Where the next loop basic block should be inserted.
1453  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
1454 
1455  auto EmbeddNewLoop =
1456  [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
1457  Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
1458  CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
1459  DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
1460  redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
1461  redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
1462 
1463  // Setup the position where the next embedded loop connects to this loop.
1464  Enter = EmbeddedLoop->getBody();
1465  Continue = EmbeddedLoop->getLatch();
1466  OutroInsertBefore = EmbeddedLoop->getLatch();
1467  return EmbeddedLoop;
1468  };
1469 
1470  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
1471  const Twine &NameBase) {
1472  for (auto P : enumerate(TripCounts)) {
1473  CanonicalLoopInfo *EmbeddedLoop =
1474  EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
1475  Result.push_back(EmbeddedLoop);
1476  }
1477  };
1478 
1479  EmbeddNewLoops(FloorCount, "floor");
1480 
1481  // Within the innermost floor loop, emit the code that computes the tile
1482  // sizes.
1483  Builder.SetInsertPoint(Enter->getTerminator());
1484  SmallVector<Value *, 4> TileCounts;
1485  for (int i = 0; i < NumLoops; ++i) {
1486  CanonicalLoopInfo *FloorLoop = Result[i];
1487  Value *TileSize = TileSizes[i];
1488 
1489  Value *FloorIsEpilogue =
1490  Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
1491  Value *TileTripCount =
1492  Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
1493 
1494  TileCounts.push_back(TileTripCount);
1495  }
1496 
1497  // Create the tile loops.
1498  EmbeddNewLoops(TileCounts, "tile");
1499 
1500  // Insert the inbetween code into the body.
1501  BasicBlock *BodyEnter = Enter;
1502  BasicBlock *BodyEntered = nullptr;
1503  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
1504  BasicBlock *EnterBB = P.first;
1505  BasicBlock *ExitBB = P.second;
1506 
1507  if (BodyEnter)
1508  redirectTo(BodyEnter, EnterBB, DL);
1509  else
1510  redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
1511 
1512  BodyEnter = nullptr;
1513  BodyEntered = ExitBB;
1514  }
1515 
1516  // Append the original loop nest body into the generated loop nest body.
1517  if (BodyEnter)
1518  redirectTo(BodyEnter, InnerEnter, DL);
1519  else
1520  redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
1521  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
1522 
1523  // Replace the original induction variable with an induction variable computed
1524  // from the tile and floor induction variables.
1525  Builder.restoreIP(Result.back()->getBodyIP());
1526  for (int i = 0; i < NumLoops; ++i) {
1527  CanonicalLoopInfo *FloorLoop = Result[i];
1528  CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
1529  Value *OrigIndVar = OrigIndVars[i];
1530  Value *Size = TileSizes[i];
1531 
1532  Value *Scale =
1533  Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
1534  Value *Shift =
1535  Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
1536  OrigIndVar->replaceAllUsesWith(Shift);
1537  }
1538 
1539  // Remove unused parts of the original loops.
1540  SmallVector<BasicBlock *, 12> OldControlBBs;
1541  OldControlBBs.reserve(6 * Loops.size());
1542  for (CanonicalLoopInfo *Loop : Loops)
1543  Loop->collectControlBlocks(OldControlBBs);
1544  removeUnusedBlocksFromParent(OldControlBBs);
1545 
1546 #ifndef NDEBUG
1547  for (CanonicalLoopInfo *GenL : Result)
1548  GenL->assertOK();
1549 #endif
1550  return Result;
1551 }
1552 
1555  llvm::Value *BufSize, llvm::Value *CpyBuf,
1556  llvm::Value *CpyFn, llvm::Value *DidIt) {
1557  if (!updateToLocation(Loc))
1558  return Loc.IP;
1559 
1560  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1561  Value *Ident = getOrCreateIdent(SrcLocStr);
1562  Value *ThreadId = getOrCreateThreadID(Ident);
1563 
1564  llvm::Value *DidItLD = Builder.CreateLoad(DidIt);
1565 
1566  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
1567 
1568  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
1569  Builder.CreateCall(Fn, Args);
1570 
1571  return Builder.saveIP();
1572 }
1573 
1576  BodyGenCallbackTy BodyGenCB,
1577  FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
1578 
1579  if (!updateToLocation(Loc))
1580  return Loc.IP;
1581 
1582  // If needed (i.e. not null), initialize `DidIt` with 0
1583  if (DidIt) {
1584  Builder.CreateStore(Builder.getInt32(0), DidIt);
1585  }
1586 
1587  Directive OMPD = Directive::OMPD_single;
1588  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1589  Value *Ident = getOrCreateIdent(SrcLocStr);
1590  Value *ThreadId = getOrCreateThreadID(Ident);
1591  Value *Args[] = {Ident, ThreadId};
1592 
1593  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
1594  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1595 
1596  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
1597  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1598 
1599  // generates the following:
1600  // if (__kmpc_single()) {
1601  // .... single region ...
1602  // __kmpc_end_single
1603  // }
1604 
1605  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1606  /*Conditional*/ true, /*hasFinalize*/ true);
1607 }
1608 
1610  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
1611  FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
1612 
1613  if (!updateToLocation(Loc))
1614  return Loc.IP;
1615 
1616  Directive OMPD = Directive::OMPD_critical;
1617  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1618  Value *Ident = getOrCreateIdent(SrcLocStr);
1619  Value *ThreadId = getOrCreateThreadID(Ident);
1620  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
1621  Value *Args[] = {Ident, ThreadId, LockVar};
1622 
1624  Function *RTFn = nullptr;
1625  if (HintInst) {
1626  // Add Hint to entry Args and create call
1627  EnterArgs.push_back(HintInst);
1628  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
1629  } else {
1630  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
1631  }
1632  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
1633 
1634  Function *ExitRTLFn =
1635  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
1636  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1637 
1638  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1639  /*Conditional*/ false, /*hasFinalize*/ true);
1640 }
1641 
1642 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
1643  Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
1644  BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
1645  bool HasFinalize) {
1646 
1647  if (HasFinalize)
1648  FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false});
1649 
1650  // Create inlined region's entry and body blocks, in preparation
1651  // for conditional creation
1652  BasicBlock *EntryBB = Builder.GetInsertBlock();
1653  Instruction *SplitPos = EntryBB->getTerminator();
1654  if (!isa_and_nonnull<BranchInst>(SplitPos))
1655  SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
1656  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
1657  BasicBlock *FiniBB =
1658  EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
1659 
1660  Builder.SetInsertPoint(EntryBB->getTerminator());
1661  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
1662 
1663  // generate body
1664  BodyGenCB(/* AllocaIP */ InsertPointTy(),
1665  /* CodeGenIP */ Builder.saveIP(), *FiniBB);
1666 
1667  // If we didn't emit a branch to FiniBB during body generation, it means
1668  // FiniBB is unreachable (e.g. while(1);). stop generating all the
1669  // unreachable blocks, and remove anything we are not going to use.
1670  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
1671  if (SkipEmittingRegion) {
1672  FiniBB->eraseFromParent();
1673  ExitCall->eraseFromParent();
1674  // Discard finalization if we have it.
1675  if (HasFinalize) {
1676  assert(!FinalizationStack.empty() &&
1677  "Unexpected finalization stack state!");
1678  FinalizationStack.pop_back();
1679  }
1680  } else {
1681  // emit exit call and do any needed finalization.
1682  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
1683  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
1684  FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
1685  "Unexpected control flow graph state!!");
1686  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
1687  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
1688  "Unexpected Control Flow State!");
1689  MergeBlockIntoPredecessor(FiniBB);
1690  }
1691 
1692  // If we are skipping the region of a non conditional, remove the exit
1693  // block, and clear the builder's insertion point.
1694  assert(SplitPos->getParent() == ExitBB &&
1695  "Unexpected Insertion point location!");
1696  if (!Conditional && SkipEmittingRegion) {
1697  ExitBB->eraseFromParent();
1698  Builder.ClearInsertionPoint();
1699  } else {
1700  auto merged = MergeBlockIntoPredecessor(ExitBB);
1701  BasicBlock *ExitPredBB = SplitPos->getParent();
1702  auto InsertBB = merged ? ExitPredBB : ExitBB;
1703  if (!isa_and_nonnull<BranchInst>(SplitPos))
1704  SplitPos->eraseFromParent();
1705  Builder.SetInsertPoint(InsertBB);
1706  }
1707 
1708  return Builder.saveIP();
1709 }
1710 
/// Emit the entry part of a directive region.
///
/// When \p Conditional is false nothing is emitted and the builder's current
/// insertion point is returned. Otherwise a new block "omp_region.body" is
/// placed after the builder's current block, that block's terminator is moved
/// into the new block, and a conditional branch on CreateIsNotNull(EntryCall)
/// to either the new block or \p ExitBB is created at the builder's insertion
/// point. The builder is left positioned before the terminator of the new
/// block so that the region body can be generated there.
///
/// \param OMPD        Directive kind; not referenced in this function.
/// \param EntryCall   Value whose non-null-ness selects the region body.
/// \param ExitBB      Block branched to when \p EntryCall tests as null.
/// \param Conditional Whether the region body is guarded by \p EntryCall.
/// \returns In the conditional case, the first insertion point of \p ExitBB.
1711 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
1712  Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
1713
1714  // If there is nothing to do, return the current insertion point.
1715  if (!Conditional)
1716  return Builder.saveIP();
1717
1718  BasicBlock *EntryBB = Builder.GetInsertBlock();
1719  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
1720  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
      // Placeholder instruction in ThenBB: it gives the builder an instruction
      // to insert in front of and is erased again further down.
1721  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
1722
1723  // Emit ThenBB and set the Builder's insertion point there for
1724  // body generation next. Place the block after the current block.
1725  Function *CurFn = EntryBB->getParent();
1726  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
1727
1728  // Move the entry block's branch to the end of ThenBB, and replace it with
1729  // a conditional branch (if-statement).
1730  Instruction *EntryBBTI = EntryBB->getTerminator();
1731  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
1732  EntryBBTI->removeFromParent();
1733  Builder.SetInsertPoint(UI);
1734  Builder.Insert(EntryBBTI);
1735  UI->eraseFromParent();
1736  Builder.SetInsertPoint(ThenBB->getTerminator());
1737
1738  // Return an insertion point to ExitBB.
1739  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
1740 }
1741 
/// Emit the exit part of a directive region.
///
/// The builder is first restored to \p FinIP. If \p HasFinalize is set, the
/// top entry of FinalizationStack is popped (asserting that the stack is not
/// empty and that the entry's directive kind equals \p OMPD), its FiniCB is
/// invoked with \p FinIP, and the builder is repositioned before the
/// terminator of \p FinIP's block. Finally \p ExitCall is unlinked from its
/// current position and re-inserted at the builder's insertion point.
///
/// \returns An insertion point located at \p ExitCall.
1742 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
1743  omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
1744  bool HasFinalize) {
1745
1746  Builder.restoreIP(FinIP);
1747
1748  // If there is finalization to do, emit it before the exit call.
1749  if (HasFinalize) {
1750  assert(!FinalizationStack.empty() &&
1751  "Unexpected finalization stack state!");
1752
1753  FinalizationInfo Fi = FinalizationStack.pop_back_val();
1754  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
1755
1756  Fi.FiniCB(FinIP);
1757
1758  BasicBlock *FiniBB = FinIP.getBlock();
1759  Instruction *FiniBBTI = FiniBB->getTerminator();
1760
1761  // Set the Builder IP for call creation: right before the terminator.
1762  Builder.SetInsertPoint(FiniBBTI);
1763  }
1764
1765  // Move the exit call to the builder's insertion point. With finalization
      // this is directly before the finalization block's terminator; without it
      // the call lands at FinIP.
1766  ExitCall->removeFromParent();
1767  Builder.Insert(ExitCall);
1768
1769  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
1770  ExitCall->getIterator());
1771 }
1772 
1774  InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
1775  llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
      // Builds a guarded "copyin.not.master" block at IP that is only entered
      // when MasterAddr and PrivateAddr differ (both are converted to IntPtrTy
      // integers and compared). Returns the builder's insertion point inside
      // that block: before its branch to "copyin.not.master.end" when
      // BranchtoEnd is set, otherwise at the end of the still-unterminated
      // block. An unset IP is returned unchanged and nothing is emitted.
1776  if (!IP.isSet())
1777  return IP;
1778
1780
1781  // Creates the following CFG structure:
1782  //     OMP_Entry : (MasterAddr != PrivateAddr)?
1783  //        F     T
1784  //        |      \
1785  //        |     copyin.not.master
1786  //        |      /
1787  //        v     /
1788  //   copyin.not.master.end
1789  //          |
1790  //          v
1791  //   OMP.Entry.Next
1792
1793  BasicBlock *OMP_Entry = IP.getBlock();
1794  Function *CurFn = OMP_Entry->getParent();
1795  BasicBlock *CopyBegin =
1796  BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
1797  BasicBlock *CopyEnd = nullptr;
1798
1799  // If the entry block is terminated by a branch, split it to preserve the
1800  // branch to the following basic block (i.e. OMP.Entry.Next); otherwise
      // create a fresh end block.
1801  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
1802  CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
1803  "copyin.not.master.end");
      // After the split OMP_Entry has a new terminator; erase it because the
      // conditional branch created below takes its place.
1804  OMP_Entry->getTerminator()->eraseFromParent();
1805  } else {
1806  CopyEnd =
1807  BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
1808  }
1809
      // Compare the two addresses as integers and branch on inequality.
1810  Builder.SetInsertPoint(OMP_Entry);
1811  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
1812  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
1813  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
1814  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
1815
      // Position the builder in the copy block; optionally terminate the block
      // first so that generated code is inserted before the branch to CopyEnd.
1816  Builder.SetInsertPoint(CopyBegin);
1817  if (BranchtoEnd)
1818  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
1819
1820  return Builder.saveIP();
1821 }
1822 
1825  std::string Name) {
      // Emits, at Loc.IP, a call to the runtime function identified by
      // OMPRTL___kmpc_alloc and returns that call instruction. The resulting
      // value is named Name.
1827  Builder.restoreIP(Loc.IP);
1828
      // The thread id is obtained from the ident built for this source location.
1829  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1830  Value *Ident = getOrCreateIdent(SrcLocStr);
1831  Value *ThreadId = getOrCreateThreadID(Ident);
      // Argument order passed to the runtime: thread id, size, allocator.
1832  Value *Args[] = {ThreadId, Size, Allocator};
1833
1834  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
1835
1836  return Builder.CreateCall(Fn, Args, Name);
1837 }
1838 
1841  std::string Name) {
      // Emits, at Loc.IP, a call to the runtime function identified by
      // OMPRTL___kmpc_free and returns that call instruction. The resulting
      // value is named Name.
1843  Builder.restoreIP(Loc.IP);
1844
      // The thread id is obtained from the ident built for this source location.
1845  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1846  Value *Ident = getOrCreateIdent(SrcLocStr);
1847  Value *ThreadId = getOrCreateThreadID(Ident);
      // Argument order passed to the runtime: thread id, address, allocator.
1848  Value *Args[] = {ThreadId, Addr, Allocator};
1849  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
1850  return Builder.CreateCall(Fn, Args, Name);
1851 }
1852 
1854  const LocationDescription &Loc, llvm::Value *Pointer,
1857  Builder.restoreIP(Loc.IP);
      // Emits, at Loc.IP, a call to the runtime function identified by
      // OMPRTL___kmpc_threadprivate_cached and returns that call instruction.
1858
1859  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1860  Value *Ident = getOrCreateIdent(SrcLocStr);
1861  Value *ThreadId = getOrCreateThreadID(Ident);
      // The cache is an OpenMP-internal global of type Int8PtrPtr looked up (or
      // created) under Name, so equal names share one cache variable.
1862  Constant *ThreadPrivateCache =
1863  getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
      // Argument order passed to the runtime: ident, thread id, pointer, size,
      // cache variable.
1864  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
1865
1866  Function *Fn =
1867  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
1868
1869  return Builder.CreateCall(Fn, Args);
1870 }
1871 
/// Concatenate \p Parts into a single name.
///
/// The first part is preceded by \p FirstSeparator and every following part by
/// \p Separator; e.g. parts {"a", "b"} with FirstSeparator "." and Separator
/// "_" yield ".a_b". An empty \p Parts yields an empty string.
1872 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
1873  StringRef FirstSeparator,
1874  StringRef Separator) {
1875  SmallString<128> Buffer;
1876  llvm::raw_svector_ostream OS(Buffer);
      // Sep starts as FirstSeparator and switches to Separator after the first
      // part has been written.
1877  StringRef Sep = FirstSeparator;
1878  for (StringRef Part : Parts) {
1879  OS << Sep << Part;
1880  Sep = Separator;
1881  }
1882  return OS.str().str();
1883 }
1884 
/// Return the OpenMP-internal global variable named \p Name, creating it on
/// first use.
///
/// Variables are cached in InternalVars keyed by name. If a variable is
/// already cached under \p Name, its pointee type is asserted to equal \p Ty.
/// Otherwise a new non-constant, non-thread-local global with common linkage
/// and a null-value initializer of type \p Ty is created in module M in
/// address space \p AddressSpace and stored in the cache.
1885 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
1886  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
1887  // TODO: Replace the twine arg with stringref to get rid of the conversion
1888  // logic. However, this is taken from the current implementation in clang
1889  // as is. Since this method is used in many places exclusively for OMP
1890  // internal use, we will keep it as is temporarily until we move all users
1891  // to the builder and then, if possible, fix it everywhere in one go.
1892  SmallString<256> Buffer;
1893  llvm::raw_svector_ostream Out(Buffer);
1894  Out << Name;
1895  StringRef RuntimeName = Out.str();
      // try_emplace inserts a null entry when the name is new, so a null
      // Elem.second below means the variable still has to be created.
1896  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
1897  if (Elem.second) {
1898  assert(Elem.second->getType()->getPointerElementType() == Ty &&
1899  "OMP internal variable has different type than requested");
1900  } else {
1901  // TODO: investigate the appropriate linkage type used for the global
1902  // variable for possibly changing that to internal or private, or maybe
1903  // create different versions of the function for different OMP internal
1904  // variables.
1905  Elem.second = new llvm::GlobalVariable(
1906  M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
1907  llvm::Constant::getNullValue(Ty), Elem.first(),
1908  /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1909  AddressSpace);
1910  }
1911
1912  return Elem.second;
1913 }
1914 
/// Return the OpenMP-internal variable used for the critical region named
/// \p CriticalName.
///
/// The variable has type KmpCriticalNameTy and is named
/// ".gomp_critical_user_<CriticalName>.var" ("." is used both as the first and
/// as the inner separator).
1915 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
1916  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1917  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
1918  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
1919 }
1920 
1921 // Create all simple and struct types exposed by the runtime and remember
1922 // the llvm::PointerTypes of them for easy access later.
//
// The macros defined below are intended to be expanded by the OMPKinds.def
// include at the end of the function body (NOTE(review): the .def contents are
// not visible here):
//  - OMP_TYPE assigns InitValue to VarName.
//  - OMP_ARRAY_TYPE sets VarName##Ty to an array type and VarName##PtrTy to an
//    unqualified pointer to it.
//  - OMP_FUNCTION_TYPE sets VarName to a function type and VarName##Ptr to an
//    unqualified pointer to it.
//  - OMP_STRUCT_TYPE reuses the struct type already registered under
//    StructName in the context, or creates it, and sets VarName and
//    VarName##Ptr accordingly.
1923 void OpenMPIRBuilder::initializeTypes(Module &M) {
1924  LLVMContext &Ctx = M.getContext();
      // Scratch variable used by the OMP_STRUCT_TYPE expansion.
1925  StructType *T;
1926 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
1927 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
1928  VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
1929  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
1930 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
1931  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
1932  VarName##Ptr = PointerType::getUnqual(VarName);
1933 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
1934  T = StructType::getTypeByName(Ctx, StructName); \
1935  if (!T) \
1936  T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
1937  VarName = T; \
1938  VarName##Ptr = PointerType::getUnqual(T);
1939 #include "llvm/Frontend/OpenMP/OMPKinds.def"
1940 }
1941 
1944  SmallVectorImpl<BasicBlock *> &BlockVector) {
      // Appends to BlockVector every block reached from EntryBB by following
      // successor edges, starting with EntryBB itself. ExitBB is marked as
      // seen up front, so the walk never enqueues it and therefore does not
      // continue past it.
1946  BlockSet.insert(EntryBB);
1947  BlockSet.insert(ExitBB);
1948
      // Worklist traversal (last in, first out); BlockSet records the blocks
      // that were already enqueued so each block is visited at most once.
1949  Worklist.push_back(EntryBB);
1950  while (!Worklist.empty()) {
1951  BasicBlock *BB = Worklist.pop_back_val();
1952  BlockVector.push_back(BB);
1953  for (BasicBlock *SuccBB : successors(BB))
1954  if (BlockSet.insert(SuccBB).second)
1955  Worklist.push_back(SuccBB);
1956  }
1957 }
1958 
/// Append the six control blocks of this loop to \p BBs, in the order
/// Preheader, Header, Cond, Latch, Exit, After. Existing elements of \p BBs
/// are kept.
1959 void CanonicalLoopInfo::collectControlBlocks(
1961  // We only count those BBs as control blocks for which we do not need to
1962  // reverse the CFG, i.e. not the loop body which can contain arbitrary control
1963  // flow. For consistency, this also means we do not add the Body block, which
1964  // is just the entry to the body code.
1965  BBs.reserve(BBs.size() + 6);
1966  BBs.append({Preheader, Header, Cond, Latch, Exit, After});
1967 }
1968 
1970 #ifndef NDEBUG
1971  if (!IsValid)
1972  return;
1973 
1974  // Verify standard control-flow we use for OpenMP loops.
1975  assert(Preheader);
1976  assert(isa<BranchInst>(Preheader->getTerminator()) &&
1977  "Preheader must terminate with unconditional branch");
1978  assert(Preheader->getSingleSuccessor() == Header &&
1979  "Preheader must jump to header");
1980 
1981  assert(Header);
1982  assert(isa<BranchInst>(Header->getTerminator()) &&
1983  "Header must terminate with unconditional branch");
1984  assert(Header->getSingleSuccessor() == Cond &&
1985  "Header must jump to exiting block");
1986 
1987  assert(Cond);
1988  assert(Cond->getSinglePredecessor() == Header &&
1989  "Exiting block only reachable from header");
1990 
1991  assert(isa<BranchInst>(Cond->getTerminator()) &&
1992  "Exiting block must terminate with conditional branch");
1993  assert(size(successors(Cond)) == 2 &&
1994  "Exiting block must have two successors");
1995  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
1996  "Exiting block's first successor jump to the body");
1997  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
1998  "Exiting block's second successor must exit the loop");
1999 
2000  assert(Body);
2001  assert(Body->getSinglePredecessor() == Cond &&
2002  "Body only reachable from exiting block");
2003  assert(!isa<PHINode>(Body->front()));
2004 
2005  assert(Latch);
2006  assert(isa<BranchInst>(Latch->getTerminator()) &&
2007  "Latch must terminate with unconditional branch");
2008  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
2009  // TODO: To support simple redirecting of the end of the body code that has
2010  // multiple; introduce another auxiliary basic block like preheader and after.
2011  assert(Latch->getSinglePredecessor() != nullptr);
2012  assert(!isa<PHINode>(Latch->front()));
2013 
2014  assert(Exit);
2015  assert(isa<BranchInst>(Exit->getTerminator()) &&
2016  "Exit block must terminate with unconditional branch");
2017  assert(Exit->getSingleSuccessor() == After &&
2018  "Exit block must jump to after block");
2019 
2020  assert(After);
2021  assert(After->getSinglePredecessor() == Exit &&
2022  "After block only reachable from exit block");
2023  assert(After->empty() || !isa<PHINode>(After->front()));
2024 
2025  Instruction *IndVar = getIndVar();
2026  assert(IndVar && "Canonical induction variable not found?");
2027  assert(isa<IntegerType>(IndVar->getType()) &&
2028  "Induction variable must be an integer");
2029  assert(cast<PHINode>(IndVar)->getParent() == Header &&
2030  "Induction variable must be a PHI in the loop header");
2031  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
2032  assert(
2033  cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
2034  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
2035 
2036  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
2037  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
2038  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
2039  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
2040  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
2041  ->isOne());
2042 
2043  Value *TripCount = getTripCount();
2044  assert(TripCount && "Loop trip count not found?");
2045  assert(IndVar->getType() == TripCount->getType() &&
2046  "Trip count and induction variable must have the same type");
2047 
2048  auto *CmpI = cast<CmpInst>(&Cond->front());
2049  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
2050  "Exit condition must be a signed less-than comparison");
2051  assert(CmpI->getOperand(0) == IndVar &&
2052  "Exit condition must compare the induction variable");
2053  assert(CmpI->getOperand(1) == TripCount &&
2054  "Exit condition must compare with the trip count");
2055 #endif
2056 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
llvm::CanonicalLoopInfo::getPreheaderIP
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
Definition: OMPIRBuilder.h:934
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::OpenMPIRBuilder::createCachedThreadPrivate
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
Definition: OMPIRBuilder.cpp:1853
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:125
llvm::OpenMPIRBuilder::LocationDescription
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:142
llvm::OpenMPIRBuilder::OutlineInfo::ExitBB
BasicBlock * ExitBB
Definition: OMPIRBuilder.h:566
llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::OpenMPIRBuilder::createCritical
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
Definition: OMPIRBuilder.cpp:1609
llvm::CanonicalLoopInfo::getAfter
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Definition: OMPIRBuilder.h:911
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:161
llvm::Function::empty
bool empty() const
Definition: Function.h:753
llvm::Function::getBasicBlockList
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:724
llvm::CodeExtractor::findAllocas
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
Definition: CodeExtractor.cpp:494
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1300
llvm::CodeExtractor::extractCodeRegion
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Definition: CodeExtractor.cpp:1567
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::NoAlias
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:87
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:124
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:288
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::BasicBlock::empty
bool empty() const
Definition: BasicBlock.h:307
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.h:305
llvm::OpenMPIRBuilder::tileLoops
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
Definition: OMPIRBuilder.cpp:1369
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1916
llvm::OpenMPIRBuilder::createWorkshareLoop
CanonicalLoopInfo * createWorkshareLoop(const LocationDescription &Loc, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier)
Modifies the canonical loop to be a workshare loop.
Definition: OMPIRBuilder.cpp:1179
llvm::OpenMPIRBuilder::getLanemaskType
Type * getLanemaskType()
Definition: OMPIRBuilder.cpp:251
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:731
llvm::OpenMPIRBuilder::InsertPointTy
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:53
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:451
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2547
llvm::OpenMPIRBuilder::getOrCreateThreadID
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
Definition: OMPIRBuilder.cpp:316
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::OpenMPIRBuilder::emitCancelationCheckImpl
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
Definition: OMPIRBuilder.cpp:419
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2182
llvm::DeleteDeadBlocks
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
Definition: BasicBlockUtils.cpp:94
Error.h
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1554
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:300
Shift
bool Shift
Definition: README.txt:468
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:129
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:294
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:375
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::Optional
Definition: APInt.h:33
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
CodeExtractor.h
llvm::OpenMPIRBuilder::FinalizeCallbackTy
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:63
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:752
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:264
llvm::BasicBlock::hasNPredecessors
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:286
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1306
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:260
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:197
llvm::OpenMPIRBuilder::emitTaskyieldImpl
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
Definition: OMPIRBuilder.cpp:850
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1194
llvm::OpenMPIRBuilder::collapseLoops
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
Definition: OMPIRBuilder.cpp:1248
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::ConstantExpr::getPointerCast
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:1994
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:302
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:583
llvm::CanonicalLoopInfo::getIndVar
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Definition: OMPIRBuilder.h:924
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::OpenMPIRBuilder::finalize
void finalize(Function *Fn=nullptr, bool AllowExtractorSinking=false)
Finalize the underlying module, e.g., by outlining regions.
Definition: OMPIRBuilder.cpp:129
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:876
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:696
llvm::OpenMPIRBuilder::getOrCreateDefaultSrcLocStr
Constant * getOrCreateDefaultSrcLocStr()
Return the (LLVM-IR) string describing the default source location.
Definition: OMPIRBuilder.cpp:296
llvm::OpenMPIRBuilder::getOrCreateIdent
Value * getOrCreateIdent(Constant *SrcLocStr, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
Definition: OMPIRBuilder.cpp:218
OMPIRBuilder.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:517
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::CanonicalLoopInfo::assertOK
void assertOK() const
Consistency self-check.
Definition: OMPIRBuilder.cpp:1969
llvm::CanonicalLoopInfo::getCond
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
Definition: OMPIRBuilder.h:891
llvm::OpenMPIRBuilder
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:29
llvm::OpenMPIRBuilder::createOMPFree
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Definition: OMPIRBuilder.cpp:1839
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:748
TileSize
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1396
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
redirectAllPredecessorsTo
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
Definition: OMPIRBuilder.cpp:1209
llvm::OpenMPIRBuilder::createBarrier
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
Definition: OMPIRBuilder.cpp:323
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
llvm::OpenMPIRBuilder::~OpenMPIRBuilder
~OpenMPIRBuilder()
Definition: OMPIRBuilder.cpp:214
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::CodeExtractor::findInputsOutputs
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
Definition: CodeExtractor.cpp:646
llvm::Instruction
Definition: Instruction.h:45
MDBuilder.h
setCanonicalLoopTripCount
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount)
Definition: OMPIRBuilder.cpp:1085
llvm::GlobalObject::addMetadata
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1262
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:367
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::CanonicalLoopInfo::getBodyIP
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
Definition: OMPIRBuilder.h:939
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:708
removeUnusedBlocksFromParent
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
Definition: OMPIRBuilder.cpp:1217
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:73
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::omp::IdentFlag
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:74
llvm::OpenMPIRBuilder::emitTaskwaitImpl
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Definition: OMPIRBuilder.cpp:832
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:248
llvm::OpenMPIRBuilder::createOMPAlloc
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
Definition: OMPIRBuilder.cpp:1823
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::OpenMPIRBuilder::addAttributes
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Definition: OMPIRBuilder.cpp:41
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
llvm::SmallString< 128 >
CFG.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::OpenMPIRBuilder::OutlineInfo::EntryBB
BasicBlock * EntryBB
Definition: OMPIRBuilder.h:566
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:176
llvm::omp::RuntimeFunction
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:54
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::OpenMPIRBuilder::createTaskyield
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
Definition: OMPIRBuilder.cpp:861
llvm::cl::opt< bool >
llvm::CanonicalLoopInfo::getPreheader
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Definition: OMPIRBuilder.h:883
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:170
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2720
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3061
DebugInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::DebugLoc::get
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:21
llvm::OpenMPIRBuilder::createCanonicalLoop
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.cpp:963
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:581
llvm::OpenMPIRBuilder::createParallel
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
Definition: OMPIRBuilder.cpp:455
llvm::Triple::isAMDGCN
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition: Triple.h:704
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MDBuilder::createCallbackEncoding
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
llvm::OpenMPIRBuilder::LocationDescription::DL
DebugLoc DL
Definition: OMPIRBuilder.h:150
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
Definition: OMPIRBuilder.cpp:120
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::OpenMPIRBuilder::createMaster
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Definition: OMPIRBuilder.cpp:868
llvm::SplitBlockAndInsertIfThenElse
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
Definition: BasicBlockUtils.cpp:1224
llvm::OpenMPIRBuilder::createFlush
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Definition: OMPIRBuilder.cpp:826
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1312
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::Function::getFunction
const Function & getFunction() const
Definition: Function.h:135
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:643
llvm::OpenMPIRBuilder::OutlineInfo
Helper that contains information about regions we need to outline during finalization.
Definition: OMPIRBuilder.h:563
llvm::CanonicalLoopInfo
Class to represent the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.h:857
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Triple.h
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:751
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1479
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:98
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:173
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:212
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:747
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:241
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:272
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:523
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:780
uint32_t
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantDataArray::getString
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:3015
llvm::GlobalObject::hasMetadata
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:584
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
llvm::OpenMPIRBuilder::OutlineInfo::PostOutlineCB
PostOutlineCBTy PostOutlineCB
Definition: OMPIRBuilder.h:565
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:295
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::GlobalValue::CommonLinkage
@ CommonLinkage
Tentative definitions.
Definition: GlobalValue.h:58
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:183
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::CanonicalLoopInfo::getExit
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
Definition: OMPIRBuilder.h:904
llvm::OpenMPIRBuilder::createStaticWorkshareLoop
CanonicalLoopInfo * createStaticWorkshareLoop(const LocationDescription &Loc, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a statically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1092
llvm::OpenMPIRBuilder::createTaskwait
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
Definition: OMPIRBuilder.cpp:844
redirectTo
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Definition: OMPIRBuilder.cpp:1192
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:198
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:347
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
llvm::OpenMPIRBuilder::createCancel
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
Definition: OMPIRBuilder.cpp:378
OptimisticAttributes
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:708
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:165
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::OpenMPIRBuilder::getOrCreateSrcLocStr
Constant * getOrCreateSrcLocStr(StringRef LocStr)
Return the (LLVM-IR) string describing the source location LocStr.
Definition: OMPIRBuilder.cpp:259
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:184
llvm::SmallString::str
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:259
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::OpenMPIRBuilder::initialize
void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
Definition: OMPIRBuilder.cpp:127
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::CanonicalLoopInfo::getLatch
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
Definition: OMPIRBuilder.h:901
Allocator
Basic Register Allocator
Definition: RegAllocBasic.cpp:146
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:62
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:647
llvm::OpenMPIRBuilder::emitBarrierImpl
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
Definition: OMPIRBuilder.cpp:331
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
llvm::PHINode
Definition: Instructions.h:2572
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:321
llvm::Function::removeFromParent
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
Definition: Function.cpp:317
llvm::SmallVectorImpl< BasicBlock * >
llvm::CodeExtractor::isEligible
bool isEligible() const
Test whether this code extractor is eligible.
Definition: CodeExtractor.cpp:619
llvm::CanonicalLoopInfo::getTripCount
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
Definition: OMPIRBuilder.h:916
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::OpenMPIRBuilder::emitFlush
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
Definition: OMPIRBuilder.cpp:818
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
llvm::OpenMPIRBuilder::OutlineInfo::collectBlocks
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Definition: OMPIRBuilder.cpp:1942
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4650
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:379
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:61
llvm::cl::desc
Definition: CommandLine.h:411
llvm::OpenMPIRBuilder::createSingle
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, llvm::Value *DidIt)
Generator for '#omp single'.
Definition: OMPIRBuilder.cpp:1575
llvm::OpenMPIRBuilder::createCopyinClauseBlocks
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
Definition: OMPIRBuilder.cpp:1773
llvm::OpenMPIRBuilder::createCopyPrivate
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
Definition: OMPIRBuilder.cpp:1554
llvm::SetVector< Value * >
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
Definition: OMPIRBuilder.cpp:62
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:599
llvm::CanonicalLoopInfo::getBody
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
Definition: OMPIRBuilder.h:896
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
getKmpcForStaticInitForType
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Definition: OMPIRBuilder.cpp:1070
llvm::Directive
Definition: DirectiveEmitter.h:104
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:152
llvm::DIFile
File.
Definition: DebugInfoMetadata.h:522
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:102
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::OpenMPIRBuilder::LocationDescription::IP
InsertPointTy IP
Definition: OMPIRBuilder.h:149