LLVM  12.0.0git
OMPIRBuilder.cpp
Go to the documentation of this file.
1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/IR/CFG.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/MDBuilder.h"
25 #include "llvm/Support/Error.h"
28 
29 #include <sstream>
30 
31 #define DEBUG_TYPE "openmp-ir-builder"
32 
33 using namespace llvm;
34 using namespace omp;
35 
// Hidden boolean command-line option registered under the name
// "openmp-ir-builder-optimistic-attributes"; it is initialized to false.
// NOTE(review): nothing in the code visible here reads this flag directly;
// presumably the attribute sets expanded from OMPKinds.def consult it --
// confirm against that file.
36 static cl::opt<bool>
37  OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
38  cl::desc("Use optimistic attributes describing "
39  "'as-if' properties of runtime calls."),
40  cl::init(false));
41 
43  LLVMContext &Ctx = Fn.getContext();
44 
45 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
46 #include "llvm/Frontend/OpenMP/OMPKinds.def"
47 
48  // Add attributes to the new declaration.
49  switch (FnID) {
50 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
51  case Enum: \
52  Fn.setAttributes( \
53  AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets)); \
54  break;
55 #include "llvm/Frontend/OpenMP/OMPKinds.def"
56  default:
57  // Attributes are optional.
58  break;
59  }
60 }
61 
64  FunctionType *FnTy = nullptr;
65  Function *Fn = nullptr;
66 
67  // Try to find the declaration in the module first.
68  switch (FnID) {
69 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
70  case Enum: \
71  FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
72  IsVarArg); \
73  Fn = M.getFunction(Str); \
74  break;
75 #include "llvm/Frontend/OpenMP/OMPKinds.def"
76  }
77 
78  if (!Fn) {
79  // Create a new declaration if we need one.
80  switch (FnID) {
81 #define OMP_RTL(Enum, Str, ...) \
82  case Enum: \
83  Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
84  break;
85 #include "llvm/Frontend/OpenMP/OMPKinds.def"
86  }
87 
88  // Add information if the runtime function takes a callback function
89  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
90  if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
91  LLVMContext &Ctx = Fn->getContext();
92  MDBuilder MDB(Ctx);
93  // Annotate the callback behavior of the runtime function:
94  // - The callback callee is argument number 2 (microtask).
95  // - The first two arguments of the callback callee are unknown (-1).
96  // - All variadic arguments to the runtime function are passed to the
97  // callback callee.
98  Fn->addMetadata(
99  LLVMContext::MD_callback,
101  2, {-1, -1}, /* VarArgsArePassed */ true)}));
102  }
103  }
104 
105  LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
106  << " with type " << *Fn->getFunctionType() << "\n");
107  addAttributes(FnID, *Fn);
108 
109  } else {
110  LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
111  << " with type " << *Fn->getFunctionType() << "\n");
112  }
113 
114  assert(Fn && "Failed to create OpenMP runtime function");
115 
116  // Cast the function to the expected type if necessary
118  return {FnTy, C};
119 }
120 
122  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
123  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
124  assert(Fn && "Failed to create OpenMP runtime function pointer");
125  return Fn;
126 }
127 
128 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
129 
130 void OpenMPIRBuilder::finalize(bool AllowExtractorSinking) {
131  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
133  for (OutlineInfo &OI : OutlineInfos) {
134  ParallelRegionBlockSet.clear();
135  Blocks.clear();
136  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
137 
138  Function *OuterFn = OI.EntryBB->getParent();
139  CodeExtractorAnalysisCache CEAC(*OuterFn);
140  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
141  /* AggregateArgs */ false,
142  /* BlockFrequencyInfo */ nullptr,
143  /* BranchProbabilityInfo */ nullptr,
144  /* AssumptionCache */ nullptr,
145  /* AllowVarArgs */ true,
146  /* AllowAlloca */ true,
147  /* Suffix */ ".omp_par");
148 
149  LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
150  LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
151  << " Exit: " << OI.ExitBB->getName() << "\n");
152  assert(Extractor.isEligible() &&
153  "Expected OpenMP outlining to be possible!");
154 
155  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
156 
157  LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
158  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
159  assert(OutlinedFn->getReturnType()->isVoidTy() &&
160  "OpenMP outlined functions should not return a value!");
161 
162  // For compability with the clang CG we move the outlined function after the
163  // one with the parallel region.
164  OutlinedFn->removeFromParent();
165  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
166 
167  // Remove the artificial entry introduced by the extractor right away, we
168  // made our own entry block after all.
169  {
170  BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
171  assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
172  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
173  if (AllowExtractorSinking) {
174  // Move instructions from the to-be-deleted ArtificialEntry to the entry
175  // basic block of the parallel region. CodeExtractor may have sunk
176  // allocas/bitcasts for values that are solely used in the outlined
177  // region and do not escape.
178  assert(!ArtificialEntry.empty() &&
179  "Expected instructions to sink in the outlined region");
180  for (BasicBlock::iterator It = ArtificialEntry.begin(),
181  End = ArtificialEntry.end();
182  It != End;) {
183  Instruction &I = *It;
184  It++;
185 
186  if (I.isTerminator())
187  continue;
188 
189  I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
190  }
191  }
192  OI.EntryBB->moveBefore(&ArtificialEntry);
193  ArtificialEntry.eraseFromParent();
194  }
195  assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
196  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
197 
198  // Run a user callback, e.g. to add attributes.
199  if (OI.PostOutlineCB)
200  OI.PostOutlineCB(*OutlinedFn);
201  }
202 
203  // Allow finalize to be called multiple times.
204  OutlineInfos.clear();
205 }
206 
208  IdentFlag LocFlags,
209  unsigned Reserve2Flags) {
210  // Enable "C-mode".
211  LocFlags |= OMP_IDENT_FLAG_KMPC;
212 
213  Value *&Ident =
214  IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
215  if (!Ident) {
216  Constant *I32Null = ConstantInt::getNullValue(Int32);
217  Constant *IdentData[] = {
218  I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
219  ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
220  Constant *Initializer = ConstantStruct::get(
221  cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
222 
223  // Look for existing encoding of the location + flags, not needed but
224  // minimizes the difference to the existing solution while we transition.
225  for (GlobalVariable &GV : M.getGlobalList())
226  if (GV.getType() == IdentPtr && GV.hasInitializer())
227  if (GV.getInitializer() == Initializer)
228  return Ident = &GV;
229 
230  auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
231  /* isConstant = */ true,
232  GlobalValue::PrivateLinkage, Initializer);
234  GV->setAlignment(Align(8));
235  Ident = GV;
236  }
237  return Builder.CreatePointerCast(Ident, IdentPtr);
238 }
239 
241  LLVMContext &Ctx = M.getContext();
242  Triple triple(M.getTargetTriple());
243 
244  // This test is adequate until deviceRTL has finer grained lane widths
245  return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
246 }
247 
249  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
250  if (!SrcLocStr) {
251  Constant *Initializer =
252  ConstantDataArray::getString(M.getContext(), LocStr);
253 
254  // Look for existing encoding of the location, not needed but minimizes the
255  // difference to the existing solution while we transition.
256  for (GlobalVariable &GV : M.getGlobalList())
257  if (GV.isConstant() && GV.hasInitializer() &&
258  GV.getInitializer() == Initializer)
259  return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
260 
261  SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
262  /* AddressSpace */ 0, &M);
263  }
264  return SrcLocStr;
265 }
266 
268  StringRef FileName,
269  unsigned Line,
270  unsigned Column) {
271  SmallString<128> Buffer;
272  Buffer.push_back(';');
273  Buffer.append(FileName);
274  Buffer.push_back(';');
275  Buffer.append(FunctionName);
276  Buffer.push_back(';');
277  Buffer.append(std::to_string(Line));
278  Buffer.push_back(';');
279  Buffer.append(std::to_string(Column));
280  Buffer.push_back(';');
281  Buffer.push_back(';');
282  return getOrCreateSrcLocStr(Buffer.str());
283 }
284 
286  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
287 }
288 
289 Constant *
291  DILocation *DIL = Loc.DL.get();
292  if (!DIL)
293  return getOrCreateDefaultSrcLocStr();
294  StringRef FileName = M.getName();
295  if (DIFile *DIF = DIL->getFile())
296  if (Optional<StringRef> Source = DIF->getSource())
297  FileName = *Source;
298  StringRef Function = DIL->getScope()->getSubprogram()->getName();
299  Function =
300  !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
301  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
302  DIL->getColumn());
303 }
304 
306  return Builder.CreateCall(
307  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
308  "omp_global_thread_num");
309 }
310 
313  bool ForceSimpleCall, bool CheckCancelFlag) {
314  if (!updateToLocation(Loc))
315  return Loc.IP;
316  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
317 }
318 
321  bool ForceSimpleCall, bool CheckCancelFlag) {
322  // Build call __kmpc_cancel_barrier(loc, thread_id) or
323  // __kmpc_barrier(loc, thread_id);
324 
325  IdentFlag BarrierLocFlags;
326  switch (Kind) {
327  case OMPD_for:
328  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
329  break;
330  case OMPD_sections:
331  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
332  break;
333  case OMPD_single:
334  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
335  break;
336  case OMPD_barrier:
337  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
338  break;
339  default:
340  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
341  break;
342  }
343 
344  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
345  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
346  getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
347 
348  // If we are in a cancellable parallel region, barriers are cancellation
349  // points.
350  // TODO: Check why we would force simple calls or to ignore the cancel flag.
351  bool UseCancelBarrier =
352  !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
353 
354  Value *Result =
355  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
356  UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
357  : OMPRTL___kmpc_barrier),
358  Args);
359 
360  if (UseCancelBarrier && CheckCancelFlag)
361  emitCancelationCheckImpl(Result, OMPD_parallel);
362 
363  return Builder.saveIP();
364 }
365 
368  Value *IfCondition,
369  omp::Directive CanceledDirective) {
370  if (!updateToLocation(Loc))
371  return Loc.IP;
372 
373  // LLVM utilities like blocks with terminators.
374  auto *UI = Builder.CreateUnreachable();
375 
376  Instruction *ThenTI = UI, *ElseTI = nullptr;
377  if (IfCondition)
378  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
379  Builder.SetInsertPoint(ThenTI);
380 
381  Value *CancelKind = nullptr;
382  switch (CanceledDirective) {
383 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
384  case DirectiveEnum: \
385  CancelKind = Builder.getInt32(Value); \
386  break;
387 #include "llvm/Frontend/OpenMP/OMPKinds.def"
388  default:
389  llvm_unreachable("Unknown cancel kind!");
390  }
391 
392  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
393  Value *Ident = getOrCreateIdent(SrcLocStr);
394  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
395  Value *Result = Builder.CreateCall(
396  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
397 
398  // The actual cancel logic is shared with others, e.g., cancel_barriers.
399  emitCancelationCheckImpl(Result, CanceledDirective);
400 
401  // Update the insertion point and remove the terminator we introduced.
402  Builder.SetInsertPoint(UI->getParent());
403  UI->eraseFromParent();
404 
405  return Builder.saveIP();
406 }
407 
409  Value *CancelFlag, omp::Directive CanceledDirective) {
410  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
411  "Unexpected cancellation!");
412 
413  // For a cancel barrier we create two new blocks.
414  BasicBlock *BB = Builder.GetInsertBlock();
415  BasicBlock *NonCancellationBlock;
416  if (Builder.GetInsertPoint() == BB->end()) {
417  // TODO: This branch will not be needed once we moved to the
418  // OpenMPIRBuilder codegen completely.
419  NonCancellationBlock = BasicBlock::Create(
420  BB->getContext(), BB->getName() + ".cont", BB->getParent());
421  } else {
422  NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
424  Builder.SetInsertPoint(BB);
425  }
426  BasicBlock *CancellationBlock = BasicBlock::Create(
427  BB->getContext(), BB->getName() + ".cncl", BB->getParent());
428 
429  // Jump to them based on the return value.
430  Value *Cmp = Builder.CreateIsNull(CancelFlag);
431  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
432  /* TODO weight */ nullptr, nullptr);
433 
434  // From the cancellation block we finalize all variables and go to the
435  // post finalization block that is known to the FiniCB callback.
436  Builder.SetInsertPoint(CancellationBlock);
437  auto &FI = FinalizationStack.back();
438  FI.FiniCB(Builder.saveIP());
439 
440  // The continuation block is where code generation continues.
441  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
442 }
443 
445  const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
446  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
447  FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
448  omp::ProcBindKind ProcBind, bool IsCancellable) {
449  if (!updateToLocation(Loc))
450  return Loc.IP;
451 
452  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
453  Value *Ident = getOrCreateIdent(SrcLocStr);
454  Value *ThreadID = getOrCreateThreadID(Ident);
455 
456  if (NumThreads) {
457  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
458  Value *Args[] = {
459  Ident, ThreadID,
460  Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
461  Builder.CreateCall(
462  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
463  }
464 
465  if (ProcBind != OMP_PROC_BIND_default) {
466  // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
467  Value *Args[] = {
468  Ident, ThreadID,
469  ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
470  Builder.CreateCall(
471  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
472  }
473 
474  BasicBlock *InsertBB = Builder.GetInsertBlock();
475  Function *OuterFn = InsertBB->getParent();
476 
477  // Save the outer alloca block because the insertion iterator may get
478  // invalidated and we still need this later.
479  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
480 
481  // Vector to remember instructions we used only during the modeling but which
482  // we want to delete at the end.
483  SmallVector<Instruction *, 4> ToBeDeleted;
484 
485  // Change the location to the outer alloca insertion point to create and
486  // initialize the allocas we pass into the parallel region.
487  Builder.restoreIP(OuterAllocaIP);
488  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
489  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
490 
491  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
492  // program, otherwise we only need them for modeling purposes to get the
493  // associated arguments in the outlined function. In the former case,
494  // initialize the allocas properly, in the latter case, delete them later.
495  if (IfCondition) {
496  Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
497  Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
498  } else {
499  ToBeDeleted.push_back(TIDAddr);
500  ToBeDeleted.push_back(ZeroAddr);
501  }
502 
503  // Create an artificial insertion point that will also ensure the blocks we
504  // are about to split are not degenerated.
505  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
506 
507  Instruction *ThenTI = UI, *ElseTI = nullptr;
508  if (IfCondition)
509  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
510 
511  BasicBlock *ThenBB = ThenTI->getParent();
512  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
513  BasicBlock *PRegBodyBB =
514  PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
515  BasicBlock *PRegPreFiniBB =
516  PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
517  BasicBlock *PRegExitBB =
518  PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
519 
520  auto FiniCBWrapper = [&](InsertPointTy IP) {
521  // Hide "open-ended" blocks from the given FiniCB by setting the right jump
522  // target to the region exit block.
523  if (IP.getBlock()->end() == IP.getPoint()) {
525  Builder.restoreIP(IP);
526  Instruction *I = Builder.CreateBr(PRegExitBB);
527  IP = InsertPointTy(I->getParent(), I->getIterator());
528  }
529  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
530  IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
531  "Unexpected insertion point for finalization call!");
532  return FiniCB(IP);
533  };
534 
535  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
536 
537  // Generate the privatization allocas in the block that will become the entry
538  // of the outlined function.
539  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
540  InsertPointTy InnerAllocaIP = Builder.saveIP();
541 
542  AllocaInst *PrivTIDAddr =
543  Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
544  Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
545 
546  // Add some fake uses for OpenMP provided arguments.
547  ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
548  Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use");
549  ToBeDeleted.push_back(ZeroAddrUse);
550 
551  // ThenBB
552  // |
553  // V
554  // PRegionEntryBB <- Privatization allocas are placed here.
555  // |
556  // V
557  // PRegionBodyBB <- BodyGen is invoked here.
558  // |
559  // V
560  // PRegPreFiniBB <- The block we will start finalization from.
561  // |
562  // V
563  // PRegionExitBB <- A common exit to simplify block collection.
564  //
565 
566  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
567 
568  // Let the caller create the body.
569  assert(BodyGenCB && "Expected body generation callback!");
570  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
571  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
572 
573  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
574 
575  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
576  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
577  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
578  llvm::LLVMContext &Ctx = F->getContext();
579  MDBuilder MDB(Ctx);
580  // Annotate the callback behavior of the __kmpc_fork_call:
581  // - The callback callee is argument number 2 (microtask).
582  // - The first two arguments of the callback callee are unknown (-1).
583  // - All variadic arguments to the __kmpc_fork_call are passed to the
584  // callback callee.
585  F->addMetadata(
586  llvm::LLVMContext::MD_callback,
588  Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
589  /* VarArgsArePassed */ true)}));
590  }
591  }
592 
593  OutlineInfo OI;
594  OI.PostOutlineCB = [=](Function &OutlinedFn) {
595  // Add some known attributes.
596  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
597  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
598  OutlinedFn.addFnAttr(Attribute::NoUnwind);
599  OutlinedFn.addFnAttr(Attribute::NoRecurse);
600 
601  assert(OutlinedFn.arg_size() >= 2 &&
602  "Expected at least tid and bounded tid as arguments");
603  unsigned NumCapturedVars =
604  OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
605 
606  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
607  CI->getParent()->setName("omp_parallel");
608  Builder.SetInsertPoint(CI);
609 
610  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
611  Value *ForkCallArgs[] = {
612  Ident, Builder.getInt32(NumCapturedVars),
613  Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
614 
615  SmallVector<Value *, 16> RealArgs;
616  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
617  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
618 
619  Builder.CreateCall(RTLFn, RealArgs);
620 
621  LLVM_DEBUG(dbgs() << "With fork_call placed: "
622  << *Builder.GetInsertBlock()->getParent() << "\n");
623 
624  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
625 
626  // Initialize the local TID stack location with the argument value.
627  Builder.SetInsertPoint(PrivTID);
628  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
629  Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
630 
631  // If no "if" clause was present we do not need the call created during
632  // outlining, otherwise we reuse it in the serialized parallel region.
633  if (!ElseTI) {
634  CI->eraseFromParent();
635  } else {
636 
637  // If an "if" clause was present we are now generating the serialized
638  // version into the "else" branch.
639  Builder.SetInsertPoint(ElseTI);
640 
641  // Build calls __kmpc_serialized_parallel(&Ident, GTid);
642  Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
643  Builder.CreateCall(
644  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
645  SerializedParallelCallArgs);
646 
647  // OutlinedFn(&GTid, &zero, CapturedStruct);
648  CI->removeFromParent();
649  Builder.Insert(CI);
650 
651  // __kmpc_end_serialized_parallel(&Ident, GTid);
652  Value *EndArgs[] = {Ident, ThreadID};
653  Builder.CreateCall(
654  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
655  EndArgs);
656 
657  LLVM_DEBUG(dbgs() << "With serialized parallel region: "
658  << *Builder.GetInsertBlock()->getParent() << "\n");
659  }
660 
661  for (Instruction *I : ToBeDeleted)
662  I->eraseFromParent();
663  };
664 
665  // Adjust the finalization stack, verify the adjustment, and call the
666  // finalize function a last time to finalize values between the pre-fini
667  // block and the exit block if we left the parallel "the normal way".
668  auto FiniInfo = FinalizationStack.pop_back_val();
669  (void)FiniInfo;
670  assert(FiniInfo.DK == OMPD_parallel &&
671  "Unexpected finalization stack state!");
672 
673  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
674 
675  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
676  FiniCB(PreFiniIP);
677 
678  OI.EntryBB = PRegEntryBB;
679  OI.ExitBB = PRegExitBB;
680 
681  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
683  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
684 
685  // Ensure a single exit node for the outlined region by creating one.
686  // We might have multiple incoming edges to the exit now due to finalizations,
687  // e.g., cancel calls that cause the control flow to leave the region.
688  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
689  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
690  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
691  Blocks.push_back(PRegOutlinedExitBB);
692 
693  CodeExtractorAnalysisCache CEAC(*OuterFn);
694  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
695  /* AggregateArgs */ false,
696  /* BlockFrequencyInfo */ nullptr,
697  /* BranchProbabilityInfo */ nullptr,
698  /* AssumptionCache */ nullptr,
699  /* AllowVarArgs */ true,
700  /* AllowAlloca */ true,
701  /* Suffix */ ".omp_par");
702 
703  // Find inputs to, outputs from the code region.
704  BasicBlock *CommonExit = nullptr;
705  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
706  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
707  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
708 
709  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
710 
711  FunctionCallee TIDRTLFn =
712  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
713 
714  auto PrivHelper = [&](Value &V) {
715  if (&V == TIDAddr || &V == ZeroAddr)
716  return;
717 
719  for (Use &U : V.uses())
720  if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
721  if (ParallelRegionBlockSet.count(UserI->getParent()))
722  Uses.insert(&U);
723 
724  // __kmpc_fork_call expects extra arguments as pointers. If the input
725  // already has a pointer type, everything is fine. Otherwise, store the
726  // value onto stack and load it back inside the to-be-outlined region. This
727  // will ensure only the pointer will be passed to the function.
728  // FIXME: if there are more than 15 trailing arguments, they must be
729  // additionally packed in a struct.
730  Value *Inner = &V;
731  if (!V.getType()->isPointerTy()) {
733  LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
734 
735  Builder.restoreIP(OuterAllocaIP);
736  Value *Ptr =
737  Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
738 
739  // Store to stack at end of the block that currently branches to the entry
740  // block of the to-be-outlined region.
741  Builder.SetInsertPoint(InsertBB,
742  InsertBB->getTerminator()->getIterator());
743  Builder.CreateStore(&V, Ptr);
744 
745  // Load back next to allocations in the to-be-outlined region.
746  Builder.restoreIP(InnerAllocaIP);
747  Inner = Builder.CreateLoad(Ptr);
748  }
749 
750  Value *ReplacementValue = nullptr;
751  CallInst *CI = dyn_cast<CallInst>(&V);
752  if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
753  ReplacementValue = PrivTID;
754  } else {
755  Builder.restoreIP(
756  PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
757  assert(ReplacementValue &&
758  "Expected copy/create callback to set replacement value!");
759  if (ReplacementValue == &V)
760  return;
761  }
762 
763  for (Use *UPtr : Uses)
764  UPtr->set(ReplacementValue);
765  };
766 
767  // Reset the inner alloca insertion as it will be used for loading the values
768  // wrapped into pointers before passing them into the to-be-outlined region.
769  // Configure it to insert immediately after the fake use of zero address so
770  // that they are available in the generated body and so that the
771  // OpenMP-related values (thread ID and zero address pointers) remain leading
772  // in the argument list.
773  InnerAllocaIP = IRBuilder<>::InsertPoint(
774  ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
775 
776  // Reset the outer alloca insertion point to the entry of the relevant block
777  // in case it was invalidated.
778  OuterAllocaIP = IRBuilder<>::InsertPoint(
779  OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
780 
781  for (Value *Input : Inputs) {
782  LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
783  PrivHelper(*Input);
784  }
785  LLVM_DEBUG({
786  for (Value *Output : Outputs)
787  LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
788  });
789  assert(Outputs.empty() &&
790  "OpenMP outlining should not produce live-out values!");
791 
792  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
793  LLVM_DEBUG({
794  for (auto *BB : Blocks)
795  dbgs() << " PBR: " << BB->getName() << "\n";
796  });
797 
798  // Register the outlined info.
799  addOutlineInfo(std::move(OI));
800 
801  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
802  UI->eraseFromParent();
803 
804  return AfterIP;
805 }
806 
808  // Build call void __kmpc_flush(ident_t *loc)
809  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
810  Value *Args[] = {getOrCreateIdent(SrcLocStr)};
811 
812  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
813 }
814 
816  if (!updateToLocation(Loc))
817  return;
818  emitFlush(Loc);
819 }
820 
822  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
823  // global_tid);
824  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
825  Value *Ident = getOrCreateIdent(SrcLocStr);
826  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
827 
828  // Ignore return result until untied tasks are supported.
829  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
830  Args);
831 }
832 
834  if (!updateToLocation(Loc))
835  return;
836  emitTaskwaitImpl(Loc);
837 }
838 
840  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
841  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
842  Value *Ident = getOrCreateIdent(SrcLocStr);
843  Constant *I32Null = ConstantInt::getNullValue(Int32);
844  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
845 
846  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
847  Args);
848 }
849 
851  if (!updateToLocation(Loc))
852  return;
853  emitTaskyieldImpl(Loc);
854 }
855 
858  BodyGenCallbackTy BodyGenCB,
859  FinalizeCallbackTy FiniCB) {
860 
861  if (!updateToLocation(Loc))
862  return Loc.IP;
863 
864  Directive OMPD = Directive::OMPD_master;
865  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
866  Value *Ident = getOrCreateIdent(SrcLocStr);
867  Value *ThreadId = getOrCreateThreadID(Ident);
868  Value *Args[] = {Ident, ThreadId};
869 
870  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
871  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
872 
873  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
874  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
875 
876  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
877  /*Conditional*/ true, /*hasFinalize*/ true);
878 }
879 
880 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
881  DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
882  BasicBlock *PostInsertBefore, const Twine &Name) {
883  Module *M = F->getParent();
884  LLVMContext &Ctx = M->getContext();
885  Type *IndVarTy = TripCount->getType();
886 
887  // Create the basic block structure.
888  BasicBlock *Preheader =
889  BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
890  BasicBlock *Header =
891  BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
892  BasicBlock *Cond =
893  BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
894  BasicBlock *Body =
895  BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
896  BasicBlock *Latch =
897  BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
898  BasicBlock *Exit =
899  BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
900  BasicBlock *After =
901  BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
902 
903  // Use specified DebugLoc for new instructions.
904  Builder.SetCurrentDebugLocation(DL);
905 
906  Builder.SetInsertPoint(Preheader);
907  Builder.CreateBr(Header);
908 
909  Builder.SetInsertPoint(Header);
910  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
911  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
912  Builder.CreateBr(Cond);
913 
914  Builder.SetInsertPoint(Cond);
915  Value *Cmp =
916  Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
917  Builder.CreateCondBr(Cmp, Body, Exit);
918 
919  Builder.SetInsertPoint(Body);
920  Builder.CreateBr(Latch);
921 
922  Builder.SetInsertPoint(Latch);
923  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
924  "omp_" + Name + ".next", /*HasNUW=*/true);
925  Builder.CreateBr(Header);
926  IndVarPHI->addIncoming(Next, Latch);
927 
928  Builder.SetInsertPoint(Exit);
929  Builder.CreateBr(After);
930 
931  // Remember and return the canonical control flow.
932  LoopInfos.emplace_front();
933  CanonicalLoopInfo *CL = &LoopInfos.front();
934 
935  CL->Preheader = Preheader;
936  CL->Header = Header;
937  CL->Cond = Cond;
938  CL->Body = Body;
939  CL->Latch = Latch;
940  CL->Exit = Exit;
941  CL->After = After;
942 
943  CL->IsValid = true;
944 
945 #ifndef NDEBUG
946  CL->assertOK();
947 #endif
948  return CL;
949 }
950 
953  LoopBodyGenCallbackTy BodyGenCB,
954  Value *TripCount, const Twine &Name) {
955  BasicBlock *BB = Loc.IP.getBlock();
956  BasicBlock *NextBB = BB->getNextNode();
957 
958  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
959  NextBB, NextBB, Name);
960  BasicBlock *After = CL->getAfter();
961 
962  // If location is not set, don't connect the loop.
963  if (updateToLocation(Loc)) {
964  // Split the loop at the insertion point: Branch to the preheader and move
965  // every following instruction to after the loop (the After BB). Also, the
966  // new successor is the loop's after block.
967  Builder.CreateBr(CL->Preheader);
968  After->getInstList().splice(After->begin(), BB->getInstList(),
969  Builder.GetInsertPoint(), BB->end());
970  After->replaceSuccessorsPhiUsesWith(BB, After);
971  }
972 
973  // Emit the body content. We do it after connecting the loop to the CFG to
974  // avoid that the callback encounters degenerate BBs.
975  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
976 
977 #ifndef NDEBUG
978  CL->assertOK();
979 #endif
980  return CL;
981 }
982 
984  const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
985  Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
986  InsertPointTy ComputeIP, const Twine &Name) {
987 
988  // Consider the following difficulties (assuming 8-bit signed integers):
989  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
990  // DO I = 1, 100, 50
991  // * A \p Step of INT_MIN cannot be normalized to a positive direction:
992  // DO I = 100, 0, -128
993 
994  // Start, Stop and Step must be of the same integer type.
995  auto *IndVarTy = cast<IntegerType>(Start->getType());
996  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
997  assert(IndVarTy == Step->getType() && "Step type mismatch");
998 
999  LocationDescription ComputeLoc =
1000  ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
1001  updateToLocation(ComputeLoc);
1002 
1003  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1004  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1005 
1006  // Like Step, but always positive.
1007  Value *Incr = Step;
1008 
1009  // Distance between Start and Stop; always positive.
1010  Value *Span;
1011 
1012  // Condition for whether no iterations are executed at all, e.g. because
1013  // UB < LB.
1014  Value *ZeroCmp;
1015 
1016  if (IsSigned) {
1017  // Ensure that increment is positive. If not, negate and invert LB and UB.
1018  Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1019  Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1020  Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1021  Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
1022  Span = Builder.CreateSub(UB, LB, "", false, true);
1023  ZeroCmp = Builder.CreateICmp(
1024  InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1025  } else {
1026  Span = Builder.CreateSub(Stop, Start, "", true);
1027  ZeroCmp = Builder.CreateICmp(
1028  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1029  }
1030 
1031  Value *CountIfLooping;
1032  if (InclusiveStop) {
1033  CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1034  } else {
1035  // Avoid incrementing past stop since it could overflow.
1036  Value *CountIfTwo = Builder.CreateAdd(
1037  Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
1038  Value *OneCmp = Builder.CreateICmp(
1039  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1040  CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1041  }
1042  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1043  "omp_" + Name + ".tripcount");
1044 
1045  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1046  Builder.restoreIP(CodeGenIP);
1047  Value *Span = Builder.CreateMul(IV, Step);
1048  Value *IndVar = Builder.CreateAdd(Span, Start);
1049  BodyGenCB(Builder.saveIP(), IndVar);
1050  };
1051  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1052  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1053 }
1054 
1055 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1056 // static scheduling depending on `type`. Only i32 and i64 are supported by the
1057 // runtime. Always interpret integers as unsigned similarly to
1058 // CanonicalLoopInfo.
1060  OpenMPIRBuilder &OMPBuilder) {
1061  unsigned Bitwidth = Ty->getIntegerBitWidth();
1062  if (Bitwidth == 32)
1063  return OMPBuilder.getOrCreateRuntimeFunction(
1064  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1065  if (Bitwidth == 64)
1066  return OMPBuilder.getOrCreateRuntimeFunction(
1067  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1068  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1069 }
1070 
1071 // Sets the number of loop iterations to the given value. This value must be
1072 // valid in the condition block (i.e., defined in the preheader) and is
1073 // interpreted as an unsigned integer.
1075  Instruction *CmpI = &CLI->getCond()->front();
1076  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1077  CmpI->setOperand(1, TripCount);
1078  CLI->assertOK();
1079 }
1080 
1082  const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1083  InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
1084  // Set up the source location value for OpenMP runtime.
1085  if (!updateToLocation(Loc))
1086  return nullptr;
1087 
1088  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1089  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1090 
1091  // Declare useful OpenMP runtime functions.
1092  Value *IV = CLI->getIndVar();
1093  Type *IVTy = IV->getType();
1094  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1095  FunctionCallee StaticFini =
1096  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1097 
1098  // Allocate space for computed loop bounds as expected by the "init" function.
1099  Builder.restoreIP(AllocaIP);
1100  Type *I32Type = Type::getInt32Ty(M.getContext());
1101  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1102  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1103  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1104  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1105 
1106  // At the end of the preheader, prepare for calling the "init" function by
1107  // storing the current loop bounds into the allocated space. A canonical loop
1108  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1109  // and produces an inclusive upper bound.
1110  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1111  Constant *Zero = ConstantInt::get(IVTy, 0);
1112  Constant *One = ConstantInt::get(IVTy, 1);
1113  Builder.CreateStore(Zero, PLowerBound);
1114  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1115  Builder.CreateStore(UpperBound, PUpperBound);
1116  Builder.CreateStore(One, PStride);
1117 
1118  if (!Chunk)
1119  Chunk = One;
1120 
1121  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1122 
1123  // TODO: extract scheduling type and map it to OMP constant. This is currently
1124  // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first.
1125  constexpr int StaticSchedType = 34;
1126  Constant *SchedulingType = ConstantInt::get(I32Type, StaticSchedType);
1127 
1128  // Call the "init" function and update the trip count of the loop with the
1129  // value it produced.
1130  Builder.CreateCall(StaticInit,
1131  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1132  PUpperBound, PStride, One, Chunk});
1133  Value *LowerBound = Builder.CreateLoad(PLowerBound);
1134  Value *InclusiveUpperBound = Builder.CreateLoad(PUpperBound);
1135  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1136  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1137  setCanonicalLoopTripCount(CLI, TripCount);
1138 
1139  // Update all uses of the induction variable except the one in the condition
1140  // block that compares it with the actual upper bound, and the increment in
1141  // the latch block.
1142  // TODO: this can eventually move to CanonicalLoopInfo or to a new
1143  // CanonicalLoopInfoUpdater interface.
1144  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
1145  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
1146  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
1147  auto *Instr = dyn_cast<Instruction>(U.getUser());
1148  return !Instr ||
1149  (Instr->getParent() != CLI->getCond() &&
1150  Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
1151  });
1152 
1153  // In the "exit" block, call the "fini" function.
1154  Builder.SetInsertPoint(CLI->getExit(),
1155  CLI->getExit()->getTerminator()->getIterator());
1156  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1157 
1158  // Add the barrier if requested.
1159  if (NeedsBarrier)
1160  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
1161  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1162  /* CheckCancelFlag */ false);
1163 
1164  CLI->assertOK();
1165  return CLI;
1166 }
1167 
1170  llvm::Value *BufSize, llvm::Value *CpyBuf,
1171  llvm::Value *CpyFn, llvm::Value *DidIt) {
1172  if (!updateToLocation(Loc))
1173  return Loc.IP;
1174 
1175  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1176  Value *Ident = getOrCreateIdent(SrcLocStr);
1177  Value *ThreadId = getOrCreateThreadID(Ident);
1178 
1179  llvm::Value *DidItLD = Builder.CreateLoad(DidIt);
1180 
1181  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
1182 
1183  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
1184  Builder.CreateCall(Fn, Args);
1185 
1186  return Builder.saveIP();
1187 }
1188 
1191  BodyGenCallbackTy BodyGenCB,
1192  FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
1193 
1194  if (!updateToLocation(Loc))
1195  return Loc.IP;
1196 
1197  // If needed (i.e. not null), initialize `DidIt` with 0
1198  if (DidIt) {
1199  Builder.CreateStore(Builder.getInt32(0), DidIt);
1200  }
1201 
1202  Directive OMPD = Directive::OMPD_single;
1203  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1204  Value *Ident = getOrCreateIdent(SrcLocStr);
1205  Value *ThreadId = getOrCreateThreadID(Ident);
1206  Value *Args[] = {Ident, ThreadId};
1207 
1208  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
1209  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1210 
1211  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
1212  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1213 
1214  // generates the following:
1215  // if (__kmpc_single()) {
1216  // .... single region ...
1217  // __kmpc_end_single
1218  // }
1219 
1220  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1221  /*Conditional*/ true, /*hasFinalize*/ true);
1222 }
1223 
1225  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
1226  FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
1227 
1228  if (!updateToLocation(Loc))
1229  return Loc.IP;
1230 
1231  Directive OMPD = Directive::OMPD_critical;
1232  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1233  Value *Ident = getOrCreateIdent(SrcLocStr);
1234  Value *ThreadId = getOrCreateThreadID(Ident);
1235  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
1236  Value *Args[] = {Ident, ThreadId, LockVar};
1237 
1239  Function *RTFn = nullptr;
1240  if (HintInst) {
1241  // Add Hint to entry Args and create call
1242  EnterArgs.push_back(HintInst);
1243  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
1244  } else {
1245  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
1246  }
1247  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
1248 
1249  Function *ExitRTLFn =
1250  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
1251  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1252 
1253  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1254  /*Conditional*/ false, /*hasFinalize*/ true);
1255 }
1256 
1257 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
1258  Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
1259  BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
1260  bool HasFinalize) {
1261 
1262  if (HasFinalize)
1263  FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false});
1264 
1265  // Create inlined region's entry and body blocks, in preparation
1266  // for conditional creation
1267  BasicBlock *EntryBB = Builder.GetInsertBlock();
1268  Instruction *SplitPos = EntryBB->getTerminator();
1269  if (!isa_and_nonnull<BranchInst>(SplitPos))
1270  SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
1271  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
1272  BasicBlock *FiniBB =
1273  EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
1274 
1275  Builder.SetInsertPoint(EntryBB->getTerminator());
1276  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
1277 
1278  // generate body
1279  BodyGenCB(/* AllocaIP */ InsertPointTy(),
1280  /* CodeGenIP */ Builder.saveIP(), *FiniBB);
1281 
1282  // If we didn't emit a branch to FiniBB during body generation, it means
1283  // FiniBB is unreachable (e.g. while(1);). stop generating all the
1284  // unreachable blocks, and remove anything we are not going to use.
1285  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
1286  if (SkipEmittingRegion) {
1287  FiniBB->eraseFromParent();
1288  ExitCall->eraseFromParent();
1289  // Discard finalization if we have it.
1290  if (HasFinalize) {
1291  assert(!FinalizationStack.empty() &&
1292  "Unexpected finalization stack state!");
1293  FinalizationStack.pop_back();
1294  }
1295  } else {
1296  // emit exit call and do any needed finalization.
1297  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
1298  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
1299  FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
1300  "Unexpected control flow graph state!!");
1301  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
1302  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
1303  "Unexpected Control Flow State!");
1304  MergeBlockIntoPredecessor(FiniBB);
1305  }
1306 
1307  // If we are skipping the region of a non conditional, remove the exit
1308  // block, and clear the builder's insertion point.
1309  assert(SplitPos->getParent() == ExitBB &&
1310  "Unexpected Insertion point location!");
1311  if (!Conditional && SkipEmittingRegion) {
1312  ExitBB->eraseFromParent();
1313  Builder.ClearInsertionPoint();
1314  } else {
1315  auto merged = MergeBlockIntoPredecessor(ExitBB);
1316  BasicBlock *ExitPredBB = SplitPos->getParent();
1317  auto InsertBB = merged ? ExitPredBB : ExitBB;
1318  if (!isa_and_nonnull<BranchInst>(SplitPos))
1319  SplitPos->eraseFromParent();
1320  Builder.SetInsertPoint(InsertBB);
1321  }
1322 
1323  return Builder.saveIP();
1324 }
1325 
1326 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
1327  Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
1328 
1329  // if nothing to do, Return current insertion point.
1330  if (!Conditional)
1331  return Builder.saveIP();
1332 
1333  BasicBlock *EntryBB = Builder.GetInsertBlock();
1334  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
1335  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
1336  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
1337 
1338  // Emit thenBB and set the Builder's insertion point there for
1339  // body generation next. Place the block after the current block.
1340  Function *CurFn = EntryBB->getParent();
1341  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
1342 
1343  // Move Entry branch to end of ThenBB, and replace with conditional
1344  // branch (If-stmt)
1345  Instruction *EntryBBTI = EntryBB->getTerminator();
1346  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
1347  EntryBBTI->removeFromParent();
1348  Builder.SetInsertPoint(UI);
1349  Builder.Insert(EntryBBTI);
1350  UI->eraseFromParent();
1351  Builder.SetInsertPoint(ThenBB->getTerminator());
1352 
1353  // return an insertion point to ExitBB.
1354  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
1355 }
1356 
1357 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
1358  omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
1359  bool HasFinalize) {
1360 
1361  Builder.restoreIP(FinIP);
1362 
1363  // If there is finalization to do, emit it before the exit call
1364  if (HasFinalize) {
1365  assert(!FinalizationStack.empty() &&
1366  "Unexpected finalization stack state!");
1367 
1368  FinalizationInfo Fi = FinalizationStack.pop_back_val();
1369  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
1370 
1371  Fi.FiniCB(FinIP);
1372 
1373  BasicBlock *FiniBB = FinIP.getBlock();
1374  Instruction *FiniBBTI = FiniBB->getTerminator();
1375 
1376  // set Builder IP for call creation
1377  Builder.SetInsertPoint(FiniBBTI);
1378  }
1379 
1380  // place the Exitcall as last instruction before Finalization block terminator
1381  ExitCall->removeFromParent();
1382  Builder.Insert(ExitCall);
1383 
1384  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
1385  ExitCall->getIterator());
1386 }
1387 
1389  InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
1390  llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
1391  if (!IP.isSet())
1392  return IP;
1393 
1395 
1396  // creates the following CFG structure
1397  // OMP_Entry : (MasterAddr != PrivateAddr)?
1398  // F T
1399  // | \
1400  // | copyin.not.master
1401  // | /
1402  // v /
1403  // copyin.not.master.end
1404  // |
1405  // v
1406  // OMP.Entry.Next
1407 
1408  BasicBlock *OMP_Entry = IP.getBlock();
1409  Function *CurFn = OMP_Entry->getParent();
1410  BasicBlock *CopyBegin =
1411  BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
1412  BasicBlock *CopyEnd = nullptr;
1413 
1414  // If entry block is terminated, split to preserve the branch to following
1415  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
1416  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
1417  CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
1418  "copyin.not.master.end");
1419  OMP_Entry->getTerminator()->eraseFromParent();
1420  } else {
1421  CopyEnd =
1422  BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
1423  }
1424 
1425  Builder.SetInsertPoint(OMP_Entry);
1426  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
1427  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
1428  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
1429  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
1430 
1431  Builder.SetInsertPoint(CopyBegin);
1432  if (BranchtoEnd)
1433  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
1434 
1435  return Builder.saveIP();
1436 }
1437 
1440  std::string Name) {
1442  Builder.restoreIP(Loc.IP);
1443 
1444  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1445  Value *Ident = getOrCreateIdent(SrcLocStr);
1446  Value *ThreadId = getOrCreateThreadID(Ident);
1447  Value *Args[] = {ThreadId, Size, Allocator};
1448 
1449  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
1450 
1451  return Builder.CreateCall(Fn, Args, Name);
1452 }
1453 
1456  std::string Name) {
1458  Builder.restoreIP(Loc.IP);
1459 
1460  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1461  Value *Ident = getOrCreateIdent(SrcLocStr);
1462  Value *ThreadId = getOrCreateThreadID(Ident);
1463  Value *Args[] = {ThreadId, Addr, Allocator};
1464  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
1465  return Builder.CreateCall(Fn, Args, Name);
1466 }
1467 
1469  const LocationDescription &Loc, llvm::Value *Pointer,
1472  Builder.restoreIP(Loc.IP);
1473 
1474  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1475  Value *Ident = getOrCreateIdent(SrcLocStr);
1476  Value *ThreadId = getOrCreateThreadID(Ident);
1477  Constant *ThreadPrivateCache =
1478  getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
1479  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
1480 
1481  Function *Fn =
1482  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
1483 
1484  return Builder.CreateCall(Fn, Args);
1485 }
1486 
1487 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
1488  StringRef FirstSeparator,
1489  StringRef Separator) {
1490  SmallString<128> Buffer;
1491  llvm::raw_svector_ostream OS(Buffer);
1492  StringRef Sep = FirstSeparator;
1493  for (StringRef Part : Parts) {
1494  OS << Sep << Part;
1495  Sep = Separator;
1496  }
1497  return OS.str().str();
1498 }
1499 
1500 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
1501  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
1502  // TODO: Replace the twine arg with stringref to get rid of the conversion
1503  // logic. However This is taken from current implementation in clang as is.
1504  // Since this method is used in many places exclusively for OMP internal use
1505  // we will keep it as is for temporarily until we move all users to the
1506  // builder and then, if possible, fix it everywhere in one go.
1507  SmallString<256> Buffer;
1508  llvm::raw_svector_ostream Out(Buffer);
1509  Out << Name;
1510  StringRef RuntimeName = Out.str();
1511  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
1512  if (Elem.second) {
1513  assert(Elem.second->getType()->getPointerElementType() == Ty &&
1514  "OMP internal variable has different type than requested");
1515  } else {
1516  // TODO: investigate the appropriate linkage type used for the global
1517  // variable for possibly changing that to internal or private, or maybe
1518  // create different versions of the function for different OMP internal
1519  // variables.
1520  Elem.second = new llvm::GlobalVariable(
1521  M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
1522  llvm::Constant::getNullValue(Ty), Elem.first(),
1523  /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1524  AddressSpace);
1525  }
1526 
1527  return Elem.second;
1528 }
1529 
1530 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
1531  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1532  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
1533  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
1534 }
1535 
1536 // Create all simple and struct types exposed by the runtime and remember
1537 // the llvm::PointerTypes of them for easy access later.
  // The macros defined below are consumed by the OMPKinds.def include at the
  // end of this function; presumably the .def file invokes them once per
  // runtime type (verify against OMPKinds.def). Their expansions refer to the
  // locals `Ctx` and `T` declared here, so those names must stay in sync.
1538 void OpenMPIRBuilder::initializeTypes(Module &M) {
1539  LLVMContext &Ctx = M.getContext();
  // Scratch variable used only by the OMP_STRUCT_TYPE expansion.
1540  StructType *T;
  // OMP_TYPE: plain assignment of the initializer to the named variable.
1541 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
  // OMP_ARRAY_TYPE: sets <VarName>Ty to the array type and <VarName>PtrTy to
  // an unqualified pointer to it.
1542 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
1543  VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
1544  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
  // OMP_FUNCTION_TYPE: sets <VarName> to the function type and <VarName>Ptr
  // to an unqualified pointer to it.
1545 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
1546  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
1547  VarName##Ptr = PointerType::getUnqual(VarName);
  // OMP_STRUCT_TYPE: reuses a struct type of that name if the context already
  // has one, otherwise creates it; sets <VarName> and <VarName>Ptr.
1548 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
1549  T = StructType::getTypeByName(Ctx, StructName); \
1550  if (!T) \
1551  T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
1552  VarName = T; \
1553  VarName##Ptr = PointerType::getUnqual(T);
1554 #include "llvm/Frontend/OpenMP/OMPKinds.def"
1555 }
1556 
1559  SmallVectorImpl<BasicBlock *> &BlockVector) {
1561  BlockSet.insert(EntryBB);
1562  BlockSet.insert(ExitBB);
1563 
1564  Worklist.push_back(EntryBB);
1565  while (!Worklist.empty()) {
1566  BasicBlock *BB = Worklist.pop_back_val();
1567  BlockVector.push_back(BB);
1568  for (BasicBlock *SuccBB : successors(BB))
1569  if (BlockSet.insert(SuccBB).second)
1570  Worklist.push_back(SuccBB);
1571  }
1572 }
1573 
1575 #ifndef NDEBUG
1576  if (!IsValid)
1577  return;
1578 
1579  // Verify standard control-flow we use for OpenMP loops.
1580  assert(Preheader);
1581  assert(isa<BranchInst>(Preheader->getTerminator()) &&
1582  "Preheader must terminate with unconditional branch");
1583  assert(Preheader->getSingleSuccessor() == Header &&
1584  "Preheader must jump to header");
1585 
1586  assert(Header);
1587  assert(isa<BranchInst>(Header->getTerminator()) &&
1588  "Header must terminate with unconditional branch");
1589  assert(Header->getSingleSuccessor() == Cond &&
1590  "Header must jump to exiting block");
1591 
1592  assert(Cond);
1593  assert(Cond->getSinglePredecessor() == Header &&
1594  "Exiting block only reachable from header");
1595 
1596  assert(isa<BranchInst>(Cond->getTerminator()) &&
1597  "Exiting block must terminate with conditional branch");
1598  assert(size(successors(Cond)) == 2 &&
1599  "Exiting block must have two successors");
1600  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
1601  "Exiting block's first successor jump to the body");
1602  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
1603  "Exiting block's second successor must exit the loop");
1604 
1605  assert(Body);
1606  assert(Body->getSinglePredecessor() == Cond &&
1607  "Body only reachable from exiting block");
1608 
1609  assert(Latch);
1610  assert(isa<BranchInst>(Latch->getTerminator()) &&
1611  "Latch must terminate with unconditional branch");
1612  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
1613 
1614  assert(Exit);
1615  assert(isa<BranchInst>(Exit->getTerminator()) &&
1616  "Exit block must terminate with unconditional branch");
1617  assert(Exit->getSingleSuccessor() == After &&
1618  "Exit block must jump to after block");
1619 
1620  assert(After);
1621  assert(After->getSinglePredecessor() == Exit &&
1622  "After block only reachable from exit block");
1623 
1624  Instruction *IndVar = getIndVar();
1625  assert(IndVar && "Canonical induction variable not found?");
1626  assert(isa<IntegerType>(IndVar->getType()) &&
1627  "Induction variable must be an integer");
1628  assert(cast<PHINode>(IndVar)->getParent() == Header &&
1629  "Induction variable must be a PHI in the loop header");
1630 
1631  Value *TripCount = getTripCount();
1632  assert(TripCount && "Loop trip count not found?");
1633  assert(IndVar->getType() == TripCount->getType() &&
1634  "Trip count and induction variable must have the same type");
1635 
1636  auto *CmpI = cast<CmpInst>(&Cond->front());
1637  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
1638  "Exit condition must be a signed less-than comparison");
1639  assert(CmpI->getOperand(0) == IndVar &&
1640  "Exit condition must compare the induction variable");
1641  assert(CmpI->getOperand(1) == TripCount &&
1642  "Exit condition must compare with the trip count");
1643 #endif
1644 }
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:3006
bool empty() const
Definition: Function.h:753
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:286
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
Definition: OMPIRBuilder.h:755
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:54
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:21
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:248
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:288
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
Definition: OMPIRBuilder.h:770
void assertOK() const
Consistency self-check.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
unsigned less than
Definition: InstrTypes.h:747
void push_back(const T &Elt)
Definition: SmallVector.h:379
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
This class represents a function call, abstracting a target machine's calling convention.
The two locations do not alias at all.
Definition: AliasAnalysis.h:87
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:176
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:636
bool isEligible() const
Test whether this code extractor is eligible.
A debug info location.
Definition: DebugLoc.h:33
F(f)
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1294
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:198
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
Definition: OMPIRBuilder.h:758
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
Tentative definitions.
Definition: GlobalValue.h:58
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:347
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:50
Class to represent struct types.
Definition: DerivedTypes.h:212
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
Constant * getOrCreateDefaultSrcLocStr()
Return the (LLVM-IR) string describing the default source location.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:691
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
uint64_t Addr
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:367
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
Definition: OMPIRBuilder.h:750
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:292
bool empty() const
Definition: BasicBlock.h:307
Class to represent function types.
Definition: DerivedTypes.h:102
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:272
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:294
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Debug location.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition: Triple.h:703
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "New" if the callback ShouldRep...
Definition: Value.h:305
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
Definition: OMPIRBuilder.h:745
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:86
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2173
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:427
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Definition: OMPIRBuilder.h:778
signed less or equal
Definition: InstrTypes.h:752
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:264
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
This function has undefined behavior.
signed less than
Definition: InstrTypes.h:751
This is an important base class in LLVM.
Definition: Constant.h:41
SmallPtrSet< MachineInstr *, 2 > Uses
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
const Instruction & front() const
Definition: BasicBlock.h:308
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1249
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Definition: OMPIRBuilder.h:737
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
Value * getOrCreateIdent(Constant *SrcLocStr, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:375
self_iterator getIterator()
Definition: ilist_node.h:81
Class to represent integer types.
Definition: DerivedTypes.h:40
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:296
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:375
assume Assume Builder
uint64_t Align
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
CanonicalLoopInfo * createStaticWorkshareLoop(const LocationDescription &Loc, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a statically-scheduled workshare loop.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Definition: OMPIRBuilder.h:765
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:1985
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount)
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
Basic Register Allocator
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:442
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
Definition: OMPIRBuilder.h:785
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:29
iterator end()
Definition: BasicBlock.h:298
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1116
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
AddressSpace
Definition: NVPTXBaseInfo.h:21
Helper that contains information about regions we need to outline during finalization.
Definition: OMPIRBuilder.h:426
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:595
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:867
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:165
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
unsigned less or equal
Definition: InstrTypes.h:748
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:608
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Class to represent the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.h:717
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:73
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:585
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1288
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1171
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:212
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:197
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:139
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:295
IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1378
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:129
#define I(x, y, z)
Definition: MD5.cpp:59
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:724
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
const std::string to_string(const T &Value)
Definition: ScopedPrinter.h:61
SmallVector< MachineOperand, 4 > Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1479
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:60
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
LLVM Value Representation.
Definition: Value.h:75
succ_range successors(Instruction *I)
Definition: CFG.h:260
static const Function * getParent(const Value *V)
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1556
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
Constant * getOrCreateSrcLocStr(StringRef LocStr)
Return the (LLVM-IR) string describing the source location LocStr.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
void finalize(bool AllowExtractorSinking=false)
Finalize the underlying module, e.g., by outlining regions.
iterator insertAfter(iterator where, pointer New)
Definition: ilist.h:237
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, llvm::Value *DidIt)
Generator for '#omp single'.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
const BasicBlock * getParent() const
Definition: Instruction.h:94
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:74
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
an instruction to allocate memory on the stack
Definition: Instructions.h:61
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:302