LLVM  15.0.0git
OMPIRBuilder.cpp
Go to the documentation of this file.
1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/PassManager.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/MC/TargetRegistry.h"
40 
41 #include <cstdint>
42 
43 #define DEBUG_TYPE "openmp-ir-builder"
44 
45 using namespace llvm;
46 using namespace omp;
47 
/// Boolean command-line option "openmp-ir-builder-optimistic-attributes".
/// It is hidden (cl::Hidden) and defaults to false. Per its own description,
/// enabling it selects optimistic attributes that describe 'as-if' properties
/// of runtime calls.
// NOTE(review): the code that reads this flag is not visible in this excerpt;
// presumably it is consumed by the attribute sets pulled in from OMPKinds.def
// -- confirm.
48 static cl::opt<bool>
49  OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
50  cl::desc("Use optimistic attributes describing "
51  "'as-if' properties of runtime calls."),
52  cl::init(false));
53 
55  "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
56  cl::desc("Factor for the unroll threshold to account for code "
57  "simplifications still taking place"),
58  cl::init(1.5));
59 
60 #ifndef NDEBUG
61 /// Return whether IP1 and IP2 are ambiguous, i.e. that inserting instructions
62 /// at position IP1 may change the meaning of IP2 or vice-versa. This is because
63 /// an InsertPoint stores the instruction before something is inserted. For
64 /// instance, if both point to the same instruction, two IRBuilders alternately
65 /// creating instructions will cause those instructions to be interleaved.
68  if (!IP1.isSet() || !IP2.isSet())
69  return false;
70  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
71 }
72 
74  // Valid ordered/unordered and base algorithm combinations.
75  switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
117  break;
118  default:
119  return false;
120  }
121 
122  // Must not set both monotonicity modifiers at the same time.
123  OMPScheduleType MonotonicityFlags =
125  if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
126  return false;
127 
128  return true;
129 }
130 #endif
131 
132 /// Determine which scheduling algorithm to use, determined from schedule clause
133 /// arguments.
134 static OMPScheduleType
135 getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks,
136  bool HasSimdModifier) {
137  // Currently, the default schedule is static.
138  switch (ClauseKind) {
139  case OMP_SCHEDULE_Default:
140  case OMP_SCHEDULE_Static:
141  return HasChunks ? OMPScheduleType::BaseStaticChunked
143  case OMP_SCHEDULE_Dynamic:
145  case OMP_SCHEDULE_Guided:
146  return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
148  case OMP_SCHEDULE_Auto:
150  case OMP_SCHEDULE_Runtime:
151  return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
153  }
154  llvm_unreachable("unhandled schedule clause argument");
155 }
156 
157 /// Adds ordering modifier flags to schedule type.
158 static OMPScheduleType
160  bool HasOrderedClause) {
161  assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
163  "Must not have ordering nor monotonicity flags already set");
164 
165  OMPScheduleType OrderingModifier = HasOrderedClause
168  OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
169 
170  // Unsupported combinations
171  if (OrderingScheduleType ==
174  else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
177 
178  return OrderingScheduleType;
179 }
180 
181 /// Adds monotonicity modifier flags to schedule type.
182 static OMPScheduleType
184  bool HasSimdModifier, bool HasMonotonic,
185  bool HasNonmonotonic, bool HasOrderedClause) {
186  assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
188  "Must not have monotonicity flags already set");
189  assert((!HasMonotonic || !HasNonmonotonic) &&
190  "Monotonic and Nonmonotonic are contradicting each other");
191 
192  if (HasMonotonic) {
193  return ScheduleType | OMPScheduleType::ModifierMonotonic;
194  } else if (HasNonmonotonic) {
195  return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
196  } else {
197  // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
198  // If the static schedule kind is specified or if the ordered clause is
199  // specified, and if the nonmonotonic modifier is not specified, the
200  // effect is as if the monotonic modifier is specified. Otherwise, unless
201  // the monotonic modifier is specified, the effect is as if the
202  // nonmonotonic modifier is specified.
203  OMPScheduleType BaseScheduleType =
204  ScheduleType & ~OMPScheduleType::ModifierMask;
205  if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
206  (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
207  HasOrderedClause) {
208  // The monotonic is used by default in openmp runtime library, so no need
209  // to set it.
210  return ScheduleType;
211  } else {
212  return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
213  }
214  }
215 }
216 
217 /// Determine the schedule type using schedule and ordering clause arguments.
218 static OMPScheduleType
219 computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
220  bool HasSimdModifier, bool HasMonotonicModifier,
221  bool HasNonmonotonicModifier, bool HasOrderedClause) {
222  OMPScheduleType BaseSchedule =
223  getOpenMPBaseScheduleType(ClauseKind, HasChunks, HasSimdModifier);
224  OMPScheduleType OrderedSchedule =
225  getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause);
227  OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
228  HasNonmonotonicModifier, HasOrderedClause);
229 
231  return Result;
232 }
233 
234 /// Make \p Source branch to \p Target.
235 ///
236 /// Handles two situations:
237 /// * \p Source already has an unconditional branch.
238 /// * \p Source is a degenerate block (no terminator because the BB is
239 /// the current head of the IR construction).
241  if (Instruction *Term = Source->getTerminator()) {
242  auto *Br = cast<BranchInst>(Term);
243  assert(!Br->isConditional() &&
244  "BB's terminator must be an unconditional branch (or degenerate)");
245  BasicBlock *Succ = Br->getSuccessor(0);
246  Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
247  Br->setSuccessor(0, Target);
248  return;
249  }
250 
251  auto *NewBr = BranchInst::Create(Target, Source);
252  NewBr->setDebugLoc(DL);
253 }
254 
256  bool CreateBranch) {
257  assert(New->getFirstInsertionPt() == New->begin() &&
258  "Target BB must not have PHI nodes");
259 
260  // Move instructions to new block.
261  BasicBlock *Old = IP.getBlock();
262  New->getInstList().splice(New->begin(), Old->getInstList(), IP.getPoint(),
263  Old->end());
264 
265  if (CreateBranch)
266  BranchInst::Create(New, Old);
267 }
268 
269 void llvm::spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch) {
270  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
271  BasicBlock *Old = Builder.GetInsertBlock();
272 
273  spliceBB(Builder.saveIP(), New, CreateBranch);
274  if (CreateBranch)
275  Builder.SetInsertPoint(Old->getTerminator());
276  else
277  Builder.SetInsertPoint(Old);
278 
279  // SetInsertPoint also updates the Builder's debug location, but we want to
280  // keep the one the Builder was configured to use.
281  Builder.SetCurrentDebugLocation(DebugLoc);
282 }
283 
285  llvm::Twine Name) {
286  BasicBlock *Old = IP.getBlock();
288  Old->getContext(), Name.isTriviallyEmpty() ? Old->getName() : Name,
289  Old->getParent(), Old->getNextNode());
290  spliceBB(IP, New, CreateBranch);
291  New->replaceSuccessorsPhiUsesWith(Old, New);
292  return New;
293 }
294 
296  llvm::Twine Name) {
297  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
298  BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name);
299  if (CreateBranch)
300  Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
301  else
302  Builder.SetInsertPoint(Builder.GetInsertBlock());
303  // SetInsertPoint also updates the Builder's debug location, but we want to
304  // keep the one the Builder was configured to use.
305  Builder.SetCurrentDebugLocation(DebugLoc);
306  return New;
307 }
308 
310  llvm::Twine Name) {
311  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
312  BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name);
313  if (CreateBranch)
314  Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
315  else
316  Builder.SetInsertPoint(Builder.GetInsertBlock());
317  // SetInsertPoint also updates the Builder's debug location, but we want to
318  // keep the one the Builder was configured to use.
319  Builder.SetCurrentDebugLocation(DebugLoc);
320  return New;
321 }
322 
324  llvm::Twine Suffix) {
325  BasicBlock *Old = Builder.GetInsertBlock();
326  return splitBB(Builder, CreateBranch, Old->getName() + Suffix);
327 }
328 
330  LLVMContext &Ctx = Fn.getContext();
331 
332  // Get the function's current attributes.
333  auto Attrs = Fn.getAttributes();
334  auto FnAttrs = Attrs.getFnAttrs();
335  auto RetAttrs = Attrs.getRetAttrs();
337  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
338  ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));
339 
340 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
341 #include "llvm/Frontend/OpenMP/OMPKinds.def"
342 
343  // Add attributes to the function declaration.
344  switch (FnID) {
345 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
346  case Enum: \
347  FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
348  RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
349  for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
350  ArgAttrs[ArgNo] = \
351  ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
352  Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
353  break;
354 #include "llvm/Frontend/OpenMP/OMPKinds.def"
355  default:
356  // Attributes are optional.
357  break;
358  }
359 }
360 
363  FunctionType *FnTy = nullptr;
364  Function *Fn = nullptr;
365 
366  // Try to find the declaration in the module first.
367  switch (FnID) {
368 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
369  case Enum: \
370  FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
371  IsVarArg); \
372  Fn = M.getFunction(Str); \
373  break;
374 #include "llvm/Frontend/OpenMP/OMPKinds.def"
375  }
376 
377  if (!Fn) {
378  // Create a new declaration if we need one.
379  switch (FnID) {
380 #define OMP_RTL(Enum, Str, ...) \
381  case Enum: \
382  Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
383  break;
384 #include "llvm/Frontend/OpenMP/OMPKinds.def"
385  }
386 
387  // Add information if the runtime function takes a callback function
388  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
389  if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
390  LLVMContext &Ctx = Fn->getContext();
391  MDBuilder MDB(Ctx);
392  // Annotate the callback behavior of the runtime function:
393  // - The callback callee is argument number 2 (microtask).
394  // - The first two arguments of the callback callee are unknown (-1).
395  // - All variadic arguments to the runtime function are passed to the
396  // callback callee.
397  Fn->addMetadata(
398  LLVMContext::MD_callback,
400  2, {-1, -1}, /* VarArgsArePassed */ true)}));
401  }
402  }
403 
404  LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
405  << " with type " << *Fn->getFunctionType() << "\n");
406  addAttributes(FnID, *Fn);
407 
408  } else {
409  LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
410  << " with type " << *Fn->getFunctionType() << "\n");
411  }
412 
413  assert(Fn && "Failed to create OpenMP runtime function");
414 
415  // Cast the function to the expected type if necessary
417  return {FnTy, C};
418 }
419 
421  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
422  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
423  assert(Fn && "Failed to create OpenMP runtime function pointer");
424  return Fn;
425 }
426 
427 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
428 
430  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
432  SmallVector<OutlineInfo, 16> DeferredOutlines;
433  for (OutlineInfo &OI : OutlineInfos) {
434  // Skip functions that have not finalized yet; may happen with nested
435  // function generation.
436  if (Fn && OI.getFunction() != Fn) {
437  DeferredOutlines.push_back(OI);
438  continue;
439  }
440 
441  ParallelRegionBlockSet.clear();
442  Blocks.clear();
443  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
444 
445  Function *OuterFn = OI.getFunction();
446  CodeExtractorAnalysisCache CEAC(*OuterFn);
447  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
448  /* AggregateArgs */ true,
449  /* BlockFrequencyInfo */ nullptr,
450  /* BranchProbabilityInfo */ nullptr,
451  /* AssumptionCache */ nullptr,
452  /* AllowVarArgs */ true,
453  /* AllowAlloca */ true,
454  /* AllocaBlock*/ OI.OuterAllocaBB,
455  /* Suffix */ ".omp_par");
456 
457  LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
458  LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
459  << " Exit: " << OI.ExitBB->getName() << "\n");
460  assert(Extractor.isEligible() &&
461  "Expected OpenMP outlining to be possible!");
462 
463  for (auto *V : OI.ExcludeArgsFromAggregate)
464  Extractor.excludeArgFromAggregate(V);
465 
466  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
467 
468  LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
469  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
470  assert(OutlinedFn->getReturnType()->isVoidTy() &&
471  "OpenMP outlined functions should not return a value!");
472 
473  // For compatibility with the clang CG we move the outlined function after the
474  // one with the parallel region.
475  OutlinedFn->removeFromParent();
476  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
477 
478  // Remove the artificial entry introduced by the extractor right away, we
479  // made our own entry block after all.
480  {
481  BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
482  assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
483  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
484  // Move instructions from the to-be-deleted ArtificialEntry to the entry
485  // basic block of the parallel region. CodeExtractor generates
486  // instructions to unwrap the aggregate argument and may sink
487  // allocas/bitcasts for values that are solely used in the outlined region
488  // and do not escape.
489  assert(!ArtificialEntry.empty() &&
490  "Expected instructions to add in the outlined region entry");
491  for (BasicBlock::reverse_iterator It = ArtificialEntry.rbegin(),
492  End = ArtificialEntry.rend();
493  It != End;) {
494  Instruction &I = *It;
495  It++;
496 
497  if (I.isTerminator())
498  continue;
499 
500  I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
501  }
502 
503  OI.EntryBB->moveBefore(&ArtificialEntry);
504  ArtificialEntry.eraseFromParent();
505  }
506  assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
507  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
508 
509  // Run a user callback, e.g. to add attributes.
510  if (OI.PostOutlineCB)
511  OI.PostOutlineCB(*OutlinedFn);
512  }
513 
514  // Remove work items that have been completed.
515  OutlineInfos = std::move(DeferredOutlines);
516 }
517 
519  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
520 }
521 
523  IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
524  auto *GV =
525  new GlobalVariable(M, I32Ty,
526  /* isConstant = */ true, GlobalValue::WeakODRLinkage,
527  ConstantInt::get(I32Ty, Value), Name);
528  GV->setVisibility(GlobalValue::HiddenVisibility);
529 
530  return GV;
531 }
532 
534  uint32_t SrcLocStrSize,
535  IdentFlag LocFlags,
536  unsigned Reserve2Flags) {
537  // Enable "C-mode".
538  LocFlags |= OMP_IDENT_FLAG_KMPC;
539 
540  Constant *&Ident =
541  IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
542  if (!Ident) {
543  Constant *I32Null = ConstantInt::getNullValue(Int32);
544  Constant *IdentData[] = {I32Null,
545  ConstantInt::get(Int32, uint32_t(LocFlags)),
546  ConstantInt::get(Int32, Reserve2Flags),
547  ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
548  Constant *Initializer =
549  ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
550 
551  // Look for existing encoding of the location + flags, not needed but
552  // minimizes the difference to the existing solution while we transition.
553  for (GlobalVariable &GV : M.getGlobalList())
554  if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
555  if (GV.getInitializer() == Initializer)
556  Ident = &GV;
557 
558  if (!Ident) {
559  auto *GV = new GlobalVariable(
560  M, OpenMPIRBuilder::Ident,
561  /* isConstant = */ true, GlobalValue::PrivateLinkage, Initializer, "",
563  M.getDataLayout().getDefaultGlobalsAddressSpace());
564  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
565  GV->setAlignment(Align(8));
566  Ident = GV;
567  }
568  }
569 
570  return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr);
571 }
572 
574  uint32_t &SrcLocStrSize) {
575  SrcLocStrSize = LocStr.size();
576  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
577  if (!SrcLocStr) {
578  Constant *Initializer =
579  ConstantDataArray::getString(M.getContext(), LocStr);
580 
581  // Look for existing encoding of the location, not needed but minimizes the
582  // difference to the existing solution while we transition.
583  for (GlobalVariable &GV : M.getGlobalList())
584  if (GV.isConstant() && GV.hasInitializer() &&
585  GV.getInitializer() == Initializer)
586  return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
587 
588  SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
589  /* AddressSpace */ 0, &M);
590  }
591  return SrcLocStr;
592 }
593 
595  StringRef FileName,
596  unsigned Line, unsigned Column,
597  uint32_t &SrcLocStrSize) {
598  SmallString<128> Buffer;
599  Buffer.push_back(';');
600  Buffer.append(FileName);
601  Buffer.push_back(';');
602  Buffer.append(FunctionName);
603  Buffer.push_back(';');
604  Buffer.append(std::to_string(Line));
605  Buffer.push_back(';');
606  Buffer.append(std::to_string(Column));
607  Buffer.push_back(';');
608  Buffer.push_back(';');
609  return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize);
610 }
611 
612 Constant *
614  StringRef UnknownLoc = ";unknown;unknown;0;0;;";
615  return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
616 }
617 
619  uint32_t &SrcLocStrSize,
620  Function *F) {
621  DILocation *DIL = DL.get();
622  if (!DIL)
623  return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
624  StringRef FileName = M.getName();
625  if (DIFile *DIF = DIL->getFile())
626  if (Optional<StringRef> Source = DIF->getSource())
627  FileName = *Source;
628  StringRef Function = DIL->getScope()->getSubprogram()->getName();
629  if (Function.empty() && F)
630  Function = F->getName();
631  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
632  DIL->getColumn(), SrcLocStrSize);
633 }
634 
636  uint32_t &SrcLocStrSize) {
637  return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize,
638  Loc.IP.getBlock()->getParent());
639 }
640 
642  return Builder.CreateCall(
643  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
644  "omp_global_thread_num");
645 }
646 
649  bool ForceSimpleCall, bool CheckCancelFlag) {
650  if (!updateToLocation(Loc))
651  return Loc.IP;
652  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
653 }
654 
657  bool ForceSimpleCall, bool CheckCancelFlag) {
658  // Build call __kmpc_cancel_barrier(loc, thread_id) or
659  // __kmpc_barrier(loc, thread_id);
660 
661  IdentFlag BarrierLocFlags;
662  switch (Kind) {
663  case OMPD_for:
664  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
665  break;
666  case OMPD_sections:
667  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
668  break;
669  case OMPD_single:
670  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
671  break;
672  case OMPD_barrier:
673  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
674  break;
675  default:
676  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
677  break;
678  }
679 
680  uint32_t SrcLocStrSize;
681  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
682  Value *Args[] = {
683  getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
684  getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
685 
686  // If we are in a cancellable parallel region, barriers are cancellation
687  // points.
688  // TODO: Check why we would force simple calls or to ignore the cancel flag.
689  bool UseCancelBarrier =
690  !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
691 
692  Value *Result =
693  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
694  UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
695  : OMPRTL___kmpc_barrier),
696  Args);
697 
698  if (UseCancelBarrier && CheckCancelFlag)
699  emitCancelationCheckImpl(Result, OMPD_parallel);
700 
701  return Builder.saveIP();
702 }
703 
706  Value *IfCondition,
707  omp::Directive CanceledDirective) {
708  if (!updateToLocation(Loc))
709  return Loc.IP;
710 
711  // LLVM utilities like blocks with terminators.
712  auto *UI = Builder.CreateUnreachable();
713 
714  Instruction *ThenTI = UI, *ElseTI = nullptr;
715  if (IfCondition)
716  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
717  Builder.SetInsertPoint(ThenTI);
718 
719  Value *CancelKind = nullptr;
720  switch (CanceledDirective) {
721 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
722  case DirectiveEnum: \
723  CancelKind = Builder.getInt32(Value); \
724  break;
725 #include "llvm/Frontend/OpenMP/OMPKinds.def"
726  default:
727  llvm_unreachable("Unknown cancel kind!");
728  }
729 
730  uint32_t SrcLocStrSize;
731  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
732  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
733  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
734  Value *Result = Builder.CreateCall(
735  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
736  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
737  if (CanceledDirective == OMPD_parallel) {
739  Builder.restoreIP(IP);
740  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
741  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
742  /* CheckCancelFlag */ false);
743  }
744  };
745 
746  // The actual cancel logic is shared with others, e.g., cancel_barriers.
747  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
748 
749  // Update the insertion point and remove the terminator we introduced.
750  Builder.SetInsertPoint(UI->getParent());
751  UI->eraseFromParent();
752 
753  return Builder.saveIP();
754 }
755 
757  uint64_t Size, int32_t Flags,
759  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
760  Type *Int32Ty = Type::getInt32Ty(M.getContext());
761  Type *SizeTy = M.getDataLayout().getIntPtrType(M.getContext());
762 
763  Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);
764 
765  // Create the constant string used to look up the symbol in the device.
766  auto *Str =
767  new llvm::GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
769  ".omp_offloading.entry_name");
770  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
771 
772  // Construct the offloading entry.
773  Constant *EntryData[] = {
776  ConstantInt::get(SizeTy, Size),
777  ConstantInt::get(Int32Ty, Flags),
779  };
780  Constant *EntryInitializer =
781  ConstantStruct::get(OpenMPIRBuilder::OffloadEntry, EntryData);
782 
783  auto *Entry = new GlobalVariable(
784  M, OpenMPIRBuilder::OffloadEntry,
785  /* isConstant = */ true, GlobalValue::WeakAnyLinkage, EntryInitializer,
786  ".omp_offloading.entry." + Name, nullptr, GlobalValue::NotThreadLocal,
787  M.getDataLayout().getDefaultGlobalsAddressSpace());
788 
789  // The entry has to be created in the section the linker expects it to be.
790  Entry->setSection(SectionName);
791  Entry->setAlignment(Align(1));
792 }
793 
795  omp::Directive CanceledDirective,
796  FinalizeCallbackTy ExitCB) {
797  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
798  "Unexpected cancellation!");
799 
800  // For a cancel barrier we create two new blocks.
801  BasicBlock *BB = Builder.GetInsertBlock();
802  BasicBlock *NonCancellationBlock;
803  if (Builder.GetInsertPoint() == BB->end()) {
804  // TODO: This branch will not be needed once we moved to the
805  // OpenMPIRBuilder codegen completely.
806  NonCancellationBlock = BasicBlock::Create(
807  BB->getContext(), BB->getName() + ".cont", BB->getParent());
808  } else {
809  NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
810  BB->getTerminator()->eraseFromParent();
811  Builder.SetInsertPoint(BB);
812  }
813  BasicBlock *CancellationBlock = BasicBlock::Create(
814  BB->getContext(), BB->getName() + ".cncl", BB->getParent());
815 
816  // Jump to them based on the return value.
817  Value *Cmp = Builder.CreateIsNull(CancelFlag);
818  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
819  /* TODO weight */ nullptr, nullptr);
820 
821  // From the cancellation block we finalize all variables and go to the
822  // post finalization block that is known to the FiniCB callback.
823  Builder.SetInsertPoint(CancellationBlock);
824  if (ExitCB)
825  ExitCB(Builder.saveIP());
826  auto &FI = FinalizationStack.back();
827  FI.FiniCB(Builder.saveIP());
828 
829  // The continuation block is where code generation continues.
830  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
831 }
832 
834  const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
835  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
836  FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
837  omp::ProcBindKind ProcBind, bool IsCancellable) {
838  assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous");
839 
840  if (!updateToLocation(Loc))
841  return Loc.IP;
842 
843  uint32_t SrcLocStrSize;
844  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
845  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
846  Value *ThreadID = getOrCreateThreadID(Ident);
847 
848  if (NumThreads) {
849  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
850  Value *Args[] = {
851  Ident, ThreadID,
852  Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
853  Builder.CreateCall(
854  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
855  }
856 
857  if (ProcBind != OMP_PROC_BIND_default) {
858  // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
859  Value *Args[] = {
860  Ident, ThreadID,
861  ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
862  Builder.CreateCall(
863  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
864  }
865 
866  BasicBlock *InsertBB = Builder.GetInsertBlock();
867  Function *OuterFn = InsertBB->getParent();
868 
869  // Save the outer alloca block because the insertion iterator may get
870  // invalidated and we still need this later.
871  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
872 
873  // Vector to remember instructions we used only during the modeling but which
874  // we want to delete at the end.
875  SmallVector<Instruction *, 4> ToBeDeleted;
876 
877  // Change the location to the outer alloca insertion point to create and
878  // initialize the allocas we pass into the parallel region.
879  Builder.restoreIP(OuterAllocaIP);
880  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
881  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
882 
883  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
884  // program, otherwise we only need them for modeling purposes to get the
885  // associated arguments in the outlined function. In the former case,
886  // initialize the allocas properly, in the latter case, delete them later.
887  if (IfCondition) {
888  Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
889  Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
890  } else {
891  ToBeDeleted.push_back(TIDAddr);
892  ToBeDeleted.push_back(ZeroAddr);
893  }
894 
895  // Create an artificial insertion point that will also ensure the blocks we
896  // are about to split are not degenerated.
897  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
898 
899  Instruction *ThenTI = UI, *ElseTI = nullptr;
900  if (IfCondition)
901  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
902 
903  BasicBlock *ThenBB = ThenTI->getParent();
904  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
905  BasicBlock *PRegBodyBB =
906  PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
907  BasicBlock *PRegPreFiniBB =
908  PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
909  BasicBlock *PRegExitBB =
910  PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
911 
912  auto FiniCBWrapper = [&](InsertPointTy IP) {
913  // Hide "open-ended" blocks from the given FiniCB by setting the right jump
914  // target to the region exit block.
915  if (IP.getBlock()->end() == IP.getPoint()) {
917  Builder.restoreIP(IP);
918  Instruction *I = Builder.CreateBr(PRegExitBB);
919  IP = InsertPointTy(I->getParent(), I->getIterator());
920  }
921  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
922  IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
923  "Unexpected insertion point for finalization call!");
924  return FiniCB(IP);
925  };
926 
927  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
928 
929  // Generate the privatization allocas in the block that will become the entry
930  // of the outlined function.
931  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
932  InsertPointTy InnerAllocaIP = Builder.saveIP();
933 
934  AllocaInst *PrivTIDAddr =
935  Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
936  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
937 
938  // Add some fake uses for OpenMP provided arguments.
939  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
940  Instruction *ZeroAddrUse =
941  Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
942  ToBeDeleted.push_back(ZeroAddrUse);
943 
944  // ThenBB
945  // |
946  // V
947  // PRegionEntryBB <- Privatization allocas are placed here.
948  // |
949  // V
950  // PRegionBodyBB <- BodyGen is invoked here.
951  // |
952  // V
953  // PRegPreFiniBB <- The block we will start finalization from.
954  // |
955  // V
956  // PRegionExitBB <- A common exit to simplify block collection.
957  //
958 
959  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
960 
961  // Let the caller create the body.
962  assert(BodyGenCB && "Expected body generation callback!");
963  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
964  BodyGenCB(InnerAllocaIP, CodeGenIP);
965 
966  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
967 
968  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
969  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
970  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
971  llvm::LLVMContext &Ctx = F->getContext();
972  MDBuilder MDB(Ctx);
973  // Annotate the callback behavior of the __kmpc_fork_call:
974  // - The callback callee is argument number 2 (microtask).
975  // - The first two arguments of the callback callee are unknown (-1).
976  // - All variadic arguments to the __kmpc_fork_call are passed to the
977  // callback callee.
978  F->addMetadata(
979  llvm::LLVMContext::MD_callback,
981  Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
982  /* VarArgsArePassed */ true)}));
983  }
984  }
985 
986  OutlineInfo OI;
987  OI.PostOutlineCB = [=](Function &OutlinedFn) {
988  // Add some known attributes.
989  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
990  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
991  OutlinedFn.addFnAttr(Attribute::NoUnwind);
992  OutlinedFn.addFnAttr(Attribute::NoRecurse);
993 
994  assert(OutlinedFn.arg_size() >= 2 &&
995  "Expected at least tid and bounded tid as arguments");
996  unsigned NumCapturedVars =
997  OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
998 
999  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
1000  CI->getParent()->setName("omp_parallel");
1001  Builder.SetInsertPoint(CI);
1002 
1003  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
1004  Value *ForkCallArgs[] = {
1005  Ident, Builder.getInt32(NumCapturedVars),
1006  Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
1007 
1008  SmallVector<Value *, 16> RealArgs;
1009  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1010  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
1011 
1012  Builder.CreateCall(RTLFn, RealArgs);
1013 
1014  LLVM_DEBUG(dbgs() << "With fork_call placed: "
1015  << *Builder.GetInsertBlock()->getParent() << "\n");
1016 
1017  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
1018 
1019  // Initialize the local TID stack location with the argument value.
1020  Builder.SetInsertPoint(PrivTID);
1021  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
1022  Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
1023 
1024  // If no "if" clause was present we do not need the call created during
1025  // outlining, otherwise we reuse it in the serialized parallel region.
1026  if (!ElseTI) {
1027  CI->eraseFromParent();
1028  } else {
1029 
1030  // If an "if" clause was present we are now generating the serialized
1031  // version into the "else" branch.
1032  Builder.SetInsertPoint(ElseTI);
1033 
1034  // Build calls __kmpc_serialized_parallel(&Ident, GTid);
1035  Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
1036  Builder.CreateCall(
1037  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
1038  SerializedParallelCallArgs);
1039 
1040  // OutlinedFn(&GTid, &zero, CapturedStruct);
1041  CI->removeFromParent();
1042  Builder.Insert(CI);
1043 
1044  // __kmpc_end_serialized_parallel(&Ident, GTid);
1045  Value *EndArgs[] = {Ident, ThreadID};
1046  Builder.CreateCall(
1047  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
1048  EndArgs);
1049 
1050  LLVM_DEBUG(dbgs() << "With serialized parallel region: "
1051  << *Builder.GetInsertBlock()->getParent() << "\n");
1052  }
1053 
1054  for (Instruction *I : ToBeDeleted)
1055  I->eraseFromParent();
1056  };
1057 
1058  // Adjust the finalization stack, verify the adjustment, and call the
1059  // finalize function a last time to finalize values between the pre-fini
1060  // block and the exit block if we left the parallel "the normal way".
1061  auto FiniInfo = FinalizationStack.pop_back_val();
1062  (void)FiniInfo;
1063  assert(FiniInfo.DK == OMPD_parallel &&
1064  "Unexpected finalization stack state!");
1065 
1066  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
1067 
1068  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
1069  FiniCB(PreFiniIP);
1070 
1071  OI.OuterAllocaBB = OuterAllocaBlock;
1072  OI.EntryBB = PRegEntryBB;
1073  OI.ExitBB = PRegExitBB;
1074 
1075  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
1077  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1078 
1079  // Ensure a single exit node for the outlined region by creating one.
1080  // We might have multiple incoming edges to the exit now due to finalizations,
1081  // e.g., cancel calls that cause the control flow to leave the region.
1082  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
1083  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
1084  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
1085  Blocks.push_back(PRegOutlinedExitBB);
1086 
1087  CodeExtractorAnalysisCache CEAC(*OuterFn);
1088  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
1089  /* AggregateArgs */ false,
1090  /* BlockFrequencyInfo */ nullptr,
1091  /* BranchProbabilityInfo */ nullptr,
1092  /* AssumptionCache */ nullptr,
1093  /* AllowVarArgs */ true,
1094  /* AllowAlloca */ true,
1095  /* AllocationBlock */ OuterAllocaBlock,
1096  /* Suffix */ ".omp_par");
1097 
1098  // Find inputs to, outputs from the code region.
1099  BasicBlock *CommonExit = nullptr;
1100  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
1101  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1102  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
1103 
1104  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
1105 
1106  FunctionCallee TIDRTLFn =
1107  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1108 
1109  auto PrivHelper = [&](Value &V) {
1110  if (&V == TIDAddr || &V == ZeroAddr) {
1111  OI.ExcludeArgsFromAggregate.push_back(&V);
1112  return;
1113  }
1114 
1116  for (Use &U : V.uses())
1117  if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
1118  if (ParallelRegionBlockSet.count(UserI->getParent()))
1119  Uses.insert(&U);
1120 
1121  // __kmpc_fork_call expects extra arguments as pointers. If the input
1122  // already has a pointer type, everything is fine. Otherwise, store the
1123  // value onto stack and load it back inside the to-be-outlined region. This
1124  // will ensure only the pointer will be passed to the function.
1125  // FIXME: if there are more than 15 trailing arguments, they must be
1126  // additionally packed in a struct.
1127  Value *Inner = &V;
1128  if (!V.getType()->isPointerTy()) {
1130  LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
1131 
1132  Builder.restoreIP(OuterAllocaIP);
1133  Value *Ptr =
1134  Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
1135 
1136  // Store to stack at end of the block that currently branches to the entry
1137  // block of the to-be-outlined region.
1138  Builder.SetInsertPoint(InsertBB,
1139  InsertBB->getTerminator()->getIterator());
1140  Builder.CreateStore(&V, Ptr);
1141 
1142  // Load back next to allocations in the to-be-outlined region.
1143  Builder.restoreIP(InnerAllocaIP);
1144  Inner = Builder.CreateLoad(V.getType(), Ptr);
1145  }
1146 
1147  Value *ReplacementValue = nullptr;
1148  CallInst *CI = dyn_cast<CallInst>(&V);
1149  if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
1150  ReplacementValue = PrivTID;
1151  } else {
1152  Builder.restoreIP(
1153  PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
1154  assert(ReplacementValue &&
1155  "Expected copy/create callback to set replacement value!");
1156  if (ReplacementValue == &V)
1157  return;
1158  }
1159 
1160  for (Use *UPtr : Uses)
1161  UPtr->set(ReplacementValue);
1162  };
1163 
1164  // Reset the inner alloca insertion as it will be used for loading the values
1165  // wrapped into pointers before passing them into the to-be-outlined region.
1166  // Configure it to insert immediately after the fake use of zero address so
1167  // that they are available in the generated body and so that the
1168  // OpenMP-related values (thread ID and zero address pointers) remain leading
1169  // in the argument list.
1170  InnerAllocaIP = IRBuilder<>::InsertPoint(
1171  ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
1172 
1173  // Reset the outer alloca insertion point to the entry of the relevant block
1174  // in case it was invalidated.
1175  OuterAllocaIP = IRBuilder<>::InsertPoint(
1176  OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
1177 
1178  for (Value *Input : Inputs) {
1179  LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
1180  PrivHelper(*Input);
1181  }
1182  LLVM_DEBUG({
1183  for (Value *Output : Outputs)
1184  LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
1185  });
1186  assert(Outputs.empty() &&
1187  "OpenMP outlining should not produce live-out values!");
1188 
1189  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
1190  LLVM_DEBUG({
1191  for (auto *BB : Blocks)
1192  dbgs() << " PBR: " << BB->getName() << "\n";
1193  });
1194 
1195  // Register the outlined info.
1196  addOutlineInfo(std::move(OI));
1197 
1198  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1199  UI->eraseFromParent();
1200 
1201  return AfterIP;
1202 }
1203 
// NOTE(review): the signature line (listing line 1204) is missing from this
// extract. The body below is presumably that of the `emitFlush(Loc)` helper
// invoked by the wrapper that follows -- confirm against the full file.
//
// Builds the source-location string and ident for Loc, then emits a call to
// the __kmpc_flush runtime function through Builder. No location check is
// performed here.
1205  // Build call void __kmpc_flush(ident_t *loc)
1206  uint32_t SrcLocStrSize;
1207  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
      // The ident is the only argument passed to the runtime call.
1208  Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1209 
1210  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1211 }
1212 
// NOTE(review): the signature line (listing line 1213) is missing from this
// extract; presumably the public flush entry point taking `Loc` -- confirm.
//
// Emits nothing when updateToLocation(Loc) reports failure; otherwise
// delegates the actual call emission to emitFlush.
1214  if (!updateToLocation(Loc))
1215  return;
1216  emitFlush(Loc);
1217 }
1218 
// NOTE(review): the signature line (listing line 1219) is missing from this
// extract. The body is presumably that of `emitTaskwaitImpl(Loc)`, which the
// wrapper that follows calls -- confirm against the full file.
//
// Emits a call to __kmpc_omp_taskwait with the ident built from Loc and the
// thread ID obtained for that ident.
1220  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
1221  // global_tid);
1222  uint32_t SrcLocStrSize;
1223  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1224  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1225  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
1226 
1227  // Ignore return result until untied tasks are supported.
1228  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1229  Args);
1230 }
1231 
// NOTE(review): the signature line (listing line 1232) is missing from this
// extract; presumably the public taskwait entry point taking `Loc` -- confirm.
//
// Emits nothing when updateToLocation(Loc) reports failure; otherwise
// delegates to emitTaskwaitImpl.
1233  if (!updateToLocation(Loc))
1234  return;
1235  emitTaskwaitImpl(Loc);
1236 }
1237 
// NOTE(review): the signature line (listing line 1238) is missing from this
// extract. The body is presumably that of `emitTaskyieldImpl(Loc)`, which the
// wrapper that follows calls -- confirm against the full file.
//
// Emits a call to __kmpc_omp_taskyield with the ident built from Loc, the
// thread ID obtained for that ident, and a zero i32 as third argument.
1239  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1240  uint32_t SrcLocStrSize;
1241  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1242  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
      // Null value of the Int32 type, i.e. the literal 0 from the comment above.
1243  Constant *I32Null = ConstantInt::getNullValue(Int32);
1244  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1245 
1246  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1247  Args);
1248 }
1249 
// NOTE(review): the signature line (listing line 1250) is missing from this
// extract; presumably the public taskyield entry point taking `Loc` -- confirm.
//
// Emits nothing when updateToLocation(Loc) reports failure; otherwise
// delegates to emitTaskyieldImpl.
1251  if (!updateToLocation(Loc))
1252  return;
1253  emitTaskyieldImpl(Loc);
1254 }
1255 
// NOTE(review): listing lines 1256 and 1258 (the start of the signature and
// one parameter line, presumably the one declaring `SectionCBs`) are missing
// from this extract -- confirm against the full file.
//
// Emits a sections construct as a canonical loop over [0, SectionCBs.size())
// whose body is a switch on the induction variable with one case block per
// section callback. The loop is then turned into a static workshare loop and
// the finalization callback is run in a block split off after the loop.
// Returns Loc.IP unchanged when updateToLocation(Loc) fails, otherwise an
// insertion point at the beginning of the block that follows finalization (or
// the point returned by applyStaticWorkshareLoop when no callback is set).
1257  const LocationDescription &Loc, InsertPointTy AllocaIP,
1259  FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
1260  assert(!isConflictIP(AllocaIP, Loc.IP) && "Dedicated IP allocas required");
1261 
1262  if (!updateToLocation(Loc))
1263  return Loc.IP;
1264 
      // Wrapper pushed on the finalization stack. When IP already points at an
      // instruction it forwards to FiniCB directly; when IP is at the end of its
      // block it first creates a branch to the exit block so that FiniCB sees a
      // terminated block. The lambda captures by reference; its stack entry is
      // popped again further down, before this function returns.
1265  auto FiniCBWrapper = [&](InsertPointTy IP) {
1266  if (IP.getBlock()->end() != IP.getPoint())
1267  return FiniCB(IP);
1268  // This must be done otherwise any nested constructs using FinalizeOMPRegion
1269  // will fail because that function requires the Finalization Basic Block to
1270  // have a terminator, which is already removed by EmitOMPRegionBody.
1271  // IP is currently at cancelation block.
1272  // We need to backtrack to the condition block to fetch
1273  // the exit block and create a branch from cancelation
1274  // to exit block.
      // NOTE(review): listing line 1275 is missing from this extract -- confirm
      // what statement precedes the restoreIP below.
1276  Builder.restoreIP(IP);
      // Walk back: block of IP -> its single predecessor (the case block) -> two
      // more single predecessors (the condition block). Successor 1 of the
      // condition block's terminator is taken as the exit block.
      // NOTE(review): every getSinglePredecessor() result is dereferenced without
      // a null check; this relies on the CFG shape described above.
1277  auto *CaseBB = IP.getBlock()->getSinglePredecessor();
1278  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1279  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1280  Instruction *I = Builder.CreateBr(ExitBB);
1281  IP = InsertPointTy(I->getParent(), I->getIterator());
1282  return FiniCB(IP);
1283  };
1284 
1285  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
1286 
1287  // Each section is emitted as a switch case
1288  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
1289  // -> OMP.createSection() which generates the IR for each section
1290  // Iterate through all sections and emit a switch construct:
1291  // switch (IV) {
1292  // case 0:
1293  // <SectionStmt[0]>;
1294  // break;
1295  // ...
1296  // case <NumSection> - 1:
1297  // <SectionStmt[<NumSection> - 1]>;
1298  // break;
1299  // }
1300  // ...
1301  // section_loop.after:
1302  // <FiniCB>;
1303  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
1304  Builder.restoreIP(CodeGenIP);
      // Split the body block without creating a branch; the switch created next
      // terminates the first half and uses the split-off block as its default
      // destination.
1305  BasicBlock *Continue =
1306  splitBBWithSuffix(Builder, /*CreateBranch=*/false, ".sections.after");
1307  Function *CurFn = Continue->getParent();
1308  SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, Continue);
1309 
1310  unsigned CaseNumber = 0;
1311  for (auto SectionCB : SectionCBs) {
      // One case block per section, inserted before Continue and pre-terminated
      // with a branch to Continue; the section callback is given an insertion
      // point at that branch and a default-constructed alloca insertion point.
1312  BasicBlock *CaseBB = BasicBlock::Create(
1313  M.getContext(), "omp_section_loop.body.case", CurFn, Continue);
1314  SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
1315  Builder.SetInsertPoint(CaseBB);
1316  BranchInst *CaseEndBr = Builder.CreateBr(Continue);
1317  SectionCB(InsertPointTy(),
1318  {CaseEndBr->getParent(), CaseEndBr->getIterator()});
1319  CaseNumber++;
1320  }
1321  // remove the existing terminator from body BB since there can be no
1322  // terminators after switch/case
      // NOTE(review): no statement follows the comment above in the visible code;
      // it looks like a leftover -- confirm and drop it if so.
1323  };
1324  // Loop body ends here
1325  // LowerBound, UpperBound, and Stride for createCanonicalLoop
1326  Type *I32Ty = Type::getInt32Ty(M.getContext());
1327  Value *LB = ConstantInt::get(I32Ty, 0);
1328  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
1329  Value *ST = ConstantInt::get(I32Ty, 1);
      // The two boolean arguments are IsSigned=true and InclusiveStop=false, so
      // the loop counts 0 .. SectionCBs.size()-1 in steps of one.
1330  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
1331  Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
      // The last argument is the negation of IsNowait.
1332  InsertPointTy AfterIP =
1333  applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);
1334 
1335  // Apply the finalization callback in LoopAfterBB
1336  auto FiniInfo = FinalizationStack.pop_back_val();
1337  assert(FiniInfo.DK == OMPD_sections &&
1338  "Unexpected finalization stack state!");
1339  if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
      // Split at AfterIP with a branch, run the callback at the Builder's
      // resulting insertion point, and continue at the start of the split-off
      // block.
1340  Builder.restoreIP(AfterIP);
1341  BasicBlock *FiniBB =
1342  splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini");
1343  CB(Builder.saveIP());
1344  AfterIP = {FiniBB, FiniBB->begin()};
1345  }
1346 
1347  return AfterIP;
1348 }
1349 
// NOTE(review): listing lines 1350-1351 (the start of the signature,
// presumably declaring `Loc`) are missing from this extract -- confirm.
//
// Emits a single section as an inlined OMPD_sections region without entry or
// exit runtime calls. Returns Loc.IP unchanged when updateToLocation(Loc)
// fails, otherwise whatever EmitOMPInlinedRegion returns.
1352  BodyGenCallbackTy BodyGenCB,
1353  FinalizeCallbackTy FiniCB) {
1354  if (!updateToLocation(Loc))
1355  return Loc.IP;
1356 
      // Forwards to FiniCB directly when IP points at an instruction; when IP is
      // at the end of its block, a branch to the exit block is created first so
      // that FiniCB is invoked on a terminated block.
1357  auto FiniCBWrapper = [&](InsertPointTy IP) {
1358  if (IP.getBlock()->end() != IP.getPoint())
1359  return FiniCB(IP);
1360  // This must be done otherwise any nested constructs using FinalizeOMPRegion
1361  // will fail because that function requires the Finalization Basic Block to
1362  // have a terminator, which is already removed by EmitOMPRegionBody.
1363  // IP is currently at cancelation block.
1364  // We need to backtrack to the condition block to fetch
1365  // the exit block and create a branch from cancelation
1366  // to exit block.
      // NOTE(review): listing line 1367 is missing from this extract -- confirm
      // what statement precedes the restoreIP below.
1368  Builder.restoreIP(IP);
      // The case block is taken from Loc.IP (not from IP); the condition block is
      // two single-predecessor hops above it, and successor 1 of its terminator
      // is used as the exit block.
      // NOTE(review): the getSinglePredecessor() results are dereferenced without
      // a null check; this relies on the surrounding CFG shape.
1369  auto *CaseBB = Loc.IP.getBlock();
1370  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1371  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1372  Instruction *I = Builder.CreateBr(ExitBB);
1373  IP = InsertPointTy(I->getParent(), I->getIterator());
1374  return FiniCB(IP);
1375  };
1376 
1377  Directive OMPD = Directive::OMPD_sections;
1378  // Since we are using Finalization Callback here, HasFinalize
1379  // and IsCancellable have to be true
1380  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
1381  /*Conditional*/ false, /*hasFinalize*/ true,
1382  /*IsCancellable*/ true);
1383 }
1384 
1385 /// Create a function with a unique name and a "void (i8*, i8*)" signature in
1386 /// the given module and return it.
// NOTE(review): the signature line (listing line 1387) and the head of the
// return statement (listing line 1392) are missing from this extract. The
// reduction builder elsewhere in this file calls `getFreshReductionFunc(*Module)`,
// which is presumably this function -- confirm.
1388  Type *VoidTy = Type::getVoidTy(M.getContext());
1389  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
      // Non-variadic function type taking two i8* parameters and returning void.
1390  auto *FuncTy =
1391  FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
      // The two lines below are the trailing arguments of the missing creation
      // call: the default globals address space of M's data layout, the name
      // ".omp.reduction.func", and the owning module.
1393  M.getDataLayout().getDefaultGlobalsAddressSpace(),
1394  ".omp.reduction.func", &M);
1395 }
1396 
// NOTE(review): the signature line (listing line 1397) is missing from this
// extract -- confirm the function name and return type against the full file.
//
// Emits the reduction epilogue for the variables described by ReductionInfos:
//  * packs the private variables' addresses into a stack array of i8*,
//  * calls __kmpc_reduce or __kmpc_reduce_nowait (selected by IsNoWait),
//  * switches on the call's result: 1 -> non-atomic reduction block,
//    2 -> atomic reduction block, anything else -> continuation,
//  * populates a fresh ".omp.reduction.func" with the element-wise reduction.
// Returns a default-constructed InsertPointTy when updateToLocation(Loc) fails
// or when a generator callback leaves the Builder without an insert block;
// otherwise returns an insertion point in the "reduce.finalize" continuation
// block.
1398  const LocationDescription &Loc, InsertPointTy AllocaIP,
1399  ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
      // Sanity-check every descriptor (asserts only; `(void)RI` keeps builds
      // without assertions free of unused-variable warnings).
1400  for (const ReductionInfo &RI : ReductionInfos) {
1401  (void)RI;
1402  assert(RI.Variable && "expected non-null variable");
1403  assert(RI.PrivateVariable && "expected non-null private variable");
1404  assert(RI.ReductionGen && "expected non-null reduction generator callback");
1405  assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
1406  "expected variables and their private equivalents to have the same "
1407  "type");
1408  assert(RI.Variable->getType()->isPointerTy() &&
1409  "expected variables to be pointers");
1410  }
1411 
1412  if (!updateToLocation(Loc))
1413  return InsertPointTy();
1414 
      // Split the insertion block at Loc.IP; everything from that point on moves
      // to "reduce.finalize". The terminator of the first half is erased because
      // the switch emitted below becomes that block's terminator.
1415  BasicBlock *InsertBlock = Loc.IP.getBlock();
1416  BasicBlock *ContinuationBlock =
1417  InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
1418  InsertBlock->getTerminator()->eraseFromParent();
1419 
1420  // Create and populate array of type-erased pointers to private reduction
1421  // values.
1422  unsigned NumReductions = ReductionInfos.size();
1423  Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
      // The array itself is allocated at AllocaIP, not at the reduction site.
1424  Builder.restoreIP(AllocaIP);
1425  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
1426 
1427  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
1428 
1429  for (auto En : enumerate(ReductionInfos)) {
1430  unsigned Index = En.index();
1431  const ReductionInfo &RI = En.value();
1432  Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
1433  RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
1434  Value *Casted =
1435  Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
1436  "private.red.var." + Twine(Index) + ".casted");
1437  Builder.CreateStore(Casted, RedArrayElemPtr);
1438  }
1439 
1440  // Emit a call to the runtime function that orchestrates the reduction.
1441  // Declare the reduction function in the process.
1442  Function *Func = Builder.GetInsertBlock()->getParent();
1443  Module *Module = Func->getParent();
1444  Value *RedArrayPtr =
1445  Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
1446  uint32_t SrcLocStrSize;
1447  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
      // Atomic reduction is only offered when every descriptor provides an atomic
      // generator; this decides both the ident flag and the contents of the
      // atomic block below.
1448  bool CanGenerateAtomic =
1449  llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
1450  return RI.AtomicReductionGen;
1451  });
1452  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
1453  CanGenerateAtomic
1454  ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
1455  : IdentFlag(0));
1456  Value *ThreadId = getOrCreateThreadID(Ident);
1457  Constant *NumVariables = Builder.getInt32(NumReductions);
1458  const DataLayout &DL = Module->getDataLayout();
1459  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
1460  Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
1461  Function *ReductionFunc = getFreshReductionFunc(*Module);
1462  Value *Lock = getOMPCriticalRegionLock(".reduction");
1463  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
1464  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
1465  : RuntimeFunction::OMPRTL___kmpc_reduce);
1466  CallInst *ReduceCall =
1467  Builder.CreateCall(ReduceFunc,
1468  {Ident, ThreadId, NumVariables, RedArraySize,
1469  RedArrayPtr, ReductionFunc, Lock},
1470  "reduce");
1471 
1472  // Create final reduction entry blocks for the atomic and non-atomic case.
1473  // Emit IR that dispatches control flow to one of the blocks based on the
1474  // reduction supporting the atomic mode.
1475  BasicBlock *NonAtomicRedBlock =
1476  BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
1477  BasicBlock *AtomicRedBlock =
1478  BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
      // Default destination is the continuation block; results 1 and 2 select
      // the non-atomic and atomic paths respectively.
1479  SwitchInst *Switch =
1480  Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
1481  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
1482  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
1483 
1484  // Populate the non-atomic reduction using the elementwise reduction function.
1485  // This loads the elements from the global and private variables and reduces
1486  // them before storing back the result to the global variable.
1487  Builder.SetInsertPoint(NonAtomicRedBlock);
1488  for (auto En : enumerate(ReductionInfos)) {
1489  const ReductionInfo &RI = En.value();
1490  Type *ValueType = RI.ElementType;
1491  Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
1492  "red.value." + Twine(En.index()));
1493  Value *PrivateRedValue =
1494  Builder.CreateLoad(ValueType, RI.PrivateVariable,
1495  "red.private.value." + Twine(En.index()));
      // Reduced is left uninitialized here; presumably ReductionGen takes it by
      // reference and assigns it -- confirm against the callback type.
1496  Value *Reduced;
1497  Builder.restoreIP(
1498  RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
      // The callback's returned insertion point having no block aborts the whole
      // emission with a default-constructed insertion point.
1499  if (!Builder.GetInsertBlock())
1500  return InsertPointTy();
1501  Builder.CreateStore(Reduced, RI.Variable);
1502  }
      // The non-atomic path ends with the matching end-reduce runtime call before
      // joining the continuation block.
1503  Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
1504  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
1505  : RuntimeFunction::OMPRTL___kmpc_end_reduce);
1506  Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
1507  Builder.CreateBr(ContinuationBlock);
1508 
1509  // Populate the atomic reduction using the atomic elementwise reduction
1510  // function. There are no loads/stores here because they will be happening
1511  // inside the atomic elementwise reduction.
1512  Builder.SetInsertPoint(AtomicRedBlock);
1513  if (CanGenerateAtomic) {
1514  for (const ReductionInfo &RI : ReductionInfos) {
1515  Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
1516  RI.Variable, RI.PrivateVariable));
1517  if (!Builder.GetInsertBlock())
1518  return InsertPointTy();
1519  }
1520  Builder.CreateBr(ContinuationBlock);
1521  } else {
      // Without atomic generators the atomic block is terminated with
      // `unreachable` instead of being populated.
1522  Builder.CreateUnreachable();
1523  }
1524 
1525  // Populate the outlined reduction function using the elementwise reduction
1526  // function. Partial values are extracted from the type-erased array of
1527  // pointers to private variables.
1528  BasicBlock *ReductionFuncBlock =
1529  BasicBlock::Create(Module->getContext(), "", ReductionFunc);
1530  Builder.SetInsertPoint(ReductionFuncBlock);
      // Both i8* arguments are reinterpreted as pointers to the same array type
      // that was built at the call site.
1531  Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
1532  RedArrayTy->getPointerTo());
1533  Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
1534  RedArrayTy->getPointerTo());
1535  for (auto En : enumerate(ReductionInfos)) {
1536  const ReductionInfo &RI = En.value();
1537  Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1538  RedArrayTy, LHSArrayPtr, 0, En.index());
1539  Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
1540  Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
1541  Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
1542  Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1543  RedArrayTy, RHSArrayPtr, 0, En.index());
1544  Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
1545  Value *RHSPtr =
1546  Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
1547  Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
1548  Value *Reduced;
1549  Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
1550  if (!Builder.GetInsertBlock())
1551  return InsertPointTy();
      // The combined value is written back through the left-hand element pointer.
1552  Builder.CreateStore(Reduced, LHSPtr);
1553  }
1554  Builder.CreateRetVoid();
1555 
1556  Builder.SetInsertPoint(ContinuationBlock);
1557  return Builder.saveIP();
1558 }
1559 
// NOTE(review): listing lines 1560-1561 (the start of the signature,
// presumably declaring `Loc`) are missing from this extract -- confirm.
//
// Emits an OMPD_master region: creates the __kmpc_master and
// __kmpc_end_master calls with {ident, thread id} as arguments and hands both
// instructions, together with the body and finalization callbacks, to
// EmitOMPInlinedRegion as a conditional region with finalization. Returns
// Loc.IP unchanged when updateToLocation(Loc) fails.
1562  BodyGenCallbackTy BodyGenCB,
1563  FinalizeCallbackTy FiniCB) {
1564 
1565  if (!updateToLocation(Loc))
1566  return Loc.IP;
1567 
1568  Directive OMPD = Directive::OMPD_master;
1569  uint32_t SrcLocStrSize;
1570  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1571  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1572  Value *ThreadId = getOrCreateThreadID(Ident);
      // The same argument list is used for the entry and the exit call.
1573  Value *Args[] = {Ident, ThreadId};
1574 
1575  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
1576  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1577 
      // Both calls are created here back to back; presumably
      // EmitOMPInlinedRegion places the region body between them -- confirm.
1578  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
1579  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1580 
1581  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1582  /*Conditional*/ true, /*hasFinalize*/ true);
1583 }
1584 
// NOTE(review): listing lines 1585-1586 (the start of the signature,
// presumably declaring `Loc`) are missing from this extract -- confirm.
//
// Emits an OMPD_masked region: creates the __kmpc_masked call with
// {ident, thread id, Filter} and the __kmpc_end_masked call with
// {ident, thread id}, then hands both instructions and the callbacks to
// EmitOMPInlinedRegion as a conditional region with finalization. Returns
// Loc.IP unchanged when updateToLocation(Loc) fails.
1587  BodyGenCallbackTy BodyGenCB,
1588  FinalizeCallbackTy FiniCB, Value *Filter) {
1589  if (!updateToLocation(Loc))
1590  return Loc.IP;
1591 
1592  Directive OMPD = Directive::OMPD_masked;
1593  uint32_t SrcLocStrSize;
1594  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1595  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1596  Value *ThreadId = getOrCreateThreadID(Ident);
      // Only the entry call receives the filter value.
1597  Value *Args[] = {Ident, ThreadId, Filter};
1598  Value *ArgsEnd[] = {Ident, ThreadId};
1599 
1600  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
1601  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1602 
      // Both calls are created here back to back; presumably
      // EmitOMPInlinedRegion places the region body between them -- confirm.
1603  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
1604  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
1605 
1606  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1607  /*Conditional*/ true, /*hasFinalize*/ true);
1608 }
1609 
// NOTE(review): the signature line (listing line 1610) is missing from this
// extract. The trip-count loop builder elsewhere in this file calls
// `createLoopSkeleton(...)` with matching arguments, which is presumably this
// function -- confirm.
//
// Creates the seven blocks of a canonical loop in F and wires them up:
//   preheader -> header -> cond -> (body | exit), body -> inc -> header,
//   exit -> after.
// The induction variable is a PHI in the header that starts at 0 and is
// incremented by 1 in the latch ("inc") block; the loop continues while it is
// unsigned-less-than TripCount. The "after" block receives no terminator here.
// Preheader, header, cond and body are inserted before PreInsertBefore; latch,
// exit and after before PostInsertBefore. Returns the CanonicalLoopInfo record
// that was added to the front of LoopInfos.
1611  DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
1612  BasicBlock *PostInsertBefore, const Twine &Name) {
1613  Module *M = F->getParent();
1614  LLVMContext &Ctx = M->getContext();
      // The induction variable has the same type as the trip count.
1615  Type *IndVarTy = TripCount->getType();
1616 
1617  // Create the basic block structure.
1618  BasicBlock *Preheader =
1619  BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
1620  BasicBlock *Header =
1621  BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
1622  BasicBlock *Cond =
1623  BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
1624  BasicBlock *Body =
1625  BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
1626  BasicBlock *Latch =
1627  BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
1628  BasicBlock *Exit =
1629  BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
1630  BasicBlock *After =
1631  BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
1632 
1633  // Use specified DebugLoc for new instructions.
1634  Builder.SetCurrentDebugLocation(DL);
1635 
1636  Builder.SetInsertPoint(Preheader);
1637  Builder.CreateBr(Header);
1638 
1639  Builder.SetInsertPoint(Header);
      // Two incoming values: 0 from the preheader (added here) and the
      // incremented value from the latch (added once it exists, below).
1640  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
1641  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
1642  Builder.CreateBr(Cond);
1643 
1644  Builder.SetInsertPoint(Cond);
1645  Value *Cmp =
1646  Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
1647  Builder.CreateCondBr(Cmp, Body, Exit);
1648 
      // The body starts out as a single branch to the latch.
1649  Builder.SetInsertPoint(Body);
1650  Builder.CreateBr(Latch);
1651 
1652  Builder.SetInsertPoint(Latch);
      // The increment is created with the no-unsigned-wrap flag.
1653  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
1654  "omp_" + Name + ".next", /*HasNUW=*/true);
1655  Builder.CreateBr(Header);
1656  IndVarPHI->addIncoming(Next, Latch);
1657 
1658  Builder.SetInsertPoint(Exit);
1659  Builder.CreateBr(After);
1660 
1661  // Remember and return the canonical control flow.
1662  LoopInfos.emplace_front();
1663  CanonicalLoopInfo *CL = &LoopInfos.front();
1664 
      // Only these four blocks are stored in the record; preheader, body and
      // after are not assigned here.
1665  CL->Header = Header;
1666  CL->Cond = Cond;
1667  CL->Latch = Latch;
1668  CL->Exit = Exit;
1669 
1670 #ifndef NDEBUG
1671  CL->assertOK();
1672 #endif
1673  return CL;
1674 }
1675 
// NOTE(review): listing lines 1676-1677 (the start of the signature,
// presumably declaring `Loc`) are missing from this extract -- confirm.
//
// Builds a canonical loop running TripCount iterations: creates the loop
// skeleton in the function containing Loc.IP, connects it to the CFG at Loc
// when the location is set, and then invokes BodyGenCB with the loop's body
// insertion point and induction variable. Returns the loop's
// CanonicalLoopInfo.
1678  LoopBodyGenCallbackTy BodyGenCB,
1679  Value *TripCount, const Twine &Name) {
      // NOTE(review): BB is dereferenced here before the "location is set" check
      // further down; presumably callers always pass a Loc.IP with a valid block
      // -- confirm.
1680  BasicBlock *BB = Loc.IP.getBlock();
1681  BasicBlock *NextBB = BB->getNextNode();
1682 
      // All skeleton blocks are inserted in front of the block that follows BB.
1683  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
1684  NextBB, NextBB, Name);
1685  BasicBlock *After = CL->getAfter();
1686 
1687  // If location is not set, don't connect the loop.
1688  if (updateToLocation(Loc)) {
1689  // Split the loop at the insertion point: Branch to the preheader and move
1690  // every following instruction to after the loop (the After BB). Also, the
1691  // new successor is the loop's after block.
1692  spliceBB(Builder, After, /*CreateBranch=*/false);
1693  Builder.CreateBr(CL->getPreheader());
1694  }
1695 
1696  // Emit the body content. We do it after connecting the loop to the CFG to
1697  // avoid that the callback encounters degenerate BBs.
1698  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
1699 
1700 #ifndef NDEBUG
1701  CL->assertOK();
1702 #endif
1703  return CL;
1704 }
1705 
// NOTE(review): the signature line (listing line 1706) is missing from this
// extract -- confirm the function name and return type against the full file.
//
// Builds a canonical loop from a Start/Stop/Step description. The trip count
// is computed at ComputeIP when that is set, otherwise at Loc, and is then
// passed on to the trip-count based createCanonicalLoop. Inside the loop the
// user-visible induction value is recomputed as `IV * Step + Start` and handed
// to BodyGenCB.
1707  const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
1708  Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
1709  InsertPointTy ComputeIP, const Twine &Name) {
1710 
1711  // Consider the following difficulties (assuming 8-bit signed integers):
1712  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1713  // DO I = 1, 100, 50
1714  // * A \p Step of INT_MIN cannot be normalized to a positive direction:
1715  // DO I = 100, 0, -128
1716 
1717  // Start, Stop and Step must be of the same integer type.
1718  auto *IndVarTy = cast<IntegerType>(Start->getType());
1719  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
1720  assert(IndVarTy == Step->getType() && "Step type mismatch");
1721 
1722  LocationDescription ComputeLoc =
1723  ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
      // The result of updateToLocation is not checked here.
1724  updateToLocation(ComputeLoc);
1725 
1726  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1727  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1728 
1729  // Like Step, but always positive.
1730  Value *Incr = Step;
1731 
1732  // Distance between Start and Stop; always positive.
1733  Value *Span;
1734 
1735  // Condition whether no iterations are executed at all, e.g. because
1736  // UB < LB.
1737  Value *ZeroCmp;
1738 
1739  if (IsSigned) {
1740  // Ensure that increment is positive. If not, negate and invert LB and UB.
1741  Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1742  Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1743  Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1744  Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
      // Signed subtraction: HasNUW=false, HasNSW=true.
1745  Span = Builder.CreateSub(UB, LB, "", false, true);
1746  ZeroCmp = Builder.CreateICmp(
1747  InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1748  } else {
      // Unsigned subtraction: HasNUW=true.
1749  Span = Builder.CreateSub(Stop, Start, "", true);
1750  ZeroCmp = Builder.CreateICmp(
1751  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1752  }
1753 
1754  Value *CountIfLooping;
1755  if (InclusiveStop) {
1756  CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1757  } else {
1758  // Avoid incrementing past stop since it could overflow.
1759  Value *CountIfTwo = Builder.CreateAdd(
1760  Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
      // NOTE(review): this statement is in the else-branch of
      // `if (InclusiveStop)`, so InclusiveStop is false here and the selected
      // predicate is always ICMP_ULE; the conditional is redundant.
1761  Value *OneCmp = Builder.CreateICmp(
1762  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1763  CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1764  }
      // Zero when the range is empty, otherwise the count computed above.
1765  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1766  "omp_" + Name + ".tripcount");
1767 
      // Body wrapper: maps the canonical IV back to the user's iteration space.
      // Captures by value. The `Span` declared inside the lambda is a new local
      // that shadows the outer Span; it holds IV * Step, not the Start/Stop
      // distance.
1768  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1769  Builder.restoreIP(CodeGenIP);
1770  Value *Span = Builder.CreateMul(IV, Step);
1771  Value *IndVar = Builder.CreateAdd(Span, Start);
1772  BodyGenCB(Builder.saveIP(), IndVar);
1773  };
      // When the trip count was computed at a separate ComputeIP the loop is
      // still created at Loc.IP; otherwise it continues at the Builder's current
      // position, right after the trip-count computation.
1774  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1775  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1776 }
1777 
1778 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1779 // static scheduling depending on `type`. Only i32 and i64 are supported by the
1780 // runtime. Always interpret integers as unsigned similarly to
1781 // CanonicalLoopInfo.
1783  OpenMPIRBuilder &OMPBuilder) {
1784  unsigned Bitwidth = Ty->getIntegerBitWidth();
1785  if (Bitwidth == 32)
1786  return OMPBuilder.getOrCreateRuntimeFunction(
1787  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1788  if (Bitwidth == 64)
1789  return OMPBuilder.getOrCreateRuntimeFunction(
1790  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1791  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1792 }
1793 
1795 OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
1796  InsertPointTy AllocaIP,
1797  bool NeedsBarrier) {
1798  assert(CLI->isValid() && "Requires a valid canonical loop");
1799  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
1800  "Require dedicated allocate IP");
1801 
1802  // Set up the source location value for OpenMP runtime.
1803  Builder.restoreIP(CLI->getPreheaderIP());
1804  Builder.SetCurrentDebugLocation(DL);
1805 
1806  uint32_t SrcLocStrSize;
1807  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
1808  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1809 
1810  // Declare useful OpenMP runtime functions.
1811  Value *IV = CLI->getIndVar();
1812  Type *IVTy = IV->getType();
1813  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1814  FunctionCallee StaticFini =
1815  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1816 
1817  // Allocate space for computed loop bounds as expected by the "init" function.
1818  Builder.restoreIP(AllocaIP);
1819  Type *I32Type = Type::getInt32Ty(M.getContext());
1820  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1821  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1822  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1823  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1824 
1825  // At the end of the preheader, prepare for calling the "init" function by
1826  // storing the current loop bounds into the allocated space. A canonical loop
1827  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1828  // and produces an inclusive upper bound.
1829  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1830  Constant *Zero = ConstantInt::get(IVTy, 0);
1831  Constant *One = ConstantInt::get(IVTy, 1);
1832  Builder.CreateStore(Zero, PLowerBound);
1833  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1834  Builder.CreateStore(UpperBound, PUpperBound);
1835  Builder.CreateStore(One, PStride);
1836 
1837  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1838 
1839  Constant *SchedulingType = ConstantInt::get(
1840  I32Type, static_cast<int>(OMPScheduleType::UnorderedStatic));
1841 
1842  // Call the "init" function and update the trip count of the loop with the
1843  // value it produced.
1844  Builder.CreateCall(StaticInit,
1845  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1846  PUpperBound, PStride, One, Zero});
1847  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
1848  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
1849  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1850  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1851  CLI->setTripCount(TripCount);
1852 
1853  // Update all uses of the induction variable except the one in the condition
1854  // block that compares it with the actual upper bound, and the increment in
1855  // the latch block.
1856 
1857  CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
1858  Builder.SetInsertPoint(CLI->getBody(),
1859  CLI->getBody()->getFirstInsertionPt());
1860  Builder.SetCurrentDebugLocation(DL);
1861  return Builder.CreateAdd(OldIV, LowerBound);
1862  });
1863 
1864  // In the "exit" block, call the "fini" function.
1865  Builder.SetInsertPoint(CLI->getExit(),
1866  CLI->getExit()->getTerminator()->getIterator());
1867  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1868 
1869  // Add the barrier if requested.
1870  if (NeedsBarrier)
1871  createBarrier(LocationDescription(Builder.saveIP(), DL),
1872  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1873  /* CheckCancelFlag */ false);
1874 
1875  InsertPointTy AfterIP = CLI->getAfterIP();
1876  CLI->invalidate();
1877 
1878  return AfterIP;
1879 }
1880 
1881 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
1882  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
1883  bool NeedsBarrier, Value *ChunkSize) {
1884  assert(CLI->isValid() && "Requires a valid canonical loop");
1885  assert(ChunkSize && "Chunk size is required");
1886 
1887  LLVMContext &Ctx = CLI->getFunction()->getContext();
1888  Value *IV = CLI->getIndVar();
1889  Value *OrigTripCount = CLI->getTripCount();
1890  Type *IVTy = IV->getType();
1891  assert(IVTy->getIntegerBitWidth() <= 64 &&
1892  "Max supported tripcount bitwidth is 64 bits");
1893  Type *InternalIVTy = IVTy->getIntegerBitWidth() <= 32 ? Type::getInt32Ty(Ctx)
1894  : Type::getInt64Ty(Ctx);
1895  Type *I32Type = Type::getInt32Ty(M.getContext());
1896  Constant *Zero = ConstantInt::get(InternalIVTy, 0);
1897  Constant *One = ConstantInt::get(InternalIVTy, 1);
1898 
1899  // Declare useful OpenMP runtime functions.
1900  FunctionCallee StaticInit =
1901  getKmpcForStaticInitForType(InternalIVTy, M, *this);
1902  FunctionCallee StaticFini =
1903  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1904 
1905  // Allocate space for computed loop bounds as expected by the "init" function.
1906  Builder.restoreIP(AllocaIP);
1907  Builder.SetCurrentDebugLocation(DL);
1908  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1909  Value *PLowerBound =
1910  Builder.CreateAlloca(InternalIVTy, nullptr, "p.lowerbound");
1911  Value *PUpperBound =
1912  Builder.CreateAlloca(InternalIVTy, nullptr, "p.upperbound");
1913  Value *PStride = Builder.CreateAlloca(InternalIVTy, nullptr, "p.stride");
1914 
1915  // Set up the source location value for the OpenMP runtime.
1916  Builder.restoreIP(CLI->getPreheaderIP());
1917  Builder.SetCurrentDebugLocation(DL);
1918 
1919  // TODO: Detect overflow in ubsan or max-out with current tripcount.
1920  Value *CastedChunkSize =
1921  Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy, "chunksize");
1922  Value *CastedTripCount =
1923  Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount");
1924 
1925  Constant *SchedulingType = ConstantInt::get(
1926  I32Type, static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
1927  Builder.CreateStore(Zero, PLowerBound);
1928  Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
1929  Builder.CreateStore(OrigUpperBound, PUpperBound);
1930  Builder.CreateStore(One, PStride);
1931 
1932  // Call the "init" function and update the trip count of the loop with the
1933  // value it produced.
1934  uint32_t SrcLocStrSize;
1935  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
1936  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1937  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1938  Builder.CreateCall(StaticInit,
1939  {/*loc=*/SrcLoc, /*global_tid=*/ThreadNum,
1940  /*schedtype=*/SchedulingType, /*plastiter=*/PLastIter,
1941  /*plower=*/PLowerBound, /*pupper=*/PUpperBound,
1942  /*pstride=*/PStride, /*incr=*/One,
1943  /*chunk=*/CastedChunkSize});
1944 
1945  // Load values written by the "init" function.
1946  Value *FirstChunkStart =
1947  Builder.CreateLoad(InternalIVTy, PLowerBound, "omp_firstchunk.lb");
1948  Value *FirstChunkStop =
1949  Builder.CreateLoad(InternalIVTy, PUpperBound, "omp_firstchunk.ub");
1950  Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
1951  Value *ChunkRange =
1952  Builder.CreateSub(FirstChunkEnd, FirstChunkStart, "omp_chunk.range");
1953  Value *NextChunkStride =
1954  Builder.CreateLoad(InternalIVTy, PStride, "omp_dispatch.stride");
1955 
1956  // Create outer "dispatch" loop for enumerating the chunks.
1957  BasicBlock *DispatchEnter = splitBB(Builder, true);
1958  Value *DispatchCounter;
1959  CanonicalLoopInfo *DispatchCLI = createCanonicalLoop(
1960  {Builder.saveIP(), DL},
1961  [&](InsertPointTy BodyIP, Value *Counter) { DispatchCounter = Counter; },
1962  FirstChunkStart, CastedTripCount, NextChunkStride,
1963  /*IsSigned=*/false, /*InclusiveStop=*/false, /*ComputeIP=*/{},
1964  "dispatch");
1965 
1966  // Remember the BasicBlocks of the dispatch loop we need, then invalidate to
1967  // not have to preserve the canonical invariant.
1968  BasicBlock *DispatchBody = DispatchCLI->getBody();
1969  BasicBlock *DispatchLatch = DispatchCLI->getLatch();
1970  BasicBlock *DispatchExit = DispatchCLI->getExit();
1971  BasicBlock *DispatchAfter = DispatchCLI->getAfter();
1972  DispatchCLI->invalidate();
1973 
1974  // Rewire the original loop to become the chunk loop inside the dispatch loop.
1975  redirectTo(DispatchAfter, CLI->getAfter(), DL);
1976  redirectTo(CLI->getExit(), DispatchLatch, DL);
1977  redirectTo(DispatchBody, DispatchEnter, DL);
1978 
1979  // Prepare the prolog of the chunk loop.
1980  Builder.restoreIP(CLI->getPreheaderIP());
1981  Builder.SetCurrentDebugLocation(DL);
1982 
1983  // Compute the number of iterations of the chunk loop.
1984  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1985  Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
1986  Value *IsLastChunk =
1987  Builder.CreateICmpUGE(ChunkEnd, CastedTripCount, "omp_chunk.is_last");
1988  Value *CountUntilOrigTripCount =
1989  Builder.CreateSub(CastedTripCount, DispatchCounter);
1990  Value *ChunkTripCount = Builder.CreateSelect(
1991  IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount");
1992  Value *BackcastedChunkTC =
1993  Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc");
1994  CLI->setTripCount(BackcastedChunkTC);
1995 
1996  // Update all uses of the induction variable except the one in the condition
1997  // block that compares it with the actual upper bound, and the increment in
1998  // the latch block.
1999  Value *BackcastedDispatchCounter =
2000  Builder.CreateTrunc(DispatchCounter, IVTy, "omp_dispatch.iv.trunc");
2001  CLI->mapIndVar([&](Instruction *) -> Value * {
2002  Builder.restoreIP(CLI->getBodyIP());
2003  return Builder.CreateAdd(IV, BackcastedDispatchCounter);
2004  });
2005 
2006  // In the "exit" block, call the "fini" function.
2007  Builder.SetInsertPoint(DispatchExit, DispatchExit->getFirstInsertionPt());
2008  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
2009 
2010  // Add the barrier if requested.
2011  if (NeedsBarrier)
2012  createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for,
2013  /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false);
2014 
2015 #ifndef NDEBUG
2016  // Even though we currently do not support applying additional methods to it,
2017  // the chunk loop should remain a canonical loop.
2018  CLI->assertOK();
2019 #endif
2020 
2021  return {DispatchAfter, DispatchAfter->getFirstInsertionPt()};
2022 }
2023 
2025  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
2026  bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind,
2027  llvm::Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier,
2028  bool HasNonmonotonicModifier, bool HasOrderedClause) {
2029  OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
2030  SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
2031  HasNonmonotonicModifier, HasOrderedClause);
2032 
2033  bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
2035  switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
2037  assert(!ChunkSize && "No chunk size with static-chunked schedule");
2038  if (IsOrdered)
2039  return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
2040  NeedsBarrier, ChunkSize);
2041  // FIXME: Monotonicity ignored?
2042  return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
2043 
2045  if (IsOrdered)
2046  return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
2047  NeedsBarrier, ChunkSize);
2048  // FIXME: Monotonicity ignored?
2049  return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier,
2050  ChunkSize);
2051 
2059  assert(!ChunkSize &&
2060  "schedule type does not support user-defined chunk sizes");
2067  return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
2068  NeedsBarrier, ChunkSize);
2069 
2070  default:
2071  llvm_unreachable("Unknown/unimplemented schedule kind");
2072  }
2073 }
2074 
2075 /// Returns an LLVM function to call for initializing loop bounds using OpenMP
2076 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
2077 /// the runtime. Always interpret integers as unsigned similarly to
2078 /// CanonicalLoopInfo.
2079 static FunctionCallee
2081  unsigned Bitwidth = Ty->getIntegerBitWidth();
2082  if (Bitwidth == 32)
2083  return OMPBuilder.getOrCreateRuntimeFunction(
2084  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
2085  if (Bitwidth == 64)
2086  return OMPBuilder.getOrCreateRuntimeFunction(
2087  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
2088  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
2089 }
2090 
2091 /// Returns an LLVM function to call for updating the next loop using OpenMP
2092 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
2093 /// the runtime. Always interpret integers as unsigned similarly to
2094 /// CanonicalLoopInfo.
2095 static FunctionCallee
2097  unsigned Bitwidth = Ty->getIntegerBitWidth();
2098  if (Bitwidth == 32)
2099  return OMPBuilder.getOrCreateRuntimeFunction(
2100  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
2101  if (Bitwidth == 64)
2102  return OMPBuilder.getOrCreateRuntimeFunction(
2103  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
2104  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
2105 }
2106 
2107 /// Returns an LLVM function to call for finalizing the dynamic loop using
2108 /// depending on `type`. Only i32 and i64 are supported by the runtime. Always
2109 /// interpret integers as unsigned similarly to CanonicalLoopInfo.
2110 static FunctionCallee
2112  unsigned Bitwidth = Ty->getIntegerBitWidth();
2113  if (Bitwidth == 32)
2114  return OMPBuilder.getOrCreateRuntimeFunction(
2115  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
2116  if (Bitwidth == 64)
2117  return OMPBuilder.getOrCreateRuntimeFunction(
2118  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
2119  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
2120 }
2121 
2122 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
2123  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
2124  OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
2125  assert(CLI->isValid() && "Requires a valid canonical loop");
2126  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
2127  "Require dedicated allocate IP");
2129  "Require valid schedule type");
2130 
2131  bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
2133 
2134  // Set up the source location value for OpenMP runtime.
2135  Builder.SetCurrentDebugLocation(DL);
2136 
2137  uint32_t SrcLocStrSize;
2138  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
2139  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2140 
2141  // Declare useful OpenMP runtime functions.
2142  Value *IV = CLI->getIndVar();
2143  Type *IVTy = IV->getType();
2144  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
2145  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
2146 
2147  // Allocate space for computed loop bounds as expected by the "init" function.
2148  Builder.restoreIP(AllocaIP);
2149  Type *I32Type = Type::getInt32Ty(M.getContext());
2150  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
2151  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
2152  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
2153  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
2154 
2155  // At the end of the preheader, prepare for calling the "init" function by
2156  // storing the current loop bounds into the allocated space. A canonical loop
2157  // always iterates from 0 to trip-count with step 1. Note that "init" expects
2158  // and produces an inclusive upper bound.
2159  BasicBlock *PreHeader = CLI->getPreheader();
2160  Builder.SetInsertPoint(PreHeader->getTerminator());
2161  Constant *One = ConstantInt::get(IVTy, 1);
2162  Builder.CreateStore(One, PLowerBound);
2163  Value *UpperBound = CLI->getTripCount();
2164  Builder.CreateStore(UpperBound, PUpperBound);
2165  Builder.CreateStore(One, PStride);
2166 
2167  BasicBlock *Header = CLI->getHeader();
2168  BasicBlock *Exit = CLI->getExit();
2169  BasicBlock *Cond = CLI->getCond();
2170  BasicBlock *Latch = CLI->getLatch();
2171  InsertPointTy AfterIP = CLI->getAfterIP();
2172 
2173  // The CLI will be "broken" in the code below, as the loop is no longer
2174  // a valid canonical loop.
2175 
2176  if (!Chunk)
2177  Chunk = One;
2178 
2179  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
2180 
2181  Constant *SchedulingType =
2182  ConstantInt::get(I32Type, static_cast<int>(SchedType));
2183 
2184  // Call the "init" function.
2185  Builder.CreateCall(DynamicInit,
2186  {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
2187  UpperBound, /* step */ One, Chunk});
2188 
2189  // An outer loop around the existing one.
2190  BasicBlock *OuterCond = BasicBlock::Create(
2191  PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
2192  PreHeader->getParent());
2193  // This needs to be 32-bit always, so can't use the IVTy Zero above.
2194  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
2195  Value *Res =
2196  Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
2197  PLowerBound, PUpperBound, PStride});
2198  Constant *Zero32 = ConstantInt::get(I32Type, 0);
2199  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
2200  Value *LowerBound =
2201  Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
2202  Builder.CreateCondBr(MoreWork, Header, Exit);
2203 
2204  // Change PHI-node in loop header to use outer cond rather than preheader,
2205  // and set IV to the LowerBound.
2206  Instruction *Phi = &Header->front();
2207  auto *PI = cast<PHINode>(Phi);
2208  PI->setIncomingBlock(0, OuterCond);
2209  PI->setIncomingValue(0, LowerBound);
2210 
2211  // Then set the pre-header to jump to the OuterCond
2212  Instruction *Term = PreHeader->getTerminator();
2213  auto *Br = cast<BranchInst>(Term);
2214  Br->setSuccessor(0, OuterCond);
2215 
2216  // Modify the inner condition:
2217  // * Use the UpperBound returned from the DynamicNext call.
2218  // * jump to the loop outer loop when done with one of the inner loops.
2219  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
2220  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
2221  Instruction *Comp = &*Builder.GetInsertPoint();
2222  auto *CI = cast<CmpInst>(Comp);
2223  CI->setOperand(1, UpperBound);
2224  // Redirect the inner exit to branch to outer condition.
2225  Instruction *Branch = &Cond->back();
2226  auto *BI = cast<BranchInst>(Branch);
2227  assert(BI->getSuccessor(1) == Exit);
2228  BI->setSuccessor(1, OuterCond);
2229 
2230  // Call the "fini" function if "ordered" is present in wsloop directive.
2231  if (Ordered) {
2232  Builder.SetInsertPoint(&Latch->back());
2233  FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this);
2234  Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
2235  }
2236 
2237  // Add the barrier if requested.
2238  if (NeedsBarrier) {
2239  Builder.SetInsertPoint(&Exit->back());
2240  createBarrier(LocationDescription(Builder.saveIP(), DL),
2241  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
2242  /* CheckCancelFlag */ false);
2243  }
2244 
2245  CLI->invalidate();
2246  return AfterIP;
2247 }
2248 
2249 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
2250 /// after this \p OldTarget will be orphaned.
2251 static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
2252  BasicBlock *NewTarget, DebugLoc DL) {
2253  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
2254  redirectTo(Pred, NewTarget, DL);
2255 }
2256 
2257 /// Determine which blocks in \p BBs are reachable from outside and remove the
2258 /// ones that are not reachable from the function.
2260  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
2261  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
2262  for (Use &U : BB->uses()) {
2263  auto *UseInst = dyn_cast<Instruction>(U.getUser());
2264  if (!UseInst)
2265  continue;
2266  if (BBsToErase.count(UseInst->getParent()))
2267  continue;
2268  return true;
2269  }
2270  return false;
2271  };
2272 
2273  while (true) {
2274  bool Changed = false;
2275  for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
2276  if (HasRemainingUses(BB)) {
2277  BBsToErase.erase(BB);
2278  Changed = true;
2279  }
2280  }
2281  if (!Changed)
2282  break;
2283  }
2284 
2285  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
2286  DeleteDeadBlocks(BBVec);
2287 }
2288 
2291  InsertPointTy ComputeIP) {
2292  assert(Loops.size() >= 1 && "At least one loop required");
2293  size_t NumLoops = Loops.size();
2294 
2295  // Nothing to do if there is already just one loop.
2296  if (NumLoops == 1)
2297  return Loops.front();
2298 
2299  CanonicalLoopInfo *Outermost = Loops.front();
2300  CanonicalLoopInfo *Innermost = Loops.back();
2301  BasicBlock *OrigPreheader = Outermost->getPreheader();
2302  BasicBlock *OrigAfter = Outermost->getAfter();
2303  Function *F = OrigPreheader->getParent();
2304 
2305  // Loop control blocks that may become orphaned later.
2306  SmallVector<BasicBlock *, 12> OldControlBBs;
2307  OldControlBBs.reserve(6 * Loops.size());
2308  for (CanonicalLoopInfo *Loop : Loops)
2309  Loop->collectControlBlocks(OldControlBBs);
2310 
2311  // Setup the IRBuilder for inserting the trip count computation.
2312  Builder.SetCurrentDebugLocation(DL);
2313  if (ComputeIP.isSet())
2314  Builder.restoreIP(ComputeIP);
2315  else
2316  Builder.restoreIP(Outermost->getPreheaderIP());
2317 
2318  // Derive the collapsed' loop trip count.
2319  // TODO: Find common/largest indvar type.
2320  Value *CollapsedTripCount = nullptr;
2321  for (CanonicalLoopInfo *L : Loops) {
2322  assert(L->isValid() &&
2323  "All loops to collapse must be valid canonical loops");
2324  Value *OrigTripCount = L->getTripCount();
2325  if (!CollapsedTripCount) {
2326  CollapsedTripCount = OrigTripCount;
2327  continue;
2328  }
2329 
2330  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
2331  CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
2332  {}, /*HasNUW=*/true);
2333  }
2334 
2335  // Create the collapsed loop control flow.
2336  CanonicalLoopInfo *Result =
2337  createLoopSkeleton(DL, CollapsedTripCount, F,
2338  OrigPreheader->getNextNode(), OrigAfter, "collapsed");
2339 
2340  // Build the collapsed loop body code.
2341  // Start with deriving the input loop induction variables from the collapsed
2342  // one, using a divmod scheme. To preserve the original loops' order, the
2343  // innermost loop use the least significant bits.
2344  Builder.restoreIP(Result->getBodyIP());
2345 
2346  Value *Leftover = Result->getIndVar();
2347  SmallVector<Value *> NewIndVars;
2348  NewIndVars.resize(NumLoops);
2349  for (int i = NumLoops - 1; i >= 1; --i) {
2350  Value *OrigTripCount = Loops[i]->getTripCount();
2351 
2352  Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
2353  NewIndVars[i] = NewIndVar;
2354 
2355  Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
2356  }
2357  // Outermost loop gets all the remaining bits.
2358  NewIndVars[0] = Leftover;
2359 
2360  // Construct the loop body control flow.
2361  // We progressively construct the branch structure following in direction of
2362  // the control flow, from the leading in-between code, the loop nest body, the
2363  // trailing in-between code, and rejoining the collapsed loop's latch.
2364  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
2365  // the ContinueBlock is set, continue with that block. If ContinuePred, use
2366  // its predecessors as sources.
2367  BasicBlock *ContinueBlock = Result->getBody();
2368  BasicBlock *ContinuePred = nullptr;
2369  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
2370  BasicBlock *NextSrc) {
2371  if (ContinueBlock)
2372  redirectTo(ContinueBlock, Dest, DL);
2373  else
2374  redirectAllPredecessorsTo(ContinuePred, Dest, DL);
2375 
2376  ContinueBlock = nullptr;
2377  ContinuePred = NextSrc;
2378  };
2379 
2380  // The code before the nested loop of each level.
2381  // Because we are sinking it into the nest, it will be executed more often
2382  // that the original loop. More sophisticated schemes could keep track of what
2383  // the in-between code is and instantiate it only once per thread.
2384  for (size_t i = 0; i < NumLoops - 1; ++i)
2385  ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
2386 
2387  // Connect the loop nest body.
2388  ContinueWith(Innermost->getBody(), Innermost->getLatch());
2389 
2390  // The code after the nested loop at each level.
2391  for (size_t i = NumLoops - 1; i > 0; --i)
2392  ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
2393 
2394  // Connect the finished loop to the collapsed loop latch.
2395  ContinueWith(Result->getLatch(), nullptr);
2396 
2397  // Replace the input loops with the new collapsed loop.
2398  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
2399  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
2400 
2401  // Replace the input loop indvars with the derived ones.
2402  for (size_t i = 0; i < NumLoops; ++i)
2403  Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
2404 
2405  // Remove unused parts of the input loops.
2406  removeUnusedBlocksFromParent(OldControlBBs);
2407 
2408  for (CanonicalLoopInfo *L : Loops)
2409  L->invalidate();
2410 
2411 #ifndef NDEBUG
2412  Result->assertOK();
2413 #endif
2414  return Result;
2415 }
2416 
2417 std::vector<CanonicalLoopInfo *>
2419  ArrayRef<Value *> TileSizes) {
2420  assert(TileSizes.size() == Loops.size() &&
2421  "Must pass as many tile sizes as there are loops");
2422  int NumLoops = Loops.size();
2423  assert(NumLoops >= 1 && "At least one loop to tile required");
2424 
2425  CanonicalLoopInfo *OutermostLoop = Loops.front();
2426  CanonicalLoopInfo *InnermostLoop = Loops.back();
2427  Function *F = OutermostLoop->getBody()->getParent();
2428  BasicBlock *InnerEnter = InnermostLoop->getBody();
2429  BasicBlock *InnerLatch = InnermostLoop->getLatch();
2430 
2431  // Loop control blocks that may become orphaned later.
2432  SmallVector<BasicBlock *, 12> OldControlBBs;
2433  OldControlBBs.reserve(6 * Loops.size());
2434  for (CanonicalLoopInfo *Loop : Loops)
2435  Loop->collectControlBlocks(OldControlBBs);
2436 
2437  // Collect original trip counts and induction variable to be accessible by
2438  // index. Also, the structure of the original loops is not preserved during
2439  // the construction of the tiled loops, so do it before we scavenge the BBs of
2440  // any original CanonicalLoopInfo.
2441  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
2442  for (CanonicalLoopInfo *L : Loops) {
2443  assert(L->isValid() && "All input loops must be valid canonical loops");
2444  OrigTripCounts.push_back(L->getTripCount());
2445  OrigIndVars.push_back(L->getIndVar());
2446  }
2447 
2448  // Collect the code between loop headers. These may contain SSA definitions
2449  // that are used in the loop nest body. To be usable with in the innermost
2450  // body, these BasicBlocks will be sunk into the loop nest body. That is,
2451  // these instructions may be executed more often than before the tiling.
2452  // TODO: It would be sufficient to only sink them into body of the
2453  // corresponding tile loop.
2455  for (int i = 0; i < NumLoops - 1; ++i) {
2456  CanonicalLoopInfo *Surrounding = Loops[i];
2457  CanonicalLoopInfo *Nested = Loops[i + 1];
2458 
2459  BasicBlock *EnterBB = Surrounding->getBody();
2460  BasicBlock *ExitBB = Nested->getHeader();
2461  InbetweenCode.emplace_back(EnterBB, ExitBB);
2462  }
2463 
2464  // Compute the trip counts of the floor loops.
2465  Builder.SetCurrentDebugLocation(DL);
2466  Builder.restoreIP(OutermostLoop->getPreheaderIP());
2467  SmallVector<Value *, 4> FloorCount, FloorRems;
2468  for (int i = 0; i < NumLoops; ++i) {
2469  Value *TileSize = TileSizes[i];
2470  Value *OrigTripCount = OrigTripCounts[i];
2471  Type *IVType = OrigTripCount->getType();
2472 
2473  Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
2474  Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
2475 
2476  // 0 if tripcount divides the tilesize, 1 otherwise.
2477  // 1 means we need an additional iteration for a partial tile.
2478  //
2479  // Unfortunately we cannot just use the roundup-formula
2480  // (tripcount + tilesize - 1)/tilesize
2481  // because the summation might overflow. We do not want introduce undefined
2482  // behavior when the untiled loop nest did not.
2483  Value *FloorTripOverflow =
2484  Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
2485 
2486  FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
2487  FloorTripCount =
2488  Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
2489  "omp_floor" + Twine(i) + ".tripcount", true);
2490 
2491  // Remember some values for later use.
2492  FloorCount.push_back(FloorTripCount);
2493  FloorRems.push_back(FloorTripRem);
2494  }
2495 
2496  // Generate the new loop nest, from the outermost to the innermost.
2497  std::vector<CanonicalLoopInfo *> Result;
2498  Result.reserve(NumLoops * 2);
2499 
2500  // The basic block of the surrounding loop that enters the nest generated
2501  // loop.
2502  BasicBlock *Enter = OutermostLoop->getPreheader();
2503 
2504  // The basic block of the surrounding loop where the inner code should
2505  // continue.
2506  BasicBlock *Continue = OutermostLoop->getAfter();
2507 
2508  // Where the next loop basic block should be inserted.
2509  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
2510 
2511  auto EmbeddNewLoop =
2512  [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
2513  Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
2514  CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
2515  DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
2516  redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
2517  redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
2518 
2519  // Setup the position where the next embedded loop connects to this loop.
2520  Enter = EmbeddedLoop->getBody();
2521  Continue = EmbeddedLoop->getLatch();
2522  OutroInsertBefore = EmbeddedLoop->getLatch();
2523  return EmbeddedLoop;
2524  };
2525 
2526  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
2527  const Twine &NameBase) {
2528  for (auto P : enumerate(TripCounts)) {
2529  CanonicalLoopInfo *EmbeddedLoop =
2530  EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
2531  Result.push_back(EmbeddedLoop);
2532  }
2533  };
2534 
2535  EmbeddNewLoops(FloorCount, "floor");
2536 
2537  // Within the innermost floor loop, emit the code that computes the tile
2538  // sizes.
2539  Builder.SetInsertPoint(Enter->getTerminator());
2540  SmallVector<Value *, 4> TileCounts;
2541  for (int i = 0; i < NumLoops; ++i) {
2542  CanonicalLoopInfo *FloorLoop = Result[i];
2543  Value *TileSize = TileSizes[i];
2544 
2545  Value *FloorIsEpilogue =
2546  Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
2547  Value *TileTripCount =
2548  Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
2549 
2550  TileCounts.push_back(TileTripCount);
2551  }
2552 
2553  // Create the tile loops.
2554  EmbeddNewLoops(TileCounts, "tile");
2555 
2556  // Insert the inbetween code into the body.
2557  BasicBlock *BodyEnter = Enter;
2558  BasicBlock *BodyEntered = nullptr;
2559  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
2560  BasicBlock *EnterBB = P.first;
2561  BasicBlock *ExitBB = P.second;
2562 
2563  if (BodyEnter)
2564  redirectTo(BodyEnter, EnterBB, DL);
2565  else
2566  redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
2567 
2568  BodyEnter = nullptr;
2569  BodyEntered = ExitBB;
2570  }
2571 
2572  // Append the original loop nest body into the generated loop nest body.
2573  if (BodyEnter)
2574  redirectTo(BodyEnter, InnerEnter, DL);
2575  else
2576  redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
2577  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
2578 
2579  // Replace the original induction variable with an induction variable computed
2580  // from the tile and floor induction variables.
2581  Builder.restoreIP(Result.back()->getBodyIP());
2582  for (int i = 0; i < NumLoops; ++i) {
2583  CanonicalLoopInfo *FloorLoop = Result[i];
2584  CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
2585  Value *OrigIndVar = OrigIndVars[i];
2586  Value *Size = TileSizes[i];
2587 
2588  Value *Scale =
2589  Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
2590  Value *Shift =
2591  Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
2592  OrigIndVar->replaceAllUsesWith(Shift);
2593  }
2594 
2595  // Remove unused parts of the original loops.
2596  removeUnusedBlocksFromParent(OldControlBBs);
2597 
2598  for (CanonicalLoopInfo *L : Loops)
2599  L->invalidate();
2600 
2601 #ifndef NDEBUG
2602  for (CanonicalLoopInfo *GenL : Result)
2603  GenL->assertOK();
2604 #endif
2605  return Result;
2606 }
2607 
2608 /// Attach loop metadata \p Properties to the loop described by \p Loop. If the
2609 /// loop already has metadata, the loop properties are appended.
2611  ArrayRef<Metadata *> Properties) {
2612  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
2613 
2614  // Nothing to do if no property to attach.
2615  if (Properties.empty())
2616  return;
2617 
2618  LLVMContext &Ctx = Loop->getFunction()->getContext();
2619  SmallVector<Metadata *> NewLoopProperties;
2620  NewLoopProperties.push_back(nullptr);
2621 
2622  // If the loop already has metadata, prepend it to the new metadata.
2623  BasicBlock *Latch = Loop->getLatch();
2624  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
2625  MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
2626  if (Existing)
2627  append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
2628 
2629  append_range(NewLoopProperties, Properties);
2630  MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
2631  LoopID->replaceOperandWith(0, LoopID);
2632 
2633  Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
2634 }
2635 
2636 /// Attach llvm.access.group metadata to the memref instructions of \p Block
2637 static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup,
2638  LoopInfo &LI) {
2639  for (Instruction &I : *Block) {
2640  if (I.mayReadOrWriteMemory()) {
2641  // TODO: This instruction may already have access group from
2642  // other pragmas e.g. #pragma clang loop vectorize. Append
2643  // so that the existing metadata is not overwritten.
2644  I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
2645  }
2646  }
2647 }
2648 
2650  LLVMContext &Ctx = Builder.getContext();
2652  Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2653  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
2654 }
2655 
2657  LLVMContext &Ctx = Builder.getContext();
2659  Loop, {
2660  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2661  });
2662 }
2663 
2665  LLVMContext &Ctx = Builder.getContext();
2666 
2667  Function *F = CanonicalLoop->getFunction();
2668 
2670  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
2671  FAM.registerPass([]() { return LoopAnalysis(); });
2672  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
2673 
2674  LoopAnalysis LIA;
2675  LoopInfo &&LI = LIA.run(*F, FAM);
2676 
2677  Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
2678 
2679  SmallSet<BasicBlock *, 8> Reachable;
2680 
2681  // Get the basic blocks from the loop in which memref instructions
2682  // can be found.
2683  // TODO: Generalize getting all blocks inside a CanonicalizeLoopInfo,
2684  // preferably without running any passes.
2685  for (BasicBlock *Block : L->getBlocks()) {
2686  if (Block == CanonicalLoop->getCond() ||
2687  Block == CanonicalLoop->getHeader())
2688  continue;
2689  Reachable.insert(Block);
2690  }
2691 
2692  // Add access group metadata to memory-access instructions.
2693  MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
2694  for (BasicBlock *BB : Reachable)
2695  addSimdMetadata(BB, AccessGroup, LI);
2696 
2697  // Use the above access group metadata to create loop level
2698  // metadata, which should be distinct for each loop.
2699  ConstantAsMetadata *BoolConst =
2701  // TODO: If the loop has existing parallel access metadata, have
2702  // to combine two lists.
2704  CanonicalLoop,
2705  {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"),
2706  AccessGroup}),
2707  MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
2708  BoolConst})});
2709 }
2710 
2711 /// Create the TargetMachine object to query the backend for optimization
2712 /// preferences.
2713 ///
2714 /// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
2715 /// e.g. Clang does not pass it to its CodeGen layer and creates it only when
2716 /// needed for the LLVM pass pipeline. We use some default options to avoid
2717 /// having to pass too many settings from the frontend that probably do not
2718 /// matter.
2719 ///
2720 /// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
2721 /// method. If we are going to use TargetMachine for more purposes, especially
2722 /// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
2723 /// might become worth requiring front-ends to pass on their TargetMachine,
2724 /// or at least cache it between methods. Note that while frontends such as Clang
2725 /// have just a single main TargetMachine per translation unit, "target-cpu" and
2726 /// "target-features" that determine the TargetMachine are per-function and can
2727 /// be overridden using __attribute__((target("OPTIONS"))).
2728 static std::unique_ptr<TargetMachine>
2730  Module *M = F->getParent();
2731 
2732  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
2733  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
2734  const std::string &Triple = M->getTargetTriple();
2735 
2736  std::string Error;
2738  if (!TheTarget)
2739  return {};
2740 
2742  return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
2743  Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
2744  OptLevel));
2745 }
2746 
2747 /// Heuristically determine the best-performant unroll factor for \p CLI. This
2748 /// depends on the target processor. We are re-using the same heuristics as the
2749 /// LoopUnrollPass.
2751  Function *F = CLI->getFunction();
2752 
2753  // Assume the user requests the most aggressive unrolling, even if the rest of
2754  // the code is optimized using a lower setting.
2756  std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
2757 
2759  FAM.registerPass([]() { return TargetLibraryAnalysis(); });
2760  FAM.registerPass([]() { return AssumptionAnalysis(); });
2761  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
2762  FAM.registerPass([]() { return LoopAnalysis(); });
2763  FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
2764  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
2765  TargetIRAnalysis TIRA;
2766  if (TM)
2767  TIRA = TargetIRAnalysis(
2768  [&](const Function &F) { return TM->getTargetTransformInfo(F); });
2769  FAM.registerPass([&]() { return TIRA; });
2770 
2771  TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
2773  ScalarEvolution &&SE = SEA.run(*F, FAM);
2775  DominatorTree &&DT = DTA.run(*F, FAM);
2776  LoopAnalysis LIA;
2777  LoopInfo &&LI = LIA.run(*F, FAM);
2778  AssumptionAnalysis ACT;
2779  AssumptionCache &&AC = ACT.run(*F, FAM);
2781 
2782  Loop *L = LI.getLoopFor(CLI->getHeader());
2783  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
2784 
2787  /*BlockFrequencyInfo=*/nullptr,
2788  /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
2789  /*UserThreshold=*/None,
2790  /*UserCount=*/None,
2791  /*UserAllowPartial=*/true,
2792  /*UserAllowRuntime=*/true,
2793  /*UserUpperBound=*/None,
2794  /*UserFullUnrollMaxCount=*/None);
2795 
2796  UP.Force = true;
2797 
2798  // Account for additional optimizations taking place before the LoopUnrollPass
2799  // would unroll the loop.
2802 
2803  // Use normal unroll factors even if the rest of the code is optimized for
2804  // size.
2805  UP.OptSizeThreshold = UP.Threshold;
2807 
2808  LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
2809  << " Threshold=" << UP.Threshold << "\n"
2810  << " PartialThreshold=" << UP.PartialThreshold << "\n"
2811  << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
2812  << " PartialOptSizeThreshold="
2813  << UP.PartialOptSizeThreshold << "\n");
2814 
2815  // Disable peeling.
2818  /*UserAllowPeeling=*/false,
2819  /*UserAllowProfileBasedPeeling=*/false,
2820  /*UnrollingSpecficValues=*/false);
2821 
2823  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
2824 
2825  // Assume that reads and writes to stack variables can be eliminated by
2826  // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
2827  // size.
2828  for (BasicBlock *BB : L->blocks()) {
2829  for (Instruction &I : *BB) {
2830  Value *Ptr;
2831  if (auto *Load = dyn_cast<LoadInst>(&I)) {
2832  Ptr = Load->getPointerOperand();
2833  } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
2834  Ptr = Store->getPointerOperand();
2835  } else
2836  continue;
2837 
2838  Ptr = Ptr->stripPointerCasts();
2839 
2840  if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
2841  if (Alloca->getParent() == &F->getEntryBlock())
2842  EphValues.insert(&I);
2843  }
2844  }
2845  }
2846 
2847  unsigned NumInlineCandidates;
2848  bool NotDuplicatable;
2849  bool Convergent;
2850  unsigned LoopSize =
2851  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
2852  TTI, EphValues, UP.BEInsns);
2853  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
2854 
2855  // Loop is not unrollable if the loop contains certain instructions.
2856  if (NotDuplicatable || Convergent) {
2857  LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
2858  return 1;
2859  }
2860 
2861  // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
2862  // be able to use it.
2863  int TripCount = 0;
2864  int MaxTripCount = 0;
2865  bool MaxOrZero = false;
2866  unsigned TripMultiple = 0;
2867 
2868  bool UseUpperBound = false;
2869  computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
2870  MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
2871  UseUpperBound);
2872  unsigned Factor = UP.Count;
2873  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
2874 
2875  // This function returns 1 to signal to not unroll a loop.
2876  if (Factor == 0)
2877  return 1;
2878  return Factor;
2879 }
2880 
2882  int32_t Factor,
2883  CanonicalLoopInfo **UnrolledCLI) {
2884  assert(Factor >= 0 && "Unroll factor must not be negative");
2885 
2886  Function *F = Loop->getFunction();
2887  LLVMContext &Ctx = F->getContext();
2888 
2889  // If the unrolled loop is not used for another loop-associated directive, it
2890  // is sufficient to add metadata for the LoopUnrollPass.
2891  if (!UnrolledCLI) {
2892  SmallVector<Metadata *, 2> LoopMetadata;
2893  LoopMetadata.push_back(
2894  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
2895 
2896  if (Factor >= 1) {
2898  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2899  LoopMetadata.push_back(MDNode::get(
2900  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
2901  }
2902 
2903  addLoopMetadata(Loop, LoopMetadata);
2904  return;
2905  }
2906 
2907  // Heuristically determine the unroll factor.
2908  if (Factor == 0)
2910 
2911  // No change required with unroll factor 1.
2912  if (Factor == 1) {
2913  *UnrolledCLI = Loop;
2914  return;
2915  }
2916 
2917  assert(Factor >= 2 &&
2918  "unrolling only makes sense with a factor of 2 or larger");
2919 
2920  Type *IndVarTy = Loop->getIndVarType();
2921 
2922  // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
2923  // unroll the inner loop.
2924  Value *FactorVal =
2925  ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
2926  /*isSigned=*/false));
2927  std::vector<CanonicalLoopInfo *> LoopNest =
2928  tileLoops(DL, {Loop}, {FactorVal});
2929  assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
2930  *UnrolledCLI = LoopNest[0];
2931  CanonicalLoopInfo *InnerLoop = LoopNest[1];
2932 
2933  // LoopUnrollPass can only fully unroll loops with constant trip count.
2934  // Unroll by the unroll factor with a fallback epilog for the remainder
2935  // iterations if necessary.
2937  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2939  InnerLoop,
2940  {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2941  MDNode::get(
2942  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
2943 
2944 #ifndef NDEBUG
2945  (*UnrolledCLI)->assertOK();
2946 #endif
2947 }
2948 
2951  llvm::Value *BufSize, llvm::Value *CpyBuf,
2952  llvm::Value *CpyFn, llvm::Value *DidIt) {
2953  if (!updateToLocation(Loc))
2954  return Loc.IP;
2955 
2956  uint32_t SrcLocStrSize;
2957  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2958  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2959  Value *ThreadId = getOrCreateThreadID(Ident);
2960 
2961  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
2962 
2963  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
2964 
2965  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
2966  Builder.CreateCall(Fn, Args);
2967 
2968  return Builder.saveIP();
2969 }
2970 
2972  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2973  FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) {
2974 
2975  if (!updateToLocation(Loc))
2976  return Loc.IP;
2977 
2978  // If needed (i.e. not null), initialize `DidIt` with 0
2979  if (DidIt) {
2980  Builder.CreateStore(Builder.getInt32(0), DidIt);
2981  }
2982 
2983  Directive OMPD = Directive::OMPD_single;
2984  uint32_t SrcLocStrSize;
2985  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2986  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2987  Value *ThreadId = getOrCreateThreadID(Ident);
2988  Value *Args[] = {Ident, ThreadId};
2989 
2990  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
2991  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2992 
2993  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
2994  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2995 
2996  // generates the following:
2997  // if (__kmpc_single()) {
2998  // .... single region ...
2999  // __kmpc_end_single
3000  // }
3001  // __kmpc_barrier
3002 
3003  EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3004  /*Conditional*/ true,
3005  /*hasFinalize*/ true);
3006  if (!IsNowait)
3007  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
3008  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
3009  /* CheckCancelFlag */ false);
3010  return Builder.saveIP();
3011 }
3012 
3014  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
3015  FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
3016 
3017  if (!updateToLocation(Loc))
3018  return Loc.IP;
3019 
3020  Directive OMPD = Directive::OMPD_critical;
3021  uint32_t SrcLocStrSize;
3022  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3023  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3024  Value *ThreadId = getOrCreateThreadID(Ident);
3025  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
3026  Value *Args[] = {Ident, ThreadId, LockVar};
3027 
3029  Function *RTFn = nullptr;
3030  if (HintInst) {
3031  // Add Hint to entry Args and create call
3032  EnterArgs.push_back(HintInst);
3033  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
3034  } else {
3035  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
3036  }
3037  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
3038 
3039  Function *ExitRTLFn =
3040  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
3041  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
3042 
3043  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3044  /*Conditional*/ false, /*hasFinalize*/ true);
3045 }
3046 
3049  InsertPointTy AllocaIP, unsigned NumLoops,
3050  ArrayRef<llvm::Value *> StoreValues,
3051  const Twine &Name, bool IsDependSource) {
3052  for (size_t I = 0; I < StoreValues.size(); I++)
3053  assert(StoreValues[I]->getType()->isIntegerTy(64) &&
3054  "OpenMP runtime requires depend vec with i64 type");
3055 
3056  if (!updateToLocation(Loc))
3057  return Loc.IP;
3058 
3059  // Allocate space for vector and generate alloc instruction.
3060  auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
3061  Builder.restoreIP(AllocaIP);
3062  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
3063  ArgsBase->setAlignment(Align(8));
3064  Builder.restoreIP(Loc.IP);
3065 
3066  // Store the index value with offset in depend vector.
3067  for (unsigned I = 0; I < NumLoops; ++I) {
3068  Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
3069  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
3070  StoreInst *STInst = Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
3071  STInst->setAlignment(Align(8));
3072  }
3073 
3074  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
3075  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
3076 
3077  uint32_t SrcLocStrSize;
3078  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3079  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3080  Value *ThreadId = getOrCreateThreadID(Ident);
3081  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
3082 
3083  Function *RTLFn = nullptr;
3084  if (IsDependSource)
3085  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
3086  else
3087  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
3088  Builder.CreateCall(RTLFn, Args);
3089 
3090  return Builder.saveIP();
3091 }
3092 
3094  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
3095  FinalizeCallbackTy FiniCB, bool IsThreads) {
3096  if (!updateToLocation(Loc))
3097  return Loc.IP;
3098 
3099  Directive OMPD = Directive::OMPD_ordered;
3100  Instruction *EntryCall = nullptr;
3101  Instruction *ExitCall = nullptr;
3102 
3103  if (IsThreads) {
3104  uint32_t SrcLocStrSize;
3105  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3106  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3107  Value *ThreadId = getOrCreateThreadID(Ident);
3108  Value *Args[] = {Ident, ThreadId};
3109 
3110  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
3111  EntryCall = Builder.CreateCall(EntryRTLFn, Args);
3112 
3113  Function *ExitRTLFn =
3114  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
3115  ExitCall = Builder.CreateCall(ExitRTLFn, Args);
3116  }
3117 
3118  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3119  /*Conditional*/ false, /*hasFinalize*/ true);
3120 }
3121 
3122 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
3123  Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
3124  BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
3125  bool HasFinalize, bool IsCancellable) {
3126 
3127  if (HasFinalize)
3128  FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
3129 
3130  // Create inlined region's entry and body blocks, in preparation
3131  // for conditional creation
3132  BasicBlock *EntryBB = Builder.GetInsertBlock();
3133  Instruction *SplitPos = EntryBB->getTerminator();
3134  if (!isa_and_nonnull<BranchInst>(SplitPos))
3135  SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
3136  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
3137  BasicBlock *FiniBB =
3138  EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
3139 
3140  Builder.SetInsertPoint(EntryBB->getTerminator());
3141  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
3142 
3143  // generate body
3144  BodyGenCB(/* AllocaIP */ InsertPointTy(),
3145  /* CodeGenIP */ Builder.saveIP());
3146 
3147  // emit exit call and do any needed finalization.
3148  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
3149  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
3150  FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
3151  "Unexpected control flow graph state!!");
3152  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
3153  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
3154  "Unexpected Control Flow State!");
3155  MergeBlockIntoPredecessor(FiniBB);
3156 
3157  // If we are skipping the region of a non conditional, remove the exit
3158  // block, and clear the builder's insertion point.
3159  assert(SplitPos->getParent() == ExitBB &&
3160  "Unexpected Insertion point location!");
3161  auto merged = MergeBlockIntoPredecessor(ExitBB);
3162  BasicBlock *ExitPredBB = SplitPos->getParent();
3163  auto InsertBB = merged ? ExitPredBB : ExitBB;
3164  if (!isa_and_nonnull<BranchInst>(SplitPos))
3165  SplitPos->eraseFromParent();
3166  Builder.SetInsertPoint(InsertBB);
3167 
3168  return Builder.saveIP();
3169 }
3170 
3171 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
3172  Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
3173  // if nothing to do, Return current insertion point.
3174  if (!Conditional || !EntryCall)
3175  return Builder.saveIP();
3176 
3177  BasicBlock *EntryBB = Builder.GetInsertBlock();
3178  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
3179  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
3180  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
3181 
3182  // Emit thenBB and set the Builder's insertion point there for
3183  // body generation next. Place the block after the current block.
3184  Function *CurFn = EntryBB->getParent();
3185  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
3186 
3187  // Move Entry branch to end of ThenBB, and replace with conditional
3188  // branch (If-stmt)
3189  Instruction *EntryBBTI = EntryBB->getTerminator();
3190  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
3191  EntryBBTI->removeFromParent();
3192  Builder.SetInsertPoint(UI);
3193  Builder.Insert(EntryBBTI);
3194  UI->eraseFromParent();
3195  Builder.SetInsertPoint(ThenBB->getTerminator());
3196 
3197  // return an insertion point to ExitBB.
3198  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
3199 }
3200 
3201 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
3202  omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
3203  bool HasFinalize) {
3204 
3205  Builder.restoreIP(FinIP);
3206 
3207  // If there is finalization to do, emit it before the exit call
3208  if (HasFinalize) {
3209  assert(!FinalizationStack.empty() &&
3210  "Unexpected finalization stack state!");
3211 
3212  FinalizationInfo Fi = FinalizationStack.pop_back_val();
3213  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
3214 
3215  Fi.FiniCB(FinIP);
3216 
3217  BasicBlock *FiniBB = FinIP.getBlock();
3218  Instruction *FiniBBTI = FiniBB->getTerminator();
3219 
3220  // set Builder IP for call creation
3221  Builder.SetInsertPoint(FiniBBTI);
3222  }
3223 
3224  if (!ExitCall)
3225  return Builder.saveIP();
3226 
3227  // place the Exitcall as last instruction before Finalization block terminator
3228  ExitCall->removeFromParent();
3229  Builder.Insert(ExitCall);
3230 
3231  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
3232  ExitCall->getIterator());
3233 }
3234 
3236  InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
3237  llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
3238  if (!IP.isSet())
3239  return IP;
3240 
3242 
3243  // creates the following CFG structure
3244  // OMP_Entry : (MasterAddr != PrivateAddr)?
3245  // F T
3246  // | \
3247  // | copyin.not.master
3248  // | /
3249  // v /
3250  // copyin.not.master.end
3251  // |
3252  // v
3253  // OMP.Entry.Next
3254 
3255  BasicBlock *OMP_Entry = IP.getBlock();
3256  Function *CurFn = OMP_Entry->getParent();
3257  BasicBlock *CopyBegin =
3258  BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
3259  BasicBlock *CopyEnd = nullptr;
3260 
3261  // If entry block is terminated, split to preserve the branch to following
3262  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
3263  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
3264  CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
3265  "copyin.not.master.end");
3266  OMP_Entry->getTerminator()->eraseFromParent();
3267  } else {
3268  CopyEnd =
3269  BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
3270  }
3271 
3272  Builder.SetInsertPoint(OMP_Entry);
3273  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
3274  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
3275  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
3276  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
3277 
3278  Builder.SetInsertPoint(CopyBegin);
3279  if (BranchtoEnd)
3280  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
3281 
3282  return Builder.saveIP();
3283 }
3284 
3286  Value *Size, Value *Allocator,
3287  std::string Name) {
3289  Builder.restoreIP(Loc.IP);
3290 
3291  uint32_t SrcLocStrSize;
3292  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3293  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3294  Value *ThreadId = getOrCreateThreadID(Ident);
3295  Value *Args[] = {ThreadId, Size, Allocator};
3296 
3297  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
3298 
3299  return Builder.CreateCall(Fn, Args, Name);
3300 }
3301 
3304  std::string Name) {
3306  Builder.restoreIP(Loc.IP);
3307 
3308  uint32_t SrcLocStrSize;
3309  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3310  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3311  Value *ThreadId = getOrCreateThreadID(Ident);
3312  Value *Args[] = {ThreadId, Addr, Allocator};
3313  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
3314  return Builder.CreateCall(Fn, Args, Name);
3315 }
3316 
3318  const LocationDescription &Loc, Value *InteropVar,
3319  omp::OMPInteropType InteropType, Value *Device, Value *NumDependences,
3320  Value *DependenceAddress, bool HaveNowaitClause) {
3322  Builder.restoreIP(Loc.IP);
3323 
3324  uint32_t SrcLocStrSize;
3325  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3326  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3327  Value *ThreadId = getOrCreateThreadID(Ident);
3328  if (Device == nullptr)
3329  Device = ConstantInt::get(Int32, -1);
3330  Constant *InteropTypeVal = ConstantInt::get(Int64, (int)InteropType);
3331  if (NumDependences == nullptr) {
3332  NumDependences = ConstantInt::get(Int32, 0);
3333  PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
3334  DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
3335  }
3336  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
3337  Value *Args[] = {
3338  Ident, ThreadId, InteropVar, InteropTypeVal,
3339  Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
3340 
3341  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
3342 
3343  return Builder.CreateCall(Fn, Args);
3344 }
3345 
3347  const LocationDescription &Loc, Value *InteropVar, Value *Device,
3348  Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
3350  Builder.restoreIP(Loc.IP);
3351 
3352  uint32_t SrcLocStrSize;
3353  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3354  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3355  Value *ThreadId = getOrCreateThreadID(Ident);
3356  if (Device == nullptr)
3357  Device = ConstantInt::get(Int32, -1);
3358  if (NumDependences == nullptr) {
3359  NumDependences = ConstantInt::get(Int32, 0);
3360  PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
3361  DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
3362  }
3363  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
3364  Value *Args[] = {
3365  Ident, ThreadId, InteropVar, Device,
3366  NumDependences, DependenceAddress, HaveNowaitClauseVal};
3367 
3368  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
3369 
3370  return Builder.CreateCall(Fn, Args);
3371 }
3372 
3374  Value *InteropVar, Value *Device,
3375  Value *NumDependences,
3376  Value *DependenceAddress,
3377  bool HaveNowaitClause) {
// Emits a call to the __tgt_interop_use runtime entry at Loc.IP and returns
// the created call instruction.
// NOTE(review): the line carrying this function's name and its first
// parameter is missing from this listing; the body only shows that a `Loc`
// location description is in scope.
3379  Builder.restoreIP(Loc.IP);
3380  uint32_t SrcLocStrSize;
3381  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3382  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3383  Value *ThreadId = getOrCreateThreadID(Ident);
// A null Device is replaced by the Int32 constant -1.
3384  if (Device == nullptr)
3385  Device = ConstantInt::get(Int32, -1);
// Without a dependence count, pass a count of 0 and a null i8* address.
// NOTE(review): DependenceAddress is only defaulted together with
// NumDependences; a non-null count combined with a null address would reach
// Args as a null Value* -- confirm callers never do that.
3386  if (NumDependences == nullptr) {
3387  NumDependences = ConstantInt::get(Int32, 0);
3388  PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
3389  DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
3390  }
// The nowait flag is passed to the runtime as an Int32 constant.
3391  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
3392  Value *Args[] = {
3393  Ident, ThreadId, InteropVar, Device,
3394  NumDependences, DependenceAddress, HaveNowaitClauseVal};
3395 
3396  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
3397 
3398  return Builder.CreateCall(Fn, Args);
3399 }
3400 
3402  const LocationDescription &Loc, llvm::Value *Pointer,
3403  llvm::ConstantInt *Size, const llvm::Twine &Name) {
// Emits a call to __kmpc_threadprivate_cached at Loc.IP and returns it.
// The call receives the source-location ident, the thread id, Pointer, Size
// and the per-name cache variable created below.
3405  Builder.restoreIP(Loc.IP);
3406 
3407  uint32_t SrcLocStrSize;
3408  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3409  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3410  Value *ThreadId = getOrCreateThreadID(Ident);
// The cache is an OMP internal global of type Int8PtrPtr looked up (or
// created) under Name, so equal names share one cache variable.
3411  Constant *ThreadPrivateCache =
3412  getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
3413  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
3414 
3415  Function *Fn =
3416  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
3417 
3418  return Builder.CreateCall(Fn, Args);
3419 }
3420 
3423  bool RequiresFullRuntime) {
// Emits the __kmpc_target_init call followed by the branch that separates
// threads running user code from threads that return immediately. Returns
// the insertion point at the start of the new "user_code.entry" block, or
// Loc.IP unchanged when updateToLocation(Loc) fails.
// NOTE(review): the signature lines naming this function are missing from
// this listing; the name is inferred from the runtime function called.
3424  if (!updateToLocation(Loc))
3425  return Loc.IP;
3426 
3427  uint32_t SrcLocStrSize;
3428  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3429  Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3430  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
3431  IntegerType::getInt8Ty(Int8->getContext()),
// NOTE(review): the value argument of getSigned (listing line 3432) is
// missing here; presumably it is derived from IsSPMD -- confirm in the file.
3433  ConstantInt *UseGenericStateMachine =
3434  ConstantInt::getBool(Int32->getContext(), !IsSPMD);
3435  ConstantInt *RequiresFullRuntimeVal =
3436  ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
3437 
3438  Function *Fn = getOrCreateRuntimeFunctionPtr(
3439  omp::RuntimeFunction::OMPRTL___kmpc_target_init);
3440 
3441  CallInst *ThreadKind = Builder.CreateCall(
3442  Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
3443 
// A return value of -1 selects the user-code path.
3444  Value *ExecUserCode = Builder.CreateICmpEQ(
3445  ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
3446  "exec_user_code");
3447 
3448  // ThreadKind = __kmpc_target_init(...)
3449  // if (ThreadKind == -1)
3450  // user_code
3451  // else
3452  // return;
3453 
// A temporary `unreachable` serves as the split point: everything emitted so
// far stays in CheckBB and the new block starts at the placeholder.
3454  auto *UI = Builder.CreateUnreachable();
3455  BasicBlock *CheckBB = UI->getParent();
3456  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
3457 
// Block taken when ThreadKind != -1: it only returns void.
3458  BasicBlock *WorkerExitBB = BasicBlock::Create(
3459  CheckBB->getContext(), "worker.exit", CheckBB->getParent());
3460  Builder.SetInsertPoint(WorkerExitBB);
3461  Builder.CreateRetVoid();
3462 
// Replace the terminator left in CheckBB by the split with the conditional
// branch; after the split UI->getParent() is UserCodeEntryBB.
3463  auto *CheckBBTI = CheckBB->getTerminator();
3464  Builder.SetInsertPoint(CheckBBTI);
3465  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
3466 
// Drop the old terminator and the placeholder `unreachable`.
3467  CheckBBTI->eraseFromParent();
3468  UI->eraseFromParent();
3469 
3470  // Continue in the "user_code" block, see diagram above and in
3471  // openmp/libomptarget/deviceRTLs/common/include/target.h .
3472  return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
3473 }
3474 
3476  bool IsSPMD,
3477  bool RequiresFullRuntime) {
// Emits a call to __kmpc_target_deinit(Ident, IsSPMDVal,
// RequiresFullRuntimeVal) at Loc; emits nothing when updateToLocation(Loc)
// fails.
// NOTE(review): the signature line naming this function is missing from this
// listing; the name is inferred from the runtime function called.
3478  if (!updateToLocation(Loc))
3479  return;
3480 
3481  uint32_t SrcLocStrSize;
3482  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3483  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3484  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
3485  IntegerType::getInt8Ty(Int8->getContext()),
// NOTE(review): the value argument of getSigned (listing line 3486) is
// missing here; presumably it is derived from IsSPMD -- confirm in the file.
3487  ConstantInt *RequiresFullRuntimeVal =
3488  ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
3489 
3490  Function *Fn = getOrCreateRuntimeFunctionPtr(
3491  omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
3492 
3493  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
3494 }
3495 
3496 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
3497  StringRef FirstSeparator,
3498  StringRef Separator) {
3499  SmallString<128> Buffer;
3500  llvm::raw_svector_ostream OS(Buffer);
3501  StringRef Sep = FirstSeparator;
3502  for (StringRef Part : Parts) {
3503  OS << Sep << Part;
3504  Sep = Separator;
3505  }
3506  return OS.str().str();
3507 }
3508 
3509 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
3510  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3511  // TODO: Replace the twine arg with stringref to get rid of the conversion
3512  // logic. However This is taken from current implementation in clang as is.
3513  // Since this method is used in many places exclusively for OMP internal use
3514  // we will keep it as is for temporarily until we move all users to the
3515  // builder and then, if possible, fix it everywhere in one go.
3516  SmallString<256> Buffer;
3517  llvm::raw_svector_ostream Out(Buffer);
3518  Out << Name;
3519  StringRef RuntimeName = Out.str();
3520  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3521  if (Elem.second) {
3522  assert(cast<PointerType>(Elem.second->getType())
3523  ->isOpaqueOrPointeeTypeMatches(Ty) &&
3524  "OMP internal variable has different type than requested");
3525  } else {
3526  // TODO: investigate the appropriate linkage type used for the global
3527  // variable for possibly changing that to internal or private, or maybe
3528  // create different versions of the function for different OMP internal
3529  // variables.
3530  Elem.second = new llvm::GlobalVariable(
3531  M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
3532  llvm::Constant::getNullValue(Ty), Elem.first(),
3533  /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
3534  AddressSpace);
3535  }
3536 
3537  return Elem.second;
3538 }
3539 
3540 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
3541  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3542  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
3543  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
3544 }
3545 
3548  std::string VarName) {
// Creates and returns a private, constant global named VarName whose
// initializer is a ConstantDataArray built from `Mappings`. The global is
// marked unnamed_addr.
// NOTE(review): the signature lines declaring this function and `Mappings`
// are missing from this listing; the name is inferred from the local names.
3549  llvm::Constant *MaptypesArrayInit =
3550  llvm::ConstantDataArray::get(M.getContext(), Mappings);
3551  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
3552  M, MaptypesArrayInit->getType(),
3553  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
3554  VarName);
3555  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3556  return MaptypesArrayGlobal;
3557 }
3558 
3560  InsertPointTy AllocaIP,
3561  unsigned NumOperands,
3562  struct MapperAllocas &MapperAllocas) {
// Creates three allocas at AllocaIP -- two [NumOperands x Int8Ptr] arrays and
// one [NumOperands x Int64] array -- and records them in MapperAllocas.
// Does nothing when updateToLocation(Loc) fails.
// NOTE(review): the signature line naming this function is missing from this
// listing; the name is inferred from the out-parameter type.
3563  if (!updateToLocation(Loc))
3564  return;
3565 
3566  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
3567  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
// The allocas are emitted at AllocaIP; afterwards the builder is moved back
// to Loc.IP.
3568  Builder.restoreIP(AllocaIP);
3569  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
3570  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
3571  AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
3572  Builder.restoreIP(Loc.IP);
3573  MapperAllocas.ArgsBase = ArgsBase;
// NOTE(review): listing line 3574 is missing here; presumably it stores
// `Args` into MapperAllocas -- confirm in the file.
3575  MapperAllocas.ArgSizes = ArgSizes;
3576 }
3577 
3579  Function *MapperFunc, Value *SrcLocInfo,
3580  Value *MaptypesArg, Value *MapnamesArg,
3581  struct MapperAllocas &MapperAllocas,
3582  int64_t DeviceID, unsigned NumOperands) {
// Emits a call to MapperFunc, passing pointers to the first element of each
// array held in MapperAllocas. Does nothing when updateToLocation(Loc) fails.
// NOTE(review): the signature line naming this function is missing from this
// listing; the name is inferred from the body.
3583  if (!updateToLocation(Loc))
3584  return;
3585 
// These array types must match the ones used when the allocas were created.
3586  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
3587  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
// Each GEP uses indices {0, 0}, i.e. the address of element 0 of the array.
3588  Value *ArgsBaseGEP =
3589  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
3590  {Builder.getInt32(0), Builder.getInt32(0)});
3591  Value *ArgsGEP =
3592  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
3593  {Builder.getInt32(0), Builder.getInt32(0)});
3594  Value *ArgSizesGEP =
3595  Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
3596  {Builder.getInt32(0), Builder.getInt32(0)});
// The final call argument is always a null pointer of type Int8Ptr*.
3597  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
// DeviceID is passed as an i64 constant and NumOperands as an i32 constant.
3598  Builder.CreateCall(MapperFunc,
3599  {SrcLocInfo, Builder.getInt64(DeviceID),
3600  Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
3601  ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
3602 }
3603 
/// Decide from the atomic kind \p AK and the ordering \p AO whether a flush
/// has to follow an atomic operation and, if so, emit it with emitFlush(Loc).
///
/// \returns true if a flush was emitted, false otherwise.
///
/// NOTE(review): several lines of this function (the leading assert, the
/// declaration of FlushAO and the ordering conditions of the individual
/// cases) are missing from this listing; consult the real file for the exact
/// conditions.
3604 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
3605  const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
3608  "Unexpected Atomic Ordering.");
3609 
3610  bool Flush = false;
3612 
3613  switch (AK) {
// Read: when a flush is requested it is tagged as an acquire flush.
3614  case Read:
3617  FlushAO = AtomicOrdering::Acquire;
3618  Flush = true;
3619  }
3620  break;
// Write, Compare and Update share one rule: a requested flush is tagged as a
// release flush.
3621  case Write:
3622  case Compare:
3623  case Update:
3626  FlushAO = AtomicOrdering::Release;
3627  Flush = true;
3628  }
3629  break;
// Capture dispatches on the ordering itself; orderings without a matching
// case request no flush.
3630  case Capture:
3631  switch (AO) {
3633  FlushAO = AtomicOrdering::Acquire;
3634  Flush = true;
3635  break;
3637  FlushAO = AtomicOrdering::Release;
3638  Flush = true;
3639  break;
3643  Flush = true;
3644  break;
3645  default:
3646  // do nothing - leave silently.
3647  break;
3648  }
3649  }
3650 
3651  if (Flush) {
3652  // Currently Flush RT call still doesn't take memory_ordering, so for when
3653  // that happens, this tries to do the resolution of which atomic ordering
3654  // to use with but issue the flush call
3655  // TODO: pass `FlushAO` after memory ordering support is added
3656  (void)FlushAO;
3657  emitFlush(Loc);
3658  }
3659 
3660  // for AO == AtomicOrdering::Monotonic and all other case combinations
3661  // do nothing
3662  return Flush;
3663 }
3664 
3668  AtomicOrdering AO) {
// Emits an atomic load of X with ordering AO and stores the loaded value to
// V.Var. Returns the insertion point after the emitted code, or Loc.IP
// unchanged when updateToLocation(Loc) fails.
// NOTE(review): the signature lines naming this function and declaring Loc,
// X and V are missing from this listing; the name is inferred from the body.
3669  if (!updateToLocation(Loc))
3670  return Loc.IP;
3671 
3672  Type *XTy = X.Var->getType();
3673  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
3674  Type *XElemTy = X.ElemTy;
3675  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3676  XElemTy->isPointerTy()) &&
3677  "OMP atomic read expected a scalar type");
3678 
3679  Value *XRead = nullptr;
3680 
// Integer elements are loaded directly with an atomic load.
3681  if (XElemTy->isIntegerTy()) {
3682  LoadInst *XLD =
3683  Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
3684  XLD->setAtomic(AO);
3685  XRead = cast<Value>(XLD);
3686  } else {
3687  // We need to bitcast and perform atomic op as integer
// The integer width is taken from XElemTy->getScalarSizeInBits().
// NOTE(review): verify what that returns for pointer element types.
3688  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3689  IntegerType *IntCastTy =
3690  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3691  Value *XBCast = Builder.CreateBitCast(
3692  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
3693  LoadInst *XLoad =
3694  Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
3695  XLoad->setAtomic(AO);
// Convert the loaded integer back: bitcast for floating point, inttoptr for
// pointers.
3696  if (XElemTy->isFloatingPointTy()) {
3697  XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
3698  } else {
3699  XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
3700  }
3701  }
// A flush, if one is required, is emitted before the store to V.
3702  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
3703  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
3704  return Builder.saveIP();
3705 }
3706 
3709  AtomicOpValue &X, Value *Expr,
3710  AtomicOrdering AO) {
// Emits an atomic store of Expr to X with ordering AO. Returns the insertion
// point after the emitted code, or Loc.IP unchanged when
// updateToLocation(Loc) fails.
// NOTE(review): the signature line naming this function is missing from this
// listing; the name is inferred from the body.
3711  if (!updateToLocation(Loc))
3712  return Loc.IP;
3713 
3714  Type *XTy = X.Var->getType();
3715  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
3716  Type *XElemTy = X.ElemTy;
3717  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3718  XElemTy->isPointerTy()) &&
3719  "OMP atomic write expected a scalar type");
3720 
// Integer elements are stored directly with an atomic store.
3721  if (XElemTy->isIntegerTy()) {
3722  StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
3723  XSt->setAtomic(AO);
3724  } else {
3725  // We need to bitcast and perform atomic op as integers
// NOTE(review): Expr is converted with CreateBitCast for every non-integer
// element type, including pointers, whereas the atomic read path uses
// CreateIntToPtr for pointers -- confirm a pointer-typed Expr is valid here.
3726  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3727  IntegerType *IntCastTy =
3728  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3729  Value *XBCast = Builder.CreateBitCast(
3730  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
3731  Value *ExprCast =
3732  Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
3733  StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
3734  XSt->setAtomic(AO);
3735  }
3736 
3737  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
3738  return Builder.saveIP();
3739 }
3740 
3742  const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
3743  Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3744  AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
// Emits an atomic update of X by delegating to emitAtomicUpdate, followed by
// a flush when checkAndEmitFlushAfterAtomic decides one is needed. Returns
// the insertion point after the emitted code, or Loc.IP unchanged when
// updateToLocation(Loc) fails.
// NOTE(review): the signature line naming this function is missing from this
// listing; the name is inferred from the body.
3745  assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
3746  if (!updateToLocation(Loc))
3747  return Loc.IP;
3748 
// These sanity checks are wrapped in LLVM_DEBUG, so they are only part of the
// debug-output code path rather than unconditional asserts.
3749  LLVM_DEBUG({
3750  Type *XTy = X.Var->getType();
3751  assert(XTy->isPointerTy() &&
3752  "OMP Atomic expects a pointer to target memory");
3753  Type *XElemTy = X.ElemTy;
3754  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3755  XElemTy->isPointerTy()) &&
3756  "OMP atomic update expected a scalar type");
3757  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3758  (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
3759  "OpenMP atomic does not support LT or GT operations");
3760  });
3761 
// The pair returned by emitAtomicUpdate is not needed for a plain update.
3762  emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
3763  X.IsVolatile, IsXBinopExpr);
3764  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
3765  return Builder.saveIP();
3766 }
3767 
3768 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
3769  AtomicRMWInst::BinOp RMWOp) {
3770  switch (RMWOp) {
3771  case AtomicRMWInst::Add:
3772  return Builder.CreateAdd(Src1, Src2);
3773  case AtomicRMWInst::Sub:
3774  return Builder.CreateSub(Src1, Src2);
3775  case AtomicRMWInst::And:
3776  return Builder.CreateAnd(Src1, Src2);
3777  case AtomicRMWInst::Nand:
3778  return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
3779  case AtomicRMWInst::Or:
3780  return Builder.CreateOr(Src1, Src2);
3781  case AtomicRMWInst::Xor:
3782  return Builder.CreateXor(Src1, Src2);
3783  case AtomicRMWInst::Xchg:
3784  case AtomicRMWInst::FAdd:
3785  case AtomicRMWInst::FSub:
3787  case AtomicRMWInst::Max:
3788  case AtomicRMWInst::Min:
3789  case AtomicRMWInst::UMax:
3790  case AtomicRMWInst::UMin:
3791  llvm_unreachable("Unsupported atomic update operation");
3792  }
3793  llvm_unreachable("Unsupported atomic update operation");
3794 }
3795 
/// Emit the code that atomically updates the memory location \p X.
///
/// For integer element types and the operations selected by the switch below
/// a single `atomicrmw` is emitted. Otherwise a load followed by a
/// compare-exchange retry loop is generated in which \p UpdateOp computes the
/// new value from the old one.
///
/// \returns a pair whose first member is the value of X before the update and
/// whose second member is the updated value.
///
/// NOTE(review): the parameter line declaring AO and RMWOp and the statement
/// declaring `Failure` are missing from this listing.
3796 std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
3797  InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
3799  AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
3800  // TODO: handle the case where XElemTy is not byte-sized or not a power of 2
3801  // or a complex datatype.
3802  bool emitRMWOp = false;
3803  switch (RMWOp) {
3804  case AtomicRMWInst::Add:
3805  case AtomicRMWInst::And:
3806  case AtomicRMWInst::Nand:
3807  case AtomicRMWInst::Or:
3808  case AtomicRMWInst::Xor:
3809  case AtomicRMWInst::Xchg:
// NOTE(review): this assigns a pointer to a bool, so it is true whenever
// XElemTy is non-null; the actual type check happens after the switch.
3810  emitRMWOp = XElemTy;
3811  break;
// Sub is only eligible when IsXBinopExpr is set.
3812  case AtomicRMWInst::Sub:
3813  emitRMWOp = (IsXBinopExpr && XElemTy);
3814  break;
3815  default:
3816  emitRMWOp = false;
3817  }
// The atomicrmw form is only used for integer element types.
3818  emitRMWOp &= XElemTy->isIntegerTy();
3819 
3820  std::pair<Value *, Value *> Res;
3821  if (emitRMWOp) {
3822  Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
3823  // not needed except in case of postfix captures. Generate anyway for
3824  // consistency with the else part. Will be removed with any DCE pass.
3825  // AtomicRMWInst::Xchg does not have a coressponding instruction.
3826  if (RMWOp == AtomicRMWInst::Xchg)
3827  Res.second = Res.first;
3828  else
3829  Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
3830  } else {
// Compare-exchange path: X is accessed as an integer of the element's bit
// width.
3831  unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
3832  IntegerType *IntCastTy =
3833  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3834  Value *XBCast =
3835  Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3836  LoadInst *OldVal =
3837  Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
3838  OldVal->setAtomic(AO);
3839  // CurBB
3840  // | /---\
3841  // ContBB |
3842  // | \---/
3843  // ExitBB
// If the current block has no terminator yet, a temporary `unreachable` is
// created so the block can be split.
3844  BasicBlock *CurBB = Builder.GetInsertBlock();
3845  Instruction *CurBBTI = CurBB->getTerminator();
3846  CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
3847  BasicBlock *ExitBB =
3848  CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
3849  BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
3850  X->getName() + ".atomic.cont");
// ContBB gets its own terminator (the conditional branch) further down.
3851  ContBB->getTerminator()->eraseFromParent();
// Temporary slot for the updated value, allocated at AllocaIP.
// NOTE(review): the name suffix "x.new.val" has no separating dot, unlike the
// other suffixes in this function -- confirm whether that is intended.
3852  Builder.restoreIP(AllocaIP);
3853  AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
3854  NewAtomicAddr->setName(X->getName() + "x.new.val");
3855  Builder.SetInsertPoint(ContBB);
// The PHI carries the most recently observed old value: the initial load on
// entry, the compare-exchange result on every retry.
3856  llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
3857  PHI->addIncoming(OldVal, CurBB);
3858  IntegerType *NewAtomicCastTy =
3859  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3860  bool IsIntTy = XElemTy->isIntegerTy();
3861  Value *NewAtomicIntAddr =
3862  (IsIntTy)
3863  ? NewAtomicAddr
3864  : Builder.CreateBitCast(NewAtomicAddr,
3865  NewAtomicCastTy->getPointerTo(Addrspace));
// Present the old value to UpdateOp in the element type.
3866  Value *OldExprVal = PHI;
3867  if (!IsIntTy) {
3868  if (XElemTy->isFloatingPointTy()) {
3869  OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
3870  X->getName() + ".atomic.fltCast");
3871  } else {
3872  OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
3873  X->getName() + ".atomic.ptrCast");
3874  }
3875  }
3876 
// The updated value is stored to the temporary slot and reloaded through its
// integer view to obtain the compare-exchange operand.
3877  Value *Upd = UpdateOp(OldExprVal, Builder);
3878  Builder.CreateStore(Upd, NewAtomicAddr);
3879  LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicIntAddr);
3880  Value *XAddr =
3881  (IsIntTy)
3882  ? X
3883  : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3886  AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
3887  XAddr, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
3888  Result->setVolatile(VolatileX);
// Field 0 of the result is the value found in memory, field 1 the success
// flag; on failure the loop retries with the value just observed.
3889  Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
3890  Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
3891  PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
3892  Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
3893 
3894  Res.first = OldExprVal;
3895  Res.second = Upd;
3896 
3897  // set Insertion point in exit block
// NOTE(review): in the else branch ExitTI is null (the dyn_cast in the
// condition failed), so SetInsertPoint receives a null instruction pointer;
// ExitBB->getTerminator() was probably intended -- confirm and fix.
3898  if (UnreachableInst *ExitTI =
3899  dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
3900  CurBBTI->eraseFromParent();
3901  Builder.SetInsertPoint(ExitBB);
3902  } else {
3903  Builder.SetInsertPoint(ExitTI);
3904  }
3905  }
3906 
3907  return Res;
3908 }
3909 
3911  const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
3912  AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
3914  bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
// Emits an atomic update of X and stores either the old or the new value of X
// to V.Var. Returns the insertion point after the emitted code, or Loc.IP
// unchanged when updateToLocation(Loc) fails.
// NOTE(review): the signature line naming this function and the parameter
// line declaring RMWOp and UpdateOp are missing from this listing.
3915  if (!updateToLocation(Loc))
3916  return Loc.IP;
3917 
// These sanity checks are wrapped in LLVM_DEBUG. Only Max and Min are
// rejected here; UMax and UMin are not checked.
3918  LLVM_DEBUG({
3919  Type *XTy = X.Var->getType();
3920  assert(XTy->isPointerTy() &&
3921  "OMP Atomic expects a pointer to target memory");
3922  Type *XElemTy = X.ElemTy;
3923  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3924  XElemTy->isPointerTy()) &&
3925  "OMP atomic capture expected a scalar type");
3926  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3927  "OpenMP atomic does not support LT or GT operations");
3928  });
3929 
3930  // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
3931  // 'x' is simply atomically rewritten with 'expr'.
3932  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
3933  std::pair<Value *, Value *> Result =
3934  emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
3935  X.IsVolatile, IsXBinopExpr);
3936 
// Postfix capture stores the old value (first), otherwise the updated value
// (second).
3937  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
3938  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
3939 
3940  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
3941  return Builder.saveIP();
3942 }
3943 
3945  const LocationDescription &Loc, AtomicOpValue &X, Value *E, Value *D,
3946  AtomicOrdering AO, OMPAtomicCompareOp Op, bool IsXBinopExpr) {
// Emits an atomic compare operation on X: a cmpxchg(X, E, D) when Op is EQ,
// otherwise an atomicrmw with operand E. Returns the insertion point after
// the emitted code, or Loc.IP unchanged when updateToLocation(Loc) fails.
// NOTE(review): the signature line naming this function, the declaration of
// `Failure`, the start of the assert in the else branch and the assignments
// to NewOp are missing from this listing.
3947  if (!updateToLocation(Loc))
3948  return Loc.IP;
3949 
3950  assert(X.Var->getType()->isPointerTy() &&
3951  "OMP atomic expects a pointer to target memory");
3952  assert((X.ElemTy->isIntegerTy() || X.ElemTy->isPointerTy()) &&
3953  "OMP atomic compare expected a integer scalar type");
3954 
3955  if (Op == OMPAtomicCompareOp::EQ) {
3957  // We don't need the result for now.
3958  (void)Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
3959  } else {
3961  "Op should be either max or min at this point");
3962 
3963  // Reverse the ordop as the OpenMP forms are different from LLVM forms.
3964  // Let's take max as example.
3965  // OpenMP form:
3966  // x = x > expr ? expr : x;
3967  // LLVM form:
3968  // *ptr = *ptr > val ? *ptr : val;
3969  // We need to transform to LLVM form.
3970  // x = x <= expr ? x : expr;
// NewOp is chosen from IsXBinopExpr and the signedness of X.
3971  AtomicRMWInst::BinOp NewOp;
3972  if (IsXBinopExpr) {
3973  if (X.IsSigned)
3976  else
3979  } else {
3980  if (X.IsSigned)
3983  else
3986  }
3987  // We dont' need the result for now.
3988  (void)Builder.CreateAtomicRMW(NewOp, X.Var, E, MaybeAlign(), AO);
3989  }
3990 
3991  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
3992 
3993  return Builder.saveIP();
3994 }
3995 
3998  std::string VarName) {
// Creates and returns a private, constant global named VarName that holds an
// array of i8* with Names.size() elements, initialized from `Names`.
// NOTE(review): the signature lines declaring this function and `Names`, and
// the line opening the ArrayType::get call, are missing from this listing.
3999  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
4001  llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
4002  Names);
4003  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
4004  M, MapNamesArrayInit->getType(),
4005  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
4006  VarName);
4007  return MapNamesArrayGlobal;
4008 }
4009 
4010 // Create all simple and struct types exposed by the runtime and remember
4011 // the llvm::PointerTypes of them for easy access later.
//
// The type list comes from OMPKinds.def, which is included at the end of the
// function after the OMP_* macros below have been defined; each entry in that
// file expands to the assignments given by the matching macro.
4012 void OpenMPIRBuilder::initializeTypes(Module &M) {
4013  LLVMContext &Ctx = M.getContext();
// Scratch variable used by the OMP_STRUCT_TYPE expansion.
4014  StructType *T;
// Simple type: assign the given initializer to the member.
4015 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
// Array type: sets <VarName>Ty and an unqualified pointer to it in
// <VarName>PtrTy.
4016 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
4017  VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
4018  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
// Function type: sets <VarName> and an unqualified pointer to it in
// <VarName>Ptr.
4019 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
4020  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
4021  VarName##Ptr = PointerType::getUnqual(VarName);
// Struct type: reuse a struct of that name if the context already has one,
// otherwise create it; sets <VarName> and <VarName>Ptr.
4022 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
4023  T = StructType::getTypeByName(Ctx, StructName); \
4024  if (!T) \
4025  T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
4026  VarName = T; \
4027  VarName##Ptr = PointerType::getUnqual(T);
4028 #include "llvm/Frontend/OpenMP/OMPKinds.def"
4029 }
4030 
4033  SmallVectorImpl<BasicBlock *> &BlockVector) {
// Appends to BlockVector every block reachable from EntryBB through successor
// edges, never walking past ExitBB. ExitBB itself is not appended unless it
// is the same block as EntryBB.
// NOTE(review): the signature lines naming this function and the declaration
// of `Worklist` are missing from this listing.
// Pre-seeding the set with ExitBB is what stops the traversal there.
4035  BlockSet.insert(EntryBB);
4036  BlockSet.insert(ExitBB);
4037 
4038  Worklist.push_back(EntryBB);
4039  while (!Worklist.empty()) {
4040  BasicBlock *BB = Worklist.pop_back_val();
4041  BlockVector.push_back(BB);
// Only blocks newly inserted into BlockSet are queued, so each block is
// visited at most once.
4042  for (BasicBlock *SuccBB : successors(BB))
4043  if (BlockSet.insert(SuccBB).second)
4044  Worklist.push_back(SuccBB);
4045  }
4046 }
4047 
/// Append the six control blocks of this loop to \c BBs in the order
/// preheader, header, cond, latch, exit, after.
///
/// NOTE(review): the parameter line declaring \c BBs is missing from this
/// listing.
4048 void CanonicalLoopInfo::collectControlBlocks(
4050  // We only count those BBs as control block for which we do not need to
4051  // reverse the CFG, i.e. not the loop body which can contain arbitrary control
4052  // flow. For consistency, this also means we do not add the Body block, which
4053  // is just the entry to the body code.
// Reserve room for the six blocks up front.
4054  BBs.reserve(BBs.size() + 6);
4055  BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
4056 }
4057 
// Returns the first predecessor of Header that is not Latch.
// NOTE(review): the signature line naming this function is missing from this
// listing; the name is inferred from the "Missing preheader" message.
4059  assert(isValid() && "Requires a valid canonical loop");
4060  for (BasicBlock *Pred : predecessors(Header)) {
4061  if (Pred != Latch)
4062  return Pred;
4063  }
// Reaching this point means Header has no predecessor other than Latch.
4064  llvm_unreachable("Missing preheader");
4065 }
4066 
4067 void CanonicalLoopInfo::setTripCount(Value *TripCount) {
4068  assert(isValid() && "Requires a valid canonical loop");
4069 
4070  Instruction *CmpI = &getCond()->front();
4071  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
4072  CmpI->setOperand(1, TripCount);
4073 
4074 #ifndef NDEBUG
4075  assertOK();
4076 #endif
4077 }
4078 
4079 void CanonicalLoopInfo::mapIndVar(
4080  llvm::function_ref<Value *(Instruction *)> Updater) {
4081  assert(isValid() && "Requires a valid canonical loop");
4082 
4083  Instruction *OldIV = getIndVar();
4084 
4085  // Record all uses excluding those introduced by the updater. Uses by the
4086  // CanonicalLoopInfo itself to keep track of the number of iterations are
4087  // excluded.
4088  SmallVector<Use *> ReplacableUses;
4089  for (Use &U : OldIV->uses()) {
4090  auto *User = dyn_cast<Instruction>(U.getUser());
4091  if (!User)
4092  continue;
4093  if (User->getParent() == getCond())
4094  continue;
4095  if (User->getParent() == getLatch())
4096  continue;
4097  ReplacableUses.push_back(&U);
4098  }
4099 
4100  // Run the updater that may introduce new uses
4101  Value *NewIV = Updater(OldIV);
4102 
4103  // Replace the old uses with the value returned by the updater.
4104  for (Use *U : ReplacableUses)
4105  U->set(NewIV);
4106 
4107 #ifndef NDEBUG
4108  assertOK();
4109 #endif
4110 }
4111 
// Consistency check for the canonical loop skeleton. All checks are compiled
// only when NDEBUG is not defined; otherwise the body is empty.
// NOTE(review): the signature line naming this function is missing from this
// listing; the name is inferred from the assertOK() calls elsewhere in it.
4113 #ifndef NDEBUG
4114  // No constraints if this object currently does not describe a loop.
4115  if (!isValid())
4116  return;
4117 
4118  BasicBlock *Preheader = getPreheader();
4119  BasicBlock *Body = getBody();
4120  BasicBlock *After = getAfter();
4121 
4122  // Verify standard control-flow we use for OpenMP loops.
// Expected chain: Preheader -> Header -> Cond -> {Body, Exit}, with
// Latch -> Header and Exit -> After.
4123  assert(Preheader);
4124  assert(isa<BranchInst>(Preheader->getTerminator()) &&
4125  "Preheader must terminate with unconditional branch");
4126  assert(Preheader->getSingleSuccessor() == Header &&
4127  "Preheader must jump to header");
4128 
4129  assert(Header);
4130  assert(isa<BranchInst>(Header->getTerminator()) &&
4131  "Header must terminate with unconditional branch");
4132  assert(Header->getSingleSuccessor() == Cond &&
4133  "Header must jump to exiting block");
4134 
4135  assert(Cond);
4136  assert(Cond->getSinglePredecessor() == Header &&
4137  "Exiting block only reachable from header");
4138 
4139  assert(isa<BranchInst>(Cond->getTerminator()) &&
4140  "Exiting block must terminate with conditional branch");
4141  assert(size(successors(Cond)) == 2 &&
4142  "Exiting block must have two successors");
4143  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
4144  "Exiting block's first successor jump to the body");
4145  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
4146  "Exiting block's second successor must exit the loop");
4147 
4148  assert(Body);
4149  assert(Body->getSinglePredecessor() == Cond &&
4150  "Body only reachable from exiting block");
4151  assert(!isa<PHINode>(Body->front()));
4152 
4153  assert(Latch);
4154  assert(isa<BranchInst>(Latch->getTerminator()) &&
4155  "Latch must terminate with unconditional branch");
4156  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
4157  // TODO: To support simple redirecting of the end of the body code that has
4158  // multiple; introduce another auxiliary basic block like preheader and after.
4159  assert(Latch->getSinglePredecessor() != nullptr);
4160  assert(!isa<PHINode>(Latch->front()));
4161 
4162  assert(Exit);
4163  assert(isa<BranchInst>(Exit->getTerminator()) &&
4164  "Exit block must terminate with unconditional branch");
4165  assert(Exit->getSingleSuccessor() == After &&
4166  "Exit block must jump to after block");
4167 
4168  assert(After);
4169  assert(After->getSinglePredecessor() == Exit &&
4170  "After block only reachable from exit block");
4171  assert(After->empty() || !isa<PHINode>(After->front()));
4172 
// The induction variable must be an integer PHI in Header that starts at 0
// when coming from Preheader and takes the latch's incremented value when
// coming from Latch.
4173  Instruction *IndVar = getIndVar();
4174  assert(IndVar && "Canonical induction variable not found?");
4175  assert(isa<IntegerType>(IndVar->getType()) &&
4176  "Induction variable must be an integer");
4177  assert(cast<PHINode>(IndVar)->getParent() == Header &&
4178  "Induction variable must be a PHI in the loop header");
4179  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
4180  assert(
4181  cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
4182  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
4183 
// The increment must be `IndVar + 1`, located in Latch.
4184  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
4185  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
4186  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
4187  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
4188  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
4189  ->isOne());
4190 
4191  Value *TripCount = getTripCount();
4192  assert(TripCount && "Loop trip count not found?");
4193  assert(IndVar->getType() == TripCount->getType() &&
4194  "Trip count and induction variable must have the same type");
4195 
// NOTE(review): the predicate checked is ICMP_ULT (unsigned less-than) while
// the assert message below says "signed"; the message looks stale.
4196  auto *CmpI = cast<CmpInst>(&Cond->front());
4197  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
4198  "Exit condition must be a signed less-than comparison");
4199  assert(CmpI->getOperand(0) == IndVar &&
4200  "Exit condition must compare the induction variable");
4201  assert(CmpI->getOperand(1) == TripCount &&
4202  "Exit condition must compare with the trip count");
4203 #endif
4204 }
4205 
// Clears the Header, Cond, Latch and Exit block pointers.
// NOTE(review): the signature line naming this function is missing from this
// listing; presumably this is what makes isValid() report false afterwards
// -- confirm against the isValid() definition.
4207  Header = nullptr;
4208  Cond = nullptr;
4209  Latch = nullptr;
4210  Exit = nullptr;
4211 }
llvm::omp::OMPScheduleType::NomergeOrderedStatic
@ NomergeOrderedStatic
i
i
Definition: README.txt:29
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::omp::OMPScheduleType::BaseGuidedChunked
@ BaseGuidedChunked
llvm::CanonicalLoopInfo::getPreheaderIP
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
Definition: OMPIRBuilder.h:1732
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:480
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:459
llvm::OpenMPIRBuilder::createCachedThreadPrivate
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
Definition: OMPIRBuilder.cpp:3401
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:299
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2458
llvm::OpenMPIRBuilder::LocationDescription
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:202
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2115
addLoopMetadata
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
Definition: OMPIRBuilder.cpp:2610
llvm::OpenMPIRBuilder::OutlineInfo::ExitBB
BasicBlock * ExitBB
Definition: OMPIRBuilder.h:875
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:455
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:236
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::OpenMPIRBuilder::createSection
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
Definition: OMPIRBuilder.cpp:1351
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:280
llvm::OpenMPIRBuilder::createCritical
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
Definition: OMPIRBuilder.cpp:3013
llvm::CanonicalLoopInfo::getAfter
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Definition: OMPIRBuilder.h:1701
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:113
llvm::OpenMPIRBuilder::OutlineInfo::ExcludeArgsFromAggregate
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
Definition: OMPIRBuilder.h:876
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
createTargetMachine
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOpt::Level OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
Definition: OMPIRBuilder.cpp:2729
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:218
llvm::omp::OMPScheduleType::NomergeOrderedRuntime
@ NomergeOrderedRuntime
llvm::omp::OMPScheduleType::NomergeUnorderedGuidedChunked
@ NomergeUnorderedGuidedChunked
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::OpenMPIRBuilder::createLoopSkeleton
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
Definition: OMPIRBuilder.cpp:1610
DebugInfoMetadata.h
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:160
llvm::Function::empty
bool empty() const
Definition: Function.h:731
llvm::Function::getBasicBlockList
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:702
llvm::GlobalValue::HiddenVisibility
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:64
T
llvm::CodeExtractor::findAllocas
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
Definition: CodeExtractor.cpp:494
llvm::CanonicalLoopInfo::getPreheader
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Definition: OMPIRBuilder.cpp:4058
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1347
llvm::omp::OMPScheduleType::BaseGuidedAnalyticalChunked
@ BaseGuidedAnalyticalChunked
llvm::OpenMPIRBuilder::createOMPInteropUse
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
Definition: OMPIRBuilder.cpp:3373
llvm::AtomicRMWInst::Xor
@ Xor
*p = old ^ v
Definition: Instructions.h:753
llvm::omp::OMPScheduleType::UnorderedDynamicChunked
@ UnorderedDynamicChunked
llvm::CodeExtractor::extractCodeRegion
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Definition: CodeExtractor.cpp:1624
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:140
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:178
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:739
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::BasicBlock::empty
bool empty() const
Definition: BasicBlock.h:308
llvm::omp::OMPScheduleType::NomergeUnorderedSteal
@ NomergeUnorderedSteal
llvm::omp::OMPScheduleType::OrderdTrapezoidal
@ OrderdTrapezoidal
llvm::OpenMPIRBuilder::tileLoops
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
Definition: OMPIRBuilder.cpp:2418
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A,B) such that A is the 0-based ...
Definition: STLExtras.h:2045
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:709
llvm::OpenMPIRBuilder::InsertPointTy
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:96
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2489
llvm::OpenMPIRBuilder::createAtomicCompare
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr)
Emit atomic compare for constructs: — Only scalar data types cond-update-atomic: x = x ordop expr ?...
Definition: OMPIRBuilder.cpp:3944
llvm::omp::OMPScheduleType::NomergeUnorderedRuntime
@ NomergeUnorderedRuntime
llvm::OpenMPIRBuilder::getOrCreateThreadID
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
Definition: OMPIRBuilder.cpp:641
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::omp::OMPScheduleType::NomergeUnorderedBalanced
@ NomergeUnorderedBalanced
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2258
llvm::DeleteDeadBlocks
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
Definition: BasicBlockUtils.cpp:99
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:361
llvm::OpenMPIRBuilder::createAtomicUpdate
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X. For complex operations: X = ...
Definition: OMPIRBuilder.cpp:3741
llvm::omp::OMPScheduleType::NomergeOrderedTrapezoidal
@ NomergeOrderedTrapezoidal
OptimizationRemarkEmitter.h
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::omp::OMPScheduleType::UnorderedGreedy
@ UnorderedGreedy
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::omp::OMPScheduleType::BaseGreedy
@ BaseGreedy
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:464
llvm::omp::OMPScheduleType::UnorderedRuntimeSimd
@ UnorderedRuntimeSimd
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1551
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:773
ScalarEvolution.h
Shift
bool Shift
Definition: README.txt:468
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:132
llvm::OpenMPIRBuilder::AtomicOpValue
a struct to pack relevant information while generating atomic Ops
Definition: OMPIRBuilder.h:1357
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:537
llvm::CanonicalLoopInfo::getAfterIP
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
Definition: OMPIRBuilder.h:1746
getTripCount
static const SCEV * getTripCount(const SCEV *BECount, Type *IntPtr, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute trip count from the backedge taken count.
Definition: LoopIdiomRecognize.cpp:1056
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:291
llvm::StoreInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:358
llvm::spliceBB
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
Definition: OMPIRBuilder.cpp:255
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::omp::OMPScheduleType::UnorderedGuidedAnalyticalChunked
@ UnorderedGuidedAnalyticalChunked
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:378
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:666
llvm::Optional
Definition: APInt.h:33
llvm::OpenMPIRBuilder::createSingle
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt)
Generator for '#omp single'.
Definition: OMPIRBuilder.cpp:2971
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:420
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::omp::OMPScheduleType::BaseRuntimeSimd
@ BaseRuntimeSimd
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:184
CodeExtractor.h
llvm::omp::OMPScheduleType::ModifierNonmonotonic
@ ModifierNonmonotonic
llvm::OpenMPIRBuilder::ReductionInfo::Variable
Value * Variable
Reduction variable of pointer type.
Definition: OMPIRBuilder.h:650
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::OpenMPIRBuilder::FinalizeCallbackTy
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:106
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:749
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:887
llvm::CanonicalLoopInfo::getFunction
Function * getFunction() const
Definition: OMPIRBuilder.h:1752
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:261
llvm::OpenMPIRBuilder::AtomicOpValue::Var
Value * Var
Definition: OMPIRBuilder.h:1358
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1316
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::TargetRegistry::lookupTarget
static const Target * lookupTarget(const std::string &Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Definition: TargetRegistry.cpp:62
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::BasicBlock::rend
reverse_iterator rend()
Definition: BasicBlock.h:304
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::omp::OMPScheduleType::UnorderedSteal
@ UnorderedSteal
llvm::OpenMPIRBuilder::emitTaskyieldImpl
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
Definition: OMPIRBuilder.cpp:1238
llvm::OpenMPIRBuilder::createReductions
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
Definition: OMPIRBuilder.cpp:1397
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:159
computeOpenMPScheduleType
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
Definition: OMPIRBuilder.cpp:219
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::StoreInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:385
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1289
llvm::OpenMPIRBuilder::collapseLoops
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
Definition: OMPIRBuilder.cpp:2290
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1191
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1368
llvm::ConstantExpr::getPointerCast
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2070
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:299
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::CanonicalLoopInfo::getIndVar
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Definition: OMPIRBuilder.h:1718
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::omp::OMPScheduleType::UnorderedGuidedChunked
@ UnorderedGuidedChunked
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:184
llvm::OpenMPIRBuilder::createOrderedDepend
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
Definition: OMPIRBuilder.cpp:3048
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::Lock
static sys::Mutex Lock
Definition: NVPTXUtilities.cpp:39
getKmpcForDynamicFiniForType
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop, depending on the type.
Definition: OMPIRBuilder.cpp:2111
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:171
CommandLine.h
CodeMetrics.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::omp::OMPInteropType
OMPInteropType
Definition: OMPConstants.h:205
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:777
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1366
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1605
llvm::omp::OMPScheduleType::UnorderedAuto
@ UnorderedAuto
TargetMachine.h
llvm::OpenMPIRBuilder::emitMapperCall
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
Definition: OMPIRBuilder.cpp:3578
OMPIRBuilder.h
Constants.h
llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
@ OMP_TGT_EXEC_MODE_GENERIC
Definition: OMPConstants.h:189
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:518
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
llvm::CanonicalLoopInfo::assertOK
void assertOK() const
Consistency self-check.
Definition: OMPIRBuilder.cpp:4112
llvm::CanonicalLoopInfo::getCond
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
Definition: OMPIRBuilder.h:1669
llvm::OpenMPIRBuilder
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:75
llvm::OpenMPIRBuilder::createOMPFree
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Definition: OMPIRBuilder.cpp:3302
llvm::User
Definition: User.h:44
llvm::OpenMPIRBuilder::getOrCreateDefaultSrcLocStr
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
Definition: OMPIRBuilder.cpp:613
llvm::omp::OMPScheduleType::UnorderedRuntime
@ UnorderedRuntime
llvm::OpenMPIRBuilder::emitOffloadingEntry
void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, StringRef SectionName="omp_offloading_entries")
Create an offloading section struct used to register this global at runtime.
Definition: OMPIRBuilder.cpp:756
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:495
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:745
TileSize
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::OpenMPIRBuilder::createMapperAllocas
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Definition: OMPIRBuilder.cpp:3559
redirectAllPredecessorsTo
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
Definition: OMPIRBuilder.cpp:2251
llvm::OpenMPIRBuilder::createBarrier
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
Definition: OMPIRBuilder.cpp:648
llvm::OpenMPIRBuilder::createAtomicRead
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
Definition: OMPIRBuilder.cpp:3666
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:190
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
IP
Definition: NVPTXLowerArgs.cpp:167
TargetLibraryInfo.h
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::OpenMPIRBuilder::~OpenMPIRBuilder
~OpenMPIRBuilder()
Definition: OMPIRBuilder.cpp:518
llvm::omp::OMPScheduleType::ModifierMonotonic
@ ModifierMonotonic
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::CanonicalLoopInfo::isValid
bool isValid() const
Returns whether this object currently represents the IR of a loop.
Definition: OMPIRBuilder.h:1652
llvm::OpenMPIRBuilder::getOrCreateSrcLocStr
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
Definition: OMPIRBuilder.cpp:573
llvm::CodeExtractor::findInputsOutputs
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
Definition: CodeExtractor.cpp:646
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::splitBBWithSuffix
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
Definition: OMPIRBuilder.cpp:323
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
isValidWorkshareLoopScheduleType
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
Definition: OMPIRBuilder.cpp:73
MDBuilder.h
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:749
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::omp::OMPScheduleType::NomergeUnorderedAuto
@ NomergeUnorderedAuto
llvm::GlobalObject::addMetadata
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1300
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:372
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:619
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:919
llvm::OpenMPIRBuilder::applySimd
void applySimd(DebugLoc DL, CanonicalLoopInfo *Loop)
Add metadata to simd-ize a loop.
Definition: OMPIRBuilder.cpp:2664
llvm::BasicBlock::rbegin
reverse_iterator rbegin()
Definition: BasicBlock.h:302
llvm::CanonicalLoopInfo::getBodyIP
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
Definition: OMPIRBuilder.h:1739
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::omp::OMPAtomicCompareOp::EQ
@ EQ
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:789
removeUnusedBlocksFromParent
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
Definition: OMPIRBuilder.cpp:2259
llvm::omp::OMPScheduleType::BaseBalanced
@ BaseBalanced
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:73
llvm::OpenMPIRBuilder::MapperAllocas::Args
AllocaInst * Args
Definition: OMPIRBuilder.h:915
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::omp::IdentFlag
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:66
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:741
llvm::OpenMPIRBuilder::unrollLoopFull
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
Definition: OMPIRBuilder.cpp:2649
llvm::OpenMPIRBuilder::emitTaskwaitImpl
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Definition: OMPIRBuilder.cpp:1219
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::OpenMPIRBuilder::createOMPAlloc
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
Definition: OMPIRBuilder.cpp:3285
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::OpenMPIRBuilder::addAttributes
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Definition: OMPIRBuilder.cpp:329
llvm::None
const NoneType None
Definition: None.h:24
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:743
llvm::omp::OMPScheduleType::NomergeUnorderedStatic
@ NomergeUnorderedStatic
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::omp::OMPAtomicCompareOp::MAX
@ MAX
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:279
llvm::SmallString< 128 >
CFG.h
LoopInfo.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::OpenMPIRBuilder::OutlineInfo::EntryBB
BasicBlock * EntryBB
Definition: OMPIRBuilder.h:875
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:314
llvm::omp::OMPScheduleType::NomergeOrderedGuidedChunked
@ NomergeOrderedGuidedChunked
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::omp::RuntimeFunction
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:46
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::OpenMPIRBuilder::createTaskyield
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
Definition: OMPIRBuilder.cpp:1250
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:761
llvm::cl::opt< bool >
llvm::ClrHandlerType::Filter
@ Filter
llvm::OpenMPIRBuilder::createOffloadMaptypes
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
Definition: OMPIRBuilder.cpp:3547
llvm::OpenMPIRBuilder::AtomicOpValue::IsVolatile
bool IsVolatile
Definition: OMPIRBuilder.h:1361
llvm::OpenMPIRBuilder::createSections
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
Definition: OMPIRBuilder.cpp:1256
llvm::OpenMPIRBuilder::MapperAllocas::ArgsBase
AllocaInst * ArgsBase
Definition: OMPIRBuilder.h:914
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:305
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::OpenMPIRBuilder::createOMPInteropDestroy
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
Definition: OMPIRBuilder.cpp:3346
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::OpenMPIRBuilder::ReductionInfo::PrivateVariable
Value * PrivateVariable
Thread-private partial reduction variable.
Definition: OMPIRBuilder.h:653
llvm::OpenMPIRBuilder::createOrderedThreadsSimd
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
Definition: OMPIRBuilder.cpp:3093
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:180
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:745
llvm::omp::OMPScheduleType::NomergeOrderedAuto
@ NomergeOrderedAuto
uint64_t
llvm::ScalarEvolutionAnalysis::run
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
Definition: ScalarEvolution.cpp:13731
llvm::AssumptionAnalysis::run
AssumptionCache run(Function &F, FunctionAnalysisManager &)
Definition: AssumptionCache.cpp:258
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::omp::OMPScheduleType::OrderedGuidedChunked
@ OrderedGuidedChunked
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1755
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2814
llvm::GlobalValue::WeakAnyLinkage
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:52
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3155
llvm::omp::OMPScheduleType::MonotonicityMask
@ MonotonicityMask
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:430
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
Definition: LoopInfo.h:970
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::OpenMPIRBuilder::emitCancelationCheckImpl
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Definition: OMPIRBuilder.cpp:794
llvm::OpenMPIRBuilder::createCanonicalLoop
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.cpp:1677
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:757
llvm::OpenMPIRBuilder::createTargetInit
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
The omp target interface.
Definition: OMPIRBuilder.cpp:3422
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:608
llvm::OpenMPIRBuilder::getOrCreateIdent
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
Definition: OMPIRBuilder.cpp:533
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:751
addSimdMetadata
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
Definition: OMPIRBuilder.cpp:2637
llvm::OpenMPIRBuilder::createParallel
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
Definition: OMPIRBuilder.cpp:833
llvm::OpenMPIRBuilder::OutlineInfo::OuterAllocaBB
BasicBlock * OuterAllocaBB
Definition: OMPIRBuilder.h:875
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:499
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:137
llvm::OpenMPIRBuilder::finalize
void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
Definition: OMPIRBuilder.cpp:429
IRBuilder.h
llvm::omp::OMPScheduleType::BaseRuntime
@ BaseRuntime
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::omp::OMPScheduleType::UnorderedStatic
@ UnorderedStatic
llvm::MDBuilder::createCallbackEncoding
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
llvm::omp::OMPScheduleType::None
@ None
llvm::OpenMPIRBuilder::LocationDescription::DL
DebugLoc DL
Definition: OMPIRBuilder.h:209
isConflictIP
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
Definition: OMPIRBuilder.cpp:66
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::OpenMPIRBuilder::unrollLoopHeuristic
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
Definition: OMPIRBuilder.cpp:2656
llvm::omp::OMPScheduleType::UnorderedTrapezoidal
@ UnorderedTrapezoidal
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:638
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
Definition: OMPIRBuilder.cpp:420
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::OpenMPIRBuilder::createMaster
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Definition: OMPIRBuilder.cpp:1561
llvm::MDNode
Metadata node.
Definition: Metadata.h:926
llvm::SplitBlockAndInsertIfThenElse
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
Definition: BasicBlockUtils.cpp:1463
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::omp::OMPScheduleType::ModifierOrdered
@ ModifierOrdered
llvm::OpenMPIRBuilder::createFlush
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Definition: OMPIRBuilder.cpp:1213
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1322
llvm::DominatorTreeAnalysis::run
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Definition: Dominators.cpp:367
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::Function::getFunction
const Function & getFunction() const
Definition: Function.h:135
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::OpenMPIRBuilder::OutlineInfo
Helper that contains information about regions we need to outline during finalization.
Definition: OMPIRBuilder.h:872
llvm::CanonicalLoopInfo
Class to represent the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.h:1608
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::omp::OMPScheduleType::OrderedDynamicChunked
@ OrderedDynamicChunked
llvm::omp::OMPScheduleType::NomergeUnorderedDynamicChunked
@ NomergeUnorderedDynamicChunked
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:748
Compare
QP Compare Ordered outs ins xscmpudp No builtin are required Or llvm fcmp order unorder compare DP QP Compare builtin are required DP Compare
Definition: README_P9.txt:309
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1586
llvm::OpenMPIRBuilder::createGlobalFlag
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Definition: OMPIRBuilder.cpp:522
TargetOptions.h
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::OpenMPIRBuilder::createOMPInteropInit
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
Definition: OMPIRBuilder.cpp:3317
llvm::omp::OMPAtomicCompareOp
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
Definition: OMPConstants.h:208
llvm::omp::OMPScheduleType::NomergeUnorderedGuidedIterativeChunked
@ NomergeUnorderedGuidedIterativeChunked
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:178
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::omp::OMPScheduleType::NomergeUnorderedGreedy
@ NomergeUnorderedGreedy
llvm::CodeExtractor::excludeArgFromAggregate
void excludeArgFromAggregate(Value *Arg)
Exclude a value from aggregate argument passing when extracting a code region, passing it instead as a scalar.
Definition: CodeExtractor.cpp:1879
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections