LLVM  9.0.0svn
AtomicExpandPass.cpp
1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // __atomic_* library calls, or target-specific instructions which implement the
11 // same semantics in a way which better fits the target backend. This can
12 // include the use of (intrinsic-based) load-linked/store-conditional loops,
13 // AtomicCmpXchg, or type coercions.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/CodeGen/AtomicExpandUtils.h"
21 #include "llvm/CodeGen/RuntimeLibcalls.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetPassConfig.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/CodeGen/ValueTypes.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/InstIterator.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/User.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Pass.h"
42 #include "llvm/Support/AtomicOrdering.h"
43 #include "llvm/Support/Casting.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include "llvm/Target/TargetMachine.h"
48 #include <cassert>
49 #include <cstdint>
50 #include <iterator>
51 
52 using namespace llvm;
53 
54 #define DEBUG_TYPE "atomic-expand"
55 
56 namespace {
57 
58  class AtomicExpand: public FunctionPass {
59  const TargetLowering *TLI = nullptr;
60 
61  public:
62  static char ID; // Pass identification, replacement for typeid
63 
64  AtomicExpand() : FunctionPass(ID) {
65  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
66  }
67 
68  bool runOnFunction(Function &F) override;
69 
70  private:
71  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
72  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
73  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
74  bool tryExpandAtomicLoad(LoadInst *LI);
75  bool expandAtomicLoadToLL(LoadInst *LI);
76  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
77  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
78  bool expandAtomicStore(StoreInst *SI);
79  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
80  Value *
81  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
82  AtomicOrdering MemOpOrder,
83  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
84  void expandAtomicOpToLLSC(
85  Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
86  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
87  void expandPartwordAtomicRMW(
88  AtomicRMWInst *I,
89  TargetLoweringBase::AtomicExpansionKind ExpansionKind);
90  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
91  void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
92  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
93  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
94 
95  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
96  static Value *insertRMWCmpXchgLoop(
97  IRBuilder<> &Builder, Type *ResultType, Value *Addr,
98  AtomicOrdering MemOpOrder,
99  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
100  CreateCmpXchgInstFun CreateCmpXchg);
101  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
102 
103  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
104  bool isIdempotentRMW(AtomicRMWInst *RMWI);
105  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
106 
107  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
108  Value *PointerOperand, Value *ValueOperand,
109  Value *CASExpected, AtomicOrdering Ordering,
110  AtomicOrdering Ordering2,
111  ArrayRef<RTLIB::Libcall> Libcalls);
112  void expandAtomicLoadToLibcall(LoadInst *LI);
113  void expandAtomicStoreToLibcall(StoreInst *LI);
114  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
115  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
116 
117  friend bool
118  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
119  CreateCmpXchgInstFun CreateCmpXchg);
120  };
121 
122 } // end anonymous namespace
123 
124 char AtomicExpand::ID = 0;
125 
126 char &llvm::AtomicExpandID = AtomicExpand::ID;
127 
128 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
129  false, false)
130 
131 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
132 
133 // Helper functions to retrieve the size of atomic instructions.
134 static unsigned getAtomicOpSize(LoadInst *LI) {
135  const DataLayout &DL = LI->getModule()->getDataLayout();
136  return DL.getTypeStoreSize(LI->getType());
137 }
138 
139 static unsigned getAtomicOpSize(StoreInst *SI) {
140  const DataLayout &DL = SI->getModule()->getDataLayout();
141  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
142 }
143 
144 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
145  const DataLayout &DL = RMWI->getModule()->getDataLayout();
146  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
147 }
148 
149 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
150  const DataLayout &DL = CASI->getModule()->getDataLayout();
151  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
152 }
153 
154 // Helper functions to retrieve the alignment of atomic instructions.
155 static unsigned getAtomicOpAlign(LoadInst *LI) {
156  unsigned Align = LI->getAlignment();
157  // In the future, if this IR restriction is relaxed, we should
158  // return DataLayout::getABITypeAlignment when there's no align
159  // value.
160  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
161  return Align;
162 }
163 
164 static unsigned getAtomicOpAlign(StoreInst *SI) {
165  unsigned Align = SI->getAlignment();
166  // In the future, if this IR restriction is relaxed, we should
167  // return DataLayout::getABITypeAlignment when there's no align
168  // value.
169  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
170  return Align;
171 }
172 
173 static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
174  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
175  // default alignment for load/store, the default here is to assume
176  // it has NATURAL alignment, not DataLayout-specified alignment.
177  const DataLayout &DL = RMWI->getModule()->getDataLayout();
178  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
179 }
180 
181 static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
182  // TODO(PR27168): same comment as above.
183  const DataLayout &DL = CASI->getModule()->getDataLayout();
184  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
185 }
186 
187 // Determine if a particular atomic operation has a supported size,
188 // and is of appropriate alignment, to be passed through for target
189 // lowering. (Versus turning into a __atomic libcall)
190 template <typename Inst>
191 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
192  unsigned Size = getAtomicOpSize(I);
193  unsigned Align = getAtomicOpAlign(I);
194  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
195 }
196 
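// Pass entry point: gather every atomic instruction in the function up front,
// then expand each one into libcalls, fences, integer-typed operations, or
// target-preferred cmpxchg/LL-SC loops, as requested by the TargetLowering
// hooks.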
197 bool AtomicExpand::runOnFunction(Function &F) {
198  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
199  if (!TPC)
200  return false;
201 
202  auto &TM = TPC->getTM<TargetMachine>();
203  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
204  return false;
205  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
206 
207  SmallVector<Instruction *, 1> AtomicInsts;
208 
209  // Changing control-flow while iterating through it is a bad idea, so gather a
210  // list of all atomic instructions before we start.
211  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
212  Instruction *I = &*II;
213  if (I->isAtomic() && !isa<FenceInst>(I))
214  AtomicInsts.push_back(I);
215  }
216 
217  bool MadeChange = false;
218  for (auto I : AtomicInsts) {
219  auto LI = dyn_cast<LoadInst>(I);
220  auto SI = dyn_cast<StoreInst>(I);
221  auto RMWI = dyn_cast<AtomicRMWInst>(I);
222  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
223  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
224 
225  // If the Size/Alignment is not supported, replace with a libcall.
226  if (LI) {
227  if (!atomicSizeSupported(TLI, LI)) {
228  expandAtomicLoadToLibcall(LI);
229  MadeChange = true;
230  continue;
231  }
232  } else if (SI) {
233  if (!atomicSizeSupported(TLI, SI)) {
234  expandAtomicStoreToLibcall(SI);
235  MadeChange = true;
236  continue;
237  }
238  } else if (RMWI) {
239  if (!atomicSizeSupported(TLI, RMWI)) {
240  expandAtomicRMWToLibcall(RMWI);
241  MadeChange = true;
242  continue;
243  }
244  } else if (CASI) {
245  if (!atomicSizeSupported(TLI, CASI)) {
246  expandAtomicCASToLibcall(CASI);
247  MadeChange = true;
248  continue;
249  }
250  }
251 
252  if (TLI->shouldInsertFencesForAtomic(I)) {
253  auto FenceOrdering = AtomicOrdering::Monotonic;
254  if (LI && isAcquireOrStronger(LI->getOrdering())) {
255  FenceOrdering = LI->getOrdering();
256  LI->setOrdering(AtomicOrdering::Monotonic);
257  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
258  FenceOrdering = SI->getOrdering();
259  SI->setOrdering(AtomicOrdering::Monotonic);
260  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
261  isAcquireOrStronger(RMWI->getOrdering()))) {
262  FenceOrdering = RMWI->getOrdering();
263  RMWI->setOrdering(AtomicOrdering::Monotonic);
264  } else if (CASI &&
265  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
266  TargetLoweringBase::AtomicExpansionKind::None &&
267  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
268  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
269  // If a compare and swap is lowered to LL/SC, we can do smarter fence
270  // insertion, with a stronger one on the success path than on the
271  // failure path. As a result, fence insertion is directly done by
272  // expandAtomicCmpXchg in that case.
273  FenceOrdering = CASI->getSuccessOrdering();
274  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
275  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
276  }
277 
278  if (FenceOrdering != AtomicOrdering::Monotonic) {
279  MadeChange |= bracketInstWithFences(I, FenceOrdering);
280  }
281  }
282 
283  if (LI) {
284  if (LI->getType()->isFloatingPointTy()) {
285  // TODO: add a TLI hook to control this so that each target can
286  // convert to lowering the original type one at a time.
287  LI = convertAtomicLoadToIntegerType(LI);
288  assert(LI->getType()->isIntegerTy() && "invariant broken");
289  MadeChange = true;
290  }
291 
292  MadeChange |= tryExpandAtomicLoad(LI);
293  } else if (SI) {
294  if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
295  // TODO: add a TLI hook to control this so that each target can
296  // convert to lowering the original type one at a time.
297  SI = convertAtomicStoreToIntegerType(SI);
298  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
299  "invariant broken");
300  MadeChange = true;
301  }
302 
303  if (TLI->shouldExpandAtomicStoreInIR(SI))
304  MadeChange |= expandAtomicStore(SI);
305  } else if (RMWI) {
306  // There are two different ways of expanding RMW instructions:
307  // - into a load if it is idempotent
308  // - into a Cmpxchg/LL-SC loop otherwise
309  // we try them in that order.
310 
311  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
312  MadeChange = true;
313  } else {
314  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
315  unsigned ValueSize = getAtomicOpSize(RMWI);
316  AtomicRMWInst::BinOp Op = RMWI->getOperation();
317  if (ValueSize < MinCASSize &&
318  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
319  Op == AtomicRMWInst::And)) {
320  RMWI = widenPartwordAtomicRMW(RMWI);
321  MadeChange = true;
322  }
323 
324  MadeChange |= tryExpandAtomicRMW(RMWI);
325  }
326  } else if (CASI) {
327  // TODO: when we're ready to make the change at the IR level, we can
328  // extend convertCmpXchgToInteger for floating point too.
329  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
330  "unimplemented - floating point not legal at IR level");
331  if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
332  // TODO: add a TLI hook to control this so that each target can
333  // convert to lowering the original type one at a time.
334  CASI = convertCmpXchgToIntegerType(CASI);
335  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
336  "invariant broken");
337  MadeChange = true;
338  }
339 
340  MadeChange |= tryExpandAtomicCmpXchg(CASI);
341  }
342  }
343  return MadeChange;
344 }
345 
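/// Ask the target for leading and trailing fences around \p I for the given
/// ordering, and return true if at least one fence was emitted.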
346 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
347  IRBuilder<> Builder(I);
348 
349  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
350 
351  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
352  // We have a guard here because not every atomic operation generates a
353  // trailing fence.
354  if (TrailingFence)
355  TrailingFence->moveAfter(I);
356 
357  return (LeadingFence || TrailingFence);
358 }
359 
360 /// Get the iX type with the same bitwidth as T.
361 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
362  const DataLayout &DL) {
363  EVT VT = TLI->getValueType(DL, T);
364  unsigned BitWidth = VT.getStoreSizeInBits();
365  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
366  return IntegerType::get(T->getContext(), BitWidth);
367 }
368 
369 /// Convert an atomic load of a non-integral type to an integer load of the
370 /// equivalent bitwidth. See the function comment on
371 /// convertAtomicStoreToIntegerType for background.
372 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
373  auto *M = LI->getModule();
374  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
375  M->getDataLayout());
376 
377  IRBuilder<> Builder(LI);
378 
379  Value *Addr = LI->getPointerOperand();
380  Type *PT = PointerType::get(NewTy,
381  Addr->getType()->getPointerAddressSpace());
382  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
383 
384  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
385  NewLI->setAlignment(LI->getAlignment());
386  NewLI->setVolatile(LI->isVolatile());
387  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
388  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
389 
390  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
391  LI->replaceAllUsesWith(NewVal);
392  LI->eraseFromParent();
393  return NewLI;
394 }
395 
396 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
397  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
398  case TargetLoweringBase::AtomicExpansionKind::None:
399  return false;
400  case TargetLoweringBase::AtomicExpansionKind::LLSC:
401  expandAtomicOpToLLSC(
402  LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
403  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
404  return true;
405  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
406  return expandAtomicLoadToLL(LI);
407  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
408  return expandAtomicLoadToCmpXchg(LI);
409  default:
410  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
411  }
412 }
413 
414 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
415  IRBuilder<> Builder(LI);
416 
417  // On some architectures, load-linked instructions are atomic for larger
418  // sizes than normal loads. For example, the only 64-bit load guaranteed
419  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
420  Value *Val =
421  TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
422  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
423 
424  LI->replaceAllUsesWith(Val);
425  LI->eraseFromParent();
426 
427  return true;
428 }
429 
430 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
431  IRBuilder<> Builder(LI);
432  AtomicOrdering Order = LI->getOrdering();
433  Value *Addr = LI->getPointerOperand();
434  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
435  Constant *DummyVal = Constant::getNullValue(Ty);
436 
437  Value *Pair = Builder.CreateAtomicCmpXchg(
438  Addr, DummyVal, DummyVal, Order,
439  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
440  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
441 
442  LI->replaceAllUsesWith(Loaded);
443  LI->eraseFromParent();
444 
445  return true;
446 }
447 
448 /// Convert an atomic store of a non-integral type to an integer store of the
449 /// equivalent bitwidth. We used to not support floating point or vector
450 /// atomics in the IR at all. The backends learned to deal with the bitcast
451 /// idiom because that was the only way of expressing the notion of an atomic
452 /// float or vector store. The long term plan is to teach each backend to
453 /// instruction select from the original atomic store, but as a migration
454 /// mechanism, we convert back to the old format which the backends understand.
455 /// Each backend will need individual work to recognize the new format.
456 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
457  IRBuilder<> Builder(SI);
458  auto *M = SI->getModule();
459  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
460  M->getDataLayout());
461  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
462 
463  Value *Addr = SI->getPointerOperand();
464  Type *PT = PointerType::get(NewTy,
465  Addr->getType()->getPointerAddressSpace());
466  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
467 
468  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
469  NewSI->setAlignment(SI->getAlignment());
470  NewSI->setVolatile(SI->isVolatile());
471  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
472  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
473  SI->eraseFromParent();
474  return NewSI;
475 }
476 
477 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
478  // This function is only called on atomic stores that are too large to be
479  // atomic if implemented as a native store. So we replace them by an
480  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
481  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
482  // It is the responsibility of the target to only signal expansion via
483  // shouldExpandAtomicRMW in cases where this is required and possible.
484  IRBuilder<> Builder(SI);
485  AtomicRMWInst *AI =
486  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
487  SI->getValueOperand(), SI->getOrdering());
488  SI->eraseFromParent();
489 
490  // Now we have an appropriate swap instruction, lower it as usual.
491  return tryExpandAtomicRMW(AI);
492 }
493 
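// Default CreateCmpXchgInstFun used for the cmpxchg-loop expansions below:
// emits a cmpxchg of NewVal against Loaded at Addr (bitcasting FP values to
// integers first) and hands back the success flag and the freshly loaded
// value through the Success/NewLoaded out-parameters.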
494 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
495  Value *Loaded, Value *NewVal,
496  AtomicOrdering MemOpOrder,
497  Value *&Success, Value *&NewLoaded) {
498  Type *OrigTy = NewVal->getType();
499 
500  // This code can go away when cmpxchg supports FP types.
501  bool NeedBitcast = OrigTy->isFloatingPointTy();
502  if (NeedBitcast) {
503  IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
504  unsigned AS = Addr->getType()->getPointerAddressSpace();
505  Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
506  NewVal = Builder.CreateBitCast(NewVal, IntTy);
507  Loaded = Builder.CreateBitCast(Loaded, IntTy);
508  }
509 
510  Value* Pair = Builder.CreateAtomicCmpXchg(
511  Addr, Loaded, NewVal, MemOpOrder,
512  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
513  Success = Builder.CreateExtractValue(Pair, 1, "success");
514  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
515 
516  if (NeedBitcast)
517  NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
518 }
519 
520 /// Emit IR to implement the given atomicrmw operation on values in registers,
521 /// returning the new value.
522 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
523  Value *Loaded, Value *Inc) {
524  Value *NewVal;
525  switch (Op) {
526  case AtomicRMWInst::Xchg:
527  return Inc;
528  case AtomicRMWInst::Add:
529  return Builder.CreateAdd(Loaded, Inc, "new");
530  case AtomicRMWInst::Sub:
531  return Builder.CreateSub(Loaded, Inc, "new");
532  case AtomicRMWInst::And:
533  return Builder.CreateAnd(Loaded, Inc, "new");
534  case AtomicRMWInst::Nand:
535  return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
536  case AtomicRMWInst::Or:
537  return Builder.CreateOr(Loaded, Inc, "new");
538  case AtomicRMWInst::Xor:
539  return Builder.CreateXor(Loaded, Inc, "new");
540  case AtomicRMWInst::Max:
541  NewVal = Builder.CreateICmpSGT(Loaded, Inc);
542  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
543  case AtomicRMWInst::Min:
544  NewVal = Builder.CreateICmpSLE(Loaded, Inc);
545  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
546  case AtomicRMWInst::UMax:
547  NewVal = Builder.CreateICmpUGT(Loaded, Inc);
548  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
549  case AtomicRMWInst::UMin:
550  NewVal = Builder.CreateICmpULE(Loaded, Inc);
551  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
552  case AtomicRMWInst::FAdd:
553  return Builder.CreateFAdd(Loaded, Inc, "new");
554  case AtomicRMWInst::FSub:
555  return Builder.CreateFSub(Loaded, Inc, "new");
556  default:
557  llvm_unreachable("Unknown atomic op");
558  }
559 }
560 
561 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
562  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
563  case TargetLoweringBase::AtomicExpansionKind::None:
564  return false;
565  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
566  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
567  unsigned ValueSize = getAtomicOpSize(AI);
568  if (ValueSize < MinCASSize) {
569  llvm_unreachable(
570  "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
571  } else {
572  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
573  return performAtomicOp(AI->getOperation(), Builder, Loaded,
574  AI->getValOperand());
575  };
576  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
577  AI->getOrdering(), PerformOp);
578  }
579  return true;
580  }
581  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
582  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
583  unsigned ValueSize = getAtomicOpSize(AI);
584  if (ValueSize < MinCASSize) {
585  expandPartwordAtomicRMW(AI,
586  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
587  } else {
588  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
589  }
590  return true;
591  }
592  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
593  expandAtomicRMWToMaskedIntrinsic(AI);
594  return true;
595  }
596  default:
597  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
598  }
599 }
600 
601 namespace {
602 
603 /// Result values from createMaskInstrs helper.
604 struct PartwordMaskValues {
605  Type *WordType;
606  Type *ValueType;
607  Value *AlignedAddr;
608  Value *ShiftAmt;
609  Value *Mask;
610  Value *Inv_Mask;
611 };
612 
613 } // end anonymous namespace
614 
615 /// This is a helper function which builds instructions to provide
616 /// values necessary for partword atomic operations. It takes an
617 /// incoming address, Addr, and ValueType, and constructs the address,
618 /// shift-amounts and masks needed to work with a larger value of size
619 /// WordSize.
620 ///
621 /// AlignedAddr: Addr rounded down to a multiple of WordSize
622 ///
623 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
624 /// from AlignAddr for it to have the same value as if
625 /// ValueType was loaded from Addr.
626 ///
627 /// Mask: Value to mask with the value loaded from AlignAddr to
628 /// include only the part that would've been loaded from Addr.
629 ///
630 /// Inv_Mask: The inverse of Mask.
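///
/// For example, for an i8 value at byte offset 3 within a 4-byte word on a
/// little-endian target, this produces AlignedAddr = Addr & ~3, ShiftAmt = 24,
/// Mask = 0xFF000000 and Inv_Mask = 0x00FFFFFF.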
631 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
632  Type *ValueType, Value *Addr,
633  unsigned WordSize) {
634  PartwordMaskValues Ret;
635 
636  BasicBlock *BB = I->getParent();
637  Function *F = BB->getParent();
638  Module *M = I->getModule();
639 
640  LLVMContext &Ctx = F->getContext();
641  const DataLayout &DL = M->getDataLayout();
642 
643  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
644 
645  assert(ValueSize < WordSize);
646 
647  Ret.ValueType = ValueType;
648  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
649 
650  Type *WordPtrType =
651  Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
652 
653  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
654  Ret.AlignedAddr = Builder.CreateIntToPtr(
655  Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
656  "AlignedAddr");
657 
658  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
659  if (DL.isLittleEndian()) {
660  // turn bytes into bits
661  Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
662  } else {
663  // turn bytes into bits, and count from the other side.
664  Ret.ShiftAmt =
665  Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
666  }
667 
668  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
669  Ret.Mask = Builder.CreateShl(
670  ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
671  "Mask");
672  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
673 
674  return Ret;
675 }
676 
677 /// Emit IR to implement a masked version of a given atomicrmw
678 /// operation. (That is, only the bits under the Mask should be
679 /// affected by the operation)
680 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
681  IRBuilder<> &Builder, Value *Loaded,
682  Value *Shifted_Inc, Value *Inc,
683  const PartwordMaskValues &PMV) {
684  // TODO: update to use
685  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
686  // to merge bits from two values without requiring PMV.Inv_Mask.
687  switch (Op) {
688  case AtomicRMWInst::Xchg: {
689  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
690  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
691  return FinalVal;
692  }
693  case AtomicRMWInst::Or:
694  case AtomicRMWInst::Xor:
695  case AtomicRMWInst::And:
696  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
697  case AtomicRMWInst::Add:
698  case AtomicRMWInst::Sub:
699  case AtomicRMWInst::Nand: {
700  // The other arithmetic ops need to be masked into place.
701  Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
702  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
703  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
704  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
705  return FinalVal;
706  }
707  case AtomicRMWInst::Max:
708  case AtomicRMWInst::Min:
709  case AtomicRMWInst::UMax:
710  case AtomicRMWInst::UMin: {
711  // Finally, comparison ops will operate on the full value, so
712  // truncate down to the original size, and expand out again after
713  // doing the operation.
714  Value *Loaded_Shiftdown = Builder.CreateTrunc(
715  Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
716  Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
717  Value *NewVal_Shiftup = Builder.CreateShl(
718  Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
719  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
720  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
721  return FinalVal;
722  }
723  default:
724  llvm_unreachable("Unknown atomic op");
725  }
726 }
727 
728 /// Expand a sub-word atomicrmw operation into an appropriate
729 /// word-sized operation.
730 ///
731 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
732 /// way as a typical atomicrmw expansion. The only difference here is
733 /// that the operation inside of the loop must operate only upon a
734 /// part of the value.
735 void AtomicExpand::expandPartwordAtomicRMW(
736  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
737  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
738 
739  AtomicOrdering MemOpOrder = AI->getOrdering();
740 
741  IRBuilder<> Builder(AI);
742 
743  PartwordMaskValues PMV =
744  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
745  TLI->getMinCmpXchgSizeInBits() / 8);
746 
747  Value *ValOperand_Shifted =
748  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
749  PMV.ShiftAmt, "ValOperand_Shifted");
750 
751  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
752  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
753  ValOperand_Shifted, AI->getValOperand(), PMV);
754  };
755 
756  // TODO: When we're ready to support LLSC conversions too, use
757  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
758  Value *OldResult =
759  insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
760  PerformPartwordOp, createCmpXchgInstFun);
761  Value *FinalOldResult = Builder.CreateTrunc(
762  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
763  AI->replaceAllUsesWith(FinalOldResult);
764  AI->eraseFromParent();
765 }
766 
767 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
768 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
769  IRBuilder<> Builder(AI);
770  AtomicRMWInst::BinOp Op = AI->getOperation();
771 
772  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
773  Op == AtomicRMWInst::And) &&
774  "Unable to widen operation");
775 
776  PartwordMaskValues PMV =
777  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
778  TLI->getMinCmpXchgSizeInBits() / 8);
779 
780  Value *ValOperand_Shifted =
781  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
782  PMV.ShiftAmt, "ValOperand_Shifted");
783 
784  Value *NewOperand;
785 
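 // Or/Xor with zero bits outside the value leave the rest of the word
 // unchanged, so the shifted operand can be used directly. For And, the bits
 // outside the value must be ones (the inverse mask) so they are preserved.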
786  if (Op == AtomicRMWInst::And)
787  NewOperand =
788  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
789  else
790  NewOperand = ValOperand_Shifted;
791 
792  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
793  NewOperand, AI->getOrdering());
794 
795  Value *FinalOldResult = Builder.CreateTrunc(
796  Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
797  AI->replaceAllUsesWith(FinalOldResult);
798  AI->eraseFromParent();
799  return NewAI;
800 }
801 
802 void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
803  // The basic idea here is that we're expanding a cmpxchg of a
804  // smaller memory size up to a word-sized cmpxchg. To do this, we
805  // need to add a retry-loop for strong cmpxchg, so that
806  // modifications to other parts of the word don't cause a spurious
807  // failure.
808 
809  // This generates code like the following:
810  // [[Setup mask values PMV.*]]
811  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
812  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
813  // %InitLoaded = load i32* %addr
814  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
815  // br partword.cmpxchg.loop
816  // partword.cmpxchg.loop:
817  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
818  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
819  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
820  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
821  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
822  // i32 %FullWord_NewVal success_ordering failure_ordering
823  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
824  // %Success = extractvalue { i32, i1 } %NewCI, 1
825  // br i1 %Success, label %partword.cmpxchg.end,
826  // label %partword.cmpxchg.failure
827  // partword.cmpxchg.failure:
828  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
829  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
830  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
831  // label %partword.cmpxchg.end
832  // partword.cmpxchg.end:
833  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
834  // %FinalOldVal = trunc i32 %tmp1 to i8
835  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
836  // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
837 
838  Value *Addr = CI->getPointerOperand();
839  Value *Cmp = CI->getCompareOperand();
840  Value *NewVal = CI->getNewValOperand();
841 
842  BasicBlock *BB = CI->getParent();
843  Function *F = BB->getParent();
844  IRBuilder<> Builder(CI);
845  LLVMContext &Ctx = Builder.getContext();
846 
847  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
848 
849  BasicBlock *EndBB =
850  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
851  auto FailureBB =
852  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
853  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
854 
855  // The split call above "helpfully" added a branch at the end of BB
856  // (to the wrong place).
857  std::prev(BB->end())->eraseFromParent();
858  Builder.SetInsertPoint(BB);
859 
860  PartwordMaskValues PMV = createMaskInstrs(
861  Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
862 
863  // Shift the incoming values over, into the right location in the word.
864  Value *NewVal_Shifted =
865  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
866  Value *Cmp_Shifted =
867  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
868 
869  // Load the entire current word, and mask into place the expected and new
870  // values
871  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
872  InitLoaded->setVolatile(CI->isVolatile());
873  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
874  Builder.CreateBr(LoopBB);
875 
876  // partword.cmpxchg.loop:
877  Builder.SetInsertPoint(LoopBB);
878  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
879  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
880 
881  // Mask/Or the expected and new values into place in the loaded word.
882  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
883  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
884  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
885  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
886  CI->getFailureOrdering(), CI->getSyncScopeID());
887  NewCI->setVolatile(CI->isVolatile());
888  // When we're building a strong cmpxchg, we need a loop, so you
889  // might think we could use a weak cmpxchg inside. But, using strong
890  // allows the below comparison for ShouldContinue, and we're
891  // expecting the underlying cmpxchg to be a machine instruction,
892  // which is strong anyways.
893  NewCI->setWeak(CI->isWeak());
894 
895  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
896  Value *Success = Builder.CreateExtractValue(NewCI, 1);
897 
898  if (CI->isWeak())
899  Builder.CreateBr(EndBB);
900  else
901  Builder.CreateCondBr(Success, EndBB, FailureBB);
902 
903  // partword.cmpxchg.failure:
904  Builder.SetInsertPoint(FailureBB);
905  // Upon failure, verify that the masked-out part of the loaded value
906  // has been modified. If it didn't, abort the cmpxchg, since the
907  // masked-in part must've.
908  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
909  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
910  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
911 
912  // Add the second value to the phi from above
913  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
914 
915  // partword.cmpxchg.end:
916  Builder.SetInsertPoint(CI);
917 
918  Value *FinalOldVal = Builder.CreateTrunc(
919  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
920  Value *Res = UndefValue::get(CI->getType());
921  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
922  Res = Builder.CreateInsertValue(Res, Success, 1);
923 
924  CI->replaceAllUsesWith(Res);
925  CI->eraseFromParent();
926 }
927 
928 void AtomicExpand::expandAtomicOpToLLSC(
929  Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
930  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
931  IRBuilder<> Builder(I);
932  Value *Loaded =
933  insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
934 
935  I->replaceAllUsesWith(Loaded);
936  I->eraseFromParent();
937 }
938 
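// Expand a partword atomicrmw by handing the word-aligned address, shifted
// operand, mask, and shift amount to the target's masked-atomicrmw hook
// (emitMaskedAtomicRMWIntrinsic), then shift and truncate the returned word
// to recover the old partword value.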
939 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
940  IRBuilder<> Builder(AI);
941 
942  PartwordMaskValues PMV =
943  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
944  TLI->getMinCmpXchgSizeInBits() / 8);
945 
946  // The value operand must be sign-extended for signed min/max so that the
947  // target's signed comparison instructions can be used. Otherwise, just
948  // zero-ext.
949  Instruction::CastOps CastOp = Instruction::ZExt;
950  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
951  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
952  CastOp = Instruction::SExt;
953 
954  Value *ValOperand_Shifted = Builder.CreateShl(
955  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
956  PMV.ShiftAmt, "ValOperand_Shifted");
957  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
958  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
959  AI->getOrdering());
960  Value *FinalOldResult = Builder.CreateTrunc(
961  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
962  AI->replaceAllUsesWith(FinalOldResult);
963  AI->eraseFromParent();
964 }
965 
966 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
967  IRBuilder<> Builder(CI);
968 
969  PartwordMaskValues PMV = createMaskInstrs(
970  Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
971  TLI->getMinCmpXchgSizeInBits() / 8);
972 
973  Value *CmpVal_Shifted = Builder.CreateShl(
974  Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
975  "CmpVal_Shifted");
976  Value *NewVal_Shifted = Builder.CreateShl(
977  Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
978  "NewVal_Shifted");
979  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
980  Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
981  CI->getSuccessOrdering());
982  Value *FinalOldVal = Builder.CreateTrunc(
983  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
984 
985  Value *Res = UndefValue::get(CI->getType());
986  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
987  Value *Success = Builder.CreateICmpEQ(
988  CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
989  Res = Builder.CreateInsertValue(Res, Success, 1);
990 
991  CI->replaceAllUsesWith(Res);
992  CI->eraseFromParent();
993 }
994 
995 Value *AtomicExpand::insertRMWLLSCLoop(
996  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
997  AtomicOrdering MemOpOrder,
998  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
999  LLVMContext &Ctx = Builder.getContext();
1000  BasicBlock *BB = Builder.GetInsertBlock();
1001  Function *F = BB->getParent();
1002 
1003  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1004  //
1005  // The standard expansion we produce is:
1006  // [...]
1007  // atomicrmw.start:
1008  // %loaded = @load.linked(%addr)
1009  // %new = some_op iN %loaded, %incr
1010  // %stored = @store_conditional(%new, %addr)
1011  // %try_again = icmp i32 ne %stored, 0
1012  // br i1 %try_again, label %loop, label %atomicrmw.end
1013  // atomicrmw.end:
1014  // [...]
1015  BasicBlock *ExitBB =
1016  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1017  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1018 
1019  // The split call above "helpfully" added a branch at the end of BB (to the
1020  // wrong place).
1021  std::prev(BB->end())->eraseFromParent();
1022  Builder.SetInsertPoint(BB);
1023  Builder.CreateBr(LoopBB);
1024 
1025  // Start the main loop block now that we've taken care of the preliminaries.
1026  Builder.SetInsertPoint(LoopBB);
1027  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1028 
1029  Value *NewVal = PerformOp(Builder, Loaded);
1030 
1031  Value *StoreSuccess =
1032  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1033  Value *TryAgain = Builder.CreateICmpNE(
1034  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1035  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1036 
1037  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1038  return Loaded;
1039 }
1040 
1041 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1042 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1043 /// IR. As a migration step, we convert back to what used to be the standard
1044 /// way to represent a pointer cmpxchg so that we can update backends one by
1045 /// one.
1046 AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1047  auto *M = CI->getModule();
1048  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1049  M->getDataLayout());
1050 
1051  IRBuilder<> Builder(CI);
1052 
1053  Value *Addr = CI->getPointerOperand();
1054  Type *PT = PointerType::get(NewTy,
1055  Addr->getType()->getPointerAddressSpace());
1056  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1057 
1058  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1059  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1060 
1061 
1062  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
1063  CI->getSuccessOrdering(),
1064  CI->getFailureOrdering(),
1065  CI->getSyncScopeID());
1066  NewCI->setVolatile(CI->isVolatile());
1067  NewCI->setWeak(CI->isWeak());
1068  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1069 
1070  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1071  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1072 
1073  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1074 
1075  Value *Res = UndefValue::get(CI->getType());
1076  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1077  Res = Builder.CreateInsertValue(Res, Succ, 1);
1078 
1079  CI->replaceAllUsesWith(Res);
1080  CI->eraseFromParent();
1081  return NewCI;
1082 }
1083 
1084 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1085  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1086  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1087  Value *Addr = CI->getPointerOperand();
1088  BasicBlock *BB = CI->getParent();
1089  Function *F = BB->getParent();
1090  LLVMContext &Ctx = F->getContext();
1091  // If shouldInsertFencesForAtomic() returns true, then the target does not
1092  // want to deal with memory orders, and emitLeading/TrailingFence should take
1093  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1094  // should preserve the ordering.
1095  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1096  AtomicOrdering MemOpOrder =
1097  ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
1098 
1099  // In implementations which use a barrier to achieve release semantics, we can
1100  // delay emitting this barrier until we know a store is actually going to be
1101  // attempted. The cost of this delay is that we need 2 copies of the block
1102  // emitting the load-linked, affecting code size.
1103  //
1104  // Ideally, this logic would be unconditional except for the minsize check
1105  // since in other cases the extra blocks naturally collapse down to the
1106  // minimal loop. Unfortunately, this puts too much stress on later
1107  // optimisations so we avoid emitting the extra logic in those cases too.
1108  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1109  SuccessOrder != AtomicOrdering::Monotonic &&
1110  SuccessOrder != AtomicOrdering::Acquire &&
1111  !F->optForMinSize();
1112 
1113  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1114  // do it even on minsize.
1115  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
1116 
1117  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1118  //
1119  // The full expansion we produce is:
1120  // [...]
1121  // cmpxchg.start:
1122  // %unreleasedload = @load.linked(%addr)
1123  // %should_store = icmp eq %unreleasedload, %desired
1124  // br i1 %should_store, label %cmpxchg.fencedstore,
1125  // label %cmpxchg.nostore
1126  // cmpxchg.releasingstore:
1127  // fence?
1128  // br label cmpxchg.trystore
1129  // cmpxchg.trystore:
1130  // %loaded.trystore = phi [%unreleasedload, %releasingstore],
1131  // [%releasedload, %cmpxchg.releasedload]
1132  // %stored = @store_conditional(%new, %addr)
1133  // %success = icmp eq i32 %stored, 0
1134  // br i1 %success, label %cmpxchg.success,
1135  // label %cmpxchg.releasedload/%cmpxchg.failure
1136  // cmpxchg.releasedload:
1137  // %releasedload = @load.linked(%addr)
1138  // %should_store = icmp eq %releasedload, %desired
1139  // br i1 %should_store, label %cmpxchg.trystore,
1140  // label %cmpxchg.failure
1141  // cmpxchg.success:
1142  // fence?
1143  // br label %cmpxchg.end
1144  // cmpxchg.nostore:
1145  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1146  // [%releasedload,
1147  // %cmpxchg.releasedload/%cmpxchg.trystore]
1148  // @load_linked_fail_balance()?
1149  // br label %cmpxchg.failure
1150  // cmpxchg.failure:
1151  // fence?
1152  // br label %cmpxchg.end
1153  // cmpxchg.end:
1154  // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
1155  // [%loaded.trystore, %cmpxchg.trystore]
1156  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1157  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1158  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1159  // [...]
1160  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1161  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1162  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1163  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1164  auto ReleasedLoadBB =
1165  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1166  auto TryStoreBB =
1167  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1168  auto ReleasingStoreBB =
1169  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1170  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1171 
1172  // This grabs the DebugLoc from CI
1173  IRBuilder<> Builder(CI);
1174 
1175  // The split call above "helpfully" added a branch at the end of BB (to the
1176  // wrong place), but we might want a fence too. It's easiest to just remove
1177  // the branch entirely.
1178  std::prev(BB->end())->eraseFromParent();
1179  Builder.SetInsertPoint(BB);
1180  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1181  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1182  Builder.CreateBr(StartBB);
1183 
1184  // Start the main loop block now that we've taken care of the preliminaries.
1185  Builder.SetInsertPoint(StartBB);
1186  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1187  Value *ShouldStore = Builder.CreateICmpEQ(
1188  UnreleasedLoad, CI->getCompareOperand(), "should_store");
1189 
1190  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1191  // jump straight past that fence instruction (if it exists).
1192  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1193 
1194  Builder.SetInsertPoint(ReleasingStoreBB);
1195  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1196  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1197  Builder.CreateBr(TryStoreBB);
1198 
1199  Builder.SetInsertPoint(TryStoreBB);
1200  Value *StoreSuccess = TLI->emitStoreConditional(
1201  Builder, CI->getNewValOperand(), Addr, MemOpOrder);
1202  StoreSuccess = Builder.CreateICmpEQ(
1203  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1204  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1205  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1206  CI->isWeak() ? FailureBB : RetryBB);
1207 
1208  Builder.SetInsertPoint(ReleasedLoadBB);
1209  Value *SecondLoad;
1210  if (HasReleasedLoadBB) {
1211  SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1212  ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
1213  "should_store");
1214 
1215  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1216  // jump straight past that fence instruction (if it exists).
1217  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1218  } else
1219  Builder.CreateUnreachable();
1220 
1221  // Make sure later instructions don't get reordered with a fence if
1222  // necessary.
1223  Builder.SetInsertPoint(SuccessBB);
1224  if (ShouldInsertFencesForAtomic)
1225  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1226  Builder.CreateBr(ExitBB);
1227 
1228  Builder.SetInsertPoint(NoStoreBB);
1229  // In the failing case, where we don't execute the store-conditional, the
1230  // target might want to balance out the load-linked with a dedicated
1231  // instruction (e.g., on ARM, clearing the exclusive monitor).
1232  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1233  Builder.CreateBr(FailureBB);
1234 
1235  Builder.SetInsertPoint(FailureBB);
1236  if (ShouldInsertFencesForAtomic)
1237  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1238  Builder.CreateBr(ExitBB);
1239 
1240  // Finally, we have control-flow based knowledge of whether the cmpxchg
1241  // succeeded or not. We expose this to later passes by converting any
1242  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1243  // PHI.
1244  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1245  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
1246  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1247  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1248 
1249  // Setup the builder so we can create any PHIs we need.
1250  Value *Loaded;
1251  if (!HasReleasedLoadBB)
1252  Loaded = UnreleasedLoad;
1253  else {
1254  Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
1255  PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1256  TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1257  TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1258 
1259  Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
1260  PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1261  NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
1262  NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1263 
1264  Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
1265  PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1266  ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
1267  ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
1268 
1269  Loaded = ExitLoaded;
1270  }
1271 
1272  // Look for any users of the cmpxchg that are just comparing the loaded value
1273  // against the desired one, and replace them with the CFG-derived version.
1274  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1275  for (auto User : CI->users()) {
1276  auto EV = dyn_cast<ExtractValueInst>(User);
1277  if (!EV)
1278  continue;
1279 
1280  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1281  "weird extraction from { iN, i1 }");
1282 
1283  if (EV->getIndices()[0] == 0)
1284  EV->replaceAllUsesWith(Loaded);
1285  else
1286  EV->replaceAllUsesWith(Success);
1287 
1288  PrunedInsts.push_back(EV);
1289  }
1290 
1291  // We can remove the instructions now we're no longer iterating through them.
1292  for (auto EV : PrunedInsts)
1293  EV->eraseFromParent();
1294 
1295  if (!CI->use_empty()) {
1296  // Some use of the full struct return that we don't understand has happened,
1297  // so we've got to reconstruct it properly.
1298  Value *Res;
1299  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1300  Res = Builder.CreateInsertValue(Res, Success, 1);
1301 
1302  CI->replaceAllUsesWith(Res);
1303  }
1304 
1305  CI->eraseFromParent();
1306  return true;
1307 }
1308 
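/// Return true if the RMW operation cannot change the stored value (e.g.
/// add/sub/or/xor of 0, or and of -1), so it may be lowered to a fenced load.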
1309 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1310  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1311  if(!C)
1312  return false;
1313 
1314  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1315  switch(Op) {
1316  case AtomicRMWInst::Add:
1317  case AtomicRMWInst::Sub:
1318  case AtomicRMWInst::Or:
1319  case AtomicRMWInst::Xor:
1320  return C->isZero();
1321  case AtomicRMWInst::And:
1322  return C->isMinusOne();
1323  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1324  default:
1325  return false;
1326  }
1327 }
1328 
1329 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1330  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1331  tryExpandAtomicLoad(ResultingLoad);
1332  return true;
1333  }
1334  return false;
1335 }
1336 
1337 Value *AtomicExpand::insertRMWCmpXchgLoop(
1338  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1339  AtomicOrdering MemOpOrder,
1340  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1341  CreateCmpXchgInstFun CreateCmpXchg) {
1342  LLVMContext &Ctx = Builder.getContext();
1343  BasicBlock *BB = Builder.GetInsertBlock();
1344  Function *F = BB->getParent();
1345 
1346  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1347  //
1348  // The standard expansion we produce is:
1349  // [...]
1350  // %init_loaded = load atomic iN* %addr
1351  // br label %loop
1352  // loop:
1353  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1354  // %new = some_op iN %loaded, %incr
1355  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1356  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1357  // %success = extractvalue { iN, i1 } %pair, 1
1358  // br i1 %success, label %atomicrmw.end, label %loop
1359  // atomicrmw.end:
1360  // [...]
1361  BasicBlock *ExitBB =
1362  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1363  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1364 
1365  // The split call above "helpfully" added a branch at the end of BB (to the
1366  // wrong place), but we want a load. It's easiest to just remove
1367  // the branch entirely.
1368  std::prev(BB->end())->eraseFromParent();
1369  Builder.SetInsertPoint(BB);
1370  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
1371  // Atomics require at least natural alignment.
1372  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
1373  Builder.CreateBr(LoopBB);
1374 
1375  // Start the main loop block now that we've taken care of the preliminaries.
1376  Builder.SetInsertPoint(LoopBB);
1377  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1378  Loaded->addIncoming(InitLoaded, BB);
1379 
1380  Value *NewVal = PerformOp(Builder, Loaded);
1381 
1382  Value *NewLoaded = nullptr;
1383  Value *Success = nullptr;
1384 
1385  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
1386  MemOpOrder == AtomicOrdering::Unordered
1387  ? AtomicOrdering::Monotonic
1388  : MemOpOrder,
1389  Success, NewLoaded);
1390  assert(Success && NewLoaded);
1391 
1392  Loaded->addIncoming(NewLoaded, LoopBB);
1393 
1394  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1395 
1396  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1397  return NewLoaded;
1398 }
1399 
1400 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1401  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1402  unsigned ValueSize = getAtomicOpSize(CI);
1403 
1404  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1405  default:
1406  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1407  case TargetLoweringBase::AtomicExpansionKind::None:
1408  if (ValueSize < MinCASSize)
1409  expandPartwordCmpXchg(CI);
1410  return false;
1411  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1412  assert(ValueSize >= MinCASSize &&
1413  "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
1414  return expandAtomicCmpXchg(CI);
1415  }
1416  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1417  expandAtomicCmpXchgToMaskedIntrinsic(CI);
1418  return true;
1419  }
1420 }
1421 
1422 // Note: This function is exposed externally by AtomicExpandUtils.h
1423 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1424  CreateCmpXchgInstFun CreateCmpXchg) {
1425  IRBuilder<> Builder(AI);
1426  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1427  Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
1428  [&](IRBuilder<> &Builder, Value *Loaded) {
1429  return performAtomicOp(AI->getOperation(), Builder, Loaded,
1430  AI->getValOperand());
1431  },
1432  CreateCmpXchg);
1433 
1434  AI->replaceAllUsesWith(Loaded);
1435  AI->eraseFromParent();
1436  return true;
1437 }
1438 
1439 // In order to use one of the sized library calls such as
1440 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1441 // must be one of the potentially-specialized sizes, and the value
1442 // type must actually exist in C on the target (otherwise, the
1443 // function wouldn't actually be defined.)
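//
// For example, on a target whose largest legal integer is 64 bits or wider, a
// 16-byte access that is at least 16-byte aligned can use the sized
// __atomic_compare_exchange_16 call, while a 3-byte or under-aligned access
// must fall back to the generic, size-parameterized __atomic_* call.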
1444 static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
1445  const DataLayout &DL) {
1446  // TODO: "LargestSize" is an approximation for "largest type that
1447  // you can express in C". It seems to be the case that int128 is
1448  // supported on all 64-bit platforms, otherwise only up to 64-bit
1449  // integers are supported. If we get this wrong, then we'll try to
1450  // call a sized libcall that doesn't actually exist. There should
1451  // really be some more reliable way in LLVM of determining integer
1452  // sizes which are valid in the target's C ABI...
1453  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1454  return Align >= Size &&
1455  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1456  Size <= LargestSize;
1457 }
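// A rough feel for the predicate above (the concrete answers depend on the
// target's DataLayout; these assume a typical 64-bit target where
// getLargestLegalIntTypeSizeInBits() >= 64, so LargestSize == 16):
//   canUseSizedAtomicCall(4, 4, DL)   -> true   (__atomic_*_4 is usable)
//   canUseSizedAtomicCall(4, 2, DL)   -> false  (under-aligned)
//   canUseSizedAtomicCall(3, 4, DL)   -> false  (3 is not a specialized size)
//   canUseSizedAtomicCall(16, 16, DL) -> true here, but false on a target
//                                        whose largest legal integer is 64 bits.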
1458 
1459 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1460  static const RTLIB::Libcall Libcalls[6] = {
1461  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1462  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1463  unsigned Size = getAtomicOpSize(I);
1464  unsigned Align = getAtomicOpAlign(I);
1465 
1466  bool expanded = expandAtomicOpToLibcall(
1467  I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
1468  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1469  (void)expanded;
1470  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
1471 }
1472 
1473 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1474  static const RTLIB::Libcall Libcalls[6] = {
1475  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1476  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1477  unsigned Size = getAtomicOpSize(I);
1478  unsigned Align = getAtomicOpAlign(I);
1479 
1480  bool expanded = expandAtomicOpToLibcall(
1481  I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
1482  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1483  (void)expanded;
1484  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
1485 }
1486 
1487 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1488  static const RTLIB::Libcall Libcalls[6] = {
1489  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1490  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1491  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1492  unsigned Size = getAtomicOpSize(I);
1493  unsigned Align = getAtomicOpAlign(I);
1494 
1495  bool expanded = expandAtomicOpToLibcall(
1496  I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
1497  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1498  Libcalls);
1499  (void)expanded;
1500  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
1501 }
1502 
1503 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1504  static const RTLIB::Libcall LibcallsXchg[6] = {
1505  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1506  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1507  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1508  static const RTLIB::Libcall LibcallsAdd[6] = {
1509  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1510  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1511  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1512  static const RTLIB::Libcall LibcallsSub[6] = {
1513  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1514  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1515  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1516  static const RTLIB::Libcall LibcallsAnd[6] = {
1517  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1518  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1519  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1520  static const RTLIB::Libcall LibcallsOr[6] = {
1521  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1522  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1523  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1524  static const RTLIB::Libcall LibcallsXor[6] = {
1525  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1526  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1527  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1528  static const RTLIB::Libcall LibcallsNand[6] = {
1529  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1530  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1531  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1532 
1533  switch (Op) {
1534  case AtomicRMWInst::BAD_BINOP:
1535  llvm_unreachable("Should not have BAD_BINOP.");
1536  case AtomicRMWInst::Xchg:
1537  return makeArrayRef(LibcallsXchg);
1538  case AtomicRMWInst::Add:
1539  return makeArrayRef(LibcallsAdd);
1540  case AtomicRMWInst::Sub:
1541  return makeArrayRef(LibcallsSub);
1542  case AtomicRMWInst::And:
1543  return makeArrayRef(LibcallsAnd);
1544  case AtomicRMWInst::Or:
1545  return makeArrayRef(LibcallsOr);
1546  case AtomicRMWInst::Xor:
1547  return makeArrayRef(LibcallsXor);
1548  case AtomicRMWInst::Nand:
1549  return makeArrayRef(LibcallsNand);
1550  case AtomicRMWInst::Max:
1551  case AtomicRMWInst::Min:
1552  case AtomicRMWInst::UMax:
1553  case AtomicRMWInst::UMin:
1554  case AtomicRMWInst::FAdd:
1555  case AtomicRMWInst::FSub:
1556  // No atomic libcalls are available for max/min/umax/umin or fadd/fsub.
1557  return {};
1558  }
1559  llvm_unreachable("Unexpected AtomicRMW operation.");
1560 }
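// For example (a usage sketch of the table lookup above):
//   ArrayRef<RTLIB::Libcall> LCs = GetRMWLibcall(AtomicRMWInst::Add);
//   // LCs[0] == RTLIB::UNKNOWN_LIBCALL    (no generic __atomic_fetch_add)
//   // LCs[3] == RTLIB::ATOMIC_FETCH_ADD_4 (the 4-byte specialization)
// while GetRMWLibcall(AtomicRMWInst::Max) returns an empty ArrayRef, which
// forces the cmpxchg-loop fallback in expandAtomicRMWToLibcall below.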
1561 
1562 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1563  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1564 
1565  unsigned Size = getAtomicOpSize(I);
1566  unsigned Align = getAtomicOpAlign(I);
1567 
1568  bool Success = false;
1569  if (!Libcalls.empty())
1570  Success = expandAtomicOpToLibcall(
1571  I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
1572  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1573 
1574  // The expansion failed: either there were no libcalls at all for
1575  // the operation (min/max), or there were only size-specialized
1576  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1577  // CAS libcall, via a CAS loop, instead.
1578  if (!Success) {
1579  expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
1580  Value *Loaded, Value *NewVal,
1581  AtomicOrdering MemOpOrder,
1582  Value *&Success, Value *&NewLoaded) {
1583  // Create the CAS instruction normally...
1584  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1585  Addr, Loaded, NewVal, MemOpOrder,
1586  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
1587  Success = Builder.CreateExtractValue(Pair, 1, "success");
1588  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1589 
1590  // ...and then expand the CAS into a libcall.
1591  expandAtomicCASToLibcall(Pair);
1592  });
1593  }
1594 }
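// To make the fallback concrete: for, say,
//   %old = atomicrmw max i32* %p, i32 %v seq_cst
// there is no __atomic_fetch_max_N, so the code above builds the usual
// compare-exchange loop (performAtomicOp computes the new value with an
// icmp sgt + select) and then rewrites the cmpxchg inside that loop into a
// libcall roughly of the form
//   %ok = call zeroext i1 @__atomic_compare_exchange_4(
//             i8* %p.i8, i8* %expected.i8, i32 %desired, i32 5, i32 5)
// assuming size and alignment permit the sized variant; otherwise the
// generic __atomic_compare_exchange is emitted instead.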
1595 
1596 // A helper routine for the above expandAtomic*ToLibcall functions.
1597 //
1598 // 'Libcalls' contains an array of enum values for the particular
1599 // ATOMIC libcalls to be emitted. All of the other arguments besides
1600 // 'I' are extracted from the Instruction subclass by the
1601 // caller. Depending on the particular call, some will be null.
1602 bool AtomicExpand::expandAtomicOpToLibcall(
1603  Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
1604  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1605  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1606  assert(Libcalls.size() == 6);
1607 
1608  LLVMContext &Ctx = I->getContext();
1609  Module *M = I->getModule();
1610  const DataLayout &DL = M->getDataLayout();
1611  IRBuilder<> Builder(I);
1612  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1613 
1614  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
1615  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1616 
1617  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
1618 
1619  // TODO: the "order" argument type is "int", not int32. So
1620  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1621  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1622  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1623  Constant *OrderingVal =
1624  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1625  Constant *Ordering2Val = nullptr;
1626  if (CASExpected) {
1627  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1628  Ordering2Val =
1629  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1630  }
1631  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1632 
1633  RTLIB::Libcall RTLibType;
1634  if (UseSizedLibcall) {
1635  switch (Size) {
1636  case 1: RTLibType = Libcalls[1]; break;
1637  case 2: RTLibType = Libcalls[2]; break;
1638  case 4: RTLibType = Libcalls[3]; break;
1639  case 8: RTLibType = Libcalls[4]; break;
1640  case 16: RTLibType = Libcalls[5]; break;
1641  }
1642  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1643  RTLibType = Libcalls[0];
1644  } else {
1645  // Can't use sized function, and there's no generic for this
1646  // operation, so give up.
1647  return false;
1648  }
1649 
1650  // Build up the function call. There are two kinds. First, the sized
1651  // variants. These calls are going to be one of the following (with
1652  // N=1,2,4,8,16):
1653  // iN __atomic_load_N(iN *ptr, int ordering)
1654  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1655  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1656  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1657  // int success_order, int failure_order)
1658  //
1659  // Note that these functions can be used for non-integer atomic
1660  // operations; the values just need to be bitcast to integers on the
1661  // way in and out.
1662  //
1663  // And, then, the generic variants. They look like the following:
1664  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1665  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1666  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1667  // int ordering)
1668  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1669  // void *desired, int success_order,
1670  // int failure_order)
1671  //
1672  // The different signatures are built up depending on the
1673  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1674  // variables.
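 //
 // For instance (roughly; the exact casts depend on the operands), a
 // sufficiently aligned
 //   %r = atomicrmw xchg i32* %p, i32 %v monotonic
 // becomes the sized form
 //   %r = call i32 @__atomic_exchange_4(i8* %p.i8, i32 %v, i32 0)
 // whereas an oddly sized or under-aligned exchange takes the generic form,
 // passing the value and the result through stack temporaries:
 //   call void @__atomic_exchange(i64 %size, i8* %p.i8, i8* %val.i8,
 //                                i8* %ret.i8, i32 0)
 // followed by a load of the result from the %ret.i8 temporary.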
1675 
1676  AllocaInst *AllocaCASExpected = nullptr;
1677  Value *AllocaCASExpected_i8 = nullptr;
1678  AllocaInst *AllocaValue = nullptr;
1679  Value *AllocaValue_i8 = nullptr;
1680  AllocaInst *AllocaResult = nullptr;
1681  Value *AllocaResult_i8 = nullptr;
1682 
1683  Type *ResultTy;
1684  SmallVector<Value *, 6> Args;
1685  AttributeList Attr;
1686 
1687  // 'size' argument.
1688  if (!UseSizedLibcall) {
1689  // Note, getIntPtrType is assumed equivalent to size_t.
1690  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1691  }
1692 
1693  // 'ptr' argument.
1694  Value *PtrVal =
1695  Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
1696  Args.push_back(PtrVal);
1697 
1698  // 'expected' argument, if present.
1699  if (CASExpected) {
1700  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1701  AllocaCASExpected->setAlignment(AllocaAlignment);
1702  AllocaCASExpected_i8 =
1703  Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
1704  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1705  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1706  Args.push_back(AllocaCASExpected_i8);
1707  }
1708 
1709  // 'val' argument ('desired' for cas), if present.
1710  if (ValueOperand) {
1711  if (UseSizedLibcall) {
1712  Value *IntValue =
1713  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1714  Args.push_back(IntValue);
1715  } else {
1716  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1717  AllocaValue->setAlignment(AllocaAlignment);
1718  AllocaValue_i8 =
1719  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1720  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1721  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1722  Args.push_back(AllocaValue_i8);
1723  }
1724  }
1725 
1726  // 'ret' argument.
1727  if (!CASExpected && HasResult && !UseSizedLibcall) {
1728  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1729  AllocaResult->setAlignment(AllocaAlignment);
1730  AllocaResult_i8 =
1731  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
1732  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1733  Args.push_back(AllocaResult_i8);
1734  }
1735 
1736  // 'ordering' ('success_order' for cas) argument.
1737  Args.push_back(OrderingVal);
1738 
1739  // 'failure_order' argument, if present.
1740  if (Ordering2Val)
1741  Args.push_back(Ordering2Val);
1742 
1743  // Now, the return type.
1744  if (CASExpected) {
1745  ResultTy = Type::getInt1Ty(Ctx);
1746  Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
1747  } else if (HasResult && UseSizedLibcall)
1748  ResultTy = SizedIntTy;
1749  else
1750  ResultTy = Type::getVoidTy(Ctx);
1751 
1752  // Done with setting up arguments and return types, create the call:
1753  SmallVector<Type *, 6> ArgTys;
1754  for (Value *Arg : Args)
1755  ArgTys.push_back(Arg->getType());
1756  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1757  FunctionCallee LibcallFn =
1758  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1759  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1760  Call->setAttributes(Attr);
1761  Value *Result = Call;
1762 
1763  // And then, extract the results...
1764  if (ValueOperand && !UseSizedLibcall)
1765  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1766 
1767  if (CASExpected) {
1768  // The final result from the CAS is {load of 'expected' alloca, bool result
1769  // from call}
1770  Type *FinalResultTy = I->getType();
1771  Value *V = UndefValue::get(FinalResultTy);
1772  Value *ExpectedOut = Builder.CreateAlignedLoad(
1773  CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1774  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1775  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1776  V = Builder.CreateInsertValue(V, Result, 1);
1777  I->replaceAllUsesWith(V);
1778  } else if (HasResult) {
1779  Value *V;
1780  if (UseSizedLibcall)
1781  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1782  else {
1783  V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1784  AllocaAlignment);
1785  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1786  }
1787  I->replaceAllUsesWith(V);
1788  }
1789  I->eraseFromParent();
1790  return true;
1791 }
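// Putting the pieces above together for a compare-exchange: a sketch of what
//   %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst
// becomes when it is lowered through this routine. %exp is spilled to an
// aligned alloca, the call
//   %ok = call zeroext i1 @__atomic_compare_exchange_4(
//             i8* %p.i8, i8* %exp.addr.i8, i32 %new, i32 5, i32 5)
// is emitted, and the original { i32, i1 } result is rebuilt by inserting
// the reload of %exp.addr at index 0 and %ok at index 1 before the cmpxchg
// instruction is erased.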