LLVM 4.0.0
AtomicExpandPass.cpp
1 //===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass (at IR level) to replace atomic instructions with
11 // __atomic_* library calls, or target specific instruction which implement the
12 // same semantics in a way which better fits the target backend. This can
13 // include the use of (intrinsic-based) load-linked/store-conditional loops,
14 // AtomicCmpXchg, or type coercions.
15 //
16 //===----------------------------------------------------------------------===//
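// Illustrative example (the exact output depends on the target hooks used
// below, so this is only a sketch): on a target that requests CmpXChg
// expansion, an
//   %old = atomicrmw add i32* %p, i32 1 seq_cst
// is rewritten into a loop that computes %new = add i32 %loaded, 1 and
// retries a `cmpxchg i32* %p, i32 %loaded, i32 %new` until it succeeds,
// while a size or alignment the target cannot handle natively is lowered to
// a libcall such as __atomic_fetch_add_4.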
17 
18 #include "llvm/CodeGen/AtomicExpandUtils.h"
19 #include "llvm/CodeGen/Passes.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/InstIterator.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Intrinsics.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Target/TargetLowering.h"
29 #include "llvm/Target/TargetMachine.h"
30 #include "llvm/Target/TargetSubtargetInfo.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "atomic-expand"
35 
36 namespace {
37  class AtomicExpand: public FunctionPass {
38  const TargetMachine *TM;
39  const TargetLowering *TLI;
40  public:
41  static char ID; // Pass identification, replacement for typeid
42  explicit AtomicExpand(const TargetMachine *TM = nullptr)
43  : FunctionPass(ID), TM(TM), TLI(nullptr) {
44  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
45  }
46 
47  bool runOnFunction(Function &F) override;
48 
49  private:
50  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
51  bool IsStore, bool IsLoad);
52  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
53  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
54  bool tryExpandAtomicLoad(LoadInst *LI);
55  bool expandAtomicLoadToLL(LoadInst *LI);
56  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
57  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
58  bool expandAtomicStore(StoreInst *SI);
59  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
60  Value *
61  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
62  AtomicOrdering MemOpOrder,
63  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
64  void expandAtomicOpToLLSC(
65  Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
66  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
67  void expandPartwordAtomicRMW(
68  AtomicRMWInst *I,
69  TargetLoweringBase::AtomicExpansionKind ExpansionKind);
70  void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
71 
72  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
73  static Value *insertRMWCmpXchgLoop(
74  IRBuilder<> &Builder, Type *ResultType, Value *Addr,
75  AtomicOrdering MemOpOrder,
76  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
77  CreateCmpXchgInstFun CreateCmpXchg);
78 
79  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
80  bool isIdempotentRMW(AtomicRMWInst *AI);
81  bool simplifyIdempotentRMW(AtomicRMWInst *AI);
82 
83  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
84  Value *PointerOperand, Value *ValueOperand,
85  Value *CASExpected, AtomicOrdering Ordering,
86  AtomicOrdering Ordering2,
87  ArrayRef<RTLIB::Libcall> Libcalls);
88  void expandAtomicLoadToLibcall(LoadInst *LI);
89  void expandAtomicStoreToLibcall(StoreInst *LI);
90  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
91  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
92 
93  friend bool
94  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
95  CreateCmpXchgInstFun CreateCmpXchg);
96  };
97 }
98 
99 char AtomicExpand::ID = 0;
101 INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
102  false, false)
103 
104 FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
105  return new AtomicExpand(TM);
106 }
107 
108 namespace {
109 // Helper functions to retrieve the size of atomic instructions.
110 unsigned getAtomicOpSize(LoadInst *LI) {
111  const DataLayout &DL = LI->getModule()->getDataLayout();
112  return DL.getTypeStoreSize(LI->getType());
113 }
114 
115 unsigned getAtomicOpSize(StoreInst *SI) {
116  const DataLayout &DL = SI->getModule()->getDataLayout();
117  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
118 }
119 
120 unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
121  const DataLayout &DL = RMWI->getModule()->getDataLayout();
122  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
123 }
124 
125 unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
126  const DataLayout &DL = CASI->getModule()->getDataLayout();
127  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
128 }
129 
130 // Helper functions to retrieve the alignment of atomic instructions.
131 unsigned getAtomicOpAlign(LoadInst *LI) {
132  unsigned Align = LI->getAlignment();
133  // In the future, if this IR restriction is relaxed, we should
134  // return DataLayout::getABITypeAlignment when there's no align
135  // value.
136  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
137  return Align;
138 }
139 
140 unsigned getAtomicOpAlign(StoreInst *SI) {
141  unsigned Align = SI->getAlignment();
142  // In the future, if this IR restriction is relaxed, we should
143  // return DataLayout::getABITypeAlignment when there's no align
144  // value.
145  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
146  return Align;
147 }
148 
149 unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
150  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
151  // default alignment for load/store, the default here is to assume
152  // it has NATURAL alignment, not DataLayout-specified alignment.
153  const DataLayout &DL = RMWI->getModule()->getDataLayout();
154  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
155 }
156 
157 unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
158  // TODO(PR27168): same comment as above.
159  const DataLayout &DL = CASI->getModule()->getDataLayout();
160  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
161 }
162 
163 // Determine if a particular atomic operation has a supported size,
164 // and is of appropriate alignment, to be passed through for target
165 // lowering. (Versus turning into a __atomic libcall)
166 template <typename Inst>
167 bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
168  unsigned Size = getAtomicOpSize(I);
169  unsigned Align = getAtomicOpAlign(I);
170  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
171 }
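// Illustrative example: with getMaxAtomicSizeInBitsSupported() == 64, an
// aligned 8-byte atomic load passes this check, while a 16-byte cmpxchg or an
// under-aligned access is routed to the __atomic_* libcalls instead.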
172 
173 } // end anonymous namespace
174 
175 bool AtomicExpand::runOnFunction(Function &F) {
176  if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
177  return false;
178  TLI = TM->getSubtargetImpl(F)->getTargetLowering();
179 
180  SmallVector<Instruction *, 1> AtomicInsts;
181 
182  // Changing control-flow while iterating through it is a bad idea, so gather a
183  // list of all atomic instructions before we start.
184  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
185  Instruction *I = &*II;
186  if (I->isAtomic() && !isa<FenceInst>(I))
187  AtomicInsts.push_back(I);
188  }
189 
190  bool MadeChange = false;
191  for (auto I : AtomicInsts) {
192  auto LI = dyn_cast<LoadInst>(I);
193  auto SI = dyn_cast<StoreInst>(I);
194  auto RMWI = dyn_cast<AtomicRMWInst>(I);
195  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
196  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
197 
198  // If the Size/Alignment is not supported, replace with a libcall.
199  if (LI) {
200  if (!atomicSizeSupported(TLI, LI)) {
201  expandAtomicLoadToLibcall(LI);
202  MadeChange = true;
203  continue;
204  }
205  } else if (SI) {
206  if (!atomicSizeSupported(TLI, SI)) {
207  expandAtomicStoreToLibcall(SI);
208  MadeChange = true;
209  continue;
210  }
211  } else if (RMWI) {
212  if (!atomicSizeSupported(TLI, RMWI)) {
213  expandAtomicRMWToLibcall(RMWI);
214  MadeChange = true;
215  continue;
216  }
217  } else if (CASI) {
218  if (!atomicSizeSupported(TLI, CASI)) {
219  expandAtomicCASToLibcall(CASI);
220  MadeChange = true;
221  continue;
222  }
223  }
224 
225  if (TLI->shouldInsertFencesForAtomic(I)) {
226  auto FenceOrdering = AtomicOrdering::Monotonic;
227  bool IsStore, IsLoad;
228  if (LI && isAcquireOrStronger(LI->getOrdering())) {
229  FenceOrdering = LI->getOrdering();
230  LI->setOrdering(AtomicOrdering::Monotonic);
231  IsStore = false;
232  IsLoad = true;
233  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
234  FenceOrdering = SI->getOrdering();
235  SI->setOrdering(AtomicOrdering::Monotonic);
236  IsStore = true;
237  IsLoad = false;
238  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
239  isAcquireOrStronger(RMWI->getOrdering()))) {
240  FenceOrdering = RMWI->getOrdering();
241  RMWI->setOrdering(AtomicOrdering::Monotonic);
242  IsStore = IsLoad = true;
243  } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
244  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
245  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
246  // If a compare and swap is lowered to LL/SC, we can do smarter fence
247  // insertion, with a stronger one on the success path than on the
248  // failure path. As a result, fence insertion is directly done by
249  // expandAtomicCmpXchg in that case.
250  FenceOrdering = CASI->getSuccessOrdering();
251  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
252  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
253  IsStore = IsLoad = true;
254  }
255 
256  if (FenceOrdering != AtomicOrdering::Monotonic) {
257  MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
258  }
259  }
260 
261  if (LI) {
262  if (LI->getType()->isFloatingPointTy()) {
263  // TODO: add a TLI hook to control this so that each target can
264  // convert to lowering the original type one at a time.
265  LI = convertAtomicLoadToIntegerType(LI);
266  assert(LI->getType()->isIntegerTy() && "invariant broken");
267  MadeChange = true;
268  }
269 
270  MadeChange |= tryExpandAtomicLoad(LI);
271  } else if (SI) {
272  if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
273  // TODO: add a TLI hook to control this so that each target can
274  // convert to lowering the original type one at a time.
275  SI = convertAtomicStoreToIntegerType(SI);
276  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
277  "invariant broken");
278  MadeChange = true;
279  }
280 
281  if (TLI->shouldExpandAtomicStoreInIR(SI))
282  MadeChange |= expandAtomicStore(SI);
283  } else if (RMWI) {
284  // There are two different ways of expanding RMW instructions:
285  // - into a load if it is idempotent
286  // - into a Cmpxchg/LL-SC loop otherwise
287  // we try them in that order.
288 
289  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
290  MadeChange = true;
291  } else {
292  MadeChange |= tryExpandAtomicRMW(RMWI);
293  }
294  } else if (CASI) {
295  // TODO: when we're ready to make the change at the IR level, we can
296  // extend convertCmpXchgToInteger for floating point too.
297  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
298  "unimplemented - floating point not legal at IR level");
299  if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
300  // TODO: add a TLI hook to control this so that each target can
301  // convert to lowering the original type one at a time.
302  CASI = convertCmpXchgToIntegerType(CASI);
303  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
304  "invariant broken");
305  MadeChange = true;
306  }
307 
308  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
309  unsigned ValueSize = getAtomicOpSize(CASI);
310  if (ValueSize < MinCASSize) {
311  assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
312  "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
313  expandPartwordCmpXchg(CASI);
314  } else {
315  if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
316  MadeChange |= expandAtomicCmpXchg(CASI);
317  }
318  }
319  }
320  return MadeChange;
321 }
322 
323 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
324  bool IsStore, bool IsLoad) {
325  IRBuilder<> Builder(I);
326 
327  auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
328 
329  auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
330  // The trailing fence is emitted before the instruction instead of after
331  // because there is no easy way of setting Builder insertion point after
332  // an instruction. So we must erase it from the BB, and insert it back
333  // in the right place.
334  // We have a guard here because not every atomic operation generates a
335  // trailing fence.
336  if (TrailingFence) {
337  TrailingFence->removeFromParent();
338  TrailingFence->insertAfter(I);
339  }
340 
341  return (LeadingFence || TrailingFence);
342 }
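// Illustrative example (using the default TargetLoweringBase fence hooks):
// for a `store atomic ... release` that the caller has already downgraded to
// monotonic, this emits roughly
//   fence release
//   store atomic i32 %v, i32* %p monotonic
// with no trailing fence, since the default hook only emits one for
// acquire-or-stronger orderings.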
343 
344 /// Get the iX type with the same bitwidth as T.
345 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
346  const DataLayout &DL) {
347  EVT VT = TLI->getValueType(DL, T);
348  unsigned BitWidth = VT.getStoreSizeInBits();
349  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
350  return IntegerType::get(T->getContext(), BitWidth);
351 }
352 
353 /// Convert an atomic load of a non-integral type to an integer load of the
354 /// equivalent bitwidth. See the function comment on
355 /// convertAtomicStoreToIntegerType for background.
356 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
357  auto *M = LI->getModule();
358  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
359  M->getDataLayout());
360 
361  IRBuilder<> Builder(LI);
362 
363  Value *Addr = LI->getPointerOperand();
364  Type *PT = PointerType::get(NewTy,
365  Addr->getType()->getPointerAddressSpace());
366  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
367 
368  auto *NewLI = Builder.CreateLoad(NewAddr);
369  NewLI->setAlignment(LI->getAlignment());
370  NewLI->setVolatile(LI->isVolatile());
371  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
372  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
373 
374  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
375  LI->replaceAllUsesWith(NewVal);
376  LI->eraseFromParent();
377  return NewLI;
378 }
379 
380 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
381  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
382  case TargetLoweringBase::AtomicExpansionKind::None:
383  return false;
384  case TargetLoweringBase::AtomicExpansionKind::LLSC:
385  expandAtomicOpToLLSC(
386  LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
387  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
388  return true;
389  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
390  return expandAtomicLoadToLL(LI);
391  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
392  return expandAtomicLoadToCmpXchg(LI);
393  }
394  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
395 }
396 
397 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
398  IRBuilder<> Builder(LI);
399 
400  // On some architectures, load-linked instructions are atomic for larger
401  // sizes than normal loads. For example, the only 64-bit load guaranteed
402  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
403  Value *Val =
404  TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
405  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
406 
407  LI->replaceAllUsesWith(Val);
408  LI->eraseFromParent();
409 
410  return true;
411 }
412 
413 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
414  IRBuilder<> Builder(LI);
415  AtomicOrdering Order = LI->getOrdering();
416  Value *Addr = LI->getPointerOperand();
417  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
418  Constant *DummyVal = Constant::getNullValue(Ty);
419 
420  Value *Pair = Builder.CreateAtomicCmpXchg(
421  Addr, DummyVal, DummyVal, Order,
422  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
423  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
424 
425  LI->replaceAllUsesWith(Loaded);
426  LI->eraseFromParent();
427 
428  return true;
429 }
430 
431 /// Convert an atomic store of a non-integral type to an integer store of the
432 /// equivalent bitwidth. We used to not support floating point or vector
433 /// atomics in the IR at all. The backends learned to deal with the bitcast
434 /// idiom because that was the only way of expressing the notion of an atomic
435 /// float or vector store. The long term plan is to teach each backend to
436 /// instruction select from the original atomic store, but as a migration
437 /// mechanism, we convert back to the old format which the backends understand.
438 /// Each backend will need individual work to recognize the new format.
439 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
440  IRBuilder<> Builder(SI);
441  auto *M = SI->getModule();
442  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
443  M->getDataLayout());
444  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
445 
446  Value *Addr = SI->getPointerOperand();
447  Type *PT = PointerType::get(NewTy,
448  Addr->getType()->getPointerAddressSpace());
449  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
450 
451  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
452  NewSI->setAlignment(SI->getAlignment());
453  NewSI->setVolatile(SI->isVolatile());
454  NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
455  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
456  SI->eraseFromParent();
457  return NewSI;
458 }
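// Illustrative example: `store atomic float %f, float* %p seq_cst, align 4`
// becomes a bitcast of %f to i32 and of %p to i32*, followed by
// `store atomic i32 ... seq_cst, align 4`, preserving volatility and sync
// scope.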
459 
460 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
461  // This function is only called on atomic stores that are too large to be
462  // atomic if implemented as a native store. So we replace them with an
463  // atomic swap that can be implemented, for example, as ldrex/strex on ARM
464  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
465  // It is the responsibility of the target to only signal expansion via
466  // shouldExpandAtomicRMW in cases where this is required and possible.
467  IRBuilder<> Builder(SI);
468  AtomicRMWInst *AI =
469  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
470  SI->getValueOperand(), SI->getOrdering());
471  SI->eraseFromParent();
472 
473  // Now we have an appropriate swap instruction, lower it as usual.
474  return tryExpandAtomicRMW(AI);
475 }
476 
477 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
478  Value *Loaded, Value *NewVal,
479  AtomicOrdering MemOpOrder,
480  Value *&Success, Value *&NewLoaded) {
481  Value* Pair = Builder.CreateAtomicCmpXchg(
482  Addr, Loaded, NewVal, MemOpOrder,
483  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
484  Success = Builder.CreateExtractValue(Pair, 1, "success");
485  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
486 }
487 
488 /// Emit IR to implement the given atomicrmw operation on values in registers,
489 /// returning the new value.
490 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
491  Value *Loaded, Value *Inc) {
492  Value *NewVal;
493  switch (Op) {
494  case AtomicRMWInst::Xchg:
495  return Inc;
496  case AtomicRMWInst::Add:
497  return Builder.CreateAdd(Loaded, Inc, "new");
498  case AtomicRMWInst::Sub:
499  return Builder.CreateSub(Loaded, Inc, "new");
500  case AtomicRMWInst::And:
501  return Builder.CreateAnd(Loaded, Inc, "new");
502  case AtomicRMWInst::Nand:
503  return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
504  case AtomicRMWInst::Or:
505  return Builder.CreateOr(Loaded, Inc, "new");
506  case AtomicRMWInst::Xor:
507  return Builder.CreateXor(Loaded, Inc, "new");
508  case AtomicRMWInst::Max:
509  NewVal = Builder.CreateICmpSGT(Loaded, Inc);
510  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
511  case AtomicRMWInst::Min:
512  NewVal = Builder.CreateICmpSLE(Loaded, Inc);
513  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
514  case AtomicRMWInst::UMax:
515  NewVal = Builder.CreateICmpUGT(Loaded, Inc);
516  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
517  case AtomicRMWInst::UMin:
518  NewVal = Builder.CreateICmpULE(Loaded, Inc);
519  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
520  default:
521  llvm_unreachable("Unknown atomic op");
522  }
523 }
524 
525 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
526  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
527  case TargetLoweringBase::AtomicExpansionKind::None:
528  return false;
529  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
530  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
531  unsigned ValueSize = getAtomicOpSize(AI);
532  if (ValueSize < MinCASSize) {
533  llvm_unreachable(
534  "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
535  } else {
536  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
537  return performAtomicOp(AI->getOperation(), Builder, Loaded,
538  AI->getValOperand());
539  };
540  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
541  AI->getOrdering(), PerformOp);
542  }
543  return true;
544  }
545  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
546  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
547  unsigned ValueSize = getAtomicOpSize(AI);
548  if (ValueSize < MinCASSize) {
549  expandPartwordAtomicRMW(AI,
550  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
551  } else {
552  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
553  }
554  return true;
555  }
556  default:
557  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
558  }
559 }
560 
561 namespace {
562 
563 /// Result values from createMaskInstrs helper.
564 struct PartwordMaskValues {
565  Type *WordType;
566  Type *ValueType;
567  Value *AlignedAddr;
568  Value *ShiftAmt;
569  Value *Mask;
570  Value *Inv_Mask;
571 };
572 } // end anonymous namespace
573 
574 /// This is a helper function which builds instructions to provide
575 /// values necessary for partword atomic operations. It takes an
576 /// incoming address, Addr, and ValueType, and constructs the address,
577 /// shift-amounts and masks needed to work with a larger value of size
578 /// WordSize.
579 ///
580 /// AlignedAddr: Addr rounded down to a multiple of WordSize
581 ///
582 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
583 /// from AlignAddr for it to have the same value as if
584 /// ValueType was loaded from Addr.
585 ///
586 /// Mask: Value to mask with the value loaded from AlignAddr to
587 /// include only the part that would've been loaded from Addr.
588 ///
589 /// Inv_Mask: The inverse of Mask.
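///
/// Illustrative example (assuming WordSize == 4 and a little-endian layout):
/// for an i8 located at byte offset 3 within an aligned word, AlignedAddr is
/// Addr & ~3, ShiftAmt is 24, Mask is 0xFF000000 and Inv_Mask is 0x00FFFFFF.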
590 
591 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
592  Type *ValueType, Value *Addr,
593  unsigned WordSize) {
594  PartwordMaskValues Ret;
595 
596  BasicBlock *BB = I->getParent();
597  Function *F = BB->getParent();
598  Module *M = I->getModule();
599 
600  LLVMContext &Ctx = F->getContext();
601  const DataLayout &DL = M->getDataLayout();
602 
603  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
604 
605  assert(ValueSize < WordSize);
606 
607  Ret.ValueType = ValueType;
608  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
609 
610  Type *WordPtrType =
611  Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
612 
613  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
614  Ret.AlignedAddr = Builder.CreateIntToPtr(
615  Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
616  "AlignedAddr");
617 
618  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
619  if (DL.isLittleEndian()) {
620  // turn bytes into bits
621  Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
622  } else {
623  // turn bytes into bits, and count from the other side.
624  Ret.ShiftAmt =
625  Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
626  }
627 
628  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
629  Ret.Mask = Builder.CreateShl(
630  ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
631  "Mask");
632  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
633 
634  return Ret;
635 }
636 
637 /// Emit IR to implement a masked version of a given atomicrmw
638 /// operation. (That is, only the bits under the Mask should be
639 /// affected by the operation)
640 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
641  IRBuilder<> &Builder, Value *Loaded,
642  Value *Shifted_Inc, Value *Inc,
643  const PartwordMaskValues &PMV) {
644  switch (Op) {
645  case AtomicRMWInst::Xchg: {
646  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
647  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
648  return FinalVal;
649  }
650  case AtomicRMWInst::Or:
651  case AtomicRMWInst::Xor:
652  // Or/Xor won't affect any other bits, so can just be done
653  // directly.
654  return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
655  case AtomicRMWInst::Add:
656  case AtomicRMWInst::Sub:
657  case AtomicRMWInst::And:
658  case AtomicRMWInst::Nand: {
659  // The other arithmetic ops need to be masked into place.
660  Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
661  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
662  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
663  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
664  return FinalVal;
665  }
666  case AtomicRMWInst::Max:
667  case AtomicRMWInst::Min:
668  case AtomicRMWInst::UMax:
669  case AtomicRMWInst::UMin: {
670  // Finally, comparison ops will operate on the full value, so
671  // truncate down to the original size, and expand out again after
672  // doing the operation.
673  Value *Loaded_Shiftdown = Builder.CreateTrunc(
674  Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
675  Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
676  Value *NewVal_Shiftup = Builder.CreateShl(
677  Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
678  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
679  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
680  return FinalVal;
681  }
682  default:
683  llvm_unreachable("Unknown atomic op");
684  }
685 }
686 
687 /// Expand a sub-word atomicrmw operation into an appropriate
688 /// word-sized operation.
689 ///
690 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
691 /// way as a typical atomicrmw expansion. The only difference here is
692 /// that the operation inside of the loop must operate only upon a
693 /// part of the value.
694 void AtomicExpand::expandPartwordAtomicRMW(
695  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
696 
697  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
698 
699  AtomicOrdering MemOpOrder = AI->getOrdering();
700 
701  IRBuilder<> Builder(AI);
702 
703  PartwordMaskValues PMV =
704  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
705  TLI->getMinCmpXchgSizeInBits() / 8);
706 
707  Value *ValOperand_Shifted =
708  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
709  PMV.ShiftAmt, "ValOperand_Shifted");
710 
711  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
712  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
713  ValOperand_Shifted, AI->getValOperand(), PMV);
714  };
715 
716  // TODO: When we're ready to support LLSC conversions too, use
717  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
718  Value *OldResult =
719  insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
720  PerformPartwordOp, createCmpXchgInstFun);
721  Value *FinalOldResult = Builder.CreateTrunc(
722  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
723  AI->replaceAllUsesWith(FinalOldResult);
724  AI->eraseFromParent();
725 }
726 
727 void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
728  // The basic idea here is that we're expanding a cmpxchg of a
729  // smaller memory size up to a word-sized cmpxchg. To do this, we
730  // need to add a retry-loop for strong cmpxchg, so that
731  // modifications to other parts of the word don't cause a spurious
732  // failure.
733 
734  // This generates code like the following:
735  // [[Setup mask values PMV.*]]
736  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
737  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
738  // %InitLoaded = load i32* %addr
739  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
740  // br partword.cmpxchg.loop
741  // partword.cmpxchg.loop:
742  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
743  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
744  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
745  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
746  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
747  // i32 %FullWord_NewVal success_ordering failure_ordering
748  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
749  // %Success = extractvalue { i32, i1 } %NewCI, 1
750  // br i1 %Success, label %partword.cmpxchg.end,
751  // label %partword.cmpxchg.failure
752  // partword.cmpxchg.failure:
753  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
754  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
755  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
756  // label %partword.cmpxchg.end
757  // partword.cmpxchg.end:
758  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
759  // %FinalOldVal = trunc i32 %tmp1 to i8
760  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
761  // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
762 
763  Value *Addr = CI->getPointerOperand();
764  Value *Cmp = CI->getCompareOperand();
765  Value *NewVal = CI->getNewValOperand();
766 
767  BasicBlock *BB = CI->getParent();
768  Function *F = BB->getParent();
769  IRBuilder<> Builder(CI);
770  LLVMContext &Ctx = Builder.getContext();
771 
772  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
773 
774  BasicBlock *EndBB =
775  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
776  auto FailureBB =
777  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
778  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
779 
780  // The split call above "helpfully" added a branch at the end of BB
781  // (to the wrong place).
782  std::prev(BB->end())->eraseFromParent();
783  Builder.SetInsertPoint(BB);
784 
785  PartwordMaskValues PMV = createMaskInstrs(
786  Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
787 
788  // Shift the incoming values over, into the right location in the word.
789  Value *NewVal_Shifted =
790  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
791  Value *Cmp_Shifted =
792  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
793 
794  // Load the entire current word, and mask into place the expected and new
795  // values
796  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
797  InitLoaded->setVolatile(CI->isVolatile());
798  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
799  Builder.CreateBr(LoopBB);
800 
801  // partword.cmpxchg.loop:
802  Builder.SetInsertPoint(LoopBB);
803  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
804  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
805 
806  // Mask/Or the expected and new values into place in the loaded word.
807  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
808  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
809  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
810  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
811  CI->getFailureOrdering(), CI->getSynchScope());
812  NewCI->setVolatile(CI->isVolatile());
813  // When we're building a strong cmpxchg, we need a loop, so you
814  // might think we could use a weak cmpxchg inside. But, using strong
815  // allows the below comparison for ShouldContinue, and we're
816  // expecting the underlying cmpxchg to be a machine instruction,
817  // which is strong anyways.
818  NewCI->setWeak(CI->isWeak());
819 
820  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
821  Value *Success = Builder.CreateExtractValue(NewCI, 1);
822 
823  if (CI->isWeak())
824  Builder.CreateBr(EndBB);
825  else
826  Builder.CreateCondBr(Success, EndBB, FailureBB);
827 
828  // partword.cmpxchg.failure:
829  Builder.SetInsertPoint(FailureBB);
830  // Upon failure, check whether the masked-out part of the loaded value
831  // changed. If it did not, the masked-in part must have mismatched, so the
832  // cmpxchg genuinely failed and we can exit; otherwise retry the loop.
833  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
834  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
835  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
836 
837  // Add the second value to the phi from above
838  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
839 
840  // partword.cmpxchg.end:
841  Builder.SetInsertPoint(CI);
842 
843  Value *FinalOldVal = Builder.CreateTrunc(
844  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
845  Value *Res = UndefValue::get(CI->getType());
846  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
847  Res = Builder.CreateInsertValue(Res, Success, 1);
848 
849  CI->replaceAllUsesWith(Res);
850  CI->eraseFromParent();
851 }
852 
853 void AtomicExpand::expandAtomicOpToLLSC(
854  Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
855  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
856  IRBuilder<> Builder(I);
857  Value *Loaded =
858  insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
859 
860  I->replaceAllUsesWith(Loaded);
861  I->eraseFromParent();
862 }
863 
864 Value *AtomicExpand::insertRMWLLSCLoop(
865  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
866  AtomicOrdering MemOpOrder,
867  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
868  LLVMContext &Ctx = Builder.getContext();
869  BasicBlock *BB = Builder.GetInsertBlock();
870  Function *F = BB->getParent();
871 
872  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
873  //
874  // The standard expansion we produce is:
875  // [...]
876  // atomicrmw.start:
877  // %loaded = @load.linked(%addr)
878  // %new = some_op iN %loaded, %incr
879  // %stored = @store_conditional(%new, %addr)
880  // %try_again = icmp i32 ne %stored, 0
881  // br i1 %try_again, label %loop, label %atomicrmw.end
882  // atomicrmw.end:
883  // [...]
884  BasicBlock *ExitBB =
885  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
886  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
887 
888  // The split call above "helpfully" added a branch at the end of BB (to the
889  // wrong place).
890  std::prev(BB->end())->eraseFromParent();
891  Builder.SetInsertPoint(BB);
892  Builder.CreateBr(LoopBB);
893 
894  // Start the main loop block now that we've taken care of the preliminaries.
895  Builder.SetInsertPoint(LoopBB);
896  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
897 
898  Value *NewVal = PerformOp(Builder, Loaded);
899 
900  Value *StoreSuccess =
901  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
902  Value *TryAgain = Builder.CreateICmpNE(
903  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
904  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
905 
906  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
907  return Loaded;
908 }
909 
910 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
911 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
912 /// IR. As a migration step, we convert back to what use to be the standard
913 /// way to represent a pointer cmpxchg so that we can update backends one by
914 /// one.
915 AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
916  auto *M = CI->getModule();
917  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
918  M->getDataLayout());
919 
920  IRBuilder<> Builder(CI);
921 
922  Value *Addr = CI->getPointerOperand();
923  Type *PT = PointerType::get(NewTy,
924  Addr->getType()->getPointerAddressSpace());
925  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
926 
927  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
928  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
929 
930 
931  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
932  CI->getSuccessOrdering(),
933  CI->getFailureOrdering(),
934  CI->getSynchScope());
935  NewCI->setVolatile(CI->isVolatile());
936  NewCI->setWeak(CI->isWeak());
937  DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
938 
939  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
940  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
941 
942  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
943 
944  Value *Res = UndefValue::get(CI->getType());
945  Res = Builder.CreateInsertValue(Res, OldVal, 0);
946  Res = Builder.CreateInsertValue(Res, Succ, 1);
947 
948  CI->replaceAllUsesWith(Res);
949  CI->eraseFromParent();
950  return NewCI;
951 }
952 
953 
954 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
955  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
956  AtomicOrdering FailureOrder = CI->getFailureOrdering();
957  Value *Addr = CI->getPointerOperand();
958  BasicBlock *BB = CI->getParent();
959  Function *F = BB->getParent();
960  LLVMContext &Ctx = F->getContext();
961  // If shouldInsertFencesForAtomic() returns true, then the target does not
962  // want to deal with memory orders, and emitLeading/TrailingFence should take
963  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
964  // should preserve the ordering.
965  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
966  AtomicOrdering MemOpOrder =
967  ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
968 
969  // In implementations which use a barrier to achieve release semantics, we can
970  // delay emitting this barrier until we know a store is actually going to be
971  // attempted. The cost of this delay is that we need 2 copies of the block
972  // emitting the load-linked, affecting code size.
973  //
974  // Ideally, this logic would be unconditional except for the minsize check
975  // since in other cases the extra blocks naturally collapse down to the
976  // minimal loop. Unfortunately, this puts too much stress on later
977  // optimisations so we avoid emitting the extra logic in those cases too.
978  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
979  SuccessOrder != AtomicOrdering::Monotonic &&
980  SuccessOrder != AtomicOrdering::Acquire &&
981  !F->optForMinSize();
982 
983  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
984  // do it even on minsize.
985  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
986 
987  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
988  //
989  // The full expansion we produce is:
990  // [...]
991  // cmpxchg.start:
992  // %unreleasedload = @load.linked(%addr)
993  // %should_store = icmp eq %unreleasedload, %desired
994  // br i1 %should_store, label %cmpxchg.fencedstore,
995  // label %cmpxchg.nostore
996  // cmpxchg.releasingstore:
997  // fence?
998  // br label cmpxchg.trystore
999  // cmpxchg.trystore:
1000  // %loaded.trystore = phi [%unreleasedload, %releasingstore],
1001  // [%releasedload, %cmpxchg.releasedload]
1002  // %stored = @store_conditional(%new, %addr)
1003  // %success = icmp eq i32 %stored, 0
1004  // br i1 %success, label %cmpxchg.success,
1005  // label %cmpxchg.releasedload/%cmpxchg.failure
1006  // cmpxchg.releasedload:
1007  // %releasedload = @load.linked(%addr)
1008  // %should_store = icmp eq %releasedload, %desired
1009  // br i1 %should_store, label %cmpxchg.trystore,
1010  // label %cmpxchg.failure
1011  // cmpxchg.success:
1012  // fence?
1013  // br label %cmpxchg.end
1014  // cmpxchg.nostore:
1015  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1016  // [%releasedload,
1017  // %cmpxchg.releasedload/%cmpxchg.trystore]
1018  // @load_linked_fail_balance()?
1019  // br label %cmpxchg.failure
1020  // cmpxchg.failure:
1021  // fence?
1022  // br label %cmpxchg.end
1023  // cmpxchg.end:
1024  // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
1025  // [%loaded.trystore, %cmpxchg.trystore]
1026  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1027  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1028  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1029  // [...]
1030  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1031  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1032  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1033  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1034  auto ReleasedLoadBB =
1035  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1036  auto TryStoreBB =
1037  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1038  auto ReleasingStoreBB =
1039  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1040  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1041 
1042  // This grabs the DebugLoc from CI
1043  IRBuilder<> Builder(CI);
1044 
1045  // The split call above "helpfully" added a branch at the end of BB (to the
1046  // wrong place), but we might want a fence too. It's easiest to just remove
1047  // the branch entirely.
1048  std::prev(BB->end())->eraseFromParent();
1049  Builder.SetInsertPoint(BB);
1050  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1051  TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
1052  /*IsLoad=*/true);
1053  Builder.CreateBr(StartBB);
1054 
1055  // Start the main loop block now that we've taken care of the preliminaries.
1056  Builder.SetInsertPoint(StartBB);
1057  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1058  Value *ShouldStore = Builder.CreateICmpEQ(
1059  UnreleasedLoad, CI->getCompareOperand(), "should_store");
1060 
1061  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1062  // jump straight past that fence instruction (if it exists).
1063  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1064 
1065  Builder.SetInsertPoint(ReleasingStoreBB);
1066  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1067  TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
1068  /*IsLoad=*/true);
1069  Builder.CreateBr(TryStoreBB);
1070 
1071  Builder.SetInsertPoint(TryStoreBB);
1072  Value *StoreSuccess = TLI->emitStoreConditional(
1073  Builder, CI->getNewValOperand(), Addr, MemOpOrder);
1074  StoreSuccess = Builder.CreateICmpEQ(
1075  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1076  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1077  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1078  CI->isWeak() ? FailureBB : RetryBB);
1079 
1080  Builder.SetInsertPoint(ReleasedLoadBB);
1081  Value *SecondLoad;
1082  if (HasReleasedLoadBB) {
1083  SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1084  ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
1085  "should_store");
1086 
1087  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1088  // jump straight past that fence instruction (if it exists).
1089  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1090  } else
1091  Builder.CreateUnreachable();
1092 
1093  // Make sure later instructions don't get reordered with a fence if
1094  // necessary.
1095  Builder.SetInsertPoint(SuccessBB);
1096  if (ShouldInsertFencesForAtomic)
1097  TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
1098  /*IsLoad=*/true);
1099  Builder.CreateBr(ExitBB);
1100 
1101  Builder.SetInsertPoint(NoStoreBB);
1102  // In the failing case, where we don't execute the store-conditional, the
1103  // target might want to balance out the load-linked with a dedicated
1104  // instruction (e.g., on ARM, clearing the exclusive monitor).
1105  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1106  Builder.CreateBr(FailureBB);
1107 
1108  Builder.SetInsertPoint(FailureBB);
1109  if (ShouldInsertFencesForAtomic)
1110  TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
1111  /*IsLoad=*/true);
1112  Builder.CreateBr(ExitBB);
1113 
1114  // Finally, we have control-flow based knowledge of whether the cmpxchg
1115  // succeeded or not. We expose this to later passes by converting any
1116  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1117  // PHI.
1118  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1119  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
1120  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1121  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1122 
1123  // Setup the builder so we can create any PHIs we need.
1124  Value *Loaded;
1125  if (!HasReleasedLoadBB)
1126  Loaded = UnreleasedLoad;
1127  else {
1128  Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
1129  PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1130  TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1131  TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1132 
1133  Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
1134  PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1135  NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
1136  NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1137 
1138  Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
1139  PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1140  ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
1141  ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
1142 
1143  Loaded = ExitLoaded;
1144  }
1145 
1146  // Look for any users of the cmpxchg that are just comparing the loaded value
1147  // against the desired one, and replace them with the CFG-derived version.
1148  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1149  for (auto User : CI->users()) {
1150  ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1151  if (!EV)
1152  continue;
1153 
1154  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1155  "weird extraction from { iN, i1 }");
1156 
1157  if (EV->getIndices()[0] == 0)
1158  EV->replaceAllUsesWith(Loaded);
1159  else
1160  EV->replaceAllUsesWith(Success);
1161 
1162  PrunedInsts.push_back(EV);
1163  }
1164 
1165  // We can remove the instructions now we're no longer iterating through them.
1166  for (auto EV : PrunedInsts)
1167  EV->eraseFromParent();
1168 
1169  if (!CI->use_empty()) {
1170  // Some use of the full struct return that we don't understand has happened,
1171  // so we've got to reconstruct it properly.
1172  Value *Res;
1173  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1174  Res = Builder.CreateInsertValue(Res, Success, 1);
1175 
1176  CI->replaceAllUsesWith(Res);
1177  }
1178 
1179  CI->eraseFromParent();
1180  return true;
1181 }
1182 
1183 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1184  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1185  if(!C)
1186  return false;
1187 
1188  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1189  switch(Op) {
1190  case AtomicRMWInst::Add:
1191  case AtomicRMWInst::Sub:
1192  case AtomicRMWInst::Or:
1193  case AtomicRMWInst::Xor:
1194  return C->isZero();
1195  case AtomicRMWInst::And:
1196  return C->isMinusOne();
1197  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1198  default:
1199  return false;
1200  }
1201 }
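// Illustrative example: `atomicrmw or i32* %p, i32 0 seq_cst` never changes
// memory, so a cooperating target can lower it via
// lowerIdempotentRMWIntoFencedLoad to a suitably fenced atomic load of %p.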
1202 
1203 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1204  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1205  tryExpandAtomicLoad(ResultingLoad);
1206  return true;
1207  }
1208  return false;
1209 }
1210 
1211 Value *AtomicExpand::insertRMWCmpXchgLoop(
1212  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1213  AtomicOrdering MemOpOrder,
1214  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1215  CreateCmpXchgInstFun CreateCmpXchg) {
1216  LLVMContext &Ctx = Builder.getContext();
1217  BasicBlock *BB = Builder.GetInsertBlock();
1218  Function *F = BB->getParent();
1219 
1220  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1221  //
1222  // The standard expansion we produce is:
1223  // [...]
1224  // %init_loaded = load atomic iN* %addr
1225  // br label %loop
1226  // loop:
1227  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1228  // %new = some_op iN %loaded, %incr
1229  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1230  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1231  // %success = extractvalue { iN, i1 } %pair, 1
1232  // br i1 %success, label %atomicrmw.end, label %loop
1233  // atomicrmw.end:
1234  // [...]
1235  BasicBlock *ExitBB =
1236  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1237  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1238 
1239  // The split call above "helpfully" added a branch at the end of BB (to the
1240  // wrong place), but we want a load. It's easiest to just remove
1241  // the branch entirely.
1242  std::prev(BB->end())->eraseFromParent();
1243  Builder.SetInsertPoint(BB);
1244  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
1245  // Atomics require at least natural alignment.
1246  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
1247  Builder.CreateBr(LoopBB);
1248 
1249  // Start the main loop block now that we've taken care of the preliminaries.
1250  Builder.SetInsertPoint(LoopBB);
1251  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1252  Loaded->addIncoming(InitLoaded, BB);
1253 
1254  Value *NewVal = PerformOp(Builder, Loaded);
1255 
1256  Value *NewLoaded = nullptr;
1257  Value *Success = nullptr;
1258 
1259  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
1260  MemOpOrder == AtomicOrdering::Unordered
1261  ? AtomicOrdering::Monotonic
1262  : MemOpOrder,
1263  Success, NewLoaded);
1264  assert(Success && NewLoaded);
1265 
1266  Loaded->addIncoming(NewLoaded, LoopBB);
1267 
1268  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1269 
1270  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1271  return NewLoaded;
1272 }
1273 
1274 // Note: This function is exposed externally by AtomicExpandUtils.h
1275 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1276  CreateCmpXchgInstFun CreateCmpXchg) {
1277  IRBuilder<> Builder(AI);
1278  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1279  Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
1280  [&](IRBuilder<> &Builder, Value *Loaded) {
1281  return performAtomicOp(AI->getOperation(), Builder, Loaded,
1282  AI->getValOperand());
1283  },
1284  CreateCmpXchg);
1285 
1286  AI->replaceAllUsesWith(Loaded);
1287  AI->eraseFromParent();
1288  return true;
1289 }
1290 
1291 // In order to use one of the sized library calls such as
1292 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1293 // must be one of the potentially-specialized sizes, and the value
1294 // type must actually exist in C on the target (otherwise, the
1295 // function wouldn't actually be defined.)
1296 static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
1297  const DataLayout &DL) {
1298  // TODO: "LargestSize" is an approximation for "largest type that
1299  // you can express in C". It seems to be the case that int128 is
1300  // supported on all 64-bit platforms, otherwise only up to 64-bit
1301  // integers are supported. If we get this wrong, then we'll try to
1302  // call a sized libcall that doesn't actually exist. There should
1303  // really be some more reliable way in LLVM of determining integer
1304  // sizes which are valid in the target's C ABI...
1305  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1306  return Align >= Size &&
1307  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1308  Size <= LargestSize;
1309 }
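// Illustrative example: a 4-byte value at 4-byte (or greater) alignment may
// use __atomic_fetch_add_4, whereas the same value at 2-byte alignment fails
// this check; RMW operations without a generic libcall then fall back to the
// cmpxchg loop below, which ends up calling __atomic_compare_exchange.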
1310 
1311 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1312  static const RTLIB::Libcall Libcalls[6] = {
1313      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1314      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1315  unsigned Size = getAtomicOpSize(I);
1316  unsigned Align = getAtomicOpAlign(I);
1317 
1318  bool expanded = expandAtomicOpToLibcall(
1319  I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
1320  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1321  (void)expanded;
1322  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
1323 }
1324 
1325 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1326  static const RTLIB::Libcall Libcalls[6] = {
1327      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1328      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1329  unsigned Size = getAtomicOpSize(I);
1330  unsigned Align = getAtomicOpAlign(I);
1331 
1332  bool expanded = expandAtomicOpToLibcall(
1333  I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
1334  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1335  (void)expanded;
1336  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
1337 }
1338 
1339 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1340  static const RTLIB::Libcall Libcalls[6] = {
1341      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1342      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1343      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1344  unsigned Size = getAtomicOpSize(I);
1345  unsigned Align = getAtomicOpAlign(I);
1346 
1347  bool expanded = expandAtomicOpToLibcall(
1348  I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
1349      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1350  Libcalls);
1351  (void)expanded;
1352  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
1353 }
1354 
1355 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1356  static const RTLIB::Libcall LibcallsXchg[6] = {
1357      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
1358      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1359      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1360  static const RTLIB::Libcall LibcallsAdd[6] = {
1361      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
1362      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1363      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1364  static const RTLIB::Libcall LibcallsSub[6] = {
1365      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
1366      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1367      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1368  static const RTLIB::Libcall LibcallsAnd[6] = {
1369      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
1370      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1371      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1372  static const RTLIB::Libcall LibcallsOr[6] = {
1373      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
1374      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1375      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1376  static const RTLIB::Libcall LibcallsXor[6] = {
1377      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
1378      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1379      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1380  static const RTLIB::Libcall LibcallsNand[6] = {
1381      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
1382      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1383      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1384 
1385  switch (Op) {
1386  case AtomicRMWInst::BAD_BINOP:
1387  llvm_unreachable("Should not have BAD_BINOP.");
1388  case AtomicRMWInst::Xchg:
1389  return makeArrayRef(LibcallsXchg);
1390  case AtomicRMWInst::Add:
1391  return makeArrayRef(LibcallsAdd);
1392  case AtomicRMWInst::Sub:
1393  return makeArrayRef(LibcallsSub);
1394  case AtomicRMWInst::And:
1395  return makeArrayRef(LibcallsAnd);
1396  case AtomicRMWInst::Or:
1397  return makeArrayRef(LibcallsOr);
1398  case AtomicRMWInst::Xor:
1399  return makeArrayRef(LibcallsXor);
1400  case AtomicRMWInst::Nand:
1401  return makeArrayRef(LibcallsNand);
1402  case AtomicRMWInst::Max:
1403  case AtomicRMWInst::Min:
1404  case AtomicRMWInst::UMax:
1405  case AtomicRMWInst::UMin:
1406  // No atomic libcalls are available for max/min/umax/umin.
1407  return {};
1408  }
1409  llvm_unreachable("Unexpected AtomicRMW operation.");
1410 }
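// Illustrative example: an Xchg of a supported size maps to one of the
// ATOMIC_EXCHANGE_{1,2,4,8,16} entries (or the generic ATOMIC_EXCHANGE),
// while Max/Min/UMax/UMin return an empty list and are handled by the
// cmpxchg-loop fallback in expandAtomicRMWToLibcall.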
1411 
1412 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1413  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1414 
1415  unsigned Size = getAtomicOpSize(I);
1416  unsigned Align = getAtomicOpAlign(I);
1417 
1418  bool Success = false;
1419  if (!Libcalls.empty())
1420  Success = expandAtomicOpToLibcall(
1421  I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
1422  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1423 
1424  // The expansion failed: either there were no libcalls at all for
1425  // the operation (min/max), or there were only size-specialized
1426  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1427  // CAS libcall, via a CAS loop, instead.
1428  if (!Success) {
1429  expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
1430  Value *Loaded, Value *NewVal,
1431  AtomicOrdering MemOpOrder,
1432  Value *&Success, Value *&NewLoaded) {
1433  // Create the CAS instruction normally...
1434  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1435  Addr, Loaded, NewVal, MemOpOrder,
1436  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
1437  Success = Builder.CreateExtractValue(Pair, 1, "success");
1438  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1439 
1440  // ...and then expand the CAS into a libcall.
1441  expandAtomicCASToLibcall(Pair);
1442  });
1443  }
1444 }
1445 
1446 // A helper routine for the above expandAtomic*ToLibcall functions.
1447 //
1448 // 'Libcalls' contains an array of enum values for the particular
1449 // ATOMIC libcalls to be emitted. All of the other arguments besides
1450 // 'I' are extracted from the Instruction subclass by the
1451 // caller. Depending on the particular call, some will be null.
1452 bool AtomicExpand::expandAtomicOpToLibcall(
1453  Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
1454  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1455  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1456  assert(Libcalls.size() == 6);
1457 
1458  LLVMContext &Ctx = I->getContext();
1459  Module *M = I->getModule();
1460  const DataLayout &DL = M->getDataLayout();
1461  IRBuilder<> Builder(I);
1462  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1463 
1464  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
1465  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1466 
1467  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
1468 
1469  // TODO: the "order" argument type is "int", not int32. So
1470  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1471  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1472  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1473  Constant *OrderingVal =
1474  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1475  Constant *Ordering2Val = nullptr;
1476  if (CASExpected) {
1477  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1478  Ordering2Val =
1479  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1480  }
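  // Illustrative note: assuming toCABI uses the usual C ABI encoding
  // (__ATOMIC_RELAXED == 0 ... __ATOMIC_SEQ_CST == 5), a seq_cst success
  // ordering with an acquire failure ordering reaches the libcall as the
  // integer pair (5, 2). That mapping is an assumption about toCABI, not
  // something this function checks.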
1481  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1482 
1483  RTLIB::Libcall RTLibType;
1484  if (UseSizedLibcall) {
1485  switch (Size) {
1486  case 1: RTLibType = Libcalls[1]; break;
1487  case 2: RTLibType = Libcalls[2]; break;
1488  case 4: RTLibType = Libcalls[3]; break;
1489  case 8: RTLibType = Libcalls[4]; break;
1490  case 16: RTLibType = Libcalls[5]; break;
1491  }
1492  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1493  RTLibType = Libcalls[0];
1494  } else {
1495  // Can't use sized function, and there's no generic for this
1496  // operation, so give up.
1497  return false;
1498  }
1499 
1500  // Build up the function call. There are two kinds. First, the sized
1501  // variants. These calls are going to be one of the following (with
1502  // N=1,2,4,8,16):
1503  // iN __atomic_load_N(iN *ptr, int ordering)
1504  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1505  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1506  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1507  // int success_order, int failure_order)
1508  //
1509  // Note that these functions can be used for non-integer atomic
1510  // operations, the values just need to be bitcast to integers on the
1511  // way in and out.
1512  //
1513  // And, then, the generic variants. They look like the following:
1514  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1515  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1516  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1517  // int ordering)
1518  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1519  // void *desired, int success_order,
1520  // int failure_order)
1521  //
1522  // The different signatures are built up depending on the
1523  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1524  // variables.
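  // For example (an illustrative reading of the signatures above, not code
  // this function emits verbatim): a 16-byte cmpxchg that cannot use a sized
  // variant ends up as the generic call
  //   bool ok = __atomic_compare_exchange(16, ptr, expected, desired,
  //                                       success_order, failure_order);
  // while a 4-byte exchange with a usable sized libcall becomes
  //   i32 old = __atomic_exchange_4(ptr, val, order);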
1525 
1526  AllocaInst *AllocaCASExpected = nullptr;
1527  Value *AllocaCASExpected_i8 = nullptr;
1528  AllocaInst *AllocaValue = nullptr;
1529  Value *AllocaValue_i8 = nullptr;
1530  AllocaInst *AllocaResult = nullptr;
1531  Value *AllocaResult_i8 = nullptr;
1532 
1533  Type *ResultTy;
1534  SmallVector<Value *, 6> Args;
1535  AttributeSet Attr;
1536 
1537  // 'size' argument.
1538  if (!UseSizedLibcall) {
1539  // Note, getIntPtrType is assumed equivalent to size_t.
1540  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1541  }
1542 
1543  // 'ptr' argument.
1544  Value *PtrVal =
1545  Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
1546  Args.push_back(PtrVal);
1547 
1548  // 'expected' argument, if present.
1549  if (CASExpected) {
1550  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1551  AllocaCASExpected->setAlignment(AllocaAlignment);
1552  AllocaCASExpected_i8 =
1553  Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
1554  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1555  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1556  Args.push_back(AllocaCASExpected_i8);
1557  }
1558 
1559  // 'val' argument ('desired' for cas), if present.
1560  if (ValueOperand) {
1561  if (UseSizedLibcall) {
1562  Value *IntValue =
1563  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1564  Args.push_back(IntValue);
1565  } else {
1566  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1567  AllocaValue->setAlignment(AllocaAlignment);
1568  AllocaValue_i8 =
1569  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1570  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1571  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1572  Args.push_back(AllocaValue_i8);
1573  }
1574  }
1575 
1576  // 'ret' argument.
1577  if (!CASExpected && HasResult && !UseSizedLibcall) {
1578  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1579  AllocaResult->setAlignment(AllocaAlignment);
1580  AllocaResult_i8 =
1581  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
1582  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1583  Args.push_back(AllocaResult_i8);
1584  }
1585 
1586  // 'ordering' ('success_order' for cas) argument.
1587  Args.push_back(OrderingVal);
1588 
1589  // 'failure_order' argument, if present.
1590  if (Ordering2Val)
1591  Args.push_back(Ordering2Val);
1592 
1593  // Now, the return type.
1594  if (CASExpected) {
1595  ResultTy = Type::getInt1Ty(Ctx);
1596  Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt);
1597  } else if (HasResult && UseSizedLibcall)
1598  ResultTy = SizedIntTy;
1599  else
1600  ResultTy = Type::getVoidTy(Ctx);
1601 
1602  // Done with setting up arguments and return types, create the call:
1603  SmallVector<Type *, 6> ArgTys;
1604  for (Value *Arg : Args)
1605  ArgTys.push_back(Arg->getType());
1606  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1607  Constant *LibcallFn =
1608  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1609  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1610  Call->setAttributes(Attr);
1611  Value *Result = Call;
1612 
1613  // And then, extract the results...
1614  if (ValueOperand && !UseSizedLibcall)
1615  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1616 
1617  if (CASExpected) {
1618  // The final result from the CAS is {load of 'expected' alloca, bool result
1619  // from call}
1620  Type *FinalResultTy = I->getType();
1621  Value *V = UndefValue::get(FinalResultTy);
1622  Value *ExpectedOut =
1623  Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
1624  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1625  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1626  V = Builder.CreateInsertValue(V, Result, 1);
1627  I->replaceAllUsesWith(V);
1628  } else if (HasResult) {
1629  Value *V;
1630  if (UseSizedLibcall)
1631  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1632  else {
1633  V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
1634  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1635  }
1636  I->replaceAllUsesWith(V);
1637  }
1638  I->eraseFromParent();
1639  return true;
1640 }
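// Illustrative aside (not part of the pass source): the generic-variant path
// above spills operands to temporary allocas and passes everything by
// pointer, mirroring how a frontend uses the generic builtin. A minimal
// caller-side sketch for a 16-byte exchange, assuming a GCC/Clang-style
// toolchain backed by libatomic:
//
//   struct Pair { long long A, B; };
//   Pair exchange_pair(Pair *P, Pair New) {
//     Pair Old;
//     // With no inline 16-byte atomic available, this generic builtin is
//     // lowered to the libcall
//     //   __atomic_exchange(16, P, &New, &Old, /*seq_cst*/ 5)
//     // with the value and result passed through memory, much as the
//     // function above arranges.
//     __atomic_exchange(P, &New, &Old, __ATOMIC_SEQ_CST);
//     return Old;
//   }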