LLVM  9.0.0svn
AtomicExpandPass.cpp
1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // __atomic_* library calls, or target-specific instructions which implement the
11 // same semantics in a way which better fits the target backend. This can
12 // include the use of (intrinsic-based) load-linked/store-conditional loops,
13 // AtomicCmpXchg, or type coercions.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/CodeGen/AtomicExpandUtils.h"
21 #include "llvm/CodeGen/RuntimeLibcalls.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetPassConfig.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/CodeGen/ValueTypes.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/InstIterator.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/IR/User.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Pass.h"
42 #include "llvm/Support/AtomicOrdering.h"
43 #include "llvm/Support/Casting.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include "llvm/Target/TargetMachine.h"
48 #include <cassert>
49 #include <cstdint>
50 #include <iterator>
51 
52 using namespace llvm;
53 
54 #define DEBUG_TYPE "atomic-expand"
55 
56 namespace {
57 
58  class AtomicExpand: public FunctionPass {
59  const TargetLowering *TLI = nullptr;
60 
61  public:
62  static char ID; // Pass identification, replacement for typeid
63 
64  AtomicExpand() : FunctionPass(ID) {
65  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
66  }
67 
68  bool runOnFunction(Function &F) override;
69 
70  private:
71  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
72  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
73  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
74  bool tryExpandAtomicLoad(LoadInst *LI);
75  bool expandAtomicLoadToLL(LoadInst *LI);
76  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
77  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
78  bool expandAtomicStore(StoreInst *SI);
79  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
80  Value *
81  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
82  AtomicOrdering MemOpOrder,
83  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
84  void expandAtomicOpToLLSC(
85  Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
86  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
87  void expandPartwordAtomicRMW(
88  AtomicRMWInst *I,
89  TargetLoweringBase::AtomicExpansionKind ExpansionKind);
90  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
91  void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
92  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
93  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
94 
95  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
96  static Value *insertRMWCmpXchgLoop(
97  IRBuilder<> &Builder, Type *ResultType, Value *Addr,
98  AtomicOrdering MemOpOrder,
99  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
100  CreateCmpXchgInstFun CreateCmpXchg);
101  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
102 
103  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
104  bool isIdempotentRMW(AtomicRMWInst *RMWI);
105  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
106 
107  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
108  Value *PointerOperand, Value *ValueOperand,
109  Value *CASExpected, AtomicOrdering Ordering,
110  AtomicOrdering Ordering2,
111  ArrayRef<RTLIB::Libcall> Libcalls);
112  void expandAtomicLoadToLibcall(LoadInst *LI);
113  void expandAtomicStoreToLibcall(StoreInst *LI);
114  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
115  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
116 
117  friend bool
118  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
119  CreateCmpXchgInstFun CreateCmpXchg);
120  };
121 
122 } // end anonymous namespace
123 
124 char AtomicExpand::ID = 0;
125 
126 char &llvm::AtomicExpandID = AtomicExpand::ID;
127 
128 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
129  false, false)
130 
131 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
132 
133 // Helper functions to retrieve the size of atomic instructions.
134 static unsigned getAtomicOpSize(LoadInst *LI) {
135  const DataLayout &DL = LI->getModule()->getDataLayout();
136  return DL.getTypeStoreSize(LI->getType());
137 }
138 
139 static unsigned getAtomicOpSize(StoreInst *SI) {
140  const DataLayout &DL = SI->getModule()->getDataLayout();
141  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
142 }
143 
144 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
145  const DataLayout &DL = RMWI->getModule()->getDataLayout();
146  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
147 }
148 
149 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
150  const DataLayout &DL = CASI->getModule()->getDataLayout();
151  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
152 }
153 
154 // Helper functions to retrieve the alignment of atomic instructions.
155 static unsigned getAtomicOpAlign(LoadInst *LI) {
156  unsigned Align = LI->getAlignment();
157  // In the future, if this IR restriction is relaxed, we should
158  // return DataLayout::getABITypeAlignment when there's no align
159  // value.
160  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
161  return Align;
162 }
163 
164 static unsigned getAtomicOpAlign(StoreInst *SI) {
165  unsigned Align = SI->getAlignment();
166  // In the future, if this IR restriction is relaxed, we should
167  // return DataLayout::getABITypeAlignment when there's no align
168  // value.
169  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
170  return Align;
171 }
172 
173 static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
174  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
175  // default alignment for load/store, the default here is to assume
176  // it has NATURAL alignment, not DataLayout-specified alignment.
177  const DataLayout &DL = RMWI->getModule()->getDataLayout();
178  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
179 }
180 
181 static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
182  // TODO(PR27168): same comment as above.
183  const DataLayout &DL = CASI->getModule()->getDataLayout();
184  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
185 }
186 
187 // Determine if a particular atomic operation has a supported size,
188 // and is of appropriate alignment, to be passed through for target
189 // lowering. (Versus turning into a __atomic libcall)
190 template <typename Inst>
191 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
192  unsigned Size = getAtomicOpSize(I);
193  unsigned Align = getAtomicOpAlign(I);
194  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
195 }
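// For example (illustrative; the limits are whatever the target reports): with
// getMaxAtomicSizeInBitsSupported() == 64, a naturally aligned i32 or i64
// access is handed to the target, while an i128 cmpxchg or an under-aligned
// i64 access is routed to the __atomic_* libcalls handled below.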
196 
197 bool AtomicExpand::runOnFunction(Function &F) {
198  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
199  if (!TPC)
200  return false;
201 
202  auto &TM = TPC->getTM<TargetMachine>();
203  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
204  return false;
205  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
206 
207  SmallVector<Instruction *, 1> AtomicInsts;
208 
209  // Changing control-flow while iterating through it is a bad idea, so gather a
210  // list of all atomic instructions before we start.
211  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
212  Instruction *I = &*II;
213  if (I->isAtomic() && !isa<FenceInst>(I))
214  AtomicInsts.push_back(I);
215  }
216 
217  bool MadeChange = false;
218  for (auto I : AtomicInsts) {
219  auto LI = dyn_cast<LoadInst>(I);
220  auto SI = dyn_cast<StoreInst>(I);
221  auto RMWI = dyn_cast<AtomicRMWInst>(I);
222  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
223  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
224 
225  // If the Size/Alignment is not supported, replace with a libcall.
226  if (LI) {
227  if (!atomicSizeSupported(TLI, LI)) {
228  expandAtomicLoadToLibcall(LI);
229  MadeChange = true;
230  continue;
231  }
232  } else if (SI) {
233  if (!atomicSizeSupported(TLI, SI)) {
234  expandAtomicStoreToLibcall(SI);
235  MadeChange = true;
236  continue;
237  }
238  } else if (RMWI) {
239  if (!atomicSizeSupported(TLI, RMWI)) {
240  expandAtomicRMWToLibcall(RMWI);
241  MadeChange = true;
242  continue;
243  }
244  } else if (CASI) {
245  if (!atomicSizeSupported(TLI, CASI)) {
246  expandAtomicCASToLibcall(CASI);
247  MadeChange = true;
248  continue;
249  }
250  }
251 
252  if (TLI->shouldInsertFencesForAtomic(I)) {
253  auto FenceOrdering = AtomicOrdering::Monotonic;
254  if (LI && isAcquireOrStronger(LI->getOrdering())) {
255  FenceOrdering = LI->getOrdering();
256  LI->setOrdering(AtomicOrdering::Monotonic);
257  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
258  FenceOrdering = SI->getOrdering();
259  SI->setOrdering(AtomicOrdering::Monotonic);
260  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
261  isAcquireOrStronger(RMWI->getOrdering()))) {
262  FenceOrdering = RMWI->getOrdering();
263  RMWI->setOrdering(AtomicOrdering::Monotonic);
264  } else if (CASI &&
265  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
266  TargetLoweringBase::AtomicExpansionKind::None &&
267  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
268  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
269  // If a compare and swap is lowered to LL/SC, we can do smarter fence
270  // insertion, with a stronger one on the success path than on the
271  // failure path. As a result, fence insertion is directly done by
272  // expandAtomicCmpXchg in that case.
273  FenceOrdering = CASI->getSuccessOrdering();
274  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
275  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
276  }
277 
278  if (FenceOrdering != AtomicOrdering::Monotonic) {
279  MadeChange |= bracketInstWithFences(I, FenceOrdering);
280  }
281  }
282 
283  if (LI) {
284  if (LI->getType()->isFloatingPointTy()) {
285  // TODO: add a TLI hook to control this so that each target can
286  // convert to lowering the original type one at a time.
287  LI = convertAtomicLoadToIntegerType(LI);
288  assert(LI->getType()->isIntegerTy() && "invariant broken");
289  MadeChange = true;
290  }
291 
292  MadeChange |= tryExpandAtomicLoad(LI);
293  } else if (SI) {
294  if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
295  // TODO: add a TLI hook to control this so that each target can
296  // convert to lowering the original type one at a time.
297  SI = convertAtomicStoreToIntegerType(SI);
298  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
299  "invariant broken");
300  MadeChange = true;
301  }
302 
303  if (TLI->shouldExpandAtomicStoreInIR(SI))
304  MadeChange |= expandAtomicStore(SI);
305  } else if (RMWI) {
306  // There are two different ways of expanding RMW instructions:
307  // - into a load if it is idempotent
308  // - into a Cmpxchg/LL-SC loop otherwise
309  // we try them in that order.
310 
311  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
312  MadeChange = true;
313  } else {
314  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
315  unsigned ValueSize = getAtomicOpSize(RMWI);
316  AtomicRMWInst::BinOp Op = RMWI->getOperation();
317  if (ValueSize < MinCASSize &&
318  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
319  Op == AtomicRMWInst::And)) {
320  RMWI = widenPartwordAtomicRMW(RMWI);
321  MadeChange = true;
322  }
323 
324  MadeChange |= tryExpandAtomicRMW(RMWI);
325  }
326  } else if (CASI) {
327  // TODO: when we're ready to make the change at the IR level, we can
328  // extend convertCmpXchgToInteger for floating point too.
329  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
330  "unimplemented - floating point not legal at IR level");
331  if (CASI->getCompareOperand()->getType()->isPointerTy()) {
332  // TODO: add a TLI hook to control this so that each target can
333  // convert to lowering the original type one at a time.
334  CASI = convertCmpXchgToIntegerType(CASI);
335  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
336  "invariant broken");
337  MadeChange = true;
338  }
339 
340  MadeChange |= tryExpandAtomicCmpXchg(CASI);
341  }
342  }
343  return MadeChange;
344 }
345 
346 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
347  IRBuilder<> Builder(I);
348 
349  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
350 
351  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
352  // We have a guard here because not every atomic operation generates a
353  // trailing fence.
354  if (TrailingFence)
355  TrailingFence->moveAfter(I);
356 
357  return (LeadingFence || TrailingFence);
358 }
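// Rough illustration (the target decides which fences, if any, are emitted):
// with the default emitLeadingFence/emitTrailingFence, a store such as
//   store atomic i32 %v, i32* %p seq_cst, align 4
// has already been downgraded to monotonic by the caller and ends up as
//   fence seq_cst
//   store atomic i32 %v, i32* %p monotonic, align 4
//   fence seq_cst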
359 
360 /// Get the iX type with the same bitwidth as T.
361 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
362  const DataLayout &DL) {
363  EVT VT = TLI->getMemValueType(DL, T);
364  unsigned BitWidth = VT.getStoreSizeInBits();
365  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
366  return IntegerType::get(T->getContext(), BitWidth);
367 }
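// E.g., with a typical data layout this maps float -> i32 and double -> i64;
// the exact width is whatever getMemValueType() reports for T.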
368 
369 /// Convert an atomic load of a non-integral type to an integer load of the
370 /// equivalent bitwidth. See the function comment on
371 /// convertAtomicStoreToIntegerType for background.
372 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
373  auto *M = LI->getModule();
374  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
375  M->getDataLayout());
376 
377  IRBuilder<> Builder(LI);
378 
379  Value *Addr = LI->getPointerOperand();
380  Type *PT = PointerType::get(NewTy,
381  Addr->getType()->getPointerAddressSpace());
382  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
383 
384  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
385  NewLI->setAlignment(LI->getAlignment());
386  NewLI->setVolatile(LI->isVolatile());
387  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
388  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
389 
390  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
391  LI->replaceAllUsesWith(NewVal);
392  LI->eraseFromParent();
393  return NewLI;
394 }
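// Illustrative before/after (assuming float corresponds to i32 on the target):
//   %v = load atomic float, float* %p acquire, align 4
// becomes
//   %p.cast = bitcast float* %p to i32*
//   %v.int  = load atomic i32, i32* %p.cast acquire, align 4
//   %v      = bitcast i32 %v.int to float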
395 
396 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
397  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
398  case TargetLoweringBase::AtomicExpansionKind::None:
399  return false;
400  case TargetLoweringBase::AtomicExpansionKind::LLSC:
401  expandAtomicOpToLLSC(
402  LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
403  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
404  return true;
405  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
406  return expandAtomicLoadToLL(LI);
407  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
408  return expandAtomicLoadToCmpXchg(LI);
409  default:
410  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
411  }
412 }
413 
414 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
415  IRBuilder<> Builder(LI);
416 
417  // On some architectures, load-linked instructions are atomic for larger
418  // sizes than normal loads. For example, the only 64-bit load guaranteed
419  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
420  Value *Val =
421  TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
422  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
423 
424  LI->replaceAllUsesWith(Val);
425  LI->eraseFromParent();
426 
427  return true;
428 }
429 
430 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
431  IRBuilder<> Builder(LI);
432  AtomicOrdering Order = LI->getOrdering();
433  if (Order == AtomicOrdering::Unordered)
434  Order = AtomicOrdering::Monotonic;
435 
436  Value *Addr = LI->getPointerOperand();
437  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
438  Constant *DummyVal = Constant::getNullValue(Ty);
439 
440  Value *Pair = Builder.CreateAtomicCmpXchg(
441  Addr, DummyVal, DummyVal, Order,
442  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
443  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
444 
445  LI->replaceAllUsesWith(Loaded);
446  LI->eraseFromParent();
447 
448  return true;
449 }
450 
451 /// Convert an atomic store of a non-integral type to an integer store of the
452 /// equivalent bitwidth. We used to not support floating point or vector
453 /// atomics in the IR at all. The backends learned to deal with the bitcast
454 /// idiom because that was the only way of expressing the notion of an atomic
455 /// float or vector store. The long term plan is to teach each backend to
456 /// instruction select from the original atomic store, but as a migration
457 /// mechanism, we convert back to the old format which the backends understand.
458 /// Each backend will need individual work to recognize the new format.
459 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
460  IRBuilder<> Builder(SI);
461  auto *M = SI->getModule();
462  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
463  M->getDataLayout());
464  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
465 
466  Value *Addr = SI->getPointerOperand();
467  Type *PT = PointerType::get(NewTy,
468  Addr->getType()->getPointerAddressSpace());
469  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
470 
471  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
472  NewSI->setAlignment(SI->getAlignment());
473  NewSI->setVolatile(SI->isVolatile());
474  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
475  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
476  SI->eraseFromParent();
477  return NewSI;
478 }
479 
480 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
481  // This function is only called on atomic stores that are too large to be
482  // atomic if implemented as a native store. So we replace them with an
483  // atomic swap, which can be implemented, for example, as a ldrex/strex on ARM
484  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
485  // It is the responsibility of the target to only signal expansion via
486  // shouldExpandAtomicRMW in cases where this is required and possible.
487  IRBuilder<> Builder(SI);
488  AtomicRMWInst *AI =
489  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
490  SI->getValueOperand(), SI->getOrdering());
491  SI->eraseFromParent();
492 
493  // Now we have an appropriate swap instruction, lower it as usual.
494  return tryExpandAtomicRMW(AI);
495 }
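// Illustrative example: on a 32-bit target that cannot store an i64 atomically,
//   store atomic i64 %v, i64* %p seq_cst, align 8
// is rewritten to
//   %old = atomicrmw xchg i64* %p, i64 %v seq_cst
// and that xchg is then expanded again (e.g. into an LL/SC loop) by
// tryExpandAtomicRMW, exactly as a user-written atomicrmw would be.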
496 
497 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
498  Value *Loaded, Value *NewVal,
499  AtomicOrdering MemOpOrder,
500  Value *&Success, Value *&NewLoaded) {
501  Type *OrigTy = NewVal->getType();
502 
503  // This code can go away when cmpxchg supports FP types.
504  bool NeedBitcast = OrigTy->isFloatingPointTy();
505  if (NeedBitcast) {
506  IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
507  unsigned AS = Addr->getType()->getPointerAddressSpace();
508  Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
509  NewVal = Builder.CreateBitCast(NewVal, IntTy);
510  Loaded = Builder.CreateBitCast(Loaded, IntTy);
511  }
512 
513  Value* Pair = Builder.CreateAtomicCmpXchg(
514  Addr, Loaded, NewVal, MemOpOrder,
515  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
516  Success = Builder.CreateExtractValue(Pair, 1, "success");
517  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
518 
519  if (NeedBitcast)
520  NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
521 }
522 
523 /// Emit IR to implement the given atomicrmw operation on values in registers,
524 /// returning the new value.
525 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
526  Value *Loaded, Value *Inc) {
527  Value *NewVal;
528  switch (Op) {
529  case AtomicRMWInst::Xchg:
530  return Inc;
531  case AtomicRMWInst::Add:
532  return Builder.CreateAdd(Loaded, Inc, "new");
533  case AtomicRMWInst::Sub:
534  return Builder.CreateSub(Loaded, Inc, "new");
535  case AtomicRMWInst::And:
536  return Builder.CreateAnd(Loaded, Inc, "new");
537  case AtomicRMWInst::Nand:
538  return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
539  case AtomicRMWInst::Or:
540  return Builder.CreateOr(Loaded, Inc, "new");
541  case AtomicRMWInst::Xor:
542  return Builder.CreateXor(Loaded, Inc, "new");
543  case AtomicRMWInst::Max:
544  NewVal = Builder.CreateICmpSGT(Loaded, Inc);
545  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
546  case AtomicRMWInst::Min:
547  NewVal = Builder.CreateICmpSLE(Loaded, Inc);
548  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
549  case AtomicRMWInst::UMax:
550  NewVal = Builder.CreateICmpUGT(Loaded, Inc);
551  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
552  case AtomicRMWInst::UMin:
553  NewVal = Builder.CreateICmpULE(Loaded, Inc);
554  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
555  case AtomicRMWInst::FAdd:
556  return Builder.CreateFAdd(Loaded, Inc, "new");
557  case AtomicRMWInst::FSub:
558  return Builder.CreateFSub(Loaded, Inc, "new");
559  default:
560  llvm_unreachable("Unknown atomic op");
561  }
562 }
563 
564 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
565  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
566  case TargetLoweringBase::AtomicExpansionKind::None:
567  return false;
568  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
569  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
570  unsigned ValueSize = getAtomicOpSize(AI);
571  if (ValueSize < MinCASSize) {
572  llvm_unreachable(
573  "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
574  } else {
575  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
576  return performAtomicOp(AI->getOperation(), Builder, Loaded,
577  AI->getValOperand());
578  };
579  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
580  AI->getOrdering(), PerformOp);
581  }
582  return true;
583  }
584  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
585  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
586  unsigned ValueSize = getAtomicOpSize(AI);
587  if (ValueSize < MinCASSize) {
588  expandPartwordAtomicRMW(AI,
589  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
590  } else {
591  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
592  }
593  return true;
594  }
595  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
596  expandAtomicRMWToMaskedIntrinsic(AI);
597  return true;
598  }
599  default:
600  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
601  }
602 }
603 
604 namespace {
605 
606 /// Result values from createMaskInstrs helper.
607 struct PartwordMaskValues {
608  Type *WordType;
609  Type *ValueType;
610  Value *AlignedAddr;
611  Value *ShiftAmt;
612  Value *Mask;
613  Value *Inv_Mask;
614 };
615 
616 } // end anonymous namespace
617 
618 /// This is a helper function which builds instructions to provide
619 /// values necessary for partword atomic operations. It takes an
620 /// incoming address, Addr, and ValueType, and constructs the address,
621 /// shift-amounts and masks needed to work with a larger value of size
622 /// WordSize.
623 ///
624 /// AlignedAddr: Addr rounded down to a multiple of WordSize
625 ///
626 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
627 /// from AlignAddr for it to have the same value as if
628 /// ValueType was loaded from Addr.
629 ///
630 /// Mask: Value to mask with the value loaded from AlignAddr to
631 /// include only the part that would've been loaded from Addr.
632 ///
633 /// Inv_Mask: The inverse of Mask.
634 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
635  Type *ValueType, Value *Addr,
636  unsigned WordSize) {
637  PartwordMaskValues Ret;
638 
639  BasicBlock *BB = I->getParent();
640  Function *F = BB->getParent();
641  Module *M = I->getModule();
642 
643  LLVMContext &Ctx = F->getContext();
644  const DataLayout &DL = M->getDataLayout();
645 
646  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
647 
648  assert(ValueSize < WordSize);
649 
650  Ret.ValueType = ValueType;
651  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
652 
653  Type *WordPtrType =
654  Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
655 
656  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
657  Ret.AlignedAddr = Builder.CreateIntToPtr(
658  Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
659  "AlignedAddr");
660 
661  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
662  if (DL.isLittleEndian()) {
663  // turn bytes into bits
664  Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
665  } else {
666  // turn bytes into bits, and count from the other side.
667  Ret.ShiftAmt =
668  Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
669  }
670 
671  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
672  Ret.Mask = Builder.CreateShl(
673  ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
674  "Mask");
675  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
676 
677  return Ret;
678 }
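// Worked example (illustrative): an i8 access at address A, with WordSize == 4
// on a little-endian target, produces
//   AlignedAddr = A & ~3
//   ShiftAmt    = (A & 3) * 8
//   Mask        = 0xFF << ShiftAmt
//   Inv_Mask    = ~Mask
// so loading the containing i32 and shifting it right by ShiftAmt recovers the
// byte a plain i8 load from A would have produced.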
679 
680 /// Emit IR to implement a masked version of a given atomicrmw
681 /// operation. (That is, only the bits under the Mask should be
682 /// affected by the operation)
683 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
684  IRBuilder<> &Builder, Value *Loaded,
685  Value *Shifted_Inc, Value *Inc,
686  const PartwordMaskValues &PMV) {
687  // TODO: update to use
688  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
689  // to merge bits from two values without requiring PMV.Inv_Mask.
690  switch (Op) {
691  case AtomicRMWInst::Xchg: {
692  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
693  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
694  return FinalVal;
695  }
696  case AtomicRMWInst::Or:
697  case AtomicRMWInst::Xor:
698  case AtomicRMWInst::And:
699  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
700  case AtomicRMWInst::Add:
701  case AtomicRMWInst::Sub:
702  case AtomicRMWInst::Nand: {
703  // The other arithmetic ops need to be masked into place.
704  Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
705  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
706  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
707  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
708  return FinalVal;
709  }
710  case AtomicRMWInst::Max:
711  case AtomicRMWInst::Min:
712  case AtomicRMWInst::UMax:
713  case AtomicRMWInst::UMin: {
714  // Finally, comparison ops will operate on the full value, so
715  // truncate down to the original size, and expand out again after
716  // doing the operation.
717  Value *Loaded_Shiftdown = Builder.CreateTrunc(
718  Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
719  Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
720  Value *NewVal_Shiftup = Builder.CreateShl(
721  Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
722  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
723  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
724  return FinalVal;
725  }
726  default:
727  llvm_unreachable("Unknown atomic op");
728  }
729 }
730 
731 /// Expand a sub-word atomicrmw operation into an appropriate
732 /// word-sized operation.
733 ///
734 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
735 /// way as a typical atomicrmw expansion. The only difference here is
736 /// that the operation inside of the loop must operate only upon a
737 /// part of the value.
738 void AtomicExpand::expandPartwordAtomicRMW(
739  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
740  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
741 
742  AtomicOrdering MemOpOrder = AI->getOrdering();
743 
744  IRBuilder<> Builder(AI);
745 
746  PartwordMaskValues PMV =
747  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
748  TLI->getMinCmpXchgSizeInBits() / 8);
749 
750  Value *ValOperand_Shifted =
751  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
752  PMV.ShiftAmt, "ValOperand_Shifted");
753 
754  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
755  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
756  ValOperand_Shifted, AI->getValOperand(), PMV);
757  };
758 
759  // TODO: When we're ready to support LLSC conversions too, use
760  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
761  Value *OldResult =
762  insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
763  PerformPartwordOp, createCmpXchgInstFun);
764  Value *FinalOldResult = Builder.CreateTrunc(
765  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
766  AI->replaceAllUsesWith(FinalOldResult);
767  AI->eraseFromParent();
768 }
769 
770 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
771 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
772  IRBuilder<> Builder(AI);
773  AtomicRMWInst::BinOp Op = AI->getOperation();
774 
775  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
776  Op == AtomicRMWInst::And) &&
777  "Unable to widen operation");
778 
779  PartwordMaskValues PMV =
780  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
781  TLI->getMinCmpXchgSizeInBits() / 8);
782 
783  Value *ValOperand_Shifted =
784  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
785  PMV.ShiftAmt, "ValOperand_Shifted");
786 
787  Value *NewOperand;
788 
789  if (Op == AtomicRMWInst::And)
790  NewOperand =
791  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
792  else
793  NewOperand = ValOperand_Shifted;
794 
795  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
796  NewOperand, AI->getOrdering());
797 
798  Value *FinalOldResult = Builder.CreateTrunc(
799  Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
800  AI->replaceAllUsesWith(FinalOldResult);
801  AI->eraseFromParent();
802  return NewAI;
803 }
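// Illustrative before/after for a little-endian target with a 32-bit minimum
// cmpxchg width (assuming the i8 sits at byte offset 0 of its word):
//   %old = atomicrmw or i8* %p, i8 1 monotonic
// becomes, roughly,
//   %w   = atomicrmw or i32* %AlignedAddr, i32 1 monotonic
//   %old = trunc i32 %w to i8        ; after the lshr by ShiftAmt above
// For 'and', the operand is first OR'd with Inv_Mask so the other bytes of the
// word are left unchanged.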
804 
805 void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
806  // The basic idea here is that we're expanding a cmpxchg of a
807  // smaller memory size up to a word-sized cmpxchg. To do this, we
808  // need to add a retry-loop for strong cmpxchg, so that
809  // modifications to other parts of the word don't cause a spurious
810  // failure.
811 
812  // This generates code like the following:
813  // [[Setup mask values PMV.*]]
814  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
815  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
816  // %InitLoaded = load i32* %addr
817  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
818  // br partword.cmpxchg.loop
819  // partword.cmpxchg.loop:
820  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
821  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
822  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
823  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
824  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
825  // i32 %FullWord_NewVal success_ordering failure_ordering
826  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
827  // %Success = extractvalue { i32, i1 } %NewCI, 1
828  // br i1 %Success, label %partword.cmpxchg.end,
829  // label %partword.cmpxchg.failure
830  // partword.cmpxchg.failure:
831  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
832  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
833  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
834  // label %partword.cmpxchg.end
835  // partword.cmpxchg.end:
836  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
837  // %FinalOldVal = trunc i32 %tmp1 to i8
838  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
839  // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
840 
841  Value *Addr = CI->getPointerOperand();
842  Value *Cmp = CI->getCompareOperand();
843  Value *NewVal = CI->getNewValOperand();
844 
845  BasicBlock *BB = CI->getParent();
846  Function *F = BB->getParent();
847  IRBuilder<> Builder(CI);
848  LLVMContext &Ctx = Builder.getContext();
849 
850  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
851 
852  BasicBlock *EndBB =
853  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
854  auto FailureBB =
855  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
856  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
857 
858  // The split call above "helpfully" added a branch at the end of BB
859  // (to the wrong place).
860  std::prev(BB->end())->eraseFromParent();
861  Builder.SetInsertPoint(BB);
862 
863  PartwordMaskValues PMV = createMaskInstrs(
864  Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
865 
866  // Shift the incoming values over, into the right location in the word.
867  Value *NewVal_Shifted =
868  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
869  Value *Cmp_Shifted =
870  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
871 
872  // Load the entire current word, and mask into place the expected and new
873  // values
874  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
875  InitLoaded->setVolatile(CI->isVolatile());
876  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
877  Builder.CreateBr(LoopBB);
878 
879  // partword.cmpxchg.loop:
880  Builder.SetInsertPoint(LoopBB);
881  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
882  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
883 
884  // Mask/Or the expected and new values into place in the loaded word.
885  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
886  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
887  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
888  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
889  CI->getFailureOrdering(), CI->getSyncScopeID());
890  NewCI->setVolatile(CI->isVolatile());
891  // When we're building a strong cmpxchg, we need a loop, so you
892  // might think we could use a weak cmpxchg inside. But, using strong
893  // allows the below comparison for ShouldContinue, and we're
894  // expecting the underlying cmpxchg to be a machine instruction,
895  // which is strong anyways.
896  NewCI->setWeak(CI->isWeak());
897 
898  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
899  Value *Success = Builder.CreateExtractValue(NewCI, 1);
900 
901  if (CI->isWeak())
902  Builder.CreateBr(EndBB);
903  else
904  Builder.CreateCondBr(Success, EndBB, FailureBB);
905 
906  // partword.cmpxchg.failure:
907  Builder.SetInsertPoint(FailureBB);
908  // Upon failure, check whether the masked-out part of the loaded value
909  // has been modified. If it hasn't, abort the cmpxchg, since the
910  // masked-in part must have changed, so the failure was genuine.
911  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
912  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
913  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
914 
915  // Add the second value to the phi from above
916  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
917 
918  // partword.cmpxchg.end:
919  Builder.SetInsertPoint(CI);
920 
921  Value *FinalOldVal = Builder.CreateTrunc(
922  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
923  Value *Res = UndefValue::get(CI->getType());
924  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
925  Res = Builder.CreateInsertValue(Res, Success, 1);
926 
927  CI->replaceAllUsesWith(Res);
928  CI->eraseFromParent();
929 }
930 
931 void AtomicExpand::expandAtomicOpToLLSC(
932  Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
933  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
934  IRBuilder<> Builder(I);
935  Value *Loaded =
936  insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
937 
938  I->replaceAllUsesWith(Loaded);
939  I->eraseFromParent();
940 }
941 
942 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
943  IRBuilder<> Builder(AI);
944 
945  PartwordMaskValues PMV =
946  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
947  TLI->getMinCmpXchgSizeInBits() / 8);
948 
949  // The value operand must be sign-extended for signed min/max so that the
950  // target's signed comparison instructions can be used. Otherwise, just
951  // zero-ext.
952  Instruction::CastOps CastOp = Instruction::ZExt;
953  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
954  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
955  CastOp = Instruction::SExt;
956 
957  Value *ValOperand_Shifted = Builder.CreateShl(
958  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
959  PMV.ShiftAmt, "ValOperand_Shifted");
960  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
961  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
962  AI->getOrdering());
963  Value *FinalOldResult = Builder.CreateTrunc(
964  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
965  AI->replaceAllUsesWith(FinalOldResult);
966  AI->eraseFromParent();
967 }
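// This path is for targets whose part-word atomics are implemented via a
// target-specific masked intrinsic (RISC-V, for example); the call that
// emitMaskedAtomicRMWIntrinsic emits is the target's choice, shaped roughly
// like (illustrative signature only):
//   %w = call i32 @llvm.<target>.masked.atomicrmw.add.i32(
//            i32* %AlignedAddr, i32 %ValOperand_Shifted, i32 %Mask, i32 <ord>)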
968 
969 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
970  IRBuilder<> Builder(CI);
971 
972  PartwordMaskValues PMV = createMaskInstrs(
973  Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
974  TLI->getMinCmpXchgSizeInBits() / 8);
975 
976  Value *CmpVal_Shifted = Builder.CreateShl(
977  Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
978  "CmpVal_Shifted");
979  Value *NewVal_Shifted = Builder.CreateShl(
980  Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
981  "NewVal_Shifted");
982  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
983  Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
984  CI->getSuccessOrdering());
985  Value *FinalOldVal = Builder.CreateTrunc(
986  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
987 
988  Value *Res = UndefValue::get(CI->getType());
989  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
990  Value *Success = Builder.CreateICmpEQ(
991  CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
992  Res = Builder.CreateInsertValue(Res, Success, 1);
993 
994  CI->replaceAllUsesWith(Res);
995  CI->eraseFromParent();
996 }
997 
998 Value *AtomicExpand::insertRMWLLSCLoop(
999  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1000  AtomicOrdering MemOpOrder,
1001  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
1002  LLVMContext &Ctx = Builder.getContext();
1003  BasicBlock *BB = Builder.GetInsertBlock();
1004  Function *F = BB->getParent();
1005 
1006  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1007  //
1008  // The standard expansion we produce is:
1009  // [...]
1010  // atomicrmw.start:
1011  // %loaded = @load.linked(%addr)
1012  // %new = some_op iN %loaded, %incr
1013  // %stored = @store_conditional(%new, %addr)
1014  // %try_again = icmp i32 ne %stored, 0
1015  // br i1 %try_again, label %loop, label %atomicrmw.end
1016  // atomicrmw.end:
1017  // [...]
1018  BasicBlock *ExitBB =
1019  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1020  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1021 
1022  // The split call above "helpfully" added a branch at the end of BB (to the
1023  // wrong place).
1024  std::prev(BB->end())->eraseFromParent();
1025  Builder.SetInsertPoint(BB);
1026  Builder.CreateBr(LoopBB);
1027 
1028  // Start the main loop block now that we've taken care of the preliminaries.
1029  Builder.SetInsertPoint(LoopBB);
1030  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1031 
1032  Value *NewVal = PerformOp(Builder, Loaded);
1033 
1034  Value *StoreSuccess =
1035  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1036  Value *TryAgain = Builder.CreateICmpNE(
1037  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1038  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1039 
1040  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1041  return Loaded;
1042 }
1043 
1044 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1045 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1046 /// IR. As a migration step, we convert back to what used to be the standard
1047 /// way to represent a pointer cmpxchg so that we can update backends one by
1048 /// one.
1049 AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1050  auto *M = CI->getModule();
1051  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1052  M->getDataLayout());
1053 
1054  IRBuilder<> Builder(CI);
1055 
1056  Value *Addr = CI->getPointerOperand();
1057  Type *PT = PointerType::get(NewTy,
1058  Addr->getType()->getPointerAddressSpace());
1059  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1060 
1061  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1062  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1063 
1064 
1065  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
1066  CI->getSuccessOrdering(),
1067  CI->getFailureOrdering(),
1068  CI->getSyncScopeID());
1069  NewCI->setVolatile(CI->isVolatile());
1070  NewCI->setWeak(CI->isWeak());
1071  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1072 
1073  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1074  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1075 
1076  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1077 
1078  Value *Res = UndefValue::get(CI->getType());
1079  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1080  Res = Builder.CreateInsertValue(Res, Succ, 1);
1081 
1082  CI->replaceAllUsesWith(Res);
1083  CI->eraseFromParent();
1084  return NewCI;
1085 }
1086 
1087 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1088  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1089  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1090  Value *Addr = CI->getPointerOperand();
1091  BasicBlock *BB = CI->getParent();
1092  Function *F = BB->getParent();
1093  LLVMContext &Ctx = F->getContext();
1094  // If shouldInsertFencesForAtomic() returns true, then the target does not
1095  // want to deal with memory orders, and emitLeading/TrailingFence should take
1096  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1097  // should preserve the ordering.
1098  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1099  AtomicOrdering MemOpOrder =
1100  ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
1101 
1102  // In implementations which use a barrier to achieve release semantics, we can
1103  // delay emitting this barrier until we know a store is actually going to be
1104  // attempted. The cost of this delay is that we need 2 copies of the block
1105  // emitting the load-linked, affecting code size.
1106  //
1107  // Ideally, this logic would be unconditional except for the minsize check
1108  // since in other cases the extra blocks naturally collapse down to the
1109  // minimal loop. Unfortunately, this puts too much stress on later
1110  // optimisations so we avoid emitting the extra logic in those cases too.
1111  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1112  SuccessOrder != AtomicOrdering::Monotonic &&
1113  SuccessOrder != AtomicOrdering::Acquire &&
1114  !F->hasMinSize();
1115 
1116  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1117  // do it even on minsize.
1118  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1119 
1120  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1121  //
1122  // The full expansion we produce is:
1123  // [...]
1124  // cmpxchg.start:
1125  // %unreleasedload = @load.linked(%addr)
1126  // %should_store = icmp eq %unreleasedload, %desired
1127  // br i1 %should_store, label %cmpxchg.fencedstore,
1128  // label %cmpxchg.nostore
1129  // cmpxchg.releasingstore:
1130  // fence?
1131  // br label cmpxchg.trystore
1132  // cmpxchg.trystore:
1133  // %loaded.trystore = phi [%unreleasedload, %releasingstore],
1134  // [%releasedload, %cmpxchg.releasedload]
1135  // %stored = @store_conditional(%new, %addr)
1136  // %success = icmp eq i32 %stored, 0
1137  // br i1 %success, label %cmpxchg.success,
1138  // label %cmpxchg.releasedload/%cmpxchg.failure
1139  // cmpxchg.releasedload:
1140  // %releasedload = @load.linked(%addr)
1141  // %should_store = icmp eq %releasedload, %desired
1142  // br i1 %should_store, label %cmpxchg.trystore,
1143  // label %cmpxchg.failure
1144  // cmpxchg.success:
1145  // fence?
1146  // br label %cmpxchg.end
1147  // cmpxchg.nostore:
1148  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1149  // [%releasedload,
1150  // %cmpxchg.releasedload/%cmpxchg.trystore]
1151  // @load_linked_fail_balance()?
1152  // br label %cmpxchg.failure
1153  // cmpxchg.failure:
1154  // fence?
1155  // br label %cmpxchg.end
1156  // cmpxchg.end:
1157  // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
1158  // [%loaded.trystore, %cmpxchg.trystore]
1159  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1160  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1161  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1162  // [...]
1163  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1164  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1165  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1166  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1167  auto ReleasedLoadBB =
1168  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1169  auto TryStoreBB =
1170  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1171  auto ReleasingStoreBB =
1172  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1173  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1174 
1175  // This grabs the DebugLoc from CI
1176  IRBuilder<> Builder(CI);
1177 
1178  // The split call above "helpfully" added a branch at the end of BB (to the
1179  // wrong place), but we might want a fence too. It's easiest to just remove
1180  // the branch entirely.
1181  std::prev(BB->end())->eraseFromParent();
1182  Builder.SetInsertPoint(BB);
1183  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1184  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1185  Builder.CreateBr(StartBB);
1186 
1187  // Start the main loop block now that we've taken care of the preliminaries.
1188  Builder.SetInsertPoint(StartBB);
1189  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1190  Value *ShouldStore = Builder.CreateICmpEQ(
1191  UnreleasedLoad, CI->getCompareOperand(), "should_store");
1192 
1193  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1194  // jump straight past that fence instruction (if it exists).
1195  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1196 
1197  Builder.SetInsertPoint(ReleasingStoreBB);
1198  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1199  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1200  Builder.CreateBr(TryStoreBB);
1201 
1202  Builder.SetInsertPoint(TryStoreBB);
1203  Value *StoreSuccess = TLI->emitStoreConditional(
1204  Builder, CI->getNewValOperand(), Addr, MemOpOrder);
1205  StoreSuccess = Builder.CreateICmpEQ(
1206  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1207  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1208  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1209  CI->isWeak() ? FailureBB : RetryBB);
1210 
1211  Builder.SetInsertPoint(ReleasedLoadBB);
1212  Value *SecondLoad;
1213  if (HasReleasedLoadBB) {
1214  SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1215  ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
1216  "should_store");
1217 
1218  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1219  // jump straight past that fence instruction (if it exists).
1220  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1221  } else
1222  Builder.CreateUnreachable();
1223 
1224  // Make sure later instructions don't get reordered with a fence if
1225  // necessary.
1226  Builder.SetInsertPoint(SuccessBB);
1227  if (ShouldInsertFencesForAtomic)
1228  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1229  Builder.CreateBr(ExitBB);
1230 
1231  Builder.SetInsertPoint(NoStoreBB);
1232  // In the failing case, where we don't execute the store-conditional, the
1233  // target might want to balance out the load-linked with a dedicated
1234  // instruction (e.g., on ARM, clearing the exclusive monitor).
1235  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1236  Builder.CreateBr(FailureBB);
1237 
1238  Builder.SetInsertPoint(FailureBB);
1239  if (ShouldInsertFencesForAtomic)
1240  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1241  Builder.CreateBr(ExitBB);
1242 
1243  // Finally, we have control-flow based knowledge of whether the cmpxchg
1244  // succeeded or not. We expose this to later passes by converting any
1245  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1246  // PHI.
1247  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1248  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
1249  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1250  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1251 
1252  // Setup the builder so we can create any PHIs we need.
1253  Value *Loaded;
1254  if (!HasReleasedLoadBB)
1255  Loaded = UnreleasedLoad;
1256  else {
1257  Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
1258  PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1259  TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1260  TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1261 
1262  Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
1263  PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1264  NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
1265  NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1266 
1267  Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
1268  PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1269  ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
1270  ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
1271 
1272  Loaded = ExitLoaded;
1273  }
1274 
1275  // Look for any users of the cmpxchg that are just comparing the loaded value
1276  // against the desired one, and replace them with the CFG-derived version.
1277  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1278  for (auto User : CI->users()) {
1279  ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1280  if (!EV)
1281  continue;
1282 
1283  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1284  "weird extraction from { iN, i1 }");
1285 
1286  if (EV->getIndices()[0] == 0)
1287  EV->replaceAllUsesWith(Loaded);
1288  else
1289  EV->replaceAllUsesWith(Success);
1290 
1291  PrunedInsts.push_back(EV);
1292  }
1293 
1294  // We can remove the instructions now we're no longer iterating through them.
1295  for (auto EV : PrunedInsts)
1296  EV->eraseFromParent();
1297 
1298  if (!CI->use_empty()) {
1299  // Some use of the full struct return that we don't understand has happened,
1300  // so we've got to reconstruct it properly.
1301  Value *Res;
1302  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1303  Res = Builder.CreateInsertValue(Res, Success, 1);
1304 
1305  CI->replaceAllUsesWith(Res);
1306  }
1307 
1308  CI->eraseFromParent();
1309  return true;
1310 }
1311 
1312 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1313  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1314  if(!C)
1315  return false;
1316 
1317  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1318  switch(Op) {
1319  case AtomicRMWInst::Add:
1320  case AtomicRMWInst::Sub:
1321  case AtomicRMWInst::Or:
1322  case AtomicRMWInst::Xor:
1323  return C->isZero();
1324  case AtomicRMWInst::And:
1325  return C->isMinusOne();
1326  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1327  default:
1328  return false;
1329  }
1330 }
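// For example, 'atomicrmw or i32* %p, i32 0 seq_cst' never changes memory, so
// if the target implements lowerIdempotentRMWIntoFencedLoad it can become an
// ordinary (possibly fence-bracketed) atomic load instead of a full RMW loop.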
1331 
1332 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1333  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1334  tryExpandAtomicLoad(ResultingLoad);
1335  return true;
1336  }
1337  return false;
1338 }
1339 
1340 Value *AtomicExpand::insertRMWCmpXchgLoop(
1341  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1342  AtomicOrdering MemOpOrder,
1343  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1344  CreateCmpXchgInstFun CreateCmpXchg) {
1345  LLVMContext &Ctx = Builder.getContext();
1346  BasicBlock *BB = Builder.GetInsertBlock();
1347  Function *F = BB->getParent();
1348 
1349  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1350  //
1351  // The standard expansion we produce is:
1352  // [...]
1353  // %init_loaded = load atomic iN* %addr
1354  // br label %loop
1355  // loop:
1356  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1357  // %new = some_op iN %loaded, %incr
1358  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1359  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1360  // %success = extractvalue { iN, i1 } %pair, 1
1361  // br i1 %success, label %atomicrmw.end, label %loop
1362  // atomicrmw.end:
1363  // [...]
1364  BasicBlock *ExitBB =
1365  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1366  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1367 
1368  // The split call above "helpfully" added a branch at the end of BB (to the
1369  // wrong place), but we want a load. It's easiest to just remove
1370  // the branch entirely.
1371  std::prev(BB->end())->eraseFromParent();
1372  Builder.SetInsertPoint(BB);
1373  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
1374  // Atomics require at least natural alignment.
1375  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
1376  Builder.CreateBr(LoopBB);
1377 
1378  // Start the main loop block now that we've taken care of the preliminaries.
1379  Builder.SetInsertPoint(LoopBB);
1380  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1381  Loaded->addIncoming(InitLoaded, BB);
1382 
1383  Value *NewVal = PerformOp(Builder, Loaded);
1384 
1385  Value *NewLoaded = nullptr;
1386  Value *Success = nullptr;
1387 
1388  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
1389  MemOpOrder == AtomicOrdering::Unordered
1390  ? AtomicOrdering::Monotonic
1391  : MemOpOrder,
1392  Success, NewLoaded);
1393  assert(Success && NewLoaded);
1394 
1395  Loaded->addIncoming(NewLoaded, LoopBB);
1396 
1397  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1398 
1399  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1400  return NewLoaded;
1401 }
1402 
1403 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1404  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1405  unsigned ValueSize = getAtomicOpSize(CI);
1406 
1407  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1408  default:
1409  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1410  case TargetLoweringBase::AtomicExpansionKind::None:
1411  if (ValueSize < MinCASSize)
1412  expandPartwordCmpXchg(CI);
1413  return false;
1414  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1415  assert(ValueSize >= MinCASSize &&
1416  "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
1417  return expandAtomicCmpXchg(CI);
1418  }
1419  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1420  expandAtomicCmpXchgToMaskedIntrinsic(CI);
1421  return true;
1422  }
1423 }
1424 
1425 // Note: This function is exposed externally by AtomicExpandUtils.h
1426 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1427  CreateCmpXchgInstFun CreateCmpXchg) {
1428  IRBuilder<> Builder(AI);
1429  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1430  Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
1431  [&](IRBuilder<> &Builder, Value *Loaded) {
1432  return performAtomicOp(AI->getOperation(), Builder, Loaded,
1433  AI->getValOperand());
1434  },
1435  CreateCmpXchg);
1436 
1437  AI->replaceAllUsesWith(Loaded);
1438  AI->eraseFromParent();
1439  return true;
1440 }
1441 
1442 // In order to use one of the sized library calls such as
1443 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1444 // must be one of the potentially-specialized sizes, and the value
1445 // type must actually exist in C on the target (otherwise, the
1446 // function wouldn't actually be defined.)
1447 static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
1448  const DataLayout &DL) {
1449  // TODO: "LargestSize" is an approximation for "largest type that
1450  // you can express in C". It seems to be the case that int128 is
1451  // supported on all 64-bit platforms, otherwise only up to 64-bit
1452  // integers are supported. If we get this wrong, then we'll try to
1453  // call a sized libcall that doesn't actually exist. There should
1454  // really be some more reliable way in LLVM of determining integer
1455  // sizes which are valid in the target's C ABI...
1456  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1457  return Align >= Size &&
1458  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1459  Size <= LargestSize;
1460 }
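// Worked examples of the predicate above (assuming a typical 64-bit target,
// where getLargestLegalIntTypeSizeInBits() >= 64 and so LargestSize is 16):
//   Size = 4, Align = 4 -> true  (a sized call such as __atomic_fetch_add_4)
//   Size = 4, Align = 2 -> false (under-aligned; only the generic __atomic_*
//                                 calls remain usable)
// On a 32-bit target whose largest legal integer type is 32 bits
// (LargestSize = 8), Size = 16 is rejected even when fully aligned.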
1461 
1462 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1463  static const RTLIB::Libcall Libcalls[6] = {
1464  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1465  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1466  unsigned Size = getAtomicOpSize(I);
1467  unsigned Align = getAtomicOpAlign(I);
1468 
1469  bool expanded = expandAtomicOpToLibcall(
1470  I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
1471  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1472  (void)expanded;
1473  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
1474 }
1475 
1476 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1477  static const RTLIB::Libcall Libcalls[6] = {
1478  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1479  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1480  unsigned Size = getAtomicOpSize(I);
1481  unsigned Align = getAtomicOpAlign(I);
1482 
1483  bool expanded = expandAtomicOpToLibcall(
1484  I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
1485  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1486  (void)expanded;
1487  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
1488 }
1489 
1490 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1491  static const RTLIB::Libcall Libcalls[6] = {
1492  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1493  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1494  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1495  unsigned Size = getAtomicOpSize(I);
1496  unsigned Align = getAtomicOpAlign(I);
1497 
1498  bool expanded = expandAtomicOpToLibcall(
1499  I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
1500  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1501  Libcalls);
1502  (void)expanded;
1503  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
1504 }
1505 
1506 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1507  static const RTLIB::Libcall LibcallsXchg[6] = {
1508  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1509  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1510  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1511  static const RTLIB::Libcall LibcallsAdd[6] = {
1512  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1513  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1514  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1515  static const RTLIB::Libcall LibcallsSub[6] = {
1516  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1517  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1518  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1519  static const RTLIB::Libcall LibcallsAnd[6] = {
1520  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1521  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1522  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1523  static const RTLIB::Libcall LibcallsOr[6] = {
1524  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1525  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1526  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1527  static const RTLIB::Libcall LibcallsXor[6] = {
1528  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1529  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1530  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1531  static const RTLIB::Libcall LibcallsNand[6] = {
1532  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1533  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1534  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1535 
1536  switch (Op) {
1537  case AtomicRMWInst::BAD_BINOP:
1538  llvm_unreachable("Should not have BAD_BINOP.");
1539  case AtomicRMWInst::Xchg:
1540  return makeArrayRef(LibcallsXchg);
1541  case AtomicRMWInst::Add:
1542  return makeArrayRef(LibcallsAdd);
1543  case AtomicRMWInst::Sub:
1544  return makeArrayRef(LibcallsSub);
1545  case AtomicRMWInst::And:
1546  return makeArrayRef(LibcallsAnd);
1547  case AtomicRMWInst::Or:
1548  return makeArrayRef(LibcallsOr);
1549  case AtomicRMWInst::Xor:
1550  return makeArrayRef(LibcallsXor);
1551  case AtomicRMWInst::Nand:
1552  return makeArrayRef(LibcallsNand);
1553  case AtomicRMWInst::Max:
1554  case AtomicRMWInst::Min:
1555  case AtomicRMWInst::UMax:
1556  case AtomicRMWInst::UMin:
1557  case AtomicRMWInst::FAdd:
1558  case AtomicRMWInst::FSub:
1559  // No atomic libcalls are available for max/min/umax/umin or fadd/fsub.
1560  return {};
1561  }
1562  llvm_unreachable("Unexpected AtomicRMW operation.");
1563 }
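// How these tables are consumed (see expandAtomicRMWToLibcall below): slot 0
// is the generic __atomic_* entry point and slots 1-5 are the sized variants
// for 1, 2, 4, 8 and 16 bytes. For Add/Sub/And/Or/Xor/Nand slot 0 is
// UNKNOWN_LIBCALL because C provides no generic __atomic_fetch_<op>, so when
// the sized form cannot be used the expansion falls back to a
// compare-exchange loop instead.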
1564 
1565 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1566  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1567 
1568  unsigned Size = getAtomicOpSize(I);
1569  unsigned Align = getAtomicOpAlign(I);
1570 
1571  bool Success = false;
1572  if (!Libcalls.empty())
1573  Success = expandAtomicOpToLibcall(
1574  I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
1575  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1576 
1577  // The expansion failed: either there were no libcalls at all for
1578  // the operation (min/max), or there were only size-specialized
1579  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1580  // CAS libcall, via a CAS loop, instead.
1581  if (!Success) {
1582  expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
1583  Value *Loaded, Value *NewVal,
1584  AtomicOrdering MemOpOrder,
1585  Value *&Success, Value *&NewLoaded) {
1586  // Create the CAS instruction normally...
1587  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1588  Addr, Loaded, NewVal, MemOpOrder,
1589  AtomicOrdering::SequentiallyConsistent);
1590  Success = Builder.CreateExtractValue(Pair, 1, "success");
1591  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1592 
1593  // ...and then expand the CAS into a libcall.
1594  expandAtomicCASToLibcall(Pair);
1595  });
1596  }
1597 }
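// For illustration (hypothetical IR, not emitted verbatim by this pass): an
// `atomicrmw max i32* %p, i32 %v seq_cst` has no __atomic_fetch_max libcall,
// so the fallback above produces a loop of roughly this shape, whose cmpxchg
// is then itself expanded into a __atomic_compare_exchange call:
//   entry:
//     %init = load i32, i32* %p
//     br label %loop
//   loop:
//     %loaded = phi i32 [ %init, %entry ], [ %newloaded, %loop ]
//     %cmp = icmp sgt i32 %loaded, %v
//     %new = select i1 %cmp, i32 %loaded, i32 %v
//     ; cmpxchg %p, %loaded, %new  ->  %newloaded, %success
//     br i1 %success, label %atomicrmw.end, label %loop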
1598 
1599 // A helper routine for the above expandAtomic*ToLibcall functions.
1600 //
1601 // 'Libcalls' contains an array of enum values for the particular
1602 // ATOMIC libcalls to be emitted. All of the other arguments besides
1603 // 'I' are extracted from the Instruction subclass by the
1604 // caller. Depending on the particular call, some will be null.
1605 bool AtomicExpand::expandAtomicOpToLibcall(
1606  Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
1607  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1608  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1609  assert(Libcalls.size() == 6);
1610 
1611  LLVMContext &Ctx = I->getContext();
1612  Module *M = I->getModule();
1613  const DataLayout &DL = M->getDataLayout();
1614  IRBuilder<> Builder(I);
1615  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1616 
1617  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
1618  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1619 
1620  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
1621 
1622  // TODO: the "order" argument type is "int", not int32. So
1623  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1624  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1625  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1626  Constant *OrderingVal =
1627  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1628  Constant *Ordering2Val = nullptr;
1629  if (CASExpected) {
1630  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1631  Ordering2Val =
1632  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1633  }
1634  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1635 
1636  RTLIB::Libcall RTLibType;
1637  if (UseSizedLibcall) {
1638  switch (Size) {
1639  case 1: RTLibType = Libcalls[1]; break;
1640  case 2: RTLibType = Libcalls[2]; break;
1641  case 4: RTLibType = Libcalls[3]; break;
1642  case 8: RTLibType = Libcalls[4]; break;
1643  case 16: RTLibType = Libcalls[5]; break;
1644  }
1645  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1646  RTLibType = Libcalls[0];
1647  } else {
1648  // Can't use sized function, and there's no generic for this
1649  // operation, so give up.
1650  return false;
1651  }
1652 
1653  // Build up the function call. There are two kinds. First, the sized
1654  // variants. These calls are going to be one of the following (with
1655  // N=1,2,4,8,16):
1656  // iN __atomic_load_N(iN *ptr, int ordering)
1657  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1658  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1659  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1660  // int success_order, int failure_order)
1661  //
1662  // Note that these functions can be used for non-integer atomic
1663  // operations; the values just need to be bitcast to integers on the
1664  // way in and out.
1665  //
1666  // And, then, the generic variants. They look like the following:
1667  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1668  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1669  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1670  // int ordering)
1671  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1672  // void *desired, int success_order,
1673  // int failure_order)
1674  //
1675  // The different signatures are built up depending on the
1676  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1677  // variables.
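  // For example (illustrative values; seq_cst maps to 5 in the C ABI):
  //   sized:   %old = call i32 @__atomic_fetch_add_4(i8* %ptr, i32 %val, i32 5)
  //   generic: call void @__atomic_exchange(i64 4, i8* %ptr, i8* %val.buf,
  //                                         i8* %ret.buf, i32 5)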
1678 
1679  AllocaInst *AllocaCASExpected = nullptr;
1680  Value *AllocaCASExpected_i8 = nullptr;
1681  AllocaInst *AllocaValue = nullptr;
1682  Value *AllocaValue_i8 = nullptr;
1683  AllocaInst *AllocaResult = nullptr;
1684  Value *AllocaResult_i8 = nullptr;
1685 
1686  Type *ResultTy;
1687  SmallVector<Value *, 6> Args;
1688  AttributeList Attr;
1689 
1690  // 'size' argument.
1691  if (!UseSizedLibcall) {
1692  // Note, getIntPtrType is assumed equivalent to size_t.
1693  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1694  }
1695 
1696  // 'ptr' argument.
1697  // note: This assumes all address spaces share a common libfunc
1698  // implementation and that addresses are convertible. For systems without
1699  // that property, we'd need to extend this mechanism to support AS-specific
1700  // families of atomic intrinsics.
1701  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
1702  Value *PtrVal = Builder.CreateBitCast(PointerOperand,
1703  Type::getInt8PtrTy(Ctx, PtrTypeAS));
1704  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
1705  Args.push_back(PtrVal);
1706 
1707  // 'expected' argument, if present.
1708  if (CASExpected) {
1709  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1710  AllocaCASExpected->setAlignment(AllocaAlignment);
1711  AllocaCASExpected_i8 =
1712  Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
1713  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1714  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1715  Args.push_back(AllocaCASExpected_i8);
1716  }
1717 
1718  // 'val' argument ('desired' for cas), if present.
1719  if (ValueOperand) {
1720  if (UseSizedLibcall) {
1721  Value *IntValue =
1722  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1723  Args.push_back(IntValue);
1724  } else {
1725  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1726  AllocaValue->setAlignment(AllocaAlignment);
1727  AllocaValue_i8 =
1728  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1729  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1730  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1731  Args.push_back(AllocaValue_i8);
1732  }
1733  }
1734 
1735  // 'ret' argument.
1736  if (!CASExpected && HasResult && !UseSizedLibcall) {
1737  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1738  AllocaResult->setAlignment(AllocaAlignment);
1739  AllocaResult_i8 =
1740  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
1741  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1742  Args.push_back(AllocaResult_i8);
1743  }
1744 
1745  // 'ordering' ('success_order' for cas) argument.
1746  Args.push_back(OrderingVal);
1747 
1748  // 'failure_order' argument, if present.
1749  if (Ordering2Val)
1750  Args.push_back(Ordering2Val);
1751 
1752  // Now, the return type.
1753  if (CASExpected) {
1754  ResultTy = Type::getInt1Ty(Ctx);
1755  Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
1756  } else if (HasResult && UseSizedLibcall)
1757  ResultTy = SizedIntTy;
1758  else
1759  ResultTy = Type::getVoidTy(Ctx);
1760 
1761  // Done with setting up arguments and return types, create the call:
1762  SmallVector<Type *, 6> ArgTys;
1763  for (Value *Arg : Args)
1764  ArgTys.push_back(Arg->getType());
1765  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1766  FunctionCallee LibcallFn =
1767  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1768  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1769  Call->setAttributes(Attr);
1770  Value *Result = Call;
1771 
1772  // And then, extract the results...
1773  if (ValueOperand && !UseSizedLibcall)
1774  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1775 
1776  if (CASExpected) {
1777  // The final result from the CAS is {load of 'expected' alloca, bool result
1778  // from call}
1779  Type *FinalResultTy = I->getType();
1780  Value *V = UndefValue::get(FinalResultTy);
1781  Value *ExpectedOut = Builder.CreateAlignedLoad(
1782  CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1783  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1784  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1785  V = Builder.CreateInsertValue(V, Result, 1);
1786  I->replaceAllUsesWith(V);
1787  } else if (HasResult) {
1788  Value *V;
1789  if (UseSizedLibcall)
1790  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1791  else {
1792  V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1793  AllocaAlignment);
1794  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1795  }
1796  I->replaceAllUsesWith(V);
1797  }
1798  I->eraseFromParent();
1799  return true;
1800 }
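// For illustration (hypothetical IR, assuming the generic path is taken on a
// 32-bit target, so size_t is i32 and __atomic_compare_exchange_16 is not
// usable): a `cmpxchg i128* %p, i128 %e, i128 %d seq_cst seq_cst` becomes
// roughly
//   %ok = call zeroext i1 @__atomic_compare_exchange(i32 16, i8* %p.i8,
//             i8* %expected.i8, i8* %desired.i8, i32 5, i32 5)
// preceded by stores of %e and %d into stack slots, and followed by a reload
// of the 'expected' slot; the original { i128, i1 } result is rebuilt from
// that reload and %ok.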