LLVM  14.0.0git
ImplicitNullChecks.cpp
Go to the documentation of this file.
1 //===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass turns explicit null checks of the form
10 //
11 // test %r10, %r10
12 // je throw_npe
13 // movl (%r10), %esi
14 // ...
15 //
16 // to
17 //
18 // faulting_load_op("movl (%r10), %esi", throw_npe)
19 // ...
20 //
21 // With the help of a runtime that understands the .fault_maps section,
22 // faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs
23 // a page fault.
24 // Store and LoadStore are also supported.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/Optional.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
36 #include "llvm/CodeGen/FaultMaps.h"
50 #include "llvm/IR/BasicBlock.h"
51 #include "llvm/IR/DebugLoc.h"
52 #include "llvm/IR/LLVMContext.h"
53 #include "llvm/InitializePasses.h"
54 #include "llvm/MC/MCInstrDesc.h"
55 #include "llvm/MC/MCRegisterInfo.h"
56 #include "llvm/Pass.h"
58 #include <cassert>
59 #include <cstdint>
60 #include <iterator>
61 
62 using namespace llvm;
63 
64 static cl::opt<int> PageSize("imp-null-check-page-size",
65  cl::desc("The page size of the target in bytes"),
66  cl::init(4096), cl::Hidden);
67 
69  "imp-null-max-insts-to-consider",
70  cl::desc("The max number of instructions to consider hoisting loads over "
71  "(the algorithm is quadratic over this number)"),
72  cl::Hidden, cl::init(8));
73 
74 #define DEBUG_TYPE "implicit-null-checks"
75 
76 STATISTIC(NumImplicitNullChecks,
77  "Number of explicit null checks made implicit");
78 
79 namespace {
80 
81 class ImplicitNullChecks : public MachineFunctionPass {
82  /// Return true if \c computeDependence can process \p MI.
83  static bool canHandle(const MachineInstr *MI);
84 
85  /// Helper function for \c computeDependence. Return true if \p A
86  /// and \p B do not have any dependences between them, and can be
87  /// re-ordered without changing program semantics.
88  bool canReorder(const MachineInstr *A, const MachineInstr *B);
89 
90  /// A data type for representing the result computed by \c
91  /// computeDependence. States whether it is okay to reorder the
92  /// instruction passed to \c computeDependence with at most one
93  /// dependency.
94  struct DependenceResult {
95  /// Can we actually re-order \p MI with \p Insts (see \c
96  /// computeDependence).
97  bool CanReorder;
98 
99  /// If non-None, then an instruction in \p Insts that also must be
100  /// hoisted.
101  Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
102 
103  /*implicit*/ DependenceResult(
104  bool CanReorder,
105  Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence)
106  : CanReorder(CanReorder), PotentialDependence(PotentialDependence) {
107  assert((!PotentialDependence || CanReorder) &&
108  "!CanReorder && PotentialDependence.hasValue() not allowed!");
109  }
110  };
111 
112  /// Compute a result for the following question: can \p MI be
113  /// re-ordered from after \p Insts to before it.
114  ///
115  /// \c canHandle should return true for all instructions in \p
116  /// Insts.
117  DependenceResult computeDependence(const MachineInstr *MI,
119 
120  /// Represents one null check that can be made implicit.
121  class NullCheck {
122  // The memory operation the null check can be folded into.
123  MachineInstr *MemOperation;
124 
125  // The instruction actually doing the null check (Ptr != 0).
126  MachineInstr *CheckOperation;
127 
128  // The block the check resides in.
129  MachineBasicBlock *CheckBlock;
130 
131  // The block branched to if the pointer is non-null.
132  MachineBasicBlock *NotNullSucc;
133 
134  // The block branched to if the pointer is null.
135  MachineBasicBlock *NullSucc;
136 
137  // If this is non-null, then MemOperation has a dependency on this
138  // instruction; and it needs to be hoisted to execute before MemOperation.
139  MachineInstr *OnlyDependency;
140 
141  public:
142  explicit NullCheck(MachineInstr *memOperation, MachineInstr *checkOperation,
143  MachineBasicBlock *checkBlock,
144  MachineBasicBlock *notNullSucc,
145  MachineBasicBlock *nullSucc,
146  MachineInstr *onlyDependency)
147  : MemOperation(memOperation), CheckOperation(checkOperation),
148  CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc),
149  OnlyDependency(onlyDependency) {}
150 
151  MachineInstr *getMemOperation() const { return MemOperation; }
152 
153  MachineInstr *getCheckOperation() const { return CheckOperation; }
154 
155  MachineBasicBlock *getCheckBlock() const { return CheckBlock; }
156 
157  MachineBasicBlock *getNotNullSucc() const { return NotNullSucc; }
158 
159  MachineBasicBlock *getNullSucc() const { return NullSucc; }
160 
161  MachineInstr *getOnlyDependency() const { return OnlyDependency; }
162  };
163 
164  const TargetInstrInfo *TII = nullptr;
165  const TargetRegisterInfo *TRI = nullptr;
166  AliasAnalysis *AA = nullptr;
167  MachineFrameInfo *MFI = nullptr;
168 
169  bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
170  SmallVectorImpl<NullCheck> &NullCheckList);
171  MachineInstr *insertFaultingInstr(MachineInstr *MI, MachineBasicBlock *MBB,
172  MachineBasicBlock *HandlerMBB);
173  void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
174 
175  enum AliasResult {
176  AR_NoAlias,
177  AR_MayAlias,
178  AR_WillAliasEverything
179  };
180 
181  /// Returns AR_NoAlias if \p MI memory operation does not alias with
182  /// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if
183  /// they may alias and any further memory operation may alias with \p PrevMI.
184  AliasResult areMemoryOpsAliased(const MachineInstr &MI,
185  const MachineInstr *PrevMI) const;
186 
187  enum SuitabilityResult {
188  SR_Suitable,
189  SR_Unsuitable,
190  SR_Impossible
191  };
192 
193  /// Return SR_Suitable if \p MI a memory operation that can be used to
194  /// implicitly null check the value in \p PointerReg, SR_Unsuitable if
195  /// \p MI cannot be used to null check and SR_Impossible if there is
196  /// no sense to continue lookup due to any other instruction will not be able
197  /// to be used. \p PrevInsts is the set of instruction seen since
198  /// the explicit null check on \p PointerReg.
199  SuitabilityResult isSuitableMemoryOp(const MachineInstr &MI,
200  unsigned PointerReg,
201  ArrayRef<MachineInstr *> PrevInsts);
202 
203  /// Returns true if \p DependenceMI can clobber the liveIns in NullSucc block
204  /// if it was hoisted to the NullCheck block. This is used by caller
205  /// canHoistInst to decide if DependenceMI can be hoisted safely.
206  bool canDependenceHoistingClobberLiveIns(MachineInstr *DependenceMI,
207  MachineBasicBlock *NullSucc);
208 
209  /// Return true if \p FaultingMI can be hoisted from after the
210  /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
211  /// non-null value if we also need to (and legally can) hoist a dependency.
212  bool canHoistInst(MachineInstr *FaultingMI,
213  ArrayRef<MachineInstr *> InstsSeenSoFar,
215 
216 public:
217  static char ID;
218 
219  ImplicitNullChecks() : MachineFunctionPass(ID) {
221  }
222 
223  bool runOnMachineFunction(MachineFunction &MF) override;
224 
225  void getAnalysisUsage(AnalysisUsage &AU) const override {
228  }
229 
230  MachineFunctionProperties getRequiredProperties() const override {
233  }
234 };
235 
236 } // end anonymous namespace
237 
238 bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
239  if (MI->isCall() || MI->mayRaiseFPException() ||
240  MI->hasUnmodeledSideEffects())
241  return false;
242  auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
243  (void)IsRegMask;
244 
245  assert(!llvm::any_of(MI->operands(), IsRegMask) &&
246  "Calls were filtered out above!");
247 
248  auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); };
249  return llvm::all_of(MI->memoperands(), IsUnordered);
250 }
251 
252 ImplicitNullChecks::DependenceResult
253 ImplicitNullChecks::computeDependence(const MachineInstr *MI,
254  ArrayRef<MachineInstr *> Block) {
255  assert(llvm::all_of(Block, canHandle) && "Check this first!");
256  assert(!is_contained(Block, MI) && "Block must be exclusive of MI!");
257 
259 
260  for (auto I = Block.begin(), E = Block.end(); I != E; ++I) {
261  if (canReorder(*I, MI))
262  continue;
263 
264  if (Dep == None) {
265  // Found one possible dependency, keep track of it.
266  Dep = I;
267  } else {
268  // We found two dependencies, so bail out.
269  return {false, None};
270  }
271  }
272 
273  return {true, Dep};
274 }
275 
276 bool ImplicitNullChecks::canReorder(const MachineInstr *A,
277  const MachineInstr *B) {
278  assert(canHandle(A) && canHandle(B) && "Precondition!");
279 
280  // canHandle makes sure that we _can_ correctly analyze the dependencies
281  // between A and B here -- for instance, we should not be dealing with heap
282  // load-store dependencies here.
283 
284  for (const auto &MOA : A->operands()) {
285  if (!(MOA.isReg() && MOA.getReg()))
286  continue;
287 
288  Register RegA = MOA.getReg();
289  for (const auto &MOB : B->operands()) {
290  if (!(MOB.isReg() && MOB.getReg()))
291  continue;
292 
293  Register RegB = MOB.getReg();
294 
295  if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef()))
296  return false;
297  }
298  }
299 
300  return true;
301 }
302 
303 bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
304  TII = MF.getSubtarget().getInstrInfo();
306  MFI = &MF.getFrameInfo();
307  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
308 
309  SmallVector<NullCheck, 16> NullCheckList;
310 
311  for (auto &MBB : MF)
312  analyzeBlockForNullChecks(MBB, NullCheckList);
313 
314  if (!NullCheckList.empty())
315  rewriteNullChecks(NullCheckList);
316 
317  return !NullCheckList.empty();
318 }
319 
320 // Return true if any register aliasing \p Reg is live-in into \p MBB.
322  MachineBasicBlock *MBB, unsigned Reg) {
323  for (MCRegAliasIterator AR(Reg, TRI, /*IncludeSelf*/ true); AR.isValid();
324  ++AR)
325  if (MBB->isLiveIn(*AR))
326  return true;
327  return false;
328 }
329 
330 ImplicitNullChecks::AliasResult
331 ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI,
332  const MachineInstr *PrevMI) const {
333  // If it is not memory access, skip the check.
334  if (!(PrevMI->mayStore() || PrevMI->mayLoad()))
335  return AR_NoAlias;
336  // Load-Load may alias
337  if (!(MI.mayStore() || PrevMI->mayStore()))
338  return AR_NoAlias;
339  // We lost info, conservatively alias. If it was store then no sense to
340  // continue because we won't be able to check against it further.
341  if (MI.memoperands_empty())
342  return MI.mayStore() ? AR_WillAliasEverything : AR_MayAlias;
343  if (PrevMI->memoperands_empty())
344  return PrevMI->mayStore() ? AR_WillAliasEverything : AR_MayAlias;
345 
346  for (MachineMemOperand *MMO1 : MI.memoperands()) {
347  // MMO1 should have a value due it comes from operation we'd like to use
348  // as implicit null check.
349  assert(MMO1->getValue() && "MMO1 should have a Value!");
350  for (MachineMemOperand *MMO2 : PrevMI->memoperands()) {
351  if (const PseudoSourceValue *PSV = MMO2->getPseudoValue()) {
352  if (PSV->mayAlias(MFI))
353  return AR_MayAlias;
354  continue;
355  }
356  if (!AA->isNoAlias(
357  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
358  MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo())))
359  return AR_MayAlias;
360  }
361  }
362  return AR_NoAlias;
363 }
364 
365 ImplicitNullChecks::SuitabilityResult
366 ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
367  unsigned PointerReg,
368  ArrayRef<MachineInstr *> PrevInsts) {
369  // Implementation restriction for faulting_op insertion
370  // TODO: This could be relaxed if we find a test case which warrants it.
371  if (MI.getDesc().getNumDefs() > 1)
372  return SR_Unsuitable;
373 
374  if (!MI.mayLoadOrStore() || MI.isPredicable())
375  return SR_Unsuitable;
376  auto AM = TII->getAddrModeFromMemoryOp(MI, TRI);
377  if (!AM)
378  return SR_Unsuitable;
379  auto AddrMode = *AM;
380  const Register BaseReg = AddrMode.BaseReg, ScaledReg = AddrMode.ScaledReg;
381  int64_t Displacement = AddrMode.Displacement;
382 
383  // We need the base of the memory instruction to be same as the register
384  // where the null check is performed (i.e. PointerReg).
385  if (BaseReg != PointerReg && ScaledReg != PointerReg)
386  return SR_Unsuitable;
387  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
388  unsigned PointerRegSizeInBits = TRI->getRegSizeInBits(PointerReg, MRI);
389  // Bail out of the sizes of BaseReg, ScaledReg and PointerReg are not the
390  // same.
391  if ((BaseReg &&
392  TRI->getRegSizeInBits(BaseReg, MRI) != PointerRegSizeInBits) ||
393  (ScaledReg &&
394  TRI->getRegSizeInBits(ScaledReg, MRI) != PointerRegSizeInBits))
395  return SR_Unsuitable;
396 
397  // Returns true if RegUsedInAddr is used for calculating the displacement
398  // depending on addressing mode. Also calculates the Displacement.
399  auto CalculateDisplacementFromAddrMode = [&](Register RegUsedInAddr,
400  int64_t Multiplier) {
401  // The register can be NoRegister, which is defined as zero for all targets.
402  // Consider instruction of interest as `movq 8(,%rdi,8), %rax`. Here the
403  // ScaledReg is %rdi, while there is no BaseReg.
404  if (!RegUsedInAddr)
405  return false;
406  assert(Multiplier && "expected to be non-zero!");
407  MachineInstr *ModifyingMI = nullptr;
408  for (auto It = std::next(MachineBasicBlock::const_reverse_iterator(&MI));
409  It != MI.getParent()->rend(); It++) {
410  const MachineInstr *CurrMI = &*It;
411  if (CurrMI->modifiesRegister(RegUsedInAddr, TRI)) {
412  ModifyingMI = const_cast<MachineInstr *>(CurrMI);
413  break;
414  }
415  }
416  if (!ModifyingMI)
417  return false;
418  // Check for the const value defined in register by ModifyingMI. This means
419  // all other previous values for that register has been invalidated.
420  int64_t ImmVal;
421  if (!TII->getConstValDefinedInReg(*ModifyingMI, RegUsedInAddr, ImmVal))
422  return false;
423  // Calculate the reg size in bits, since this is needed for bailing out in
424  // case of overflow.
425  int32_t RegSizeInBits = TRI->getRegSizeInBits(RegUsedInAddr, MRI);
426  APInt ImmValC(RegSizeInBits, ImmVal, true /*IsSigned*/);
427  APInt MultiplierC(RegSizeInBits, Multiplier);
428  assert(MultiplierC.isStrictlyPositive() &&
429  "expected to be a positive value!");
430  bool IsOverflow;
431  // Sign of the product depends on the sign of the ImmVal, since Multiplier
432  // is always positive.
433  APInt Product = ImmValC.smul_ov(MultiplierC, IsOverflow);
434  if (IsOverflow)
435  return false;
436  APInt DisplacementC(64, Displacement, true /*isSigned*/);
437  DisplacementC = Product.sadd_ov(DisplacementC, IsOverflow);
438  if (IsOverflow)
439  return false;
440 
441  // We only handle diplacements upto 64 bits wide.
442  if (DisplacementC.getActiveBits() > 64)
443  return false;
444  Displacement = DisplacementC.getSExtValue();
445  return true;
446  };
447 
448  // If a register used in the address is constant, fold it's effect into the
449  // displacement for ease of analysis.
450  bool BaseRegIsConstVal = false, ScaledRegIsConstVal = false;
451  if (CalculateDisplacementFromAddrMode(BaseReg, 1))
452  BaseRegIsConstVal = true;
453  if (CalculateDisplacementFromAddrMode(ScaledReg, AddrMode.Scale))
454  ScaledRegIsConstVal = true;
455 
456  // The register which is not null checked should be part of the Displacement
457  // calculation, otherwise we do not know whether the Displacement is made up
458  // by some symbolic values.
459  // This matters because we do not want to incorrectly assume that load from
460  // falls in the zeroth faulting page in the "sane offset check" below.
461  if ((BaseReg && BaseReg != PointerReg && !BaseRegIsConstVal) ||
462  (ScaledReg && ScaledReg != PointerReg && !ScaledRegIsConstVal))
463  return SR_Unsuitable;
464 
465  // We want the mem access to be issued at a sane offset from PointerReg,
466  // so that if PointerReg is null then the access reliably page faults.
467  if (!(-PageSize < Displacement && Displacement < PageSize))
468  return SR_Unsuitable;
469 
470  // Finally, check whether the current memory access aliases with previous one.
471  for (auto *PrevMI : PrevInsts) {
472  AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
473  if (AR == AR_WillAliasEverything)
474  return SR_Impossible;
475  if (AR == AR_MayAlias)
476  return SR_Unsuitable;
477  }
478  return SR_Suitable;
479 }
480 
481 bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns(
482  MachineInstr *DependenceMI, MachineBasicBlock *NullSucc) {
483  for (const auto &DependenceMO : DependenceMI->operands()) {
484  if (!(DependenceMO.isReg() && DependenceMO.getReg()))
485  continue;
486 
487  // Make sure that we won't clobber any live ins to the sibling block by
488  // hoisting Dependency. For instance, we can't hoist INST to before the
489  // null check (even if it safe, and does not violate any dependencies in
490  // the non_null_block) if %rdx is live in to _null_block.
491  //
492  // test %rcx, %rcx
493  // je _null_block
494  // _non_null_block:
495  // %rdx = INST
496  // ...
497  //
498  // This restriction does not apply to the faulting load inst because in
499  // case the pointer loaded from is in the null page, the load will not
500  // semantically execute, and affect machine state. That is, if the load
501  // was loading into %rax and it faults, the value of %rax should stay the
502  // same as it would have been had the load not have executed and we'd have
503  // branched to NullSucc directly.
504  if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
505  return true;
506 
507  }
508 
509  // The dependence does not clobber live-ins in NullSucc block.
510  return false;
511 }
512 
513 bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
514  ArrayRef<MachineInstr *> InstsSeenSoFar,
515  MachineBasicBlock *NullSucc,
517  auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
518  if (!DepResult.CanReorder)
519  return false;
520 
521  if (!DepResult.PotentialDependence) {
522  Dependence = nullptr;
523  return true;
524  }
525 
526  auto DependenceItr = *DepResult.PotentialDependence;
527  auto *DependenceMI = *DependenceItr;
528 
529  // We don't want to reason about speculating loads. Note -- at this point
530  // we should have already filtered out all of the other non-speculatable
531  // things, like calls and stores.
532  // We also do not want to hoist stores because it might change the memory
533  // while the FaultingMI may result in faulting.
534  assert(canHandle(DependenceMI) && "Should never have reached here!");
535  if (DependenceMI->mayLoadOrStore())
536  return false;
537 
538  if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc))
539  return false;
540 
541  auto DepDepResult =
542  computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr});
543 
544  if (!DepDepResult.CanReorder || DepDepResult.PotentialDependence)
545  return false;
546 
547  Dependence = DependenceMI;
548  return true;
549 }
550 
551 /// Analyze MBB to check if its terminating branch can be turned into an
552 /// implicit null check. If yes, append a description of the said null check to
553 /// NullCheckList and return true, else return false.
554 bool ImplicitNullChecks::analyzeBlockForNullChecks(
556  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
557 
558  MDNode *BranchMD = nullptr;
559  if (auto *BB = MBB.getBasicBlock())
560  BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit);
561 
562  if (!BranchMD)
563  return false;
564 
565  MachineBranchPredicate MBP;
566 
567  if (TII->analyzeBranchPredicate(MBB, MBP, true))
568  return false;
569 
570  // Is the predicate comparing an integer to zero?
571  if (!(MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
572  (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
573  MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
574  return false;
575 
576  // If there is a separate condition generation instruction, we chose not to
577  // transform unless we can remove both condition and consuming branch.
578  if (MBP.ConditionDef && !MBP.SingleUseCondition)
579  return false;
580 
581  MachineBasicBlock *NotNullSucc, *NullSucc;
582 
583  if (MBP.Predicate == MachineBranchPredicate::PRED_NE) {
584  NotNullSucc = MBP.TrueDest;
585  NullSucc = MBP.FalseDest;
586  } else {
587  NotNullSucc = MBP.FalseDest;
588  NullSucc = MBP.TrueDest;
589  }
590 
591  // We handle the simplest case for now. We can potentially do better by using
592  // the machine dominator tree.
593  if (NotNullSucc->pred_size() != 1)
594  return false;
595 
596  const Register PointerReg = MBP.LHS.getReg();
597 
598  if (MBP.ConditionDef) {
599  // To prevent the invalid transformation of the following code:
600  //
601  // mov %rax, %rcx
602  // test %rax, %rax
603  // %rax = ...
604  // je throw_npe
605  // mov(%rcx), %r9
606  // mov(%rax), %r10
607  //
608  // into:
609  //
610  // mov %rax, %rcx
611  // %rax = ....
612  // faulting_load_op("movl (%rax), %r10", throw_npe)
613  // mov(%rcx), %r9
614  //
615  // we must ensure that there are no instructions between the 'test' and
616  // conditional jump that modify %rax.
617  assert(MBP.ConditionDef->getParent() == &MBB &&
618  "Should be in basic block");
619 
620  for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
621  if (I->modifiesRegister(PointerReg, TRI))
622  return false;
623  }
624  // Starting with a code fragment like:
625  //
626  // test %rax, %rax
627  // jne LblNotNull
628  //
629  // LblNull:
630  // callq throw_NullPointerException
631  //
632  // LblNotNull:
633  // Inst0
634  // Inst1
635  // ...
636  // Def = Load (%rax + <offset>)
637  // ...
638  //
639  //
640  // we want to end up with
641  //
642  // Def = FaultingLoad (%rax + <offset>), LblNull
643  // jmp LblNotNull ;; explicit or fallthrough
644  //
645  // LblNotNull:
646  // Inst0
647  // Inst1
648  // ...
649  //
650  // LblNull:
651  // callq throw_NullPointerException
652  //
653  //
654  // To see why this is legal, consider the two possibilities:
655  //
656  // 1. %rax is null: since we constrain <offset> to be less than PageSize, the
657  // load instruction dereferences the null page, causing a segmentation
658  // fault.
659  //
660  // 2. %rax is not null: in this case we know that the load cannot fault, as
661  // otherwise the load would've faulted in the original program too and the
662  // original program would've been undefined.
663  //
664  // This reasoning cannot be extended to justify hoisting through arbitrary
665  // control flow. For instance, in the example below (in pseudo-C)
666  //
667  // if (ptr == null) { throw_npe(); unreachable; }
668  // if (some_cond) { return 42; }
669  // v = ptr->field; // LD
670  // ...
671  //
672  // we cannot (without code duplication) use the load marked "LD" to null check
673  // ptr -- clause (2) above does not apply in this case. In the above program
674  // the safety of ptr->field can be dependent on some_cond; and, for instance,
675  // ptr could be some non-null invalid reference that never gets loaded from
676  // because some_cond is always true.
677 
678  SmallVector<MachineInstr *, 8> InstsSeenSoFar;
679 
680  for (auto &MI : *NotNullSucc) {
681  if (!canHandle(&MI) || InstsSeenSoFar.size() >= MaxInstsToConsider)
682  return false;
683 
685  SuitabilityResult SR = isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar);
686  if (SR == SR_Impossible)
687  return false;
688  if (SR == SR_Suitable &&
689  canHoistInst(&MI, InstsSeenSoFar, NullSucc, Dependence)) {
690  NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
691  NullSucc, Dependence);
692  return true;
693  }
694 
695  // If MI re-defines the PointerReg in a way that changes the value of
696  // PointerReg if it was null, then we cannot move further.
697  if (!TII->preservesZeroValueInReg(&MI, PointerReg, TRI))
698  return false;
699  InstsSeenSoFar.push_back(&MI);
700  }
701 
702  return false;
703 }
704 
705 /// Wrap a machine instruction, MI, into a FAULTING machine instruction.
706 /// The FAULTING instruction does the same load/store as MI
707 /// (defining the same register), and branches to HandlerMBB if the mem access
708 /// faults. The FAULTING instruction is inserted at the end of MBB.
709 MachineInstr *ImplicitNullChecks::insertFaultingInstr(
711  const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
712  // all targets.
713 
714  DebugLoc DL;
715  unsigned NumDefs = MI->getDesc().getNumDefs();
716  assert(NumDefs <= 1 && "other cases unhandled!");
717 
718  unsigned DefReg = NoRegister;
719  if (NumDefs != 0) {
720  DefReg = MI->getOperand(0).getReg();
721  assert(NumDefs == 1 && "expected exactly one def!");
722  }
723 
725  if (MI->mayLoad())
726  FK =
728  else
730 
731  auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_OP), DefReg)
732  .addImm(FK)
733  .addMBB(HandlerMBB)
734  .addImm(MI->getOpcode());
735 
736  for (auto &MO : MI->uses()) {
737  if (MO.isReg()) {
738  MachineOperand NewMO = MO;
739  if (MO.isUse()) {
740  NewMO.setIsKill(false);
741  } else {
742  assert(MO.isDef() && "Expected def or use");
743  NewMO.setIsDead(false);
744  }
745  MIB.add(NewMO);
746  } else {
747  MIB.add(MO);
748  }
749  }
750 
751  MIB.setMemRefs(MI->memoperands());
752 
753  return MIB;
754 }
755 
756 /// Rewrite the null checks in NullCheckList into implicit null checks.
757 void ImplicitNullChecks::rewriteNullChecks(
759  DebugLoc DL;
760 
761  for (auto &NC : NullCheckList) {
762  // Remove the conditional branch dependent on the null check.
763  unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock());
764  (void)BranchesRemoved;
765  assert(BranchesRemoved > 0 && "expected at least one branch!");
766 
767  if (auto *DepMI = NC.getOnlyDependency()) {
768  DepMI->removeFromParent();
769  NC.getCheckBlock()->insert(NC.getCheckBlock()->end(), DepMI);
770  }
771 
772  // Insert a faulting instruction where the conditional branch was
773  // originally. We check earlier ensures that this bit of code motion
774  // is legal. We do not touch the successors list for any basic block
775  // since we haven't changed control flow, we've just made it implicit.
776  MachineInstr *FaultingInstr = insertFaultingInstr(
777  NC.getMemOperation(), NC.getCheckBlock(), NC.getNullSucc());
778  // Now the values defined by MemOperation, if any, are live-in of
779  // the block of MemOperation.
780  // The original operation may define implicit-defs alongside
781  // the value.
782  MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
783  for (const MachineOperand &MO : FaultingInstr->operands()) {
784  if (!MO.isReg() || !MO.isDef())
785  continue;
786  Register Reg = MO.getReg();
787  if (!Reg || MBB->isLiveIn(Reg))
788  continue;
789  MBB->addLiveIn(Reg);
790  }
791 
792  if (auto *DepMI = NC.getOnlyDependency()) {
793  for (auto &MO : DepMI->operands()) {
794  if (!MO.isReg() || !MO.getReg() || !MO.isDef() || MO.isDead())
795  continue;
796  if (!NC.getNotNullSucc()->isLiveIn(MO.getReg()))
797  NC.getNotNullSucc()->addLiveIn(MO.getReg());
798  }
799  }
800 
801  NC.getMemOperation()->eraseFromParent();
802  if (auto *CheckOp = NC.getCheckOperation())
803  CheckOp->eraseFromParent();
804 
805  // Insert an *unconditional* branch to not-null successor - we expect
806  // block placement to remove fallthroughs later.
807  TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
808  /*Cond=*/None, DL);
809 
810  NumImplicitNullChecks++;
811  }
812 }
813 
814 char ImplicitNullChecks::ID = 0;
815 
817 
818 INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE,
819  "Implicit null checks", false, false)
821 INITIALIZE_PASS_END(ImplicitNullChecks, DEBUG_TYPE,
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
llvm::APInt::sadd_ov
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1920
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1558
MaxInstsToConsider
static cl::opt< unsigned > MaxInstsToConsider("imp-null-max-insts-to-consider", cl::desc("The max number of instructions to consider hoisting loads over " "(the algorithm is quadratic over this number)"), cl::Hidden, cl::init(8))
DEBUG_TYPE
#define DEBUG_TYPE
Definition: ImplicitNullChecks.cpp:74
llvm::MachineBasicBlock::isLiveIn
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Definition: MachineBasicBlock.cpp:579
Optional.h
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:202
llvm::HexagonInstrInfo::removeBranch
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
Definition: HexagonInstrInfo.cpp:559
MCInstrDesc.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::TargetInstrInfo::MachineBranchPredicate
Represents a predicate at the MachineFunction level.
Definition: TargetInstrInfo.h:640
llvm::MachineInstr::mayLoadOrStore
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
Definition: MachineInstr.h:1028
Pass.h
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:92
llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:1005
llvm::ArrayRef::iterator
const_pointer iterator
Definition: ArrayRef.h:48
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:500
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1472
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:153
llvm::AAResults::isNoAlias
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Definition: AliasAnalysis.h:562
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::Dependence
Dependence - This class represents a dependence between two memory memory references in a function.
Definition: DependenceAnalysis.h:71
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE, "Implicit null checks", false, false) INITIALIZE_PASS_END(ImplicitNullChecks
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
TargetInstrInfo.h
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::FaultMaps::FaultingLoadStore
@ FaultingLoadStore
Definition: FaultMaps.h:27
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:111
llvm::Optional
Definition: APInt.h:33
llvm::AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:82
STLExtras.h
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1559
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
MachineRegisterInfo.h
AliasAnalysis.h
checks
Implicit null checks
Definition: ImplicitNullChecks.cpp:822
CommandLine.h
llvm::MachineBasicBlock::pred_size
unsigned pred_size() const
Definition: MachineBasicBlock.h:328
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1600
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:651
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
PageSize
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::AAResults
Definition: AliasAnalysis.h:508
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
TargetOpcodes.h
PseudoSourceValue.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:180
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::HexagonInstrInfo::insertBranch
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
Definition: HexagonInstrInfo.cpp:582
llvm::TargetRegisterInfo::regsOverlap
bool regsOverlap(Register regA, Register regB) const
Returns true if the two registers are equal or alias each other.
Definition: TargetRegisterInfo.h:418
DebugLoc.h
llvm::PseudoSourceValue
Special value supplied for machine level alias analysis.
Definition: PseudoSourceValue.h:35
llvm::None
const NoneType None
Definition: None.h:23
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineFunctionProperties::Property::NoVRegs
@ NoVRegs
llvm::PPC::PRED_EQ
@ PRED_EQ
Definition: PPCPredicates.h:29
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:641
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:506
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
MemoryLocation.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
MCRegisterInfo.h
llvm::FaultMaps::FaultingStore
@ FaultingStore
Definition: FaultMaps.h:28
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1665
ArrayRef.h
MachineFunctionPass.h
llvm::PPC::PRED_NE
@ PRED_NE
Definition: PPCPredicates.h:32
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:657
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
None.h
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1607
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:272
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
AddrMode
AddrMode
Definition: MSP430Disassembler.cpp:142
llvm::initializeImplicitNullChecksPass
void initializeImplicitNullChecksPass(PassRegistry &)
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:367
llvm::FaultMaps::FaultingLoad
@ FaultingLoad
Definition: FaultMaps.h:26
llvm::MachineInstr::modifiesRegister
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
Definition: MachineInstr.h:1401
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:276
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:46
NC
#define NC
Definition: regutils.h:42
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
llvm::MachineInstr::mayStore
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:1018
llvm::APInt::smul_ov
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1952
llvm::MachineInstr::memoperands_empty
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:720
llvm::FaultMaps::FaultKind
FaultKind
Definition: FaultMaps.h:25
llvm::MCRegAliasIterator::isValid
bool isValid() const
Definition: MCRegisterInfo.h:805
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
SmallVector.h
MachineInstrBuilder.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:269
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1336
llvm::MachineInstr::memoperands
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:690
MachineMemOperand.h
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::sys::path::rend
reverse_iterator rend(StringRef path)
Get reverse end iterator over path.
Definition: Path.cpp:306
MachineOperand.h
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ImplicitNullChecksID
char & ImplicitNullChecksID
ImplicitNullChecks - This pass folds null pointer checks into nearby memory operations.
Definition: ImplicitNullChecks.cpp:816
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
FaultMaps.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:412
MachineFunction.h
llvm::MachineInstrBundleIterator
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Definition: MachineInstrBundleIterator.h:108
llvm::pdb::PDB_SymType::Block
@ Block
InitializePasses.h
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:618
TargetRegisterInfo.h
llvm::MCRegAliasIterator
MCRegAliasIterator enumerates all registers aliasing Reg.
Definition: MCRegisterInfo.h:780
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
AnyAliasLiveIn
static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI, MachineBasicBlock *MBB, unsigned Reg)
Definition: ImplicitNullChecks.cpp:321
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38