1 //====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// Provide a pass which mitigates speculative execution attacks which operate
11 /// by speculating incorrectly past some predicate (a type check, bounds check,
12 /// or other condition) to reach a load with invalid inputs and leak the data
13 /// accessed by that load using a side channel out of the speculative domain.
14 ///
15 /// For details on the attacks, see the first variant in both the Project Zero
16 /// writeup and the Spectre paper:
17 /// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18 /// https://spectreattack.com/spectre.pdf
19 ///
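/// As an illustrative sketch (not drawn from any particular test case), the
/// kind of gadget this pass mitigates looks roughly like:
///
///   cmpq %rsi, %rcx          # bounds check
///   jae  .Lout_of_bounds
///   movq (%rdi,%rsi,8), %rax # load that may execute speculatively
///
/// The pass tracks an all-zeros/all-ones "predicate state" value, poisons it
/// with a conditional move along each conditional edge whenever that edge is
/// misspeculated, and uses the state to mask load addresses (or loaded
/// values) so that misspeculated loads cannot leak data through a side
/// channel. The exact sequences emitted are defined by the code below.
///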
20 //===----------------------------------------------------------------------===//
21 
22 #include "X86.h"
23 #include "X86InstrBuilder.h"
24 #include "X86InstrInfo.h"
25 #include "X86Subtarget.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/ScopeExit.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/SparseBitVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/CodeGen/MachineBasicBlock.h"
37 #include "llvm/CodeGen/MachineConstantPool.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineFunctionPass.h"
40 #include "llvm/CodeGen/MachineInstr.h"
41 #include "llvm/CodeGen/MachineInstrBuilder.h"
42 #include "llvm/CodeGen/MachineModuleInfo.h"
43 #include "llvm/CodeGen/MachineOperand.h"
44 #include "llvm/CodeGen/MachineRegisterInfo.h"
45 #include "llvm/CodeGen/MachineSSAUpdater.h"
46 #include "llvm/CodeGen/TargetInstrInfo.h"
47 #include "llvm/CodeGen/TargetRegisterInfo.h"
48 #include "llvm/CodeGen/TargetSchedule.h"
49 #include "llvm/CodeGen/TargetSubtargetInfo.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/MC/MCSchedule.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Target/TargetMachine.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <iterator>
60 #include <utility>
61 
62 using namespace llvm;
63 
64 #define PASS_KEY "x86-slh"
65 #define DEBUG_TYPE PASS_KEY
66 
67 STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
68 STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
69 STATISTIC(NumAddrRegsHardened,
70  "Number of address mode used registers hardened");
71 STATISTIC(NumPostLoadRegsHardened,
72  "Number of post-load register values hardened");
73 STATISTIC(NumCallsOrJumpsHardened,
74  "Number of calls or jumps requiring extra hardening");
75 STATISTIC(NumInstsInserted, "Number of instructions inserted");
76 STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
77 
78 static cl::opt<bool> EnableSpeculativeLoadHardening(
79  "x86-speculative-load-hardening",
80  cl::desc("Force enable speculative load hardening"), cl::init(false),
81  cl::Hidden);
82 
83 static cl::opt<bool> HardenEdgesWithLFENCE(
84  PASS_KEY "-lfence",
85  cl::desc(
86  "Use LFENCE along each conditional edge to harden against speculative "
87  "loads rather than conditional movs and poisoned pointers."),
88  cl::init(false), cl::Hidden);
89 
90 static cl::opt<bool> EnablePostLoadHardening(
91  PASS_KEY "-post-load",
92  cl::desc("Harden the value loaded *after* it is loaded by "
93  "flushing the loaded bits to 1. This is hard to do "
94  "in general but can be done easily for GPRs."),
95  cl::init(true), cl::Hidden);
96 
97 static cl::opt<bool> FenceCallAndRet(
98  PASS_KEY "-fence-call-and-ret",
99  cl::desc("Use a full speculation fence to harden both call and ret edges "
100  "rather than a lighter weight mitigation."),
101  cl::init(false), cl::Hidden);
102 
103 static cl::opt<bool> HardenInterprocedurally(
104  PASS_KEY "-ip",
105  cl::desc("Harden interprocedurally by passing our state in and out of "
106  "functions in the high bits of the stack pointer."),
107  cl::init(true), cl::Hidden);
108 
109 static cl::opt<bool>
110  HardenLoads(PASS_KEY "-loads",
111  cl::desc("Sanitize loads from memory. When disabled, no "
112  "significant security is provided."),
113  cl::init(true), cl::Hidden);
114 
115 static cl::opt<bool> HardenIndirectCallsAndJumps(
116  PASS_KEY "-indirect",
117  cl::desc("Harden indirect calls and jumps against using speculatively "
118  "stored attacker controlled addresses. This is designed to "
119  "mitigate Spectre v1.2 style attacks."),
120  cl::init(true), cl::Hidden);
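
// Note: the pass itself only runs when either the "x86-speculative-load-hardening"
// flag above is set or the function carries the `speculative_load_hardening`
// IR attribute (clang's -mspeculative-load-hardening); see the check at the
// top of runOnMachineFunction below.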
121 
122 namespace {
123 
124 class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
125 public:
126  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
127 
128  StringRef getPassName() const override {
129  return "X86 speculative load hardening";
130  }
131  bool runOnMachineFunction(MachineFunction &MF) override;
132  void getAnalysisUsage(AnalysisUsage &AU) const override;
133 
134  /// Pass identification, replacement for typeid.
135  static char ID;
136 
137 private:
138  /// The information about a block's conditional terminators needed to trace
139  /// our predicate state through the exiting edges.
140  struct BlockCondInfo {
141  MachineBasicBlock *MBB;
142 
143  // We mostly have one conditional branch, and in extremely rare cases have
144  // two. Three and more are so rare as to be unimportant for compile time.
145  SmallVector<MachineInstr *, 2> CondBrs;
146 
147  MachineInstr *UncondBr;
148  };
149 
150  /// Manages the predicate state traced through the program.
151  struct PredState {
152  unsigned InitialReg = 0;
153  unsigned PoisonReg = 0;
154 
155  const TargetRegisterClass *RC;
156  MachineSSAUpdater SSA;
157 
158  PredState(MachineFunction &MF, const TargetRegisterClass *RC)
159  : RC(RC), SSA(MF) {}
160  };
161 
162  const X86Subtarget *Subtarget = nullptr;
163  MachineRegisterInfo *MRI = nullptr;
164  const X86InstrInfo *TII = nullptr;
165  const TargetRegisterInfo *TRI = nullptr;
166 
167  Optional<PredState> PS;
168 
169  void hardenEdgesWithLFENCE(MachineFunction &MF);
170 
171  SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
172 
173  SmallVector<MachineInstr *, 16>
174  tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
175 
176  void unfoldCallAndJumpLoads(MachineFunction &MF);
177 
178  SmallVector<MachineInstr *, 16>
179  tracePredStateThroughIndirectBranches(MachineFunction &MF);
180 
181  void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
182 
183  unsigned saveEFLAGS(MachineBasicBlock &MBB,
184  MachineBasicBlock::iterator InsertPt,
185  const DebugLoc &Loc);
186  void restoreEFLAGS(MachineBasicBlock &MBB,
187  MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
188  Register Reg);
189 
190  void mergePredStateIntoSP(MachineBasicBlock &MBB,
191  MachineBasicBlock::iterator InsertPt,
192  const DebugLoc &Loc, unsigned PredStateReg);
193  unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
194  MachineBasicBlock::iterator InsertPt,
195  const DebugLoc &Loc);
196 
197  void
198  hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
199  MachineOperand &IndexMO,
200  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
201  MachineInstr *
202  sinkPostLoadHardenedInst(MachineInstr &MI,
203  SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
204  bool canHardenRegister(Register Reg);
205  unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
206  MachineBasicBlock::iterator InsertPt,
207  const DebugLoc &Loc);
208  unsigned hardenPostLoad(MachineInstr &MI);
209  void hardenReturnInstr(MachineInstr &MI);
210  void tracePredStateThroughCall(MachineInstr &MI);
211  void hardenIndirectCallOrJumpInstr(
212  MachineInstr &MI,
213  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
214 };
215 
216 } // end anonymous namespace
217 
218 char X86SpeculativeLoadHardeningPass::ID = 0;
219 
220 void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
221  AnalysisUsage &AU) const {
222  MachineFunctionPass::getAnalysisUsage(AU);
223 }
224 
225 static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
226  MachineBasicBlock &Succ, int SuccCount,
227  MachineInstr *Br, MachineInstr *&UncondBr,
228  const X86InstrInfo &TII) {
229  assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
230 
231  MachineFunction &MF = *MBB.getParent();
232 
233  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
234 
235  // We have to insert the new block immediately after the current one as we
236  // don't know what layout-successor relationships the successor has and we
237  // may not be able to (and generally don't want to) try to fix those up.
238  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
239 
240  // Update the branch instruction if necessary.
241  if (Br) {
242  assert(Br->getOperand(0).getMBB() == &Succ &&
243  "Didn't start with the right target!");
244  Br->getOperand(0).setMBB(&NewMBB);
245 
246  // If this successor was reached through a branch rather than fallthrough,
247  // we might have *broken* fallthrough and so need to inject a new
248  // unconditional branch.
249  if (!UncondBr) {
250  MachineBasicBlock &OldLayoutSucc =
251  *std::next(MachineFunction::iterator(&NewMBB));
252  assert(MBB.isSuccessor(&OldLayoutSucc) &&
253  "Without an unconditional branch, the old layout successor should "
254  "be an actual successor!");
255  auto BrBuilder =
256  BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
257  // Update the unconditional branch now that we've added one.
258  UncondBr = &*BrBuilder;
259  }
260 
261  // Insert unconditional "jump Succ" instruction in the new block if
262  // necessary.
263  if (!NewMBB.isLayoutSuccessor(&Succ)) {
264  SmallVector<MachineOperand, 4> Cond;
265  TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
266  }
267  } else {
268  assert(!UncondBr &&
269  "Cannot have a branchless successor and an unconditional branch!");
270  assert(NewMBB.isLayoutSuccessor(&Succ) &&
271  "A non-branch successor must have been a layout successor before "
272  "and now is a layout successor of the new block.");
273  }
274 
275  // If this is the only edge to the successor, we can just replace it in the
276  // CFG. Otherwise we need to add a new entry in the CFG for the new
277  // successor.
278  if (SuccCount == 1) {
279  MBB.replaceSuccessor(&Succ, &NewMBB);
280  } else {
281  MBB.splitSuccessor(&Succ, &NewMBB);
282  }
283 
284  // Hook up the edge from the new basic block to the old successor in the CFG.
285  NewMBB.addSuccessor(&Succ);
286 
287  // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
288  for (MachineInstr &MI : Succ) {
289  if (!MI.isPHI())
290  break;
291  for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
292  OpIdx += 2) {
293  MachineOperand &OpV = MI.getOperand(OpIdx);
294  MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
295  assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
296  if (OpMBB.getMBB() != &MBB)
297  continue;
298 
299  // If this is the last edge to the successor, just replace MBB in the PHI.
300  if (SuccCount == 1) {
301  OpMBB.setMBB(&NewMBB);
302  break;
303  }
304 
305  // Otherwise, append a new pair of operands for the new incoming edge.
306  MI.addOperand(MF, OpV);
307  MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
308  break;
309  }
310  }
311 
312  // Inherit live-ins from the successor
313  for (auto &LI : Succ.liveins())
314  NewMBB.addLiveIn(LI);
315 
316  LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
317  << Succ.getName() << "'.\n");
318  return NewMBB;
319 }
320 
321 /// Remove duplicate PHI operands to leave the PHI in a canonical and
322 /// predictable form.
323 ///
324 /// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
325 /// isn't what you might expect. We may have multiple entries in PHI nodes for
326 /// a single predecessor. This makes CFG-updating extremely complex, so here we
327 /// simplify all PHI nodes to a model even simpler than the IR's model: exactly
328 /// one entry per predecessor, regardless of how many edges there are.
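///
/// For example (illustrative MIR, not taken from a real function), a PHI such
/// as
///
///   %v:gr64 = PHI %a, %bb.1, %a, %bb.1, %b, %bb.2
///
/// is rewritten here to
///
///   %v:gr64 = PHI %a, %bb.1, %b, %bb.2
///
/// so that later CFG updates only ever have to touch one operand pair per
/// predecessor.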
329 static void canonicalizePHIOperands(MachineFunction &MF) {
330  SmallPtrSet<MachineBasicBlock *, 4> Preds;
331  SmallVector<int, 4> DupIndices;
332  for (auto &MBB : MF)
333  for (auto &MI : MBB) {
334  if (!MI.isPHI())
335  break;
336 
337  // First we scan the operands of the PHI looking for duplicate entries
338  // for a particular predecessor. We retain the operand index of each duplicate
339  // entry found.
340  for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
341  OpIdx += 2)
342  if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
343  DupIndices.push_back(OpIdx);
344 
345  // Now walk the duplicate indices, removing both the block and value. Note
346  // that these are stored as a vector making this element-wise removal
348  // potentially quadratic.
349  //
350  // FIXME: It is really frustrating that we have to use a quadratic
351  // removal algorithm here. There should be a better way, but the use-def
352  // updates required make that impossible using the public API.
353  //
354  // Note that we have to process these backwards so that we don't
355  // invalidate other indices with each removal.
356  while (!DupIndices.empty()) {
357  int OpIdx = DupIndices.pop_back_val();
358  // Remove both the block and value operand, again in reverse order to
359  // preserve indices.
360  MI.removeOperand(OpIdx + 1);
361  MI.removeOperand(OpIdx);
362  }
363 
364  Preds.clear();
365  }
366 }
367 
368 /// Helper to scan a function for loads vulnerable to misspeculation that we
369 /// want to harden.
370 ///
371 /// We use this to avoid making changes to functions where there is nothing we
372 /// need to do to harden against misspeculation.
373 static bool hasVulnerableLoad(MachineFunction &MF) {
374  for (MachineBasicBlock &MBB : MF) {
375  for (MachineInstr &MI : MBB) {
376  // Loads within this basic block after an LFENCE are not at risk of
377  // speculatively executing with invalid predicates from prior control
378  // flow. So break out of this block but continue scanning the function.
379  if (MI.getOpcode() == X86::LFENCE)
380  break;
381 
382  // Looking for loads only.
383  if (!MI.mayLoad())
384  continue;
385 
386  // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
387  if (MI.getOpcode() == X86::MFENCE)
388  continue;
389 
390  // We found a load.
391  return true;
392  }
393  }
394 
395  // No loads found.
396  return false;
397 }
398 
399 bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
400  MachineFunction &MF) {
401  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
402  << " **********\n");
403 
404  // Only run if this pass is forced enabled or we detect the relevant function
405  // attribute requesting SLH.
406  if (!EnableSpeculativeLoadHardening &&
407  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
408  return false;
409 
410  Subtarget = &MF.getSubtarget<X86Subtarget>();
411  MRI = &MF.getRegInfo();
412  TII = Subtarget->getInstrInfo();
413  TRI = Subtarget->getRegisterInfo();
414 
415  // FIXME: Support for 32-bit.
416  PS.emplace(MF, &X86::GR64_NOSPRegClass);
417 
418  if (MF.begin() == MF.end())
419  // Nothing to do for a degenerate empty function...
420  return false;
421 
422  // We support an alternative hardening technique based on a debug flag.
423  if (HardenEdgesWithLFENCE) {
424  hardenEdgesWithLFENCE(MF);
425  return true;
426  }
427 
428  // Create a dummy debug loc to use for all the generated code here.
429  DebugLoc Loc;
430 
431  MachineBasicBlock &Entry = *MF.begin();
432  auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
433 
434  // Do a quick scan to see if we have any checkable loads.
435  bool HasVulnerableLoad = hasVulnerableLoad(MF);
436 
437  // See if we have any conditional branching blocks that we will need to trace
438  // predicate state through.
439  SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
440 
441  // If we have no interesting conditions or loads, nothing to do here.
442  if (!HasVulnerableLoad && Infos.empty())
443  return true;
444 
445  // The poison value is required to be an all-ones value for many aspects of
446  // this mitigation.
447  const int PoisonVal = -1;
448  PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
449  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
450  .addImm(PoisonVal);
451  ++NumInstsInserted;
452 
453  // If we have loads being hardened and we've asked for call and ret edges to
454  // get a full fence-based mitigation, inject that fence.
455  if (HasVulnerableLoad && FenceCallAndRet) {
456  // We need to insert an LFENCE at the start of the function to suspend any
457  // incoming misspeculation from the caller. This helps two-fold: the caller
458  // may not have been protected as this code has been, and this code gets to
459  // not take any specific action to protect across calls.
460  // FIXME: We could skip this for functions which unconditionally return
461  // a constant.
462  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
463  ++NumInstsInserted;
464  ++NumLFENCEsInserted;
465  }
466 
467  // If we guarded the entry with an LFENCE and have no conditionals to protect
468  // in blocks, then we're done.
469  if (FenceCallAndRet && Infos.empty())
470  // We may have changed the function's code at this point to insert fences.
471  return true;
472 
473  // Set up the initial predicate state in the entry block.
474  if (HardenInterprocedurally && !FenceCallAndRet) {
475  // Set up the predicate state by extracting it from the incoming stack
476  // pointer so we pick up any misspeculation in our caller.
477  PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
478  } else {
479  // Otherwise, just build the predicate state itself by zeroing a register
480  // as we don't need any initial state.
481  PS->InitialReg = MRI->createVirtualRegister(PS->RC);
482  Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
483  auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
484  PredStateSubReg);
485  ++NumInstsInserted;
486  MachineOperand *ZeroEFLAGSDefOp =
487  ZeroI->findRegisterDefOperand(X86::EFLAGS);
488  assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
489  "Must have an implicit def of EFLAGS!");
490  ZeroEFLAGSDefOp->setIsDead(true);
491  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
492  PS->InitialReg)
493  .addImm(0)
494  .addReg(PredStateSubReg)
495  .addImm(X86::sub_32bit);
496  }
497 
498  // We're going to need to trace predicate state throughout the function's
499  // CFG. Prepare for this by setting up our initial state of PHIs with unique
500  // predecessor entries and all the initial predicate state.
501  canonicalizePHIOperands(MF);
502 
503  // Track the updated values in an SSA updater to rewrite into SSA form at the
504  // end.
505  PS->SSA.Initialize(PS->InitialReg);
506  PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
507 
508  // Trace through the CFG.
509  auto CMovs = tracePredStateThroughCFG(MF, Infos);
510 
511  // We may also enter basic blocks in this function via exception handling
512  // control flow. Here, if we are hardening interprocedurally, we need to
513  // re-capture the predicate state from the throwing code. In the Itanium ABI,
514  // the throw will always look like a call to __cxa_throw and will have the
515  // predicate state in the stack pointer, so extract fresh predicate state from
516  // the stack pointer and make it available in SSA.
517  // FIXME: Handle non-itanium ABI EH models.
518  if (HardenInterprocedurally) {
519  for (MachineBasicBlock &MBB : MF) {
520  assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
521  assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
522  assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
523  if (!MBB.isEHPad())
524  continue;
525  PS->SSA.AddAvailableValue(
526  &MBB,
527  extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
528  }
529  }
530 
531  if (HardenIndirectCallsAndJumps) {
532  // If we are going to harden calls and jumps, we need to unfold their memory
533  // operands.
534  unfoldCallAndJumpLoads(MF);
535 
536  // Then we trace predicate state through the indirect branches.
537  auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
538  CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
539  }
540 
541  // Now that we have the predicate state available at the start of each block
542  // in the CFG, trace it through each block, hardening vulnerable instructions
543  // as we go.
544  tracePredStateThroughBlocksAndHarden(MF);
545 
546  // Now rewrite all the uses of the pred state using the SSA updater to insert
547  // PHIs connecting the state between blocks along the CFG edges.
548  for (MachineInstr *CMovI : CMovs)
549  for (MachineOperand &Op : CMovI->operands()) {
550  if (!Op.isReg() || Op.getReg() != PS->InitialReg)
551  continue;
552 
553  PS->SSA.RewriteUse(Op);
554  }
555 
556  LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
557  dbgs() << "\n"; MF.verify(this));
558  return true;
559 }
560 
561 /// Implements the naive hardening approach of putting an LFENCE after every
562 /// potentially mis-predicted control flow construct.
563 ///
564 /// We include this as an alternative mostly for the purpose of comparison. The
565 /// performance impact of this is expected to be extremely severe and not
566 /// practical for any real-world users.
567 void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
568  MachineFunction &MF) {
569  // First, we scan the function looking for blocks that are reached along edges
570  // that we might want to harden.
571  SmallSetVector<MachineBasicBlock *, 8> Blocks;
572  for (MachineBasicBlock &MBB : MF) {
573  // If there are no or only one successor, nothing to do here.
574  if (MBB.succ_size() <= 1)
575  continue;
576 
577  // Skip blocks unless their terminators start with a branch. Other
578  // terminators don't seem interesting for guarding against misspeculation.
579  auto TermIt = MBB.getFirstTerminator();
580  if (TermIt == MBB.end() || !TermIt->isBranch())
581  continue;
582 
583  // Add all the non-EH-pad successors to the blocks we want to harden. We
584  // skip EH pads because there isn't really a condition of interest on
585  // entering.
586  for (MachineBasicBlock *SuccMBB : MBB.successors())
587  if (!SuccMBB->isEHPad())
588  Blocks.insert(SuccMBB);
589  }
590 
591  for (MachineBasicBlock *MBB : Blocks) {
592  auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
593  BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
594  ++NumInstsInserted;
595  ++NumLFENCEsInserted;
596  }
597 }
598 
599 SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
600 X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
601  SmallVector<BlockCondInfo, 16> Infos;
602 
603  // Walk the function and build up a summary for each block's conditions that
604  // we need to trace through.
605  for (MachineBasicBlock &MBB : MF) {
606  // If there are no or only one successor, nothing to do here.
607  if (MBB.succ_size() <= 1)
608  continue;
609 
610  // We want to reliably handle any conditional branch terminators in the
611  // MBB, so we manually analyze the branch. We can handle all of the
612  // permutations here, including ones that analyzeBranch cannot.
613  //
614  // The approach is to walk backwards across the terminators, resetting at
615  // any unconditional non-indirect branch, and track all conditional edges
616  // to basic blocks as well as the fallthrough or unconditional successor
617  // edge. For each conditional edge, we track the target and the opposite
618  // condition code in order to inject a "no-op" cmov into that successor
619  // that will harden the predicate. For the fallthrough/unconditional
620  // edge, we inject a separate cmov for each conditional branch with
621  // matching condition codes. This effectively implements an "and" of the
622  // condition flags, even if there isn't a single condition flag that would
623  // directly implement that. We don't bother trying to optimize either of
624  // these cases because if such an optimization is possible, LLVM should
625  // have optimized the conditional *branches* in that way already to reduce
626  // instruction count. This late, we simply assume the minimal number of
627  // branch instructions is being emitted and use that to guide our cmov
628  // insertion.
629 
630  BlockCondInfo Info = {&MBB, {}, nullptr};
631 
632  // Now walk backwards through the terminators and build up successors they
633  // reach and the conditions.
634  for (MachineInstr &MI : llvm::reverse(MBB)) {
635  // Once we've handled all the terminators, we're done.
636  if (!MI.isTerminator())
637  break;
638 
639  // If we see a non-branch terminator, we can't handle anything so bail.
640  if (!MI.isBranch()) {
641  Info.CondBrs.clear();
642  break;
643  }
644 
645  // If we see an unconditional branch, reset our state, clear any
646  // fallthrough, and set this as the "else" successor.
647  if (MI.getOpcode() == X86::JMP_1) {
648  Info.CondBrs.clear();
649  Info.UncondBr = &MI;
650  continue;
651  }
652 
653  // If we get an invalid condition, we have an indirect branch or some
654  // other unanalyzable "fallthrough" case. We model this as a nullptr for
655  // the destination so we can still guard any conditional successors.
656  // Consider code sequences like:
657  // ```
658  // jCC L1
659  // jmpq *%rax
660  // ```
661  // We still want to harden the edge to `L1`.
662  if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
663  Info.CondBrs.clear();
664  Info.UncondBr = &MI;
665  continue;
666  }
667 
668  // We have a vanilla conditional branch, add it to our list.
669  Info.CondBrs.push_back(&MI);
670  }
671  if (Info.CondBrs.empty()) {
672  ++NumBranchesUntraced;
673  LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
674  MBB.dump());
675  continue;
676  }
677 
678  Infos.push_back(Info);
679  }
680 
681  return Infos;
682 }
683 
684 /// Trace the predicate state through the CFG, instrumenting each conditional
685 /// branch such that misspeculation through an edge will poison the predicate
686 /// state.
687 ///
688 /// Returns the list of inserted CMov instructions so that they can have their
689 /// uses of the predicate state rewritten into proper SSA form once it is
690 /// complete.
691 SmallVector<MachineInstr *, 16>
692 X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
693  MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
694  // Collect the inserted cmov instructions so we can rewrite their uses of the
695  // predicate state into SSA form.
696  SmallVector<MachineInstr *, 16> CMovs;
697 
698  // Now walk all of the basic blocks looking for ones that end in conditional
699  // jumps where we need to update this register along each edge.
700  for (const BlockCondInfo &Info : Infos) {
701  MachineBasicBlock &MBB = *Info.MBB;
702  const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
703  MachineInstr *UncondBr = Info.UncondBr;
704 
705  LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
706  << "\n");
707  ++NumCondBranchesTraced;
708 
709  // Compute the non-conditional successor as either the target of any
710  // unconditional branch or the layout successor.
711  MachineBasicBlock *UncondSucc =
712  UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
713  ? UncondBr->getOperand(0).getMBB()
714  : nullptr)
715  : &*std::next(MachineFunction::iterator(&MBB));
716 
717  // Count how many edges there are to any given successor.
718  SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
719  if (UncondSucc)
720  ++SuccCounts[UncondSucc];
721  for (auto *CondBr : CondBrs)
722  ++SuccCounts[CondBr->getOperand(0).getMBB()];
723 
724  // A lambda to insert cmov instructions into a block checking all of the
725  // condition codes in a sequence.
726  auto BuildCheckingBlockForSuccAndConds =
727  [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
728  MachineInstr *Br, MachineInstr *&UncondBr,
729  ArrayRef<X86::CondCode> Conds) {
730  // First, we split the edge to insert the checking block into a safe
731  // location.
732  auto &CheckingMBB =
733  (SuccCount == 1 && Succ.pred_size() == 1)
734  ? Succ
735  : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
736 
737  bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
738  if (!LiveEFLAGS)
739  CheckingMBB.addLiveIn(X86::EFLAGS);
740 
741  // Now insert the cmovs to implement the checks.
742  auto InsertPt = CheckingMBB.begin();
743  assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
744  "Should never have a PHI in the initial checking block as it "
745  "always has a single predecessor!");
746 
747  // We will wire each cmov to each other, but need to start with the
748  // incoming pred state.
749  unsigned CurStateReg = PS->InitialReg;
750 
751  for (X86::CondCode Cond : Conds) {
752  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
753  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
754 
755  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
756  // Note that we intentionally use an empty debug location so that
757  // this picks up the preceding location.
758  auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
759  TII->get(CMovOp), UpdatedStateReg)
760  .addReg(CurStateReg)
761  .addReg(PS->PoisonReg)
762  .addImm(Cond);
763  // If this is the last cmov and the EFLAGS weren't originally
764  // live-in, mark them as killed.
765  if (!LiveEFLAGS && Cond == Conds.back())
766  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
767 
768  ++NumInstsInserted;
769  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
770  dbgs() << "\n");
771 
772  // The first one of the cmovs will be using the top level
773  // `PredStateReg` and need to get rewritten into SSA form.
774  if (CurStateReg == PS->InitialReg)
775  CMovs.push_back(&*CMovI);
776 
777  // The next cmov should start from this one's def.
778  CurStateReg = UpdatedStateReg;
779  }
780 
781  // And put the last one into the available values for SSA form of our
782  // predicate state.
783  PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
784  };
785 
786  std::vector<X86::CondCode> UncondCodeSeq;
787  for (auto *CondBr : CondBrs) {
788  MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
789  int &SuccCount = SuccCounts[&Succ];
790 
791  X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
792  X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
793  UncondCodeSeq.push_back(Cond);
794 
795  BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
796  {InvCond});
797 
798  // Decrement the successor count now that we've split one of the edges.
799  // We need to keep the count of edges to the successor accurate in order
800  // to know above when to *replace* the successor in the CFG vs. just
801  // adding the new successor.
802  --SuccCount;
803  }
804 
805  // Since we may have split edges and changed the number of successors,
806  // normalize the probabilities. This avoids doing it each time we split an
807  // edge.
808  MBB.normalizeSuccProbs();
809 
810  // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
811  // need to intersect the other condition codes. We can do this by just
812  // doing a cmov for each one.
813  if (!UncondSucc)
814  // If we have no fallthrough to protect (perhaps it is an indirect jump?)
815  // just skip this and continue.
816  continue;
817 
818  assert(SuccCounts[UncondSucc] == 1 &&
819  "We should never have more than one edge to the unconditional "
820  "successor at this point because every other edge must have been "
821  "split above!");
822 
823  // Sort and unique the codes to minimize them.
824  llvm::sort(UncondCodeSeq);
825  UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
826  UncondCodeSeq.end());
827 
828  // Build a checking version of the successor.
829  BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
830  UncondBr, UncondBr, UncondCodeSeq);
831  }
832 
833  return CMovs;
834 }
835 
836 /// Compute the register class for the unfolded load.
837 ///
838 /// FIXME: This should probably live in X86InstrInfo, potentially by adding
839 /// a way to unfold into a newly created vreg rather than requiring a register
840 /// input.
841 static const TargetRegisterClass *
842 getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
843  unsigned Opcode) {
844  unsigned Index;
845  unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
846  Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
847  const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
848  return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
849 }
850 
851 void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
852  MachineFunction &MF) {
853  for (MachineBasicBlock &MBB : MF)
854  // We use make_early_inc_range here so we can remove instructions if needed
855  // without disturbing the iteration.
856  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
857  // Must either be a call or a branch.
858  if (!MI.isCall() && !MI.isBranch())
859  continue;
860  // We only care about loading variants of these instructions.
861  if (!MI.mayLoad())
862  continue;
863 
864  switch (MI.getOpcode()) {
865  default: {
866  LLVM_DEBUG(
867  dbgs() << "ERROR: Found an unexpected loading branch or call "
868  "instruction:\n";
869  MI.dump(); dbgs() << "\n");
870  report_fatal_error("Unexpected loading branch or call!");
871  }
872 
873  case X86::FARCALL16m:
874  case X86::FARCALL32m:
875  case X86::FARCALL64m:
876  case X86::FARJMP16m:
877  case X86::FARJMP32m:
878  case X86::FARJMP64m:
879  // We cannot mitigate far jumps or calls, but we also don't expect them
880  // to be vulnerable to Spectre v1.2 style attacks.
881  continue;
882 
883  case X86::CALL16m:
884  case X86::CALL16m_NT:
885  case X86::CALL32m:
886  case X86::CALL32m_NT:
887  case X86::CALL64m:
888  case X86::CALL64m_NT:
889  case X86::JMP16m:
890  case X86::JMP16m_NT:
891  case X86::JMP32m:
892  case X86::JMP32m_NT:
893  case X86::JMP64m:
894  case X86::JMP64m_NT:
895  case X86::TAILJMPm64:
896  case X86::TAILJMPm64_REX:
897  case X86::TAILJMPm:
898  case X86::TCRETURNmi64:
899  case X86::TCRETURNmi: {
900  // Use the generic unfold logic now that we know we're dealing with
901  // expected instructions.
902  // FIXME: We don't have test coverage for all of these!
903  auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
904  if (!UnfoldedRC) {
905  LLVM_DEBUG(dbgs()
906  << "ERROR: Unable to unfold load from instruction:\n";
907  MI.dump(); dbgs() << "\n");
908  report_fatal_error("Unable to unfold load!");
909  }
910  Register Reg = MRI->createVirtualRegister(UnfoldedRC);
911  SmallVector<MachineInstr *, 2> NewMIs;
912  // If we were able to compute an unfolded reg class, any failure here
913  // is just a programming error so just assert.
914  bool Unfolded =
915  TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
916  /*UnfoldStore*/ false, NewMIs);
917  (void)Unfolded;
918  assert(Unfolded &&
919  "Computed unfolded register class but failed to unfold");
920  // Now stitch the new instructions into place and erase the old one.
921  for (auto *NewMI : NewMIs)
922  MBB.insert(MI.getIterator(), NewMI);
923 
924  // Update the call site info.
925  if (MI.isCandidateForCallSiteEntry())
926  MF.eraseCallSiteInfo(&MI);
927 
928  MI.eraseFromParent();
929  LLVM_DEBUG({
930  dbgs() << "Unfolded load successfully into:\n";
931  for (auto *NewMI : NewMIs) {
932  NewMI->dump();
933  dbgs() << "\n";
934  }
935  });
936  continue;
937  }
938  }
939  llvm_unreachable("Escaped switch with default!");
940  }
941 }
942 
943 /// Trace the predicate state through indirect branches, instrumenting them to
944 /// poison the state if a target is reached that does not match the expected
945 /// target.
946 ///
947 /// This is designed to mitigate Spectre variant 1 attacks where an indirect
948 /// branch is trained to predict a particular target and then mispredicts that
949 /// target in a way that can leak data. Despite using an indirect branch, this
950 /// is really a variant 1 style attack: it does not steer execution to an
951 /// arbitrary or attacker controlled address, and it does not require any
952 /// special code executing next to the victim. This attack can also be mitigated
953 /// through retpolines, but those require either replacing indirect branches
954 /// with conditional direct branches or lowering them through a device that
955 /// blocks speculation. This mitigation can replace these retpoline-style
956 /// mitigations for jump tables and other indirect branches within a function
957 /// when variant 2 isn't a risk while allowing limited speculation. Indirect
958 /// calls, however, cannot be mitigated through this technique without changing
959 /// the ABI in a fundamental way.
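///
/// As a rough sketch (register names here are placeholders, not the exact
/// output), a block reached via `jmpq *%rax` is instrumented to compare the
/// address it was actually entered with against its own address, poisoning
/// the predicate state on a mismatch:
///
///   leaq .Ltarget(%rip), %rcx
///   cmpq %rcx, %rdx            # %rdx holds the branch target we arrived by
///   cmovneq %rpoison, %rstate  # wrong target => we are misspeculating
///
/// The code below uses immediate forms instead of the LEA when the small,
/// non-PIC code model allows it.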
960 SmallVector<MachineInstr *, 16>
961 X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
962  MachineFunction &MF) {
963  // We use the SSAUpdater to insert PHI nodes for the target addresses of
964  // indirect branches. We don't actually need the full power of the SSA updater
965  // in this particular case as we always have immediately available values, but
966  // this avoids us having to re-implement the PHI construction logic.
967  MachineSSAUpdater TargetAddrSSA(MF);
968  TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
969 
970  // Track which blocks were terminated with an indirect branch.
971  SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
972 
973  // We need to know what blocks end up reached via indirect branches. We
974  // expect this to be a subset of those whose address is taken and so track it
975  // directly via the CFG.
976  SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
977 
978  // Walk all the blocks which end in an indirect branch and make the
979  // target address available.
980  for (MachineBasicBlock &MBB : MF) {
981  // Find the last terminator.
982  auto MII = MBB.instr_rbegin();
983  while (MII != MBB.instr_rend() && MII->isDebugInstr())
984  ++MII;
985  if (MII == MBB.instr_rend())
986  continue;
987  MachineInstr &TI = *MII;
988  if (!TI.isTerminator() || !TI.isBranch())
989  // No terminator or non-branch terminator.
990  continue;
991 
992  unsigned TargetReg;
993 
994  switch (TI.getOpcode()) {
995  default:
996  // Direct branch or conditional branch (leading to fallthrough).
997  continue;
998 
999  case X86::FARJMP16m:
1000  case X86::FARJMP32m:
1001  case X86::FARJMP64m:
1002  // We cannot mitigate far jumps or calls, but we also don't expect them
1003  // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1004  continue;
1005 
1006  case X86::JMP16m:
1007  case X86::JMP16m_NT:
1008  case X86::JMP32m:
1009  case X86::JMP32m_NT:
1010  case X86::JMP64m:
1011  case X86::JMP64m_NT:
1012  // Mostly as documentation.
1013  report_fatal_error("Memory operand jumps should have been unfolded!");
1014 
1015  case X86::JMP16r:
1016  report_fatal_error(
1017  "Support for 16-bit indirect branches is not implemented.");
1018  case X86::JMP32r:
1019  report_fatal_error(
1020  "Support for 32-bit indirect branches is not implemented.");
1021 
1022  case X86::JMP64r:
1023  TargetReg = TI.getOperand(0).getReg();
1024  }
1025 
1026  // We have definitely found an indirect branch. Verify that there are no
1027  // preceding conditional branches as we don't yet support that.
1028  if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1029  return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1030  })) {
1031  LLVM_DEBUG({
1032  dbgs() << "ERROR: Found other terminators in a block with an indirect "
1033  "branch! This is not yet supported! Terminator sequence:\n";
1034  for (MachineInstr &MI : MBB.terminators()) {
1035  MI.dump();
1036  dbgs() << '\n';
1037  }
1038  });
1039  report_fatal_error("Unimplemented terminator sequence!");
1040  }
1041 
1042  // Make the target register an available value for this block.
1043  TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1044  IndirectTerminatedMBBs.insert(&MBB);
1045 
1046  // Add all the successors to our target candidates.
1047  for (MachineBasicBlock *Succ : MBB.successors())
1048  IndirectTargetMBBs.insert(Succ);
1049  }
1050 
1051  // Keep track of the cmov instructions we insert so we can return them.
1052  SmallVector<MachineInstr *, 16> CMovs;
1053 
1054  // If we didn't find any indirect branches with targets, nothing to do here.
1055  if (IndirectTargetMBBs.empty())
1056  return CMovs;
1057 
1058  // We found indirect branches and targets that need to be instrumented to
1059  // harden loads within them. Walk the blocks of the function (to get a stable
1060  // ordering) and instrument each target of an indirect branch.
1061  for (MachineBasicBlock &MBB : MF) {
1062  // Skip the blocks that aren't candidate targets.
1063  if (!IndirectTargetMBBs.count(&MBB))
1064  continue;
1065 
1066  // We don't expect EH pads to ever be reached via an indirect branch. If
1067  // this is desired for some reason, we could simply skip them here rather
1068  // than asserting.
1069  assert(!MBB.isEHPad() &&
1070  "Unexpected EH pad as target of an indirect branch!");
1071 
1072  // We should never end up threading EFLAGS into a block to harden
1073  // conditional jumps as there would be an additional successor via the
1074  // indirect branch. As a consequence, all such edges would be split before
1075  // reaching here, and the inserted block will handle the EFLAGS-based
1076  // hardening.
1077  assert(!MBB.isLiveIn(X86::EFLAGS) &&
1078  "Cannot check within a block that already has live-in EFLAGS!");
1079 
1080  // We can't handle having non-indirect edges into this block unless this is
1081  // the only successor and we can synthesize the necessary target address.
1082  for (MachineBasicBlock *Pred : MBB.predecessors()) {
1083  // If we've already handled this by extracting the target directly,
1084  // nothing to do.
1085  if (IndirectTerminatedMBBs.count(Pred))
1086  continue;
1087 
1088  // Otherwise, we have to be the only successor. We generally expect this
1089  // to be true as conditional branches should have had a critical edge
1090  // split already. We don't however need to worry about EH pad successors
1091  // as they'll happily ignore the target and their hardening strategy is
1092  // resilient to all ways in which they could be reached speculatively.
1093  if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1094  return Succ->isEHPad() || Succ == &MBB;
1095  })) {
1096  LLVM_DEBUG({
1097  dbgs() << "ERROR: Found conditional entry to target of indirect "
1098  "branch!\n";
1099  Pred->dump();
1100  MBB.dump();
1101  });
1102  report_fatal_error("Cannot harden a conditional entry to a target of "
1103  "an indirect branch!");
1104  }
1105 
1106  // Now we need to compute the address of this block and install it as a
1107  // synthetic target in the predecessor. We do this at the bottom of the
1108  // predecessor.
1109  auto InsertPt = Pred->getFirstTerminator();
1110  Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1111  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1112  !Subtarget->isPositionIndependent()) {
1113  // Directly materialize it into an immediate.
1114  auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1115  TII->get(X86::MOV64ri32), TargetReg)
1116  .addMBB(&MBB);
1117  ++NumInstsInserted;
1118  (void)AddrI;
1119  LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1120  dbgs() << "\n");
1121  } else {
1122  auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1123  TargetReg)
1124  .addReg(/*Base*/ X86::RIP)
1125  .addImm(/*Scale*/ 1)
1126  .addReg(/*Index*/ 0)
1127  .addMBB(&MBB)
1128  .addReg(/*Segment*/ 0);
1129  ++NumInstsInserted;
1130  (void)AddrI;
1131  LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1132  dbgs() << "\n");
1133  }
1134  // And make this available.
1135  TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1136  }
1137 
1138  // Materialize the needed SSA value of the target. Note that we need the
1139  // middle of the block as this block might at the bottom have an indirect
1140  // branch back to itself. We can do this here because at this point, every
1141  // predecessor of this block has an available value. This is basically just
1142  // automating the construction of a PHI node for this target.
1143  Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1144 
1145  // Insert a comparison of the incoming target register with this block's
1146  // address. This also requires us to mark the block as having its address
1147  // taken explicitly.
1148  MBB.setHasAddressTaken();
1149  auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1150  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1151  !Subtarget->isPositionIndependent()) {
1152  // Check directly against a relocated immediate when we can.
1153  auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1154  .addReg(TargetReg, RegState::Kill)
1155  .addMBB(&MBB);
1156  ++NumInstsInserted;
1157  (void)CheckI;
1158  LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1159  } else {
1160  // Otherwise compute the address into a register first.
1161  Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1162  auto AddrI =
1163  BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1164  .addReg(/*Base*/ X86::RIP)
1165  .addImm(/*Scale*/ 1)
1166  .addReg(/*Index*/ 0)
1167  .addMBB(&MBB)
1168  .addReg(/*Segment*/ 0);
1169  ++NumInstsInserted;
1170  (void)AddrI;
1171  LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1172  auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1173  .addReg(TargetReg, RegState::Kill)
1174  .addReg(AddrReg, RegState::Kill);
1175  ++NumInstsInserted;
1176  (void)CheckI;
1177  LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1178  }
1179 
1180  // Now cmov over the predicate if the comparison wasn't equal.
1181  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1182  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1183  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1184  auto CMovI =
1185  BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1186  .addReg(PS->InitialReg)
1187  .addReg(PS->PoisonReg)
1188  .addImm(X86::COND_NE);
1189  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
1190  ++NumInstsInserted;
1191  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1192  CMovs.push_back(&*CMovI);
1193 
1194  // And put the new value into the available values for SSA form of our
1195  // predicate state.
1196  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1197  }
1198 
1199  // Return all the newly inserted cmov instructions of the predicate state.
1200  return CMovs;
1201 }
1202 
1203 // Returns true if the MI has EFLAGS as a register def operand and it's live,
1204 // otherwise it returns false.
1205 static bool isEFLAGSDefLive(const MachineInstr &MI) {
1206  if (const MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1207  return !DefOp->isDead();
1208  }
1209  return false;
1210 }
1211 
1212 static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1213  const TargetRegisterInfo &TRI) {
1214  // Check if EFLAGS are alive by seeing if there is a def of them or they
1215  // live-in, and then seeing if that def is in turn used.
1216  for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1217  if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1218  // If the def is dead, then EFLAGS is not live.
1219  if (DefOp->isDead())
1220  return false;
1221 
1222  // Otherwise we've def'ed it, and it is live.
1223  return true;
1224  }
1225  // While at this instruction, also check if we use and kill EFLAGS
1226  // which means it isn't live.
1227  if (MI.killsRegister(X86::EFLAGS, &TRI))
1228  return false;
1229  }
1230 
1231  // If we didn't find anything conclusive (neither definitely alive or
1232  // definitely dead) return whether it lives into the block.
1233  return MBB.isLiveIn(X86::EFLAGS);
1234 }
1235 
1236 /// Trace the predicate state through each of the blocks in the function,
1237 /// hardening everything necessary along the way.
1238 ///
1239 /// We call this routine once the initial predicate state has been established
1240 /// for each basic block in the function in the SSA updater. This routine traces
1241 /// it through the instructions within each basic block, and for non-returning
1242 /// blocks informs the SSA updater about the final state that lives out of the
1243 /// block. Along the way, it hardens any vulnerable instruction using the
1244 /// currently valid predicate state. We have to do these two things together
1245 /// because the SSA updater only works across blocks. Within a block, we track
1246 /// the current predicate state directly and update it as it changes.
1247 ///
1248 /// This operates in two passes over each block. First, we analyze the loads in
1249 /// the block to determine which strategy will be used to harden them: hardening
1250 /// the address or hardening the loaded value when loaded into a register
1251 /// amenable to hardening. We have to process these first because the two
1252 /// strategies may interact -- later hardening may change what strategy we wish
1253 /// to use. We also will analyze data dependencies between loads and avoid
1254 /// hardening those loads that are data dependent on a load with a hardened
1255 /// address. We also skip hardening loads already behind an LFENCE as that is
1256 /// sufficient to harden them against misspeculation.
1257 ///
1258 /// Second, we actively trace the predicate state through the block, applying
1259 /// the hardening steps we determined necessary in the first pass as we go.
1260 ///
1261 /// These two passes are applied to each basic block. We operate one block at a
1262 /// time to simplify reasoning about reachability and sequencing.
1263 void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1264  MachineFunction &MF) {
1265  SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1266  SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1267 
1268  SmallSet<unsigned, 16> HardenedAddrRegs;
1269 
1270  SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1271 
1272  // Track the set of load-dependent registers through the basic block. Because
1273  // the values of these registers have an existing data dependency on a loaded
1274  // value which we would have checked, we can omit any checks on them.
1275  SparseBitVector<> LoadDepRegs;
1276 
1277  for (MachineBasicBlock &MBB : MF) {
1278  // The first pass over the block: collect all the loads which can have their
1279  // loaded value hardened and all the loads that instead need their address
1280  // hardened. During this walk we propagate load dependence for address
1281  // hardened loads and also look for LFENCE to stop hardening wherever
1282  // possible. When deciding whether or not to harden the loaded value or not,
1283  // we check to see if any registers used in the address will have been
1284  // hardened at this point and if so, harden any remaining address registers
1285  // as that often successfully re-uses hardened addresses and minimizes
1286  // instructions.
1287  //
1288  // FIXME: We should consider an aggressive mode where we continue to keep as
1289  // many loads value hardened even when some address register hardening would
1290  // be free (due to reuse).
1291  //
1292  // Note that we only need this pass if we are actually hardening loads.
1293  if (HardenLoads)
1294  for (MachineInstr &MI : MBB) {
1295  // We naively assume that all def'ed registers of an instruction have
1296  // a data dependency on all of their operands.
1297  // FIXME: Do a more careful analysis of x86 to build a conservative
1298  // model here.
1299  if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1300  return Op.isReg() && LoadDepRegs.test(Op.getReg());
1301  }))
1302  for (MachineOperand &Def : MI.defs())
1303  if (Def.isReg())
1304  LoadDepRegs.set(Def.getReg());
1305 
1306  // Both Intel and AMD are guiding that they will change the semantics of
1307  // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1308  // no more need to guard things in this block.
1309  if (MI.getOpcode() == X86::LFENCE)
1310  break;
1311 
1312  // If this instruction cannot load, nothing to do.
1313  if (!MI.mayLoad())
1314  continue;
1315 
1316  // Some instructions which "load" are trivially safe or unimportant.
1317  if (MI.getOpcode() == X86::MFENCE)
1318  continue;
1319 
1320  // Extract the memory operand information about this instruction.
1321  // FIXME: This doesn't handle loading pseudo instructions which we often
1322  // could handle with similarly generic logic. We probably need to add an
1323  // MI-layer routine similar to the MC-layer one we use here which maps
1324  // pseudos much like this maps real instructions.
1325  const MCInstrDesc &Desc = MI.getDesc();
1326  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1327  if (MemRefBeginIdx < 0) {
1328  LLVM_DEBUG(dbgs()
1329  << "WARNING: unable to harden loading instruction: ";
1330  MI.dump());
1331  continue;
1332  }
1333 
1334  MemRefBeginIdx += X86II::getOperandBias(Desc);
1335 
1336  MachineOperand &BaseMO =
1337  MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1338  MachineOperand &IndexMO =
1339  MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1340 
1341  // If we have at least one (non-frame-index, non-RIP) register operand,
1342  // and neither operand is load-dependent, we need to check the load.
1343  unsigned BaseReg = 0, IndexReg = 0;
1344  if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1345  BaseMO.getReg() != X86::NoRegister)
1346  BaseReg = BaseMO.getReg();
1347  if (IndexMO.getReg() != X86::NoRegister)
1348  IndexReg = IndexMO.getReg();
1349 
1350  if (!BaseReg && !IndexReg)
1351  // No register operands!
1352  continue;
1353 
1354  // If any register operand is dependent, this load is dependent and we
1355  // needn't check it.
1356  // FIXME: Is this true in the case where we are hardening loads after
1357  // they complete? Unclear, need to investigate.
1358  if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1359  (IndexReg && LoadDepRegs.test(IndexReg)))
1360  continue;
1361 
1362  // If post-load hardening is enabled, this load is compatible with
1363  // post-load hardening, and we aren't already going to harden one of the
1364  // address registers, queue it up to be hardened post-load. Notably,
1365  // even once hardened this won't introduce a useful dependency that
1366  // could prune out subsequent loads.
1367  if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
1368  !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1369  MI.getOperand(0).isReg() &&
1370  canHardenRegister(MI.getOperand(0).getReg()) &&
1371  !HardenedAddrRegs.count(BaseReg) &&
1372  !HardenedAddrRegs.count(IndexReg)) {
1373  HardenPostLoad.insert(&MI);
1374  HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1375  continue;
1376  }
1377 
1378  // Record this instruction for address hardening and record its register
1379  // operands as being address-hardened.
1380  HardenLoadAddr.insert(&MI);
1381  if (BaseReg)
1382  HardenedAddrRegs.insert(BaseReg);
1383  if (IndexReg)
1384  HardenedAddrRegs.insert(IndexReg);
1385 
1386  for (MachineOperand &Def : MI.defs())
1387  if (Def.isReg())
1388  LoadDepRegs.set(Def.getReg());
1389  }
1390 
1391  // Now re-walk the instructions in the basic block, and apply whichever
1392  // hardening strategy we have elected. Note that we do this in a second
1393  // pass specifically so that we have the complete set of instructions for
1394  // which we will do post-load hardening and can defer it in certain
1395  // circumstances.
1396  for (MachineInstr &MI : MBB) {
1397  if (HardenLoads) {
1398  // We cannot both require hardening the def of a load and its address.
1399  assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1400  "Requested to harden both the address and def of a load!");
1401 
1402  // Check if this is a load whose address needs to be hardened.
1403  if (HardenLoadAddr.erase(&MI)) {
1404  const MCInstrDesc &Desc = MI.getDesc();
1405  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1406  assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1407 
1408  MemRefBeginIdx += X86II::getOperandBias(Desc);
1409 
1410  MachineOperand &BaseMO =
1411  MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1412  MachineOperand &IndexMO =
1413  MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1414  hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1415  continue;
1416  }
1417 
1418  // Test if this instruction is one of our post load instructions (and
1419  // remove it from the set if so).
1420  if (HardenPostLoad.erase(&MI)) {
1421  assert(!MI.isCall() && "Must not try to post-load harden a call!");
1422 
1423  // If this is a data-invariant load and there is no EFLAGS
1424  // interference, we want to try and sink any hardening as far as
1425  // possible.
1426  if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
1427  // Sink the instruction we'll need to harden as far as we can down
1428  // the graph.
1429  MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1430 
1431  // If we managed to sink this instruction, update everything so we
1432  // harden that instruction when we reach it in the instruction
1433  // sequence.
1434  if (SunkMI != &MI) {
1435  // If in sinking there was no instruction needing to be hardened,
1436  // we're done.
1437  if (!SunkMI)
1438  continue;
1439 
1440  // Otherwise, add this to the set of defs we harden.
1441  HardenPostLoad.insert(SunkMI);
1442  continue;
1443  }
1444  }
1445 
1446  unsigned HardenedReg = hardenPostLoad(MI);
1447 
1448  // Mark the resulting hardened register as such so we don't re-harden.
1449  AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1450 
1451  continue;
1452  }
1453 
1454  // Check for an indirect call or branch that may need its input hardened
1455  // even if we couldn't find the specific load used, or were able to
1456  // avoid hardening it for some reason. Note that here we cannot break
1457  // out afterward as we may still need to handle any call aspect of this
1458  // instruction.
1459  if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1460  hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1461  }
1462 
1463  // After we finish hardening loads, we handle interprocedural hardening if
1464  // enabled and relevant for this instruction.
1465  if (!HardenInterprocedurally)
1466  continue;
1467  if (!MI.isCall() && !MI.isReturn())
1468  continue;
1469 
1470  // If this is a direct return (IE, not a tail call) just directly harden
1471  // it.
1472  if (MI.isReturn() && !MI.isCall()) {
1473  hardenReturnInstr(MI);
1474  continue;
1475  }
1476 
1477  // Otherwise we have a call. We need to handle transferring the predicate
1478  // state into a call and recovering it after the call returns (unless this
1479  // is a tail call).
1480  assert(MI.isCall() && "Should only reach here for calls!");
1481  tracePredStateThroughCall(MI);
1482  }
1483 
1484  HardenPostLoad.clear();
1485  HardenLoadAddr.clear();
1486  HardenedAddrRegs.clear();
1487  AddrRegToHardenedReg.clear();
1488 
1489  // Currently, we only track data-dependent loads within a basic block.
1490  // FIXME: We should see if this is necessary or if we could be more
1491  // aggressive here without opening up attack avenues.
1492  LoadDepRegs.clear();
1493  }
1494 }
1495 
1496 /// Save EFLAGS into the returned GPR. This can in turn be restored with
1497 /// `restoreEFLAGS`.
1498 ///
1499 /// Note that LLVM can only lower very simple patterns of saved and restored
1500 /// EFLAGS registers. The restore should always be within the same basic block
1501 /// as the save so that no PHI nodes are inserted.
1502 unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1503  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1504  const DebugLoc &Loc) {
1505  // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1506  // what instruction selection does.
1507  Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1508  // We directly copy the FLAGS register and rely on later lowering to clean
1509  // this up into the appropriate setCC instructions.
1510  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1511  ++NumInstsInserted;
1512  return Reg;
1513 }
1514 
1515 /// Restore EFLAGS from the provided GPR. This should be produced by
1516 /// `saveEFLAGS`.
1517 ///
1518 /// This must be done within the same basic block as the save in order to
1519 /// reliably lower.
1520 void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1521  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1522  const DebugLoc &Loc, Register Reg) {
1523  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1524  ++NumInstsInserted;
1525 }
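
// Usage note (illustrative, not additional pass code): these two helpers are
// meant to bracket any hardening sequence that clobbers EFLAGS while the
// flags are still live, within a single basic block, e.g.:
//
//   unsigned FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
//   ... emit flag-clobbering hardening instructions ...
//   restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
//
// `hardenLoadAddr` and `hardenValueInRegister` below follow this pattern.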
1526 
1527 /// Takes the current predicate state (in a register) and merges it into the
1528 /// stack pointer. The state is essentially a single bit, but we merge this in
1529 /// a way that won't form non-canonical pointers and also will be preserved
1530 /// across normal stack adjustments.
1531 void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1532  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1533  const DebugLoc &Loc, unsigned PredStateReg) {
1534  Register TmpReg = MRI->createVirtualRegister(PS->RC);
1535  // FIXME: This hard codes a shift distance based on the number of bits needed
1536  // to stay canonical on 64-bit. We should compute this somehow and support
1537  // 32-bit as part of that.
1538  auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1539  .addReg(PredStateReg, RegState::Kill)
1540  .addImm(47);
1541  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1542  ++NumInstsInserted;
1543  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1544  .addReg(X86::RSP)
1545  .addReg(TmpReg, RegState::Kill);
1546  OrI->addRegisterDead(X86::EFLAGS, TRI);
1547  ++NumInstsInserted;
1548 }
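
// Illustrative sketch only (not compiled into the pass): assuming the 64-bit
// predicate state is zero on the architectural path and all-ones under
// misspeculation, the SHL/OR pair built above has roughly this effect on the
// value of RSP:
static inline uint64_t mergePredStateIntoSPSketch(uint64_t RSP,
                                                  uint64_t PredState) {
  // Shifting by 47 places the state in the high bits of the pointer while
  // keeping it canonical, so ordinary stack adjustments preserve the state.
  return RSP | (PredState << 47);
}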
1549 
1550 /// Extracts the predicate state stored in the high bits of the stack pointer.
1551 unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1552  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1553  const DebugLoc &Loc) {
1554  Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1555  Register TmpReg = MRI->createVirtualRegister(PS->RC);
1556 
1557  // We know that the stack pointer will have any preserved predicate state in
1558  // its high bit. We just want to smear this across the other bits. Turns out,
1559  // this is exactly what an arithmetic right shift does.
1560  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1561  .addReg(X86::RSP);
1562  auto ShiftI =
1563  BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1564  .addReg(TmpReg, RegState::Kill)
1565  .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1566  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1567  ++NumInstsInserted;
1568 
1569  return PredStateReg;
1570 }
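
// Illustrative sketch only (not compiled into the pass): the copy-and-SAR
// sequence above smears the preserved high bit of RSP across an entire
// register, yielding zero on the architectural path and all-ones under
// misspeculation. Roughly:
static inline uint64_t extractPredStateFromSPSketch(uint64_t RSP) {
  // An arithmetic right shift by 63 replicates the top bit into every bit.
  return static_cast<uint64_t>(static_cast<int64_t>(RSP) >> 63);
}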
1571 
1572 void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1573  MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1574  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1575  MachineBasicBlock &MBB = *MI.getParent();
1576  const DebugLoc &Loc = MI.getDebugLoc();
1577 
1578  // Check if EFLAGS are alive by seeing if there is a def of them or they
1579  // live-in, and then seeing if that def is in turn used.
1580  bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1581 
1582  SmallVector<MachineOperand *, 2> HardenOpRegs;
1583 
1584  if (BaseMO.isFI()) {
1585  // A frame index is never a dynamically controllable load, so only
1586  // harden it if we're covering fixed address loads as well.
1587  LLVM_DEBUG(
1588  dbgs() << " Skipping hardening base of explicit stack frame load: ";
1589  MI.dump(); dbgs() << "\n");
1590  } else if (BaseMO.getReg() == X86::RSP) {
1591  // Some idempotent atomic operations are lowered directly to a locked
1592  // OR with 0 to the top of the stack (or slightly offset from the top) which uses an
1593  // explicit RSP register as the base.
1594  assert(IndexMO.getReg() == X86::NoRegister &&
1595  "Explicit RSP access with dynamic index!");
1596  LLVM_DEBUG(
1597  dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1598  } else if (BaseMO.getReg() == X86::RIP ||
1599  BaseMO.getReg() == X86::NoRegister) {
1600  // For both RIP-relative addressed loads or absolute loads, we cannot
1601  // meaningfully harden them because the address being loaded has no
1602  // dynamic component.
1603  //
1604  // FIXME: When using a segment base (like TLS does) we end up with the
1605  // dynamic address being the base plus -1 because we can't mutate the
1606  // segment register here. This allows the signed 32-bit offset to point at
1607  // valid segment-relative addresses and load them successfully.
1608  LLVM_DEBUG(
1609  dbgs() << " Cannot harden base of "
1610  << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1611  << " address in a load!");
1612  } else {
1613  assert(BaseMO.isReg() &&
1614  "Only allowed to have a frame index or register base.");
1615  HardenOpRegs.push_back(&BaseMO);
1616  }
1617 
1618  if (IndexMO.getReg() != X86::NoRegister &&
1619  (HardenOpRegs.empty() ||
1620  HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1621  HardenOpRegs.push_back(&IndexMO);
1622 
1623  assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1624  "Should have exactly one or two registers to harden!");
1625  assert((HardenOpRegs.size() == 1 ||
1626  HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1627  "Should not have two of the same registers!");
1628 
1629  // Remove any registers that have already been checked.
1630  llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1631  // See if this operand's register has already been checked.
1632  auto It = AddrRegToHardenedReg.find(Op->getReg());
1633  if (It == AddrRegToHardenedReg.end())
1634  // Not checked, so retain this one.
1635  return false;
1636 
1637  // Otherwise, we can directly update this operand and remove it.
1638  Op->setReg(It->second);
1639  return true;
1640  });
1641  // If there are none left, we're done.
1642  if (HardenOpRegs.empty())
1643  return;
1644 
1645  // Compute the current predicate state.
1646  Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1647 
1648  auto InsertPt = MI.getIterator();
1649 
1650  // If EFLAGS are live and we don't have access to instructions that avoid
1651  // clobbering EFLAGS we need to save and restore them. This in turn makes
1652  // the EFLAGS no longer live.
1653  unsigned FlagsReg = 0;
1654  if (EFLAGSLive && !Subtarget->hasBMI2()) {
1655  EFLAGSLive = false;
1656  FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1657  }
1658 
1659  for (MachineOperand *Op : HardenOpRegs) {
1660  Register OpReg = Op->getReg();
1661  auto *OpRC = MRI->getRegClass(OpReg);
1662  Register TmpReg = MRI->createVirtualRegister(OpRC);
1663 
1664  // If this is a vector register, we'll need somewhat custom logic to handle
1665  // hardening it.
1666  if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1667  OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1668  assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1669  bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1670 
1671  // Move our state into a vector register.
1672  // FIXME: We could skip this at the cost of longer encodings with AVX-512
1673  // but that doesn't seem likely worth it.
1674  Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1675  auto MovI =
1676  BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1677  .addReg(StateReg);
1678  (void)MovI;
1679  ++NumInstsInserted;
1680  LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1681 
1682  // Broadcast it across the vector register.
1683  Register VBStateReg = MRI->createVirtualRegister(OpRC);
1684  auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1685  TII->get(Is128Bit ? X86::VPBROADCASTQrr
1686  : X86::VPBROADCASTQYrr),
1687  VBStateReg)
1688  .addReg(VStateReg);
1689  (void)BroadcastI;
1690  ++NumInstsInserted;
1691  LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1692  dbgs() << "\n");
1693 
1694  // Merge our potential poison state into the value with a vector or.
1695  auto OrI =
1696  BuildMI(MBB, InsertPt, Loc,
1697  TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1698  .addReg(VBStateReg)
1699  .addReg(OpReg);
1700  (void)OrI;
1701  ++NumInstsInserted;
1702  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1703  } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1704  OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1705  OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1706  assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1707  bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1708  bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1709  if (Is128Bit || Is256Bit)
1710  assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1711 
1712  // Broadcast our state into a vector register.
1713  Register VStateReg = MRI->createVirtualRegister(OpRC);
1714  unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1715  : Is256Bit ? X86::VPBROADCASTQrZ256rr
1716  : X86::VPBROADCASTQrZrr;
1717  auto BroadcastI =
1718  BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1719  .addReg(StateReg);
1720  (void)BroadcastI;
1721  ++NumInstsInserted;
1722  LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1723  dbgs() << "\n");
1724 
1725  // Merge our potential poison state into the value with a vector or.
1726  unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1727  : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1728  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1729  .addReg(VStateReg)
1730  .addReg(OpReg);
1731  (void)OrI;
1732  ++NumInstsInserted;
1733  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1734  } else {
1735  // FIXME: Need to support GR32 here for 32-bit code.
1736  assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1737  "Not a supported register class for address hardening!");
1738 
1739  if (!EFLAGSLive) {
1740  // Merge our potential poison state into the value with an or.
1741  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1742  .addReg(StateReg)
1743  .addReg(OpReg);
1744  OrI->addRegisterDead(X86::EFLAGS, TRI);
1745  ++NumInstsInserted;
1746  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1747  } else {
1748  // We need to avoid touching EFLAGS so shift out all but the least
1749  // significant bit using the instruction that doesn't update flags.
1750  auto ShiftI =
1751  BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1752  .addReg(OpReg)
1753  .addReg(StateReg);
1754  (void)ShiftI;
1755  ++NumInstsInserted;
1756  LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1757  dbgs() << "\n");
1758  }
1759  }
1760 
1761  // Record this register as checked and update the operand.
1762  assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1763  "Should not have checked this register yet!");
1764  AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1765  Op->setReg(TmpReg);
1766  ++NumAddrRegsHardened;
1767  }
1768 
1769  // And restore the flags if needed.
1770  if (FlagsReg)
1771  restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1772 }
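
// Illustrative sketch only (not compiled into the pass): for the common GR64
// case the two strategies above reduce to a single instruction applied to the
// base or index register, assuming an all-ones predicate state under
// misspeculation:
static inline uint64_t hardenLoadAddrSketch(uint64_t AddrReg,
                                            uint64_t PredState,
                                            bool EFLAGSLive) {
  if (!EFLAGSLive)
    return AddrReg | PredState; // OR64rr: poisons the address to all-ones.
  // SHRX64rr shifts by the low six bits of the count without touching EFLAGS;
  // an all-ones state (count 63) destroys all but the top bit of the address.
  return AddrReg >> (PredState & 63);
}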
1773 
1774 MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1775  MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1776  assert(X86InstrInfo::isDataInvariantLoad(InitialMI) &&
1777  "Cannot get here with a non-invariant load!");
1778  assert(!isEFLAGSDefLive(InitialMI) &&
1779  "Cannot get here with a data invariant load "
1780  "that interferes with EFLAGS!");
1781 
1782  // See if we can sink hardening the loaded value.
1783  auto SinkCheckToSingleUse =
1784  [&](MachineInstr &MI) -> Optional<MachineInstr *> {
1785  Register DefReg = MI.getOperand(0).getReg();
1786 
1787  // We need to find a single use which we can sink the check. We can
1788  // primarily do this because many uses may already end up checked on their
1789  // own.
1790  MachineInstr *SingleUseMI = nullptr;
1791  for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1792  // If we're already going to harden this use, it is data invariant, it
1793  // does not interfere with EFLAGS, and within our block.
1794  if (HardenedInstrs.count(&UseMI)) {
1795  if (!X86InstrInfo::isDataInvariantLoad(UseMI)) {
1796  // If we've already decided to harden a non-load, we must have sunk
1797  // some other post-load hardened instruction to it and it must itself
1798  // be data-invariant.
1799  assert(X86InstrInfo::isDataInvariant(UseMI) &&
1800  "Data variant instruction being hardened!");
1801  continue;
1802  }
1803 
1804  // Otherwise, this is a load and the load component can't be data
1805  // invariant so check how this register is being used.
1806  const MCInstrDesc &Desc = UseMI.getDesc();
1807  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1808  assert(MemRefBeginIdx >= 0 &&
1809  "Should always have mem references here!");
1810  MemRefBeginIdx += X86II::getOperandBias(Desc);
1811 
1812  MachineOperand &BaseMO =
1813  UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1814  MachineOperand &IndexMO =
1815  UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1816  if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1817  (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1818  // The load uses the register as part of its address making it not
1819  // invariant.
1820  return {};
1821 
1822  continue;
1823  }
1824 
1825  if (SingleUseMI)
1826  // We already have a single use, this would make two. Bail.
1827  return {};
1828 
1829  // If this single use isn't data invariant, isn't in this block, or has
1830  // interfering EFLAGS, we can't sink the hardening to it.
1831  if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1832  isEFLAGSDefLive(UseMI))
1833  return {};
1834 
1835  // If this instruction defines multiple registers bail as we won't harden
1836  // all of them.
1837  if (UseMI.getDesc().getNumDefs() > 1)
1838  return {};
1839 
1840  // If this register isn't a virtual register we can't walk uses of sanely,
1841  // just bail. Also check that its register class is one of the ones we
1842  // can harden.
1843  Register UseDefReg = UseMI.getOperand(0).getReg();
1844  if (!UseDefReg.isVirtual() || !canHardenRegister(UseDefReg))
1845  return {};
1846 
1847  SingleUseMI = &UseMI;
1848  }
1849 
1850  // If SingleUseMI is still null, there is no use that needs its own
1851  // checking. Otherwise, it is the single use that needs checking.
1852  return {SingleUseMI};
1853  };
1854 
1855  MachineInstr *MI = &InitialMI;
1856  while (Optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1857  // Update which MI we're checking now.
1858  MI = *SingleUse;
1859  if (!MI)
1860  break;
1861  }
1862 
1863  return MI;
1864 }
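
// Usage note (illustrative): if the hardened load's value has a single use
// that is itself a data-invariant, single-def instruction in the same block
// (for example, the loaded value only feeds an ADD whose result is what
// actually escapes), the walk above defers the hardening to that later def,
// avoiding a redundant OR on the intermediate value.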
1865 
1866 bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
1867  auto *RC = MRI->getRegClass(Reg);
1868  int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1869  if (RegBytes > 8)
1870  // We don't support post-load hardening of vectors.
1871  return false;
1872 
1873  unsigned RegIdx = Log2_32(RegBytes);
1874  assert(RegIdx < 4 && "Unsupported register size");
1875 
1876  // If this register class is explicitly constrained to a class that doesn't
1877  // require REX prefix, we may not be able to satisfy that constraint when
1878  // emitting the hardening instructions, so bail out here.
1879  // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1880  // end up both with a NOREX and REX-only register as operands to the hardening
1881  // instructions. It would be better to fix that code to handle this situation
1882  // rather than hack around it in this way.
1883  const TargetRegisterClass *NOREXRegClasses[] = {
1884  &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1885  &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1886  if (RC == NOREXRegClasses[RegIdx])
1887  return false;
1888 
1889  const TargetRegisterClass *GPRRegClasses[] = {
1890  &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1891  &X86::GR64RegClass};
1892  return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1893 }
1894 
1895 /// Harden a value in a register.
1896 ///
1897 /// This is the low-level logic to fully harden a value sitting in a register
1898 /// against leaking during speculative execution.
1899 ///
1900 /// Unlike hardening an address that is used by a load, this routine is required
1901 /// to hide *all* incoming bits in the register.
1902 ///
1903 /// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1904 /// larger than the predicate state register. FIXME: We should support vector
1905 /// registers here by broadcasting the predicate state.
1906 ///
1907 /// The new, hardened virtual register is returned. It will have the same
1908 /// register class as `Reg`.
1909 unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1910  Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1911  const DebugLoc &Loc) {
1912  assert(canHardenRegister(Reg) && "Cannot harden this register!");
1913  assert(Reg.isVirtual() && "Cannot harden a physical register!");
1914 
1915  auto *RC = MRI->getRegClass(Reg);
1916  int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1917  Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1918  assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1919  "Unknown register size");
1920 
1921  // FIXME: Need to teach this about 32-bit mode.
1922  if (Bytes != 8) {
1923  unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1924  unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1925  Register NarrowStateReg = MRI->createVirtualRegister(RC);
1926  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1927  .addReg(StateReg, 0, SubRegImm);
1928  StateReg = NarrowStateReg;
1929  }
1930 
1931  unsigned FlagsReg = 0;
1932  if (isEFLAGSLive(MBB, InsertPt, *TRI))
1933  FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1934 
1935  Register NewReg = MRI->createVirtualRegister(RC);
1936  unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1937  unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1938  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1939  .addReg(StateReg)
1940  .addReg(Reg);
1941  OrI->addRegisterDead(X86::EFLAGS, TRI);
1942  ++NumInstsInserted;
1943  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1944 
1945  if (FlagsReg)
1946  restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1947 
1948  return NewReg;
1949 }
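
// Illustrative sketch only (not compiled into the pass): with an all-ones
// poison value, OR-ing the predicate state into a register hides every bit of
// the value under misspeculation while leaving it untouched otherwise:
static inline uint64_t hardenValueSketch(uint64_t Value, uint64_t PredState) {
  // PredState is zero when the predicates held and all-ones when they did not.
  return Value | PredState;
}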
1950 
1951 /// Harden a load by hardening the loaded value in the defined register.
1952 ///
1953 /// We can harden a non-leaking load into a register without touching the
1954 /// address by just hiding all of the loaded bits during misspeculation. We use
1955 /// an `or` instruction to do this because we set up our poison value as all
1956 /// ones. And the goal is just for the loaded bits to not be exposed to
1957 /// execution and coercing them to one is sufficient.
1958 ///
1959 /// Returns the newly hardened register.
1960 unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1961  MachineBasicBlock &MBB = *MI.getParent();
1962  const DebugLoc &Loc = MI.getDebugLoc();
1963 
1964  auto &DefOp = MI.getOperand(0);
1965  Register OldDefReg = DefOp.getReg();
1966  auto *DefRC = MRI->getRegClass(OldDefReg);
1967 
1968  // Because we want to completely replace the uses of this def'ed value with
1969  // the hardened value, create a dedicated new register that will only be used
1970  // to communicate the unhardened value to the hardening.
1971  Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1972  DefOp.setReg(UnhardenedReg);
1973 
1974  // Now harden this register's value, getting a hardened reg that is safe to
1975  // use. Note that we insert the instructions to compute this *after* the
1976  // defining instruction, not before it.
1977  unsigned HardenedReg = hardenValueInRegister(
1978  UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1979 
1980  // Finally, replace the old register (which now only has the uses of the
1981  // original def) with the hardened register.
1982  MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1983 
1984  ++NumPostLoadRegsHardened;
1985  return HardenedReg;
1986 }
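
// Usage note (illustrative): conceptually this rewrites
//   %old      = <some data-invariant load>
// into
//   %unharden = <the same load>            ; def renamed
//   %hardened = OR %unharden, %state       ; inserted after the load
// and then replaces every original use of %old with %hardened, so no use can
// observe the unhardened loaded bits.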
1987 
1988 /// Harden a return instruction.
1989 ///
1990 /// Returns implicitly perform a load which we need to harden. Without hardening
1991 /// this load, an attacker may speculatively write over the return address to
1992 /// steer speculation of the return to an attacker controlled address. This is
1993 /// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1994 /// this paper:
1995 /// https://people.csail.mit.edu/vlk/spectre11.pdf
1996 ///
1997 /// We can harden this by introducing an LFENCE that will delay any load of the
1998 /// return address until prior instructions have retired (and thus are not being
1999 /// speculated), or we can harden the address used by the implicit load: the
2000 /// stack pointer.
2001 ///
2002 /// If we are not using an LFENCE, hardening the stack pointer has an additional
2003 /// benefit: it allows us to pass the predicate state accumulated in this
2004 /// function back to the caller. In the absence of a BCBS attack on the return,
2005 /// the caller will typically be resumed and speculatively executed due to the
2006 /// Return Stack Buffer (RSB) prediction which is very accurate and has a high
2007 /// priority. It is possible that some code from the caller will be executed
2008 /// speculatively even during a BCBS-attacked return until the steering takes
2009 /// effect. Whenever this happens, the caller can recover the (poisoned)
2010 /// predicate state from the stack pointer and continue to harden loads.
2011 void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2012  MachineBasicBlock &MBB = *MI.getParent();
2013  const DebugLoc &Loc = MI.getDebugLoc();
2014  auto InsertPt = MI.getIterator();
2015 
2016  if (FenceCallAndRet)
2017  // No need to fence here as we'll fence at the return site itself. That
2018  // handles more cases than we can handle here.
2019  return;
2020 
2021  // Take our predicate state, shift it to the high 17 bits (so that we keep
2022  // pointers canonical) and merge it into RSP. This will allow the caller to
2023  // extract it when we return (speculatively).
2024  mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2025 }
2026 
2027 /// Trace the predicate state through a call.
2028 ///
2029 /// There are several layers of this needed to handle the full complexity of
2030 /// calls.
2031 ///
2032 /// First, we need to send the predicate state into the called function. We do
2033 /// this by merging it into the high bits of the stack pointer.
2034 ///
2035 /// For tail calls, this is all we need to do.
2036 ///
2037 /// For calls where we might return and resume the control flow, we need to
2038 /// extract the predicate state from the high bits of the stack pointer after
2039 /// control returns from the called function.
2040 ///
2041 /// We also need to verify that we intended to return to this location in the
2042 /// code. An attacker might arrange for the processor to mispredict the return
2043 /// to this valid but incorrect return address in the program rather than the
2044 /// correct one. See the paper on this attack, called "ret2spec" by the
2045 /// researchers, here:
2046 /// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2047 ///
2048 /// The way we verify that we returned to the correct location is by preserving
2049 /// the expected return address across the call. One technique involves taking
2050 /// advantage of the red-zone to load the return address from `8(%rsp)` where it
2051 /// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2052 /// directly save the address into a register that will be preserved across the
2053 /// call. We compare this intended return address against the address
2054 /// immediately following the call (the observed return address). If these
2055 /// mismatch, we have detected misspeculation and can poison our predicate
2056 /// state.
2057 void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2058  MachineInstr &MI) {
2059  MachineBasicBlock &MBB = *MI.getParent();
2060  MachineFunction &MF = *MBB.getParent();
2061  auto InsertPt = MI.getIterator();
2062  const DebugLoc &Loc = MI.getDebugLoc();
2063 
2064  if (FenceCallAndRet) {
2065  if (MI.isReturn())
2066  // Tail call, we don't return to this function.
2067  // FIXME: We should also handle noreturn calls.
2068  return;
2069 
2070  // We don't need to fence before the call because the function should fence
2071  // in its entry. However, we do need to fence after the call returns.
2072  // Fencing before the return doesn't correctly handle cases where the return
2073  // itself is mispredicted.
2074  BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2075  ++NumInstsInserted;
2076  ++NumLFENCEsInserted;
2077  return;
2078  }
2079 
2080  // First, we transfer the predicate state into the called function by merging
2081  // it into the stack pointer. This will kill the current def of the state.
2082  Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2083  mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2084 
2085  // If this call is also a return, it is a tail call and we don't need anything
2086  // else to handle it so just return. Also, if there are no further
2087  // instructions and no successors, this call does not return so we can also
2088  // bail.
2089  if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2090  return;
2091 
2092  // Create a symbol to track the return address and attach it to the call
2093  // machine instruction. We will lower extra symbols attached to call
2094  // instructions as a label immediately following the call.
2095  MCSymbol *RetSymbol =
2096  MF.getContext().createTempSymbol("slh_ret_addr",
2097  /*AlwaysAddSuffix*/ true);
2098  MI.setPostInstrSymbol(MF, RetSymbol);
2099 
2100  const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2101  unsigned ExpectedRetAddrReg = 0;
2102 
2103  // If we have no red zones or if the function returns twice (possibly without
2104  // using the `ret` instruction) like setjmp, we need to save the expected
2105  // return address prior to the call.
2106  if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2107  MF.exposesReturnsTwice()) {
2108  // If we don't have red zones, we need to compute the expected return
2109  // address prior to the call and store it in a register that lives across
2110  // the call.
2111  //
2112  // In some ways, this is doubly satisfying as a mitigation because it will
2113  // also successfully detect stack smashing bugs in some cases (typically,
2114  // when a callee-saved register is used and the callee doesn't push it onto
2115  // the stack). But that isn't our primary goal, so we only use it as
2116  // a fallback.
2117  //
2118  // FIXME: It isn't clear that this is reliable in the face of
2119  // rematerialization in the register allocator. We somehow need to force
2120  // that to not occur for this particular instruction, and instead to spill
2121  // or otherwise preserve the value computed *prior* to the call.
2122  //
2123  // FIXME: It is even less clear why MachineCSE can't just fold this when we
2124  // end up having to use identical instructions both before and after the
2125  // call to feed the comparison.
2126  ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2127  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2128  !Subtarget->isPositionIndependent()) {
2129  BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2130  .addSym(RetSymbol);
2131  } else {
2132  BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2133  .addReg(/*Base*/ X86::RIP)
2134  .addImm(/*Scale*/ 1)
2135  .addReg(/*Index*/ 0)
2136  .addSym(RetSymbol)
2137  .addReg(/*Segment*/ 0);
2138  }
2139  }
2140 
2141  // Step past the call to handle when it returns.
2142  ++InsertPt;
2143 
2144  // If we didn't pre-compute the expected return address into a register, then
2145  // red zones are enabled and the return address is still available on the
2146  // stack immediately after the call. As the very first instruction, we load it
2147  // into a register.
2148  if (!ExpectedRetAddrReg) {
2149  ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2150  BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2151  .addReg(/*Base*/ X86::RSP)
2152  .addImm(/*Scale*/ 1)
2153  .addReg(/*Index*/ 0)
2154  .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2155  // the return address is 8-bytes past it.
2156  .addReg(/*Segment*/ 0);
2157  }
2158 
2159  // Now we extract the callee's predicate state from the stack pointer.
2160  unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2161 
2162  // Test the expected return address against our actual address. If we can
2163  // form this basic block's address as an immediate, this is easy. Otherwise
2164  // we compute it.
2165  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2166  !Subtarget->isPositionIndependent()) {
2167  // FIXME: Could we fold this with the load? It would require careful EFLAGS
2168  // management.
2169  BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2170  .addReg(ExpectedRetAddrReg, RegState::Kill)
2171  .addSym(RetSymbol);
2172  } else {
2173  Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2174  BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2175  .addReg(/*Base*/ X86::RIP)
2176  .addImm(/*Scale*/ 1)
2177  .addReg(/*Index*/ 0)
2178  .addSym(RetSymbol)
2179  .addReg(/*Segment*/ 0);
2180  BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2181  .addReg(ExpectedRetAddrReg, RegState::Kill)
2182  .addReg(ActualRetAddrReg, RegState::Kill);
2183  }
2184 
2185  // Now conditionally update the predicate state we just extracted if we ended
2186  // up at a different return address than expected.
2187  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2188  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2189 
2190  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2191  auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2192  .addReg(NewStateReg, RegState::Kill)
2193  .addReg(PS->PoisonReg)
2194  .addImm(X86::COND_NE);
2195  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
2196  ++NumInstsInserted;
2197  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2198 
2199  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2200 }
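
// Illustrative sketch only (not compiled into the pass): the post-call
// sequence built above amounts to a conditional poisoning of the re-extracted
// state based on whether control returned to the expected address:
static inline uint64_t checkReturnAddressSketch(uint64_t ExpectedRetAddr,
                                                uint64_t ActualRetAddr,
                                                uint64_t ExtractedState,
                                                uint64_t AllOnesPoison) {
  // CMP + CMOVNE: any mismatch means the return was steered (or mispredicted)
  // to the wrong address, so force the state to the poison value.
  return ExpectedRetAddr != ActualRetAddr ? AllOnesPoison : ExtractedState;
}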
2201 
2202 /// An attacker may speculatively store over a value that is then speculatively
2203 /// loaded and used as the target of an indirect call or jump instruction. This
2204 /// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2205 /// in this paper:
2206 /// https://people.csail.mit.edu/vlk/spectre11.pdf
2207 ///
2208 /// When this happens, the speculative execution of the call or jump will end up
2209 /// being steered to this attacker controlled address. While most such loads
2210 /// will be adequately hardened already, we want to ensure that they are
2211 /// definitively treated as needing post-load hardening. While address hardening
2212 /// is sufficient to prevent secret data from leaking to the attacker, it may
2213 /// not be sufficient to prevent an attacker from steering speculative
2214 /// execution. We forcibly unfolded all relevant loads above and so will always
2215 /// have an opportunity to post-load harden here, we just need to scan for cases
2216 /// not already flagged and add them.
2217 void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2218  MachineInstr &MI,
2219  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
2220  switch (MI.getOpcode()) {
2221  case X86::FARCALL16m:
2222  case X86::FARCALL32m:
2223  case X86::FARCALL64m:
2224  case X86::FARJMP16m:
2225  case X86::FARJMP32m:
2226  case X86::FARJMP64m:
2227  // We don't need to harden either far calls or far jumps as they are
2228  // safe from Spectre.
2229  return;
2230 
2231  default:
2232  break;
2233  }
2234 
2235  // We should never see a loading instruction at this point, as those should
2236  // have been unfolded.
2237  assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2238 
2239  // If the first operand isn't a register, this is a branch or call
2240  // instruction with an immediate operand which doesn't need to be hardened.
2241  if (!MI.getOperand(0).isReg())
2242  return;
2243 
2244  // For all of these, the target register is the first operand of the
2245  // instruction.
2246  auto &TargetOp = MI.getOperand(0);
2247  Register OldTargetReg = TargetOp.getReg();
2248 
2249  // Try to lookup a hardened version of this register. We retain a reference
2250  // here as we want to update the map to track any newly computed hardened
2251  // register.
2252  unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2253 
2254  // If we don't have a hardened register yet, compute one. Otherwise, just use
2255  // the already hardened register.
2256  //
2257  // FIXME: It is a little suspect that we use partially hardened registers that
2258  // only feed addresses. The complexity of partial hardening with SHRX
2259  // continues to pile up. Should definitively measure its value and consider
2260  // eliminating it.
2261  if (!HardenedTargetReg)
2262  HardenedTargetReg = hardenValueInRegister(
2263  OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2264 
2265  // Set the target operand to the hardened register.
2266  TargetOp.setReg(HardenedTargetReg);
2267 
2268  ++NumCallsOrJumpsHardened;
2269 }
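
// Usage note (illustrative): after this runs, an indirect `callq *%r11` or
// `jmpq *%r11` (register names here are just an example) has its target
// register replaced by one OR-ed with the predicate state, so a misspeculated
// path can only branch to the all-ones address rather than to an
// attacker-controlled one.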
2270 
2271 INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2272  "X86 speculative load hardener", false, false)
2273 INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2274  "X86 speculative load hardener", false, false)
2275 
2276 FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2277  return new X86SpeculativeLoadHardeningPass();
2278 }
llvm::MachineInstr::isBranch
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:862
llvm::MachineBasicBlock::succ_size
unsigned succ_size() const
Definition: MachineBasicBlock.h:354
llvm::X86InstrInfo::isDataInvariant
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value o...
Definition: X86InstrInfo.cpp:142
llvm::MachineInstr::addRegisterDead
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
Definition: MachineInstr.cpp:1907
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
MachineInstr.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::X86II::getMemoryOperandNo
int getMemoryOperandNo(uint64_t TSFlags)
The function returns the MCInst operand # for the first field of the memory operand.
Definition: X86BaseInfo.h:1095
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
MachineSSAUpdater.h
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:103
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::MachineBasicBlock::isLiveIn
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Definition: MachineBasicBlock.cpp:576
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
Optional.h
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
X86Subtarget.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
HardenIndirectCallsAndJumps
static cl::opt< bool > HardenIndirectCallsAndJumps(PASS_KEY "-indirect", cl::desc("Harden indirect calls and jumps against using speculatively " "stored attacker controlled addresses. This is designed to " "mitigate Spectre v1.2 style attacks."), cl::init(true), cl::Hidden)
llvm::MachineFunction::getContext
MCContext & getContext() const
Definition: MachineFunction.h:608
llvm::SparseBitVector::clear
void clear()
Definition: SparseBitVector.h:452
Pass.h
X86InstrBuilder.h
llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition: MachineBasicBlock.h:273
llvm::MachineBasicBlock::isEHFuncletEntry
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
Definition: MachineBasicBlock.h:575
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:509
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::MachineFunction::end
iterator end()
Definition: MachineFunction.h:855
llvm::X86Subtarget
Definition: X86Subtarget.h:52
llvm::MachineFunction::exposesReturnsTwice
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
Definition: MachineFunction.h:718
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1807
llvm::SmallDenseMap
Definition: DenseMap.h:882
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
DenseMap.h
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:493
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
TargetInstrInfo.h
llvm::MachineInstr::findRegisterUseOperand
MachineOperand * findRegisterUseOperand(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
Definition: MachineInstr.h:1409
HardenLoads
static cl::opt< bool > HardenLoads(PASS_KEY "-loads", cl::desc("Sanitize loads from memory. When disable, no " "significant security is provided."), cl::init(true), cl::Hidden)
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:872
llvm::SmallSet< unsigned, 16 >
llvm::Optional
Definition: APInt.h:33
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::X86InstrInfo::isDataInvariantLoad
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value l...
Definition: X86InstrInfo.cpp:209
llvm::X86::getCondFromBranch
CondCode getCondFromBranch(const MachineInstr &MI)
Definition: X86InstrInfo.cpp:2708
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:298
llvm::X86::COND_INVALID
@ COND_INVALID
Definition: X86BaseInfo.h:107
llvm::MachineOperand::isFI
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
Definition: MachineOperand.h:330
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
MachineRegisterInfo.h
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:291
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SparseBitVector.h
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:747
CommandLine.h
llvm::SparseBitVector
Definition: SparseBitVector.h:256
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:205
X86.h
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:379
TargetMachine.h
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:501
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:909
getRegClassForUnfoldedLoad
static const TargetRegisterClass * getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII, unsigned Opcode)
Compute the register class for the unfolded load.
Definition: X86SpeculativeLoadHardening.cpp:842
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::createX86SpeculativeLoadHardeningPass
FunctionPass * createX86SpeculativeLoadHardeningPass()
Definition: X86SpeculativeLoadHardening.cpp:2276
llvm::MachineOperand::isMBB
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
Definition: MachineOperand.h:328
false
Definition: StackSlotColoring.cpp:141
llvm::X86ISD::MFENCE
@ MFENCE
Definition: X86ISelLowering.h:664
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:623
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::SystemZII::Is128Bit
@ Is128Bit
Definition: SystemZInstrInfo.h:40
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
FenceCallAndRet
static cl::opt< bool > FenceCallAndRet(PASS_KEY "-fence-call-and-ret", cl::desc("Use a full speculation fence to harden both call and ret edges " "rather than a lighter weight mitigation."), cl::init(false), cl::Hidden)
llvm::TargetRegisterClass::hasSuperClassEq
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
Definition: TargetRegisterInfo.h:139
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::HexagonInstrInfo::insertBranch
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
Definition: HexagonInstrInfo.cpp:626
llvm::MachineFunction::begin
iterator begin()
Definition: MachineFunction.h:853
DebugLoc.h
SmallPtrSet.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::MachineBasicBlock::isCleanupFuncletEntry
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
Definition: MachineBasicBlock.h:581
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
HardenEdgesWithLFENCE
static cl::opt< bool > HardenEdgesWithLFENCE(PASS_KEY "-lfence", cl::desc("Use LFENCE along each conditional edge to harden against speculative " "loads rather than conditional movs and poisoned pointers."), cl::init(false), cl::Hidden)
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:625
llvm::cl::opt< bool >
SSA
Memory SSA
Definition: MemorySSA.cpp:73
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:420
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
HardenInterprocedurally
static cl::opt< bool > HardenInterprocedurally(PASS_KEY "-ip", cl::desc("Harden interprocedurally by passing our state in and out of " "functions in the high bits of the stack pointer."), cl::init(true), cl::Hidden)
TargetSchedule.h
llvm::X86II::getOperandBias
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:1055
MCSchedule.h
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:515
llvm::SparseBitVector::set
void set(unsigned Idx)
Definition: SparseBitVector.h:508
llvm::MachineBasicBlock::instr_rend
reverse_instr_iterator instr_rend()
Definition: MachineBasicBlock.h:268
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:112
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
splitEdge
static MachineBasicBlock & splitEdge(MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount, MachineInstr *Br, MachineInstr *&UncondBr, const X86InstrInfo &TII)
Definition: X86SpeculativeLoadHardening.cpp:225
canonicalizePHIOperands
static void canonicalizePHIOperands(MachineFunction &MF)
Removing duplicate PHI operands to leave the PHI in a canonical and predictable form.
Definition: X86SpeculativeLoadHardening.cpp:329
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:825
llvm::MachineBasicBlock::SkipPHIsAndLabels
iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
Definition: MachineBasicBlock.cpp:206
llvm::MachineBasicBlock::instr_rbegin
reverse_instr_iterator instr_rbegin()
Definition: MachineBasicBlock.h:266
llvm::SparseBitVector::test
bool test(unsigned Idx) const
Definition: SparseBitVector.h:472
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:618
MachineConstantPool.h
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:439
ArrayRef.h
MachineFunctionPass.h
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:565
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::setHasAddressTaken
void setHasAddressTaken()
Set this block to reflect that it potentially is the target of an indirect branch.
Definition: MachineBasicBlock.h:224
load
LLVM currently emits rax rax movq rax rax ret It could narrow the loads and stores to emit rax rax movq rax rax ret The trouble is that there is a TokenFactor between the store and the load
Definition: README.txt:1531
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:234
MachineModuleInfo.h
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:359
llvm::X86::GetOppositeBranchCondition
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
Definition: X86InstrInfo.cpp:2725
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::MachineBasicBlock::succ_empty
bool succ_empty() const
Definition: MachineBasicBlock.h:357
llvm::X86InstrInfo
Definition: X86InstrInfo.h:138
hardener
X86 speculative load hardener
Definition: X86SpeculativeLoadHardening.cpp:2274
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:238
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
llvm::MachineBasicBlock::isEHScopeEntry
bool isEHScopeEntry() const
Returns true if this is the entry block of an EH scope, i.e., the block that used to have a catchpad ...
Definition: MachineBasicBlock.h:562
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:365
llvm::MachineBasicBlock::isEHPad
bool isEHPad() const
Returns true if the block is a landing pad.
Definition: MachineBasicBlock.h:549
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:491
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
TargetSubtargetInfo.h
llvm::MachineInstr::isTerminator
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:854
llvm::MachineBasicBlock::splitSuccessor
void splitSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New, bool NormalizeSuccProbs=false)
Split the old successor into old plus new and updates the probability info.
Definition: MachineBasicBlock.cpp:766
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:135
llvm::MCContext::createTempSymbol
MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
Definition: MCContext.cpp:324
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:377
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:378
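replaceRegWith is what allows a pass to retarget every occurrence of a virtual register after the fact; a sketch of the common create-then-replace pattern, assuming a MachineRegisterInfo is at hand (the helper name is hypothetical):

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

// Create a fresh virtual register of the same class as OldReg and redirect
// every occurrence of OldReg in the function (uses and defs) to it.
static llvm::Register retargetVReg(llvm::MachineRegisterInfo &MRI,
                                   llvm::Register OldReg) {
  const llvm::TargetRegisterClass *RC = MRI.getRegClass(OldReg);
  llvm::Register NewReg = MRI.createVirtualRegister(RC);
  MRI.replaceRegWith(OldReg, NewReg);
  return NewReg;
}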
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:808
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:277
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:652
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1312
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1562
PASS_KEY
#define PASS_KEY
Definition: X86SpeculativeLoadHardening.cpp:64
llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition: TargetMachine.cpp:72
llvm::unique
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:1779
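llvm::sort followed by llvm::unique is the standard in-place deduplication idiom for small vectors; a sketch ordering blocks by their number so the result is deterministic (the helper name is hypothetical):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

// Sort blocks by their number and erase adjacent duplicates in place.
static void sortAndUniqueBlocks(
    llvm::SmallVectorImpl<llvm::MachineBasicBlock *> &Blocks) {
  llvm::sort(Blocks, [](const llvm::MachineBasicBlock *L,
                        const llvm::MachineBasicBlock *R) {
    return L->getNumber() < R->getNumber();
  });
  Blocks.erase(llvm::unique(Blocks,
                            [](const llvm::MachineBasicBlock *L,
                               const llvm::MachineBasicBlock *R) {
                              return L->getNumber() == R->getNumber();
                            }),
               Blocks.end());
}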
llvm::MachineBasicBlock::isLayoutSuccessor
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Definition: MachineBasicBlock.cpp:913
llvm::X86::getCMovOpcode
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given register size in bytes, and operand type.
Definition: X86InstrInfo.cpp:2806
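getCMovOpcode together with GetOppositeBranchCondition (listed above) captures the conditional-move style of hardening: select a poison value when the inverse of the branch condition holds. A hedged sketch, assuming the current convention that X86 CMOV machine instructions take their condition code as a trailing immediate operand and that the first source is tied to the destination; the helper and its parameters are illustrative, not this pass's exact code:

#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/DebugLoc.h"

// DstReg = Inverted(CC) ? PoisonReg : StateReg, using an 8-byte CMOV.
static void buildPoisoningCMov(llvm::MachineBasicBlock &MBB,
                               llvm::MachineBasicBlock::iterator InsertPt,
                               const llvm::DebugLoc &DL,
                               const llvm::X86InstrInfo &TII,
                               llvm::Register DstReg, llvm::Register StateReg,
                               llvm::Register PoisonReg,
                               llvm::X86::CondCode CC) {
  unsigned CMovOp = llvm::X86::getCMovOpcode(/*RegBytes=*/8);
  llvm::X86::CondCode Inverted = llvm::X86::GetOppositeBranchCondition(CC);
  llvm::BuildMI(MBB, InsertPt, DL, TII.get(CMovOp), DstReg)
      .addReg(StateReg)   // kept when the condition does not hold
      .addReg(PoisonReg)  // selected when the inverted condition holds
      .addImm(Inverted);  // condition code as immediate operand
}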
llvm::MachineBasicBlock::normalizeSuccProbs
void normalizeSuccProbs()
Normalize probabilities of all successors so that the sum of them becomes one.
Definition: MachineBasicBlock.h:677
llvm::MachineSSAUpdater
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
Definition: MachineSSAUpdater.h:34
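MachineSSAUpdater lets a pass define the same logical value in several blocks and still obtain a single SSA value, with PHIs inserted on demand, at any use point; a minimal usage sketch with illustrative block and register parameters:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/Register.h"

// Register two block-local definitions of a value and ask the updater for the
// (possibly PHI-joined) value live on entry to another block.
static llvm::Register joinStateValues(llvm::MachineFunction &MF,
                                      llvm::Register InitialReg,
                                      llvm::MachineBasicBlock *DefA,
                                      llvm::Register ValA,
                                      llvm::MachineBasicBlock *DefB,
                                      llvm::Register ValB,
                                      llvm::MachineBasicBlock *UseMBB) {
  llvm::MachineSSAUpdater SSAUpdater(MF);
  SSAUpdater.Initialize(InitialReg);        // seeds the register class to use
  SSAUpdater.AddAvailableValue(DefA, ValA);
  SSAUpdater.AddAvailableValue(DefB, ValB);
  // Inserts PHIs as needed and returns the value live into UseMBB.
  return SSAUpdater.GetValueInMiddleOfBlock(UseMBB);
}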
llvm::MachineInstrBuilder::addSym
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
Definition: MachineInstrBuilder.h:267
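createTempSymbol (listed above) paired with addSym is how an instruction can be tagged with a fresh, function-local label; a sketch assuming a MachineInstrBuilder for the instruction being built is already available (the helper name is hypothetical):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"

// Create a temporary symbol in the function's MCContext and attach it as an
// extra operand of the instruction under construction.
static llvm::MCSymbol *attachTempSymbol(llvm::MachineFunction &MF,
                                        llvm::MachineInstrBuilder &MIB) {
  llvm::MCSymbol *Sym = MF.getContext().createTempSymbol();
  MIB.addSym(Sym);
  return Sym;
}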
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY, "X86 speculative load hardener", false, false) INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:278
MachineInstrBuilder.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
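The simplest BuildMI use in this context is emitting an instruction with no operands at all, for example a speculation fence; a sketch (the helper name is hypothetical, X86::LFENCE is the real opcode):

#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugLoc.h"

// Insert an LFENCE before InsertPt; LFENCE takes no operands, so the builder
// chain ends immediately after construction.
static void insertLFence(llvm::MachineBasicBlock &MBB,
                         llvm::MachineBasicBlock::iterator InsertPt,
                         const llvm::DebugLoc &DL,
                         const llvm::X86InstrInfo &TII) {
  llvm::BuildMI(MBB, InsertPt, DL, TII.get(llvm::X86::LFENCE));
}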
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:698
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:220
EnablePostLoadHardening
static cl::opt< bool > EnablePostLoadHardening(PASS_KEY "-post-load", cl::desc("Harden the value loaded *after* it is loaded by " "flushing the loaded bits to 1. This is hard to do " "in general but can be done easily for GPRs."), cl::init(true), cl::Hidden)
ScopeExit.h
llvm::SmallVectorImpl< MachineInstr * >
MachineOperand.h
EnableSpeculativeLoadHardening
static cl::opt< bool > EnableSpeculativeLoadHardening("x86-speculative-load-hardening", cl::desc("Force enable speculative load hardening"), cl::init(false), cl::Hidden)
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::X86::AddrBaseReg
@ AddrBaseReg
Definition: X86BaseInfo.h:32
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::MachineBasicBlock::SkipPHIsLabelsAndDebug
iterator SkipPHIsLabelsAndDebug(iterator I, bool SkipPseudoOp=true)
Return the first instruction in MBB after I that is not a PHI, label or debug instruction.
Definition: MachineBasicBlock.cpp:221
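SkipPHIsLabelsAndDebug, together with getFirstTerminator listed earlier, gives the two canonical insertion points in a block; a sketch (the helper names are hypothetical):

#include "llvm/CodeGen/MachineBasicBlock.h"

// Insertion point just past the PHIs, labels, and debug instructions at the
// top of a block.
static llvm::MachineBasicBlock::iterator
topInsertPt(llvm::MachineBasicBlock &MBB) {
  return MBB.SkipPHIsLabelsAndDebug(MBB.begin());
}

// Insertion point just before the block's terminators.
static llvm::MachineBasicBlock::iterator
bottomInsertPt(llvm::MachineBasicBlock &MBB) {
  return MBB.getFirstTerminator();
}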
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:405
raw_ostream.h
isEFLAGSLive
static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo &TRI)
Definition: X86SpeculativeLoadHardening.cpp:1212
MachineFunction.h
X86InstrInfo.h
llvm::MachineInstrBundleIterator< MachineInstr >
isEFLAGSDefLive
static bool isEFLAGSDefLive(const MachineInstr &MI)
Definition: X86SpeculativeLoadHardening.cpp:1205
TargetRegisterInfo.h
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:310
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:280
hasVulnerableLoad
static bool hasVulnerableLoad(MachineFunction &MF)
Helper to scan a function for loads vulnerable to misspeculation that we want to harden.
Definition: X86SpeculativeLoadHardening.cpp:373
llvm::X86::COND_NE
@ COND_NE
Definition: X86BaseInfo.h:86
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
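The bool in insert's return value is the standard visited-set test for worklist traversals; a sketch counting the blocks reachable from a starting block (the helper name is hypothetical):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

// Count blocks reachable from Entry; insert().second is false for blocks that
// were already visited, so each block is processed exactly once.
static unsigned countReachableBlocks(llvm::MachineBasicBlock &Entry) {
  llvm::SmallPtrSet<llvm::MachineBasicBlock *, 16> Visited;
  llvm::SmallVector<llvm::MachineBasicBlock *, 16> Worklist;
  Worklist.push_back(&Entry);
  unsigned Count = 0;
  while (!Worklist.empty()) {
    llvm::MachineBasicBlock *MBB = Worklist.pop_back_val();
    if (!Visited.insert(MBB).second)
      continue; // already seen
    ++Count;
    for (llvm::MachineBasicBlock *Succ : MBB->successors())
      Worklist.push_back(Succ);
  }
  return Count;
}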
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38