1 //====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// Provide a pass which mitigates speculative execution attacks which operate
11 /// by speculating incorrectly past some predicate (a type check, bounds check,
12 /// or other condition) to reach a load with invalid inputs and leak the data
13 /// accessed by that load using a side channel out of the speculative domain.
14 ///
15 /// For details on the attacks, see the first variant in both the Project Zero
16 /// writeup and the Spectre paper:
17 /// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18 /// https://spectreattack.com/spectre.pdf
19 ///
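/// As an illustrative, source-level sketch of the pattern being mitigated
/// (not code from this pass), consider a bounds-checked load:
/// ```
/// if (i < array_len)    // may be speculated as taken even when i >= array_len
///   leak(array[i]);     // the load then reads attacker-chosen memory
/// ```
/// The pass tracks a "predicate state" value that becomes all-ones whenever a
/// conditional branch is mis-speculated, and uses that state to poison the
/// addresses or loaded values of vulnerable loads so the data cannot leak.
///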
20 //===----------------------------------------------------------------------===//
21 
22 #include "X86.h"
23 #include "X86InstrBuilder.h"
24 #include "X86InstrInfo.h"
25 #include "X86Subtarget.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/ScopeExit.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/SparseBitVector.h"
35 #include "llvm/ADT/Statistic.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/MC/MCSchedule.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/ErrorHandling.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <iterator>
60 #include <utility>
61 
62 using namespace llvm;
63 
64 #define PASS_KEY "x86-slh"
65 #define DEBUG_TYPE PASS_KEY
66 
67 STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
68 STATISTIC(NumBranchesUntraced, "Number of branches we were unable to trace");
69 STATISTIC(NumAddrRegsHardened,
70  "Number of address mode used registers hardened");
71 STATISTIC(NumPostLoadRegsHardened,
72  "Number of post-load register values hardened");
73 STATISTIC(NumCallsOrJumpsHardened,
74  "Number of calls or jumps requiring extra hardening");
75 STATISTIC(NumInstsInserted, "Number of instructions inserted");
76 STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
77 
78 static cl::opt<bool> EnableSpeculativeLoadHardening(
79  "x86-speculative-load-hardening",
80  cl::desc("Force enable speculative load hardening"), cl::init(false),
81  cl::Hidden);
82 
83 static cl::opt<bool> HardenEdgesWithLFENCE(
84  PASS_KEY "-lfence",
85  cl::desc(
86  "Use LFENCE along each conditional edge to harden against speculative "
87  "loads rather than conditional movs and poisoned pointers."),
88  cl::init(false), cl::Hidden);
89 
90 static cl::opt<bool> EnablePostLoadHardening(
91  PASS_KEY "-post-load",
92  cl::desc("Harden the value loaded *after* it is loaded by "
93  "flushing the loaded bits to 1. This is hard to do "
94  "in general but can be done easily for GPRs."),
95  cl::init(true), cl::Hidden);
96 
97 static cl::opt<bool> FenceCallAndRet(
98  PASS_KEY "-fence-call-and-ret",
99  cl::desc("Use a full speculation fence to harden both call and ret edges "
100  "rather than a lighter weight mitigation."),
101  cl::init(false), cl::Hidden);
102 
103 static cl::opt<bool> HardenInterprocedurally(
104  PASS_KEY "-ip",
105  cl::desc("Harden interprocedurally by passing our state in and out of "
106  "functions in the high bits of the stack pointer."),
107  cl::init(true), cl::Hidden);
108 
109 static cl::opt<bool>
110  HardenLoads(PASS_KEY "-loads",
111  cl::desc("Sanitize loads from memory. When disabled, no "
112  "significant security is provided."),
113  cl::init(true), cl::Hidden);
114 
115 static cl::opt<bool> HardenIndirectCallsAndJumps(
116  PASS_KEY "-indirect",
117  cl::desc("Harden indirect calls and jumps against using speculatively "
118  "stored attacker controlled addresses. This is designed to "
119  "mitigate Spectre v1.2 style attacks."),
120  cl::init(true), cl::Hidden);
121 
122 namespace {
123 
124 class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
125 public:
126  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
127 
128  StringRef getPassName() const override {
129  return "X86 speculative load hardening";
130  }
131  bool runOnMachineFunction(MachineFunction &MF) override;
132  void getAnalysisUsage(AnalysisUsage &AU) const override;
133 
134  /// Pass identification, replacement for typeid.
135  static char ID;
136 
137 private:
138  /// The information about a block's conditional terminators needed to trace
139  /// our predicate state through the exiting edges.
140  struct BlockCondInfo {
141  MachineBasicBlock *MBB;
142 
143  // We mostly have one conditional branch, and in extremely rare cases have
144  // two. Three and more are so rare as to be unimportant for compile time.
145  SmallVector<MachineInstr *, 2> CondBrs;
146 
147  MachineInstr *UncondBr;
148  };
149 
150  /// Manages the predicate state traced through the program.
151  struct PredState {
152  unsigned InitialReg = 0;
153  unsigned PoisonReg = 0;
154 
155  const TargetRegisterClass *RC;
156  MachineSSAUpdater SSA;
157 
158  PredState(MachineFunction &MF, const TargetRegisterClass *RC)
159  : RC(RC), SSA(MF) {}
160  };
161 
162  const X86Subtarget *Subtarget = nullptr;
163  MachineRegisterInfo *MRI = nullptr;
164  const X86InstrInfo *TII = nullptr;
165  const TargetRegisterInfo *TRI = nullptr;
166 
167  Optional<PredState> PS;
168 
169  void hardenEdgesWithLFENCE(MachineFunction &MF);
170 
171  SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
172 
173  SmallVector<MachineInstr *, 16>
174  tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
175 
176  void unfoldCallAndJumpLoads(MachineFunction &MF);
177 
178  SmallVector<MachineInstr *, 16>
179  tracePredStateThroughIndirectBranches(MachineFunction &MF);
180 
181  void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
182 
183  unsigned saveEFLAGS(MachineBasicBlock &MBB,
184  MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
185  void restoreEFLAGS(MachineBasicBlock &MBB,
186  MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
187  Register Reg);
188 
189  void mergePredStateIntoSP(MachineBasicBlock &MBB,
190  MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
191  unsigned PredStateReg);
192  unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
193  MachineBasicBlock::iterator InsertPt,
194  DebugLoc Loc);
195 
196  void
197  hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
198  MachineOperand &IndexMO,
199  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
200  MachineInstr *
201  sinkPostLoadHardenedInst(MachineInstr &MI,
202  SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
203  bool canHardenRegister(Register Reg);
204  unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
205  MachineBasicBlock::iterator InsertPt,
206  DebugLoc Loc);
207  unsigned hardenPostLoad(MachineInstr &MI);
208  void hardenReturnInstr(MachineInstr &MI);
209  void tracePredStateThroughCall(MachineInstr &MI);
210  void hardenIndirectCallOrJumpInstr(
211  MachineInstr &MI,
212  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
213 };
214 
215 } // end anonymous namespace
216 
217 char X86SpeculativeLoadHardeningPass::ID = 0;
218 
219 void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
220  AnalysisUsage &AU) const {
221  MachineFunctionPass::getAnalysisUsage(AU);
222 }
223 
224 static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
225  MachineBasicBlock &Succ, int SuccCount,
226  MachineInstr *Br, MachineInstr *&UncondBr,
227  const X86InstrInfo &TII) {
228  assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
229 
230  MachineFunction &MF = *MBB.getParent();
231 
232  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
233 
234  // We have to insert the new block immediately after the current one as we
235  // don't know what layout-successor relationships the successor has and we
236  // may not be able to (and generally don't want to) try to fix those up.
237  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
238 
239  // Update the branch instruction if necessary.
240  if (Br) {
241  assert(Br->getOperand(0).getMBB() == &Succ &&
242  "Didn't start with the right target!");
243  Br->getOperand(0).setMBB(&NewMBB);
244 
245  // If this successor was reached through a branch rather than fallthrough,
246  // we might have *broken* fallthrough and so need to inject a new
247  // unconditional branch.
248  if (!UncondBr) {
249  MachineBasicBlock &OldLayoutSucc =
250  *std::next(MachineFunction::iterator(&NewMBB));
251  assert(MBB.isSuccessor(&OldLayoutSucc) &&
252  "Without an unconditional branch, the old layout successor should "
253  "be an actual successor!");
254  auto BrBuilder =
255  BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
256  // Update the unconditional branch now that we've added one.
257  UncondBr = &*BrBuilder;
258  }
259 
260  // Insert unconditional "jump Succ" instruction in the new block if
261  // necessary.
262  if (!NewMBB.isLayoutSuccessor(&Succ)) {
263  SmallVector<MachineOperand, 4> Cond;
264  TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
265  }
266  } else {
267  assert(!UncondBr &&
268  "Cannot have a branchless successor and an unconditional branch!");
269  assert(NewMBB.isLayoutSuccessor(&Succ) &&
270  "A non-branch successor must have been a layout successor before "
271  "and now is a layout successor of the new block.");
272  }
273 
274  // If this is the only edge to the successor, we can just replace it in the
275  // CFG. Otherwise we need to add a new entry in the CFG for the new
276  // successor.
277  if (SuccCount == 1) {
278  MBB.replaceSuccessor(&Succ, &NewMBB);
279  } else {
280  MBB.splitSuccessor(&Succ, &NewMBB);
281  }
282 
283  // Hook up the edge from the new basic block to the old successor in the CFG.
284  NewMBB.addSuccessor(&Succ);
285 
286  // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
287  for (MachineInstr &MI : Succ) {
288  if (!MI.isPHI())
289  break;
290  for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
291  OpIdx += 2) {
292  MachineOperand &OpV = MI.getOperand(OpIdx);
293  MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
294  assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
295  if (OpMBB.getMBB() != &MBB)
296  continue;
297 
298  // If this is the last edge to the successor, just replace MBB in the PHI.
299  if (SuccCount == 1) {
300  OpMBB.setMBB(&NewMBB);
301  break;
302  }
303 
304  // Otherwise, append a new pair of operands for the new incoming edge.
305  MI.addOperand(MF, OpV);
306  MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
307  break;
308  }
309  }
310 
311  // Inherit live-ins from the successor
312  for (auto &LI : Succ.liveins())
313  NewMBB.addLiveIn(LI);
314 
315  LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
316  << Succ.getName() << "'.\n");
317  return NewMBB;
318 }
319 
320 /// Remove duplicate PHI operands to leave the PHI in a canonical and
321 /// predictable form.
322 ///
323 /// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
324 /// isn't what you might expect. We may have multiple entries in PHI nodes for
325 /// a single predecessor. This makes CFG-updating extremely complex, so here we
326 /// simplify all PHI nodes to a model even simpler than the IR's model: exactly
327 /// one entry per predecessor, regardless of how many edges there are.
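///
/// For example (illustrative, simplified MIR rather than a real test case), a
/// PHI carrying two entries for the same predecessor:
///   %v = PHI %a, %bb.1, %a, %bb.1, %b, %bb.2
/// is rewritten to carry exactly one entry per predecessor:
///   %v = PHI %a, %bb.1, %b, %bb.2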
328 static void canonicalizePHIOperands(MachineFunction &MF) {
329  SmallPtrSet<MachineBasicBlock *, 4> Preds;
330  SmallVector<int, 4> DupIndices;
331  for (auto &MBB : MF)
332  for (auto &MI : MBB) {
333  if (!MI.isPHI())
334  break;
335 
336  // First we scan the operands of the PHI looking for duplicate entries
337  // for a particular predecessor. We retain the operand index of each duplicate
338  // entry found.
339  for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
340  OpIdx += 2)
341  if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
342  DupIndices.push_back(OpIdx);
343 
344  // Now walk the duplicate indices, removing both the block and value. Note
345  // that these are stored as a vector, making this element-wise removal
347  // potentially quadratic.
348  //
349  // FIXME: It is really frustrating that we have to use a quadratic
350  // removal algorithm here. There should be a better way, but the use-def
351  // updates required make that impossible using the public API.
352  //
353  // Note that we have to process these backwards so that we don't
354  // invalidate other indices with each removal.
355  while (!DupIndices.empty()) {
356  int OpIdx = DupIndices.pop_back_val();
357  // Remove both the block and value operand, again in reverse order to
358  // preserve indices.
359  MI.RemoveOperand(OpIdx + 1);
360  MI.RemoveOperand(OpIdx);
361  }
362 
363  Preds.clear();
364  }
365 }
366 
367 /// Helper to scan a function for loads vulnerable to misspeculation that we
368 /// want to harden.
369 ///
370 /// We use this to avoid making changes to functions where there is nothing we
371 /// need to do to harden against misspeculation.
372 static bool hasVulnerableLoad(MachineFunction &MF) {
373  for (MachineBasicBlock &MBB : MF) {
374  for (MachineInstr &MI : MBB) {
375  // Loads within this basic block after an LFENCE are not at risk of
376  // speculatively executing with invalid predicates from prior control
377  // flow. So break out of this block but continue scanning the function.
378  if (MI.getOpcode() == X86::LFENCE)
379  break;
380 
381  // Looking for loads only.
382  if (!MI.mayLoad())
383  continue;
384 
385  // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
386  if (MI.getOpcode() == X86::MFENCE)
387  continue;
388 
389  // We found a load.
390  return true;
391  }
392  }
393 
394  // No loads found.
395  return false;
396 }
397 
398 bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
399  MachineFunction &MF) {
400  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
401  << " **********\n");
402 
403  // Only run if this pass is forced enabled or we detect the relevant function
404  // attribute requesting SLH.
405  if (!EnableSpeculativeLoadHardening &&
406  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
407  return false;
408 
409  Subtarget = &MF.getSubtarget<X86Subtarget>();
410  MRI = &MF.getRegInfo();
411  TII = Subtarget->getInstrInfo();
412  TRI = Subtarget->getRegisterInfo();
413 
414  // FIXME: Support for 32-bit.
415  PS.emplace(MF, &X86::GR64_NOSPRegClass);
416 
417  if (MF.begin() == MF.end())
418  // Nothing to do for a degenerate empty function...
419  return false;
420 
421  // We support an alternative hardening technique based on a debug flag.
422  if (HardenEdgesWithLFENCE) {
423  hardenEdgesWithLFENCE(MF);
424  return true;
425  }
426 
427  // Create a dummy debug loc to use for all the generated code here.
428  DebugLoc Loc;
429 
430  MachineBasicBlock &Entry = *MF.begin();
431  auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
432 
433  // Do a quick scan to see if we have any checkable loads.
434  bool HasVulnerableLoad = hasVulnerableLoad(MF);
435 
436  // See if we have any conditional branching blocks that we will need to trace
437  // predicate state through.
438  SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
439 
440  // If we have no interesting conditions or loads, nothing to do here.
441  if (!HasVulnerableLoad && Infos.empty())
442  return true;
443 
444  // The poison value is required to be an all-ones value for many aspects of
445  // this mitigation.
446  const int PoisonVal = -1;
447  PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
448  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
449  .addImm(PoisonVal);
450  ++NumInstsInserted;
451 
452  // If we have loads being hardened and we've asked for call and ret edges to
453  // get a full fence-based mitigation, inject that fence.
454  if (HasVulnerableLoad && FenceCallAndRet) {
455  // We need to insert an LFENCE at the start of the function to suspend any
456  // incoming misspeculation from the caller. This helps two-fold: the caller
457  // may not have been protected as this code has been, and this code gets to
458  // not take any specific action to protect across calls.
459  // FIXME: We could skip this for functions which unconditionally return
460  // a constant.
461  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
462  ++NumInstsInserted;
463  ++NumLFENCEsInserted;
464  }
465 
466  // If we guarded the entry with an LFENCE and have no conditionals to protect
467  // in blocks, then we're done.
468  if (FenceCallAndRet && Infos.empty())
469  // We may have changed the function's code at this point to insert fences.
470  return true;
471 
472  // Establish the initial predicate state at the entry of the function.
473  if (HardenInterprocedurally && !FenceCallAndRet) {
474  // Set up the predicate state by extracting it from the incoming stack
475  // pointer so we pick up any misspeculation in our caller.
476  PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
477  } else {
478  // Otherwise, just build the predicate state itself by zeroing a register
479  // as we don't need any initial state.
480  PS->InitialReg = MRI->createVirtualRegister(PS->RC);
481  Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
482  auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
483  PredStateSubReg);
484  ++NumInstsInserted;
485  MachineOperand *ZeroEFLAGSDefOp =
486  ZeroI->findRegisterDefOperand(X86::EFLAGS);
487  assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
488  "Must have an implicit def of EFLAGS!");
489  ZeroEFLAGSDefOp->setIsDead(true);
490  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
491  PS->InitialReg)
492  .addImm(0)
493  .addReg(PredStateSubReg)
494  .addImm(X86::sub_32bit);
495  }
496 
497  // We're going to need to trace predicate state throughout the function's
498  // CFG. Prepare for this by setting up our initial state of PHIs with unique
499  // predecessor entries and all the initial predicate state.
500  canonicalizePHIOperands(MF);
501 
502  // Track the updated values in an SSA updater to rewrite into SSA form at the
503  // end.
504  PS->SSA.Initialize(PS->InitialReg);
505  PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
506 
507  // Trace through the CFG.
508  auto CMovs = tracePredStateThroughCFG(MF, Infos);
509 
510  // We may also enter basic blocks in this function via exception handling
511  // control flow. Here, if we are hardening interprocedurally, we need to
512  // re-capture the predicate state from the throwing code. In the Itanium ABI,
513  // the throw will always look like a call to __cxa_throw and will have the
514  // predicate state in the stack pointer, so extract fresh predicate state from
515  // the stack pointer and make it available in SSA.
516  // FIXME: Handle non-itanium ABI EH models.
517  if (HardenInterprocedurally) {
518  for (MachineBasicBlock &MBB : MF) {
519  assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
520  assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
521  assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
522  if (!MBB.isEHPad())
523  continue;
524  PS->SSA.AddAvailableValue(
525  &MBB,
526  extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
527  }
528  }
529 
530  if (HardenIndirectCallsAndJumps) {
531  // If we are going to harden calls and jumps we need to unfold their memory
532  // operands.
533  unfoldCallAndJumpLoads(MF);
534 
535  // Then we trace predicate state through the indirect branches.
536  auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
537  CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
538  }
539 
540  // Now that we have the predicate state available at the start of each block
541  // in the CFG, trace it through each block, hardening vulnerable instructions
542  // as we go.
543  tracePredStateThroughBlocksAndHarden(MF);
544 
545  // Now rewrite all the uses of the pred state using the SSA updater to insert
546  // PHIs connecting the state between blocks along the CFG edges.
547  for (MachineInstr *CMovI : CMovs)
548  for (MachineOperand &Op : CMovI->operands()) {
549  if (!Op.isReg() || Op.getReg() != PS->InitialReg)
550  continue;
551 
552  PS->SSA.RewriteUse(Op);
553  }
554 
555  LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
556  dbgs() << "\n"; MF.verify(this));
557  return true;
558 }
559 
560 /// Implements the naive hardening approach of putting an LFENCE after every
561 /// potentially mis-predicted control flow construct.
562 ///
563 /// We include this as an alternative mostly for the purpose of comparison. The
564 /// performance impact of this is expected to be extremely severe and not
565 /// practical for any real-world users.
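///
/// At the source level the effect roughly corresponds to placing a fence at
/// the top of every conditionally reached block (a sketch, assuming the
/// _mm_lfence intrinsic from <immintrin.h>):
/// ```
/// if (i < len) {
///   _mm_lfence();   // speculation cannot proceed past this point
///   use(array[i]);
/// }
/// ```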
566 void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
567  MachineFunction &MF) {
568  // First, we scan the function looking for blocks that are reached along edges
569  // that we might want to harden.
570  SmallSetVector<MachineBasicBlock *, 8> Blocks;
571  for (MachineBasicBlock &MBB : MF) {
572  // If there is at most one successor, nothing to do here.
573  if (MBB.succ_size() <= 1)
574  continue;
575 
576  // Skip blocks unless their terminators start with a branch. Other
577  // terminators don't seem interesting for guarding against misspeculation.
578  auto TermIt = MBB.getFirstTerminator();
579  if (TermIt == MBB.end() || !TermIt->isBranch())
580  continue;
581 
582  // Add all the non-EH-pad successors to the blocks we want to harden. We
583  // skip EH pads because there isn't really a condition of interest on
584  // entering.
585  for (MachineBasicBlock *SuccMBB : MBB.successors())
586  if (!SuccMBB->isEHPad())
587  Blocks.insert(SuccMBB);
588  }
589 
590  for (MachineBasicBlock *MBB : Blocks) {
591  auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
592  BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
593  ++NumInstsInserted;
594  ++NumLFENCEsInserted;
595  }
596 }
597 
598 SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
599 X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
600  SmallVector<BlockCondInfo, 16> Infos;
601 
602  // Walk the function and build up a summary for each block's conditions that
603  // we need to trace through.
604  for (MachineBasicBlock &MBB : MF) {
605  // If there are no or only one successor, nothing to do here.
606  if (MBB.succ_size() <= 1)
607  continue;
608 
609  // We want to reliably handle any conditional branch terminators in the
610  // MBB, so we manually analyze the branch. We can handle all of the
611  // permutations here, including ones that analyze branch cannot.
612  //
613  // The approach is to walk backwards across the terminators, resetting at
614  // any unconditional non-indirect branch, and track all conditional edges
615  // to basic blocks as well as the fallthrough or unconditional successor
616  // edge. For each conditional edge, we track the target and the opposite
617  // condition code in order to inject a "no-op" cmov into that successor
618  // that will harden the predicate. For the fallthrough/unconditional
619  // edge, we inject a separate cmov for each conditional branch with
620  // matching condition codes. This effectively implements an "and" of the
621  // condition flags, even if there isn't a single condition flag that would
622  // directly implement that. We don't bother trying to optimize either of
623  // these cases because if such an optimization is possible, LLVM should
624  // have optimized the conditional *branches* in that way already to reduce
625  // instruction count. This late, we simply assume the minimal number of
626  // branch instructions is being emitted and use that to guide our cmov
627  // insertion.
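 //
 // For example (illustrative only), for a block ending in:
 //   jl L1
 //   jg L2
 //   jmp L3
 // the edge to L1 is checked with a CMOVGE of the poison value, the edge to
 // L2 with a CMOVLE, and the fallthrough edge to L3 with both a CMOVL and a
 // CMOVG, which together poison the state unless the flags said "equal".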
628 
629  BlockCondInfo Info = {&MBB, {}, nullptr};
630 
631  // Now walk backwards through the terminators and build up successors they
632  // reach and the conditions.
633  for (MachineInstr &MI : llvm::reverse(MBB)) {
634  // Once we've handled all the terminators, we're done.
635  if (!MI.isTerminator())
636  break;
637 
638  // If we see a non-branch terminator, we can't handle anything so bail.
639  if (!MI.isBranch()) {
640  Info.CondBrs.clear();
641  break;
642  }
643 
644  // If we see an unconditional branch, reset our state, clear any
645  // fallthrough, and set this is the "else" successor.
646  if (MI.getOpcode() == X86::JMP_1) {
647  Info.CondBrs.clear();
648  Info.UncondBr = &MI;
649  continue;
650  }
651 
652  // If we get an invalid condition, we have an indirect branch or some
653  // other unanalyzable "fallthrough" case. We model this as a nullptr for
654  // the destination so we can still guard any conditional successors.
655  // Consider code sequences like:
656  // ```
657  // jCC L1
658  // jmpq *%rax
659  // ```
660  // We still want to harden the edge to `L1`.
661  if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
662  Info.CondBrs.clear();
663  Info.UncondBr = &MI;
664  continue;
665  }
666 
667  // We have a vanilla conditional branch, add it to our list.
668  Info.CondBrs.push_back(&MI);
669  }
670  if (Info.CondBrs.empty()) {
671  ++NumBranchesUntraced;
672  LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
673  MBB.dump());
674  continue;
675  }
676 
677  Infos.push_back(Info);
678  }
679 
680  return Infos;
681 }
682 
683 /// Trace the predicate state through the CFG, instrumenting each conditional
684 /// branch such that misspeculation through an edge will poison the predicate
685 /// state.
686 ///
687 /// Returns the list of inserted CMov instructions so that they can have their
688 /// uses of the predicate state rewritten into proper SSA form once it is
689 /// complete.
690 SmallVector<MachineInstr *, 16>
691 X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
692  MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
693  // Collect the inserted cmov instructions so we can rewrite their uses of the
694  // predicate state into SSA form.
695  SmallVector<MachineInstr *, 16> CMovs;
696 
697  // Now walk all of the basic blocks looking for ones that end in conditional
698  // jumps where we need to update this register along each edge.
699  for (const BlockCondInfo &Info : Infos) {
700  MachineBasicBlock &MBB = *Info.MBB;
701  const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
702  MachineInstr *UncondBr = Info.UncondBr;
703 
704  LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
705  << "\n");
706  ++NumCondBranchesTraced;
707 
708  // Compute the non-conditional successor as either the target of any
709  // unconditional branch or the layout successor.
710  MachineBasicBlock *UncondSucc =
711  UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
712  ? UncondBr->getOperand(0).getMBB()
713  : nullptr)
714  : &*std::next(MachineFunction::iterator(&MBB));
715 
716  // Count how many edges there are to any given successor.
717  SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
718  if (UncondSucc)
719  ++SuccCounts[UncondSucc];
720  for (auto *CondBr : CondBrs)
721  ++SuccCounts[CondBr->getOperand(0).getMBB()];
722 
723  // A lambda to insert cmov instructions into a block checking all of the
724  // condition codes in a sequence.
725  auto BuildCheckingBlockForSuccAndConds =
726  [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
727  MachineInstr *Br, MachineInstr *&UncondBr,
728  ArrayRef<X86::CondCode> Conds) {
729  // First, we split the edge to insert the checking block into a safe
730  // location.
731  auto &CheckingMBB =
732  (SuccCount == 1 && Succ.pred_size() == 1)
733  ? Succ
734  : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
735 
736  bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
737  if (!LiveEFLAGS)
738  CheckingMBB.addLiveIn(X86::EFLAGS);
739 
740  // Now insert the cmovs to implement the checks.
741  auto InsertPt = CheckingMBB.begin();
742  assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
743  "Should never have a PHI in the initial checking block as it "
744  "always has a single predecessor!");
745 
746  // We will wire each cmov to each other, but need to start with the
747  // incoming pred state.
748  unsigned CurStateReg = PS->InitialReg;
749 
750  for (X86::CondCode Cond : Conds) {
751  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
752  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
753 
754  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
755  // Note that we intentionally use an empty debug location so that
756  // this picks up the preceding location.
757  auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
758  TII->get(CMovOp), UpdatedStateReg)
759  .addReg(CurStateReg)
760  .addReg(PS->PoisonReg)
761  .addImm(Cond);
762  // If this is the last cmov and the EFLAGS weren't originally
763  // live-in, mark them as killed.
764  if (!LiveEFLAGS && Cond == Conds.back())
765  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
766 
767  ++NumInstsInserted;
768  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
769  dbgs() << "\n");
770 
771  // The first one of the cmovs will be using the top level
772  // `PredStateReg` and need to get rewritten into SSA form.
773  if (CurStateReg == PS->InitialReg)
774  CMovs.push_back(&*CMovI);
775 
776  // The next cmov should start from this one's def.
777  CurStateReg = UpdatedStateReg;
778  }
779 
780  // And put the last one into the available values for SSA form of our
781  // predicate state.
782  PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
783  };
784 
785  std::vector<X86::CondCode> UncondCodeSeq;
786  for (auto *CondBr : CondBrs) {
787  MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
788  int &SuccCount = SuccCounts[&Succ];
789 
790  X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
791  X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
792  UncondCodeSeq.push_back(Cond);
793 
794  BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
795  {InvCond});
796 
797  // Decrement the successor count now that we've split one of the edges.
798  // We need to keep the count of edges to the successor accurate in order
799  // to know above when to *replace* the successor in the CFG vs. just
800  // adding the new successor.
801  --SuccCount;
802  }
803 
804  // Since we may have split edges and changed the number of successors,
805  // normalize the probabilities. This avoids doing it each time we split an
806  // edge.
807  MBB.normalizeSuccProbs();
808 
809  // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
810  // need to intersect the other condition codes. We can do this by just
811  // doing a cmov for each one.
812  if (!UncondSucc)
813  // If we have no fallthrough to protect (perhaps it is an indirect jump?)
814  // just skip this and continue.
815  continue;
816 
817  assert(SuccCounts[UncondSucc] == 1 &&
818  "We should never have more than one edge to the unconditional "
819  "successor at this point because every other edge must have been "
820  "split above!");
821 
822  // Sort and unique the codes to minimize them.
823  llvm::sort(UncondCodeSeq);
824  UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
825  UncondCodeSeq.end());
826 
827  // Build a checking version of the successor.
828  BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
829  UncondBr, UncondBr, UncondCodeSeq);
830  }
831 
832  return CMovs;
833 }
834 
835 /// Compute the register class for the unfolded load.
836 ///
837 /// FIXME: This should probably live in X86InstrInfo, potentially by adding
838 /// a way to unfold into a newly created vreg rather than requiring a register
839 /// input.
840 static const TargetRegisterClass *
841 getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
842  unsigned Opcode) {
843  unsigned Index;
844  unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
845  Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
846  const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
847  return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
848 }
849 
850 void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
851  MachineFunction &MF) {
852  for (MachineBasicBlock &MBB : MF)
853  // We use make_early_inc_range here so we can remove instructions if needed
854  // without disturbing the iteration.
855  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
856  // Must either be a call or a branch.
857  if (!MI.isCall() && !MI.isBranch())
858  continue;
859  // We only care about loading variants of these instructions.
860  if (!MI.mayLoad())
861  continue;
862 
863  switch (MI.getOpcode()) {
864  default: {
865  LLVM_DEBUG(
866  dbgs() << "ERROR: Found an unexpected loading branch or call "
867  "instruction:\n";
868  MI.dump(); dbgs() << "\n");
869  report_fatal_error("Unexpected loading branch or call!");
870  }
871 
872  case X86::FARCALL16m:
873  case X86::FARCALL32m:
874  case X86::FARCALL64m:
875  case X86::FARJMP16m:
876  case X86::FARJMP32m:
877  case X86::FARJMP64m:
878  // We cannot mitigate far jumps or calls, but we also don't expect them
879  // to be vulnerable to Spectre v1.2 style attacks.
880  continue;
881 
882  case X86::CALL16m:
883  case X86::CALL16m_NT:
884  case X86::CALL32m:
885  case X86::CALL32m_NT:
886  case X86::CALL64m:
887  case X86::CALL64m_NT:
888  case X86::JMP16m:
889  case X86::JMP16m_NT:
890  case X86::JMP32m:
891  case X86::JMP32m_NT:
892  case X86::JMP64m:
893  case X86::JMP64m_NT:
894  case X86::TAILJMPm64:
895  case X86::TAILJMPm64_REX:
896  case X86::TAILJMPm:
897  case X86::TCRETURNmi64:
898  case X86::TCRETURNmi: {
899  // Use the generic unfold logic now that we know we're dealing with
900  // expected instructions.
901  // FIXME: We don't have test coverage for all of these!
902  auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
903  if (!UnfoldedRC) {
904  LLVM_DEBUG(dbgs()
905  << "ERROR: Unable to unfold load from instruction:\n";
906  MI.dump(); dbgs() << "\n");
907  report_fatal_error("Unable to unfold load!");
908  }
909  Register Reg = MRI->createVirtualRegister(UnfoldedRC);
910  SmallVector<MachineInstr *, 2> NewMIs;
911  // If we were able to compute an unfolded reg class, any failure here
912  // is just a programming error so just assert.
913  bool Unfolded =
914  TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
915  /*UnfoldStore*/ false, NewMIs);
916  (void)Unfolded;
917  assert(Unfolded &&
918  "Computed unfolded register class but failed to unfold");
919  // Now stitch the new instructions into place and erase the old one.
920  for (auto *NewMI : NewMIs)
921  MBB.insert(MI.getIterator(), NewMI);
922 
923  // Update the call site info.
924  if (MI.isCandidateForCallSiteEntry())
925  MF.eraseCallSiteInfo(&MI);
926 
927  MI.eraseFromParent();
928  LLVM_DEBUG({
929  dbgs() << "Unfolded load successfully into:\n";
930  for (auto *NewMI : NewMIs) {
931  NewMI->dump();
932  dbgs() << "\n";
933  }
934  });
935  continue;
936  }
937  }
938  llvm_unreachable("Escaped switch with default!");
939  }
940 }
941 
942 /// Trace the predicate state through indirect branches, instrumenting them to
943 /// poison the state if a target is reached that does not match the expected
944 /// target.
945 ///
946 /// This is designed to mitigate Spectre variant 1 attacks where an indirect
947 /// branch is trained to predict a particular target and then mispredicts that
948 /// target in a way that can leak data. Despite using an indirect branch, this
949 /// is really a variant 1 style attack: it does not steer execution to an
950 /// arbitrary or attacker controlled address, and it does not require any
951 /// special code executing next to the victim. This attack can also be mitigated
952 /// through retpolines, but those require either replacing indirect branches
953 /// with conditional direct branches or lowering them through a device that
954 /// blocks speculation. This mitigation can replace these retpoline-style
955 /// mitigations for jump tables and other indirect branches within a function
956 /// when variant 2 isn't a risk while allowing limited speculation. Indirect
957 /// calls, however, cannot be mitigated through this technique without changing
958 /// the ABI in a fundamental way.
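///
/// Conceptually, every block reachable via an indirect branch begins with a
/// check of the form (a source-level sketch, not the emitted MI sequence):
/// ```
/// state = (incoming_target == address_of_this_block) ? state : all_ones;
/// ```
/// lowered below as a CMP of the branch's target register against the block's
/// own address followed by a CMOVNE of the poison value.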
959 SmallVector<MachineInstr *, 16>
960 X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
961  MachineFunction &MF) {
962  // We use the SSAUpdater to insert PHI nodes for the target addresses of
963  // indirect branches. We don't actually need the full power of the SSA updater
964  // in this particular case as we always have immediately available values, but
965  // this avoids us having to re-implement the PHI construction logic.
966  MachineSSAUpdater TargetAddrSSA(MF);
967  TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
968 
969  // Track which blocks were terminated with an indirect branch.
970  SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
971 
972  // We need to know what blocks end up reached via indirect branches. We
973  // expect this to be a subset of those whose address is taken and so track it
974  // directly via the CFG.
975  SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
976 
977  // Walk all the blocks which end in an indirect branch and make the
978  // target address available.
979  for (MachineBasicBlock &MBB : MF) {
980  // Find the last terminator.
981  auto MII = MBB.instr_rbegin();
982  while (MII != MBB.instr_rend() && MII->isDebugInstr())
983  ++MII;
984  if (MII == MBB.instr_rend())
985  continue;
986  MachineInstr &TI = *MII;
987  if (!TI.isTerminator() || !TI.isBranch())
988  // No terminator or non-branch terminator.
989  continue;
990 
991  unsigned TargetReg;
992 
993  switch (TI.getOpcode()) {
994  default:
995  // Direct branch or conditional branch (leading to fallthrough).
996  continue;
997 
998  case X86::FARJMP16m:
999  case X86::FARJMP32m:
1000  case X86::FARJMP64m:
1001  // We cannot mitigate far jumps or calls, but we also don't expect them
1002  // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1003  continue;
1004 
1005  case X86::JMP16m:
1006  case X86::JMP16m_NT:
1007  case X86::JMP32m:
1008  case X86::JMP32m_NT:
1009  case X86::JMP64m:
1010  case X86::JMP64m_NT:
1011  // Mostly as documentation.
1012  report_fatal_error("Memory operand jumps should have been unfolded!");
1013 
1014  case X86::JMP16r:
1015  report_fatal_error(
1016  "Support for 16-bit indirect branches is not implemented.");
1017  case X86::JMP32r:
1018  report_fatal_error(
1019  "Support for 32-bit indirect branches is not implemented.");
1020 
1021  case X86::JMP64r:
1022  TargetReg = TI.getOperand(0).getReg();
1023  }
1024 
1025  // We have definitely found an indirect branch. Verify that there are no
1026  // preceding conditional branches as we don't yet support that.
1027  if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1028  return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1029  })) {
1030  LLVM_DEBUG({
1031  dbgs() << "ERROR: Found other terminators in a block with an indirect "
1032  "branch! This is not yet supported! Terminator sequence:\n";
1033  for (MachineInstr &MI : MBB.terminators()) {
1034  MI.dump();
1035  dbgs() << '\n';
1036  }
1037  });
1038  report_fatal_error("Unimplemented terminator sequence!");
1039  }
1040 
1041  // Make the target register an available value for this block.
1042  TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1043  IndirectTerminatedMBBs.insert(&MBB);
1044 
1045  // Add all the successors to our target candidates.
1046  for (MachineBasicBlock *Succ : MBB.successors())
1047  IndirectTargetMBBs.insert(Succ);
1048  }
1049 
1050  // Keep track of the cmov instructions we insert so we can return them.
1051  SmallVector<MachineInstr *, 16> CMovs;
1052 
1053  // If we didn't find any indirect branches with targets, nothing to do here.
1054  if (IndirectTargetMBBs.empty())
1055  return CMovs;
1056 
1057  // We found indirect branches and targets that need to be instrumented to
1058  // harden loads within them. Walk the blocks of the function (to get a stable
1059  // ordering) and instrument each target of an indirect branch.
1060  for (MachineBasicBlock &MBB : MF) {
1061  // Skip the blocks that aren't candidate targets.
1062  if (!IndirectTargetMBBs.count(&MBB))
1063  continue;
1064 
1065  // We don't expect EH pads to ever be reached via an indirect branch. If
1066  // this is desired for some reason, we could simply skip them here rather
1067  // than asserting.
1068  assert(!MBB.isEHPad() &&
1069  "Unexpected EH pad as target of an indirect branch!");
1070 
1071  // We should never end up threading EFLAGS into a block to harden
1072  // conditional jumps as there would be an additional successor via the
1073  // indirect branch. As a consequence, all such edges would be split before
1074  // reaching here, and the inserted block will handle the EFLAGS-based
1075  // hardening.
1076  assert(!MBB.isLiveIn(X86::EFLAGS) &&
1077  "Cannot check within a block that already has live-in EFLAGS!");
1078 
1079  // We can't handle having non-indirect edges into this block unless this is
1080  // the only successor and we can synthesize the necessary target address.
1081  for (MachineBasicBlock *Pred : MBB.predecessors()) {
1082  // If we've already handled this by extracting the target directly,
1083  // nothing to do.
1084  if (IndirectTerminatedMBBs.count(Pred))
1085  continue;
1086 
1087  // Otherwise, we have to be the only successor. We generally expect this
1088  // to be true as conditional branches should have had a critical edge
1089  // split already. We don't however need to worry about EH pad successors
1090  // as they'll happily ignore the target and their hardening strategy is
1091  // resilient to all ways in which they could be reached speculatively.
1092  if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1093  return Succ->isEHPad() || Succ == &MBB;
1094  })) {
1095  LLVM_DEBUG({
1096  dbgs() << "ERROR: Found conditional entry to target of indirect "
1097  "branch!\n";
1098  Pred->dump();
1099  MBB.dump();
1100  });
1101  report_fatal_error("Cannot harden a conditional entry to a target of "
1102  "an indirect branch!");
1103  }
1104 
1105  // Now we need to compute the address of this block and install it as a
1106  // synthetic target in the predecessor. We do this at the bottom of the
1107  // predecessor.
1108  auto InsertPt = Pred->getFirstTerminator();
1109  Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1110  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1111  !Subtarget->isPositionIndependent()) {
1112  // Directly materialize it into an immediate.
1113  auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1114  TII->get(X86::MOV64ri32), TargetReg)
1115  .addMBB(&MBB);
1116  ++NumInstsInserted;
1117  (void)AddrI;
1118  LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1119  dbgs() << "\n");
1120  } else {
1121  auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1122  TargetReg)
1123  .addReg(/*Base*/ X86::RIP)
1124  .addImm(/*Scale*/ 1)
1125  .addReg(/*Index*/ 0)
1126  .addMBB(&MBB)
1127  .addReg(/*Segment*/ 0);
1128  ++NumInstsInserted;
1129  (void)AddrI;
1130  LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1131  dbgs() << "\n");
1132  }
1133  // And make this available.
1134  TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1135  }
1136 
1137  // Materialize the needed SSA value of the target. Note that we need the
1138  // middle of the block as this block might at the bottom have an indirect
1139  // branch back to itself. We can do this here because at this point, every
1140  // predecessor of this block has an available value. This is basically just
1141  // automating the construction of a PHI node for this target.
1142  unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1143 
1144  // Insert a comparison of the incoming target register with this block's
1145  // address. This also requires us to mark the block as having its address
1146  // taken explicitly.
1147  MBB.setHasAddressTaken();
1148  auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1149  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1150  !Subtarget->isPositionIndependent()) {
1151  // Check directly against a relocated immediate when we can.
1152  auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1153  .addReg(TargetReg, RegState::Kill)
1154  .addMBB(&MBB);
1155  ++NumInstsInserted;
1156  (void)CheckI;
1157  LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1158  } else {
1159  // Otherwise compute the address into a register first.
1160  Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1161  auto AddrI =
1162  BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1163  .addReg(/*Base*/ X86::RIP)
1164  .addImm(/*Scale*/ 1)
1165  .addReg(/*Index*/ 0)
1166  .addMBB(&MBB)
1167  .addReg(/*Segment*/ 0);
1168  ++NumInstsInserted;
1169  (void)AddrI;
1170  LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1171  auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1172  .addReg(TargetReg, RegState::Kill)
1173  .addReg(AddrReg, RegState::Kill);
1174  ++NumInstsInserted;
1175  (void)CheckI;
1176  LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1177  }
1178 
1179  // Now cmov over the predicate if the comparison wasn't equal.
1180  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1181  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1182  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1183  auto CMovI =
1184  BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1185  .addReg(PS->InitialReg)
1186  .addReg(PS->PoisonReg)
1187  .addImm(X86::COND_NE);
1188  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
1189  ++NumInstsInserted;
1190  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1191  CMovs.push_back(&*CMovI);
1192 
1193  // And put the new value into the available values for SSA form of our
1194  // predicate state.
1195  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1196  }
1197 
1198  // Return all the newly inserted cmov instructions of the predicate state.
1199  return CMovs;
1200 }
1201 
1202 // Returns true if the MI has EFLAGS as a register def operand and it's live;
1203 // otherwise it returns false.
1204 static bool isEFLAGSDefLive(const MachineInstr &MI) {
1205  if (const MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1206  return !DefOp->isDead();
1207  }
1208  return false;
1209 }
1210 
1211 static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1212  const TargetRegisterInfo &TRI) {
1213  // Check if EFLAGS are alive by seeing if there is a def of them or they
1214  // live-in, and then seeing if that def is in turn used.
1215  for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1216  if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1217  // If the def is dead, then EFLAGS is not live.
1218  if (DefOp->isDead())
1219  return false;
1220 
1221  // Otherwise we've def'ed it, and it is live.
1222  return true;
1223  }
1224  // While at this instruction, also check if we use and kill EFLAGS
1225  // which means it isn't live.
1226  if (MI.killsRegister(X86::EFLAGS, &TRI))
1227  return false;
1228  }
1229 
1230  // If we didn't find anything conclusive (neither definitely alive or
1231  // definitely dead) return whether it lives into the block.
1232  return MBB.isLiveIn(X86::EFLAGS);
1233 }
1234 
1235 /// Trace the predicate state through each of the blocks in the function,
1236 /// hardening everything necessary along the way.
1237 ///
1238 /// We call this routine once the initial predicate state has been established
1239 /// for each basic block in the function in the SSA updater. This routine traces
1240 /// it through the instructions within each basic block, and for non-returning
1241 /// blocks informs the SSA updater about the final state that lives out of the
1242 /// block. Along the way, it hardens any vulnerable instruction using the
1243 /// currently valid predicate state. We have to do these two things together
1244 /// because the SSA updater only works across blocks. Within a block, we track
1245 /// the current predicate state directly and update it as it changes.
1246 ///
1247 /// This operates in two passes over each block. First, we analyze the loads in
1248 /// the block to determine which strategy will be used to harden them: hardening
1249 /// the address or hardening the loaded value when loaded into a register
1250 /// amenable to hardening. We have to process these first because the two
1251 /// strategies may interact -- later hardening may change what strategy we wish
1252 /// to use. We also will analyze data dependencies between loads and avoid
1253 /// hardening those loads that are data dependent on a load with a hardened
1254 /// address. We also skip hardening loads already behind an LFENCE as that is
1255 /// sufficient to harden them against misspeculation.
1256 ///
1257 /// Second, we actively trace the predicate state through the block, applying
1258 /// the hardening steps we determined necessary in the first pass as we go.
1259 ///
1260 /// These two passes are applied to each basic block. We operate one block at a
1261 /// time to simplify reasoning about reachability and sequencing.
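///
/// The two strategies correspond roughly to the following source-level sketch
/// (illustrative only):
/// ```
/// // Address hardening: poison the pointer before the load.
/// ptr |= state;     // state == -1 turns the address into a useless one
/// value = *ptr;
///
/// // Post-load hardening: poison the value after the load.
/// value = *ptr;
/// value |= state;   // state == -1 flushes the loaded bits to all-ones
/// ```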
1262 void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1263  MachineFunction &MF) {
1264  SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1265  SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1266 
1267  SmallSet<unsigned, 16> HardenedAddrRegs;
1268 
1269  SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1270 
1271  // Track the set of load-dependent registers through the basic block. Because
1272  // the values of these registers have an existing data dependency on a loaded
1273  // value which we would have checked, we can omit any checks on them.
1274  SparseBitVector<> LoadDepRegs;
1275 
1276  for (MachineBasicBlock &MBB : MF) {
1277  // The first pass over the block: collect all the loads which can have their
1278  // loaded value hardened and all the loads that instead need their address
1279  // hardened. During this walk we propagate load dependence for address
1280  // hardened loads and also look for LFENCE to stop hardening wherever
1281  // possible. When deciding whether or not to harden the loaded value,
1282  // we check to see if any registers used in the address will have been
1283  // hardened at this point and if so, harden any remaining address registers
1284  // as that often successfully re-uses hardened addresses and minimizes
1285  // instructions.
1286  //
1287  // FIXME: We should consider an aggressive mode where we continue to keep as
1288  // many loads value-hardened as possible even when some address register
1289  // hardening would be free (due to reuse).
1290  //
1291  // Note that we only need this pass if we are actually hardening loads.
1292  if (HardenLoads)
1293  for (MachineInstr &MI : MBB) {
1294  // We naively assume that all def'ed registers of an instruction have
1295  // a data dependency on all of their operands.
1296  // FIXME: Do a more careful analysis of x86 to build a conservative
1297  // model here.
1298  if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1299  return Op.isReg() && LoadDepRegs.test(Op.getReg());
1300  }))
1301  for (MachineOperand &Def : MI.defs())
1302  if (Def.isReg())
1303  LoadDepRegs.set(Def.getReg());
1304 
1305  // Both Intel and AMD are guiding that they will change the semantics of
1306  // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1307  // no more need to guard things in this block.
1308  if (MI.getOpcode() == X86::LFENCE)
1309  break;
1310 
1311  // If this instruction cannot load, nothing to do.
1312  if (!MI.mayLoad())
1313  continue;
1314 
1315  // Some instructions which "load" are trivially safe or unimportant.
1316  if (MI.getOpcode() == X86::MFENCE)
1317  continue;
1318 
1319  // Extract the memory operand information about this instruction.
1320  // FIXME: This doesn't handle loading pseudo instructions which we often
1321  // could handle with similarly generic logic. We probably need to add an
1322  // MI-layer routine similar to the MC-layer one we use here which maps
1323  // pseudos much like this maps real instructions.
1324  const MCInstrDesc &Desc = MI.getDesc();
1325  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1326  if (MemRefBeginIdx < 0) {
1327  LLVM_DEBUG(dbgs()
1328  << "WARNING: unable to harden loading instruction: ";
1329  MI.dump());
1330  continue;
1331  }
1332 
1333  MemRefBeginIdx += X86II::getOperandBias(Desc);
1334 
1335  MachineOperand &BaseMO =
1336  MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1337  MachineOperand &IndexMO =
1338  MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1339 
1340  // If we have at least one (non-frame-index, non-RIP) register operand,
1341  // and neither operand is load-dependent, we need to check the load.
1342  unsigned BaseReg = 0, IndexReg = 0;
1343  if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1344  BaseMO.getReg() != X86::NoRegister)
1345  BaseReg = BaseMO.getReg();
1346  if (IndexMO.getReg() != X86::NoRegister)
1347  IndexReg = IndexMO.getReg();
1348 
1349  if (!BaseReg && !IndexReg)
1350  // No register operands!
1351  continue;
1352 
1353  // If any register operand is dependent, this load is dependent and we
1354  // needn't check it.
1355  // FIXME: Is this true in the case where we are hardening loads after
1356  // they complete? Unclear, need to investigate.
1357  if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1358  (IndexReg && LoadDepRegs.test(IndexReg)))
1359  continue;
1360 
1361  // If post-load hardening is enabled, this load is compatible with
1362  // post-load hardening, and we aren't already going to harden one of the
1363  // address registers, queue it up to be hardened post-load. Notably,
1364  // even once hardened this won't introduce a useful dependency that
1365  // could prune out subsequent loads.
1366  if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
1367  !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1368  MI.getOperand(0).isReg() &&
1369  canHardenRegister(MI.getOperand(0).getReg()) &&
1370  !HardenedAddrRegs.count(BaseReg) &&
1371  !HardenedAddrRegs.count(IndexReg)) {
1372  HardenPostLoad.insert(&MI);
1373  HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1374  continue;
1375  }
1376 
1377  // Record this instruction for address hardening and record its register
1378  // operands as being address-hardened.
1379  HardenLoadAddr.insert(&MI);
1380  if (BaseReg)
1381  HardenedAddrRegs.insert(BaseReg);
1382  if (IndexReg)
1383  HardenedAddrRegs.insert(IndexReg);
1384 
1385  for (MachineOperand &Def : MI.defs())
1386  if (Def.isReg())
1387  LoadDepRegs.set(Def.getReg());
1388  }
1389 
1390  // Now re-walk the instructions in the basic block, and apply whichever
1391  // hardening strategy we have elected. Note that we do this in a second
1392  // pass specifically so that we have the complete set of instructions for
1393  // which we will do post-load hardening and can defer it in certain
1394  // circumstances.
1395  for (MachineInstr &MI : MBB) {
1396  if (HardenLoads) {
1397  // We cannot both require hardening the def of a load and its address.
1398  assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1399  "Requested to harden both the address and def of a load!");
1400 
1401  // Check if this is a load whose address needs to be hardened.
1402  if (HardenLoadAddr.erase(&MI)) {
1403  const MCInstrDesc &Desc = MI.getDesc();
1404  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1405  assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1406 
1407  MemRefBeginIdx += X86II::getOperandBias(Desc);
1408 
1409  MachineOperand &BaseMO =
1410  MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1411  MachineOperand &IndexMO =
1412  MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1413  hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1414  continue;
1415  }
1416 
1417  // Test if this instruction is one of our post load instructions (and
1418  // remove it from the set if so).
1419  if (HardenPostLoad.erase(&MI)) {
1420  assert(!MI.isCall() && "Must not try to post-load harden a call!");
1421 
1422  // If this is a data-invariant load and there is no EFLAGS
1423  // interference, we want to try and sink any hardening as far as
1424  // possible.
1425  if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
1426  // Sink the instruction we'll need to harden as far as we can down
1427  // the graph.
1428  MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1429 
1430  // If we managed to sink this instruction, update everything so we
1431  // harden that instruction when we reach it in the instruction
1432  // sequence.
1433  if (SunkMI != &MI) {
1434  // If in sinking there was no instruction needing to be hardened,
1435  // we're done.
1436  if (!SunkMI)
1437  continue;
1438 
1439  // Otherwise, add this to the set of defs we harden.
1440  HardenPostLoad.insert(SunkMI);
1441  continue;
1442  }
1443  }
1444 
1445  unsigned HardenedReg = hardenPostLoad(MI);
1446 
1447  // Mark the resulting hardened register as such so we don't re-harden.
1448  AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1449 
1450  continue;
1451  }
1452 
1453  // Check for an indirect call or branch that may need its input hardened
1454  // even if we couldn't find the specific load used, or were able to
1455  // avoid hardening it for some reason. Note that here we cannot break
1456  // out afterward as we may still need to handle any call aspect of this
1457  // instruction.
1458  if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1459  hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1460  }
1461 
1462  // After we finish hardening loads we handle interprocedural hardening if
1463  // enabled and relevant for this instruction.
1464  if (!HardenInterprocedurally)
1465  continue;
1466  if (!MI.isCall() && !MI.isReturn())
1467  continue;
1468 
1469  // If this is a direct return (IE, not a tail call) just directly harden
1470  // it.
1471  if (MI.isReturn() && !MI.isCall()) {
1472  hardenReturnInstr(MI);
1473  continue;
1474  }
1475 
1476  // Otherwise we have a call. We need to handle transferring the predicate
1477  // state into a call and recovering it after the call returns (unless this
1478  // is a tail call).
1479  assert(MI.isCall() && "Should only reach here for calls!");
1480  tracePredStateThroughCall(MI);
1481  }
1482 
1483  HardenPostLoad.clear();
1484  HardenLoadAddr.clear();
1485  HardenedAddrRegs.clear();
1486  AddrRegToHardenedReg.clear();
1487 
1488  // Currently, we only track data-dependent loads within a basic block.
1489  // FIXME: We should see if this is necessary or if we could be more
1490  // aggressive here without opening up attack avenues.
1491  LoadDepRegs.clear();
1492  }
1493 }
1494 
1495 /// Save EFLAGS into the returned GPR. This can in turn be restored with
1496 /// `restoreEFLAGS`.
1497 ///
1498 /// Note that LLVM can only lower very simple patterns of saved and restored
1499 /// EFLAGS registers. The restore should always be within the same basic block
1500 /// as the save so that no PHI nodes are inserted.
1501 unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1502  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1503  DebugLoc Loc) {
1504  // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1505  // what instruction selection does.
1506  Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1507  // We directly copy the FLAGS register and rely on later lowering to clean
1508  // this up into the appropriate setCC instructions.
1509  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1510  ++NumInstsInserted;
1511  return Reg;
1512 }
1513 
1514 /// Restore EFLAGS from the provided GPR. This should be produced by
1515 /// `saveEFLAGS`.
1516 ///
1517 /// This must be done within the same basic block as the save in order to
1518 /// reliably lower.
1519 void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1520  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1521  Register Reg) {
1522  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1523  ++NumInstsInserted;
1524 }
1525 
1526 /// Takes the current predicate state (in a register) and merges it into the
1527 /// stack pointer. The state is essentially a single bit, but we merge this in
1528 /// a way that won't form non-canonical pointers and also will be preserved
1529 /// across normal stack adjustments.
1530 void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1531  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1532  unsigned PredStateReg) {
1533  Register TmpReg = MRI->createVirtualRegister(PS->RC);
1534  // FIXME: This hard codes a shift distance based on the number of bits needed
1535  // to stay canonical on 64-bit. We should compute this somehow and support
1536  // 32-bit as part of that.
1537  auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1538  .addReg(PredStateReg, RegState::Kill)
1539  .addImm(47);
1540  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1541  ++NumInstsInserted;
1542  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1543  .addReg(X86::RSP)
1544  .addReg(TmpReg, RegState::Kill);
1545  OrI->addRegisterDead(X86::EFLAGS, TRI);
1546  ++NumInstsInserted;
1547 }
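A minimal scalar sketch of what the SHL-by-47 plus OR sequence above computes, assuming the 64-bit layout noted in the FIXME (the helper name and standalone form are illustrative, not part of the pass):

#include <cstdint>

// PredState is either all-zeros (not misspeculating) or all-ones
// (misspeculating). Shifting it left by 47 sets bits 63:47, so the poisoned
// RSP remains a canonical (high-half) pointer, and ordinary stack adjustments
// never disturb the merged-in high bits.
static inline uint64_t mergePredStateIntoSPModel(uint64_t RSP,
                                                 uint64_t PredState) {
  return RSP | (PredState << 47);
}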
1548 
1549 /// Extracts the predicate state stored in the high bits of the stack pointer.
1550 unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1551  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1552  DebugLoc Loc) {
1553  Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1554  Register TmpReg = MRI->createVirtualRegister(PS->RC);
1555 
1556  // We know that the stack pointer will have any preserved predicate state in
1557  // its high bit. We just want to smear this across the other bits. Turns out,
1558  // this is exactly what an arithmetic right shift does.
1559  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1560  .addReg(X86::RSP);
1561  auto ShiftI =
1562  BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1563  .addReg(TmpReg, RegState::Kill)
1564  .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1565  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1566  ++NumInstsInserted;
1567 
1568  return PredStateReg;
1569 }
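The matching scalar sketch for extraction (again a hypothetical standalone model, not code from the pass): the arithmetic right shift smears the sign bit across the whole register, recovering either all-zeros or all-ones.

#include <cstdint>

// Model of the COPY + SAR-by-63 emitted above. If an all-ones predicate state
// was merged into RSP, bit 63 is set and the shift yields all-ones; a
// canonical user-space RSP yields all-zeros.
static inline uint64_t extractPredStateFromSPModel(uint64_t RSP) {
  return static_cast<uint64_t>(static_cast<int64_t>(RSP) >> 63);
}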
1570 
1571 void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1572  MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1573  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1574  MachineBasicBlock &MBB = *MI.getParent();
1575  const DebugLoc &Loc = MI.getDebugLoc();
1576 
1577  // Check if EFLAGS are alive by seeing if there is a def of them or they
1578  // live-in, and then seeing if that def is in turn used.
1579  bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1580 
1581  SmallVector<MachineOperand *, 2> HardenOpRegs;
1582 
1583  if (BaseMO.isFI()) {
1584  // A frame index is never a dynamically controllable load, so only
1585  // harden it if we're covering fixed address loads as well.
1586  LLVM_DEBUG(
1587  dbgs() << " Skipping hardening base of explicit stack frame load: ";
1588  MI.dump(); dbgs() << "\n");
1589  } else if (BaseMO.getReg() == X86::RSP) {
1590  // Some idempotent atomic operations are lowered directly to a locked
1591  // OR with 0 to the top of stack (or slightly offset from top) which uses an
1592  // explicit RSP register as the base.
1593  assert(IndexMO.getReg() == X86::NoRegister &&
1594  "Explicit RSP access with dynamic index!");
1595  LLVM_DEBUG(
1596  dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1597  } else if (BaseMO.getReg() == X86::RIP ||
1598  BaseMO.getReg() == X86::NoRegister) {
1599  // For both RIP-relative addressed loads or absolute loads, we cannot
1600  // meaningfully harden them because the address being loaded has no
1601  // dynamic component.
1602  //
1603  // FIXME: When using a segment base (like TLS does) we end up with the
1604  // dynamic address being the base plus -1 because we can't mutate the
1605  // segment register here. This allows the signed 32-bit offset to point at
1606  // valid segment-relative addresses and load them successfully.
1607  LLVM_DEBUG(
1608  dbgs() << " Cannot harden base of "
1609  << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1610  << " address in a load!");
1611  } else {
1612  assert(BaseMO.isReg() &&
1613  "Only allowed to have a frame index or register base.");
1614  HardenOpRegs.push_back(&BaseMO);
1615  }
1616 
1617  if (IndexMO.getReg() != X86::NoRegister &&
1618  (HardenOpRegs.empty() ||
1619  HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1620  HardenOpRegs.push_back(&IndexMO);
1621 
1622  assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1623  "Should have exactly one or two registers to harden!");
1624  assert((HardenOpRegs.size() == 1 ||
1625  HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1626  "Should not have two of the same registers!");
1627 
1628  // Remove any registers that have already been checked.
1629  llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1630  // See if this operand's register has already been checked.
1631  auto It = AddrRegToHardenedReg.find(Op->getReg());
1632  if (It == AddrRegToHardenedReg.end())
1633  // Not checked, so retain this one.
1634  return false;
1635 
1636  // Otherwise, we can directly update this operand and remove it.
1637  Op->setReg(It->second);
1638  return true;
1639  });
1640  // If there are none left, we're done.
1641  if (HardenOpRegs.empty())
1642  return;
1643 
1644  // Compute the current predicate state.
1645  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1646 
1647  auto InsertPt = MI.getIterator();
1648 
1649  // If EFLAGS are live and we don't have access to instructions that avoid
1650  // clobbering EFLAGS we need to save and restore them. This in turn makes
1651  // the EFLAGS no longer live.
1652  unsigned FlagsReg = 0;
1653  if (EFLAGSLive && !Subtarget->hasBMI2()) {
1654  EFLAGSLive = false;
1655  FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1656  }
1657 
1658  for (MachineOperand *Op : HardenOpRegs) {
1659  Register OpReg = Op->getReg();
1660  auto *OpRC = MRI->getRegClass(OpReg);
1661  Register TmpReg = MRI->createVirtualRegister(OpRC);
1662 
1663  // If this is a vector register, we'll need somewhat custom logic to handle
1664  // hardening it.
1665  if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1666  OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1667  assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1668  bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1669 
1670  // Move our state into a vector register.
1671  // FIXME: We could skip this at the cost of longer encodings with AVX-512
1672  // but that doesn't seem likely to be worth it.
1673  Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1674  auto MovI =
1675  BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1676  .addReg(StateReg);
1677  (void)MovI;
1678  ++NumInstsInserted;
1679  LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1680 
1681  // Broadcast it across the vector register.
1682  Register VBStateReg = MRI->createVirtualRegister(OpRC);
1683  auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1684  TII->get(Is128Bit ? X86::VPBROADCASTQrr
1685  : X86::VPBROADCASTQYrr),
1686  VBStateReg)
1687  .addReg(VStateReg);
1688  (void)BroadcastI;
1689  ++NumInstsInserted;
1690  LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1691  dbgs() << "\n");
1692 
1693  // Merge our potential poison state into the value with a vector or.
1694  auto OrI =
1695  BuildMI(MBB, InsertPt, Loc,
1696  TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1697  .addReg(VBStateReg)
1698  .addReg(OpReg);
1699  (void)OrI;
1700  ++NumInstsInserted;
1701  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1702  } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1703  OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1704  OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1705  assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1706  bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1707  bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1708  if (Is128Bit || Is256Bit)
1709  assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1710 
1711  // Broadcast our state into a vector register.
1712  Register VStateReg = MRI->createVirtualRegister(OpRC);
1713  unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1714  : Is256Bit ? X86::VPBROADCASTQrZ256rr
1715  : X86::VPBROADCASTQrZrr;
1716  auto BroadcastI =
1717  BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1718  .addReg(StateReg);
1719  (void)BroadcastI;
1720  ++NumInstsInserted;
1721  LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1722  dbgs() << "\n");
1723 
1724  // Merge our potential poison state into the value with a vector or.
1725  unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1726  : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1727  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1728  .addReg(VStateReg)
1729  .addReg(OpReg);
1730  (void)OrI;
1731  ++NumInstsInserted;
1732  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1733  } else {
1734  // FIXME: Need to support GR32 here for 32-bit code.
1735  assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1736  "Not a supported register class for address hardening!");
1737 
1738  if (!EFLAGSLive) {
1739  // Merge our potential poison state into the value with an or.
1740  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1741  .addReg(StateReg)
1742  .addReg(OpReg);
1743  OrI->addRegisterDead(X86::EFLAGS, TRI);
1744  ++NumInstsInserted;
1745  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1746  } else {
1747  // We need to avoid touching EFLAGS so shift out all but the most
1748  // significant bit using an instruction that doesn't update flags (SHRX).
1749  auto ShiftI =
1750  BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1751  .addReg(OpReg)
1752  .addReg(StateReg);
1753  (void)ShiftI;
1754  ++NumInstsInserted;
1755  LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1756  dbgs() << "\n");
1757  }
1758  }
1759 
1760  // Record this register as checked and update the operand.
1761  assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1762  "Should not have checked this register yet!");
1763  AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1764  Op->setReg(TmpReg);
1765  ++NumAddrRegsHardened;
1766  }
1767 
1768  // And restore the flags if needed.
1769  if (FlagsReg)
1770  restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1771 }
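Scalar models of the two GPR strategies used above (hypothetical helpers, for illustration only; PredState is all-zeros or all-ones):

#include <cstdint>

// OR path (EFLAGS dead): a poisoned state turns the address into all-ones,
// which cannot be steered to attacker-chosen memory.
static inline uint64_t hardenAddrWithOr(uint64_t AddrReg, uint64_t PredState) {
  return AddrReg | PredState;
}

// SHRX path (EFLAGS live): SHRX shifts by the low 6 bits of the count, so an
// all-ones state shifts by 63 and leaves at most the address's top bit, while
// an all-zeros state leaves the address unchanged. SHRX does not write flags.
static inline uint64_t hardenAddrWithShrx(uint64_t AddrReg,
                                          uint64_t PredState) {
  return AddrReg >> (PredState & 63);
}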
1772 
1773 MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1774  MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1775  assert(X86InstrInfo::isDataInvariantLoad(InitialMI) &&
1776  "Cannot get here with a non-invariant load!");
1777  assert(!isEFLAGSDefLive(InitialMI) &&
1778  "Cannot get here with a data invariant load "
1779  "that interferes with EFLAGS!");
1780 
1781  // See if we can sink hardening the loaded value.
1782  auto SinkCheckToSingleUse =
1783  [&](MachineInstr &MI) -> Optional<MachineInstr *> {
1784  Register DefReg = MI.getOperand(0).getReg();
1785 
1786  // We need to find a single use to which we can sink the check. We can
1787  // primarily do this because many uses may already end up checked on their
1788  // own.
1789  MachineInstr *SingleUseMI = nullptr;
1790  for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1791  // If we're already going to harden this use, it is data invariant, it
1792  // does not interfere with EFLAGS, and within our block.
1793  if (HardenedInstrs.count(&UseMI)) {
1794  if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
1795  // If we've already decided to harden a non-load, we must have sunk
1796  // some other post-load hardened instruction to it and it must itself
1797  // be data-invariant.
1798  assert(X86InstrInfo::isDataInvariant(UseMI) &&
1799  "Data variant instruction being hardened!");
1800  continue;
1801  }
1802 
1803  // Otherwise, this is a load and the load component can't be data
1804  // invariant so check how this register is being used.
1805  const MCInstrDesc &Desc = UseMI.getDesc();
1806  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1807  assert(MemRefBeginIdx >= 0 &&
1808  "Should always have mem references here!");
1809  MemRefBeginIdx += X86II::getOperandBias(Desc);
1810 
1811  MachineOperand &BaseMO =
1812  UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1813  MachineOperand &IndexMO =
1814  UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1815  if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1816  (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1817  // The load uses the register as part of its address making it not
1818  // invariant.
1819  return {};
1820 
1821  continue;
1822  }
1823 
1824  if (SingleUseMI)
1825  // We already have a single use, this would make two. Bail.
1826  return {};
1827 
1828  // If this single use isn't data invariant, isn't in this block, or has
1829  // interfering EFLAGS, we can't sink the hardening to it.
1830  if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1831  isEFLAGSDefLive(UseMI))
1832  return {};
1833 
1834  // If this instruction defines multiple registers bail as we won't harden
1835  // all of them.
1836  if (UseMI.getDesc().getNumDefs() > 1)
1837  return {};
1838 
1839  // If this register isn't a virtual register we can't walk its uses sanely,
1840  // just bail. Also check that its register class is one of the ones we
1841  // can harden.
1842  Register UseDefReg = UseMI.getOperand(0).getReg();
1843  if (!UseDefReg.isVirtual() || !canHardenRegister(UseDefReg))
1844  return {};
1845 
1846  SingleUseMI = &UseMI;
1847  }
1848 
1849  // If SingleUseMI is still null, there is no use that needs its own
1850  // checking. Otherwise, it is the single use that needs checking.
1851  return {SingleUseMI};
1852  };
1853 
1854  MachineInstr *MI = &InitialMI;
1855  while (Optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1856  // Update which MI we're checking now.
1857  MI = *SingleUse;
1858  if (!MI)
1859  break;
1860  }
1861 
1862  return MI;
1863 }
1864 
1865 bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
1866  auto *RC = MRI->getRegClass(Reg);
1867  int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1868  if (RegBytes > 8)
1869  // We don't support post-load hardening of vectors.
1870  return false;
1871 
1872  unsigned RegIdx = Log2_32(RegBytes);
1873  assert(RegIdx < 4 && "Unsupported register size");
1874 
1875  // If this register class is explicitly constrained to a class that doesn't
1876  // require REX prefix, we may not be able to satisfy that constraint when
1877  // emitting the hardening instructions, so bail out here.
1878  // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1879  // end up both with a NOREX and REX-only register as operands to the hardening
1880  // instructions. It would be better to fix that code to handle this situation
1881  // rather than hack around it in this way.
1882  const TargetRegisterClass *NOREXRegClasses[] = {
1883  &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1884  &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1885  if (RC == NOREXRegClasses[RegIdx])
1886  return false;
1887 
1888  const TargetRegisterClass *GPRRegClasses[] = {
1889  &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1890  &X86::GR64RegClass};
1891  return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1892 }
1893 
1894 /// Harden a value in a register.
1895 ///
1896 /// This is the low-level logic to fully harden a value sitting in a register
1897 /// against leaking during speculative execution.
1898 ///
1899 /// Unlike hardening an address that is used by a load, this routine is required
1900 /// to hide *all* incoming bits in the register.
1901 ///
1902 /// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1903 /// larger than the predicate state register. FIXME: We should support vector
1904 /// registers here by broadcasting the predicate state.
1905 ///
1906 /// The new, hardened virtual register is returned. It will have the same
1907 /// register class as `Reg`.
1908 unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1909  Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1910  DebugLoc Loc) {
1911  assert(canHardenRegister(Reg) && "Cannot harden this register!");
1912  assert(Reg.isVirtual() && "Cannot harden a physical register!");
1913 
1914  auto *RC = MRI->getRegClass(Reg);
1915  int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1916  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1917  assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1918  "Unknown register size");
1919 
1920  // FIXME: Need to teach this about 32-bit mode.
1921  if (Bytes != 8) {
1922  unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1923  unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1924  Register NarrowStateReg = MRI->createVirtualRegister(RC);
1925  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1926  .addReg(StateReg, 0, SubRegImm);
1927  StateReg = NarrowStateReg;
1928  }
1929 
1930  unsigned FlagsReg = 0;
1931  if (isEFLAGSLive(MBB, InsertPt, *TRI))
1932  FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1933 
1934  Register NewReg = MRI->createVirtualRegister(RC);
1935  unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1936  unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1937  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1938  .addReg(StateReg)
1939  .addReg(Reg);
1940  OrI->addRegisterDead(X86::EFLAGS, TRI);
1941  ++NumInstsInserted;
1942  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1943 
1944  if (FlagsReg)
1945  restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1946 
1947  return NewReg;
1948 }
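For GPRs narrower than the predicate state, the pass first copies the matching sub-register of the state and then emits the OR above. A scalar sketch of the 32-bit case (hypothetical helper, for illustration):

#include <cstdint>

// Model of value hardening for a 32-bit GPR: take the low 32 bits of the
// 64-bit predicate state (the sub_32bit COPY above) and OR them in. Under
// misspeculation the state is all-ones, so the value becomes all-ones and its
// real bits cannot leak through later dependent operations.
static inline uint32_t hardenValueInRegisterModel32(uint32_t Value,
                                                    uint64_t PredState) {
  return Value | static_cast<uint32_t>(PredState);
}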
1949 
1950 /// Harden a load by hardening the loaded value in the defined register.
1951 ///
1952 /// We can harden a non-leaking load into a register without touching the
1953 /// address by just hiding all of the loaded bits during misspeculation. We use
1954 /// an `or` instruction to do this because we set up our poison value as all
1955 /// ones. The goal is just for the loaded bits not to be exposed to speculative
1956 /// execution, and coercing them all to one is sufficient for that.
1957 ///
1958 /// Returns the newly hardened register.
1959 unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1960  MachineBasicBlock &MBB = *MI.getParent();
1961  const DebugLoc &Loc = MI.getDebugLoc();
1962 
1963  auto &DefOp = MI.getOperand(0);
1964  Register OldDefReg = DefOp.getReg();
1965  auto *DefRC = MRI->getRegClass(OldDefReg);
1966 
1967  // Because we want to completely replace the uses of this def'ed value with
1968  // the hardened value, create a dedicated new register that will only be used
1969  // to communicate the unhardened value to the hardening.
1970  Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1971  DefOp.setReg(UnhardenedReg);
1972 
1973  // Now harden this register's value, getting a hardened reg that is safe to
1974  // use. Note that we insert the instructions to compute this *after* the
1975  // defining instruction, not before it.
1976  unsigned HardenedReg = hardenValueInRegister(
1977  UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1978 
1979  // Finally, replace the old register (which now only has the uses of the
1980  // original def) with the hardened register.
1981  MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1982 
1983  ++NumPostLoadRegsHardened;
1984  return HardenedReg;
1985 }
1986 
1987 /// Harden a return instruction.
1988 ///
1989 /// Return instructions implicitly perform a load which we need to harden. Without
1990 /// hardening this load, an attacker may speculatively write over the return
1991 /// address to steer speculation of the return to an attacker-controlled address. This is
1992 /// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1993 /// this paper:
1994 /// https://people.csail.mit.edu/vlk/spectre11.pdf
1995 ///
1996 /// We can harden this by introducing an LFENCE that will delay any load of the
1997 /// return address until prior instructions have retired (and thus are not being
1998 /// speculated), or we can harden the address used by the implicit load: the
1999 /// stack pointer.
2000 ///
2001 /// If we are not using an LFENCE, hardening the stack pointer has an additional
2002 /// benefit: it allows us to pass the predicate state accumulated in this
2003 /// function back to the caller. In the absence of a BCBS attack on the return,
2004 /// the caller will typically be resumed and speculatively executed due to the
2005 /// Return Stack Buffer (RSB) prediction which is very accurate and has a high
2006 /// priority. It is possible that some code from the caller will be executed
2007 /// speculatively even during a BCBS-attacked return until the steering takes
2008 /// effect. Whenever this happens, the caller can recover the (poisoned)
2009 /// predicate state from the stack pointer and continue to harden loads.
2010 void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2011  MachineBasicBlock &MBB = *MI.getParent();
2012  const DebugLoc &Loc = MI.getDebugLoc();
2013  auto InsertPt = MI.getIterator();
2014 
2015  if (FenceCallAndRet)
2016  // No need to fence here as we'll fence at the return site itself. That
2017  // handles more cases than we can handle here.
2018  return;
2019 
2020  // Take our predicate state, shift it to the high 17 bits (so that we keep
2021  // pointers canonical) and merge it into RSP. This will allow the caller to
2022  // extract it when we return (speculatively).
2023  mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2024 }
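Putting the two scalar sketches above together, a hypothetical round trip across a hardened return looks like this (illustration only; the real transfer happens through the emitted SHL/OR and SAR sequences):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Poison = ~0ULL;              // all-ones predicate state
  const uint64_t Good = 0;                    // all-zeros predicate state
  const uint64_t RSP = 0x00007fffffffe000ULL; // a canonical user-space SP

  // Callee side: merge the state into RSP just before `ret`.
  uint64_t PoisonedSP = RSP | (Poison << 47);
  uint64_t CleanSP = RSP | (Good << 47);

  // Caller side: recover the state after the return.
  assert((static_cast<int64_t>(PoisonedSP) >> 63) == -1);
  assert((static_cast<int64_t>(CleanSP) >> 63) == 0);
  return 0;
}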
2025 
2026 /// Trace the predicate state through a call.
2027 ///
2028 /// There are several layers of this needed to handle the full complexity of
2029 /// calls.
2030 ///
2031 /// First, we need to send the predicate state into the called function. We do
2032 /// this by merging it into the high bits of the stack pointer.
2033 ///
2034 /// For tail calls, this is all we need to do.
2035 ///
2036 /// For calls where we might return and resume the control flow, we need to
2037 /// extract the predicate state from the high bits of the stack pointer after
2038 /// control returns from the called function.
2039 ///
2040 /// We also need to verify that we intended to return to this location in the
2041 /// code. An attacker might arrange for the processor to mispredict the return
2042 /// to this valid but incorrect return address in the program rather than the
2043 /// correct one. See the paper on this attack, called "ret2spec" by the
2044 /// researchers, here:
2045 /// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2046 ///
2047 /// The way we verify that we returned to the correct location is by preserving
2048 /// the expected return address across the call. One technique involves taking
2049 /// advantage of the red-zone to load the return address from `-8(%rsp)` where it
2050 /// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2051 /// directly save the address into a register that will be preserved across the
2052 /// call. We compare this intended return address against the address
2053 /// immediately following the call (the observed return address). If these
2054 /// mismatch, we have detected misspeculation and can poison our predicate
2055 /// state.
2056 void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2057  MachineInstr &MI) {
2058  MachineBasicBlock &MBB = *MI.getParent();
2059  MachineFunction &MF = *MBB.getParent();
2060  auto InsertPt = MI.getIterator();
2061  const DebugLoc &Loc = MI.getDebugLoc();
2062 
2063  if (FenceCallAndRet) {
2064  if (MI.isReturn())
2065  // Tail call, we don't return to this function.
2066  // FIXME: We should also handle noreturn calls.
2067  return;
2068 
2069  // We don't need to fence before the call because the function should fence
2070  // in its entry. However, we do need to fence after the call returns.
2071  // Fencing before the return doesn't correctly handle cases where the return
2072  // itself is mispredicted.
2073  BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2074  ++NumInstsInserted;
2075  ++NumLFENCEsInserted;
2076  return;
2077  }
2078 
2079  // First, we transfer the predicate state into the called function by merging
2080  // it into the stack pointer. This will kill the current def of the state.
2081  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2082  mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2083 
2084  // If this call is also a return, it is a tail call and we don't need anything
2085  // else to handle it so just return. Also, if there are no further
2086  // instructions and no successors, this call does not return so we can also
2087  // bail.
2088  if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2089  return;
2090 
2091  // Create a symbol to track the return address and attach it to the call
2092  // machine instruction. We will lower extra symbols attached to call
2093  // instructions as a label immediately following the call.
2094  MCSymbol *RetSymbol =
2095  MF.getContext().createTempSymbol("slh_ret_addr",
2096  /*AlwaysAddSuffix*/ true);
2097  MI.setPostInstrSymbol(MF, RetSymbol);
2098 
2099  const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2100  unsigned ExpectedRetAddrReg = 0;
2101 
2102  // If we have no red zones or if the function returns twice (possibly without
2103  // using the `ret` instruction) like setjmp, we need to save the expected
2104  // return address prior to the call.
2105  if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2106  MF.exposesReturnsTwice()) {
2107  // If we don't have red zones, we need to compute the expected return
2108  // address prior to the call and store it in a register that lives across
2109  // the call.
2110  //
2111  // In some ways, this is doubly satisfying as a mitigation because it will
2112  // also successfully detect stack smashing bugs in some cases (typically,
2113  // when a callee-saved register is used and the callee doesn't push it onto
2114  // the stack). But that isn't our primary goal, so we only use it as
2115  // a fallback.
2116  //
2117  // FIXME: It isn't clear that this is reliable in the face of
2118  // rematerialization in the register allocator. We somehow need to force
2119  // that to not occur for this particular instruction, and instead to spill
2120  // or otherwise preserve the value computed *prior* to the call.
2121  //
2122  // FIXME: It is even less clear why MachineCSE can't just fold this when we
2123  // end up having to use identical instructions both before and after the
2124  // call to feed the comparison.
2125  ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2126  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2127  !Subtarget->isPositionIndependent()) {
2128  BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2129  .addSym(RetSymbol);
2130  } else {
2131  BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2132  .addReg(/*Base*/ X86::RIP)
2133  .addImm(/*Scale*/ 1)
2134  .addReg(/*Index*/ 0)
2135  .addSym(RetSymbol)
2136  .addReg(/*Segment*/ 0);
2137  }
2138  }
2139 
2140  // Step past the call to handle when it returns.
2141  ++InsertPt;
2142 
2143  // If we didn't pre-compute the expected return address into a register, then
2144  // red zones are enabled and the return address is still available on the
2145  // stack immediately after the call. As the very first instruction, we load it
2146  // into a register.
2147  if (!ExpectedRetAddrReg) {
2148  ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2149  BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2150  .addReg(/*Base*/ X86::RSP)
2151  .addImm(/*Scale*/ 1)
2152  .addReg(/*Index*/ 0)
2153  .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2154  // the return address is 8-bytes past it.
2155  .addReg(/*Segment*/ 0);
2156  }
2157 
2158  // Now we extract the callee's predicate state from the stack pointer.
2159  unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2160 
2161  // Test the expected return address against our actual address. If we can
2162  // form this basic block's address as an immediate, this is easy. Otherwise
2163  // we compute it.
2164  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2165  !Subtarget->isPositionIndependent()) {
2166  // FIXME: Could we fold this with the load? It would require careful EFLAGS
2167  // management.
2168  BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2169  .addReg(ExpectedRetAddrReg, RegState::Kill)
2170  .addSym(RetSymbol);
2171  } else {
2172  Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2173  BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2174  .addReg(/*Base*/ X86::RIP)
2175  .addImm(/*Scale*/ 1)
2176  .addReg(/*Index*/ 0)
2177  .addSym(RetSymbol)
2178  .addReg(/*Segment*/ 0);
2179  BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2180  .addReg(ExpectedRetAddrReg, RegState::Kill)
2181  .addReg(ActualRetAddrReg, RegState::Kill);
2182  }
2183 
2184  // Now conditionally update the predicate state we just extracted if we ended
2185  // up at a different return address than expected.
2186  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2187  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2188 
2189  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2190  auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2191  .addReg(NewStateReg, RegState::Kill)
2192  .addReg(PS->PoisonReg)
2193  .addImm(X86::COND_NE);
2194  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
2195  ++NumInstsInserted;
2196  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2197 
2198  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2199 }
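A conceptual model of the post-call check emitted above (hypothetical names; the pass lowers the comparison to CMP plus a CMOVNE selecting the poison register):

#include <cstdint>

// After the call, recover the callee's predicate state from RSP and force it
// to the all-ones poison value if the observed return address differs from
// the one recorded (or recomputed) before the call, i.e. if the return was
// steered somewhere unexpected.
static inline uint64_t checkReturnAddressModel(uint64_t RSPAfterCall,
                                               uint64_t ExpectedRetAddr,
                                               uint64_t ActualRetAddr) {
  uint64_t State =
      static_cast<uint64_t>(static_cast<int64_t>(RSPAfterCall) >> 63);
  if (ExpectedRetAddr != ActualRetAddr) // CMP64ri32/CMP64rr + CMOVNE above
    State = ~0ULL;                      // the pass's poison value (all-ones)
  return State;
}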
2200 
2201 /// An attacker may speculatively store over a value that is then speculatively
2202 /// loaded and used as the target of an indirect call or jump instruction. This
2203 /// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2204 /// in this paper:
2205 /// https://people.csail.mit.edu/vlk/spectre11.pdf
2206 ///
2207 /// When this happens, the speculative execution of the call or jump will end up
2208 /// being steered to this attacker controlled address. While most such loads
2209 /// will be adequately hardened already, we want to ensure that they are
2210 /// definitively treated as needing post-load hardening. While address hardening
2211 /// is sufficient to prevent secret data from leaking to the attacker, it may
2212 /// not be sufficient to prevent an attacker from steering speculative
2213 /// execution. We forcibly unfolded all relevant loads above and so will always
2214 /// have an opportunity to post-load harden here; we just need to scan for cases
2215 /// not already flagged and add them.
2216 void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2217  MachineInstr &MI,
2218  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
2219  switch (MI.getOpcode()) {
2220  case X86::FARCALL16m:
2221  case X86::FARCALL32m:
2222  case X86::FARCALL64m:
2223  case X86::FARJMP16m:
2224  case X86::FARJMP32m:
2225  case X86::FARJMP64m:
2226  // We don't need to harden either far calls or far jumps as they are
2227  // safe from Spectre.
2228  return;
2229 
2230  default:
2231  break;
2232  }
2233 
2234  // We should never see a loading instruction at this point, as those should
2235  // have been unfolded.
2236  assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2237 
2238  // If the first operand isn't a register, this is a branch or call
2239  // instruction with an immediate operand which doesn't need to be hardened.
2240  if (!MI.getOperand(0).isReg())
2241  return;
2242 
2243  // For all of these, the target register is the first operand of the
2244  // instruction.
2245  auto &TargetOp = MI.getOperand(0);
2246  Register OldTargetReg = TargetOp.getReg();
2247 
2248  // Try to lookup a hardened version of this register. We retain a reference
2249  // here as we want to update the map to track any newly computed hardened
2250  // register.
2251  unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2252 
2253  // If we don't have a hardened register yet, compute one. Otherwise, just use
2254  // the already hardened register.
2255  //
2256  // FIXME: It is a little suspect that we use partially hardened registers that
2257  // only feed addresses. The complexity of partial hardening with SHRX
2258  // continues to pile up. Should definitively measure its value and consider
2259  // eliminating it.
2260  if (!HardenedTargetReg)
2261  HardenedTargetReg = hardenValueInRegister(
2262  OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2263 
2264  // Set the target operand to the hardened register.
2265  TargetOp.setReg(HardenedTargetReg);
2266 
2267  ++NumCallsOrJumpsHardened;
2268 }
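The effect on an indirect branch target reduces to the same OR used elsewhere; a scalar sketch (hypothetical helper, illustration only):

#include <cstdint>

// Model of hardening an indirect call/jump target: ORing in the predicate
// state means that under misspeculation the target collapses to all-ones, so
// speculation cannot be steered to an attacker-controlled address loaded via
// a Spectre v1.2 style speculative store.
static inline uint64_t hardenBranchTargetModel(uint64_t Target,
                                               uint64_t PredState) {
  return Target | PredState;
}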
2269 
2270 INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2271  "X86 speculative load hardener", false, false)
2272 INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2273  "X86 speculative load hardener", false, false)
2274 
2275 FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2276  return new X86SpeculativeLoadHardeningPass();
2277 }
llvm::MachineInstr::isBranch
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:855
llvm::MachineBasicBlock::succ_size
unsigned succ_size() const
Definition: MachineBasicBlock.h:344
llvm::X86InstrInfo::isDataInvariant
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value o...
Definition: X86InstrInfo.cpp:139
llvm::MachineInstr::addRegisterDead
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
Definition: MachineInstr.cpp:1958
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::X86II::getMemoryOperandNo
int getMemoryOperandNo(uint64_t TSFlags)
The function returns the MCInst operand # for the first field of the memory operand.
Definition: X86BaseInfo.h:1095
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
MachineSSAUpdater.h
Reg
unsigned Reg
Definition: MachineSink.cpp:1558
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:102
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::MachineBasicBlock::isLiveIn
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Definition: MachineBasicBlock.cpp:579
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
Optional.h
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
X86Subtarget.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
HardenIndirectCallsAndJumps
static cl::opt< bool > HardenIndirectCallsAndJumps(PASS_KEY "-indirect", cl::desc("Harden indirect calls and jumps against using speculatively " "stored attacker controlled addresses. This is designed to " "mitigate Spectre v1.2 style attacks."), cl::init(true), cl::Hidden)
llvm::MachineFunction::getContext
MCContext & getContext() const
Definition: MachineFunction.h:586
llvm::SparseBitVector::clear
void clear()
Definition: SparseBitVector.h:451
Pass.h
X86InstrBuilder.h
llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition: MachineBasicBlock.h:263
llvm::MachineBasicBlock::isEHFuncletEntry
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
Definition: MachineBasicBlock.h:552
llvm::SystemZII::Is128Bit
@ Is128Bit
Definition: SystemZInstrInfo.h:40
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:500
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::MachineFunction::end
iterator end()
Definition: MachineFunction.h:818
llvm::X86Subtarget
Definition: X86Subtarget.h:52
llvm::MachineFunction::exposesReturnsTwice
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
Definition: MachineFunction.h:696
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1781
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
DenseMap.h
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:485
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:359
TargetInstrInfo.h
llvm::MachineInstr::findRegisterUseOperand
MachineOperand * findRegisterUseOperand(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
Definition: MachineInstr.h:1432
HardenLoads
static cl::opt< bool > HardenLoads(PASS_KEY "-loads", cl::desc("Sanitize loads from memory. When disable, no " "significant security is provided."), cl::init(true), cl::Hidden)
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:835
llvm::SmallSet< unsigned, 16 >
llvm::Optional
Definition: APInt.h:33
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::X86InstrInfo::isDataInvariantLoad
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value l...
Definition: X86InstrInfo.cpp:434
llvm::X86::getCondFromBranch
CondCode getCondFromBranch(const MachineInstr &MI)
Definition: X86InstrInfo.cpp:2816
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:288
llvm::X86::COND_INVALID
@ COND_INVALID
Definition: X86BaseInfo.h:107
llvm::MachineOperand::isFI
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
Definition: MachineOperand.h:331
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1559
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
MachineRegisterInfo.h
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:294
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SparseBitVector.h
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:750
CommandLine.h
llvm::SparseBitVector
Definition: SparseBitVector.h:255
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1600
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:644
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:203
X86.h
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:380
TargetMachine.h
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:912
getRegClassForUnfoldedLoad
static const TargetRegisterClass * getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII, unsigned Opcode)
Compute the register class for the unfolded load.
Definition: X86SpeculativeLoadHardening.cpp:841
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::createX86SpeculativeLoadHardeningPass
FunctionPass * createX86SpeculativeLoadHardeningPass()
Definition: X86SpeculativeLoadHardening.cpp:2275
llvm::MachineOperand::isMBB
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
Definition: MachineOperand.h:329
false
Definition: StackSlotColoring.cpp:142
llvm::X86ISD::MFENCE
@ MFENCE
Definition: X86ISelLowering.h:667
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
FenceCallAndRet
static cl::opt< bool > FenceCallAndRet(PASS_KEY "-fence-call-and-ret", cl::desc("Use a full speculation fence to harden both call and ret edges " "rather than a lighter weight mitigation."), cl::init(false), cl::Hidden)
llvm::TargetRegisterClass::hasSuperClassEq
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
Definition: TargetRegisterInfo.h:138
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::HexagonInstrInfo::insertBranch
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
Definition: HexagonInstrInfo.cpp:582
llvm::MachineFunction::begin
iterator begin()
Definition: MachineFunction.h:816
DebugLoc.h
SmallPtrSet.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::MachineBasicBlock::isCleanupFuncletEntry
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
Definition: MachineBasicBlock.h:558
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
HardenEdgesWithLFENCE
static cl::opt< bool > HardenEdgesWithLFENCE(PASS_KEY "-lfence", cl::desc("Use LFENCE along each conditional edge to harden against speculative " "loads rather than conditional movs and poisoned pointers."), cl::init(false), cl::Hidden)
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:634
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:626
llvm::cl::opt< bool >
SSA
Memory SSA
Definition: MemorySSA.cpp:73
llvm::X86::AddrBaseReg
@ AddrBaseReg
Definition: X86BaseInfo.h:32
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
HardenInterprocedurally
static cl::opt< bool > HardenInterprocedurally(PASS_KEY "-ip", cl::desc("Harden interprocedurally by passing our state in and out of " "functions in the high bits of the stack pointer."), cl::init(true), cl::Hidden)
TargetSchedule.h
llvm::X86II::getOperandBias
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:1055
MCSchedule.h
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:506
llvm::SparseBitVector::set
void set(unsigned Idx)
Definition: SparseBitVector.h:507
llvm::MachineBasicBlock::instr_rend
reverse_instr_iterator instr_rend()
Definition: MachineBasicBlock.h:258
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:111
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
splitEdge
static MachineBasicBlock & splitEdge(MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount, MachineInstr *Br, MachineInstr *&UncondBr, const X86InstrInfo &TII)
Definition: X86SpeculativeLoadHardening.cpp:224
canonicalizePHIOperands
static void canonicalizePHIOperands(MachineFunction &MF)
Removing duplicate PHI operands to leave the PHI in a canonical and predictable form.
Definition: X86SpeculativeLoadHardening.cpp:328
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:816
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::MachineBasicBlock::SkipPHIsAndLabels
iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
Definition: MachineBasicBlock.cpp:209
llvm::MachineBasicBlock::instr_rbegin
reverse_instr_iterator instr_rbegin()
Definition: MachineBasicBlock.h:256
llvm::SparseBitVector::test
bool test(unsigned Idx) const
Definition: SparseBitVector.h:471
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:593
MachineConstantPool.h
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:415
ArrayRef.h
MachineFunctionPass.h
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:542
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::setHasAddressTaken
void setHasAddressTaken()
Set this block to reflect that it potentially is the target of an indirect branch.
Definition: MachineBasicBlock.h:215
load
LLVM currently emits rax rax movq rax rax ret It could narrow the loads and stores to emit rax rax movq rax rax ret The trouble is that there is a TokenFactor between the store and the load
Definition: README.txt:1531
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
MachineModuleInfo.h
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:349
llvm::X86::GetOppositeBranchCondition
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
Definition: X86InstrInfo.cpp:2848
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::MachineBasicBlock::succ_empty
bool succ_empty() const
Definition: MachineBasicBlock.h:347
llvm::X86InstrInfo
Definition: X86InstrInfo.h:130
hardener
X86 speculative load hardener
Definition: X86SpeculativeLoadHardening.cpp:2273
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:241
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:552
llvm::MachineBasicBlock::isEHScopeEntry
bool isEHScopeEntry() const
Returns true if this is the entry block of an EH scope, i.e., the block that used to have a catchpad ...
Definition: MachineBasicBlock.h:539
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1607
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:355
llvm::MachineBasicBlock::isEHPad
bool isEHPad() const
Returns true if the block is a landing pad.
Definition: MachineBasicBlock.h:526
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
TargetSubtargetInfo.h
llvm::MachineInstr::isTerminator
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:847
llvm::MachineBasicBlock::splitSuccessor
void splitSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New, bool NormalizeSuccProbs=false)
Split the old successor into old plus new and updates the probability info.
Definition: MachineBasicBlock.cpp:769
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:131
llvm::MCContext::createTempSymbol
MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
Definition: MCContext.cpp:303
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:367
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
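A hedged sketch of the rewrite step (both register names are placeholders): once a hardened copy of a value exists, all remaining uses of the original vreg can be redirected to it.

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"

  // Hypothetical helper: make every user read the hardened register instead
  // of the unhardened original.
  static void redirectToHardened(llvm::MachineFunction &MF,
                                 llvm::Register UnhardenedReg,
                                 llvm::Register HardenedReg) {
    MF.getRegInfo().replaceRegWith(UnhardenedReg, HardenedReg);
  }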
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:811
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:600
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:276
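A sketch, under the assumption that Reg is a virtual register with an assigned register class, of turning the class size into a byte width (for example to feed X86::getCMovOpcode, documented further below):

  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/TargetRegisterInfo.h"

  // Hypothetical helper: width of Reg in bytes, derived from its register class.
  static unsigned regWidthInBytes(const llvm::MachineRegisterInfo &MRI,
                                  const llvm::TargetRegisterInfo &TRI,
                                  llvm::Register Reg) {
    const llvm::TargetRegisterClass *RC = MRI.getRegClass(Reg);
    return TRI.getRegSizeInBits(*RC) / 8;
  }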
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Definition: MachineFunction.h:630
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1314
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1541
PASS_KEY
#define PASS_KEY
Definition: X86SpeculativeLoadHardening.cpp:64
llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition: TargetMachine.cpp:74
llvm::unique
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:1753
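A small sketch combining llvm::sort (above) with llvm::unique to deduplicate a vector; the contents and helper name are hypothetical:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"

  // Sort, then erase the adjacent duplicates found by the predicate.
  static void sortAndDeduplicate(llvm::SmallVectorImpl<int> &Values) {
    llvm::sort(Values.begin(), Values.end());
    Values.erase(llvm::unique(Values, [](int A, int B) { return A == B; }),
                 Values.end());
  }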
llvm::MachineBasicBlock::isLayoutSuccessor
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this block exits by falling through, control will transfer to the specified MBB.
Definition: MachineBasicBlock.cpp:916
llvm::X86::getCMovOpcode
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given register size in bytes, and operand type.
Definition: X86InstrInfo.cpp:2929
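A hedged sketch, modeled loosely on how this pass merges its predicate state with a conditional move; the registers and insertion point are placeholders, and the condition-as-immediate operand form matches the CMOV building used elsewhere in this file:

  #include "X86InstrInfo.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/IR/DebugLoc.h"
  using namespace llvm;

  // Hypothetical helper: DstReg keeps OldReg unless COND_NE holds, in which
  // case it receives NewReg.
  static void buildCondMove(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator InsertPt,
                            const DebugLoc &Loc, const X86InstrInfo &TII,
                            Register DstReg, Register OldReg, Register NewReg,
                            unsigned RegBytes) {
    unsigned CMovOp = X86::getCMovOpcode(RegBytes);
    BuildMI(MBB, InsertPt, Loc, TII.get(CMovOp), DstReg)
        .addReg(OldReg)
        .addReg(NewReg)
        .addImm(X86::COND_NE);
  }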
llvm::MachineBasicBlock::normalizeSuccProbs
void normalizeSuccProbs()
Normalize probabilities of all successors so that the sum of them becomes one.
Definition: MachineBasicBlock.h:654
llvm::MachineSSAUpdater
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple blocks.
Definition: MachineSSAUpdater.h:34
llvm::MachineInstrBuilder::addSym
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
Definition: MachineInstrBuilder.h:267
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY, "X86 speculative load hardener", false, false) INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY, "X86 speculative load hardener", false, false)
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MachineInstrBuilder.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
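A hedged sketch of the no-operand form, modeled on the lfence flavour of the mitigation; the insertion point, debug location, and TII are assumed to come from the surrounding pass:

  #include "X86InstrInfo.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/IR/DebugLoc.h"
  using namespace llvm;

  // Hypothetical helper: place a speculation fence before InsertPt.
  static void insertSpeculationFence(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertPt,
                                     const DebugLoc &Loc,
                                     const X86InstrInfo &TII) {
    BuildMI(MBB, InsertPt, Loc, TII.get(X86::LFENCE));
  }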
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:689
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:218
EnablePostLoadHardening
static cl::opt< bool > EnablePostLoadHardening(PASS_KEY "-post-load", cl::desc("Harden the value loaded *after* it is loaded by " "flushing the loaded bits to 1. This is hard to do " "in general but can be done easily for GPRs."), cl::init(true), cl::Hidden)
ScopeExit.h
llvm::SmallVectorImpl< MachineInstr * >
MachineOperand.h
EnableSpeculativeLoadHardening
static cl::opt< bool > EnableSpeculativeLoadHardening("x86-speculative-load-hardening", cl::desc("Force enable speculative load hardening"), cl::init(false), cl::Hidden)
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instances.
Definition: SmallPtrSet.h:343
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
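A hedged sketch of the deterministic-worklist idiom this container enables (the traversal itself is hypothetical): duplicates are rejected while insertion order, and therefore iteration order, stays stable.

  #include "llvm/ADT/SetVector.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"

  // Hypothetical breadth-first walk over reachable blocks, queueing each once.
  static void walkReachableOnce(llvm::MachineBasicBlock &Entry) {
    llvm::SmallSetVector<llvm::MachineBasicBlock *, 8> Worklist;
    Worklist.insert(&Entry);
    for (unsigned I = 0; I != Worklist.size(); ++I)
      for (llvm::MachineBasicBlock *Succ : Worklist[I]->successors())
        Worklist.insert(Succ); // returns false when Succ was already queued
  }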
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::MachineBasicBlock::SkipPHIsLabelsAndDebug
iterator SkipPHIsLabelsAndDebug(iterator I, bool SkipPseudoOp=true)
Return the first instruction in MBB after I that is not a PHI, label or debug.
Definition: MachineBasicBlock.cpp:224
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
isEFLAGSLive
static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo &TRI)
Definition: X86SpeculativeLoadHardening.cpp:1211
MachineFunction.h
X86InstrInfo.h
llvm::MachineInstrBundleIterator< MachineInstr >
isEFLAGSDefLive
static bool isEFLAGSDefLive(const MachineInstr &MI)
Definition: X86SpeculativeLoadHardening.cpp:1204
TargetRegisterInfo.h
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:313
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
hasVulnerableLoad
static bool hasVulnerableLoad(MachineFunction &MF)
Helper to scan a function for loads vulnerable to misspeculation that we want to harden.
Definition: X86SpeculativeLoadHardening.cpp:372
llvm::X86::COND_NE
@ COND_NE
Definition: X86BaseInfo.h:86
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
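A sketch of the visited-set idiom built on this return value; the processing step is left as a placeholder comment:

  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"

  // Hypothetical helper: handle each block at most once; insert().second is
  // false when the block has already been seen.
  static void processOnce(
      llvm::SmallPtrSetImpl<llvm::MachineBasicBlock *> &Visited,
      llvm::MachineBasicBlock *MBB) {
    if (!Visited.insert(MBB).second)
      return;
    // ... harden instructions in MBB ...
  }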
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38