1 //====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// Provide a pass which mitigates speculative execution attacks which operate
11 /// by speculating incorrectly past some predicate (a type check, bounds check,
12 /// or other condition) to reach a load with invalid inputs and leak the data
13 /// accessed by that load using a side channel out of the speculative domain.
14 ///
15 /// For details on the attacks, see the first variant in both the Project Zero
16 /// writeup and the Spectre paper:
17 /// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18 /// https://spectreattack.com/spectre.pdf
19 ///
20 //===----------------------------------------------------------------------===//
21 
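// For illustration only, a minimal hypothetical sketch (not part of this file)
// of the kind of Spectre v1 gadget this pass hardens: the bounds check below
// may be bypassed speculatively, and the two dependent loads can then leak the
// out-of-bounds byte through the cache. All names here are made up.
//
//   if (untrusted_idx < array_len) {                 // mispredicted predicate
//     unsigned char secret = array[untrusted_idx];   // speculative OOB load
//     unsigned char leak = probe[secret * 512];      // dependent, leaking load
//   }
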
22 #include "X86.h"
23 #include "X86InstrBuilder.h"
24 #include "X86InstrInfo.h"
25 #include "X86Subtarget.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/ScopeExit.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/MC/MCSchedule.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Debug.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <iterator>
60 #include <utility>
61 
62 using namespace llvm;
63 
64 #define PASS_KEY "x86-slh"
65 #define DEBUG_TYPE PASS_KEY
66 
67 STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
68 STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
69 STATISTIC(NumAddrRegsHardened,
70  "Number of address mode used registers hardaned");
71 STATISTIC(NumPostLoadRegsHardened,
72  "Number of post-load register values hardened");
73 STATISTIC(NumCallsOrJumpsHardened,
74  "Number of calls or jumps requiring extra hardening");
75 STATISTIC(NumInstsInserted, "Number of instructions inserted");
76 STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
77 
79  "x86-speculative-load-hardening",
80  cl::desc("Force enable speculative load hardening"), cl::init(false),
81  cl::Hidden);
82 
84  PASS_KEY "-lfence",
85  cl::desc(
86  "Use LFENCE along each conditional edge to harden against speculative "
87  "loads rather than conditional movs and poisoned pointers."),
88  cl::init(false), cl::Hidden);
89 
91  PASS_KEY "-post-load",
92  cl::desc("Harden the value loaded *after* it is loaded by "
93  "flushing the loaded bits to 1. This is hard to do "
94  "in general but can be done easily for GPRs."),
95  cl::init(true), cl::Hidden);
96 
98  PASS_KEY "-fence-call-and-ret",
99  cl::desc("Use a full speculation fence to harden both call and ret edges "
100  "rather than a lighter weight mitigation."),
101  cl::init(false), cl::Hidden);
102 
104  PASS_KEY "-ip",
105  cl::desc("Harden interprocedurally by passing our state in and out of "
106  "functions in the high bits of the stack pointer."),
107  cl::init(true), cl::Hidden);
108 
109 static cl::opt<bool>
110  HardenLoads(PASS_KEY "-loads",
111  cl::desc("Sanitize loads from memory. When disable, no "
112  "significant security is provided."),
113  cl::init(true), cl::Hidden);
114 
116  PASS_KEY "-indirect",
117  cl::desc("Harden indirect calls and jumps against using speculatively "
118  "stored attacker controlled addresses. This is designed to "
119  "mitigate Spectre v1.2 style attacks."),
120  cl::init(true), cl::Hidden);
121 
122 namespace {
123 
124 class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
125 public:
126  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
127 
128  StringRef getPassName() const override {
129  return "X86 speculative load hardening";
130  }
131  bool runOnMachineFunction(MachineFunction &MF) override;
132  void getAnalysisUsage(AnalysisUsage &AU) const override;
133 
134  /// Pass identification, replacement for typeid.
135  static char ID;
136 
137 private:
138  /// The information about a block's conditional terminators needed to trace
139  /// our predicate state through the exiting edges.
140  struct BlockCondInfo {
141  MachineBasicBlock *MBB;
142 
143  // We mostly have one conditional branch, and in extremely rare cases have
144  // two. Three and more are so rare as to be unimportant for compile time.
145  SmallVector<MachineInstr *, 2> CondBrs;
146 
147  MachineInstr *UncondBr;
148  };
149 
150  /// Manages the predicate state traced through the program.
151  struct PredState {
152  unsigned InitialReg = 0;
153  unsigned PoisonReg = 0;
154 
155  const TargetRegisterClass *RC;
156  MachineSSAUpdater SSA;
157 
158  PredState(MachineFunction &MF, const TargetRegisterClass *RC)
159  : RC(RC), SSA(MF) {}
160  };
161 
162  const X86Subtarget *Subtarget = nullptr;
163  MachineRegisterInfo *MRI = nullptr;
164  const X86InstrInfo *TII = nullptr;
165  const TargetRegisterInfo *TRI = nullptr;
166 
167  Optional<PredState> PS;
168 
169  void hardenEdgesWithLFENCE(MachineFunction &MF);
170 
171  SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
172 
173  SmallVector<MachineInstr *, 16>
174  tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
175 
176  void unfoldCallAndJumpLoads(MachineFunction &MF);
177 
178  SmallVector<MachineInstr *, 16>
179  tracePredStateThroughIndirectBranches(MachineFunction &MF);
180 
181  void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
182 
183  unsigned saveEFLAGS(MachineBasicBlock &MBB,
184  MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
185  void restoreEFLAGS(MachineBasicBlock &MBB,
186  MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
187  unsigned OFReg);
188 
189  void mergePredStateIntoSP(MachineBasicBlock &MBB,
190  MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
191  unsigned PredStateReg);
192  unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
193  MachineBasicBlock::iterator InsertPt,
194  DebugLoc Loc);
195 
196  void
197  hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
198  MachineOperand &IndexMO,
199  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
200  MachineInstr *
201  sinkPostLoadHardenedInst(MachineInstr &MI,
202  SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
203  bool canHardenRegister(unsigned Reg);
204  unsigned hardenValueInRegister(unsigned Reg, MachineBasicBlock &MBB,
205  MachineBasicBlock::iterator InsertPt,
206  DebugLoc Loc);
207  unsigned hardenPostLoad(MachineInstr &MI);
208  void hardenReturnInstr(MachineInstr &MI);
209  void tracePredStateThroughCall(MachineInstr &MI);
210  void hardenIndirectCallOrJumpInstr(
211  MachineInstr &MI,
212  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
213 };
214 
215 } // end anonymous namespace
216 
217 char X86SpeculativeLoadHardeningPass::ID = 0;
218 
219 void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
220  AnalysisUsage &AU) const {
221  MachineFunctionPass::getAnalysisUsage(AU);
222 }
223 
225  MachineBasicBlock &Succ, int SuccCount,
226  MachineInstr *Br, MachineInstr *&UncondBr,
227  const X86InstrInfo &TII) {
228  assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
229 
230  MachineFunction &MF = *MBB.getParent();
231 
232  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
233 
234  // We have to insert the new block immediately after the current one as we
235  // don't know what layout-successor relationships the successor has and we
236  // may not be able to (and generally don't want to) try to fix those up.
237  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
238 
239  // Update the branch instruction if necessary.
240  if (Br) {
241  assert(Br->getOperand(0).getMBB() == &Succ &&
242  "Didn't start with the right target!");
243  Br->getOperand(0).setMBB(&NewMBB);
244 
245  // If this successor was reached through a branch rather than fallthrough,
246  // we might have *broken* fallthrough and so need to inject a new
247  // unconditional branch.
248  if (!UncondBr) {
249  MachineBasicBlock &OldLayoutSucc =
250  *std::next(MachineFunction::iterator(&NewMBB));
251  assert(MBB.isSuccessor(&OldLayoutSucc) &&
252  "Without an unconditional branch, the old layout successor should "
253  "be an actual successor!");
254  auto BrBuilder =
255  BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
256  // Update the unconditional branch now that we've added one.
257  UncondBr = &*BrBuilder;
258  }
259 
260  // Insert unconditional "jump Succ" instruction in the new block if
261  // necessary.
262  if (!NewMBB.isLayoutSuccessor(&Succ)) {
263  SmallVector<MachineOperand, 4> Cond;
264  TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
265  }
266  } else {
267  assert(!UncondBr &&
268  "Cannot have a branchless successor and an unconditional branch!");
269  assert(NewMBB.isLayoutSuccessor(&Succ) &&
270  "A non-branch successor must have been a layout successor before "
271  "and now is a layout successor of the new block.");
272  }
273 
274  // If this is the only edge to the successor, we can just replace it in the
275  // CFG. Otherwise we need to add a new entry in the CFG for the new
276  // successor.
277  if (SuccCount == 1) {
278  MBB.replaceSuccessor(&Succ, &NewMBB);
279  } else {
280  MBB.splitSuccessor(&Succ, &NewMBB);
281  }
282 
283  // Hook up the edge from the new basic block to the old successor in the CFG.
284  NewMBB.addSuccessor(&Succ);
285 
286  // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
287  for (MachineInstr &MI : Succ) {
288  if (!MI.isPHI())
289  break;
290  for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
291  OpIdx += 2) {
292  MachineOperand &OpV = MI.getOperand(OpIdx);
293  MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
294  assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
295  if (OpMBB.getMBB() != &MBB)
296  continue;
297 
298  // If this is the last edge to the successor, just replace MBB in the PHI.
299  if (SuccCount == 1) {
300  OpMBB.setMBB(&NewMBB);
301  break;
302  }
303 
304  // Otherwise, append a new pair of operands for the new incoming edge.
305  MI.addOperand(MF, OpV);
306  MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
307  break;
308  }
309  }
310 
311  // Inherit live-ins from the successor
312  for (auto &LI : Succ.liveins())
313  NewMBB.addLiveIn(LI);
314 
315  LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
316  << Succ.getName() << "'.\n");
317  return NewMBB;
318 }
319 
320 /// Remove duplicate PHI operands to leave the PHI in a canonical and
321 /// predictable form.
322 ///
323 /// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
324 /// isn't what you might expect. We may have multiple entries in PHI nodes for
325 /// a single predecessor. This makes CFG-updating extremely complex, so here we
326 /// simplify all PHI nodes to a model even simpler than the IR's model: exactly
327 /// one entry per predecessor, regardless of how many edges there are.
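///
/// For example (an illustrative MIR sketch, not taken from a real test), a PHI
/// with two entries for the same predecessor such as
///   %v = PHI %a, %bb.1, %a, %bb.1, %b, %bb.2
/// is rewritten to carry exactly one entry per predecessor:
///   %v = PHI %a, %bb.1, %b, %bb.2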
330  SmallVector<int, 4> DupIndices;
331  for (auto &MBB : MF)
332  for (auto &MI : MBB) {
333  if (!MI.isPHI())
334  break;
335 
336  // First we scan the operands of the PHI looking for duplicate entries for
337  // a particular predecessor. We retain the operand index of each duplicate
338  // entry found.
339  for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
340  OpIdx += 2)
341  if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
342  DupIndices.push_back(OpIdx);
343 
344  // Now walk the duplicate indices, removing both the block and value. Note
345  // that these are stored as a vector making this element-wise removal
347  // potentially quadratic.
348  //
349  // FIXME: It is really frustrating that we have to use a quadratic
350  // removal algorithm here. There should be a better way, but the use-def
351  // updates required make that impossible using the public API.
352  //
353  // Note that we have to process these backwards so that we don't
354  // invalidate other indices with each removal.
355  while (!DupIndices.empty()) {
356  int OpIdx = DupIndices.pop_back_val();
357  // Remove both the block and value operand, again in reverse order to
358  // preserve indices.
359  MI.RemoveOperand(OpIdx + 1);
360  MI.RemoveOperand(OpIdx);
361  }
362 
363  Preds.clear();
364  }
365 }
366 
367 /// Helper to scan a function for loads vulnerable to misspeculation that we
368 /// want to harden.
369 ///
370 /// We use this to avoid making changes to functions where there is nothing we
371 /// need to do to harden against misspeculation.
372 static bool hasVulnerableLoad(MachineFunction &MF) {
373  for (MachineBasicBlock &MBB : MF) {
374  for (MachineInstr &MI : MBB) {
375  // Loads within this basic block after an LFENCE are not at risk of
376  // speculatively executing with invalid predicates from prior control
377  // flow. So break out of this block but continue scanning the function.
378  if (MI.getOpcode() == X86::LFENCE)
379  break;
380 
381  // Looking for loads only.
382  if (!MI.mayLoad())
383  continue;
384 
385  // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
386  if (MI.getOpcode() == X86::MFENCE)
387  continue;
388 
389  // We found a load.
390  return true;
391  }
392  }
393 
394  // No loads found.
395  return false;
396 }
397 
398 bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
399  MachineFunction &MF) {
400  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
401  << " **********\n");
402 
403  // Only run if this pass is force enabled or we detect the relevant function
404  // attribute requesting SLH.
405  if (!EnableSpeculativeLoadHardening &&
406  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
407  return false;
408 
409  Subtarget = &MF.getSubtarget<X86Subtarget>();
410  MRI = &MF.getRegInfo();
411  TII = Subtarget->getInstrInfo();
412  TRI = Subtarget->getRegisterInfo();
413 
414  // FIXME: Support for 32-bit.
415  PS.emplace(MF, &X86::GR64_NOSPRegClass);
416 
417  if (MF.begin() == MF.end())
418  // Nothing to do for a degenerate empty function...
419  return false;
420 
421  // We support an alternative hardening technique based on a debug flag.
422  if (HardenEdgesWithLFENCE) {
423  hardenEdgesWithLFENCE(MF);
424  return true;
425  }
426 
427  // Create a dummy debug loc to use for all the generated code here.
428  DebugLoc Loc;
429 
430  MachineBasicBlock &Entry = *MF.begin();
431  auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
432 
433  // Do a quick scan to see if we have any checkable loads.
434  bool HasVulnerableLoad = hasVulnerableLoad(MF);
435 
436  // See if we have any conditional branching blocks that we will need to trace
437  // predicate state through.
438  SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
439 
440  // If we have no interesting conditions or loads, nothing to do here.
441  if (!HasVulnerableLoad && Infos.empty())
442  return true;
443 
444  // The poison value is required to be an all-ones value for many aspects of
445  // this mitigation.
446  const int PoisonVal = -1;
447  PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
448  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
449  .addImm(PoisonVal);
450  ++NumInstsInserted;
451 
452  // If we have loads being hardened and we've asked for call and ret edges to
453  // get a full fence-based mitigation, inject that fence.
454  if (HasVulnerableLoad && FenceCallAndRet) {
455  // We need to insert an LFENCE at the start of the function to suspend any
456  // incoming misspeculation from the caller. This helps two-fold: the caller
457  // may not have been protected as this code has been, and this code gets to
458  // not take any specific action to protect across calls.
459  // FIXME: We could skip this for functions which unconditionally return
460  // a constant.
461  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
462  ++NumInstsInserted;
463  ++NumLFENCEsInserted;
464  }
465 
466  // If we guarded the entry with an LFENCE and have no conditionals to protect
467  // in blocks, then we're done.
468  if (FenceCallAndRet && Infos.empty())
469  // We may have changed the function's code at this point to insert fences.
470  return true;
471 
472  // If hardening interprocedurally, pick up the caller's predicate state.
473  if (HardenInterprocedurally && !FenceCallAndRet) {
474  // Set up the predicate state by extracting it from the incoming stack
475  // pointer so we pick up any misspeculation in our caller.
476  PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
477  } else {
478  // Otherwise, just build the predicate state itself by zeroing a register
479  // as we don't need any initial state.
480  PS->InitialReg = MRI->createVirtualRegister(PS->RC);
481  Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
482  auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
483  PredStateSubReg);
484  ++NumInstsInserted;
485  MachineOperand *ZeroEFLAGSDefOp =
486  ZeroI->findRegisterDefOperand(X86::EFLAGS);
487  assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
488  "Must have an implicit def of EFLAGS!");
489  ZeroEFLAGSDefOp->setIsDead(true);
490  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
491  PS->InitialReg)
492  .addImm(0)
493  .addReg(PredStateSubReg)
494  .addImm(X86::sub_32bit);
495  }
496 
497  // We're going to need to trace predicate state throughout the function's
498  // CFG. Prepare for this by setting up our initial state of PHIs with unique
499  // predecessor entries and all the initial predicate state.
500  canonicalizePHIOperands(MF);
501 
502  // Track the updated values in an SSA updater to rewrite into SSA form at the
503  // end.
504  PS->SSA.Initialize(PS->InitialReg);
505  PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
506 
507  // Trace through the CFG.
508  auto CMovs = tracePredStateThroughCFG(MF, Infos);
509 
510  // We may also enter basic blocks in this function via exception handling
511  // control flow. Here, if we are hardening interprocedurally, we need to
512  // re-capture the predicate state from the throwing code. In the Itanium ABI,
513  // the throw will always look like a call to __cxa_throw and will have the
514  // predicate state in the stack pointer, so extract fresh predicate state from
515  // the stack pointer and make it available in SSA.
516  // FIXME: Handle non-itanium ABI EH models.
517  if (HardenInterprocedurally) {
518  for (MachineBasicBlock &MBB : MF) {
519  assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
520  assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
521  assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
522  if (!MBB.isEHPad())
523  continue;
524  PS->SSA.AddAvailableValue(
525  &MBB,
526  extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
527  }
528  }
529 
530  if (HardenIndirectCallsAndJumps) {
531  // If we are going to harden calls and jumps we need to unfold their memory
532  // operands.
533  unfoldCallAndJumpLoads(MF);
534 
535  // Then we trace predicate state through the indirect branches.
536  auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
537  CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
538  }
539 
540  // Now that we have the predicate state available at the start of each block
541  // in the CFG, trace it through each block, hardening vulnerable instructions
542  // as we go.
543  tracePredStateThroughBlocksAndHarden(MF);
544 
545  // Now rewrite all the uses of the pred state using the SSA updater to insert
546  // PHIs connecting the state between blocks along the CFG edges.
547  for (MachineInstr *CMovI : CMovs)
548  for (MachineOperand &Op : CMovI->operands()) {
549  if (!Op.isReg() || Op.getReg() != PS->InitialReg)
550  continue;
551 
552  PS->SSA.RewriteUse(Op);
553  }
554 
555  LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
556  dbgs() << "\n"; MF.verify(this));
557  return true;
558 }
559 
560 /// Implements the naive hardening approach of putting an LFENCE after every
561 /// potentially mis-predicted control flow construct.
562 ///
563 /// We include this as an alternative mostly for the purpose of comparison. The
564 /// performance impact of this is expected to be extremely severe and not
565 /// practical for any real-world users.
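///
/// As an illustrative sketch (not literal output), each conditionally reached
/// successor simply begins with a fence:
///   jne .LBB0_2
///   lfence          # top of the fallthrough successor
///   ...
/// .LBB0_2:
///   lfence          # top of the branch target
///   ...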
566 void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
567  MachineFunction &MF) {
568  // First, we scan the function looking for blocks that are reached along edges
569  // that we might want to harden.
570  SmallPtrSet<MachineBasicBlock *, 4> Blocks;
571  for (MachineBasicBlock &MBB : MF) {
572  // If there are no or only one successor, nothing to do here.
573  if (MBB.succ_size() <= 1)
574  continue;
575 
576  // Skip blocks unless their terminators start with a branch. Other
577  // terminators don't seem interesting for guarding against misspeculation.
578  auto TermIt = MBB.getFirstTerminator();
579  if (TermIt == MBB.end() || !TermIt->isBranch())
580  continue;
581 
582  // Add all the non-EH-pad successors to the blocks we want to harden. We
583  // skip EH pads because there isn't really a condition of interest on
584  // entering.
585  for (MachineBasicBlock *SuccMBB : MBB.successors())
586  if (!SuccMBB->isEHPad())
587  Blocks.insert(SuccMBB);
588  }
589 
590  for (MachineBasicBlock *MBB : Blocks) {
591  auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
592  BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
593  ++NumInstsInserted;
594  ++NumLFENCEsInserted;
595  }
596 }
597 
598 SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
599 X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
600  SmallVector<BlockCondInfo, 16> Infos;
601 
602  // Walk the function and build up a summary for each block's conditions that
603  // we need to trace through.
604  for (MachineBasicBlock &MBB : MF) {
605  // If there are no or only one successor, nothing to do here.
606  if (MBB.succ_size() <= 1)
607  continue;
608 
609  // We want to reliably handle any conditional branch terminators in the
610  // MBB, so we manually analyze the branch. We can handle all of the
611  // permutations here, including ones that analyzeBranch cannot.
612  //
613  // The approach is to walk backwards across the terminators, resetting at
614  // any unconditional non-indirect branch, and track all conditional edges
615  // to basic blocks as well as the fallthrough or unconditional successor
616  // edge. For each conditional edge, we track the target and the opposite
617  // condition code in order to inject a "no-op" cmov into that successor
618  // that will harden the predicate. For the fallthrough/unconditional
619  // edge, we inject a separate cmov for each conditional branch with
620  // matching condition codes. This effectively implements an "and" of the
621  // condition flags, even if there isn't a single condition flag that would
622  // directly implement that. We don't bother trying to optimize either of
623  // these cases because if such an optimization is possible, LLVM should
624  // have optimized the conditional *branches* in that way already to reduce
625  // instruction count. This late, we simply assume the minimal number of
626  // branch instructions is being emitted and use that to guide our cmov
627  // insertion.
628 
629  BlockCondInfo Info = {&MBB, {}, nullptr};
630 
631  // Now walk backwards through the terminators and build up successors they
632  // reach and the conditions.
633  for (MachineInstr &MI : llvm::reverse(MBB)) {
634  // Once we've handled all the terminators, we're done.
635  if (!MI.isTerminator())
636  break;
637 
638  // If we see a non-branch terminator, we can't handle anything so bail.
639  if (!MI.isBranch()) {
640  Info.CondBrs.clear();
641  break;
642  }
643 
644  // If we see an unconditional branch, reset our state, clear any
645  // fallthrough, and set this as the "else" successor.
646  if (MI.getOpcode() == X86::JMP_1) {
647  Info.CondBrs.clear();
648  Info.UncondBr = &MI;
649  continue;
650  }
651 
652  // If we get an invalid condition, we have an indirect branch or some
653  // other unanalyzable "fallthrough" case. We model this as a nullptr for
654  // the destination so we can still guard any conditional successors.
655  // Consider code sequences like:
656  // ```
657  // jCC L1
658  // jmpq *%rax
659  // ```
660  // We still want to harden the edge to `L1`.
661  if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
662  Info.CondBrs.clear();
663  Info.UncondBr = &MI;
664  continue;
665  }
666 
667  // We have a vanilla conditional branch, add it to our list.
668  Info.CondBrs.push_back(&MI);
669  }
670  if (Info.CondBrs.empty()) {
671  ++NumBranchesUntraced;
672  LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
673  MBB.dump());
674  continue;
675  }
676 
677  Infos.push_back(Info);
678  }
679 
680  return Infos;
681 }
682 
683 /// Trace the predicate state through the CFG, instrumenting each conditional
684 /// branch such that misspeculation through an edge will poison the predicate
685 /// state.
686 ///
687 /// Returns the list of inserted CMov instructions so that they can have their
688 /// uses of the predicate state rewritten into proper SSA form once it is
689 /// complete.
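///
/// As an illustrative sketch (not literal output), for a branch `jne .Ltarget`
/// the traced code ends up resembling:
///   jne .Ltarget
///   cmovne %poison, %state   # fallthrough: poison if NE actually held
///   ...
/// .Ltarget:
///   cmove %poison, %state    # target: poison if NE did not actually hold
/// so that any misspeculated edge folds the all-ones poison value into the
/// predicate state.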
690 SmallVector<MachineInstr *, 16>
691 X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
692  MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
693  // Collect the inserted cmov instructions so we can rewrite their uses of the
694  // predicate state into SSA form.
695  SmallVector<MachineInstr *, 16> CMovs;
696 
697  // Now walk all of the basic blocks looking for ones that end in conditional
698  // jumps where we need to update this register along each edge.
699  for (const BlockCondInfo &Info : Infos) {
700  MachineBasicBlock &MBB = *Info.MBB;
701  const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
702  MachineInstr *UncondBr = Info.UncondBr;
703 
704  LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
705  << "\n");
706  ++NumCondBranchesTraced;
707 
708  // Compute the non-conditional successor as either the target of any
709  // unconditional branch or the layout successor.
710  MachineBasicBlock *UncondSucc =
711  UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
712  ? UncondBr->getOperand(0).getMBB()
713  : nullptr)
714  : &*std::next(MachineFunction::iterator(&MBB));
715 
716  // Count how many edges there are to any given successor.
717  SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
718  if (UncondSucc)
719  ++SuccCounts[UncondSucc];
720  for (auto *CondBr : CondBrs)
721  ++SuccCounts[CondBr->getOperand(0).getMBB()];
722 
723  // A lambda to insert cmov instructions into a block checking all of the
724  // condition codes in a sequence.
725  auto BuildCheckingBlockForSuccAndConds =
726  [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
727  MachineInstr *Br, MachineInstr *&UncondBr,
728  ArrayRef<X86::CondCode> Conds) {
729  // First, we split the edge to insert the checking block into a safe
730  // location.
731  auto &CheckingMBB =
732  (SuccCount == 1 && Succ.pred_size() == 1)
733  ? Succ
734  : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
735 
736  bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
737  if (!LiveEFLAGS)
738  CheckingMBB.addLiveIn(X86::EFLAGS);
739 
740  // Now insert the cmovs to implement the checks.
741  auto InsertPt = CheckingMBB.begin();
742  assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
743  "Should never have a PHI in the initial checking block as it "
744  "always has a single predecessor!");
745 
746  // We will wire each cmov to each other, but need to start with the
747  // incoming pred state.
748  unsigned CurStateReg = PS->InitialReg;
749 
750  for (X86::CondCode Cond : Conds) {
751  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
752  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
753 
754  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
755  // Note that we intentionally use an empty debug location so that
756  // this picks up the preceding location.
757  auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
758  TII->get(CMovOp), UpdatedStateReg)
759  .addReg(CurStateReg)
760  .addReg(PS->PoisonReg)
761  .addImm(Cond);
762  // If this is the last cmov and the EFLAGS weren't originally
763  // live-in, mark them as killed.
764  if (!LiveEFLAGS && Cond == Conds.back())
765  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
766 
767  ++NumInstsInserted;
768  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
769  dbgs() << "\n");
770 
771  // The first one of the cmovs will be using the top level
772  // `PredStateReg` and need to get rewritten into SSA form.
773  if (CurStateReg == PS->InitialReg)
774  CMovs.push_back(&*CMovI);
775 
776  // The next cmov should start from this one's def.
777  CurStateReg = UpdatedStateReg;
778  }
779 
780  // And put the last one into the available values for SSA form of our
781  // predicate state.
782  PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
783  };
784 
785  std::vector<X86::CondCode> UncondCodeSeq;
786  for (auto *CondBr : CondBrs) {
787  MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
788  int &SuccCount = SuccCounts[&Succ];
789 
790  X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
791  X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
792  UncondCodeSeq.push_back(Cond);
793 
794  BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
795  {InvCond});
796 
797  // Decrement the successor count now that we've split one of the edges.
798  // We need to keep the count of edges to the successor accurate in order
799  // to know above when to *replace* the successor in the CFG vs. just
800  // adding the new successor.
801  --SuccCount;
802  }
803 
804  // Since we may have split edges and changed the number of successors,
805  // normalize the probabilities. This avoids doing it each time we split an
806  // edge.
807  MBB.normalizeSuccProbs();
808 
809  // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
810  // need to intersect the other condition codes. We can do this by just
811  // doing a cmov for each one.
812  if (!UncondSucc)
813  // If we have no fallthrough to protect (perhaps it is an indirect jump?)
814  // just skip this and continue.
815  continue;
816 
817  assert(SuccCounts[UncondSucc] == 1 &&
818  "We should never have more than one edge to the unconditional "
819  "successor at this point because every other edge must have been "
820  "split above!");
821 
822  // Sort and unique the codes to minimize them.
823  llvm::sort(UncondCodeSeq);
824  UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
825  UncondCodeSeq.end());
826 
827  // Build a checking version of the successor.
828  BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
829  UncondBr, UncondBr, UncondCodeSeq);
830  }
831 
832  return CMovs;
833 }
834 
835 /// Compute the register class for the unfolded load.
836 ///
837 /// FIXME: This should probably live in X86InstrInfo, potentially by adding
838 /// a way to unfold into a newly created vreg rather than requiring a register
839 /// input.
840 static const TargetRegisterClass *
841 getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
842  unsigned Opcode) {
843  unsigned Index;
844  unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
845  Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
846  const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
847  return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
848 }
849 
850 void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
851  MachineFunction &MF) {
852  for (MachineBasicBlock &MBB : MF)
853  for (auto MII = MBB.instr_begin(), MIE = MBB.instr_end(); MII != MIE;) {
854  // Grab a reference and increment the iterator so we can remove this
855  // instruction if needed without disturbing the iteration.
856  MachineInstr &MI = *MII++;
857 
858  // Must either be a call or a branch.
859  if (!MI.isCall() && !MI.isBranch())
860  continue;
861  // We only care about loading variants of these instructions.
862  if (!MI.mayLoad())
863  continue;
864 
865  switch (MI.getOpcode()) {
866  default: {
867  LLVM_DEBUG(
868  dbgs() << "ERROR: Found an unexpected loading branch or call "
869  "instruction:\n";
870  MI.dump(); dbgs() << "\n");
871  report_fatal_error("Unexpected loading branch or call!");
872  }
873 
874  case X86::FARCALL16m:
875  case X86::FARCALL32m:
876  case X86::FARCALL64m:
877  case X86::FARJMP16m:
878  case X86::FARJMP32m:
879  case X86::FARJMP64m:
880  // We cannot mitigate far jumps or calls, but we also don't expect them
881  // to be vulnerable to Spectre v1.2 style attacks.
882  continue;
883 
884  case X86::CALL16m:
885  case X86::CALL16m_NT:
886  case X86::CALL32m:
887  case X86::CALL32m_NT:
888  case X86::CALL64m:
889  case X86::CALL64m_NT:
890  case X86::JMP16m:
891  case X86::JMP16m_NT:
892  case X86::JMP32m:
893  case X86::JMP32m_NT:
894  case X86::JMP64m:
895  case X86::JMP64m_NT:
896  case X86::TAILJMPm64:
897  case X86::TAILJMPm64_REX:
898  case X86::TAILJMPm:
899  case X86::TCRETURNmi64:
900  case X86::TCRETURNmi: {
901  // Use the generic unfold logic now that we know we're dealing with
902  // expected instructions.
903  // FIXME: We don't have test coverage for all of these!
904  auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
905  if (!UnfoldedRC) {
906  LLVM_DEBUG(dbgs()
907  << "ERROR: Unable to unfold load from instruction:\n";
908  MI.dump(); dbgs() << "\n");
909  report_fatal_error("Unable to unfold load!");
910  }
911  Register Reg = MRI->createVirtualRegister(UnfoldedRC);
912  SmallVector<MachineInstr *, 2> NewMIs;
913  // If we were able to compute an unfolded reg class, any failure here
914  // is just a programming error so just assert.
915  bool Unfolded =
916  TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
917  /*UnfoldStore*/ false, NewMIs);
918  (void)Unfolded;
919  assert(Unfolded &&
920  "Computed unfolded register class but failed to unfold");
921  // Now stitch the new instructions into place and erase the old one.
922  for (auto *NewMI : NewMIs)
923  MBB.insert(MI.getIterator(), NewMI);
924 
925  // Update the call site info.
926  if (MI.isCandidateForCallSiteEntry())
927  MF.eraseCallSiteInfo(&MI);
928 
929  MI.eraseFromParent();
930  LLVM_DEBUG({
931  dbgs() << "Unfolded load successfully into:\n";
932  for (auto *NewMI : NewMIs) {
933  NewMI->dump();
934  dbgs() << "\n";
935  }
936  });
937  continue;
938  }
939  }
940  llvm_unreachable("Escaped switch with default!");
941  }
942 }
943 
944 /// Trace the predicate state through indirect branches, instrumenting them to
945 /// poison the state if a target is reached that does not match the expected
946 /// target.
947 ///
948 /// This is designed to mitigate Spectre variant 1 attacks where an indirect
949 /// branch is trained to predict a particular target and then mispredicts that
950 /// target in a way that can leak data. Despite using an indirect branch, this
951 /// is really a variant 1 style attack: it does not steer execution to an
952 /// arbitrary or attacker controlled address, and it does not require any
953 /// special code executing next to the victim. This attack can also be mitigated
954 /// through retpolines, but those require either replacing indirect branches
955 /// with conditional direct branches or lowering them through a device that
956 /// blocks speculation. This mitigation can replace these retpoline-style
957 /// mitigations for jump tables and other indirect branches within a function
958 /// when variant 2 isn't a risk while allowing limited speculation. Indirect
959 /// calls, however, cannot be mitigated through this technique without changing
960 /// the ABI in a fundamental way.
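///
/// As an illustrative sketch (not literal output), each block reachable via an
/// indirect `jmpq *%rax` begins by checking that it really was the intended
/// target and poisoning the predicate state otherwise:
///   .Ltarget:
///     cmpq $.Ltarget, %rax    # or LEA + CMP64rr for PIC / large code models
///     cmovne %poison, %state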
961 SmallVector<MachineInstr *, 16>
962 X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
963  MachineFunction &MF) {
964  // We use the SSAUpdater to insert PHI nodes for the target addresses of
965  // indirect branches. We don't actually need the full power of the SSA updater
966  // in this particular case as we always have immediately available values, but
967  // this avoids us having to re-implement the PHI construction logic.
968  MachineSSAUpdater TargetAddrSSA(MF);
969  TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
970 
971  // Track which blocks were terminated with an indirect branch.
972  SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
973 
974  // We need to know what blocks end up reached via indirect branches. We
975  // expect this to be a subset of those whose address is taken and so track it
976  // directly via the CFG.
977  SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
978 
979  // Walk all the blocks which end in an indirect branch and make the
980  // target address available.
981  for (MachineBasicBlock &MBB : MF) {
982  // Find the last terminator.
983  auto MII = MBB.instr_rbegin();
984  while (MII != MBB.instr_rend() && MII->isDebugInstr())
985  ++MII;
986  if (MII == MBB.instr_rend())
987  continue;
988  MachineInstr &TI = *MII;
989  if (!TI.isTerminator() || !TI.isBranch())
990  // No terminator or non-branch terminator.
991  continue;
992 
993  unsigned TargetReg;
994 
995  switch (TI.getOpcode()) {
996  default:
997  // Direct branch or conditional branch (leading to fallthrough).
998  continue;
999 
1000  case X86::FARJMP16m:
1001  case X86::FARJMP32m:
1002  case X86::FARJMP64m:
1003  // We cannot mitigate far jumps or calls, but we also don't expect them
1004  // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1005  continue;
1006 
1007  case X86::JMP16m:
1008  case X86::JMP16m_NT:
1009  case X86::JMP32m:
1010  case X86::JMP32m_NT:
1011  case X86::JMP64m:
1012  case X86::JMP64m_NT:
1013  // Mostly as documentation.
1014  report_fatal_error("Memory operand jumps should have been unfolded!");
1015 
1016  case X86::JMP16r:
1018  "Support for 16-bit indirect branches is not implemented.");
1019  case X86::JMP32r:
1021  "Support for 32-bit indirect branches is not implemented.");
1022 
1023  case X86::JMP64r:
1024  TargetReg = TI.getOperand(0).getReg();
1025  }
1026 
1027  // We have definitely found an indirect branch. Verify that there are no
1028  // preceding conditional branches as we don't yet support that.
1029  if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1030  return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1031  })) {
1032  LLVM_DEBUG({
1033  dbgs() << "ERROR: Found other terminators in a block with an indirect "
1034  "branch! This is not yet supported! Terminator sequence:\n";
1035  for (MachineInstr &MI : MBB.terminators()) {
1036  MI.dump();
1037  dbgs() << '\n';
1038  }
1039  });
1040  report_fatal_error("Unimplemented terminator sequence!");
1041  }
1042 
1043  // Make the target register an available value for this block.
1044  TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1045  IndirectTerminatedMBBs.insert(&MBB);
1046 
1047  // Add all the successors to our target candidates.
1048  for (MachineBasicBlock *Succ : MBB.successors())
1049  IndirectTargetMBBs.insert(Succ);
1050  }
1051 
1052  // Keep track of the cmov instructions we insert so we can return them.
1053  SmallVector<MachineInstr *, 16> CMovs;
1054 
1055  // If we didn't find any indirect branches with targets, nothing to do here.
1056  if (IndirectTargetMBBs.empty())
1057  return CMovs;
1058 
1059  // We found indirect branches and targets that need to be instrumented to
1060  // harden loads within them. Walk the blocks of the function (to get a stable
1061  // ordering) and instrument each target of an indirect branch.
1062  for (MachineBasicBlock &MBB : MF) {
1063  // Skip the blocks that aren't candidate targets.
1064  if (!IndirectTargetMBBs.count(&MBB))
1065  continue;
1066 
1067  // We don't expect EH pads to ever be reached via an indirect branch. If
1068  // this is desired for some reason, we could simply skip them here rather
1069  // than asserting.
1070  assert(!MBB.isEHPad() &&
1071  "Unexpected EH pad as target of an indirect branch!");
1072 
1073  // We should never end up threading EFLAGS into a block to harden
1074  // conditional jumps as there would be an additional successor via the
1075  // indirect branch. As a consequence, all such edges would be split before
1076  // reaching here, and the inserted block will handle the EFLAGS-based
1077  // hardening.
1078  assert(!MBB.isLiveIn(X86::EFLAGS) &&
1079  "Cannot check within a block that already has live-in EFLAGS!");
1080 
1081  // We can't handle having non-indirect edges into this block unless this is
1082  // the only successor and we can synthesize the necessary target address.
1083  for (MachineBasicBlock *Pred : MBB.predecessors()) {
1084  // If we've already handled this by extracting the target directly,
1085  // nothing to do.
1086  if (IndirectTerminatedMBBs.count(Pred))
1087  continue;
1088 
1089  // Otherwise, we have to be the only successor. We generally expect this
1090  // to be true as conditional branches should have had a critical edge
1091  // split already. We don't however need to worry about EH pad successors
1092  // as they'll happily ignore the target and their hardening strategy is
1093  // resilient to all ways in which they could be reached speculatively.
1094  if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1095  return Succ->isEHPad() || Succ == &MBB;
1096  })) {
1097  LLVM_DEBUG({
1098  dbgs() << "ERROR: Found conditional entry to target of indirect "
1099  "branch!\n";
1100  Pred->dump();
1101  MBB.dump();
1102  });
1103  report_fatal_error("Cannot harden a conditional entry to a target of "
1104  "an indirect branch!");
1105  }
1106 
1107  // Now we need to compute the address of this block and install it as a
1108  // synthetic target in the predecessor. We do this at the bottom of the
1109  // predecessor.
1110  auto InsertPt = Pred->getFirstTerminator();
1111  Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1112  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1113  !Subtarget->isPositionIndependent()) {
1114  // Directly materialize it into an immediate.
1115  auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1116  TII->get(X86::MOV64ri32), TargetReg)
1117  .addMBB(&MBB);
1118  ++NumInstsInserted;
1119  (void)AddrI;
1120  LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1121  dbgs() << "\n");
1122  } else {
1123  auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1124  TargetReg)
1125  .addReg(/*Base*/ X86::RIP)
1126  .addImm(/*Scale*/ 1)
1127  .addReg(/*Index*/ 0)
1128  .addMBB(&MBB)
1129  .addReg(/*Segment*/ 0);
1130  ++NumInstsInserted;
1131  (void)AddrI;
1132  LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1133  dbgs() << "\n");
1134  }
1135  // And make this available.
1136  TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1137  }
1138 
1139  // Materialize the needed SSA value of the target. Note that we need the
1140  // middle of the block as this block might at the bottom have an indirect
1141  // branch back to itself. We can do this here because at this point, every
1142  // predecessor of this block has an available value. This is basically just
1143  // automating the construction of a PHI node for this target.
1144  unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1145 
1146  // Insert a comparison of the incoming target register with this block's
1147  // address. This also requires us to mark the block as having its address
1148  // taken explicitly.
1149  MBB.setHasAddressTaken();
1150  auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1151  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1152  !Subtarget->isPositionIndependent()) {
1153  // Check directly against a relocated immediate when we can.
1154  auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1155  .addReg(TargetReg, RegState::Kill)
1156  .addMBB(&MBB);
1157  ++NumInstsInserted;
1158  (void)CheckI;
1159  LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1160  } else {
1161  // Otherwise compute the address into a register first.
1162  Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1163  auto AddrI =
1164  BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1165  .addReg(/*Base*/ X86::RIP)
1166  .addImm(/*Scale*/ 1)
1167  .addReg(/*Index*/ 0)
1168  .addMBB(&MBB)
1169  .addReg(/*Segment*/ 0);
1170  ++NumInstsInserted;
1171  (void)AddrI;
1172  LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1173  auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1174  .addReg(TargetReg, RegState::Kill)
1175  .addReg(AddrReg, RegState::Kill);
1176  ++NumInstsInserted;
1177  (void)CheckI;
1178  LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1179  }
1180 
1181  // Now cmov over the predicate if the comparison wasn't equal.
1182  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1183  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1184  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1185  auto CMovI =
1186  BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1187  .addReg(PS->InitialReg)
1188  .addReg(PS->PoisonReg)
1189  .addImm(X86::COND_NE);
1190  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
1191  ++NumInstsInserted;
1192  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1193  CMovs.push_back(&*CMovI);
1194 
1195  // And put the new value into the available values for SSA form of our
1196  // predicate state.
1197  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1198  }
1199 
1200  // Return all the newly inserted cmov instructions of the predicate state.
1201  return CMovs;
1202 }
1203 
1204 // Returns true if the MI has EFLAGS as a register def operand and it's live;
1205 // otherwise it returns false.
1206 static bool isEFLAGSDefLive(const MachineInstr &MI) {
1207  if (const MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1208  return !DefOp->isDead();
1209  }
1210  return false;
1211 }
1212 
1213 static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1214  const TargetRegisterInfo &TRI) {
1215  // Check if EFLAGS are alive by seeing if there is a def of them or they
1216  // live-in, and then seeing if that def is in turn used.
1217  for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1218  if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1219  // If the def is dead, then EFLAGS is not live.
1220  if (DefOp->isDead())
1221  return false;
1222 
1223  // Otherwise we've def'ed it, and it is live.
1224  return true;
1225  }
1226  // While at this instruction, also check if we use and kill EFLAGS
1227  // which means it isn't live.
1228  if (MI.killsRegister(X86::EFLAGS, &TRI))
1229  return false;
1230  }
1231 
1232  // If we didn't find anything conclusive (neither definitely alive or
1233  // definitely dead) return whether it lives into the block.
1234  return MBB.isLiveIn(X86::EFLAGS);
1235 }
1236 
1237 /// Trace the predicate state through each of the blocks in the function,
1238 /// hardening everything necessary along the way.
1239 ///
1240 /// We call this routine once the initial predicate state has been established
1241 /// for each basic block in the function in the SSA updater. This routine traces
1242 /// it through the instructions within each basic block, and for non-returning
1243 /// blocks informs the SSA updater about the final state that lives out of the
1244 /// block. Along the way, it hardens any vulnerable instruction using the
1245 /// currently valid predicate state. We have to do these two things together
1246 /// because the SSA updater only works across blocks. Within a block, we track
1247 /// the current predicate state directly and update it as it changes.
1248 ///
1249 /// This operates in two passes over each block. First, we analyze the loads in
1250 /// the block to determine which strategy will be used to harden them: hardening
1251 /// the address or hardening the loaded value when loaded into a register
1252 /// amenable to hardening. We have to process these first because the two
1253 /// strategies may interact -- later hardening may change what strategy we wish
1254 /// to use. We also will analyze data dependencies between loads and avoid
1255 /// hardening those loads that are data dependent on a load with a hardened
1256 /// address. We also skip hardening loads already behind an LFENCE as that is
1257 /// sufficient to harden them against misspeculation.
1258 ///
1259 /// Second, we actively trace the predicate state through the block, applying
1260 /// the hardening steps we determined necessary in the first pass as we go.
1261 ///
1262 /// These two passes are applied to each basic block. We operate one block at a
1263 /// time to simplify reasoning about reachability and sequencing.
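///
/// As an illustrative sketch of the two per-load strategies (not literal
/// output):
///   orq %state, %rcx            # address hardening: poison the index/base
///   movq (%rdi,%rcx), %rax      # ...before the load executes
/// versus
///   movq (%rdi,%rcx), %rax      # post-load hardening: load first, then
///   orq %state, %rax            # ...flush the loaded bits to all-ones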
1264 void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1265  MachineFunction &MF) {
1266  SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1267  SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1268 
1269  SmallSet<unsigned, 16> HardenedAddrRegs;
1270 
1271  SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1272 
1273  // Track the set of load-dependent registers through the basic block. Because
1274  // the values of these registers have an existing data dependency on a loaded
1275  // value which we would have checked, we can omit any checks on them.
1276  SparseBitVector<> LoadDepRegs;
1277 
1278  for (MachineBasicBlock &MBB : MF) {
1279  // The first pass over the block: collect all the loads which can have their
1280  // loaded value hardened and all the loads that instead need their address
1281  // hardened. During this walk we propagate load dependence for address
1282  // hardened loads and also look for LFENCE to stop hardening wherever
1283  // possible. When deciding whether or not to harden the loaded value,
1284  // we check to see if any registers used in the address will have been
1285  // hardened at this point and if so, harden any remaining address registers
1286  // as that often successfully re-uses hardened addresses and minimizes
1287  // instructions.
1288  //
1289  // FIXME: We should consider an aggressive mode where we continue to keep as
1290  // many loads value hardened even when some address register hardening would
1291  // be free (due to reuse).
1292  //
1293  // Note that we only need this pass if we are actually hardening loads.
1294  if (HardenLoads)
1295  for (MachineInstr &MI : MBB) {
1296  // We naively assume that all def'ed registers of an instruction have
1297  // a data dependency on all of their operands.
1298  // FIXME: Do a more careful analysis of x86 to build a conservative
1299  // model here.
1300  if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1301  return Op.isReg() && LoadDepRegs.test(Op.getReg());
1302  }))
1303  for (MachineOperand &Def : MI.defs())
1304  if (Def.isReg())
1305  LoadDepRegs.set(Def.getReg());
1306 
1307  // Both Intel and AMD are guiding that they will change the semantics of
1308  // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1309  // no more need to guard things in this block.
1310  if (MI.getOpcode() == X86::LFENCE)
1311  break;
1312 
1313  // If this instruction cannot load, nothing to do.
1314  if (!MI.mayLoad())
1315  continue;
1316 
1317  // Some instructions which "load" are trivially safe or unimportant.
1318  if (MI.getOpcode() == X86::MFENCE)
1319  continue;
1320 
1321  // Extract the memory operand information about this instruction.
1322  // FIXME: This doesn't handle loading pseudo instructions which we often
1323  // could handle with similarly generic logic. We probably need to add an
1324  // MI-layer routine similar to the MC-layer one we use here which maps
1325  // pseudos much like this maps real instructions.
1326  const MCInstrDesc &Desc = MI.getDesc();
1327  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1328  if (MemRefBeginIdx < 0) {
1329  LLVM_DEBUG(dbgs()
1330  << "WARNING: unable to harden loading instruction: ";
1331  MI.dump());
1332  continue;
1333  }
1334 
1335  MemRefBeginIdx += X86II::getOperandBias(Desc);
1336 
1337  MachineOperand &BaseMO =
1338  MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1339  MachineOperand &IndexMO =
1340  MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1341 
1342  // If we have at least one (non-frame-index, non-RIP) register operand,
1343  // and neither operand is load-dependent, we need to check the load.
1344  unsigned BaseReg = 0, IndexReg = 0;
1345  if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1346  BaseMO.getReg() != X86::NoRegister)
1347  BaseReg = BaseMO.getReg();
1348  if (IndexMO.getReg() != X86::NoRegister)
1349  IndexReg = IndexMO.getReg();
1350 
1351  if (!BaseReg && !IndexReg)
1352  // No register operands!
1353  continue;
1354 
1355  // If any register operand is dependent, this load is dependent and we
1356  // needn't check it.
1357  // FIXME: Is this true in the case where we are hardening loads after
1358  // they complete? Unclear, need to investigate.
1359  if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1360  (IndexReg && LoadDepRegs.test(IndexReg)))
1361  continue;
1362 
1363  // If post-load hardening is enabled, this load is compatible with
1364  // post-load hardening, and we aren't already going to harden one of the
1365  // address registers, queue it up to be hardened post-load. Notably,
1366  // even once hardened this won't introduce a useful dependency that
1367  // could prune out subsequent loads.
1368  if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
1369  !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1370  MI.getOperand(0).isReg() &&
1371  canHardenRegister(MI.getOperand(0).getReg()) &&
1372  !HardenedAddrRegs.count(BaseReg) &&
1373  !HardenedAddrRegs.count(IndexReg)) {
1374  HardenPostLoad.insert(&MI);
1375  HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1376  continue;
1377  }
1378 
1379  // Record this instruction for address hardening and record its register
1380  // operands as being address-hardened.
1381  HardenLoadAddr.insert(&MI);
1382  if (BaseReg)
1383  HardenedAddrRegs.insert(BaseReg);
1384  if (IndexReg)
1385  HardenedAddrRegs.insert(IndexReg);
1386 
1387  for (MachineOperand &Def : MI.defs())
1388  if (Def.isReg())
1389  LoadDepRegs.set(Def.getReg());
1390  }
1391 
1392  // Now re-walk the instructions in the basic block, and apply whichever
1393  // hardening strategy we have elected. Note that we do this in a second
1394  // pass specifically so that we have the complete set of instructions for
1395  // which we will do post-load hardening and can defer it in certain
1396  // circumstances.
1397  for (MachineInstr &MI : MBB) {
1398  if (HardenLoads) {
1399  // We cannot both require hardening the def of a load and its address.
1400  assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1401  "Requested to harden both the address and def of a load!");
1402 
1403  // Check if this is a load whose address needs to be hardened.
1404  if (HardenLoadAddr.erase(&MI)) {
1405  const MCInstrDesc &Desc = MI.getDesc();
1406  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1407  assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1408 
1409  MemRefBeginIdx += X86II::getOperandBias(Desc);
1410 
1411  MachineOperand &BaseMO =
1412  MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1413  MachineOperand &IndexMO =
1414  MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1415  hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1416  continue;
1417  }
1418 
1419  // Test if this instruction is one of our post load instructions (and
1420  // remove it from the set if so).
1421  if (HardenPostLoad.erase(&MI)) {
1422  assert(!MI.isCall() && "Must not try to post-load harden a call!");
1423 
1424  // If this is a data-invariant load and there is no EFLAGS
1425  // interference, we want to try and sink any hardening as far as
1426  // possible.
1427  if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
1428  // Sink the instruction we'll need to harden as far as we can down
1429  // the graph.
1430  MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1431 
1432  // If we managed to sink this instruction, update everything so we
1433  // harden that instruction when we reach it in the instruction
1434  // sequence.
1435  if (SunkMI != &MI) {
1436  // If in sinking there was no instruction needing to be hardened,
1437  // we're done.
1438  if (!SunkMI)
1439  continue;
1440 
1441  // Otherwise, add this to the set of defs we harden.
1442  HardenPostLoad.insert(SunkMI);
1443  continue;
1444  }
1445  }
1446 
1447  unsigned HardenedReg = hardenPostLoad(MI);
1448 
1449  // Mark the resulting hardened register as such so we don't re-harden.
1450  AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1451 
1452  continue;
1453  }
1454 
1455  // Check for an indirect call or branch that may need its input hardened
1456  // even if we couldn't find the specific load used, or were able to
1457  // avoid hardening it for some reason. Note that here we cannot break
1458  // out afterward as we may still need to handle any call aspect of this
1459  // instruction.
1460  if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1461  hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1462  }
1463 
1464  // After we finish hardening loads, we handle interprocedural hardening if
1465  // enabled and relevant for this instruction.
1466  if (!HardenInterprocedurally)
1467  continue;
1468  if (!MI.isCall() && !MI.isReturn())
1469  continue;
1470 
1471  // If this is a direct return (i.e., not a tail call), just directly harden
1472  // it.
1473  if (MI.isReturn() && !MI.isCall()) {
1474  hardenReturnInstr(MI);
1475  continue;
1476  }
1477 
1478  // Otherwise we have a call. We need to handle transferring the predicate
1479  // state into a call and recovering it after the call returns (unless this
1480  // is a tail call).
1481  assert(MI.isCall() && "Should only reach here for calls!");
1482  tracePredStateThroughCall(MI);
1483  }
1484 
1485  HardenPostLoad.clear();
1486  HardenLoadAddr.clear();
1487  HardenedAddrRegs.clear();
1488  AddrRegToHardenedReg.clear();
1489 
1490  // Currently, we only track data-dependent loads within a basic block.
1491  // FIXME: We should see if this is necessary or if we could be more
1492  // aggressive here without opening up attack avenues.
1493  LoadDepRegs.clear();
1494  }
1495 }
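// A rough illustration of how the two sets drive the second pass above; the
// opcodes and virtual register names here are illustrative examples only:
//
//   %val = MOV64rm %p, 1, $noreg, 0, $noreg    ; GPR def    -> HardenPostLoad
//   %vec = VMOVUPSrm %q, 1, $noreg, 0, $noreg  ; vector def -> HardenLoadAddr
//
// The first load keeps its address untouched and has its defined value ORed
// with the predicate state afterwards; the second cannot be post-load hardened
// (see canHardenRegister below), so its address register %q is hardened before
// the load executes.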
1496 
1497 /// Save EFLAGS into the returned GPR. This can in turn be restored with
1498 /// `restoreEFLAGS`.
1499 ///
1500 /// Note that LLVM can only lower very simple patterns of saved and restored
1501 /// EFLAGS registers. The restore should always be within the same basic block
1502 /// as the save so that no PHI nodes are inserted.
1503 unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1504  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1505  DebugLoc Loc) {
1506  // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1507  // what instruction selection does.
1508  Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1509  // We directly copy the FLAGS register and rely on later lowering to clean
1510  // this up into the appropriate setCC instructions.
1511  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1512  ++NumInstsInserted;
1513  return Reg;
1514 }
1515 
1516 /// Restore EFLAGS from the provided GPR. This should be produced by
1517 /// `saveEFLAGS`.
1518 ///
1519 /// This must be done within the same basic block as the save in order to
1520 /// reliably lower.
1521 void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1522  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1523  unsigned Reg) {
1524  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1525  ++NumInstsInserted;
1526 }
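// Illustrative usage sketch (the names are placeholders): callers below bracket
// flag-clobbering hardening code with this pair roughly as follows:
//
//   unsigned FlagsReg = saveEFLAGS(MBB, InsertPt, Loc); // %flags = COPY $eflags
//   // ... emit OR-based hardening that clobbers EFLAGS ...
//   restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);        // $eflags = COPY %flags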
1527 
1528 /// Takes the current predicate state (in a register) and merges it into the
1529 /// stack pointer. The state is essentially a single bit, but we merge this in
1530 /// a way that won't form non-canonical pointers and also will be preserved
1531 /// across normal stack adjustments.
1532 void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1533  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1534  unsigned PredStateReg) {
1535  Register TmpReg = MRI->createVirtualRegister(PS->RC);
1536  // FIXME: This hard codes a shift distance based on the number of bits needed
1537  // to stay canonical on 64-bit. We should compute this somehow and support
1538  // 32-bit as part of that.
1539  auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1540  .addReg(PredStateReg, RegState::Kill)
1541  .addImm(47);
1542  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1543  ++NumInstsInserted;
1544  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1545  .addReg(X86::RSP)
1546  .addReg(TmpReg, RegState::Kill);
1547  OrI->addRegisterDead(X86::EFLAGS, TRI);
1548  ++NumInstsInserted;
1549 }
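// Roughly, the two instructions built above correspond to the following x86-64
// sequence (the register holding the state is illustrative):
//
//   shlq $47, %state     # move the all-zeros/all-ones state into the high
//                        # 17 bits so pointers stay canonical
//   orq  %state, %rsp    # a poisoned state sets RSP's high bits; a zero state
//                        # leaves RSP unchanged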
1550 
1551 /// Extracts the predicate state stored in the high bits of the stack pointer.
1552 unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1553  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1554  DebugLoc Loc) {
1555  Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1556  Register TmpReg = MRI->createVirtualRegister(PS->RC);
1557 
1558  // We know that the stack pointer will have any preserved predicate state in
1559  // its high bit. We just want to smear this across the other bits. Turns out,
1560  // this is exactly what an arithmetic right shift does.
1561  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1562  .addReg(X86::RSP);
1563  auto ShiftI =
1564  BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1565  .addReg(TmpReg, RegState::Kill)
1566  .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1567  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1568  ++NumInstsInserted;
1569 
1570  return PredStateReg;
1571 }
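// Conceptually this emits (illustrative register names):
//
//   movq %rsp, %tmp
//   sarq $63, %tmp       # smear the sign bit: all-ones if the state merged
//                        # into RSP was poisoned, all-zeros otherwise
//
// and the shifted value becomes the returned predicate state register.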
1572 
1573 void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1574  MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1575  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1576  MachineBasicBlock &MBB = *MI.getParent();
1577  DebugLoc Loc = MI.getDebugLoc();
1578 
1579  // Check if EFLAGS are alive by seeing if there is a def of them or they are
1580  // live-in, and then seeing if that def is in turn used.
1581  bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1582 
1583  SmallVector<MachineOperand *, 2> HardenOpRegs;
1584 
1585  if (BaseMO.isFI()) {
1586  // A frame index is never a dynamically controllable load, so only
1587  // harden it if we're covering fixed address loads as well.
1588  LLVM_DEBUG(
1589  dbgs() << " Skipping hardening base of explicit stack frame load: ";
1590  MI.dump(); dbgs() << "\n");
1591  } else if (BaseMO.getReg() == X86::RSP) {
1592  // Some idempotent atomic operations are lowered directly to a locked
1593  // OR with 0 to the top of stack (or slightly offset from top) which uses an
1594  // explicit RSP register as the base.
1595  assert(IndexMO.getReg() == X86::NoRegister &&
1596  "Explicit RSP access with dynamic index!");
1597  LLVM_DEBUG(
1598  dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1599  } else if (BaseMO.getReg() == X86::RIP ||
1600  BaseMO.getReg() == X86::NoRegister) {
1601  // For both RIP-relative addressed loads or absolute loads, we cannot
1602  // meaningfully harden them because the address being loaded has no
1603  // dynamic component.
1604  //
1605  // FIXME: When using a segment base (like TLS does) we end up with the
1606  // dynamic address being the base plus -1 because we can't mutate the
1607  // segment register here. This allows the signed 32-bit offset to point at
1608  // valid segment-relative addresses and load them successfully.
1609  LLVM_DEBUG(
1610  dbgs() << " Cannot harden base of "
1611  << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1612  << " address in a load!");
1613  } else {
1614  assert(BaseMO.isReg() &&
1615  "Only allowed to have a frame index or register base.");
1616  HardenOpRegs.push_back(&BaseMO);
1617  }
1618 
1619  if (IndexMO.getReg() != X86::NoRegister &&
1620  (HardenOpRegs.empty() ||
1621  HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1622  HardenOpRegs.push_back(&IndexMO);
1623 
1624  assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1625  "Should have exactly one or two registers to harden!");
1626  assert((HardenOpRegs.size() == 1 ||
1627  HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1628  "Should not have two of the same registers!");
1629 
1630  // Remove any registers that have already been checked.
1631  llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1632  // See if this operand's register has already been checked.
1633  auto It = AddrRegToHardenedReg.find(Op->getReg());
1634  if (It == AddrRegToHardenedReg.end())
1635  // Not checked, so retain this one.
1636  return false;
1637 
1638  // Otherwise, we can directly update this operand and remove it.
1639  Op->setReg(It->second);
1640  return true;
1641  });
1642  // If there are none left, we're done.
1643  if (HardenOpRegs.empty())
1644  return;
1645 
1646  // Compute the current predicate state.
1647  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1648 
1649  auto InsertPt = MI.getIterator();
1650 
1651  // If EFLAGS are live and we don't have access to instructions that avoid
1652  // clobbering EFLAGS we need to save and restore them. This in turn makes
1653  // the EFLAGS no longer live.
1654  unsigned FlagsReg = 0;
1655  if (EFLAGSLive && !Subtarget->hasBMI2()) {
1656  EFLAGSLive = false;
1657  FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1658  }
1659 
1660  for (MachineOperand *Op : HardenOpRegs) {
1661  Register OpReg = Op->getReg();
1662  auto *OpRC = MRI->getRegClass(OpReg);
1663  Register TmpReg = MRI->createVirtualRegister(OpRC);
1664 
1665  // If this is a vector register, we'll need somewhat custom logic to handle
1666  // hardening it.
1667  if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1668  OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1669  assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1670  bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1671 
1672  // Move our state into a vector register.
1673  // FIXME: We could skip this at the cost of longer encodings with AVX-512
1674  // but that doesn't seem likely worth it.
1675  Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1676  auto MovI =
1677  BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1678  .addReg(StateReg);
1679  (void)MovI;
1680  ++NumInstsInserted;
1681  LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1682 
1683  // Broadcast it across the vector register.
1684  Register VBStateReg = MRI->createVirtualRegister(OpRC);
1685  auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1686  TII->get(Is128Bit ? X86::VPBROADCASTQrr
1687  : X86::VPBROADCASTQYrr),
1688  VBStateReg)
1689  .addReg(VStateReg);
1690  (void)BroadcastI;
1691  ++NumInstsInserted;
1692  LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1693  dbgs() << "\n");
1694 
1695  // Merge our potential poison state into the value with a vector or.
1696  auto OrI =
1697  BuildMI(MBB, InsertPt, Loc,
1698  TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1699  .addReg(VBStateReg)
1700  .addReg(OpReg);
1701  (void)OrI;
1702  ++NumInstsInserted;
1703  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1704  } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1705  OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1706  OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1707  assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1708  bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1709  bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1710  if (Is128Bit || Is256Bit)
1711  assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1712 
1713  // Broadcast our state into a vector register.
1714  Register VStateReg = MRI->createVirtualRegister(OpRC);
1715  unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1716  : Is256Bit ? X86::VPBROADCASTQrZ256rr
1717  : X86::VPBROADCASTQrZrr;
1718  auto BroadcastI =
1719  BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1720  .addReg(StateReg);
1721  (void)BroadcastI;
1722  ++NumInstsInserted;
1723  LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1724  dbgs() << "\n");
1725 
1726  // Merge our potential poison state into the value with a vector or.
1727  unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1728  : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1729  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1730  .addReg(VStateReg)
1731  .addReg(OpReg);
1732  (void)OrI;
1733  ++NumInstsInserted;
1734  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1735  } else {
1736  // FIXME: Need to support GR32 here for 32-bit code.
1737  assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1738  "Not a supported register class for address hardening!");
1739 
1740  if (!EFLAGSLive) {
1741  // Merge our potential poison state into the value with an or.
1742  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1743  .addReg(StateReg)
1744  .addReg(OpReg);
1745  OrI->addRegisterDead(X86::EFLAGS, TRI);
1746  ++NumInstsInserted;
1747  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1748  } else {
1749  // We need to avoid touching EFLAGS so shift out all but the least
1750  // significant bit using the instruction that doesn't update flags.
1751  auto ShiftI =
1752  BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1753  .addReg(OpReg)
1754  .addReg(StateReg);
1755  (void)ShiftI;
1756  ++NumInstsInserted;
1757  LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1758  dbgs() << "\n");
1759  }
1760  }
1761 
1762  // Record this register as checked and update the operand.
1763  assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1764  "Should not have checked this register yet!");
1765  AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1766  Op->setReg(TmpReg);
1767  ++NumAddrRegsHardened;
1768  }
1769 
1770  // And restore the flags if needed.
1771  if (FlagsReg)
1772  restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1773 }
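// As a concrete sketch (illustrative registers; the GPR case with dead EFLAGS),
// a load of the form `movq (%base,%index,8), %dest` has both address registers
// hardened before it executes:
//
//   orq  %state, %base    # an all-ones state turns the address registers into
//   orq  %state, %index   # all-ones; an all-zeros state leaves them unchanged
//   movq (%base,%index,8), %dest
//
// When EFLAGS are live and BMI2 is available, each OR is replaced by an SHRX
// whose shift count is the state (0 normally, 63 under misspeculation), which
// likewise destroys the attacker-controlled address without touching flags.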
1774 
1775 MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1776  MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1777  assert(X86InstrInfo::isDataInvariantLoad(InitialMI) &&
1778  "Cannot get here with a non-invariant load!");
1779  assert(!isEFLAGSDefLive(InitialMI) &&
1780  "Cannot get here with a data invariant load "
1781  "that interferes with EFLAGS!");
1782 
1783  // See if we can sink hardening the loaded value.
1784  auto SinkCheckToSingleUse =
1785  [&](MachineInstr &MI) -> Optional<MachineInstr *> {
1786  Register DefReg = MI.getOperand(0).getReg();
1787 
1788  // We need to find a single use to which we can sink the check. We can
1789  // primarily do this because many uses may already end up checked on their
1790  // own.
1791  MachineInstr *SingleUseMI = nullptr;
1792  for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1793  // If we're already going to harden this use, it is data invariant, it
1794  // does not interfere with EFLAGS, and is within our block.
1795  if (HardenedInstrs.count(&UseMI)) {
1796  if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
1797  // If we've already decided to harden a non-load, we must have sunk
1798  // some other post-load hardened instruction to it and it must itself
1799  // be data-invariant.
1800  assert(X86InstrInfo::isDataInvariant(UseMI) &&
1801  "Data variant instruction being hardened!");
1802  continue;
1803  }
1804 
1805  // Otherwise, this is a load and the load component can't be data
1806  // invariant so check how this register is being used.
1807  const MCInstrDesc &Desc = UseMI.getDesc();
1808  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1809  assert(MemRefBeginIdx >= 0 &&
1810  "Should always have mem references here!");
1811  MemRefBeginIdx += X86II::getOperandBias(Desc);
1812 
1813  MachineOperand &BaseMO =
1814  UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1815  MachineOperand &IndexMO =
1816  UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1817  if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1818  (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1819  // The load uses the register as part of its address making it not
1820  // invariant.
1821  return {};
1822 
1823  continue;
1824  }
1825 
1826  if (SingleUseMI)
1827  // We already have a single use, this would make two. Bail.
1828  return {};
1829 
1830  // If this single use isn't data invariant, isn't in this block, or has
1831  // interfering EFLAGS, we can't sink the hardening to it.
1832  if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1833  isEFLAGSDefLive(UseMI))
1834  return {};
1835 
1836  // If this instruction defines multiple registers bail as we won't harden
1837  // all of them.
1838  if (UseMI.getDesc().getNumDefs() > 1)
1839  return {};
1840 
1841  // If this register isn't a virtual register we can't sanely walk its uses,
1842  // just bail. Also check that its register class is one of the ones we
1843  // can harden.
1844  Register UseDefReg = UseMI.getOperand(0).getReg();
1845  if (!Register::isVirtualRegister(UseDefReg) ||
1846  !canHardenRegister(UseDefReg))
1847  return {};
1848 
1849  SingleUseMI = &UseMI;
1850  }
1851 
1852  // If SingleUseMI is still null, there is no use that needs its own
1853  // checking. Otherwise, it is the single use that needs checking.
1854  return {SingleUseMI};
1855  };
1856 
1857  MachineInstr *MI = &InitialMI;
1858  while (Optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1859  // Update which MI we're checking now.
1860  MI = *SingleUse;
1861  if (!MI)
1862  break;
1863  }
1864 
1865  return MI;
1866 }
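// Sketch of the sinking (illustrative opcodes and registers): given
//
//   %a = MOV64rm ...        ; the load we were asked to post-load harden
//   %b = ADD64rr %a, %c     ; %a's only use: data invariant, same block, and
//                           ; its EFLAGS def is dead
//
// the hardening is deferred from %a to %b, so only the value that could
// actually leak is ORed with the predicate state.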
1867 
1868 bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) {
1869  auto *RC = MRI->getRegClass(Reg);
1870  int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1871  if (RegBytes > 8)
1872  // We don't support post-load hardening of vectors.
1873  return false;
1874 
1875  unsigned RegIdx = Log2_32(RegBytes);
1876  assert(RegIdx < 4 && "Unsupported register size");
1877 
1878  // If this register class is explicitly constrained to a class that doesn't
1879  // require REX prefix, we may not be able to satisfy that constraint when
1880  // emitting the hardening instructions, so bail out here.
1881  // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1882  // end up both with a NOREX and REX-only register as operands to the hardening
1883  // instructions. It would be better to fix that code to handle this situation
1884  // rather than hack around it in this way.
1885  const TargetRegisterClass *NOREXRegClasses[] = {
1886  &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1887  &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1888  if (RC == NOREXRegClasses[RegIdx])
1889  return false;
1890 
1891  const TargetRegisterClass *GPRRegClasses[] = {
1892  &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1893  &X86::GR64RegClass};
1894  return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1895 }
1896 
1897 /// Harden a value in a register.
1898 ///
1899 /// This is the low-level logic to fully harden a value sitting in a register
1900 /// against leaking during speculative execution.
1901 ///
1902 /// Unlike hardening an address that is used by a load, this routine is required
1903 /// to hide *all* incoming bits in the register.
1904 ///
1905 /// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1906 /// larger than the predicate state register. FIXME: We should support vector
1907 /// registers here by broadcasting the predicate state.
1908 ///
1909 /// The new, hardened virtual register is returned. It will have the same
1910 /// register class as `Reg`.
1911 unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1912  unsigned Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1913  DebugLoc Loc) {
1914  assert(canHardenRegister(Reg) && "Cannot harden this register!");
1915  assert(Register::isVirtualRegister(Reg) && "Cannot harden a physical register!");
1916 
1917  auto *RC = MRI->getRegClass(Reg);
1918  int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1919 
1920  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1921 
1922  // FIXME: Need to teach this about 32-bit mode.
1923  if (Bytes != 8) {
1924  unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1925  unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1926  Register NarrowStateReg = MRI->createVirtualRegister(RC);
1927  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1928  .addReg(StateReg, 0, SubRegImm);
1929  StateReg = NarrowStateReg;
1930  }
1931 
1932  unsigned FlagsReg = 0;
1933  if (isEFLAGSLive(MBB, InsertPt, *TRI))
1934  FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1935 
1936  Register NewReg = MRI->createVirtualRegister(RC);
1937  unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1938  unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1939  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1940  .addReg(StateReg)
1941  .addReg(Reg);
1942  OrI->addRegisterDead(X86::EFLAGS, TRI);
1943  ++NumInstsInserted;
1944  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1945 
1946  if (FlagsReg)
1947  restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1948 
1949  return NewReg;
1950 }
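// For example, hardening a 32-bit virtual register looks roughly like this
// (illustrative virtual registers):
//
//   %state32  = COPY %state.sub_32bit   ; narrow the 64-bit predicate state
//   %hardened = OR32rr %state32, %val   ; an all-ones state flushes %val to ones
//
// with an EFLAGS save/restore wrapped around the OR when the flags are live.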
1951 
1952 /// Harden a load by hardening the loaded value in the defined register.
1953 ///
1954 /// We can harden a non-leaking load into a register without touching the
1955 /// address by just hiding all of the loaded bits during misspeculation. We use
1956 /// an `or` instruction to do this because we set up our poison value as all
1957 /// ones. The goal is just that the loaded bits are not exposed to speculative
1958 /// execution, and coercing them all to one is sufficient for that.
1959 ///
1960 /// Returns the newly hardened register.
1961 unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1962  MachineBasicBlock &MBB = *MI.getParent();
1963  DebugLoc Loc = MI.getDebugLoc();
1964 
1965  auto &DefOp = MI.getOperand(0);
1966  Register OldDefReg = DefOp.getReg();
1967  auto *DefRC = MRI->getRegClass(OldDefReg);
1968 
1969  // Because we want to completely replace the uses of this def'ed value with
1970  // the hardened value, create a dedicated new register that will only be used
1971  // to communicate the unhardened value to the hardening.
1972  Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1973  DefOp.setReg(UnhardenedReg);
1974 
1975  // Now harden this register's value, getting a hardened reg that is safe to
1976  // use. Note that we insert the instructions to compute this *after* the
1977  // defining instruction, not before it.
1978  unsigned HardenedReg = hardenValueInRegister(
1979  UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1980 
1981  // Finally, replace the old register (which now only has the uses of the
1982  // original def) with the hardened register.
1983  MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1984 
1985  ++NumPostLoadRegsHardened;
1986  return HardenedReg;
1987 }
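// Before/after sketch for a single load (illustrative registers):
//
//   Before:  %val = MOV64rm %p, 1, $noreg, 0, $noreg
//   After:   %unhardened = MOV64rm %p, 1, $noreg, 0, $noreg
//            %hardened   = OR64rr %state, %unhardened
//
// and every former use of %val is rewritten to use %hardened instead.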
1988 
1989 /// Harden a return instruction.
1990 ///
1991 /// Returns implicitly perform a load which we need to harden. Without hardening
1992 /// this load, an attacker may speculatively write over the return address to
1993 /// steer speculation of the return to an attacker controlled address. This is
1994 /// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1995 /// this paper:
1996 /// https://people.csail.mit.edu/vlk/spectre11.pdf
1997 ///
1998 /// We can harden this by introducing an LFENCE that will delay any load of the
1999 /// return address until prior instructions have retired (and thus are not being
2000 /// speculated), or we can harden the address used by the implicit load: the
2001 /// stack pointer.
2002 ///
2003 /// If we are not using an LFENCE, hardening the stack pointer has an additional
2004 /// benefit: it allows us to pass the predicate state accumulated in this
2005 /// function back to the caller. In the absence of a BCBS attack on the return,
2006 /// the caller will typically be resumed and speculatively executed due to the
2007 /// Return Stack Buffer (RSB) prediction which is very accurate and has a high
2008 /// priority. It is possible that some code from the caller will be executed
2009 /// speculatively even during a BCBS-attacked return until the steering takes
2010 /// effect. Whenever this happens, the caller can recover the (poisoned)
2011 /// predicate state from the stack pointer and continue to harden loads.
2012 void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2013  MachineBasicBlock &MBB = *MI.getParent();
2014  DebugLoc Loc = MI.getDebugLoc();
2015  auto InsertPt = MI.getIterator();
2016 
2017  if (FenceCallAndRet)
2018  // No need to fence here as we'll fence at the return site itself. That
2019  // handles more cases than we can handle here.
2020  return;
2021 
2022  // Take our predicate state, shift it to the high 17 bits (so that we keep
2023  // pointers canonical) and merge it into RSP. This will allow the caller to
2024  // extract it when we return (speculatively).
2025  mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2026 }
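// So, when LFENCE-based hardening is not in use, a hardened return conceptually
// becomes (illustrative state register):
//
//   shlq $47, %state
//   orq  %state, %rsp    # the caller can recover a poisoned state from RSP
//   retq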
2027 
2028 /// Trace the predicate state through a call.
2029 ///
2030 /// There are several layers of this needed to handle the full complexity of
2031 /// calls.
2032 ///
2033 /// First, we need to send the predicate state into the called function. We do
2034 /// this by merging it into the high bits of the stack pointer.
2035 ///
2036 /// For tail calls, this is all we need to do.
2037 ///
2038 /// For calls where we might return and resume the control flow, we need to
2039 /// extract the predicate state from the high bits of the stack pointer after
2040 /// control returns from the called function.
2041 ///
2042 /// We also need to verify that we intended to return to this location in the
2043 /// code. An attacker might arrange for the processor to mispredict the return
2044 /// to this valid but incorrect return address in the program rather than the
2045 /// correct one. See the paper on this attack, called "ret2spec" by the
2046 /// researchers, here:
2047 /// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2048 ///
2049 /// The way we verify that we returned to the correct location is by preserving
2050 /// the expected return address across the call. One technique involves taking
2051 /// advantage of the red-zone to load the return address from `-8(%rsp)` where it
2052 /// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2053 /// directly save the address into a register that will be preserved across the
2054 /// call. We compare this intended return address against the address
2055 /// immediately following the call (the observed return address). If these
2056 /// mismatch, we have detected misspeculation and can poison our predicate
2057 /// state.
2058 void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2059  MachineInstr &MI) {
2060  MachineBasicBlock &MBB = *MI.getParent();
2061  MachineFunction &MF = *MBB.getParent();
2062  auto InsertPt = MI.getIterator();
2063  DebugLoc Loc = MI.getDebugLoc();
2064 
2065  if (FenceCallAndRet) {
2066  if (MI.isReturn())
2067  // Tail call, we don't return to this function.
2068  // FIXME: We should also handle noreturn calls.
2069  return;
2070 
2071  // We don't need to fence before the call because the function should fence
2072  // in its entry. However, we do need to fence after the call returns.
2073  // Fencing before the return doesn't correctly handle cases where the return
2074  // itself is mispredicted.
2075  BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2076  ++NumInstsInserted;
2077  ++NumLFENCEsInserted;
2078  return;
2079  }
2080 
2081  // First, we transfer the predicate state into the called function by merging
2082  // it into the stack pointer. This will kill the current def of the state.
2083  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2084  mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2085 
2086  // If this call is also a return, it is a tail call and we don't need anything
2087  // else to handle it so just return. Also, if there are no further
2088  // instructions and no successors, this call does not return so we can also
2089  // bail.
2090  if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2091  return;
2092 
2093  // Create a symbol to track the return address and attach it to the call
2094  // machine instruction. We will lower extra symbols attached to call
2095  // instructions as labels immediately following the call.
2096  MCSymbol *RetSymbol =
2097  MF.getContext().createTempSymbol("slh_ret_addr",
2098  /*AlwaysAddSuffix*/ true);
2099  MI.setPostInstrSymbol(MF, RetSymbol);
2100 
2101  const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2102  unsigned ExpectedRetAddrReg = 0;
2103 
2104  // If we have no red zones or if the function returns twice (possibly without
2105  // using the `ret` instruction) like setjmp, we need to save the expected
2106  // return address prior to the call.
2107  if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2108  MF.exposesReturnsTwice()) {
2109  // If we don't have red zones, we need to compute the expected return
2110  // address prior to the call and store it in a register that lives across
2111  // the call.
2112  //
2113  // In some ways, this is doubly satisfying as a mitigation because it will
2114  // also successfully detect stack smashing bugs in some cases (typically,
2115  // when a callee-saved register is used and the callee doesn't push it onto
2116  // the stack). But that isn't our primary goal, so we only use it as
2117  // a fallback.
2118  //
2119  // FIXME: It isn't clear that this is reliable in the face of
2120  // rematerialization in the register allocator. We somehow need to force
2121  // that to not occur for this particular instruction, and instead to spill
2122  // or otherwise preserve the value computed *prior* to the call.
2123  //
2124  // FIXME: It is even less clear why MachineCSE can't just fold this when we
2125  // end up having to use identical instructions both before and after the
2126  // call to feed the comparison.
2127  ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2128  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2129  !Subtarget->isPositionIndependent()) {
2130  BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2131  .addSym(RetSymbol);
2132  } else {
2133  BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2134  .addReg(/*Base*/ X86::RIP)
2135  .addImm(/*Scale*/ 1)
2136  .addReg(/*Index*/ 0)
2137  .addSym(RetSymbol)
2138  .addReg(/*Segment*/ 0);
2139  }
2140  }
2141 
2142  // Step past the call to handle when it returns.
2143  ++InsertPt;
2144 
2145  // If we didn't pre-compute the expected return address into a register, then
2146  // red zones are enabled and the return address is still available on the
2147  // stack immediately after the call. As the very first instruction, we load it
2148  // into a register.
2149  if (!ExpectedRetAddrReg) {
2150  ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2151  BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2152  .addReg(/*Base*/ X86::RSP)
2153  .addImm(/*Scale*/ 1)
2154  .addReg(/*Index*/ 0)
2155  .addImm(/*Displacement*/ -8) // RET popped the return address, so it
2156  // now sits 8 bytes below the stack pointer.
2157  .addReg(/*Segment*/ 0);
2158  }
2159 
2160  // Now we extract the callee's predicate state from the stack pointer.
2161  unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2162 
2163  // Test the expected return address against our actual address. If we can
2164  // form this basic block's address as an immediate, this is easy. Otherwise
2165  // we compute it.
2166  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2167  !Subtarget->isPositionIndependent()) {
2168  // FIXME: Could we fold this with the load? It would require careful EFLAGS
2169  // management.
2170  BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2171  .addReg(ExpectedRetAddrReg, RegState::Kill)
2172  .addSym(RetSymbol);
2173  } else {
2174  Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2175  BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2176  .addReg(/*Base*/ X86::RIP)
2177  .addImm(/*Scale*/ 1)
2178  .addReg(/*Index*/ 0)
2179  .addSym(RetSymbol)
2180  .addReg(/*Segment*/ 0);
2181  BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2182  .addReg(ExpectedRetAddrReg, RegState::Kill)
2183  .addReg(ActualRetAddrReg, RegState::Kill);
2184  }
2185 
2186  // Now conditionally update the predicate state we just extracted if we ended
2187  // up at a different return address than expected.
2188  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2189  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2190 
2191  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2192  auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2193  .addReg(NewStateReg, RegState::Kill)
2194  .addReg(PS->PoisonReg)
2195  .addImm(X86::COND_NE);
2196  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
2197  ++NumInstsInserted;
2198  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2199 
2200  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2201 }
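// End-to-end sketch for a non-tail call in the small code model with a red zone
// available (register names and the label are illustrative):
//
//   shlq    $47, %state
//   orq     %state, %rsp           # hand our state to the callee in RSP
//   callq   callee
// .Lslh_ret_addr0:                 # post-instruction symbol attached to the call
//   movq    -8(%rsp), %expected    # return address RET left in the red zone
//   movq    %rsp, %newstate
//   sarq    $63, %newstate         # extract the callee's final predicate state
//   cmpq    $.Lslh_ret_addr0, %expected
//   cmovneq %poison, %newstate     # mispredicted return => poison the state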
2202 
2203 /// An attacker may speculatively store over a value that is then speculatively
2204 /// loaded and used as the target of an indirect call or jump instruction. This
2205 /// is called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described
2206 /// in this paper:
2207 /// https://people.csail.mit.edu/vlk/spectre11.pdf
2208 ///
2209 /// When this happens, the speculative execution of the call or jump will end up
2210 /// being steered to this attacker controlled address. While most such loads
2211 /// will be adequately hardened already, we want to ensure that they are
2212 /// definitively treated as needing post-load hardening. While address hardening
2213 /// is sufficient to prevent secret data from leaking to the attacker, it may
2214 /// not be sufficient to prevent an attacker from steering speculative
2215 /// execution. We forcibly unfolded all relevant loads above and so will always
2216 /// have an opportunity to post-load harden here; we just need to scan for cases
2217 /// not already flagged and add them.
2218 void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2219  MachineInstr &MI,
2220  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
2221  switch (MI.getOpcode()) {
2222  case X86::FARCALL16m:
2223  case X86::FARCALL32m:
2224  case X86::FARCALL64m:
2225  case X86::FARJMP16m:
2226  case X86::FARJMP32m:
2227  case X86::FARJMP64m:
2228  // We don't need to harden either far calls or far jumps as they are
2229  // safe from Spectre.
2230  return;
2231 
2232  default:
2233  break;
2234  }
2235 
2236  // We should never see a loading instruction at this point, as those should
2237  // have been unfolded.
2238  assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2239 
2240  // If the first operand isn't a register, this is a branch or call
2241  // instruction with an immediate operand which doesn't need to be hardened.
2242  if (!MI.getOperand(0).isReg())
2243  return;
2244 
2245  // For all of these, the target register is the first operand of the
2246  // instruction.
2247  auto &TargetOp = MI.getOperand(0);
2248  Register OldTargetReg = TargetOp.getReg();
2249 
2250  // Try to lookup a hardened version of this register. We retain a reference
2251  // here as we want to update the map to track any newly computed hardened
2252  // register.
2253  unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2254 
2255  // If we don't have a hardened register yet, compute one. Otherwise, just use
2256  // the already hardened register.
2257  //
2258  // FIXME: It is a little suspect that we use partially hardened registers that
2259  // only feed addresses. The complexity of partial hardening with SHRX
2260  // continues to pile up. Should definitively measure its value and consider
2261  // eliminating it.
2262  if (!HardenedTargetReg)
2263  HardenedTargetReg = hardenValueInRegister(
2264  OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2265 
2266  // Set the target operand to the hardened register.
2267  TargetOp.setReg(HardenedTargetReg);
2268 
2269  ++NumCallsOrJumpsHardened;
2270 }
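// For instance (illustrative registers), an indirect jump through a register
// that has not already been hardened becomes:
//
//   %target_hardened = OR64rr %state, %target
//   JMP64r %target_hardened
//
// so a misspeculating path sees an all-ones target rather than an
// attacker-controlled one.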
2271 
2272 INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2273  "X86 speculative load hardener", false, false)
2274 INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2275  "X86 speculative load hardener", false, false)
2276 
2277 FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2278  return new X86SpeculativeLoadHardeningPass();
2279 }