1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Provide a pass which mitigates speculative execution attacks which operate
11/// by speculating incorrectly past some predicate (a type check, bounds check,
12/// or other condition) to reach a load with invalid inputs and leak the data
13/// accessed by that load using a side channel out of the speculative domain.
14///
15/// For details on the attacks, see the first variant in both the Project Zero
16/// writeup and the Spectre paper:
17/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18/// https://spectreattack.com/spectre.pdf
19///
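///
/// At a high level (illustrative sketch, not the literal emitted code): the
/// pass threads a 64-bit "predicate state" through the function that is
/// all-zeros on correctly predicted paths and all-ones after misspeculation.
/// For an edge reached via `jne .Ltarget`, the target block receives roughly
/// `cmove %poison, %state`, so if the flags say the edge should not have been
/// taken the state becomes all-ones. That state is then OR'ed into load
/// addresses (or loaded values) so misspeculated loads see a harmless value.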
20//===----------------------------------------------------------------------===//
21
22#include "X86.h"
23#include "X86InstrInfo.h"
24#include "X86Subtarget.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/STLExtras.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/SparseBitVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/CodeGen/MachineBasicBlock.h"
34#include "llvm/CodeGen/MachineConstantPool.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineModuleInfo.h"
40#include "llvm/CodeGen/MachineOperand.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/MachineSSAUpdater.h"
43#include "llvm/CodeGen/TargetInstrInfo.h"
44#include "llvm/CodeGen/TargetRegisterInfo.h"
45#include "llvm/CodeGen/TargetSchedule.h"
46#include "llvm/CodeGen/TargetSubtargetInfo.h"
47#include "llvm/IR/DebugLoc.h"
48#include "llvm/MC/MCSchedule.h"
49#include "llvm/Pass.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/Support/raw_ostream.h"
53#include "llvm/Target/TargetMachine.h"
54#include <cassert>
55#include <iterator>
56#include <optional>
57
58using namespace llvm;
59
60#define PASS_KEY "x86-slh"
61#define DEBUG_TYPE PASS_KEY
62
63STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
64STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
65STATISTIC(NumAddrRegsHardened,
66 "Number of address mode used registers hardaned");
67STATISTIC(NumPostLoadRegsHardened,
68 "Number of post-load register values hardened");
69STATISTIC(NumCallsOrJumpsHardened,
70 "Number of calls or jumps requiring extra hardening");
71STATISTIC(NumInstsInserted, "Number of instructions inserted");
72STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
73
75 "x86-speculative-load-hardening",
76 cl::desc("Force enable speculative load hardening"), cl::init(false),
78
79static cl::opt<bool> HardenEdgesWithLFENCE(
80 PASS_KEY "-lfence",
81 cl::desc(
82 "Use LFENCE along each conditional edge to harden against speculative "
83 "loads rather than conditional movs and poisoned pointers."),
84 cl::init(false), cl::Hidden);
85
86static cl::opt<bool> EnablePostLoadHardening(
87 PASS_KEY "-post-load",
88 cl::desc("Harden the value loaded *after* it is loaded by "
89 "flushing the loaded bits to 1. This is hard to do "
90 "in general but can be done easily for GPRs."),
91 cl::init(true), cl::Hidden);
92
93static cl::opt<bool> FenceCallAndRet(
94 PASS_KEY "-fence-call-and-ret",
95 cl::desc("Use a full speculation fence to harden both call and ret edges "
96 "rather than a lighter weight mitigation."),
97 cl::init(false), cl::Hidden);
98
99static cl::opt<bool> HardenInterprocedurally(
100 PASS_KEY "-ip",
101 cl::desc("Harden interprocedurally by passing our state in and out of "
102 "functions in the high bits of the stack pointer."),
103 cl::init(true), cl::Hidden);
104
105static cl::opt<bool>
107 cl::desc("Sanitize loads from memory. When disable, no "
108 "significant security is provided."),
109 cl::init(true), cl::Hidden);
110
111static cl::opt<bool> HardenIndirectCallsAndJumps(
112 PASS_KEY "-indirect",
113 cl::desc("Harden indirect calls and jumps against using speculatively "
114 "stored attacker controlled addresses. This is designed to "
115 "mitigate Spectre v1.2 style attacks."),
116 cl::init(true), cl::Hidden);
117
118namespace {
119
120class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
121public:
122 X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
123
124 StringRef getPassName() const override {
125 return "X86 speculative load hardening";
126 }
127 bool runOnMachineFunction(MachineFunction &MF) override;
128 void getAnalysisUsage(AnalysisUsage &AU) const override;
129
130 /// Pass identification, replacement for typeid.
131 static char ID;
132
133private:
134 /// The information about a block's conditional terminators needed to trace
135 /// our predicate state through the exiting edges.
136 struct BlockCondInfo {
137 MachineBasicBlock *MBB;
138
139 // We mostly have one conditional branch, and in extremely rare cases have
140 // two. Three and more are so rare as to be unimportant for compile time.
141 SmallVector<MachineInstr *, 2> CondBrs;
142
143 MachineInstr *UncondBr;
144 };
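// Example (illustrative): a block ending in `JCC_1 %bb.then, <cc>; JMP_1
// %bb.else` yields CondBrs = { the JCC_1 } and UncondBr = the JMP_1, while a
// block that simply falls through to its successor has UncondBr == nullptr.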
145
146 /// Manages the predicate state traced through the program.
147 struct PredState {
148 Register InitialReg;
149 Register PoisonReg;
150
151 const TargetRegisterClass *RC;
152 MachineSSAUpdater SSA;
153
154 PredState(MachineFunction &MF, const TargetRegisterClass *RC)
155 : RC(RC), SSA(MF) {}
156 };
157
158 const X86Subtarget *Subtarget = nullptr;
159 MachineRegisterInfo *MRI = nullptr;
160 const X86InstrInfo *TII = nullptr;
161 const TargetRegisterInfo *TRI = nullptr;
162
163 std::optional<PredState> PS;
164
165 void hardenEdgesWithLFENCE(MachineFunction &MF);
166
167 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
168
169 SmallVector<MachineInstr *, 16>
170 tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
171
172 void unfoldCallAndJumpLoads(MachineFunction &MF);
173
174 SmallVector<MachineInstr *, 16>
175 tracePredStateThroughIndirectBranches(MachineFunction &MF);
176
177 void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
178
179 Register saveEFLAGS(MachineBasicBlock &MBB,
180 MachineBasicBlock::iterator InsertPt,
181 const DebugLoc &Loc);
182 void restoreEFLAGS(MachineBasicBlock &MBB,
183 MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
184 Register Reg);
185
186 void mergePredStateIntoSP(MachineBasicBlock &MBB,
187 MachineBasicBlock::iterator InsertPt,
188 const DebugLoc &Loc, Register PredStateReg);
189 Register extractPredStateFromSP(MachineBasicBlock &MBB,
190 MachineBasicBlock::iterator InsertPt,
191 const DebugLoc &Loc);
192
193 void
194 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
195 MachineOperand &IndexMO,
196 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg);
197 MachineInstr *
198 sinkPostLoadHardenedInst(MachineInstr &MI,
199 SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
200 bool canHardenRegister(Register Reg);
201 Register hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
202 MachineBasicBlock::iterator InsertPt,
203 const DebugLoc &Loc);
204 Register hardenPostLoad(MachineInstr &MI);
205 void hardenReturnInstr(MachineInstr &MI);
206 void tracePredStateThroughCall(MachineInstr &MI);
207 void hardenIndirectCallOrJumpInstr(
208 MachineInstr &MI,
209 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg);
210};
211
212} // end anonymous namespace
213
214char X86SpeculativeLoadHardeningPass::ID = 0;
215
216void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
217 AnalysisUsage &AU) const {
218 MachineFunctionPass::getAnalysisUsage(AU);
219}
220
221static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
222 MachineBasicBlock &Succ, int SuccCount,
223 MachineInstr *Br, MachineInstr *&UncondBr,
224 const X86InstrInfo &TII) {
225 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
226
227 MachineFunction &MF = *MBB.getParent();
228
229 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock(Succ.getBasicBlock());
230
231 // We have to insert the new block immediately after the current one as we
232 // don't know what layout-successor relationships the successor has and we
233 // may not be able to (and generally don't want to) try to fix those up.
234 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
235
236 // Update the branch instruction if necessary.
237 if (Br) {
238 assert(Br->getOperand(0).getMBB() == &Succ &&
239 "Didn't start with the right target!");
240 Br->getOperand(0).setMBB(&NewMBB);
241
242 // If this successor was reached through a branch rather than fallthrough,
243 // we might have *broken* fallthrough and so need to inject a new
244 // unconditional branch.
245 if (!UncondBr) {
246 MachineBasicBlock &OldLayoutSucc =
247 *std::next(MachineFunction::iterator(&NewMBB));
248 assert(MBB.isSuccessor(&OldLayoutSucc) &&
249 "Without an unconditional branch, the old layout successor should "
250 "be an actual successor!");
251 auto BrBuilder =
252 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
253 // Update the unconditional branch now that we've added one.
254 UncondBr = &*BrBuilder;
255 }
256
257 // Insert unconditional "jump Succ" instruction in the new block if
258 // necessary.
259 if (!NewMBB.isLayoutSuccessor(&Succ)) {
260 SmallVector<MachineOperand, 4> Cond;
261 TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
262 }
263 } else {
264 assert(!UncondBr &&
265 "Cannot have a branchless successor and an unconditional branch!");
266 assert(NewMBB.isLayoutSuccessor(&Succ) &&
267 "A non-branch successor must have been a layout successor before "
268 "and now is a layout successor of the new block.");
269 }
270
271 // If this is the only edge to the successor, we can just replace it in the
272 // CFG. Otherwise we need to add a new entry in the CFG for the new
273 // successor.
274 if (SuccCount == 1) {
275 MBB.replaceSuccessor(&Succ, &NewMBB);
276 } else {
277 MBB.splitSuccessor(&Succ, &NewMBB);
278 }
279
280 // Hook up the edge from the new basic block to the old successor in the CFG.
281 NewMBB.addSuccessor(&Succ);
282
283 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
284 for (MachineInstr &MI : Succ) {
285 if (!MI.isPHI())
286 break;
287 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
288 OpIdx += 2) {
289 MachineOperand &OpV = MI.getOperand(OpIdx);
290 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
291 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
292 if (OpMBB.getMBB() != &MBB)
293 continue;
294
295 // If this is the last edge to the successor, just replace MBB in the PHI.
296 if (SuccCount == 1) {
297 OpMBB.setMBB(&NewMBB);
298 break;
299 }
300
301 // Otherwise, append a new pair of operands for the new incoming edge.
302 MI.addOperand(MF, OpV);
303 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
304 break;
305 }
306 }
307
308 // Inherit live-ins from the successor
309 for (auto &LI : Succ.liveins())
310 NewMBB.addLiveIn(LI);
311
312 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
313 << Succ.getName() << "'.\n");
314 return NewMBB;
315}
316
317/// Remove duplicate PHI operands to leave the PHI in a canonical and
318/// predictable form.
319///
320/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
321/// isn't what you might expect. We may have multiple entries in PHI nodes for
322/// a single predecessor. This makes CFG-updating extremely complex, so here we
323/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
324/// one entry per predecessor, regardless of how many edges there are.
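///
/// For example (hypothetical MIR), a PHI such as
///   %v:gr64 = PHI %a, %bb.1, %b, %bb.1, %c, %bb.2
/// is rewritten to
///   %v:gr64 = PHI %a, %bb.1, %c, %bb.2
/// keeping only the first entry for each predecessor.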
325static void canonicalizePHIOperands(MachineFunction &MF) {
326 SmallPtrSet<MachineBasicBlock *, 4> Preds;
327 SmallVector<int, 4> DupIndices;
328 for (auto &MBB : MF)
329 for (auto &MI : MBB) {
330 if (!MI.isPHI())
331 break;
332
333 // First we scan the operands of the PHI looking for duplicate entries
334 // for a particular predecessor. We retain the operand index of each duplicate
335 // entry found.
336 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
337 OpIdx += 2)
338 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
339 DupIndices.push_back(OpIdx);
340
341 // Now walk the duplicate indices, removing both the block and value. Note
342 // that these are stored as a vector, making this element-wise removal
343 // potentially quadratic.
345 //
346 // FIXME: It is really frustrating that we have to use a quadratic
347 // removal algorithm here. There should be a better way, but the use-def
348 // updates required make that impossible using the public API.
349 //
350 // Note that we have to process these backwards so that we don't
351 // invalidate other indices with each removal.
352 while (!DupIndices.empty()) {
353 int OpIdx = DupIndices.pop_back_val();
354 // Remove both the block and value operand, again in reverse order to
355 // preserve indices.
356 MI.removeOperand(OpIdx + 1);
357 MI.removeOperand(OpIdx);
358 }
359
360 Preds.clear();
361 }
362}
363
364/// Helper to scan a function for loads vulnerable to misspeculation that we
365/// want to harden.
366///
367/// We use this to avoid making changes to functions where there is nothing we
368/// need to do to harden against misspeculation.
369static bool hasVulnerableLoad(MachineFunction &MF) {
370 for (MachineBasicBlock &MBB : MF) {
371 for (MachineInstr &MI : MBB) {
372 // Loads within this basic block after an LFENCE are not at risk of
373 // speculatively executing with invalid predicates from prior control
374 // flow. So break out of this block but continue scanning the function.
375 if (MI.getOpcode() == X86::LFENCE)
376 break;
377
378 // Looking for loads only.
379 if (!MI.mayLoad())
380 continue;
381
382 // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
383 if (MI.getOpcode() == X86::MFENCE)
384 continue;
385
386 // We found a load.
387 return true;
388 }
389 }
390
391 // No loads found.
392 return false;
393}
394
395bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
396 MachineFunction &MF) {
397 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
398 << " **********\n");
399
400 // Only run if this pass is force-enabled or we detect the relevant function
401 // attribute requesting SLH.
402 if (!EnableSpeculativeLoadHardening &&
403 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
404 return false;
405
406 Subtarget = &MF.getSubtarget<X86Subtarget>();
407 MRI = &MF.getRegInfo();
408 TII = Subtarget->getInstrInfo();
409 TRI = Subtarget->getRegisterInfo();
410
411 // FIXME: Support for 32-bit.
412 PS.emplace(MF, &X86::GR64_NOSPRegClass);
413
414 if (MF.begin() == MF.end())
415 // Nothing to do for a degenerate empty function...
416 return false;
417
418 // We support an alternative hardening technique based on a debug flag.
419 if (HardenEdgesWithLFENCE) {
420 hardenEdgesWithLFENCE(MF);
421 return true;
422 }
423
424 // Create a dummy debug loc to use for all the generated code here.
425 DebugLoc Loc;
426
427 MachineBasicBlock &Entry = *MF.begin();
428 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
429
430 // Do a quick scan to see if we have any checkable loads.
431 bool HasVulnerableLoad = hasVulnerableLoad(MF);
432
433 // See if we have any conditional branching blocks that we will need to trace
434 // predicate state through.
435 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
436
437 // If we have no interesting conditions or loads, nothing to do here.
438 if (!HasVulnerableLoad && Infos.empty())
439 return true;
440
441 // The poison value is required to be an all-ones value for many aspects of
442 // this mitigation.
443 const int PoisonVal = -1;
444 PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
445 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
446 .addImm(PoisonVal);
447 ++NumInstsInserted;
448
449 // If we have loads being hardened and we've asked for call and ret edges to
450 // get a full fence-based mitigation, inject that fence.
451 if (HasVulnerableLoad && FenceCallAndRet) {
452 // We need to insert an LFENCE at the start of the function to suspend any
453 // incoming misspeculation from the caller. This helps two-fold: the caller
454 // may not have been protected as this code has been, and this code gets to
455 // not take any specific action to protect across calls.
456 // FIXME: We could skip this for functions which unconditionally return
457 // a constant.
458 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
459 ++NumInstsInserted;
460 ++NumLFENCEsInserted;
461 }
462
463 // If we guarded the entry with an LFENCE and have no conditionals to protect
464 // in blocks, then we're done.
465 if (FenceCallAndRet && Infos.empty())
466 // We may have changed the function's code at this point to insert fences.
467 return true;
468
469
470 if (HardenInterprocedurally && !FenceCallAndRet) {
471 // Set up the predicate state by extracting it from the incoming stack
472 // pointer so we pick up any misspeculation in our caller.
473 PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
474 } else {
475 // Otherwise, just build the predicate state itself by zeroing a register
476 // as we don't need any initial state.
477 PS->InitialReg = MRI->createVirtualRegister(PS->RC);
478 Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
479 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
480 PredStateSubReg);
481 ++NumInstsInserted;
482 MachineOperand *ZeroEFLAGSDefOp =
483 ZeroI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
484 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
485 "Must have an implicit def of EFLAGS!");
486 ZeroEFLAGSDefOp->setIsDead(true);
487 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
488 PS->InitialReg)
489 .addImm(0)
490 .addReg(PredStateSubReg)
491 .addImm(X86::sub_32bit);
492 }
493
494 // We're going to need to trace predicate state throughout the function's
495 // CFG. Prepare for this by setting up our initial state of PHIs with unique
496 // predecessor entries and all the initial predicate state.
497 canonicalizePHIOperands(MF);
498
499 // Track the updated values in an SSA updater to rewrite into SSA form at the
500 // end.
501 PS->SSA.Initialize(PS->InitialReg);
502 PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
503
504 // Trace through the CFG.
505 auto CMovs = tracePredStateThroughCFG(MF, Infos);
506
507 // We may also enter basic blocks in this function via exception handling
508 // control flow. Here, if we are hardening interprocedurally, we need to
509 // re-capture the predicate state from the throwing code. In the Itanium ABI,
510 // the throw will always look like a call to __cxa_throw and will have the
511 // predicate state in the stack pointer, so extract fresh predicate state from
512 // the stack pointer and make it available in SSA.
513 // FIXME: Handle non-Itanium ABI EH models.
514 if (HardenInterprocedurally) {
515 for (MachineBasicBlock &MBB : MF) {
516 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
517 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
518 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
519 if (!MBB.isEHPad())
520 continue;
521 PS->SSA.AddAvailableValue(
522 &MBB,
523 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
524 }
525 }
526
527 if (HardenIndirectCallsAndJumps) {
528 // If we are going to harden calls and jumps we need to unfold their memory
529 // operands.
530 unfoldCallAndJumpLoads(MF);
531
532 // Then we trace predicate state through the indirect branches.
533 auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
534 CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
535 }
536
537 // Now that we have the predicate state available at the start of each block
538 // in the CFG, trace it through each block, hardening vulnerable instructions
539 // as we go.
540 tracePredStateThroughBlocksAndHarden(MF);
541
542 // Now rewrite all the uses of the pred state using the SSA updater to insert
543 // PHIs connecting the state between blocks along the CFG edges.
544 for (MachineInstr *CMovI : CMovs)
545 for (MachineOperand &Op : CMovI->operands()) {
546 if (!Op.isReg() || Op.getReg() != PS->InitialReg)
547 continue;
548
549 PS->SSA.RewriteUse(Op);
550 }
551
552 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
553 dbgs() << "\n"; MF.verify(this));
554 return true;
555}
556
557/// Implements the naive hardening approach of putting an LFENCE after every
558/// potentially mis-predicted control flow construct.
559///
560/// We include this as an alternative mostly for the purpose of comparison. The
561/// performance impact of this is expected to be extremely severe and not
562/// practical for any real-world users.
563void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
564 MachineFunction &MF) {
565 // First, we scan the function looking for blocks that are reached along edges
566 // that we might want to harden.
567 SmallSetVector<MachineBasicBlock *, 8> Blocks;
568 for (MachineBasicBlock &MBB : MF) {
569 // If there are no or only one successor, nothing to do here.
570 if (MBB.succ_size() <= 1)
571 continue;
572
573 // Skip blocks unless their terminators start with a branch. Other
574 // terminators don't seem interesting for guarding against misspeculation.
575 auto TermIt = MBB.getFirstTerminator();
576 if (TermIt == MBB.end() || !TermIt->isBranch())
577 continue;
578
579 // Add all the non-EH-pad successors to the blocks we want to harden. We
580 // skip EH pads because there isn't really a condition of interest on
581 // entering.
582 for (MachineBasicBlock *SuccMBB : MBB.successors())
583 if (!SuccMBB->isEHPad())
584 Blocks.insert(SuccMBB);
585 }
586
587 for (MachineBasicBlock *MBB : Blocks) {
588 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
589 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
590 ++NumInstsInserted;
591 ++NumLFENCEsInserted;
592 }
593}
594
595SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
596X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
597 SmallVector<BlockCondInfo, 16> Infos;
598
599 // Walk the function and build up a summary for each block's conditions that
600 // we need to trace through.
601 for (MachineBasicBlock &MBB : MF) {
602 // If there are no or only one successor, nothing to do here.
603 if (MBB.succ_size() <= 1)
604 continue;
605
606 // We want to reliably handle any conditional branch terminators in the
607 // MBB, so we manually analyze the branch. We can handle all of the
608 // permutations here, including ones that analyzeBranch cannot.
609 //
610 // The approach is to walk backwards across the terminators, resetting at
611 // any unconditional non-indirect branch, and track all conditional edges
612 // to basic blocks as well as the fallthrough or unconditional successor
613 // edge. For each conditional edge, we track the target and the opposite
614 // condition code in order to inject a "no-op" cmov into that successor
615 // that will harden the predicate. For the fallthrough/unconditional
616 // edge, we inject a separate cmov for each conditional branch with
617 // matching condition codes. This effectively implements an "and" of the
618 // condition flags, even if there isn't a single condition flag that would
619 // directly implement that. We don't bother trying to optimize either of
620 // these cases because if such an optimization is possible, LLVM should
621 // have optimized the conditional *branches* in that way already to reduce
622 // instruction count. This late, we simply assume the minimal number of
623 // branch instructions is being emitted and use that to guide our cmov
624 // insertion.
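    //
    // For example (sketch): a block ending in
    //   jne %bb.target
    //   jmp %bb.fallthrough
    // records the JNE in CondBrs and the JMP as UncondBr; later we emit a
    // CMOVE of the poison value on the %bb.target edge and a CMOVNE on the
    // %bb.fallthrough edge.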
625
626 BlockCondInfo Info = {&MBB, {}, nullptr};
627
628 // Now walk backwards through the terminators and build up successors they
629 // reach and the conditions.
630 for (MachineInstr &MI : llvm::reverse(MBB)) {
631 // Once we've handled all the terminators, we're done.
632 if (!MI.isTerminator())
633 break;
634
635 // If we see a non-branch terminator, we can't handle anything so bail.
636 if (!MI.isBranch()) {
637 Info.CondBrs.clear();
638 break;
639 }
640
641 // If we see an unconditional branch, reset our state, clear any
642 // fallthrough, and set this as the "else" successor.
643 if (MI.getOpcode() == X86::JMP_1) {
644 Info.CondBrs.clear();
645 Info.UncondBr = &MI;
646 continue;
647 }
648
649 // If we get an invalid condition, we have an indirect branch or some
650 // other unanalyzable "fallthrough" case. We model this as a nullptr for
651 // the destination so we can still guard any conditional successors.
652 // Consider code sequences like:
653 // ```
654 // jCC L1
655 // jmpq *%rax
656 // ```
657 // We still want to harden the edge to `L1`.
658 if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
659 Info.CondBrs.clear();
660 Info.UncondBr = &MI;
661 continue;
662 }
663
664 // We have a vanilla conditional branch, add it to our list.
665 Info.CondBrs.push_back(&MI);
666 }
667 if (Info.CondBrs.empty()) {
668 ++NumBranchesUntraced;
669 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
670 MBB.dump());
671 continue;
672 }
673
674 Infos.push_back(Info);
675 }
676
677 return Infos;
678}
679
680/// Trace the predicate state through the CFG, instrumenting each conditional
681/// branch such that misspeculation through an edge will poison the predicate
682/// state.
683///
684/// Returns the list of inserted CMov instructions so that they can have their
685/// uses of the predicate state rewritten into proper SSA form once it is
686/// complete.
687SmallVector<MachineInstr *, 16>
688X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
689 MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
690 // Collect the inserted cmov instructions so we can rewrite their uses of the
691 // predicate state into SSA form.
692 SmallVector<MachineInstr *, 16> CMovs;
693
694 // Now walk all of the basic blocks looking for ones that end in conditional
695 // jumps where we need to update this register along each edge.
696 for (const BlockCondInfo &Info : Infos) {
697 MachineBasicBlock &MBB = *Info.MBB;
698 const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
699 MachineInstr *UncondBr = Info.UncondBr;
700
701 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
702 << "\n");
703 ++NumCondBranchesTraced;
704
705 // Compute the non-conditional successor as either the target of any
706 // unconditional branch or the layout successor.
707 MachineBasicBlock *UncondSucc =
708 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
709 ? UncondBr->getOperand(0).getMBB()
710 : nullptr)
711 : &*std::next(MachineFunction::iterator(&MBB));
712
713 // Count how many edges there are to any given successor.
714 SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
715 if (UncondSucc)
716 ++SuccCounts[UncondSucc];
717 for (auto *CondBr : CondBrs)
718 ++SuccCounts[CondBr->getOperand(0).getMBB()];
719
720 // A lambda to insert cmov instructions into a block checking all of the
721 // condition codes in a sequence.
722 auto BuildCheckingBlockForSuccAndConds =
723 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
724 MachineInstr *Br, MachineInstr *&UncondBr,
725 ArrayRef<X86::CondCode> Conds) {
726 // First, we split the edge to insert the checking block into a safe
727 // location.
728 auto &CheckingMBB =
729 (SuccCount == 1 && Succ.pred_size() == 1)
730 ? Succ
731 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
732
733 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
734 if (!LiveEFLAGS)
735 CheckingMBB.addLiveIn(X86::EFLAGS);
736
737 // Now insert the cmovs to implement the checks.
738 auto InsertPt = CheckingMBB.begin();
739 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
740 "Should never have a PHI in the initial checking block as it "
741 "always has a single predecessor!");
742
743 // We will wire each cmov to each other, but need to start with the
744 // incoming pred state.
745 Register CurStateReg = PS->InitialReg;
746
747 for (X86::CondCode Cond : Conds) {
748 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
749 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
750
751 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
752 // Note that we intentionally use an empty debug location so that
753 // this picks up the preceding location.
754 auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
755 TII->get(CMovOp), UpdatedStateReg)
756 .addReg(CurStateReg)
757 .addReg(PS->PoisonReg)
758 .addImm(Cond);
759 // If this is the last cmov and the EFLAGS weren't originally
760 // live-in, mark them as killed.
761 if (!LiveEFLAGS && Cond == Conds.back())
762 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
763 ->setIsKill(true);
764
765 ++NumInstsInserted;
766 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
767 dbgs() << "\n");
768
769 // The first one of the cmovs will be using the top level
770 // `PredStateReg` and needs to get rewritten into SSA form.
771 if (CurStateReg == PS->InitialReg)
772 CMovs.push_back(&*CMovI);
773
774 // The next cmov should start from this one's def.
775 CurStateReg = UpdatedStateReg;
776 }
777
778 // And put the last one into the available values for SSA form of our
779 // predicate state.
780 PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
781 };
782
783 std::vector<X86::CondCode> UncondCodeSeq;
784 for (auto *CondBr : CondBrs) {
785 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
786 int &SuccCount = SuccCounts[&Succ];
787
788 X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
789 X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
790 UncondCodeSeq.push_back(Cond);
791
792 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
793 {InvCond});
794
795 // Decrement the successor count now that we've split one of the edges.
796 // We need to keep the count of edges to the successor accurate in order
797 // to know above when to *replace* the successor in the CFG vs. just
798 // adding the new successor.
799 --SuccCount;
800 }
801
802 // Since we may have split edges and changed the number of successors,
803 // normalize the probabilities. This avoids doing it each time we split an
804 // edge.
805 MBB.normalizeSuccProbs();
806
807 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
808 // need to intersect the other condition codes. We can do this by just
809 // doing a cmov for each one.
810 if (!UncondSucc)
811 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
812 // just skip this and continue.
813 continue;
814
815 assert(SuccCounts[UncondSucc] == 1 &&
816 "We should never have more than one edge to the unconditional "
817 "successor at this point because every other edge must have been "
818 "split above!");
819
820 // Sort and unique the codes to minimize them.
821 llvm::sort(UncondCodeSeq);
822 UncondCodeSeq.erase(llvm::unique(UncondCodeSeq), UncondCodeSeq.end());
823
824 // Build a checking version of the successor.
825 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
826 UncondBr, UncondBr, UncondCodeSeq);
827 }
828
829 return CMovs;
830}
831
832/// Compute the register class for the unfolded load.
833///
834/// FIXME: This should probably live in X86InstrInfo, potentially by adding
835/// a way to unfold into a newly created vreg rather than requiring a register
836/// input.
837static const TargetRegisterClass *
838getRegClassForUnfoldedLoad(const X86InstrInfo &TII, unsigned Opcode) {
839 unsigned Index;
840 unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
841 Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
842 const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
843 return TII.getRegClass(MCID, Index);
844}
845
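// Unfold the memory operand of loading calls and jumps so the branch target is
// first loaded into a (virtual) register that can then be hardened. Roughly
// (illustrative), `callq *(%rax)` becomes:
//   movq (%rax), %reg
//   callq *%reg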
846void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
847 MachineFunction &MF) {
848 for (MachineBasicBlock &MBB : MF)
849 // We use make_early_inc_range here so we can remove instructions if needed
850 // without disturbing the iteration.
851 for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) {
852 // Must either be a call or a branch.
853 if (!MI.isCall() && !MI.isBranch())
854 continue;
855 // We only care about loading variants of these instructions.
856 if (!MI.mayLoad())
857 continue;
858
859 switch (MI.getOpcode()) {
860 default: {
862 dbgs() << "ERROR: Found an unexpected loading branch or call "
863 "instruction:\n";
864 MI.dump(); dbgs() << "\n");
865 report_fatal_error("Unexpected loading branch or call!");
866 }
867
868 case X86::FARCALL16m:
869 case X86::FARCALL32m:
870 case X86::FARCALL64m:
871 case X86::FARJMP16m:
872 case X86::FARJMP32m:
873 case X86::FARJMP64m:
874 // We cannot mitigate far jumps or calls, but we also don't expect them
875 // to be vulnerable to Spectre v1.2 style attacks.
876 continue;
877
878 case X86::CALL16m:
879 case X86::CALL16m_NT:
880 case X86::CALL32m:
881 case X86::CALL32m_NT:
882 case X86::CALL64m:
883 case X86::CALL64m_NT:
884 case X86::JMP16m:
885 case X86::JMP16m_NT:
886 case X86::JMP32m:
887 case X86::JMP32m_NT:
888 case X86::JMP64m:
889 case X86::JMP64m_NT:
890 case X86::TAILJMPm64:
891 case X86::TAILJMPm64_REX:
892 case X86::TAILJMPm:
893 case X86::TCRETURNmi64:
894 case X86::TCRETURN_WINmi64:
895 case X86::TCRETURNmi: {
896 // Use the generic unfold logic now that we know we're dealing with
897 // expected instructions.
898 // FIXME: We don't have test coverage for all of these!
899 auto *UnfoldedRC = getRegClassForUnfoldedLoad(*TII, MI.getOpcode());
900 if (!UnfoldedRC) {
902 << "ERROR: Unable to unfold load from instruction:\n";
903 MI.dump(); dbgs() << "\n");
904 report_fatal_error("Unable to unfold load!");
905 }
906 Register Reg = MRI->createVirtualRegister(UnfoldedRC);
907 SmallVector<MachineInstr *, 2> NewMIs;
908 // If we were able to compute an unfolded reg class, any failure here
909 // is just a programming error so just assert.
910 bool Unfolded =
911 TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
912 /*UnfoldStore*/ false, NewMIs);
913 (void)Unfolded;
914 assert(Unfolded &&
915 "Computed unfolded register class but failed to unfold");
916 // Now stitch the new instructions into place and erase the old one.
917 for (auto *NewMI : NewMIs)
918 MBB.insert(MI.getIterator(), NewMI);
919
920 // Update the call info.
921 if (MI.isCandidateForAdditionalCallInfo())
922 MF.eraseAdditionalCallInfo(&MI);
923
924 MI.eraseFromParent();
925 LLVM_DEBUG({
926 dbgs() << "Unfolded load successfully into:\n";
927 for (auto *NewMI : NewMIs) {
928 NewMI->dump();
929 dbgs() << "\n";
930 }
931 });
932 continue;
933 }
934 }
935 llvm_unreachable("Escaped switch with default!");
936 }
937}
938
939/// Trace the predicate state through indirect branches, instrumenting them to
940/// poison the state if a target is reached that does not match the expected
941/// target.
942///
943/// This is designed to mitigate Spectre variant 1 attacks where an indirect
944/// branch is trained to predict a particular target and then mispredicts that
945/// target in a way that can leak data. Despite using an indirect branch, this
946/// is really a variant 1 style attack: it does not steer execution to an
947/// arbitrary or attacker controlled address, and it does not require any
948/// special code executing next to the victim. This attack can also be mitigated
949/// through retpolines, but those require either replacing indirect branches
950/// with conditional direct branches or lowering them through a device that
951/// blocks speculation. This mitigation can replace these retpoline-style
952/// mitigations for jump tables and other indirect branches within a function
953/// when variant 2 isn't a risk while allowing limited speculation. Indirect
954/// calls, however, cannot be mitigated through this technique without changing
955/// the ABI in a fundamental way.
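///
/// The emitted check is roughly (illustrative sketch) the following at the top
/// of each block reachable from an indirect branch:
///   leaq .Lthis_block(%rip), %addr
///   cmpq %addr, %target      # %target = address the predecessor jumped to
///   cmovneq %poison, %state  # poison the state if we arrived here while
///                            # misspeculating to the wrong target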
956SmallVector<MachineInstr *, 16>
957X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
958 MachineFunction &MF) {
959 // We use the SSAUpdater to insert PHI nodes for the target addresses of
960 // indirect branches. We don't actually need the full power of the SSA updater
961 // in this particular case as we always have immediately available values, but
962 // this avoids us having to re-implement the PHI construction logic.
963 MachineSSAUpdater TargetAddrSSA(MF);
964 TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
965
966 // Track which blocks were terminated with an indirect branch.
967 SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
968
969 // We need to know what blocks end up reached via indirect branches. We
970 // expect this to be a subset of those whose address is taken and so track it
971 // directly via the CFG.
972 SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
973
974 // Walk all the blocks which end in an indirect branch and make the
975 // target address available.
976 for (MachineBasicBlock &MBB : MF) {
977 // Find the last terminator.
978 auto MII = MBB.instr_rbegin();
979 while (MII != MBB.instr_rend() && MII->isDebugInstr())
980 ++MII;
981 if (MII == MBB.instr_rend())
982 continue;
983 MachineInstr &TI = *MII;
984 if (!TI.isTerminator() || !TI.isBranch())
985 // No terminator or non-branch terminator.
986 continue;
987
988 Register TargetReg;
989
990 switch (TI.getOpcode()) {
991 default:
992 // Direct branch or conditional branch (leading to fallthrough).
993 continue;
994
995 case X86::FARJMP16m:
996 case X86::FARJMP32m:
997 case X86::FARJMP64m:
998 // We cannot mitigate far jumps or calls, but we also don't expect them
999 // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1000 continue;
1001
1002 case X86::JMP16m:
1003 case X86::JMP16m_NT:
1004 case X86::JMP32m:
1005 case X86::JMP32m_NT:
1006 case X86::JMP64m:
1007 case X86::JMP64m_NT:
1008 // Mostly as documentation.
1009 report_fatal_error("Memory operand jumps should have been unfolded!");
1010
1011 case X86::JMP16r:
1013 "Support for 16-bit indirect branches is not implemented.");
1014 case X86::JMP32r:
1016 "Support for 32-bit indirect branches is not implemented.");
1017
1018 case X86::JMP64r:
1019 TargetReg = TI.getOperand(0).getReg();
1020 }
1021
1022 // We have definitely found an indirect branch. Verify that there are no
1023 // preceding conditional branches as we don't yet support that.
1024 if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1025 return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1026 })) {
1027 LLVM_DEBUG({
1028 dbgs() << "ERROR: Found other terminators in a block with an indirect "
1029 "branch! This is not yet supported! Terminator sequence:\n";
1030 for (MachineInstr &MI : MBB.terminators()) {
1031 MI.dump();
1032 dbgs() << '\n';
1033 }
1034 });
1035 report_fatal_error("Unimplemented terminator sequence!");
1036 }
1037
1038 // Make the target register an available value for this block.
1039 TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1040 IndirectTerminatedMBBs.insert(&MBB);
1041
1042 // Add all the successors to our target candidates.
1043 IndirectTargetMBBs.insert_range(MBB.successors());
1044 }
1045
1046 // Keep track of the cmov instructions we insert so we can return them.
1047 SmallVector<MachineInstr *, 16> CMovs;
1048
1049 // If we didn't find any indirect branches with targets, nothing to do here.
1050 if (IndirectTargetMBBs.empty())
1051 return CMovs;
1052
1053 // We found indirect branches and targets that need to be instrumented to
1054 // harden loads within them. Walk the blocks of the function (to get a stable
1055 // ordering) and instrument each target of an indirect branch.
1056 for (MachineBasicBlock &MBB : MF) {
1057 // Skip the blocks that aren't candidate targets.
1058 if (!IndirectTargetMBBs.count(&MBB))
1059 continue;
1060
1061 // We don't expect EH pads to ever be reached via an indirect branch. If
1062 // this is desired for some reason, we could simply skip them here rather
1063 // than asserting.
1064 assert(!MBB.isEHPad() &&
1065 "Unexpected EH pad as target of an indirect branch!");
1066
1067 // We should never end up threading EFLAGS into a block to harden
1068 // conditional jumps as there would be an additional successor via the
1069 // indirect branch. As a consequence, all such edges would be split before
1070 // reaching here, and the inserted block will handle the EFLAGS-based
1071 // hardening.
1072 assert(!MBB.isLiveIn(X86::EFLAGS) &&
1073 "Cannot check within a block that already has live-in EFLAGS!");
1074
1075 // We can't handle having non-indirect edges into this block unless this is
1076 // the only successor and we can synthesize the necessary target address.
1077 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1078 // If we've already handled this by extracting the target directly,
1079 // nothing to do.
1080 if (IndirectTerminatedMBBs.count(Pred))
1081 continue;
1082
1083 // Otherwise, we have to be the only successor. We generally expect this
1084 // to be true as conditional branches should have had a critical edge
1085 // split already. We don't however need to worry about EH pad successors
1086 // as they'll happily ignore the target and their hardening strategy is
1087 // resilient to all ways in which they could be reached speculatively.
1088 if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1089 return Succ->isEHPad() || Succ == &MBB;
1090 })) {
1091 LLVM_DEBUG({
1092 dbgs() << "ERROR: Found conditional entry to target of indirect "
1093 "branch!\n";
1094 Pred->dump();
1095 MBB.dump();
1096 });
1097 report_fatal_error("Cannot harden a conditional entry to a target of "
1098 "an indirect branch!");
1099 }
1100
1101 // Now we need to compute the address of this block and install it as a
1102 // synthetic target in the predecessor. We do this at the bottom of the
1103 // predecessor.
1104 auto InsertPt = Pred->getFirstTerminator();
1105 Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1106 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1107 !Subtarget->isPositionIndependent()) {
1108 // Directly materialize it into an immediate.
1109 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1110 TII->get(X86::MOV64ri32), TargetReg)
1111 .addMBB(&MBB);
1112 ++NumInstsInserted;
1113 (void)AddrI;
1114 LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1115 dbgs() << "\n");
1116 } else {
1117 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1118 TargetReg)
1119 .addReg(/*Base*/ X86::RIP)
1120 .addImm(/*Scale*/ 1)
1121 .addReg(/*Index*/ 0)
1122 .addMBB(&MBB)
1123 .addReg(/*Segment*/ 0);
1124 ++NumInstsInserted;
1125 (void)AddrI;
1126 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1127 dbgs() << "\n");
1128 }
1129 // And make this available.
1130 TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1131 }
1132
1133 // Materialize the needed SSA value of the target. Note that we need the
1134 // value in the middle of the block, as the block might end with an indirect
1135 // branch back to itself. We can do this here because at this point, every
1136 // predecessor of this block has an available value. This is basically just
1137 // automating the construction of a PHI node for this target.
1138 Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1139
1140 // Insert a comparison of the incoming target register with this block's
1141 // address. This also requires us to mark the block as having its address
1142 // taken explicitly.
1143 MBB.setMachineBlockAddressTaken();
1144 auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1145 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1146 !Subtarget->isPositionIndependent()) {
1147 // Check directly against a relocated immediate when we can.
1148 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1149 .addReg(TargetReg, RegState::Kill)
1150 .addMBB(&MBB);
1151 ++NumInstsInserted;
1152 (void)CheckI;
1153 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1154 } else {
1155 // Otherwise compute the address into a register first.
1156 Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1157 auto AddrI =
1158 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1159 .addReg(/*Base*/ X86::RIP)
1160 .addImm(/*Scale*/ 1)
1161 .addReg(/*Index*/ 0)
1162 .addMBB(&MBB)
1163 .addReg(/*Segment*/ 0);
1164 ++NumInstsInserted;
1165 (void)AddrI;
1166 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1167 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1168 .addReg(TargetReg, RegState::Kill)
1169 .addReg(AddrReg, RegState::Kill);
1170 ++NumInstsInserted;
1171 (void)CheckI;
1172 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1173 }
1174
1175 // Now cmov over the predicate if the comparison wasn't equal.
1176 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1177 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1178 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1179 auto CMovI =
1180 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1181 .addReg(PS->InitialReg)
1182 .addReg(PS->PoisonReg)
1183 .addImm(X86::COND_NE);
1184 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
1185 ->setIsKill(true);
1186 ++NumInstsInserted;
1187 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1188 CMovs.push_back(&*CMovI);
1189
1190 // And put the new value into the available values for SSA form of our
1191 // predicate state.
1192 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1193 }
1194
1195 // Return all the newly inserted cmov instructions of the predicate state.
1196 return CMovs;
1197}
1198
1199// Returns true if the MI has EFLAGS as a register def operand and it's live,
1200// otherwise it returns false
1201static bool isEFLAGSDefLive(const MachineInstr &MI) {
1202 if (const MachineOperand *DefOp =
1203 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1204 return !DefOp->isDead();
1205 }
1206 return false;
1207}
1208
1209static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1210 const TargetRegisterInfo &TRI) {
1211 // Check if EFLAGS are alive by seeing if there is a def of them or they
1212 // live-in, and then seeing if that def is in turn used.
1213 for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1214 if (MachineOperand *DefOp =
1215 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1216 // If the def is dead, then EFLAGS is not live.
1217 if (DefOp->isDead())
1218 return false;
1219
1220 // Otherwise we've def'ed it, and it is live.
1221 return true;
1222 }
1223 // While at this instruction, also check if we use and kill EFLAGS
1224 // which means it isn't live.
1225 if (MI.killsRegister(X86::EFLAGS, &TRI))
1226 return false;
1227 }
1228
1229 // If we didn't find anything conclusive (neither definitely alive or
1230 // definitely dead) return whether it lives into the block.
1231 return MBB.isLiveIn(X86::EFLAGS);
1232}
1233
1234/// Trace the predicate state through each of the blocks in the function,
1235/// hardening everything necessary along the way.
1236///
1237/// We call this routine once the initial predicate state has been established
1238/// for each basic block in the function in the SSA updater. This routine traces
1239/// it through the instructions within each basic block, and for non-returning
1240/// blocks informs the SSA updater about the final state that lives out of the
1241/// block. Along the way, it hardens any vulnerable instruction using the
1242/// currently valid predicate state. We have to do these two things together
1243/// because the SSA updater only works across blocks. Within a block, we track
1244/// the current predicate state directly and update it as it changes.
1245///
1246/// This operates in two passes over each block. First, we analyze the loads in
1247/// the block to determine which strategy will be used to harden them: hardening
1248/// the address or hardening the loaded value when loaded into a register
1249/// amenable to hardening. We have to process these first because the two
1250/// strategies may interact -- later hardening may change what strategy we wish
1251/// to use. We also will analyze data dependencies between loads and avoid
1252/// hardening those loads that are data dependent on a load with a hardened
1253/// address. We also skip hardening loads already behind an LFENCE as that is
1254/// sufficient to harden them against misspeculation.
1255///
1256/// Second, we actively trace the predicate state through the block, applying
1257/// the hardening steps we determined necessary in the first pass as we go.
1258///
1259/// These two passes are applied to each basic block. We operate one block at a
1260/// time to simplify reasoning about reachability and sequencing.
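///
/// As a concrete (illustrative) example of the two strategies for a load such
/// as `movq (%rsi), %rax`: address hardening ORs the predicate state into %rsi
/// before the load executes, while post-load hardening instead ORs the state
/// into %rax after the load, flushing the loaded bits to all-ones whenever the
/// state is poisoned.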
1261void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1262 MachineFunction &MF) {
1263 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1264 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1265
1266 SmallSet<Register, 16> HardenedAddrRegs;
1267
1268 SmallDenseMap<Register, Register, 32> AddrRegToHardenedReg;
1269
1270 // Track the set of load-dependent registers through the basic block. Because
1271 // the values of these registers have an existing data dependency on a loaded
1272 // value which we would have checked, we can omit any checks on them.
1273 SparseBitVector<> LoadDepRegs;
1274
1275 for (MachineBasicBlock &MBB : MF) {
1276 // The first pass over the block: collect all the loads which can have their
1277 // loaded value hardened and all the loads that instead need their address
1278 // hardened. During this walk we propagate load dependence for address
1279 // hardened loads and also look for LFENCE to stop hardening wherever
1280 // possible. When deciding whether or not to harden the loaded value,
1281 // we check to see if any registers used in the address will have been
1282 // hardened at this point and if so, harden any remaining address registers
1283 // as that often successfully re-uses hardened addresses and minimizes
1284 // instructions.
1285 //
1286 // FIXME: We should consider an aggressive mode where we continue to keep as
1287 // many loads value-hardened even when some address register hardening would
1288 // be free (due to reuse).
1289 //
1290 // Note that we only need this pass if we are actually hardening loads.
1291 if (HardenLoads)
1292 for (MachineInstr &MI : MBB) {
1293 // We naively assume that all def'ed registers of an instruction have
1294 // a data dependency on all of their operands.
1295 // FIXME: Do a more careful analysis of x86 to build a conservative
1296 // model here.
1297 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1298 return Op.isReg() && LoadDepRegs.test(Op.getReg().id());
1299 }))
1300 for (MachineOperand &Def : MI.defs())
1301 if (Def.isReg())
1302 LoadDepRegs.set(Def.getReg().id());
1303
1304 // Both Intel and AMD are guiding that they will change the semantics of
1305 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1306 // no more need to guard things in this block.
1307 if (MI.getOpcode() == X86::LFENCE)
1308 break;
1309
1310 // If this instruction cannot load, nothing to do.
1311 if (!MI.mayLoad())
1312 continue;
1313
1314 // Some instructions which "load" are trivially safe or unimportant.
1315 if (MI.getOpcode() == X86::MFENCE)
1316 continue;
1317
1318 // Extract the memory operand information about this instruction.
1319 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1320 if (MemRefBeginIdx < 0) {
1322 << "WARNING: unable to harden loading instruction: ";
1323 MI.dump());
1324 continue;
1325 }
1326
1327 MachineOperand &BaseMO =
1328 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1329 MachineOperand &IndexMO =
1330 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1331
1332 // If we have at least one (non-frame-index, non-RIP) register operand,
1333 // and neither operand is load-dependent, we need to check the load.
1334 Register BaseReg, IndexReg;
1335 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1336 BaseMO.getReg().isValid())
1337 BaseReg = BaseMO.getReg();
1338 if (IndexMO.getReg().isValid())
1339 IndexReg = IndexMO.getReg();
1340
1341 if (!BaseReg && !IndexReg)
1342 // No register operands!
1343 continue;
1344
1345 // If any register operand is dependent, this load is dependent and we
1346 // needn't check it.
1347 // FIXME: Is this true in the case where we are hardening loads after
1348 // they complete? Unclear, need to investigate.
1349 if ((BaseReg && LoadDepRegs.test(BaseReg.id())) ||
1350 (IndexReg && LoadDepRegs.test(IndexReg.id())))
1351 continue;
1352
1353 // If post-load hardening is enabled, this load is compatible with
1354 // post-load hardening, and we aren't already going to harden one of the
1355 // address registers, queue it up to be hardened post-load. Notably,
1356 // even once hardened this won't introduce a useful dependency that
1357 // could prune out subsequent loads.
1358 if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
1359 !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1360 MI.getOperand(0).isReg() &&
1361 canHardenRegister(MI.getOperand(0).getReg()) &&
1362 !HardenedAddrRegs.count(BaseReg) &&
1363 !HardenedAddrRegs.count(IndexReg)) {
1364 HardenPostLoad.insert(&MI);
1365 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1366 continue;
1367 }
1368
1369 // Record this instruction for address hardening and record its register
1370 // operands as being address-hardened.
1371 HardenLoadAddr.insert(&MI);
1372 if (BaseReg)
1373 HardenedAddrRegs.insert(BaseReg);
1374 if (IndexReg)
1375 HardenedAddrRegs.insert(IndexReg);
1376
1377 for (MachineOperand &Def : MI.defs())
1378 if (Def.isReg())
1379 LoadDepRegs.set(Def.getReg().id());
1380 }
1381
1382 // Now re-walk the instructions in the basic block, and apply whichever
1383 // hardening strategy we have elected. Note that we do this in a second
1384 // pass specifically so that we have the complete set of instructions for
1385 // which we will do post-load hardening and can defer it in certain
1386 // circumstances.
1387 for (MachineInstr &MI : MBB) {
1388 if (HardenLoads) {
1389 // We cannot both require hardening the def of a load and its address.
1390 assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1391 "Requested to harden both the address and def of a load!");
1392
1393 // Check if this is a load whose address needs to be hardened.
1394 if (HardenLoadAddr.erase(&MI)) {
1395 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1396 assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1397
1398 MachineOperand &BaseMO =
1399 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1400 MachineOperand &IndexMO =
1401 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1402 hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1403 continue;
1404 }
1405
1406 // Test if this instruction is one of our post load instructions (and
1407 // remove it from the set if so).
1408 if (HardenPostLoad.erase(&MI)) {
1409 assert(!MI.isCall() && "Must not try to post-load harden a call!");
1410
1411 // If this is a data-invariant load and there is no EFLAGS
1412 // interference, we want to try and sink any hardening as far as
1413 // possible.
1414 if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
1415 // Sink the instruction we'll need to harden as far as we can down
1416 // the graph.
1417 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1418
1419 // If we managed to sink this instruction, update everything so we
1420 // harden that instruction when we reach it in the instruction
1421 // sequence.
1422 if (SunkMI != &MI) {
1423 // If in sinking there was no instruction needing to be hardened,
1424 // we're done.
1425 if (!SunkMI)
1426 continue;
1427
1428 // Otherwise, add this to the set of defs we harden.
1429 HardenPostLoad.insert(SunkMI);
1430 continue;
1431 }
1432 }
1433
1434 Register HardenedReg = hardenPostLoad(MI);
1435
1436 // Mark the resulting hardened register as such so we don't re-harden.
1437 AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1438
1439 continue;
1440 }
1441
1442 // Check for an indirect call or branch that may need its input hardened
1443 // even if we couldn't find the specific load used, or were able to
1444 // avoid hardening it for some reason. Note that here we cannot break
1445 // out afterward as we may still need to handle any call aspect of this
1446 // instruction.
1447 if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1448 hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1449 }
1450
1451 // After we finish hardening loads we handle interprocedural hardening if
1452 // enabled and relevant for this instruction.
1453 if (!HardenInterprocedurally)
1454 continue;
1455 if (!MI.isCall() && !MI.isReturn())
1456 continue;
1457
1458 // If this is a direct return (i.e., not a tail call), just directly harden
1459 // it.
1460 if (MI.isReturn() && !MI.isCall()) {
1461 hardenReturnInstr(MI);
1462 continue;
1463 }
1464
1465 // Otherwise we have a call. We need to handle transferring the predicate
1466 // state into a call and recovering it after the call returns (unless this
1467 // is a tail call).
1468 assert(MI.isCall() && "Should only reach here for calls!");
1469 tracePredStateThroughCall(MI);
1470 }
1471
1472 HardenPostLoad.clear();
1473 HardenLoadAddr.clear();
1474 HardenedAddrRegs.clear();
1475 AddrRegToHardenedReg.clear();
1476
1477 // Currently, we only track data-dependent loads within a basic block.
1478 // FIXME: We should see if this is necessary or if we could be more
1479 // aggressive here without opening up attack avenues.
1480 LoadDepRegs.clear();
1481 }
1482}
1483
1484/// Save EFLAGS into the returned GPR. This can in turn be restored with
1485/// `restoreEFLAGS`.
1486///
1487/// Note that LLVM can only lower very simple patterns of saved and restored
1488/// EFLAGS registers. The restore should always be within the same basic block
1489/// as the save so that no PHI nodes are inserted.
1490Register X86SpeculativeLoadHardeningPass::saveEFLAGS(
1491 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1492 const DebugLoc &Loc) {
1493 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1494 // what instruction selection does.
1495 Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1496 // We directly copy the FLAGS register and rely on later lowering to clean
1497 // this up into the appropriate setCC instructions.
1498 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1499 ++NumInstsInserted;
1500 return Reg;
1501}
1502
1503/// Restore EFLAGS from the provided GPR. This should be produced by
1504/// `saveEFLAGS`.
1505///
1506/// This must be done within the same basic block as the save in order to
1507/// reliably lower.
1508void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1509 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1510 const DebugLoc &Loc, Register Reg) {
1511 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1512 ++NumInstsInserted;
1513}
1514
1515/// Takes the current predicate state (in a register) and merges it into the
1516/// stack pointer. The state is essentially a single bit, but we merge this in
1517/// a way that won't form non-canonical pointers and also will be preserved
1518/// across normal stack adjustments.
1519void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1520 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1521 const DebugLoc &Loc, Register PredStateReg) {
1522 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1523 // FIXME: This hard codes a shift distance based on the number of bits needed
1524 // to stay canonical on 64-bit. We should compute this somehow and support
1525 // 32-bit as part of that.
1526 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1527 .addReg(PredStateReg, RegState::Kill)
1528 .addImm(47);
1529 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1530 ++NumInstsInserted;
1531 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1532 .addReg(X86::RSP)
1533 .addReg(TmpReg, RegState::Kill);
1534 OrI->addRegisterDead(X86::EFLAGS, TRI);
1535 ++NumInstsInserted;
1536}
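A minimal standalone sketch of the arithmetic these two instructions perform, assuming the usual SLH convention that the predicate state is all-zeros on the architecturally correct path and all-ones during misspeculation (illustrative C++, not part of the pass; the helper name mirrors the function above only for readability):

  #include <cassert>
  #include <cstdint>

  // Models the emitted SHL64ri $47 + OR64rr: a zero state leaves RSP
  // untouched, while an all-ones state sets bits 47..63, keeping the pointer
  // canonical but steering it far away from the real stack.
  uint64_t mergePredStateIntoSP(uint64_t RSP, uint64_t PredState) {
    return RSP | (PredState << 47);
  }

  int main() {
    uint64_t RSP = 0x00007fffffffe000ULL;
    assert(mergePredStateIntoSP(RSP, 0) == RSP);                 // good path
    assert((mergePredStateIntoSP(RSP, ~0ULL) >> 47) == 0x1ffff); // poisoned
  }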
1537
1538/// Extracts the predicate state stored in the high bits of the stack pointer.
1539Register X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1540 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1541 const DebugLoc &Loc) {
1542 Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1543 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1544
1545 // We know that the stack pointer will have any preserved predicate state in
1546 // its high bit. We just want to smear this across the other bits. Turns out,
1547 // this is exactly what an arithmetic right shift does.
1548 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1549 .addReg(X86::RSP);
1550 auto ShiftI =
1551 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1552 .addReg(TmpReg, RegState::Kill)
1553 .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1554 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1555 ++NumInstsInserted;
1556
1557 return PredStateReg;
1558}
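The extraction above is the inverse of the merge: a minimal standalone sketch (illustrative C++, not part of the pass) of the SAR-by-63 smear, which recreates exactly the all-zeros / all-ones state that mergePredStateIntoSP deposited in the high bits:

  #include <cassert>
  #include <cstdint>

  // Models the emitted SAR64ri $63: the arithmetic right shift copies the top
  // bit of RSP into every bit of the result.
  uint64_t extractPredStateFromSP(uint64_t RSP) {
    return static_cast<uint64_t>(static_cast<int64_t>(RSP) >> 63);
  }

  int main() {
    assert(extractPredStateFromSP(0x00007fffffffe000ULL) == 0);     // clean SP
    assert(extractPredStateFromSP(0xffffffffffffe000ULL) == ~0ULL); // poisoned SP
  }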
1559
1560void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1561 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1562 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg) {
1563 MachineBasicBlock &MBB = *MI.getParent();
1564 const DebugLoc &Loc = MI.getDebugLoc();
1565
1566 // Check if EFLAGS are alive by seeing if there is a def of them or they
1567 // live-in, and then seeing if that def is in turn used.
1568 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1569
1570 SmallVector<MachineOperand *, 2> HardenOpRegs;
1571
1572 if (BaseMO.isFI()) {
1573 // A frame index is never a dynamically controllable load, so only
1574 // harden it if we're covering fixed address loads as well.
1575 LLVM_DEBUG(
1576 dbgs() << " Skipping hardening base of explicit stack frame load: ";
1577 MI.dump(); dbgs() << "\n");
1578 } else if (BaseMO.getReg() == X86::RSP) {
1579 // Some idempotent atomic operations are lowered directly to a locked
1580 // OR with 0 to the top of the stack (or slightly offset from the top), which uses an
1581 // explicit RSP register as the base.
1582 assert(IndexMO.getReg() == X86::NoRegister &&
1583 "Explicit RSP access with dynamic index!");
1584 LLVM_DEBUG(
1585 dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1586 } else if (BaseMO.getReg() == X86::RIP ||
1587 BaseMO.getReg() == X86::NoRegister) {
1588 // For both RIP-relative addressed loads or absolute loads, we cannot
1589 // meaningfully harden them because the address being loaded has no
1590 // dynamic component.
1591 //
1592 // FIXME: When using a segment base (like TLS does) we end up with the
1593 // dynamic address being the base plus -1 because we can't mutate the
1594 // segment register here. This allows the signed 32-bit offset to point at
1595 // valid segment-relative addresses and load them successfully.
1596 LLVM_DEBUG(
1597 dbgs() << " Cannot harden base of "
1598 << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1599 << " address in a load!");
1600 } else {
1601 assert(BaseMO.isReg() &&
1602 "Only allowed to have a frame index or register base.");
1603 HardenOpRegs.push_back(&BaseMO);
1604 }
1605
1606 if (IndexMO.getReg() != X86::NoRegister &&
1607 (HardenOpRegs.empty() ||
1608 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1609 HardenOpRegs.push_back(&IndexMO);
1610
1611 assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1612 "Should have exactly one or two registers to harden!");
1613 assert((HardenOpRegs.size() == 1 ||
1614 HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1615 "Should not have two of the same registers!");
1616
1617 // Remove any registers that have already been checked.
1618 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1619 // See if this operand's register has already been checked.
1620 auto It = AddrRegToHardenedReg.find(Op->getReg());
1621 if (It == AddrRegToHardenedReg.end())
1622 // Not checked, so retain this one.
1623 return false;
1624
1625 // Otherwise, we can directly update this operand and remove it.
1626 Op->setReg(It->second);
1627 return true;
1628 });
1629 // If there are none left, we're done.
1630 if (HardenOpRegs.empty())
1631 return;
1632
1633 // Compute the current predicate state.
1634 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1635
1636 auto InsertPt = MI.getIterator();
1637
1638 // If EFLAGS are live and we don't have access to instructions that avoid
1639 // clobbering EFLAGS we need to save and restore them. This in turn makes
1640 // the EFLAGS no longer live.
1641 Register FlagsReg;
1642 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1643 EFLAGSLive = false;
1644 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1645 }
1646
1647 for (MachineOperand *Op : HardenOpRegs) {
1648 Register OpReg = Op->getReg();
1649 auto *OpRC = MRI->getRegClass(OpReg);
1650 Register TmpReg = MRI->createVirtualRegister(OpRC);
1651
1652 // If this is a vector register, we'll need somewhat custom logic to handle
1653 // hardening it.
1654 if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1655 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1656 assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1657 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1658
1659 // Move our state into a vector register.
1660 // FIXME: We could skip this at the cost of longer encodings with AVX-512
1661 // but that doesn't seem likely to be worth it.
1662 Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1663 auto MovI =
1664 BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1665 .addReg(StateReg);
1666 (void)MovI;
1667 ++NumInstsInserted;
1668 LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1669
1670 // Broadcast it across the vector register.
1671 Register VBStateReg = MRI->createVirtualRegister(OpRC);
1672 auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1673 TII->get(Is128Bit ? X86::VPBROADCASTQrr
1674 : X86::VPBROADCASTQYrr),
1675 VBStateReg)
1676 .addReg(VStateReg);
1677 (void)BroadcastI;
1678 ++NumInstsInserted;
1679 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1680 dbgs() << "\n");
1681
1682 // Merge our potential poison state into the value with a vector or.
1683 auto OrI =
1684 BuildMI(MBB, InsertPt, Loc,
1685 TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1686 .addReg(VBStateReg)
1687 .addReg(OpReg);
1688 (void)OrI;
1689 ++NumInstsInserted;
1690 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1691 } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1692 OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1693 OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1694 assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1695 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1696 bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1697 if (Is128Bit || Is256Bit)
1698 assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1699
1700 // Broadcast our state into a vector register.
1701 Register VStateReg = MRI->createVirtualRegister(OpRC);
1702 unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1703 : Is256Bit ? X86::VPBROADCASTQrZ256rr
1704 : X86::VPBROADCASTQrZrr;
1705 auto BroadcastI =
1706 BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1707 .addReg(StateReg);
1708 (void)BroadcastI;
1709 ++NumInstsInserted;
1710 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1711 dbgs() << "\n");
1712
1713 // Merge our potential poison state into the value with a vector or.
1714 unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1715 : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1716 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1717 .addReg(VStateReg)
1718 .addReg(OpReg);
1719 (void)OrI;
1720 ++NumInstsInserted;
1721 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1722 } else {
1723 // FIXME: Need to support GR32 here for 32-bit code.
1724 assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1725 "Not a supported register class for address hardening!");
1726
1727 if (!EFLAGSLive) {
1728 // Merge our potential poison state into the value with an or.
1729 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1730 .addReg(StateReg)
1731 .addReg(OpReg);
1732 OrI->addRegisterDead(X86::EFLAGS, TRI);
1733 ++NumInstsInserted;
1734 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1735 } else {
1736 // We need to avoid touching EFLAGS, so shift the address right by the
1737 // state value using an instruction that doesn't update flags.
1738 auto ShiftI =
1739 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1740 .addReg(OpReg)
1741 .addReg(StateReg);
1742 (void)ShiftI;
1743 ++NumInstsInserted;
1744 LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1745 dbgs() << "\n");
1746 }
1747 }
1748
1749 // Record this register as checked and update the operand.
1750 assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1751 "Should not have checked this register yet!");
1752 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1753 Op->setReg(TmpReg);
1754 ++NumAddrRegsHardened;
1755 }
1756
1757 // And restore the flags if needed.
1758 if (FlagsReg)
1759 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1760}
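For the common GR64 case, the two hardening strategies chosen above reduce to simple integer operations. A standalone sketch (illustrative C++, not part of the pass; the helper names are hypothetical) under the same all-zeros / all-ones state convention:

  #include <cassert>
  #include <cstdint>

  // OR64rr variant, used when EFLAGS are dead: a poisoned state turns the
  // address into all-ones.
  uint64_t hardenAddrWithOr(uint64_t Addr, uint64_t State) { return Addr | State; }

  // SHRX64rr variant, used when EFLAGS are live and BMI2 is available: the
  // shift count is taken modulo 64, so a zero state is a no-op while an
  // all-ones state (count 63) shifts out everything but the top bit of the
  // address, all without touching flags.
  uint64_t hardenAddrWithShrx(uint64_t Addr, uint64_t State) {
    return Addr >> (State & 63);
  }

  int main() {
    uint64_t Addr = 0x00007f1234567000ULL;
    assert(hardenAddrWithOr(Addr, 0) == Addr);
    assert(hardenAddrWithOr(Addr, ~0ULL) == ~0ULL);
    assert(hardenAddrWithShrx(Addr, 0) == Addr);
    assert(hardenAddrWithShrx(Addr, ~0ULL) == 0); // user-space pointer: top bit clear
  }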
1761
1762MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1763 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1765 "Cannot get here with a non-invariant load!");
1766 assert(!isEFLAGSDefLive(InitialMI) &&
1767 "Cannot get here with a data invariant load "
1768 "that interferes with EFLAGS!");
1769
1770 // See if we can sink the hardening of the loaded value.
1771 auto SinkCheckToSingleUse =
1772 [&](MachineInstr &MI) -> std::optional<MachineInstr *> {
1773 Register DefReg = MI.getOperand(0).getReg();
1774
1775 // We need to find a single use to which we can sink the check. We can
1776 // primarily do this because many uses may already end up checked on their
1777 // own.
1778 MachineInstr *SingleUseMI = nullptr;
1779 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1780 // If we're already going to harden this use, it is data invariant, it
1781 // does not interfere with EFLAGS, and it is within our block.
1782 if (HardenedInstrs.count(&UseMI)) {
1783 if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
1784 // If we've already decided to harden a non-load, we must have sunk
1785 // some other post-load hardened instruction to it and it must itself
1786 // be data-invariant.
1787 assert(X86InstrInfo::isDataInvariant(UseMI) &&
1788 "Data variant instruction being hardened!");
1789 continue;
1790 }
1791
1792 // Otherwise, this is a load and the load component can't be data
1793 // invariant so check how this register is being used.
1794 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(UseMI);
1795 assert(MemRefBeginIdx >= 0 &&
1796 "Should always have mem references here!");
1797
1798 MachineOperand &BaseMO =
1799 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1800 MachineOperand &IndexMO =
1801 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1802 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1803 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1804 // The load uses the register as part of its address making it not
1805 // invariant.
1806 return {};
1807
1808 continue;
1809 }
1810
1811 if (SingleUseMI)
1812 // We already have a single use, this would make two. Bail.
1813 return {};
1814
1815 // If this single use isn't data invariant, isn't in this block, or has
1816 // interfering EFLAGS, we can't sink the hardening to it.
1817 if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1818 isEFLAGSDefLive(UseMI))
1819 return {};
1820
1821 // If this instruction defines multiple registers bail as we won't harden
1822 // all of them.
1823 if (UseMI.getDesc().getNumDefs() > 1)
1824 return {};
1825
1826 // If this register isn't a virtual register we can't walk its uses sanely,
1827 // just bail. Also check that its register class is one of the ones we
1828 // can harden.
1829 Register UseDefReg = UseMI.getOperand(0).getReg();
1830 if (!canHardenRegister(UseDefReg))
1831 return {};
1832
1833 SingleUseMI = &UseMI;
1834 }
1835
1836 // If SingleUseMI is still null, there is no use that needs its own
1837 // checking. Otherwise, it is the single use that needs checking.
1838 return {SingleUseMI};
1839 };
1840
1841 MachineInstr *MI = &InitialMI;
1842 while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1843 // Update which MI we're checking now.
1844 MI = *SingleUse;
1845 if (!MI)
1846 break;
1847 }
1848
1849 return MI;
1850}
1851
1852bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
1853 // We only support hardening virtual registers.
1854 if (!Reg.isVirtual())
1855 return false;
1856
1857 auto *RC = MRI->getRegClass(Reg);
1858 int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1859 if (RegBytes > 8)
1860 // We don't support post-load hardening of vectors.
1861 return false;
1862
1863 unsigned RegIdx = Log2_32(RegBytes);
1864 assert(RegIdx < 4 && "Unsupported register size");
1865
1866 // If this register class is explicitly constrained to a class that doesn't
1867 // require REX prefix, we may not be able to satisfy that constraint when
1868 // emitting the hardening instructions, so bail out here.
1869 // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1870 // end up with both a NOREX and a REX-only register as operands to the hardening
1871 // instructions. It would be better to fix that code to handle this situation
1872 // rather than hack around it in this way.
1873 const TargetRegisterClass *NOREXRegClasses[] = {
1874 &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1875 &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1876 if (RC == NOREXRegClasses[RegIdx])
1877 return false;
1878
1879 const TargetRegisterClass *GPRRegClasses[] = {
1880 &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1881 &X86::GR64RegClass};
1882 return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1883}
1884
1885/// Harden a value in a register.
1886///
1887/// This is the low-level logic to fully harden a value sitting in a register
1888/// against leaking during speculative execution.
1889///
1890/// Unlike hardening an address that is used by a load, this routine is required
1891/// to hide *all* incoming bits in the register.
1892///
1893/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1894/// larger than the predicate state register. FIXME: We should support vector
1895/// registers here by broadcasting the predicate state.
1896///
1897/// The new, hardened virtual register is returned. It will have the same
1898/// register class as `Reg`.
1899Register X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1900 Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1901 const DebugLoc &Loc) {
1902 assert(canHardenRegister(Reg) && "Cannot harden this register!");
1903
1904 auto *RC = MRI->getRegClass(Reg);
1905 int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1906 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1907 assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1908 "Unknown register size");
1909
1910 // FIXME: Need to teach this about 32-bit mode.
1911 if (Bytes != 8) {
1912 unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1913 unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1914 Register NarrowStateReg = MRI->createVirtualRegister(RC);
1915 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1916 .addReg(StateReg, 0, SubRegImm);
1917 StateReg = NarrowStateReg;
1918 }
1919
1920 Register FlagsReg;
1921 if (isEFLAGSLive(MBB, InsertPt, *TRI))
1922 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1923
1924 Register NewReg = MRI->createVirtualRegister(RC);
1925 unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1926 unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1927 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1928 .addReg(StateReg)
1929 .addReg(Reg);
1930 OrI->addRegisterDead(X86::EFLAGS, TRI);
1931 ++NumInstsInserted;
1932 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1933
1934 if (FlagsReg)
1935 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1936
1937 return NewReg;
1938}
1939
1940/// Harden a load by hardening the loaded value in the defined register.
1941///
1942/// We can harden a non-leaking load into a register without touching the
1943/// address by just hiding all of the loaded bits during misspeculation. We use
1944/// an `or` instruction to do this because we set up our poison value as all
1945 /// ones. The goal is just that the loaded bits are not exposed to speculative
1946 /// execution, and coercing them to all ones is sufficient for that.
1947///
1948/// Returns the newly hardened register.
1949Register X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1950 MachineBasicBlock &MBB = *MI.getParent();
1951 const DebugLoc &Loc = MI.getDebugLoc();
1952
1953 auto &DefOp = MI.getOperand(0);
1954 Register OldDefReg = DefOp.getReg();
1955 auto *DefRC = MRI->getRegClass(OldDefReg);
1956
1957 // Because we want to completely replace the uses of this def'ed value with
1958 // the hardened value, create a dedicated new register that will only be used
1959 // to communicate the unhardened value to the hardening.
1960 Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1961 DefOp.setReg(UnhardenedReg);
1962
1963 // Now harden this register's value, getting a hardened reg that is safe to
1964 // use. Note that we insert the instructions to compute this *after* the
1965 // defining instruction, not before it.
1966 Register HardenedReg = hardenValueInRegister(
1967 UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1968
1969 // Finally, replace the old register (which now only has the uses of the
1970 // original def) with the hardened register.
1971 MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1972
1973 ++NumPostLoadRegsHardened;
1974 return HardenedReg;
1975}
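Post-load hardening acts on the loaded value instead of the address. A standalone sketch (illustrative C++, not part of the pass; the helper name is hypothetical) of the effect of the width-appropriate OR inserted by hardenValueInRegister and hardenPostLoad:

  #include <cassert>
  #include <cstdint>

  // The OR with the all-ones poison state coerces a speculatively loaded
  // secret to all-ones before any dependent instruction can leak it.
  uint64_t hardenLoadedValue(uint64_t Loaded, uint64_t State) {
    return Loaded | State;
  }

  int main() {
    assert(hardenLoadedValue(0xdeadbeefULL, 0) == 0xdeadbeefULL); // correct path
    assert(hardenLoadedValue(0xdeadbeefULL, ~0ULL) == ~0ULL);     // misspeculation
  }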
1976
1977/// Harden a return instruction.
1978///
1979/// Returns implicitly perform a load which we need to harden. Without hardening
1980 /// this load, an attacker may speculatively write over the return address to
1981/// steer speculation of the return to an attacker controlled address. This is
1982/// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1983/// this paper:
1984/// https://people.csail.mit.edu/vlk/spectre11.pdf
1985///
1986/// We can harden this by introducing an LFENCE that will delay any load of the
1987/// return address until prior instructions have retired (and thus are not being
1988/// speculated), or we can harden the address used by the implicit load: the
1989/// stack pointer.
1990///
1991/// If we are not using an LFENCE, hardening the stack pointer has an additional
1992/// benefit: it allows us to pass the predicate state accumulated in this
1993/// function back to the caller. In the absence of a BCBS attack on the return,
1994/// the caller will typically be resumed and speculatively executed due to the
1995/// Return Stack Buffer (RSB) prediction which is very accurate and has a high
1996/// priority. It is possible that some code from the caller will be executed
1997/// speculatively even during a BCBS-attacked return until the steering takes
1998/// effect. Whenever this happens, the caller can recover the (poisoned)
1999/// predicate state from the stack pointer and continue to harden loads.
2000void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2001 MachineBasicBlock &MBB = *MI.getParent();
2002 const DebugLoc &Loc = MI.getDebugLoc();
2003 auto InsertPt = MI.getIterator();
2004
2005 if (FenceCallAndRet)
2006 // No need to fence here as we'll fence at the return site itself. That
2007 // handles more cases than we can handle here.
2008 return;
2009
2010 // Take our predicate state, shift it to the high 17 bits (so that we keep
2011 // pointers canonical) and merge it into RSP. This will allow the caller to
2012 // extract it when we return (speculatively).
2013 mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2014}
2015
2016/// Trace the predicate state through a call.
2017///
2018/// There are several layers of this needed to handle the full complexity of
2019/// calls.
2020///
2021/// First, we need to send the predicate state into the called function. We do
2022/// this by merging it into the high bits of the stack pointer.
2023///
2024/// For tail calls, this is all we need to do.
2025///
2026/// For calls where we might return and resume the control flow, we need to
2027/// extract the predicate state from the high bits of the stack pointer after
2028/// control returns from the called function.
2029///
2030/// We also need to verify that we intended to return to this location in the
2031/// code. An attacker might arrange for the processor to mispredict the return
2032/// to this valid but incorrect return address in the program rather than the
2033/// correct one. See the paper on this attack, called "ret2spec" by the
2034/// researchers, here:
2035/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2036///
2037/// The way we verify that we returned to the correct location is by preserving
2038/// the expected return address across the call. One technique involves taking
2039 /// advantage of the red-zone to load the return address from `-8(%rsp)` where it
2040/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2041/// directly save the address into a register that will be preserved across the
2042/// call. We compare this intended return address against the address
2043/// immediately following the call (the observed return address). If these
2044/// mismatch, we have detected misspeculation and can poison our predicate
2045/// state.
2046void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2047 MachineInstr &MI) {
2048 MachineBasicBlock &MBB = *MI.getParent();
2049 MachineFunction &MF = *MBB.getParent();
2050 auto InsertPt = MI.getIterator();
2051 const DebugLoc &Loc = MI.getDebugLoc();
2052
2053 if (FenceCallAndRet) {
2054 if (MI.isReturn())
2055 // Tail call, we don't return to this function.
2056 // FIXME: We should also handle noreturn calls.
2057 return;
2058
2059 // We don't need to fence before the call because the function should fence
2060 // in its entry. However, we do need to fence after the call returns.
2061 // Fencing before the return doesn't correctly handle cases where the return
2062 // itself is mispredicted.
2063 BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2064 ++NumInstsInserted;
2065 ++NumLFENCEsInserted;
2066 return;
2067 }
2068
2069 // First, we transfer the predicate state into the called function by merging
2070 // it into the stack pointer. This will kill the current def of the state.
2071 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2072 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2073
2074 // If this call is also a return, it is a tail call and we don't need anything
2075 // else to handle it so just return. Also, if there are no further
2076 // instructions and no successors, this call does not return so we can also
2077 // bail.
2078 if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2079 return;
2080
2081 // Create a symbol to track the return address and attach it to the call
2082 // machine instruction. We will lower extra symbols attached to call
2083 // instructions as labels immediately following the call.
2084 MCSymbol *RetSymbol =
2085 MF.getContext().createTempSymbol("slh_ret_addr",
2086 /*AlwaysAddSuffix*/ true);
2087 MI.setPostInstrSymbol(MF, RetSymbol);
2088
2089 const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2090 Register ExpectedRetAddrReg;
2091
2092 // If we have no red zones or if the function returns twice (possibly without
2093 // using the `ret` instruction) like setjmp, we need to save the expected
2094 // return address prior to the call.
2095 if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2096 MF.exposesReturnsTwice()) {
2097 // If we don't have red zones, we need to compute the expected return
2098 // address prior to the call and store it in a register that lives across
2099 // the call.
2100 //
2101 // In some ways, this is doubly satisfying as a mitigation because it will
2102 // also successfully detect stack smashing bugs in some cases (typically,
2103 // when a callee-saved register is used and the callee doesn't push it onto
2104 // the stack). But that isn't our primary goal, so we only use it as
2105 // a fallback.
2106 //
2107 // FIXME: It isn't clear that this is reliable in the face of
2108 // rematerialization in the register allocator. We somehow need to force
2109 // that to not occur for this particular instruction, and instead to spill
2110 // or otherwise preserve the value computed *prior* to the call.
2111 //
2112 // FIXME: It is even less clear why MachineCSE can't just fold this when we
2113 // end up having to use identical instructions both before and after the
2114 // call to feed the comparison.
2115 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2116 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2117 !Subtarget->isPositionIndependent()) {
2118 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2119 .addSym(RetSymbol);
2120 } else {
2121 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2122 .addReg(/*Base*/ X86::RIP)
2123 .addImm(/*Scale*/ 1)
2124 .addReg(/*Index*/ 0)
2125 .addSym(RetSymbol)
2126 .addReg(/*Segment*/ 0);
2127 }
2128 }
2129
2130 // Step past the call to handle when it returns.
2131 ++InsertPt;
2132
2133 // If we didn't pre-compute the expected return address into a register, then
2134 // red zones are enabled and the return address is still available on the
2135 // stack immediately after the call. As the very first instruction, we load it
2136 // into a register.
2137 if (!ExpectedRetAddrReg) {
2138 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2139 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2140 .addReg(/*Base*/ X86::RSP)
2141 .addImm(/*Scale*/ 1)
2142 .addReg(/*Index*/ 0)
2143 .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2144 // the return address sits 8 bytes below it, within the red zone.
2145 .addReg(/*Segment*/ 0);
2146 }
2147
2148 // Now we extract the callee's predicate state from the stack pointer.
2149 Register NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2150
2151 // Test the expected return address against our actual address. If we can
2152 // form this basic block's address as an immediate, this is easy. Otherwise
2153 // we compute it.
2154 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2155 !Subtarget->isPositionIndependent()) {
2156 // FIXME: Could we fold this with the load? It would require careful EFLAGS
2157 // management.
2158 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2159 .addReg(ExpectedRetAddrReg, RegState::Kill)
2160 .addSym(RetSymbol);
2161 } else {
2162 Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2163 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2164 .addReg(/*Base*/ X86::RIP)
2165 .addImm(/*Scale*/ 1)
2166 .addReg(/*Index*/ 0)
2167 .addSym(RetSymbol)
2168 .addReg(/*Segment*/ 0);
2169 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2170 .addReg(ExpectedRetAddrReg, RegState::Kill)
2171 .addReg(ActualRetAddrReg, RegState::Kill);
2172 }
2173
2174 // Now conditionally update the predicate state we just extracted if we ended
2175 // up at a different return address than expected.
2176 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2177 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2178
2179 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2180 auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2181 .addReg(NewStateReg, RegState::Kill)
2182 .addReg(PS->PoisonReg)
2183 .addImm(X86::COND_NE);
2184 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
2185 ++NumInstsInserted;
2186 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2187
2188 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2189}
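The post-call sequence assembled above (extract the state from RSP, compare the expected and observed return addresses, CMOVNE from the poison register) can be modeled on plain integers. A standalone sketch (illustrative C++, not part of the pass; the addresses are hypothetical values used only for the checks):

  #include <cassert>
  #include <cstdint>

  uint64_t stateAfterCall(uint64_t RSPOnReturn, uint64_t ExpectedRetAddr,
                          uint64_t ActualRetAddr) {
    // SAR by 63: recover whatever state the callee left in RSP's high bits.
    uint64_t State =
        static_cast<uint64_t>(static_cast<int64_t>(RSPOnReturn) >> 63);
    // CMP + CMOVNE from PoisonReg: poison the state if the return was steered
    // to an address other than the label recorded around the call.
    if (ExpectedRetAddr != ActualRetAddr)
      State = ~0ULL;
    return State;
  }

  int main() {
    assert(stateAfterCall(0x00007fffffffe000ULL, 0x401000, 0x401000) == 0);
    assert(stateAfterCall(0x00007fffffffe000ULL, 0x401000, 0x402000) == ~0ULL);
    assert(stateAfterCall(0xffffffffffffe000ULL, 0x401000, 0x401000) == ~0ULL);
  }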
2190
2191/// An attacker may speculatively store over a value that is then speculatively
2192/// loaded and used as the target of an indirect call or jump instruction. This
2193/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2194/// in this paper:
2195/// https://people.csail.mit.edu/vlk/spectre11.pdf
2196///
2197/// When this happens, the speculative execution of the call or jump will end up
2198/// being steered to this attacker controlled address. While most such loads
2199/// will be adequately hardened already, we want to ensure that they are
2200/// definitively treated as needing post-load hardening. While address hardening
2201/// is sufficient to prevent secret data from leaking to the attacker, it may
2202/// not be sufficient to prevent an attacker from steering speculative
2203/// execution. We forcibly unfolded all relevant loads above and so will always
2204 /// have an opportunity to post-load harden here; we just need to scan for cases
2205/// not already flagged and add them.
2206void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2207 MachineInstr &MI,
2208 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg) {
2209 switch (MI.getOpcode()) {
2210 case X86::FARCALL16m:
2211 case X86::FARCALL32m:
2212 case X86::FARCALL64m:
2213 case X86::FARJMP16m:
2214 case X86::FARJMP32m:
2215 case X86::FARJMP64m:
2216 // We don't need to harden either far calls or far jumps as they are
2217 // safe from Spectre.
2218 return;
2219
2220 default:
2221 break;
2222 }
2223
2224 // We should never see a loading instruction at this point, as those should
2225 // have been unfolded.
2226 assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2227
2228 // If the first operand isn't a register, this is a branch or call
2229 // instruction with an immediate operand which doesn't need to be hardened.
2230 if (!MI.getOperand(0).isReg())
2231 return;
2232
2233 // For all of these, the target register is the first operand of the
2234 // instruction.
2235 auto &TargetOp = MI.getOperand(0);
2236 Register OldTargetReg = TargetOp.getReg();
2237
2238 // Try to lookup a hardened version of this register. We retain a reference
2239 // here as we want to update the map to track any newly computed hardened
2240 // register.
2241 Register &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2242
2243 // If we don't have a hardened register yet, compute one. Otherwise, just use
2244 // the already hardened register.
2245 //
2246 // FIXME: It is a little suspect that we use partially hardened registers that
2247 // only feed addresses. The complexity of partial hardening with SHRX
2248 // continues to pile up. Should definitively measure its value and consider
2249 // eliminating it.
2250 if (!HardenedTargetReg)
2251 HardenedTargetReg = hardenValueInRegister(
2252 OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2253
2254 // Set the target operand to the hardened register.
2255 TargetOp.setReg(HardenedTargetReg);
2256
2257 ++NumCallsOrJumpsHardened;
2258}
2259
2260INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2261 "X86 speculative load hardener", false, false)
2262INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2263 "X86 speculative load hardener", false, false)
2264
2265 FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2266 return new X86SpeculativeLoadHardeningPass();
2267}