LLVM 20.0.0git
X86SpeculativeLoadHardening.cpp
Go to the documentation of this file.
1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Provide a pass which mitigates speculative execution attacks which operate
11/// by speculating incorrectly past some predicate (a type check, bounds check,
12/// or other condition) to reach a load with invalid inputs and leak the data
13/// accessed by that load using a side channel out of the speculative domain.
14///
15/// For details on the attacks, see the first variant in both the Project Zero
16/// writeup and the Spectre paper:
17/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18/// https://spectreattack.com/spectre.pdf
19///
20//===----------------------------------------------------------------------===//
21
22#include "X86.h"
23#include "X86InstrBuilder.h"
24#include "X86InstrInfo.h"
25#include "X86Subtarget.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/Statistic.h"
48#include "llvm/IR/DebugLoc.h"
49#include "llvm/MC/MCSchedule.h"
50#include "llvm/Pass.h"
52#include "llvm/Support/Debug.h"
55#include <algorithm>
56#include <cassert>
57#include <iterator>
58#include <optional>
59#include <utility>
60
61using namespace llvm;
62
// Short key for this pass. DEBUG_TYPE reuses it, and the command-line flags
// declared below build their names by prefixing it (e.g. PASS_KEY "-lfence").
63#define PASS_KEY "x86-slh"
64#define DEBUG_TYPE PASS_KEY
65
// Statistics counters. The pass increments these as it traces branches and
// inserts instructions (see the ++Num... statements in the functions below).
66STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
67STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
68STATISTIC(NumAddrRegsHardened,
69 "Number of address mode used registers hardaned");
70STATISTIC(NumPostLoadRegsHardened,
71 "Number of post-load register values hardened");
72STATISTIC(NumCallsOrJumpsHardened,
73 "Number of calls or jumps requiring extra hardening");
74STATISTIC(NumInstsInserted, "Number of instructions inserted");
75STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
76
// Command-line options controlling the pass.
//
// NOTE(review): in this listing the opening line of most declarations (the one
// that names the option variable) is not visible, so only the flag strings,
// descriptions and default values can be read here. Confirm variable names
// against the complete file.

// Force-enable switch for the whole pass; off by default.
78 "x86-speculative-load-hardening",
79 cl::desc("Force enable speculative load hardening"), cl::init(false),
81

// Alternative LFENCE-per-conditional-edge strategy; off by default.
83 PASS_KEY "-lfence",
85 "Use LFENCE along each conditional edge to harden against speculative "
86 "loads rather than conditional movs and poisoned pointers."),
87 cl::init(false), cl::Hidden);
88

// Post-load hardening of loaded values; on by default.
90 PASS_KEY "-post-load",
91 cl::desc("Harden the value loaded *after* it is loaded by "
92 "flushing the loaded bits to 1. This is hard to do "
93 "in general but can be done easily for GPRs."),
94 cl::init(true), cl::Hidden);
95

// Full fence on call and ret edges instead of the lighter mitigation; off by
// default.
97 PASS_KEY "-fence-call-and-ret",
98 cl::desc("Use a full speculation fence to harden both call and ret edges "
99 "rather than a lighter weight mitigation."),
100 cl::init(false), cl::Hidden);
101

// Interprocedural hardening via the high bits of the stack pointer; on by
// default.
103 PASS_KEY "-ip",
104 cl::desc("Harden interprocedurally by passing our state in and out of "
105 "functions in the high bits of the stack pointer."),
106 cl::init(true), cl::Hidden);
107

// Load sanitization; on by default.
// NOTE(review): the line carrying this option's name and flag string is not
// visible in this listing.
108static cl::opt<bool>
110 cl::desc("Sanitize loads from memory. When disable, no "
111 "significant security is provided."),
112 cl::init(true), cl::Hidden);
113

// Hardening of indirect calls and jumps; on by default.
115 PASS_KEY "-indirect",
116 cl::desc("Harden indirect calls and jumps against using speculatively "
117 "stored attacker controlled addresses. This is designed to "
118 "mitigate Spectre v1.2 style attacks."),
119 cl::init(true), cl::Hidden);
120
121namespace {
122
/// Machine function pass that implements speculative load hardening for X86.
///
/// NOTE(review): several declaration lines of this class are missing from this
/// listing (some struct members, return types and parameters). The comments
/// below only describe what is visible; confirm the rest against the full file.
123class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
124public:
125 X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
126
 /// Human-readable pass name; also printed in the debug banner emitted by
 /// runOnMachineFunction.
127 StringRef getPassName() const override {
128 return "X86 speculative load hardening";
129 }
130 bool runOnMachineFunction(MachineFunction &MF) override;
131 void getAnalysisUsage(AnalysisUsage &AU) const override;
132
133 /// Pass identification, replacement for typeid.
134 static char ID;
135
136private:
137 /// The information about a block's conditional terminators needed to trace
138 /// our predicate state through the exiting edges.
 ///
 /// NOTE(review): only `UncondBr` is visible here. The code that fills this
 /// struct also uses members named `MBB` and `CondBrs` (see
 /// collectBlockCondInfo), whose declarations are missing from this listing.
139 struct BlockCondInfo {
141
142 // We mostly have one conditional branch, and in extremely rare cases have
143 // two. Three and more are so rare as to be unimportant for compile time.
145
 // The terminator recorded as the block's unconditional / "else" exit, or
 // null when none was seen.
146 MachineInstr *UncondBr;
147 };
148
149 /// Manages the predicate state traced through the program.
150 struct PredState {
 // Register holding the predicate state at function entry; assigned in
 // runOnMachineFunction.
151 unsigned InitialReg = 0;
 // Register holding the all-ones poison value; materialized in the entry
 // block by runOnMachineFunction.
152 unsigned PoisonReg = 0;
153
 // Register class used for every predicate-state virtual register.
154 const TargetRegisterClass *RC;
 // NOTE(review): a member named `SSA` is initialized by the constructor
 // below, but its declaration is not visible in this listing.
156
157 PredState(MachineFunction &MF, const TargetRegisterClass *RC)
158 : RC(RC), SSA(MF) {}
159 };
160
 // Per-function cached pointers, refreshed at the start of
 // runOnMachineFunction.
161 const X86Subtarget *Subtarget = nullptr;
162 MachineRegisterInfo *MRI = nullptr;
163 const X86InstrInfo *TII = nullptr;
164 const TargetRegisterInfo *TRI = nullptr;
165
 // Predicate state for the function currently being processed; (re)created
 // with emplace() on each run.
166 std::optional<PredState> PS;
167
168 void hardenEdgesWithLFENCE(MachineFunction &MF);
169
170 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
171
 // NOTE(review): the return-type line of the next declaration is not visible.
173 tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
174
175 void unfoldCallAndJumpLoads(MachineFunction &MF);
176
 // NOTE(review): the return-type line of the next declaration is not visible.
178 tracePredStateThroughIndirectBranches(MachineFunction &MF);
179
180 void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
181
 // NOTE(review): the insertion-point parameter line of several declarations
 // below is not visible in this listing.
182 unsigned saveEFLAGS(MachineBasicBlock &MBB,
184 const DebugLoc &Loc);
185 void restoreEFLAGS(MachineBasicBlock &MBB,
186 MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
187 Register Reg);
188
189 void mergePredStateIntoSP(MachineBasicBlock &MBB,
191 const DebugLoc &Loc, unsigned PredStateReg);
192 unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
194 const DebugLoc &Loc);
195
196 void
197 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
198 MachineOperand &IndexMO,
199 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
 // NOTE(review): the return-type line of the next declaration is not visible.
201 sinkPostLoadHardenedInst(MachineInstr &MI,
202 SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
203 bool canHardenRegister(Register Reg);
204 unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
206 const DebugLoc &Loc);
207 unsigned hardenPostLoad(MachineInstr &MI);
208 void hardenReturnInstr(MachineInstr &MI);
209 void tracePredStateThroughCall(MachineInstr &MI);
210 void hardenIndirectCallOrJumpInstr(
212 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
213};
214
215} // end anonymous namespace
216
// Storage for the pass identifier declared in the class; the constructor passes
// it to MachineFunctionPass.
217char X86SpeculativeLoadHardeningPass::ID = 0;
218
// Override of the pass's analysis-usage hook.
// NOTE(review): the body statement is not visible in this listing, so what is
// registered on AU cannot be confirmed from here.
219void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
220 AnalysisUsage &AU) const {
222}
223
// Splits the CFG edge from MBB to Succ by placing a new block (NewMBB) between
// them, and returns that new block.
//
// Visible behavior:
//  - NewMBB is inserted in layout immediately after MBB.
//  - If the edge came from a branch (Br is non-null), the branch is retargeted
//    to NewMBB. When MBB had no unconditional branch, a JMP_1 to the old layout
//    successor is appended to MBB and handed back through UncondBr.
//  - SuccCount is the number of MBB->Succ edges still present: when it is 1 the
//    successor (and the matching PHI entry in Succ) is replaced, otherwise a
//    new successor and a new PHI operand pair are added.
//  - NewMBB inherits Succ's live-ins.
//
// NOTE(review): the first line of the signature and the statements that create
// NewMBB and declare `Cond` are not visible in this listing.
225 MachineBasicBlock &Succ, int SuccCount,
226 MachineInstr *Br, MachineInstr *&UncondBr,
227 const X86InstrInfo &TII) {
228 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
229
231
233
234 // We have to insert the new block immediately after the current one as we
235 // don't know what layout-successor relationships the successor has and we
236 // may not be able to (and generally don't want to) try to fix those up.
237 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
238
239 // Update the branch instruction if necessary.
240 if (Br) {
241 assert(Br->getOperand(0).getMBB() == &Succ &&
242 "Didn't start with the right target!");
243 Br->getOperand(0).setMBB(&NewMBB);
244
245 // If this successor was reached through a branch rather than fallthrough,
246 // we might have *broken* fallthrough and so need to inject a new
247 // unconditional branch.
248 if (!UncondBr) {
 // NewMBB now sits between MBB and its former layout successor, so the
 // block after NewMBB is the one MBB used to fall through to.
249 MachineBasicBlock &OldLayoutSucc =
250 *std::next(MachineFunction::iterator(&NewMBB));
251 assert(MBB.isSuccessor(&OldLayoutSucc) &&
252 "Without an unconditional branch, the old layout successor should "
253 "be an actual successor!");
254 auto BrBuilder =
255 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
256 // Update the unconditional branch now that we've added one.
257 UncondBr = &*BrBuilder;
258 }
259
260 // Insert unconditional "jump Succ" instruction in the new block if
261 // necessary.
262 if (!NewMBB.isLayoutSuccessor(&Succ)) {
264 TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
265 }
266 } else {
267 assert(!UncondBr &&
268 "Cannot have a branchless successor and an unconditional branch!");
269 assert(NewMBB.isLayoutSuccessor(&Succ) &&
270 "A non-branch successor must have been a layout successor before "
271 "and now is a layout successor of the new block.");
272 }
273
274 // If this is the only edge to the successor, we can just replace it in the
275 // CFG. Otherwise we need to add a new entry in the CFG for the new
276 // successor.
277 if (SuccCount == 1) {
278 MBB.replaceSuccessor(&Succ, &NewMBB);
279 } else {
280 MBB.splitSuccessor(&Succ, &NewMBB);
281 }
282
283 // Hook up the edge from the new basic block to the old successor in the CFG.
284 NewMBB.addSuccessor(&Succ);
285
286 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
287 for (MachineInstr &MI : Succ) {
 // PHIs are grouped at the top of the block, so stop at the first non-PHI.
288 if (!MI.isPHI())
289 break;
 // PHI operands after the def come in (value, block) pairs, hence the
 // stride of two starting at index 1.
290 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
291 OpIdx += 2) {
292 MachineOperand &OpV = MI.getOperand(OpIdx);
293 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
294 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
295 if (OpMBB.getMBB() != &MBB)
296 continue;
297
298 // If this is the last edge to the successor, just replace MBB in the PHI
299 if (SuccCount == 1) {
300 OpMBB.setMBB(&NewMBB);
301 break;
302 }
303
304 // Otherwise, append a new pair of operands for the new incoming edge.
305 MI.addOperand(MF, OpV);
306 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
307 break;
308 }
309 }
310
311 // Inherit live-ins from the successor
312 for (auto &LI : Succ.liveins())
313 NewMBB.addLiveIn(LI);
314
315 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
316 << Succ.getName() << "'.\n");
317 return NewMBB;
318}
319
320/// Remove duplicate PHI operands to leave each PHI in a canonical and
321/// predictable form.
322///
323/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
324/// isn't what you might expect. We may have multiple entries in PHI nodes for
325/// a single predecessor. This makes CFG-updating extremely complex, so here we
326/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
327/// one entry per predecessor, regardless of how many edges there are.
///
/// NOTE(review): the function signature and the declaration of `Preds` are not
/// visible in this listing; `Preds` is used below as a set of predecessor
/// blocks that is cleared after every PHI.
330 SmallVector<int, 4> DupIndices;
331 for (auto &MBB : MF)
332 for (auto &MI : MBB) {
 // PHIs are grouped at the top of the block, so stop at the first non-PHI.
333 if (!MI.isPHI())
334 break;
335
336 // First we scan the operands of the PHI looking for duplicate entries for
337 // a particular predecessor. We retain the operand index of each duplicate
338 // entry found.
339 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
340 OpIdx += 2)
341 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
342 DupIndices.push_back(OpIdx);
343
344 // Now walk the duplicate indices, removing both the block and value. Note
345 // that these are stored as a vector making this element-wise removal
346 // potentially quadratic.
347 //
348 //
349 // FIXME: It is really frustrating that we have to use a quadratic
350 // removal algorithm here. There should be a better way, but the use-def
351 // updates required make that impossible using the public API.
352 //
353 // Note that we have to process these backwards so that we don't
354 // invalidate other indices with each removal.
355 while (!DupIndices.empty()) {
356 int OpIdx = DupIndices.pop_back_val();
357 // Remove both the block and value operand, again in reverse order to
358 // preserve indices.
359 MI.removeOperand(OpIdx + 1);
360 MI.removeOperand(OpIdx);
361 }
362
 // Reset the seen-predecessor set before looking at the next PHI.
363 Preds.clear();
364 }
365}
366
367/// Helper to scan a function for loads vulnerable to misspeculation that we
368/// want to harden.
369///
370/// We use this to avoid making changes to functions where there is nothing we
371/// need to do to harden against misspeculation.
///
/// Returns true as soon as one instruction that may load (other than MFENCE) is
/// found ahead of any LFENCE in its block, and false if no block contains one.
///
/// NOTE(review): the signature line is not visible in this listing; the body
/// iterates over a function named `MF`.
373 for (MachineBasicBlock &MBB : MF) {
374 for (MachineInstr &MI : MBB) {
375 // Loads within this basic block after an LFENCE are not at risk of
376 // speculatively executing with invalid predicates from prior control
377 // flow. So break out of this block but continue scanning the function.
378 if (MI.getOpcode() == X86::LFENCE)
379 break;
380
381 // Looking for loads only.
382 if (!MI.mayLoad())
383 continue;
384
385 // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
386 if (MI.getOpcode() == X86::MFENCE)
387 continue;
388
389 // We found a load.
390 return true;
391 }
392 }
393
394 // No loads found.
395 return false;
396}
397
/// Entry point of the pass for a single machine function.
///
/// Returns false when the function is left untouched (hardening was not
/// requested, or the function has no blocks) and true on every other path
/// visible here.
///
/// NOTE(review): several guard lines of this function (the first half of the
/// enable check, the LFENCE-mode check, the declaration of `Entry`, and the
/// conditions opening three later blocks) are not visible in this listing.
398bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
399 MachineFunction &MF) {
400 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
401 << " **********\n");
402
403 // Only run if this pass is forced enabled or we detect the relevant function
404 // attribute requesting SLH.
406 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
407 return false;
408
 // Cache the per-function target objects used by the helper methods.
409 Subtarget = &MF.getSubtarget<X86Subtarget>();
410 MRI = &MF.getRegInfo();
411 TII = Subtarget->getInstrInfo();
412 TRI = Subtarget->getRegisterInfo();
413
414 // FIXME: Support for 32-bit.
415 PS.emplace(MF, &X86::GR64_NOSPRegClass);
416
417 if (MF.begin() == MF.end())
418 // Nothing to do for a degenerate empty function...
419 return false;
420
421 // We support an alternative hardening technique based on a debug flag.
423 hardenEdgesWithLFENCE(MF);
424 return true;
425 }
426
427 // Create a dummy debug loc to use for all the generated code here.
428 DebugLoc Loc;
429
431 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
432
433 // Do a quick scan to see if we have any checkable loads.
434 bool HasVulnerableLoad = hasVulnerableLoad(MF);
435
436 // See if we have any conditional branching blocks that we will need to trace
437 // predicate state through.
438 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
439
440 // If we have no interesting conditions or loads, nothing to do here.
 // NOTE(review): this path returns true although no instruction has been
 // inserted yet in the code visible above -- presumably conservative; confirm.
441 if (!HasVulnerableLoad && Infos.empty())
442 return true;
443
444 // The poison value is required to be an all-ones value for many aspects of
445 // this mitigation.
446 const int PoisonVal = -1;
447 PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
448 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
449 .addImm(PoisonVal);
450 ++NumInstsInserted;
451
452 // If we have loads being hardened and we've asked for call and ret edges to
453 // get a full fence-based mitigation, inject that fence.
454 if (HasVulnerableLoad && FenceCallAndRet) {
455 // We need to insert an LFENCE at the start of the function to suspend any
456 // incoming misspeculation from the caller. This helps two-fold: the caller
457 // may not have been protected as this code has been, and this code gets to
458 // not take any specific action to protect across calls.
459 // FIXME: We could skip this for functions which unconditionally return
460 // a constant.
461 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
462 ++NumInstsInserted;
463 ++NumLFENCEsInserted;
464 }
465
466 // If we guarded the entry with an LFENCE and have no conditionals to protect
467 // in blocks, then we're done.
468 if (FenceCallAndRet && Infos.empty())
469 // We may have changed the function's code at this point to insert fences.
470 return true;
471
472 // Choose how the initial predicate state is produced. (The original comment
 // on this line is cut off mid-word in the source, and the condition that
 // opens the branch below is not visible in this listing.)
474 // Set up the predicate state by extracting it from the incoming stack
475 // pointer so we pick up any misspeculation in our caller.
476 PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
477 } else {
478 // Otherwise, just build the predicate state itself by zeroing a register
479 // as we don't need any initial state.
480 PS->InitialReg = MRI->createVirtualRegister(PS->RC);
481 Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
482 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
483 PredStateSubReg);
484 ++NumInstsInserted;
 // The zeroing instruction implicitly defines EFLAGS; nothing reads that
 // def, so mark it dead.
485 MachineOperand *ZeroEFLAGSDefOp =
486 ZeroI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
487 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
488 "Must have an implicit def of EFLAGS!");
489 ZeroEFLAGSDefOp->setIsDead(true);
 // Widen the zeroed 32-bit register into the predicate-state register.
490 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
491 PS->InitialReg)
492 .addImm(0)
493 .addReg(PredStateSubReg)
494 .addImm(X86::sub_32bit);
495 }
496
497 // We're going to need to trace predicate state throughout the function's
498 // CFG. Prepare for this by setting up our initial state of PHIs with unique
499 // predecessor entries and all the initial predicate state.
 // NOTE(review): the call that performs this preparation is not visible in
 // this listing.
501
502 // Track the updated values in an SSA updater to rewrite into SSA form at the
503 // end.
504 PS->SSA.Initialize(PS->InitialReg);
505 PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
506
507 // Trace through the CFG.
508 auto CMovs = tracePredStateThroughCFG(MF, Infos);
509
510 // We may also enter basic blocks in this function via exception handling
511 // control flow. Here, if we are hardening interprocedurally, we need to
512 // re-capture the predicate state from the throwing code. In the Itanium ABI,
513 // the throw will always look like a call to __cxa_throw and will have the
514 // predicate state in the stack pointer, so extract fresh predicate state from
515 // the stack pointer and make it available in SSA.
516 // FIXME: Handle non-itanium ABI EH models.
 // NOTE(review): the condition guarding this loop is not visible in this
 // listing.
518 for (MachineBasicBlock &MBB : MF) {
519 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
520 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
521 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
522 if (!MBB.isEHPad())
523 continue;
524 PS->SSA.AddAvailableValue(
525 &MBB,
526 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
527 }
528 }
529
 // NOTE(review): the condition guarding the block below is not visible in
 // this listing.
531 // If we are going to harden calls and jumps we need to unfold their memory
532 // operands.
533 unfoldCallAndJumpLoads(MF);
534
535 // Then we trace predicate state through the indirect branches.
536 auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
537 CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
538 }
539
540 // Now that we have the predicate state available at the start of each block
541 // in the CFG, trace it through each block, hardening vulnerable instructions
542 // as we go.
543 tracePredStateThroughBlocksAndHarden(MF);
544
545 // Now rewrite all the uses of the pred state using the SSA updater to insert
546 // PHIs connecting the state between blocks along the CFG edges.
547 for (MachineInstr *CMovI : CMovs)
548 for (MachineOperand &Op : CMovI->operands()) {
 // Only operands that still name the placeholder initial register need
 // rewriting.
549 if (!Op.isReg() || Op.getReg() != PS->InitialReg)
550 continue;
551
552 PS->SSA.RewriteUse(Op);
553 }
554
555 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
556 dbgs() << "\n"; MF.verify(this));
557 return true;
558}
559
560/// Implements the naive hardening approach of putting an LFENCE after every
561/// potentially mis-predicted control flow construct.
562///
563/// We include this as an alternative mostly for the purpose of comparison. The
564/// performance impact of this is expected to be extremely severe and not
565/// practical for any real-world users.
///
/// Works in two phases: first collect every non-EH-pad successor of a block
/// that has more than one successor and whose first terminator is a branch,
/// then insert one LFENCE at the top of each collected block.
///
/// NOTE(review): the declaration of the `Blocks` container is not visible in
/// this listing.
566void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
567 MachineFunction &MF) {
568 // First, we scan the function looking for blocks that are reached along edges
569 // that we might want to harden.
571 for (MachineBasicBlock &MBB : MF) {
572 // If there are no or only one successor, nothing to do here.
573 if (MBB.succ_size() <= 1)
574 continue;
575
576 // Skip blocks unless their terminators start with a branch. Other
577 // terminators don't seem interesting for guarding against misspeculation.
578 auto TermIt = MBB.getFirstTerminator();
579 if (TermIt == MBB.end() || !TermIt->isBranch())
580 continue;
581
582 // Add all the non-EH-pad successors to the blocks we want to harden. We
583 // skip EH pads because there isn't really a condition of interest on
584 // entering.
585 for (MachineBasicBlock *SuccMBB : MBB.successors())
586 if (!SuccMBB->isEHPad())
587 Blocks.insert(SuccMBB);
588 }
589
 // Place the fence after any PHIs and labels at the start of each block.
590 for (MachineBasicBlock *MBB : Blocks) {
591 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
592 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
593 ++NumInstsInserted;
594 ++NumLFENCEsInserted;
595 }
596}
597
// Builds one BlockCondInfo per block that has more than one successor and at
// least one traceable conditional branch among its terminators. Blocks whose
// terminators cannot be traced are counted in NumBranchesUntraced and skipped.
//
// NOTE(review): the return-type line, the declaration of `Infos`, and the
// condition opening the "invalid condition" branch are not visible in this
// listing.
599X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
601
602 // Walk the function and build up a summary for each block's conditions that
603 // we need to trace through.
604 for (MachineBasicBlock &MBB : MF) {
605 // If there are no or only one successor, nothing to do here.
606 if (MBB.succ_size() <= 1)
607 continue;
608
609 // We want to reliably handle any conditional branch terminators in the
610 // MBB, so we manually analyze the branch. We can handle all of the
611 // permutations here, including ones that analyze branch cannot.
612 //
613 // The approach is to walk backwards across the terminators, resetting at
614 // any unconditional non-indirect branch, and track all conditional edges
615 // to basic blocks as well as the fallthrough or unconditional successor
616 // edge. For each conditional edge, we track the target and the opposite
617 // condition code in order to inject a "no-op" cmov into that successor
618 // that will harden the predicate. For the fallthrough/unconditional
619 // edge, we inject a separate cmov for each conditional branch with
620 // matching condition codes. This effectively implements an "and" of the
621 // condition flags, even if there isn't a single condition flag that would
622 // directly implement that. We don't bother trying to optimize either of
623 // these cases because if such an optimization is possible, LLVM should
624 // have optimized the conditional *branches* in that way already to reduce
625 // instruction count. This late, we simply assume the minimal number of
626 // branch instructions is being emitted and use that to guide our cmov
627 // insertion.
628
 // Start with the block itself, no conditional branches and no
 // unconditional branch.
629 BlockCondInfo Info = {&MBB, {}, nullptr};
630
631 // Now walk backwards through the terminators and build up successors they
632 // reach and the conditions.
633 for (MachineInstr &MI : llvm::reverse(MBB)) {
634 // Once we've handled all the terminators, we're done.
635 if (!MI.isTerminator())
636 break;
637
638 // If we see a non-branch terminator, we can't handle anything so bail.
639 if (!MI.isBranch()) {
640 Info.CondBrs.clear();
641 break;
642 }
643
644 // If we see an unconditional branch, reset our state, clear any
645 // fallthrough, and set this as the "else" successor.
646 if (MI.getOpcode() == X86::JMP_1) {
647 Info.CondBrs.clear();
648 Info.UncondBr = &MI;
649 continue;
650 }
651
652 // If we get an invalid condition, we have an indirect branch or some
653 // other unanalyzable "fallthrough" case. We model this as a nullptr for
654 // the destination so we can still guard any conditional successors.
655 // Consider code sequences like:
656 // ```
657 // jCC L1
658 // jmpq *%rax
659 // ```
660 // We still want to harden the edge to `L1`.
662 Info.CondBrs.clear();
663 Info.UncondBr = &MI;
664 continue;
665 }
666
667 // We have a vanilla conditional branch, add it to our list.
668 Info.CondBrs.push_back(&MI);
669 }
 // No conditional branch survived the walk: nothing can be traced for this
 // block, so record that and move on.
670 if (Info.CondBrs.empty()) {
671 ++NumBranchesUntraced;
672 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
673 MBB.dump());
674 continue;
675 }
676
677 Infos.push_back(Info);
678 }
679
680 return Infos;
681}
682
683/// Trace the predicate state through the CFG, instrumenting each conditional
684/// branch such that misspeculation through an edge will poison the predicate
685/// state.
686///
687/// Returns the list of inserted CMov instructions so that they can have their
688/// uses of the predicate state rewritten into proper SSA form once it is
689/// complete.
///
/// NOTE(review): the return-type and parameter lines of the signature, the
/// declarations of `CMovs` and `SuccCounts`, the last parameter of the lambda,
/// the lines computing `Cond` / `InvCond`, and the probability-normalization
/// call are not visible in this listing.
691X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
693 // Collect the inserted cmov instructions so we can rewrite their uses of the
694 // predicate state into SSA form.
696
697 // Now walk all of the basic blocks looking for ones that end in conditional
698 // jumps where we need to update this register along each edge.
699 for (const BlockCondInfo &Info : Infos) {
700 MachineBasicBlock &MBB = *Info.MBB;
701 const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
702 MachineInstr *UncondBr = Info.UncondBr;
703
704 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
705 << "\n");
706 ++NumCondBranchesTraced;
707
708 // Compute the non-conditional successor as either the target of any
709 // unconditional branch or the layout successor.
 // A recorded unconditional branch that is not a JMP_1 yields a null
 // successor here.
710 MachineBasicBlock *UncondSucc =
711 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
712 ? UncondBr->getOperand(0).getMBB()
713 : nullptr)
714 : &*std::next(MachineFunction::iterator(&MBB));
715
716 // Count how many edges there are to any given successor.
718 if (UncondSucc)
719 ++SuccCounts[UncondSucc];
720 for (auto *CondBr : CondBrs)
721 ++SuccCounts[CondBr->getOperand(0).getMBB()];
722
723 // A lambda to insert cmov instructions into a block checking all of the
724 // condition codes in a sequence.
725 auto BuildCheckingBlockForSuccAndConds =
726 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
727 MachineInstr *Br, MachineInstr *&UncondBr,
729 // First, we split the edge to insert the checking block into a safe
730 // location.
 // The successor itself is reused when this is its only incoming edge;
 // otherwise a fresh block is created on the edge.
731 auto &CheckingMBB =
732 (SuccCount == 1 && Succ.pred_size() == 1)
733 ? Succ
734 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
735
 // The cmovs read EFLAGS, so it must be live into the checking block.
736 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
737 if (!LiveEFLAGS)
738 CheckingMBB.addLiveIn(X86::EFLAGS);
739
740 // Now insert the cmovs to implement the checks.
741 auto InsertPt = CheckingMBB.begin();
742 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
743 "Should never have a PHI in the initial checking block as it "
744 "always has a single predecessor!");
745
746 // We will wire each cmov to each other, but need to start with the
747 // incoming pred state.
748 unsigned CurStateReg = PS->InitialReg;
749
750 for (X86::CondCode Cond : Conds) {
751 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
752 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
753
754 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
755 // Note that we intentionally use an empty debug location so that
756 // this picks up the preceding location.
757 auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
758 TII->get(CMovOp), UpdatedStateReg)
759 .addReg(CurStateReg)
760 .addReg(PS->PoisonReg)
761 .addImm(Cond);
762 // If this is the last cmov and the EFLAGS weren't originally
763 // live-in, mark them as killed.
764 if (!LiveEFLAGS && Cond == Conds.back())
765 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
766 ->setIsKill(true);
767
768 ++NumInstsInserted;
769 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
770 dbgs() << "\n");
771
772 // The first one of the cmovs will be using the top level
773 // `PredStateReg` and need to get rewritten into SSA form.
774 if (CurStateReg == PS->InitialReg)
775 CMovs.push_back(&*CMovI);
776
777 // The next cmov should start from this one's def.
778 CurStateReg = UpdatedStateReg;
779 }
780
781 // And put the last one into the available values for SSA form of our
782 // predicate state.
783 PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
784 };
785
 // Condition codes of all conditional branches, collected for the checks on
 // the unconditional/fallthrough edge.
786 std::vector<X86::CondCode> UncondCodeSeq;
787 for (auto *CondBr : CondBrs) {
788 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
789 int &SuccCount = SuccCounts[&Succ];
790
793 UncondCodeSeq.push_back(Cond);
794
795 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
796 {InvCond});
797
798 // Decrement the successor count now that we've split one of the edges.
799 // We need to keep the count of edges to the successor accurate in order
800 // to know above when to *replace* the successor in the CFG vs. just
801 // adding the new successor.
802 --SuccCount;
803 }
804
805 // Since we may have split edges and changed the number of successors,
806 // normalize the probabilities. This avoids doing it each time we split an
807 // edge.
809
810 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
811 // need to intersect the other condition codes. We can do this by just
812 // doing a cmov for each one.
813 if (!UncondSucc)
814 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
815 // just skip this and continue.
816 continue;
817
818 assert(SuccCounts[UncondSucc] == 1 &&
819 "We should never have more than one edge to the unconditional "
820 "successor at this point because every other edge must have been "
821 "split above!");
822
823 // Sort and unique the codes to minimize them.
824 llvm::sort(UncondCodeSeq);
825 UncondCodeSeq.erase(llvm::unique(UncondCodeSeq), UncondCodeSeq.end());
826
827 // Build a checking version of the successor.
828 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
829 UncondBr, UncondBr, UncondCodeSeq);
830 }
831
832 return CMovs;
833}
834
835/// Compute the register class for the unfolded load.
836///
837/// FIXME: This should probably live in X86InstrInfo, potentially by adding
838/// a way to unfold into a newly created vreg rather than requiring a register
839/// input.
///
/// Asks the instruction info for the opcode obtained by unfolding the load out
/// of \p Opcode, and returns the register class of the operand index reported
/// by that query. The caller in this file treats a null result as "cannot
/// unfold".
///
/// NOTE(review): the signature line carrying the function name and its first
/// parameters is not visible in this listing.
840static const TargetRegisterClass *
842 unsigned Opcode) {
843 unsigned Index;
844 unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
845 Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
846 const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
847 return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
848}
849
// Rewrites every call or branch that loads its target from memory into a
// separate load into a fresh virtual register followed by the register form of
// the instruction. Far calls and far jumps are left untouched; any other
// unexpected loading call or branch is a fatal error.
//
// NOTE(review): the inner loop header, two LLVM_DEBUG opening lines and the
// declaration of `NewMIs` are not visible in this listing.
850void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
851 MachineFunction &MF) {
852 for (MachineBasicBlock &MBB : MF)
853 // We use make_early_inc_range here so we can remove instructions if needed
854 // without disturbing the iteration.
856 // Must either be a call or a branch.
857 if (!MI.isCall() && !MI.isBranch())
858 continue;
859 // We only care about loading variants of these instructions.
860 if (!MI.mayLoad())
861 continue;
862
863 switch (MI.getOpcode()) {
864 default: {
866 dbgs() << "ERROR: Found an unexpected loading branch or call "
867 "instruction:\n";
868 MI.dump(); dbgs() << "\n");
869 report_fatal_error("Unexpected loading branch or call!");
870 }
871
872 case X86::FARCALL16m:
873 case X86::FARCALL32m:
874 case X86::FARCALL64m:
875 case X86::FARJMP16m:
876 case X86::FARJMP32m:
877 case X86::FARJMP64m:
878 // We cannot mitigate far jumps or calls, but we also don't expect them
879 // to be vulnerable to Spectre v1.2 style attacks.
880 continue;
881
882 case X86::CALL16m:
883 case X86::CALL16m_NT:
884 case X86::CALL32m:
885 case X86::CALL32m_NT:
886 case X86::CALL64m:
887 case X86::CALL64m_NT:
888 case X86::JMP16m:
889 case X86::JMP16m_NT:
890 case X86::JMP32m:
891 case X86::JMP32m_NT:
892 case X86::JMP64m:
893 case X86::JMP64m_NT:
894 case X86::TAILJMPm64:
895 case X86::TAILJMPm64_REX:
896 case X86::TAILJMPm:
897 case X86::TCRETURNmi64:
898 case X86::TCRETURNmi: {
899 // Use the generic unfold logic now that we know we're dealing with
900 // expected instructions.
901 // FIXME: We don't have test coverage for all of these!
902 auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
903 if (!UnfoldedRC) {
905 << "ERROR: Unable to unfold load from instruction:\n";
906 MI.dump(); dbgs() << "\n");
907 report_fatal_error("Unable to unfold load!");
908 }
 // Fresh virtual register that will receive the loaded target.
909 Register Reg = MRI->createVirtualRegister(UnfoldedRC);
911 // If we were able to compute an unfolded reg class, any failure here
912 // is just a programming error so just assert.
913 bool Unfolded =
914 TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
915 /*UnfoldStore*/ false, NewMIs);
 // Silences the unused-variable warning when asserts are compiled out.
916 (void)Unfolded;
917 assert(Unfolded &&
918 "Computed unfolded register class but failed to unfold");
919 // Now stitch the new instructions into place and erase the old one.
920 for (auto *NewMI : NewMIs)
921 MBB.insert(MI.getIterator(), NewMI);
922
923 // Update the call site info.
924 if (MI.isCandidateForCallSiteEntry())
925 MF.eraseCallSiteInfo(&MI);
926
927 MI.eraseFromParent();
928 LLVM_DEBUG({
929 dbgs() << "Unfolded load successfully into:\n";
930 for (auto *NewMI : NewMIs) {
931 NewMI->dump();
932 dbgs() << "\n";
933 }
934 });
935 continue;
936 }
937 }
 // Every switch case above either continues the loop or aborts.
938 llvm_unreachable("Escaped switch with default!");
939 }
940}
941
942/// Trace the predicate state through indirect branches, instrumenting them to
943/// poison the state if a target is reached that does not match the expected
944/// target.
945///
946/// This is designed to mitigate Spectre variant 1 attacks where an indirect
947/// branch is trained to predict a particular target and then mispredicts that
948/// target in a way that can leak data. Despite using an indirect branch, this
949/// is really a variant 1 style attack: it does not steer execution to an
950/// arbitrary or attacker controlled address, and it does not require any
951/// special code executing next to the victim. This attack can also be mitigated
952/// through retpolines, but those require either replacing indirect branches
953/// with conditional direct branches or lowering them through a device that
954/// blocks speculation. This mitigation can replace these retpoline-style
955/// mitigations for jump tables and other indirect branches within a function
956/// when variant 2 isn't a risk while allowing limited speculation. Indirect
957/// calls, however, cannot be mitigated through this technique without changing
958/// the ABI in a fundamental way.
// NOTE(review): listing line 959 is absent. It presumably held the return type
// of this definition (the function returns the CMovs container built below) --
// confirm against the upstream file. Other absent listing lines are flagged
// where they occur.
960X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
961 MachineFunction &MF) {
962 // We use the SSAUpdater to insert PHI nodes for the target addresses of
963 // indirect branches. We don't actually need the full power of the SSA updater
964 // in this particular case as we always have immediately available values, but
965 // this avoids us having to re-implement the PHI construction logic.
966 MachineSSAUpdater TargetAddrSSA(MF);
967 TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
968
969 // Track which blocks were terminated with an indirect branch.
970 SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
971
972 // We need to know what blocks end up reached via indirect branches. We
973 // expect this to be a subset of those whose address is taken and so track it
974 // directly via the CFG.
975 SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
976
977 // Walk all the blocks which end in an indirect branch and make the
978 // target address available.
979 for (MachineBasicBlock &MBB : MF) {
980 // Find the last terminator.
// Walk backwards from the end of the block, skipping debug instructions.
981 auto MII = MBB.instr_rbegin();
982 while (MII != MBB.instr_rend() && MII->isDebugInstr())
983 ++MII;
984 if (MII == MBB.instr_rend())
985 continue;
986 MachineInstr &TI = *MII;
987 if (!TI.isTerminator() || !TI.isBranch())
988 // No terminator or non-branch terminator.
989 continue;
990
// Only the JMP64r case below assigns TargetReg; every other case either
// continues to the next block or aborts.
991 unsigned TargetReg;
992
993 switch (TI.getOpcode()) {
994 default:
995 // Direct branch or conditional branch (leading to fallthrough).
996 continue;
997
998 case X86::FARJMP16m:
999 case X86::FARJMP32m:
1000 case X86::FARJMP64m:
1001 // We cannot mitigate far jumps or calls, but we also don't expect them
1002 // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1003 continue;
1004
1005 case X86::JMP16m:
1006 case X86::JMP16m_NT:
1007 case X86::JMP32m:
1008 case X86::JMP32m_NT:
1009 case X86::JMP64m:
1010 case X86::JMP64m_NT:
1011 // Mostly as documentation.
1012 report_fatal_error("Memory operand jumps should have been unfolded!");
1013
1014 case X86::JMP16r:
// NOTE(review): listing lines 1015 and 1018 are absent. Each presumably opened
// the call whose string argument follows (compare the report_fatal_error call
// above) -- confirm upstream.
1016 "Support for 16-bit indirect branches is not implemented.");
1017 case X86::JMP32r:
1019 "Support for 32-bit indirect branches is not implemented.");
1020
1021 case X86::JMP64r:
1022 TargetReg = TI.getOperand(0).getReg();
1023 }
1024
1025 // We have definitely found an indirect branch. Verify that there are no
1026 // preceding conditional branches as we don't yet support that.
1027 if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1028 return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1029 })) {
1030 LLVM_DEBUG({
1031 dbgs() << "ERROR: Found other terminators in a block with an indirect "
1032 "branch! This is not yet supported! Terminator sequence:\n";
1033 for (MachineInstr &MI : MBB.terminators()) {
1034 MI.dump();
1035 dbgs() << '\n';
1036 }
1037 });
1038 report_fatal_error("Unimplemented terminator sequence!");
1039 }
1040
1041 // Make the target register an available value for this block.
1042 TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1043 IndirectTerminatedMBBs.insert(&MBB);
1044
1045 // Add all the successors to our target candidates.
1046 for (MachineBasicBlock *Succ : MBB.successors())
1047 IndirectTargetMBBs.insert(Succ);
1048 }
1049
1050 // Keep track of the cmov instructions we insert so we can return them.
// NOTE(review): listing line 1051 is absent. CMovs is used below without a
// visible declaration, so it presumably declared that container -- confirm
// upstream.
1052
1053 // If we didn't find any indirect branches with targets, nothing to do here.
1054 if (IndirectTargetMBBs.empty())
1055 return CMovs;
1056
1057 // We found indirect branches and targets that need to be instrumented to
1058 // harden loads within them. Walk the blocks of the function (to get a stable
1059 // ordering) and instrument each target of an indirect branch.
1060 for (MachineBasicBlock &MBB : MF) {
1061 // Skip the blocks that aren't candidate targets.
1062 if (!IndirectTargetMBBs.count(&MBB))
1063 continue;
1064
1065 // We don't expect EH pads to ever be reached via an indirect branch. If
1066 // this is desired for some reason, we could simply skip them here rather
1067 // than asserting.
1068 assert(!MBB.isEHPad() &&
1069 "Unexpected EH pad as target of an indirect branch!");
1070
1071 // We should never end up threading EFLAGS into a block to harden
1072 // conditional jumps as there would be an additional successor via the
1073 // indirect branch. As a consequence, all such edges would be split before
1074 // reaching here, and the inserted block will handle the EFLAGS-based
1075 // hardening.
1076 assert(!MBB.isLiveIn(X86::EFLAGS) &&
1077 "Cannot check within a block that already has live-in EFLAGS!");
1078
1079 // We can't handle having non-indirect edges into this block unless this is
1080 // the only successor and we can synthesize the necessary target address.
1081 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1082 // If we've already handled this by extracting the target directly,
1083 // nothing to do.
1084 if (IndirectTerminatedMBBs.count(Pred))
1085 continue;
1086
1087 // Otherwise, we have to be the only successor. We generally expect this
1088 // to be true as conditional branches should have had a critical edge
1089 // split already. We don't however need to worry about EH pad successors
1090 // as they'll happily ignore the target and their hardening strategy is
1091 // resilient to all ways in which they could be reached speculatively.
1092 if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1093 return Succ->isEHPad() || Succ == &MBB;
1094 })) {
1095 LLVM_DEBUG({
1096 dbgs() << "ERROR: Found conditional entry to target of indirect "
1097 "branch!\n";
1098 Pred->dump();
1099 MBB.dump();
1100 });
1101 report_fatal_error("Cannot harden a conditional entry to a target of "
1102 "an indirect branch!");
1103 }
1104
1105 // Now we need to compute the address of this block and install it as a
1106 // synthetic target in the predecessor. We do this at the bottom of the
1107 // predecessor.
1108 auto InsertPt = Pred->getFirstTerminator();
1109 Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1110 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1111 !Subtarget->isPositionIndependent()) {
1112 // Directly materialize it into an immediate.
1113 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1114 TII->get(X86::MOV64ri32), TargetReg)
1115 .addMBB(&MBB);
1116 ++NumInstsInserted;
1117 (void)AddrI;
1118 LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1119 dbgs() << "\n");
1120 } else {
// Otherwise form the block address with a RIP-based LEA.
1121 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1122 TargetReg)
1123 .addReg(/*Base*/ X86::RIP)
1124 .addImm(/*Scale*/ 1)
1125 .addReg(/*Index*/ 0)
1126 .addMBB(&MBB)
1127 .addReg(/*Segment*/ 0);
1128 ++NumInstsInserted;
1129 (void)AddrI;
1130 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1131 dbgs() << "\n");
1132 }
1133 // And make this available.
1134 TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1135 }
1136
1137 // Materialize the needed SSA value of the target. Note that we need the
1138 // middle of the block as this block might at the bottom have an indirect
1139 // branch back to itself. We can do this here because at this point, every
1140 // predecessor of this block has an available value. This is basically just
1141 // automating the construction of a PHI node for this target.
1142 Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1143
1144 // Insert a comparison of the incoming target register with this block's
1145 // address. This also requires us to mark the block as having its address
1146 // taken explicitly.
// NOTE(review): listing line 1147 is absent; going by the comment above it
// presumably held the statement that marks MBB as address-taken -- confirm
// upstream.
1148 auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1149 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1150 !Subtarget->isPositionIndependent()) {
1151 // Check directly against a relocated immediate when we can.
1152 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1153 .addReg(TargetReg, RegState::Kill)
1154 .addMBB(&MBB);
1155 ++NumInstsInserted;
1156 (void)CheckI;
1157 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1158 } else {
1159 // Otherwise compute the address into a register first.
1160 Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1161 auto AddrI =
1162 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1163 .addReg(/*Base*/ X86::RIP)
1164 .addImm(/*Scale*/ 1)
1165 .addReg(/*Index*/ 0)
1166 .addMBB(&MBB)
1167 .addReg(/*Segment*/ 0);
1168 ++NumInstsInserted;
1169 (void)AddrI;
1170 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1171 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1172 .addReg(TargetReg, RegState::Kill)
1173 .addReg(AddrReg, RegState::Kill);
1174 ++NumInstsInserted;
1175 (void)CheckI;
1176 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1177 }
1178
1179 // Now cmov over the predicate if the comparison wasn't equal.
1180 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1181 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1182 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1183 auto CMovI =
1184 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1185 .addReg(PS->InitialReg)
1186 .addReg(PS->PoisonReg)
// NOTE(review): listing line 1187 is absent; it presumably supplied the final
// (condition) operand of the cmov and terminated the statement -- confirm
// upstream.
1188 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
1189 ->setIsKill(true);
1190 ++NumInstsInserted;
1191 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1192 CMovs.push_back(&*CMovI);
1193
1194 // And put the new value into the available values for SSA form of our
1195 // predicate state.
1196 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1197 }
1198
1199 // Return all the newly inserted cmov instructions of the predicate state.
1200 return CMovs;
1201}
1202
1203// Returns true if the MI has EFLAGS as a register def operand and it's live,
1204// otherwise it returns false
1205static bool isEFLAGSDefLive(const MachineInstr &MI) {
1206 if (const MachineOperand *DefOp =
1207 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1208 return !DefOp->isDead();
1209 }
1210 return false;
1211}
1212
1214 const TargetRegisterInfo &TRI) {
1215 // Check if EFLAGS are alive by seeing if there is a def of them or they
1216 // live-in, and then seeing if that def is in turn used.
1218 if (MachineOperand *DefOp =
1219 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1220 // If the def is dead, then EFLAGS is not live.
1221 if (DefOp->isDead())
1222 return false;
1223
1224 // Otherwise we've def'ed it, and it is live.
1225 return true;
1226 }
1227 // While at this instruction, also check if we use and kill EFLAGS
1228 // which means it isn't live.
1229 if (MI.killsRegister(X86::EFLAGS, &TRI))
1230 return false;
1231 }
1232
1233 // If we didn't find anything conclusive (neither definitely alive or
1234 // definitely dead) return whether it lives into the block.
1235 return MBB.isLiveIn(X86::EFLAGS);
1236}
1237
1238/// Trace the predicate state through each of the blocks in the function,
1239/// hardening everything necessary along the way.
1240///
1241/// We call this routine once the initial predicate state has been established
1242/// for each basic block in the function in the SSA updater. This routine traces
1243/// it through the instructions within each basic block, and for non-returning
1244/// blocks informs the SSA updater about the final state that lives out of the
1245/// block. Along the way, it hardens any vulnerable instruction using the
1246/// currently valid predicate state. We have to do these two things together
1247/// because the SSA updater only works across blocks. Within a block, we track
1248/// the current predicate state directly and update it as it changes.
1249///
1250/// This operates in two passes over each block. First, we analyze the loads in
1251/// the block to determine which strategy will be used to harden them: hardening
1252/// the address or hardening the loaded value when loaded into a register
1253/// amenable to hardening. We have to process these first because the two
1254/// strategies may interact -- later hardening may change what strategy we wish
1255/// to use. We also will analyze data dependencies between loads and avoid
1256/// hardening those loads that are data dependent on a load with a hardened
1257/// address. We also skip hardening loads already behind an LFENCE as that is
1258/// sufficient to harden them against misspeculation.
1259///
1260/// Second, we actively trace the predicate state through the block, applying
1261/// the hardening steps we determined necessary in the first pass as we go.
1262///
1263/// These two passes are applied to each basic block. We operate one block at a
1264/// time to simplify reasoning about reachability and sequencing.
// NOTE(review): this text comes from a rendered listing; every line still
// begins with its listing number and a few listing lines are absent. The gaps
// are flagged where they occur below.
1265void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1266 MachineFunction &MF) {
// Per-block working sets; all of them are cleared at the bottom of the block
// loop below.
1267 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1268 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1269
1270 SmallSet<unsigned, 16> HardenedAddrRegs;
1271
1272 SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1273
1274 // Track the set of load-dependent registers through the basic block. Because
1275 // the values of these registers have an existing data dependency on a loaded
1276 // value which we would have checked, we can omit any checks on them.
1277 SparseBitVector<> LoadDepRegs;
1278
1279 for (MachineBasicBlock &MBB : MF) {
1280 // The first pass over the block: collect all the loads which can have their
1281 // loaded value hardened and all the loads that instead need their address
1282 // hardened. During this walk we propagate load dependence for address
1283 // hardened loads and also look for LFENCE to stop hardening wherever
1284 // possible. When deciding whether or not to harden the loaded value or not,
1285 // we check to see if any registers used in the address will have been
1286 // hardened at this point and if so, harden any remaining address registers
1287 // as that often successfully re-uses hardened addresses and minimizes
1288 // instructions.
1289 //
1290 // FIXME: We should consider an aggressive mode where we continue to keep as
1291 // many loads value hardened even when some address register hardening would
1292 // be free (due to reuse).
1293 //
1294 // Note that we only need this pass if we are actually hardening loads.
1295 if (HardenLoads)
1296 for (MachineInstr &MI : MBB) {
1297 // We naively assume that all def'ed registers of an instruction have
1298 // a data dependency on all of their operands.
1299 // FIXME: Do a more careful analysis of x86 to build a conservative
1300 // model here.
1301 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1302 return Op.isReg() && LoadDepRegs.test(Op.getReg());
1303 }))
1304 for (MachineOperand &Def : MI.defs())
1305 if (Def.isReg())
1306 LoadDepRegs.set(Def.getReg());
1307
1308 // Both Intel and AMD are guiding that they will change the semantics of
1309 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1310 // no more need to guard things in this block.
1311 if (MI.getOpcode() == X86::LFENCE)
1312 break;
1313
1314 // If this instruction cannot load, nothing to do.
1315 if (!MI.mayLoad())
1316 continue;
1317
1318 // Some instructions which "load" are trivially safe or unimportant.
1319 if (MI.getOpcode() == X86::MFENCE)
1320 continue;
1321
1322 // Extract the memory operand information about this instruction.
1323 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1324 if (MemRefBeginIdx < 0) {
// NOTE(review): listing line 1325 is absent; it presumably opened the debug
// output statement that the next two lines continue -- confirm upstream.
1326 << "WARNING: unable to harden loading instruction: ";
1327 MI.dump());
1328 continue;
1329 }
1330
1331 MachineOperand &BaseMO =
1332 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1333 MachineOperand &IndexMO =
1334 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1335
1336 // If we have at least one (non-frame-index, non-RIP) register operand,
1337 // and neither operand is load-dependent, we need to check the load.
1338 unsigned BaseReg = 0, IndexReg = 0;
1339 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1340 BaseMO.getReg() != X86::NoRegister)
1341 BaseReg = BaseMO.getReg();
1342 if (IndexMO.getReg() != X86::NoRegister)
1343 IndexReg = IndexMO.getReg();
1344
1345 if (!BaseReg && !IndexReg)
1346 // No register operands!
1347 continue;
1348
1349 // If any register operand is dependent, this load is dependent and we
1350 // needn't check it.
1351 // FIXME: Is this true in the case where we are hardening loads after
1352 // they complete? Unclear, need to investigate.
1353 if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1354 (IndexReg && LoadDepRegs.test(IndexReg)))
1355 continue;
1356
1357 // If post-load hardening is enabled, this load is compatible with
1358 // post-load hardening, and we aren't already going to harden one of the
1359 // address registers, queue it up to be hardened post-load. Notably,
1360 // even once hardened this won't introduce a useful dependency that
1361 // could prune out subsequent loads.
// NOTE(review): listing line 1362 is absent; it presumably held the opening
// `if (` and the first conditions described by the comment above -- confirm
// upstream.
1363 !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1364 MI.getOperand(0).isReg() &&
1365 canHardenRegister(MI.getOperand(0).getReg()) &&
1366 !HardenedAddrRegs.count(BaseReg) &&
1367 !HardenedAddrRegs.count(IndexReg)) {
1368 HardenPostLoad.insert(&MI);
1369 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1370 continue;
1371 }
1372
1373 // Record this instruction for address hardening and record its register
1374 // operands as being address-hardened.
1375 HardenLoadAddr.insert(&MI);
1376 if (BaseReg)
1377 HardenedAddrRegs.insert(BaseReg);
1378 if (IndexReg)
1379 HardenedAddrRegs.insert(IndexReg);
1380
// The defs of an address-hardened load become load-dependent, which lets later
// loads that use them skip hardening (see the check against LoadDepRegs above).
1381 for (MachineOperand &Def : MI.defs())
1382 if (Def.isReg())
1383 LoadDepRegs.set(Def.getReg());
1384 }
1385
1386 // Now re-walk the instructions in the basic block, and apply whichever
1387 // hardening strategy we have elected. Note that we do this in a second
1388 // pass specifically so that we have the complete set of instructions for
1389 // which we will do post-load hardening and can defer it in certain
1390 // circumstances.
1391 for (MachineInstr &MI : MBB) {
1392 if (HardenLoads) {
1393 // We cannot both require hardening the def of a load and its address.
1394 assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1395 "Requested to harden both the address and def of a load!");
1396
1397 // Check if this is a load whose address needs to be hardened.
1398 if (HardenLoadAddr.erase(&MI)) {
1399 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1400 assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1401
1402 MachineOperand &BaseMO =
1403 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1404 MachineOperand &IndexMO =
1405 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1406 hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1407 continue;
1408 }
1409
1410 // Test if this instruction is one of our post load instructions (and
1411 // remove it from the set if so).
1412 if (HardenPostLoad.erase(&MI)) {
1413 assert(!MI.isCall() && "Must not try to post-load harden a call!");
1414
1415 // If this is a data-invariant load and there is no EFLAGS
1416 // interference, we want to try and sink any hardening as far as
1417 // possible.
// NOTE(review): listing line 1418 is absent; it presumably held the `if`
// implementing the condition described above, whose block closes before the
// hardenPostLoad call below -- confirm upstream.
1419 // Sink the instruction we'll need to harden as far as we can down
1420 // the graph.
1421 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1422
1423 // If we managed to sink this instruction, update everything so we
1424 // harden that instruction when we reach it in the instruction
1425 // sequence.
1426 if (SunkMI != &MI) {
1427 // If in sinking there was no instruction needing to be hardened,
1428 // we're done.
1429 if (!SunkMI)
1430 continue;
1431
1432 // Otherwise, add this to the set of defs we harden.
1433 HardenPostLoad.insert(SunkMI);
1434 continue;
1435 }
1436 }
1437
1438 unsigned HardenedReg = hardenPostLoad(MI);
1439
1440 // Mark the resulting hardened register as such so we don't re-harden.
1441 AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1442
1443 continue;
1444 }
1445
1446 // Check for an indirect call or branch that may need its input hardened
1447 // even if we couldn't find the specific load used, or were able to
1448 // avoid hardening it for some reason. Note that here we cannot break
1449 // out afterward as we may still need to handle any call aspect of this
1450 // instruction.
1451 if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1452 hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1453 }
1454
1455 // After we finish hardening loads we handle interprocedural hardening if
1456 // enabled and relevant for this instruction.
// NOTE(review): listing line 1457 is absent; it presumably held the condition
// guarding the `continue` on the next line (the "if enabled" check mentioned
// above) -- confirm upstream.
1458 continue;
1459 if (!MI.isCall() && !MI.isReturn())
1460 continue;
1461
1462 // If this is a direct return (IE, not a tail call) just directly harden
1463 // it.
1464 if (MI.isReturn() && !MI.isCall()) {
1465 hardenReturnInstr(MI);
1466 continue;
1467 }
1468
1469 // Otherwise we have a call. We need to handle transferring the predicate
1470 // state into a call and recovering it after the call returns (unless this
1471 // is a tail call).
1472 assert(MI.isCall() && "Should only reach here for calls!");
1473 tracePredStateThroughCall(MI);
1474 }
1475
// Reset the per-block working sets before moving on to the next block.
1476 HardenPostLoad.clear();
1477 HardenLoadAddr.clear();
1478 HardenedAddrRegs.clear();
1479 AddrRegToHardenedReg.clear();
1480
1481 // Currently, we only track data-dependent loads within a basic block.
1482 // FIXME: We should see if this is necessary or if we could be more
1483 // aggressive here without opening up attack avenues.
1484 LoadDepRegs.clear();
1485 }
1486}
1487
1488/// Save EFLAGS into the returned GPR. This can in turn be restored with
1489/// `restoreEFLAGS`.
1490///
1491/// Note that LLVM can only lower very simple patterns of saved and restored
1492/// EFLAGS registers. The restore should always be within the same basic block
1493/// as the save so that no PHI nodes are inserted.
1494unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1496 const DebugLoc &Loc) {
1497 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1498 // what instruction selection does.
1499 Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1500 // We directly copy the FLAGS register and rely on later lowering to clean
1501 // this up into the appropriate setCC instructions.
1502 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1503 ++NumInstsInserted;
1504 return Reg;
1505}
1506
1507/// Restore EFLAGS from the provided GPR. This should be produced by
1508/// `saveEFLAGS`.
1509///
1510/// This must be done within the same basic block as the save in order to
1511/// reliably lower.
1512void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1514 const DebugLoc &Loc, Register Reg) {
1515 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1516 ++NumInstsInserted;
1517}
1518
1519/// Takes the current predicate state (in a register) and merges it into the
1520/// stack pointer. The state is essentially a single bit, but we merge this in
1521/// a way that won't form non-canonical pointers and also will be preserved
1522/// across normal stack adjustments.
1523void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1525 const DebugLoc &Loc, unsigned PredStateReg) {
1526 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1527 // FIXME: This hard codes a shift distance based on the number of bits needed
1528 // to stay canonical on 64-bit. We should compute this somehow and support
1529 // 32-bit as part of that.
1530 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1531 .addReg(PredStateReg, RegState::Kill)
1532 .addImm(47);
1533 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1534 ++NumInstsInserted;
1535 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1536 .addReg(X86::RSP)
1537 .addReg(TmpReg, RegState::Kill);
1538 OrI->addRegisterDead(X86::EFLAGS, TRI);
1539 ++NumInstsInserted;
1540}
1541
1542/// Extracts the predicate state stored in the high bits of the stack pointer.
1543unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1545 const DebugLoc &Loc) {
1546 Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1547 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1548
1549 // We know that the stack pointer will have any preserved predicate state in
1550 // its high bit. We just want to smear this across the other bits. Turns out,
1551 // this is exactly what an arithmetic right shift does.
1552 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1553 .addReg(X86::RSP);
1554 auto ShiftI =
1555 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1556 .addReg(TmpReg, RegState::Kill)
1557 .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1558 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1559 ++NumInstsInserted;
1560
1561 return PredStateReg;
1562}
1563
1564void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1565 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1566 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1567 MachineBasicBlock &MBB = *MI.getParent();
1568 const DebugLoc &Loc = MI.getDebugLoc();
1569
1570 // Check if EFLAGS are alive by seeing if there is a def of them or they
1571 // live-in, and then seeing if that def is in turn used.
1572 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1573
1575
1576 if (BaseMO.isFI()) {
1577 // A frame index is never a dynamically controllable load, so only
1578 // harden it if we're covering fixed address loads as well.
1579 LLVM_DEBUG(
1580 dbgs() << " Skipping hardening base of explicit stack frame load: ";
1581 MI.dump(); dbgs() << "\n");
1582 } else if (BaseMO.getReg() == X86::RSP) {
1583 // Some idempotent atomic operations are lowered directly to a locked
1584 // OR with 0 to the top of stack(or slightly offset from top) which uses an
1585 // explicit RSP register as the base.
1586 assert(IndexMO.getReg() == X86::NoRegister &&
1587 "Explicit RSP access with dynamic index!");
1588 LLVM_DEBUG(
1589 dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1590 } else if (BaseMO.getReg() == X86::RIP ||
1591 BaseMO.getReg() == X86::NoRegister) {
1592 // For both RIP-relative addressed loads or absolute loads, we cannot
1593 // meaningfully harden them because the address being loaded has no
1594 // dynamic component.
1595 //
1596 // FIXME: When using a segment base (like TLS does) we end up with the
1597 // dynamic address being the base plus -1 because we can't mutate the
1598 // segment register here. This allows the signed 32-bit offset to point at
1599 // valid segment-relative addresses and load them successfully.
1600 LLVM_DEBUG(
1601 dbgs() << " Cannot harden base of "
1602 << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1603 << " address in a load!");
1604 } else {
1605 assert(BaseMO.isReg() &&
1606 "Only allowed to have a frame index or register base.");
1607 HardenOpRegs.push_back(&BaseMO);
1608 }
1609
1610 if (IndexMO.getReg() != X86::NoRegister &&
1611 (HardenOpRegs.empty() ||
1612 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1613 HardenOpRegs.push_back(&IndexMO);
1614
1615 assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1616 "Should have exactly one or two registers to harden!");
1617 assert((HardenOpRegs.size() == 1 ||
1618 HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1619 "Should not have two of the same registers!");
1620
1621 // Remove any registers that have already been checked.
1622 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1623 // See if this operand's register has already been checked.
1624 auto It = AddrRegToHardenedReg.find(Op->getReg());
1625 if (It == AddrRegToHardenedReg.end())
1626 // Not checked, so retain this one.
1627 return false;
1628
1629 // Otherwise, we can directly update this operand and remove it.
1630 Op->setReg(It->second);
1631 return true;
1632 });
1633 // If there are none left, we're done.
1634 if (HardenOpRegs.empty())
1635 return;
1636
1637 // Compute the current predicate state.
1638 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1639
1640 auto InsertPt = MI.getIterator();
1641
1642 // If EFLAGS are live and we don't have access to instructions that avoid
1643 // clobbering EFLAGS we need to save and restore them. This in turn makes
1644 // the EFLAGS no longer live.
1645 unsigned FlagsReg = 0;
1646 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1647 EFLAGSLive = false;
1648 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1649 }
1650
1651 for (MachineOperand *Op : HardenOpRegs) {
1652 Register OpReg = Op->getReg();
1653 auto *OpRC = MRI->getRegClass(OpReg);
1654 Register TmpReg = MRI->createVirtualRegister(OpRC);
1655
1656 // If this is a vector register, we'll need somewhat custom logic to handle
1657 // hardening it.
1658 if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1659 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1660 assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1661 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1662
1663 // Move our state into a vector register.
1664 // FIXME: We could skip this at the cost of longer encodings with AVX-512
1665 // but that doesn't seem likely worth it.
1666 Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1667 auto MovI =
1668 BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1669 .addReg(StateReg);
1670 (void)MovI;
1671 ++NumInstsInserted;
1672 LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1673
1674 // Broadcast it across the vector register.
1675 Register VBStateReg = MRI->createVirtualRegister(OpRC);
1676 auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1677 TII->get(Is128Bit ? X86::VPBROADCASTQrr
1678 : X86::VPBROADCASTQYrr),
1679 VBStateReg)
1680 .addReg(VStateReg);
1681 (void)BroadcastI;
1682 ++NumInstsInserted;
1683 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1684 dbgs() << "\n");
1685
1686 // Merge our potential poison state into the value with a vector or.
1687 auto OrI =
1688 BuildMI(MBB, InsertPt, Loc,
1689 TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1690 .addReg(VBStateReg)
1691 .addReg(OpReg);
1692 (void)OrI;
1693 ++NumInstsInserted;
1694 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1695 } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1696 OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1697 OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1698 assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1699 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1700 bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1701 if (Is128Bit || Is256Bit)
1702 assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1703
1704 // Broadcast our state into a vector register.
1705 Register VStateReg = MRI->createVirtualRegister(OpRC);
1706 unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1707 : Is256Bit ? X86::VPBROADCASTQrZ256rr
1708 : X86::VPBROADCASTQrZrr;
1709 auto BroadcastI =
1710 BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1711 .addReg(StateReg);
1712 (void)BroadcastI;
1713 ++NumInstsInserted;
1714 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1715 dbgs() << "\n");
1716
1717 // Merge our potential poison state into the value with a vector or.
1718 unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1719 : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1720 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1721 .addReg(VStateReg)
1722 .addReg(OpReg);
1723 (void)OrI;
1724 ++NumInstsInserted;
1725 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1726 } else {
1727 // FIXME: Need to support GR32 here for 32-bit code.
1728 assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1729 "Not a supported register class for address hardening!");
1730
1731 if (!EFLAGSLive) {
1732 // Merge our potential poison state into the value with an or.
1733 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1734 .addReg(StateReg)
1735 .addReg(OpReg);
1736 OrI->addRegisterDead(X86::EFLAGS, TRI);
1737 ++NumInstsInserted;
1738 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1739 } else {
1740 // We need to avoid touching EFLAGS so shift out all but the least
1741 // significant bit using the instruction that doesn't update flags.
1742 auto ShiftI =
1743 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1744 .addReg(OpReg)
1745 .addReg(StateReg);
1746 (void)ShiftI;
1747 ++NumInstsInserted;
1748 LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1749 dbgs() << "\n");
1750 }
1751 }
1752
1753 // Record this register as checked and update the operand.
1754 assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1755 "Should not have checked this register yet!");
1756 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1757 Op->setReg(TmpReg);
1758 ++NumAddrRegsHardened;
1759 }
1760
1761 // And restore the flags if needed.
1762 if (FlagsReg)
1763 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1764}
1765
1766MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1767 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1769 "Cannot get here with a non-invariant load!");
1770 assert(!isEFLAGSDefLive(InitialMI) &&
1771 "Cannot get here with a data invariant load "
1772 "that interferes with EFLAGS!");
1773
1774 // See if we can sink hardening the loaded value.
1775 auto SinkCheckToSingleUse =
1776 [&](MachineInstr &MI) -> std::optional<MachineInstr *> {
1777 Register DefReg = MI.getOperand(0).getReg();
1778
1779 // We need to find a single use which we can sink the check. We can
1780 // primarily do this because many uses may already end up checked on their
1781 // own.
1782 MachineInstr *SingleUseMI = nullptr;
1783 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1784 // If we're already going to harden this use, it is data invariant, it
1785 // does not interfere with EFLAGS, and within our block.
1786 if (HardenedInstrs.count(&UseMI)) {
1788 // If we've already decided to harden a non-load, we must have sunk
1789 // some other post-load hardened instruction to it and it must itself
1790 // be data-invariant.
1792 "Data variant instruction being hardened!");
1793 continue;
1794 }
1795
1796 // Otherwise, this is a load and the load component can't be data
1797 // invariant so check how this register is being used.
1798 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(UseMI);
1799 assert(MemRefBeginIdx >= 0 &&
1800 "Should always have mem references here!");
1801
1802 MachineOperand &BaseMO =
1803 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1804 MachineOperand &IndexMO =
1805 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1806 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1807 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1808 // The load uses the register as part of its address making it not
1809 // invariant.
1810 return {};
1811
1812 continue;
1813 }
1814
1815 if (SingleUseMI)
1816 // We already have a single use, this would make two. Bail.
1817 return {};
1818
1819 // If this single use isn't data invariant, isn't in this block, or has
1820 // interfering EFLAGS, we can't sink the hardening to it.
1821 if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1823 return {};
1824
1825 // If this instruction defines multiple registers bail as we won't harden
1826 // all of them.
1827 if (UseMI.getDesc().getNumDefs() > 1)
1828 return {};
1829
1830 // If this register isn't a virtual register we can't walk uses of sanely,
1831 // just bail. Also check that its register class is one of the ones we
1832 // can harden.
1833 Register UseDefReg = UseMI.getOperand(0).getReg();
1834 if (!canHardenRegister(UseDefReg))
1835 return {};
1836
1837 SingleUseMI = &UseMI;
1838 }
1839
1840 // If SingleUseMI is still null, there is no use that needs its own
1841 // checking. Otherwise, it is the single use that needs checking.
1842 return {SingleUseMI};
1843 };
1844
1845 MachineInstr *MI = &InitialMI;
1846 while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1847 // Update which MI we're checking now.
1848 MI = *SingleUse;
1849 if (!MI)
1850 break;
1851 }
1852
1853 return MI;
1854}
1855
1856bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
1857 // We only support hardening virtual registers.
1858 if (!Reg.isVirtual())
1859 return false;
1860
1861 auto *RC = MRI->getRegClass(Reg);
1862 int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1863 if (RegBytes > 8)
1864 // We don't support post-load hardening of vectors.
1865 return false;
1866
1867 unsigned RegIdx = Log2_32(RegBytes);
1868 assert(RegIdx < 4 && "Unsupported register size");
1869
1870 // If this register class is explicitly constrained to a class that doesn't
1871 // require REX prefix, we may not be able to satisfy that constraint when
1872 // emitting the hardening instructions, so bail out here.
1873 // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1874 // end up both with a NOREX and REX-only register as operands to the hardening
1875 // instructions. It would be better to fix that code to handle this situation
1876 // rather than hack around it in this way.
1877 const TargetRegisterClass *NOREXRegClasses[] = {
1878 &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1879 &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1880 if (RC == NOREXRegClasses[RegIdx])
1881 return false;
1882
1883 const TargetRegisterClass *GPRRegClasses[] = {
1884 &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1885 &X86::GR64RegClass};
1886 return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1887}
1888
1889/// Harden a value in a register.
1890///
1891/// This is the low-level logic to fully harden a value sitting in a register
1892/// against leaking during speculative execution.
1893///
1894/// Unlike hardening an address that is used by a load, this routine is required
1895/// to hide *all* incoming bits in the register.
1896///
1897/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1898/// larger than the predicate state register. FIXME: We should support vector
1899/// registers here by broadcasting the predicate state.
1900///
1901/// The new, hardened virtual register is returned. It will have the same
1902/// register class as `Reg`.
1903unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1905 const DebugLoc &Loc) {
1906 assert(canHardenRegister(Reg) && "Cannot harden this register!");
1907
1908 auto *RC = MRI->getRegClass(Reg);
1909 int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1910 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1911 assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1912 "Unknown register size");
1913
1914 // FIXME: Need to teach this about 32-bit mode.
1915 if (Bytes != 8) {
1916 unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1917 unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1918 Register NarrowStateReg = MRI->createVirtualRegister(RC);
1919 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1920 .addReg(StateReg, 0, SubRegImm);
1921 StateReg = NarrowStateReg;
1922 }
1923
1924 unsigned FlagsReg = 0;
1925 if (isEFLAGSLive(MBB, InsertPt, *TRI))
1926 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1927
1928 Register NewReg = MRI->createVirtualRegister(RC);
1929 unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1930 unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1931 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1932 .addReg(StateReg)
1933 .addReg(Reg);
1934 OrI->addRegisterDead(X86::EFLAGS, TRI);
1935 ++NumInstsInserted;
1936 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1937
1938 if (FlagsReg)
1939 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1940
1941 return NewReg;
1942}
1943
1944/// Harden a load by hardening the loaded value in the defined register.
1945///
1946/// We can harden a non-leaking load into a register without touching the
1947/// address by just hiding all of the loaded bits during misspeculation. We use
1948/// an `or` instruction to do this because we set up our poison value as all
1949/// ones. And the goal is just for the loaded bits to not be exposed to
1950/// execution and coercing them to one is sufficient.
1951///
1952/// Returns the newly hardened register.
1953unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1954 MachineBasicBlock &MBB = *MI.getParent();
1955 const DebugLoc &Loc = MI.getDebugLoc();
1956
1957 auto &DefOp = MI.getOperand(0);
1958 Register OldDefReg = DefOp.getReg();
1959 auto *DefRC = MRI->getRegClass(OldDefReg);
1960
1961 // Because we want to completely replace the uses of this def'ed value with
1962 // the hardened value, create a dedicated new register that will only be used
1963 // to communicate the unhardened value to the hardening.
1964 Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1965 DefOp.setReg(UnhardenedReg);
1966
1967 // Now harden this register's value, getting a hardened reg that is safe to
1968 // use. Note that we insert the instructions to compute this *after* the
1969 // defining instruction, not before it.
1970 unsigned HardenedReg = hardenValueInRegister(
1971 UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1972
1973 // Finally, replace the old register (which now only has the uses of the
1974 // original def) with the hardened register.
1975 MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1976
1977 ++NumPostLoadRegsHardened;
1978 return HardenedReg;
1979}
1980
1981/// Harden a return instruction.
1982///
1983/// Returns implicitly perform a load which we need to harden. Without hardening
1984/// this load, an attacker my speculatively write over the return address to
1985/// steer speculation of the return to an attacker controlled address. This is
1986/// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1987/// this paper:
1988/// https://people.csail.mit.edu/vlk/spectre11.pdf
1989///
1990/// We can harden this by introducing an LFENCE that will delay any load of the
1991/// return address until prior instructions have retired (and thus are not being
1992/// speculated), or we can harden the address used by the implicit load: the
1993/// stack pointer.
1994///
1995/// If we are not using an LFENCE, hardening the stack pointer has an additional
1996/// benefit: it allows us to pass the predicate state accumulated in this
1997/// function back to the caller. In the absence of a BCBS attack on the return,
1998/// the caller will typically be resumed and speculatively executed due to the
1999/// Return Stack Buffer (RSB) prediction which is very accurate and has a high
2000/// priority. It is possible that some code from the caller will be executed
2001/// speculatively even during a BCBS-attacked return until the steering takes
2002/// effect. Whenever this happens, the caller can recover the (poisoned)
2003/// predicate state from the stack pointer and continue to harden loads.
2004void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2005 MachineBasicBlock &MBB = *MI.getParent();
2006 const DebugLoc &Loc = MI.getDebugLoc();
2007 auto InsertPt = MI.getIterator();
2008
2009 if (FenceCallAndRet)
2010 // No need to fence here as we'll fence at the return site itself. That
2011 // handles more cases than we can handle here.
2012 return;
2013
2014 // Take our predicate state, shift it to the high 17 bits (so that we keep
2015 // pointers canonical) and merge it into RSP. This will allow the caller to
2016 // extract it when we return (speculatively).
2017 mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2018}
2019
2020/// Trace the predicate state through a call.
2021///
2022/// There are several layers of this needed to handle the full complexity of
2023/// calls.
2024///
2025/// First, we need to send the predicate state into the called function. We do
2026/// this by merging it into the high bits of the stack pointer.
2027///
2028/// For tail calls, this is all we need to do.
2029///
2030/// For calls where we might return and resume the control flow, we need to
2031/// extract the predicate state from the high bits of the stack pointer after
2032/// control returns from the called function.
2033///
2034/// We also need to verify that we intended to return to this location in the
2035/// code. An attacker might arrange for the processor to mispredict the return
2036/// to this valid but incorrect return address in the program rather than the
2037/// correct one. See the paper on this attack, called "ret2spec" by the
2038/// researchers, here:
2039/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2040///
2041/// The way we verify that we returned to the correct location is by preserving
2042/// the expected return address across the call. One technique involves taking
2043/// advantage of the red-zone to load the return address from `8(%rsp)` where it
2044/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2045/// directly save the address into a register that will be preserved across the
2046/// call. We compare this intended return address against the address
2047/// immediately following the call (the observed return address). If these
2048/// mismatch, we have detected misspeculation and can poison our predicate
2049/// state.
2050void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2051 MachineInstr &MI) {
2052 MachineBasicBlock &MBB = *MI.getParent();
2053 MachineFunction &MF = *MBB.getParent();
2054 auto InsertPt = MI.getIterator();
2055 const DebugLoc &Loc = MI.getDebugLoc();
2056
2057 if (FenceCallAndRet) {
2058 if (MI.isReturn())
2059 // Tail call, we don't return to this function.
2060 // FIXME: We should also handle noreturn calls.
2061 return;
2062
2063 // We don't need to fence before the call because the function should fence
2064 // in its entry. However, we do need to fence after the call returns.
2065 // Fencing before the return doesn't correctly handle cases where the return
2066 // itself is mispredicted.
2067 BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2068 ++NumInstsInserted;
2069 ++NumLFENCEsInserted;
2070 return;
2071 }
2072
2073 // First, we transfer the predicate state into the called function by merging
2074 // it into the stack pointer. This will kill the current def of the state.
2075 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2076 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2077
2078 // If this call is also a return, it is a tail call and we don't need anything
2079 // else to handle it so just return. Also, if there are no further
2080 // instructions and no successors, this call does not return so we can also
2081 // bail.
2082 if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2083 return;
2084
2085 // Create a symbol to track the return address and attach it to the call
2086 // machine instruction. We will lower extra symbols attached to call
2087 // instructions as label immediately following the call.
2088 MCSymbol *RetSymbol =
2089 MF.getContext().createTempSymbol("slh_ret_addr",
2090 /*AlwaysAddSuffix*/ true);
2091 MI.setPostInstrSymbol(MF, RetSymbol);
2092
2093 const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2094 unsigned ExpectedRetAddrReg = 0;
2095
2096 // If we have no red zones or if the function returns twice (possibly without
2097 // using the `ret` instruction) like setjmp, we need to save the expected
2098 // return address prior to the call.
2099 if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2100 MF.exposesReturnsTwice()) {
2101 // If we don't have red zones, we need to compute the expected return
2102 // address prior to the call and store it in a register that lives across
2103 // the call.
2104 //
2105 // In some ways, this is doubly satisfying as a mitigation because it will
2106 // also successfully detect stack smashing bugs in some cases (typically,
2107 // when a callee-saved register is used and the callee doesn't push it onto
2108 // the stack). But that isn't our primary goal, so we only use it as
2109 // a fallback.
2110 //
2111 // FIXME: It isn't clear that this is reliable in the face of
2112 // rematerialization in the register allocator. We somehow need to force
2113 // that to not occur for this particular instruction, and instead to spill
2114 // or otherwise preserve the value computed *prior* to the call.
2115 //
2116 // FIXME: It is even less clear why MachineCSE can't just fold this when we
2117 // end up having to use identical instructions both before and after the
2118 // call to feed the comparison.
2119 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2120 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2121 !Subtarget->isPositionIndependent()) {
2122 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2123 .addSym(RetSymbol);
2124 } else {
2125 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2126 .addReg(/*Base*/ X86::RIP)
2127 .addImm(/*Scale*/ 1)
2128 .addReg(/*Index*/ 0)
2129 .addSym(RetSymbol)
2130 .addReg(/*Segment*/ 0);
2131 }
2132 }
2133
2134 // Step past the call to handle when it returns.
2135 ++InsertPt;
2136
2137 // If we didn't pre-compute the expected return address into a register, then
2138 // red zones are enabled and the return address is still available on the
2139 // stack immediately after the call. As the very first instruction, we load it
2140 // into a register.
2141 if (!ExpectedRetAddrReg) {
2142 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2143 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2144 .addReg(/*Base*/ X86::RSP)
2145 .addImm(/*Scale*/ 1)
2146 .addReg(/*Index*/ 0)
2147 .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2148 // the return address is 8-bytes past it.
2149 .addReg(/*Segment*/ 0);
2150 }
2151
2152 // Now we extract the callee's predicate state from the stack pointer.
2153 unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2154
2155 // Test the expected return address against our actual address. If we can
2156 // form this basic block's address as an immediate, this is easy. Otherwise
2157 // we compute it.
2158 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2159 !Subtarget->isPositionIndependent()) {
2160 // FIXME: Could we fold this with the load? It would require careful EFLAGS
2161 // management.
2162 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2163 .addReg(ExpectedRetAddrReg, RegState::Kill)
2164 .addSym(RetSymbol);
2165 } else {
2166 Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2167 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2168 .addReg(/*Base*/ X86::RIP)
2169 .addImm(/*Scale*/ 1)
2170 .addReg(/*Index*/ 0)
2171 .addSym(RetSymbol)
2172 .addReg(/*Segment*/ 0);
2173 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2174 .addReg(ExpectedRetAddrReg, RegState::Kill)
2175 .addReg(ActualRetAddrReg, RegState::Kill);
2176 }
2177
2178 // Now conditionally update the predicate state we just extracted if we ended
2179 // up at a different return address than expected.
2180 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2181 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2182
2183 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2184 auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2185 .addReg(NewStateReg, RegState::Kill)
2186 .addReg(PS->PoisonReg)
2188 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
2189 ++NumInstsInserted;
2190 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2191
2192 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2193}
2194
2195/// An attacker may speculatively store over a value that is then speculatively
2196/// loaded and used as the target of an indirect call or jump instruction. This
2197/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2198/// in this paper:
2199/// https://people.csail.mit.edu/vlk/spectre11.pdf
2200///
2201/// When this happens, the speculative execution of the call or jump will end up
2202/// being steered to this attacker controlled address. While most such loads
2203/// will be adequately hardened already, we want to ensure that they are
2204/// definitively treated as needing post-load hardening. While address hardening
2205/// is sufficient to prevent secret data from leaking to the attacker, it may
2206/// not be sufficient to prevent an attacker from steering speculative
2207/// execution. We forcibly unfolded all relevant loads above and so will always
2208/// have an opportunity to post-load harden here, we just need to scan for cases
2209/// not already flagged and add them.
2210void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2212 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
2213 switch (MI.getOpcode()) {
2214 case X86::FARCALL16m:
2215 case X86::FARCALL32m:
2216 case X86::FARCALL64m:
2217 case X86::FARJMP16m:
2218 case X86::FARJMP32m:
2219 case X86::FARJMP64m:
2220 // We don't need to harden either far calls or far jumps as they are
2221 // safe from Spectre.
2222 return;
2223
2224 default:
2225 break;
2226 }
2227
2228 // We should never see a loading instruction at this point, as those should
2229 // have been unfolded.
2230 assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2231
2232 // If the first operand isn't a register, this is a branch or call
2233 // instruction with an immediate operand which doesn't need to be hardened.
2234 if (!MI.getOperand(0).isReg())
2235 return;
2236
2237 // For all of these, the target register is the first operand of the
2238 // instruction.
2239 auto &TargetOp = MI.getOperand(0);
2240 Register OldTargetReg = TargetOp.getReg();
2241
2242 // Try to lookup a hardened version of this register. We retain a reference
2243 // here as we want to update the map to track any newly computed hardened
2244 // register.
2245 unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2246
2247 // If we don't have a hardened register yet, compute one. Otherwise, just use
2248 // the already hardened register.
2249 //
2250 // FIXME: It is a little suspect that we use partially hardened registers that
2251 // only feed addresses. The complexity of partial hardening with SHRX
2252 // continues to pile up. Should definitively measure its value and consider
2253 // eliminating it.
2254 if (!HardenedTargetReg)
2255 HardenedTargetReg = hardenValueInRegister(
2256 OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2257
2258 // Set the target operand to the hardened register.
2259 TargetOp.setReg(HardenedTargetReg);
2260
2261 ++NumCallsOrJumpsHardened;
2262}
2263
2264INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2265 "X86 speculative load hardener", false, false)
2266INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2267 "X86 speculative load hardener", false, false)
2268
2270 return new X86SpeculativeLoadHardeningPass();
2271}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
AMDGPU Mark last scratch load
MachineBasicBlock & MBB
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
Memory SSA
Definition: MemorySSA.cpp:72
if(VerifyEach)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the SparseBitVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static MachineBasicBlock & splitEdge(MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount, MachineInstr *Br, MachineInstr *&UncondBr, const X86InstrInfo &TII)
static cl::opt< bool > HardenLoads(PASS_KEY "-loads", cl::desc("Sanitize loads from memory. When disable, no " "significant security is provided."), cl::init(true), cl::Hidden)
static void canonicalizePHIOperands(MachineFunction &MF)
Removing duplicate PHI operands to leave the PHI in a canonical and predictable form.
static cl::opt< bool > HardenInterprocedurally(PASS_KEY "-ip", cl::desc("Harden interprocedurally by passing our state in and out of " "functions in the high bits of the stack pointer."), cl::init(true), cl::Hidden)
static cl::opt< bool > FenceCallAndRet(PASS_KEY "-fence-call-and-ret", cl::desc("Use a full speculation fence to harden both call and ret edges " "rather than a lighter weight mitigation."), cl::init(false), cl::Hidden)
static cl::opt< bool > EnablePostLoadHardening(PASS_KEY "-post-load", cl::desc("Harden the value loaded *after* it is loaded by " "flushing the loaded bits to 1. This is hard to do " "in general but can be done easily for GPRs."), cl::init(true), cl::Hidden)
static cl::opt< bool > HardenEdgesWithLFENCE(PASS_KEY "-lfence", cl::desc("Use LFENCE along each conditional edge to harden against speculative " "loads rather than conditional movs and poisoned pointers."), cl::init(false), cl::Hidden)
static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo &TRI)
static cl::opt< bool > EnableSpeculativeLoadHardening("x86-speculative-load-hardening", cl::desc("Force enable speculative load hardening"), cl::init(false), cl::Hidden)
static const TargetRegisterClass * getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII, unsigned Opcode)
Compute the register class for the unfolded load.
static bool hasVulnerableLoad(MachineFunction &MF)
Helper to scan a function for loads vulnerable to misspeculation that we want to harden.
static bool isEFLAGSDefLive(const MachineInstr &MI)
X86 speculative load hardener
static cl::opt< bool > HardenIndirectCallsAndJumps(PASS_KEY "-indirect", cl::desc("Harden indirect calls and jumps against using speculatively " "stored attacker controlled addresses. This is designed to " "mitigate Spectre v1.2 style attacks."), cl::init(true), cl::Hidden)
#define PASS_KEY
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
Definition: MCContext.cpp:346
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void normalizeSuccProbs()
Normalize probabilities of all successors so that the sum of them becomes one.
bool isEHPad() const
Returns true if the block is a landing pad.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
reverse_instr_iterator instr_rbegin()
iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
void splitSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New, bool NormalizeSuccProbs=false)
Split the old successor into old plus new and updates the probability info.
iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg=Register(), bool SkipPseudoOp=true)
Return the first instruction in MBB after I that is not a PHI, label or debug.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
bool isEHScopeEntry() const
Returns true if this is the entry block of an EH scope, i.e., the block that used to have a catchpad ...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:974
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:982
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
MachineOperand class - Representation of each machine instruction operand.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
void dump() const
Definition: Pass.cpp:136
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:347
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:385
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:503
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
void set(unsigned Idx)
bool test(unsigned Idx) const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
CodeModel::Model getCodeModel() const
Returns the code model.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value l...
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value o...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:826
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
CondCode getCondFromBranch(const MachineInstr &MI)
int getFirstAddrOperandIdx(const MachineInstr &MI)
Return the index of the instruction's first address operand, if it has a memory reference,...
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
@ AddrIndexReg
Definition: X86BaseInfo.h:31
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false, bool HasNDD=false)
Return a cmov opcode for the given register size in bytes, and operand type.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2013
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
FunctionPass * createX86SpeculativeLoadHardeningPass()
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2057