1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Provide a pass which mitigates speculative execution attacks which operate
11/// by speculating incorrectly past some predicate (a type check, bounds check,
12/// or other condition) to reach a load with invalid inputs and leak the data
13/// accessed by that load using a side channel out of the speculative domain.
14///
15/// For details on the attacks, see the first variant in both the Project Zero
16/// writeup and the Spectre paper:
17/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18/// https://spectreattack.com/spectre.pdf
19///
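///
/// As a rough illustration (an editor's sketch, not code from this file), the
/// classic variant 1 gadget this pass is concerned with looks like:
/// ```
///   if (idx < array_len) {          // branch mispredicted as taken
///     char v = array[idx];          // speculative out-of-bounds load
///     tmp = probe_table[v * 512];   // dependent access leaks `v` via the cache
///   }
/// ```
/// The pass threads a "predicate state" value through the function that
/// becomes all-ones whenever the CPU has speculated past such a predicate
/// incorrectly, and uses that state to mask pointers and loaded values so the
/// mis-speculated loads cannot leak secrets.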
20//===----------------------------------------------------------------------===//
21
22#include "X86.h"
23#include "X86InstrInfo.h"
24#include "X86Subtarget.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/STLExtras.h"
28#include "llvm/ADT/SmallPtrSet.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/SparseBitVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/CodeGen/MachineBasicBlock.h"
34#include "llvm/CodeGen/MachineConstantPool.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineModuleInfo.h"
40#include "llvm/CodeGen/MachineOperand.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/MachineSSAUpdater.h"
43#include "llvm/CodeGen/TargetInstrInfo.h"
44#include "llvm/CodeGen/TargetRegisterInfo.h"
45#include "llvm/CodeGen/TargetSchedule.h"
46#include "llvm/CodeGen/TargetSubtargetInfo.h"
47#include "llvm/IR/DebugLoc.h"
48#include "llvm/MC/MCSchedule.h"
49#include "llvm/Pass.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/Support/raw_ostream.h"
54#include <cassert>
55#include <iterator>
56#include <optional>
57
58using namespace llvm;
59
60#define PASS_KEY "x86-slh"
61#define DEBUG_TYPE PASS_KEY
62
63STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
64STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
65STATISTIC(NumAddrRegsHardened,
66 "Number of address mode used registers hardened");
67STATISTIC(NumPostLoadRegsHardened,
68 "Number of post-load register values hardened");
69STATISTIC(NumCallsOrJumpsHardened,
70 "Number of calls or jumps requiring extra hardening");
71STATISTIC(NumInstsInserted, "Number of instructions inserted");
72STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
73
74static cl::opt<bool> EnableSpeculativeLoadHardening(
75 "x86-speculative-load-hardening",
76 cl::desc("Force enable speculative load hardening"), cl::init(false),
77 cl::Hidden);
78
79static cl::opt<bool> HardenEdgesWithLFENCE(
80 PASS_KEY "-lfence",
81 cl::desc(
82 "Use LFENCE along each conditional edge to harden against speculative "
83 "loads rather than conditional movs and poisoned pointers."),
84 cl::init(false), cl::Hidden);
85
86static cl::opt<bool> EnablePostLoadHardening(
87 PASS_KEY "-post-load",
88 cl::desc("Harden the value loaded *after* it is loaded by "
89 "flushing the loaded bits to 1. This is hard to do "
90 "in general but can be done easily for GPRs."),
91 cl::init(true), cl::Hidden);
92
93static cl::opt<bool> FenceCallAndRet(
94 PASS_KEY "-fence-call-and-ret",
95 cl::desc("Use a full speculation fence to harden both call and ret edges "
96 "rather than a lighter weight mitigation."),
97 cl::init(false), cl::Hidden);
98
99static cl::opt<bool> HardenInterprocedurally(
100 PASS_KEY "-ip",
101 cl::desc("Harden interprocedurally by passing our state in and out of "
102 "functions in the high bits of the stack pointer."),
103 cl::init(true), cl::Hidden);
104
105static cl::opt<bool>
106 HardenLoads(PASS_KEY "-loads",
107 cl::desc("Sanitize loads from memory. When disabled, no "
108 "significant security is provided."),
109 cl::init(true), cl::Hidden);
110
111static cl::opt<bool> HardenIndirectCallsAndJumps(
112 PASS_KEY "-indirect",
113 cl::desc("Harden indirect calls and jumps against using speculatively "
114 "stored attacker controlled addresses. This is designed to "
115 "mitigate Spectre v1.2 style attacks."),
116 cl::init(true), cl::Hidden);
117
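// Example invocations exercising these knobs (editor's note; the flag names
// are exactly the option strings defined above):
//
//   clang -O2 -mspeculative-load-hardening foo.c   # request SLH per function
//   llc -x86-speculative-load-hardening foo.ll     # force-enable this pass
//   llc -x86-slh-lfence foo.ll                     # LFENCE-based variant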
118namespace {
119
120constexpr StringRef X86SLHPassName = "X86 speculative load hardening";
121
122class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
123public:
124 X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
125
126 StringRef getPassName() const override { return X86SLHPassName; }
127 bool runOnMachineFunction(MachineFunction &MF) override;
128 void getAnalysisUsage(AnalysisUsage &AU) const override;
129
130 /// Pass identification, replacement for typeid.
131 static char ID;
132};
133
134class X86SpeculativeLoadHardeningImpl {
135public:
136 X86SpeculativeLoadHardeningImpl() = default;
137
138 bool run(MachineFunction &MF);
139
140private:
141 /// The information about a block's conditional terminators needed to trace
142 /// our predicate state through the exiting edges.
143 struct BlockCondInfo {
144 MachineBasicBlock *MBB;
145
146 // We mostly have one conditional branch, and in extremely rare cases have
147 // two. Three and more are so rare as to be unimportant for compile time.
148 SmallVector<MachineInstr *, 2> CondBrs;
149
150 MachineInstr *UncondBr;
151 };
152
153 /// Manages the predicate state traced through the program.
154 struct PredState {
155 Register InitialReg;
156 Register PoisonReg;
157
158 const TargetRegisterClass *RC;
159 MachineSSAUpdater SSA;
160
161 PredState(MachineFunction &MF, const TargetRegisterClass *RC)
162 : RC(RC), SSA(MF) {}
163 };
164
165 const X86Subtarget *Subtarget = nullptr;
166 MachineRegisterInfo *MRI = nullptr;
167 const X86InstrInfo *TII = nullptr;
168 const TargetRegisterInfo *TRI = nullptr;
169
170 std::optional<PredState> PS;
171
172 void hardenEdgesWithLFENCE(MachineFunction &MF);
173
174 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
175
176 SmallVector<MachineInstr *, 16>
177 tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
178
179 void unfoldCallAndJumpLoads(MachineFunction &MF);
180
180
181 SmallVector<MachineInstr *, 16>
182 tracePredStateThroughIndirectBranches(MachineFunction &MF);
183
184 void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
185
186 Register saveEFLAGS(MachineBasicBlock &MBB,
187 MachineBasicBlock::iterator InsertPt,
188 const DebugLoc &Loc);
189 void restoreEFLAGS(MachineBasicBlock &MBB,
190 MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
191 Register Reg);
192
193 void mergePredStateIntoSP(MachineBasicBlock &MBB,
194 MachineBasicBlock::iterator InsertPt,
195 const DebugLoc &Loc, Register PredStateReg);
196 Register extractPredStateFromSP(MachineBasicBlock &MBB,
197 MachineBasicBlock::iterator InsertPt,
198 const DebugLoc &Loc);
199
200 void
201 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
202 MachineOperand &IndexMO,
203 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg);
204 MachineInstr *
205 sinkPostLoadHardenedInst(MachineInstr &MI,
206 SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
207 bool canHardenRegister(Register Reg);
208 Register hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
209 MachineBasicBlock::iterator InsertPt,
210 const DebugLoc &Loc);
211 Register hardenPostLoad(MachineInstr &MI);
212 void hardenReturnInstr(MachineInstr &MI);
213 void tracePredStateThroughCall(MachineInstr &MI);
214 void hardenIndirectCallOrJumpInstr(
215 MachineInstr &MI,
216 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg);
217};
218
219} // end anonymous namespace
220
221bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
222 MachineFunction &MF) {
223 X86SpeculativeLoadHardeningImpl Impl;
224 bool Changed = Impl.run(MF);
225 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
226 dbgs() << "\n"; MF.verify(this));
227 return Changed;
228}
229
230char X86SpeculativeLoadHardeningPass::ID = 0;
231
232void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
233 AnalysisUsage &AU) const {
234 MachineFunctionPass::getAnalysisUsage(AU);
235}
236
237static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
238 MachineBasicBlock &Succ, int SuccCount,
239 MachineInstr *Br, MachineInstr *&UncondBr,
240 const X86InstrInfo &TII) {
241 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
242
243 MachineFunction &MF = *MBB.getParent();
244
244
245 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock(Succ.getBasicBlock());
246
247 // We have to insert the new block immediately after the current one as we
248 // don't know what layout-successor relationships the successor has and we
249 // may not be able to (and generally don't want to) try to fix those up.
250 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
251
252 // Update the branch instruction if necessary.
253 if (Br) {
254 assert(Br->getOperand(0).getMBB() == &Succ &&
255 "Didn't start with the right target!");
256 Br->getOperand(0).setMBB(&NewMBB);
257
258 // If this successor was reached through a branch rather than fallthrough,
259 // we might have *broken* fallthrough and so need to inject a new
260 // unconditional branch.
261 if (!UncondBr) {
262 MachineBasicBlock &OldLayoutSucc =
263 *std::next(MachineFunction::iterator(&NewMBB));
264 assert(MBB.isSuccessor(&OldLayoutSucc) &&
265 "Without an unconditional branch, the old layout successor should "
266 "be an actual successor!");
267 auto BrBuilder =
268 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
269 // Update the unconditional branch now that we've added one.
270 UncondBr = &*BrBuilder;
271 }
272
273 // Insert unconditional "jump Succ" instruction in the new block if
274 // necessary.
275 if (!NewMBB.isLayoutSuccessor(&Succ)) {
276 SmallVector<MachineOperand, 4> Cond;
277 TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
278 }
279 } else {
280 assert(!UncondBr &&
281 "Cannot have a branchless successor and an unconditional branch!");
282 assert(NewMBB.isLayoutSuccessor(&Succ) &&
283 "A non-branch successor must have been a layout successor before "
284 "and now is a layout successor of the new block.");
285 }
286
287 // If this is the only edge to the successor, we can just replace it in the
288 // CFG. Otherwise we need to add a new entry in the CFG for the new
289 // successor.
290 if (SuccCount == 1) {
291 MBB.replaceSuccessor(&Succ, &NewMBB);
292 } else {
293 MBB.splitSuccessor(&Succ, &NewMBB);
294 }
295
296 // Hook up the edge from the new basic block to the old successor in the CFG.
297 NewMBB.addSuccessor(&Succ);
298
299 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
300 for (MachineInstr &MI : Succ) {
301 if (!MI.isPHI())
302 break;
303 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
304 OpIdx += 2) {
305 MachineOperand &OpV = MI.getOperand(OpIdx);
306 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
307 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
308 if (OpMBB.getMBB() != &MBB)
309 continue;
310
311 // If this is the last edge to the successor, just replace MBB in the PHI.
312 if (SuccCount == 1) {
313 OpMBB.setMBB(&NewMBB);
314 break;
315 }
316
317 // Otherwise, append a new pair of operands for the new incoming edge.
318 MI.addOperand(MF, OpV);
319 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
320 break;
321 }
322 }
323
324 // Inherit live-ins from the successor
325 for (auto &LI : Succ.liveins())
326 NewMBB.addLiveIn(LI);
327
328 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
329 << Succ.getName() << "'.\n");
330 return NewMBB;
331}
332
333/// Remove duplicate PHI operands to leave the PHI in a canonical and
334/// predictable form.
335///
336/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
337/// isn't what you might expect. We may have multiple entries in PHI nodes for
338/// a single predecessor. This makes CFG-updating extremely complex, so here we
339/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
340/// one entry per predecessor, regardless of how many edges there are.
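///
/// For example (an editor's illustration in MIR-like syntax, not taken from a
/// test):
/// ```
///   %v:gr64 = PHI %a, %bb.1, %a, %bb.1, %b, %bb.2   ; two entries for %bb.1
/// ```
/// is rewritten to:
/// ```
///   %v:gr64 = PHI %a, %bb.1, %b, %bb.2
/// ```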
341static void canonicalizePHIOperands(MachineFunction &MF) {
342 SmallPtrSet<MachineBasicBlock *, 4> Preds;
343 SmallVector<int, 4> DupIndices;
344 for (auto &MBB : MF)
345 for (auto &MI : MBB) {
346 if (!MI.isPHI())
347 break;
348
349 // First we scan the operands of the PHI looking for duplicate entries
350 // for a particular predecessor. We retain the operand index of each duplicate
351 // entry found.
352 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
353 OpIdx += 2)
354 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
355 DupIndices.push_back(OpIdx);
356
357 // Now walk the duplicate indices, removing both the block and value. Note
358 // that these are stored as a vector making this element-wise removal
360 // potentially quadratic.
361 //
362 // FIXME: It is really frustrating that we have to use a quadratic
363 // removal algorithm here. There should be a better way, but the use-def
364 // updates required make that impossible using the public API.
365 //
366 // Note that we have to process these backwards so that we don't
367 // invalidate other indices with each removal.
368 while (!DupIndices.empty()) {
369 int OpIdx = DupIndices.pop_back_val();
370 // Remove both the block and value operand, again in reverse order to
371 // preserve indices.
372 MI.removeOperand(OpIdx + 1);
373 MI.removeOperand(OpIdx);
374 }
375
376 Preds.clear();
377 }
378}
379
380/// Helper to scan a function for loads vulnerable to misspeculation that we
381/// want to harden.
382///
383/// We use this to avoid making changes to functions where there is nothing we
384/// need to do to harden against misspeculation.
385static bool hasVulnerableLoad(MachineFunction &MF) {
386 for (MachineBasicBlock &MBB : MF) {
387 for (MachineInstr &MI : MBB) {
388 // Loads within this basic block after an LFENCE are not at risk of
389 // speculatively executing with invalid predicates from prior control
390 // flow. So break out of this block but continue scanning the function.
391 if (MI.getOpcode() == X86::LFENCE)
392 break;
393
394 // Looking for loads only.
395 if (!MI.mayLoad())
396 continue;
397
398 // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
399 if (MI.getOpcode() == X86::MFENCE)
400 continue;
401
402 // We found a load.
403 return true;
404 }
405 }
406
407 // No loads found.
408 return false;
409}
410
411bool X86SpeculativeLoadHardeningImpl::run(MachineFunction &MF) {
412 LLVM_DEBUG(dbgs() << "********** " << X86SLHPassName << " : " << MF.getName()
413 << " **********\n");
414
415 // Only run if this pass is forced enabled or we detect the relevant function
416 // attribute requesting SLH.
417 if (!EnableSpeculativeLoadHardening &&
418 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
419 return false;
420
421 Subtarget = &MF.getSubtarget<X86Subtarget>();
422 MRI = &MF.getRegInfo();
423 TII = Subtarget->getInstrInfo();
424 TRI = Subtarget->getRegisterInfo();
425
426 // FIXME: Support for 32-bit.
427 PS.emplace(MF, &X86::GR64_NOSPRegClass);
428
429 if (MF.begin() == MF.end())
430 // Nothing to do for a degenerate empty function...
431 return false;
432
433 // We support an alternative hardening technique based on a debug flag.
434 if (HardenEdgesWithLFENCE) {
435 hardenEdgesWithLFENCE(MF);
436 return true;
437 }
438
439 // Create a dummy debug loc to use for all the generated code here.
440 DebugLoc Loc;
441
442 MachineBasicBlock &Entry = *MF.begin();
443 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
444
445 // Do a quick scan to see if we have any checkable loads.
446 bool HasVulnerableLoad = hasVulnerableLoad(MF);
447
448 // See if we have any conditional branching blocks that we will need to trace
449 // predicate state through.
450 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
451
452 // If we have no interesting conditions or loads, nothing to do here.
453 if (!HasVulnerableLoad && Infos.empty())
454 return true;
455
456 // The poison value is required to be an all-ones value for many aspects of
457 // this mitigation.
458 const int PoisonVal = -1;
459 PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
460 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
461 .addImm(PoisonVal);
462 ++NumInstsInserted;
463
464 // If we have loads being hardened and we've asked for call and ret edges to
465 // get a full fence-based mitigation, inject that fence.
466 if (HasVulnerableLoad && FenceCallAndRet) {
467 // We need to insert an LFENCE at the start of the function to suspend any
468 // incoming misspeculation from the caller. This helps two-fold: the caller
469 // may not have been protected as this code has been, and this code gets to
470 // not take any specific action to protect across calls.
471 // FIXME: We could skip this for functions which unconditionally return
472 // a constant.
473 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
474 ++NumInstsInserted;
475 ++NumLFENCEsInserted;
476 }
477
478 // If we guarded the entry with an LFENCE and have no conditionals to protect
479 // in blocks, then we're done.
480 if (FenceCallAndRet && Infos.empty())
481 // We may have changed the function's code at this point to insert fences.
482 return true;
483
484 // Decide how to seed the initial predicate state.
485 if (HardenInterprocedurally && !FenceCallAndRet) {
486 // Set up the predicate state by extracting it from the incoming stack
487 // pointer so we pick up any misspeculation in our caller.
488 PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
489 } else {
490 // Otherwise, just build the predicate state itself by zeroing a register
491 // as we don't need any initial state.
492 PS->InitialReg = MRI->createVirtualRegister(PS->RC);
493 Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
494 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
495 PredStateSubReg);
496 ++NumInstsInserted;
497 MachineOperand *ZeroEFLAGSDefOp =
498 ZeroI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
499 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
500 "Must have an implicit def of EFLAGS!");
501 ZeroEFLAGSDefOp->setIsDead(true);
502 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
503 PS->InitialReg)
504 .addImm(0)
505 .addReg(PredStateSubReg)
506 .addImm(X86::sub_32bit);
507 }
508
509 // We're going to need to trace predicate state throughout the function's
510 // CFG. Prepare for this by setting up our initial state of PHIs with unique
511 // predecessor entries and all the initial predicate state.
512 canonicalizePHIOperands(MF);
513
514 // Track the updated values in an SSA updater to rewrite into SSA form at the
515 // end.
516 PS->SSA.Initialize(PS->InitialReg);
517 PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
518
519 // Trace through the CFG.
520 auto CMovs = tracePredStateThroughCFG(MF, Infos);
521
522 // We may also enter basic blocks in this function via exception handling
523 // control flow. Here, if we are hardening interprocedurally, we need to
524 // re-capture the predicate state from the throwing code. In the Itanium ABI,
525 // the throw will always look like a call to __cxa_throw and will have the
526 // predicate state in the stack pointer, so extract fresh predicate state from
527 // the stack pointer and make it available in SSA.
528 // FIXME: Handle non-itanium ABI EH models.
529 if (HardenInterprocedurally) {
530 for (MachineBasicBlock &MBB : MF) {
531 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
532 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
533 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
534 if (!MBB.isEHPad())
535 continue;
536 PS->SSA.AddAvailableValue(
537 &MBB,
538 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
539 }
540 }
541
542 if (HardenIndirectCallsAndJumps) {
543 // If we are going to harden calls and jumps we need to unfold their memory
544 // operands.
545 unfoldCallAndJumpLoads(MF);
546
547 // Then we trace predicate state through the indirect branches.
548 auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
549 CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
550 }
551
552 // Now that we have the predicate state available at the start of each block
553 // in the CFG, trace it through each block, hardening vulnerable instructions
554 // as we go.
555 tracePredStateThroughBlocksAndHarden(MF);
556
557 // Now rewrite all the uses of the pred state using the SSA updater to insert
558 // PHIs connecting the state between blocks along the CFG edges.
559 for (MachineInstr *CMovI : CMovs)
560 for (MachineOperand &Op : CMovI->operands()) {
561 if (!Op.isReg() || Op.getReg() != PS->InitialReg)
562 continue;
563
564 PS->SSA.RewriteUse(Op);
565 }
566
567 return true;
568}
569
570/// Implements the naive hardening approach of putting an LFENCE after every
571/// potentially mis-predicted control flow construct.
572///
573/// We include this as an alternative mostly for the purpose of comparison. The
574/// performance impact of this is expected to be extremely severe and not
575/// practical for any real-world users.
576void X86SpeculativeLoadHardeningImpl::hardenEdgesWithLFENCE(
577 MachineFunction &MF) {
578 // First, we scan the function looking for blocks that are reached along edges
579 // that we might want to harden.
580 SmallSetVector<MachineBasicBlock *, 8> Blocks;
581 for (MachineBasicBlock &MBB : MF) {
582 // If there are no or only one successor, nothing to do here.
583 if (MBB.succ_size() <= 1)
584 continue;
585
586 // Skip blocks unless their terminators start with a branch. Other
587 // terminators don't seem interesting for guarding against misspeculation.
588 auto TermIt = MBB.getFirstTerminator();
589 if (TermIt == MBB.end() || !TermIt->isBranch())
590 continue;
591
592 // Add all the non-EH-pad successors to the blocks we want to harden. We
593 // skip EH pads because there isn't really a condition of interest on
594 // entering.
595 for (MachineBasicBlock *SuccMBB : MBB.successors())
596 if (!SuccMBB->isEHPad())
597 Blocks.insert(SuccMBB);
598 }
599
600 for (MachineBasicBlock *MBB : Blocks) {
601 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
602 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
603 ++NumInstsInserted;
604 ++NumLFENCEsInserted;
605 }
606}
607
608SmallVector<X86SpeculativeLoadHardeningImpl::BlockCondInfo, 16>
609X86SpeculativeLoadHardeningImpl::collectBlockCondInfo(MachineFunction &MF) {
610 SmallVector<BlockCondInfo, 16> Infos;
611
612 // Walk the function and build up a summary for each block's conditions that
613 // we need to trace through.
614 for (MachineBasicBlock &MBB : MF) {
615 // If there are no or only one successor, nothing to do here.
616 if (MBB.succ_size() <= 1)
617 continue;
618
619 // We want to reliably handle any conditional branch terminators in the
620 // MBB, so we manually analyze the branch. We can handle all of the
621 // permutations here, including ones that analyzeBranch cannot.
622 //
623 // The approach is to walk backwards across the terminators, resetting at
624 // any unconditional non-indirect branch, and track all conditional edges
625 // to basic blocks as well as the fallthrough or unconditional successor
626 // edge. For each conditional edge, we track the target and the opposite
627 // condition code in order to inject a "no-op" cmov into that successor
628 // that will harden the predicate. For the fallthrough/unconditional
629 // edge, we inject a separate cmov for each conditional branch with
630 // matching condition codes. This effectively implements an "and" of the
631 // condition flags, even if there isn't a single condition flag that would
632 // directly implement that. We don't bother trying to optimize either of
633 // these cases because if such an optimization is possible, LLVM should
634 // have optimized the conditional *branches* in that way already to reduce
635 // instruction count. This late, we simply assume the minimal number of
636 // branch instructions is being emitted and use that to guide our cmov
637 // insertion.
638
639 BlockCondInfo Info = {&MBB, {}, nullptr};
640
641 // Now walk backwards through the terminators and build up successors they
642 // reach and the conditions.
643 for (MachineInstr &MI : llvm::reverse(MBB)) {
644 // Once we've handled all the terminators, we're done.
645 if (!MI.isTerminator())
646 break;
647
648 // If we see a non-branch terminator, we can't handle anything so bail.
649 if (!MI.isBranch()) {
650 Info.CondBrs.clear();
651 break;
652 }
653
654 // If we see an unconditional branch, reset our state, clear any
655 // fallthrough, and set this as the "else" successor.
656 if (MI.getOpcode() == X86::JMP_1) {
657 Info.CondBrs.clear();
658 Info.UncondBr = &MI;
659 continue;
660 }
661
662 // If we get an invalid condition, we have an indirect branch or some
663 // other unanalyzable "fallthrough" case. We model this as a nullptr for
664 // the destination so we can still guard any conditional successors.
665 // Consider code sequences like:
666 // ```
667 // jCC L1
668 // jmpq *%rax
669 // ```
670 // We still want to harden the edge to `L1`.
671 if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
672 Info.CondBrs.clear();
673 Info.UncondBr = &MI;
674 continue;
675 }
676
677 // We have a vanilla conditional branch, add it to our list.
678 Info.CondBrs.push_back(&MI);
679 }
680 if (Info.CondBrs.empty()) {
681 ++NumBranchesUntraced;
682 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
683 MBB.dump());
684 continue;
685 }
686
687 Infos.push_back(Info);
688 }
689
690 return Infos;
691}
692
693/// Trace the predicate state through the CFG, instrumenting each conditional
694/// branch such that misspeculation through an edge will poison the predicate
695/// state.
696///
697/// Returns the list of inserted CMov instructions so that they can have their
698/// uses of the predicate state rewritten into proper SSA form once it is
699/// complete.
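///
/// A sketch of the resulting pattern (editor's illustration; registers and
/// labels are arbitrary, with the predicate state in %rax and the all-ones
/// poison value in %r8):
/// ```
///   cmpq %rsi, %rdi
///   jae  .LBB0_2            # instrumented conditional branch
///   # Fall-through successor: reaching it non-speculatively means "below",
///   # so an "above-or-equal" outcome here indicates misspeculation.
///   cmovaeq %r8, %rax
///   ...
/// .LBB0_2:                  # taken successor: the opposite condition poisons.
///   cmovbq %r8, %rax
/// ```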
700SmallVector<MachineInstr *, 16>
701X86SpeculativeLoadHardeningImpl::tracePredStateThroughCFG(
702 MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
703 // Collect the inserted cmov instructions so we can rewrite their uses of the
704 // predicate state into SSA form.
705 SmallVector<MachineInstr *, 16> CMovs;
706
707 // Now walk all of the basic blocks looking for ones that end in conditional
708 // jumps where we need to update this register along each edge.
709 for (const BlockCondInfo &Info : Infos) {
710 MachineBasicBlock &MBB = *Info.MBB;
711 const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
712 MachineInstr *UncondBr = Info.UncondBr;
713
714 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
715 << "\n");
716 ++NumCondBranchesTraced;
717
718 // Compute the non-conditional successor as either the target of any
719 // unconditional branch or the layout successor.
720 MachineBasicBlock *UncondSucc =
721 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
722 ? UncondBr->getOperand(0).getMBB()
723 : nullptr)
724 : &*std::next(MachineFunction::iterator(&MBB));
725
726 // Count how many edges there are to any given successor.
727 SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
728 if (UncondSucc)
729 ++SuccCounts[UncondSucc];
730 for (auto *CondBr : CondBrs)
731 ++SuccCounts[CondBr->getOperand(0).getMBB()];
732
733 // A lambda to insert cmov instructions into a block checking all of the
734 // condition codes in a sequence.
735 auto BuildCheckingBlockForSuccAndConds =
736 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
737 MachineInstr *Br, MachineInstr *&UncondBr,
738 ArrayRef<X86::CondCode> Conds) {
739 // First, we split the edge to insert the checking block into a safe
740 // location.
741 auto &CheckingMBB =
742 (SuccCount == 1 && Succ.pred_size() == 1)
743 ? Succ
744 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
745
746 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
747 if (!LiveEFLAGS)
748 CheckingMBB.addLiveIn(X86::EFLAGS);
749
750 // Now insert the cmovs to implement the checks.
751 auto InsertPt = CheckingMBB.begin();
752 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
753 "Should never have a PHI in the initial checking block as it "
754 "always has a single predecessor!");
755
756 // We will wire each cmov to each other, but need to start with the
757 // incoming pred state.
758 Register CurStateReg = PS->InitialReg;
759
760 for (X86::CondCode Cond : Conds) {
761 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
762 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
763
764 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
765 // Note that we intentionally use an empty debug location so that
766 // this picks up the preceding location.
767 auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
768 TII->get(CMovOp), UpdatedStateReg)
769 .addReg(CurStateReg)
770 .addReg(PS->PoisonReg)
771 .addImm(Cond);
772 // If this is the last cmov and the EFLAGS weren't originally
773 // live-in, mark them as killed.
774 if (!LiveEFLAGS && Cond == Conds.back())
775 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
776 ->setIsKill(true);
777
778 ++NumInstsInserted;
779 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
780 dbgs() << "\n");
781
782 // The first one of the cmovs will be using the top level
783 // `PredStateReg` and need to get rewritten into SSA form.
784 if (CurStateReg == PS->InitialReg)
785 CMovs.push_back(&*CMovI);
786
787 // The next cmov should start from this one's def.
788 CurStateReg = UpdatedStateReg;
789 }
790
791 // And put the last one into the available values for SSA form of our
792 // predicate state.
793 PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
794 };
795
796 std::vector<X86::CondCode> UncondCodeSeq;
797 for (auto *CondBr : CondBrs) {
798 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
799 int &SuccCount = SuccCounts[&Succ];
800
801 X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
802 X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
803 UncondCodeSeq.push_back(Cond);
804
805 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
806 {InvCond});
807
808 // Decrement the successor count now that we've split one of the edges.
809 // We need to keep the count of edges to the successor accurate in order
810 // to know above when to *replace* the successor in the CFG vs. just
811 // adding the new successor.
812 --SuccCount;
813 }
814
815 // Since we may have split edges and changed the number of successors,
816 // normalize the probabilities. This avoids doing it each time we split an
817 // edge.
818 MBB.normalizeSuccProbs();
819
820 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
821 // need to intersect the other condition codes. We can do this by just
822 // doing a cmov for each one.
823 if (!UncondSucc)
824 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
825 // just skip this and continue.
826 continue;
827
828 assert(SuccCounts[UncondSucc] == 1 &&
829 "We should never have more than one edge to the unconditional "
830 "successor at this point because every other edge must have been "
831 "split above!");
832
833 // Sort and unique the codes to minimize them.
834 llvm::sort(UncondCodeSeq);
835 UncondCodeSeq.erase(llvm::unique(UncondCodeSeq), UncondCodeSeq.end());
836
837 // Build a checking version of the successor.
838 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
839 UncondBr, UncondBr, UncondCodeSeq);
840 }
841
842 return CMovs;
843}
844
845/// Compute the register class for the unfolded load.
846///
847/// FIXME: This should probably live in X86InstrInfo, potentially by adding
848/// a way to unfold into a newly created vreg rather than requiring a register
849/// input.
850static const TargetRegisterClass *
851getRegClassForUnfoldedLoad(const X86InstrInfo &TII, unsigned Opcode) {
852 unsigned Index;
853 unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
854 Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
855 const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
856 return TII.getRegClass(MCID, Index);
857}
858
859void X86SpeculativeLoadHardeningImpl::unfoldCallAndJumpLoads(
860 MachineFunction &MF) {
861 for (MachineBasicBlock &MBB : MF)
862 // We use make_early_inc_range here so we can remove instructions if needed
863 // without disturbing the iteration.
864 for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) {
865 // Must either be a call or a branch.
866 if (!MI.isCall() && !MI.isBranch())
867 continue;
868 // We only care about loading variants of these instructions.
869 if (!MI.mayLoad())
870 continue;
871
872 switch (MI.getOpcode()) {
873 default: {
874 LLVM_DEBUG(
875 dbgs() << "ERROR: Found an unexpected loading branch or call "
876 "instruction:\n";
877 MI.dump(); dbgs() << "\n");
878 report_fatal_error("Unexpected loading branch or call!");
879 }
880
881 case X86::FARCALL16m:
882 case X86::FARCALL32m:
883 case X86::FARCALL64m:
884 case X86::FARJMP16m:
885 case X86::FARJMP32m:
886 case X86::FARJMP64m:
887 // We cannot mitigate far jumps or calls, but we also don't expect them
888 // to be vulnerable to Spectre v1.2 style attacks.
889 continue;
890
891 case X86::CALL16m:
892 case X86::CALL16m_NT:
893 case X86::CALL32m:
894 case X86::CALL32m_NT:
895 case X86::CALL64m:
896 case X86::CALL64m_NT:
897 case X86::JMP16m:
898 case X86::JMP16m_NT:
899 case X86::JMP32m:
900 case X86::JMP32m_NT:
901 case X86::JMP64m:
902 case X86::JMP64m_NT:
903 case X86::TAILJMPm64:
904 case X86::TAILJMPm64_REX:
905 case X86::TAILJMPm:
906 case X86::TCRETURNmi64:
907 case X86::TCRETURN_WINmi64:
908 case X86::TCRETURNmi: {
909 // Use the generic unfold logic now that we know we're dealing with
910 // expected instructions.
911 // FIXME: We don't have test coverage for all of these!
912 auto *UnfoldedRC = getRegClassForUnfoldedLoad(*TII, MI.getOpcode());
913 if (!UnfoldedRC) {
914 LLVM_DEBUG(dbgs()
915 << "ERROR: Unable to unfold load from instruction:\n";
916 MI.dump(); dbgs() << "\n");
917 report_fatal_error("Unable to unfold load!");
918 }
919 Register Reg = MRI->createVirtualRegister(UnfoldedRC);
920 SmallVector<MachineInstr *, 2> NewMIs;
921 // If we were able to compute an unfolded reg class, any failure here
922 // is just a programming error so just assert.
923 bool Unfolded =
924 TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
925 /*UnfoldStore*/ false, NewMIs);
926 (void)Unfolded;
927 assert(Unfolded &&
928 "Computed unfolded register class but failed to unfold");
929 // Now stitch the new instructions into place and erase the old one.
930 for (auto *NewMI : NewMIs)
931 MBB.insert(MI.getIterator(), NewMI);
932
933 // Update the call info.
934 if (MI.isCandidateForAdditionalCallInfo())
935 MF.eraseAdditionalCallInfo(&MI);
936
937 MI.eraseFromParent();
938 LLVM_DEBUG({
939 dbgs() << "Unfolded load successfully into:\n";
940 for (auto *NewMI : NewMIs) {
941 NewMI->dump();
942 dbgs() << "\n";
943 }
944 });
945 continue;
946 }
947 }
948 llvm_unreachable("Escaped switch with default!");
949 }
950}
951
952/// Trace the predicate state through indirect branches, instrumenting them to
953/// poison the state if a target is reached that does not match the expected
954/// target.
955///
956/// This is designed to mitigate Spectre variant 1 attacks where an indirect
957/// branch is trained to predict a particular target and then mispredicts that
958/// target in a way that can leak data. Despite using an indirect branch, this
959/// is really a variant 1 style attack: it does not steer execution to an
960/// arbitrary or attacker controlled address, and it does not require any
961/// special code executing next to the victim. This attack can also be mitigated
962/// through retpolines, but those require either replacing indirect branches
963/// with conditional direct branches or lowering them through a device that
964/// blocks speculation. This mitigation can replace these retpoline-style
965/// mitigations for jump tables and other indirect branches within a function
966/// when variant 2 isn't a risk while allowing limited speculation. Indirect
967/// calls, however, cannot be mitigated through this technique without changing
968/// the ABI in a fundamental way.
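///
/// The inserted check looks roughly like this at each indirect-branch target
/// (editor's illustration; registers are arbitrary, with the incoming target
/// address in %rax, the predicate state in %r9, and the poison value in %r8):
/// ```
/// .LBB0_3:                         # reached via `jmpq *%rax`
///   leaq .LBB0_3(%rip), %rcx       # or a direct MOV64ri32 of the block
///                                  # address for small, non-PIC code models
///   cmpq %rcx, %rax
///   cmovneq %r8, %r9               # poison the state if speculation landed
///                                  # here when it should not have
/// ```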
969SmallVector<MachineInstr *, 16>
970X86SpeculativeLoadHardeningImpl::tracePredStateThroughIndirectBranches(
971 MachineFunction &MF) {
972 // We use the SSAUpdater to insert PHI nodes for the target addresses of
973 // indirect branches. We don't actually need the full power of the SSA updater
974 // in this particular case as we always have immediately available values, but
975 // this avoids us having to re-implement the PHI construction logic.
976 MachineSSAUpdater TargetAddrSSA(MF);
977 TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
978
979 // Track which blocks were terminated with an indirect branch.
980 SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
981
982 // We need to know what blocks end up reached via indirect branches. We
983 // expect this to be a subset of those whose address is taken and so track it
984 // directly via the CFG.
985 SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
986
987 // Walk all the blocks which end in an indirect branch and make the
988 // target address available.
989 for (MachineBasicBlock &MBB : MF) {
990 // Find the last terminator.
991 auto MII = MBB.instr_rbegin();
992 while (MII != MBB.instr_rend() && MII->isDebugInstr())
993 ++MII;
994 if (MII == MBB.instr_rend())
995 continue;
996 MachineInstr &TI = *MII;
997 if (!TI.isTerminator() || !TI.isBranch())
998 // No terminator or non-branch terminator.
999 continue;
1000
1001 Register TargetReg;
1002
1003 switch (TI.getOpcode()) {
1004 default:
1005 // Direct branch or conditional branch (leading to fallthrough).
1006 continue;
1007
1008 case X86::FARJMP16m:
1009 case X86::FARJMP32m:
1010 case X86::FARJMP64m:
1011 // We cannot mitigate far jumps or calls, but we also don't expect them
1012 // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1013 continue;
1014
1015 case X86::JMP16m:
1016 case X86::JMP16m_NT:
1017 case X86::JMP32m:
1018 case X86::JMP32m_NT:
1019 case X86::JMP64m:
1020 case X86::JMP64m_NT:
1021 // Mostly as documentation.
1022 report_fatal_error("Memory operand jumps should have been unfolded!");
1023
1024 case X86::JMP16r:
1025 report_fatal_error(
1026 "Support for 16-bit indirect branches is not implemented.");
1027 case X86::JMP32r:
1028 report_fatal_error(
1029 "Support for 32-bit indirect branches is not implemented.");
1030
1031 case X86::JMP64r:
1032 TargetReg = TI.getOperand(0).getReg();
1033 }
1034
1035 // We have definitely found an indirect branch. Verify that there are no
1036 // preceding conditional branches as we don't yet support that.
1037 if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1038 return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1039 })) {
1040 LLVM_DEBUG({
1041 dbgs() << "ERROR: Found other terminators in a block with an indirect "
1042 "branch! This is not yet supported! Terminator sequence:\n";
1043 for (MachineInstr &MI : MBB.terminators()) {
1044 MI.dump();
1045 dbgs() << '\n';
1046 }
1047 });
1048 report_fatal_error("Unimplemented terminator sequence!");
1049 }
1050
1051 // Make the target register an available value for this block.
1052 TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1053 IndirectTerminatedMBBs.insert(&MBB);
1054
1055 // Add all the successors to our target candidates.
1056 IndirectTargetMBBs.insert_range(MBB.successors());
1057 }
1058
1059 // Keep track of the cmov instructions we insert so we can return them.
1061
1062 // If we didn't find any indirect branches with targets, nothing to do here.
1063 if (IndirectTargetMBBs.empty())
1064 return CMovs;
1065
1066 // We found indirect branches and targets that need to be instrumented to
1067 // harden loads within them. Walk the blocks of the function (to get a stable
1068 // ordering) and instrument each target of an indirect branch.
1069 for (MachineBasicBlock &MBB : MF) {
1070 // Skip the blocks that aren't candidate targets.
1071 if (!IndirectTargetMBBs.count(&MBB))
1072 continue;
1073
1074 // We don't expect EH pads to ever be reached via an indirect branch. If
1075 // this is desired for some reason, we could simply skip them here rather
1076 // than asserting.
1077 assert(!MBB.isEHPad() &&
1078 "Unexpected EH pad as target of an indirect branch!");
1079
1080 // We should never end up threading EFLAGS into a block to harden
1081 // conditional jumps as there would be an additional successor via the
1082 // indirect branch. As a consequence, all such edges would be split before
1083 // reaching here, and the inserted block will handle the EFLAGS-based
1084 // hardening.
1085 assert(!MBB.isLiveIn(X86::EFLAGS) &&
1086 "Cannot check within a block that already has live-in EFLAGS!");
1087
1088 // We can't handle having non-indirect edges into this block unless this is
1089 // the only successor and we can synthesize the necessary target address.
1090 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1091 // If we've already handled this by extracting the target directly,
1092 // nothing to do.
1093 if (IndirectTerminatedMBBs.count(Pred))
1094 continue;
1095
1096 // Otherwise, we have to be the only successor. We generally expect this
1097 // to be true as conditional branches should have had a critical edge
1098 // split already. We don't however need to worry about EH pad successors
1099 // as they'll happily ignore the target and their hardening strategy is
1100 // resilient to all ways in which they could be reached speculatively.
1101 if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1102 return Succ->isEHPad() || Succ == &MBB;
1103 })) {
1104 LLVM_DEBUG({
1105 dbgs() << "ERROR: Found conditional entry to target of indirect "
1106 "branch!\n";
1107 Pred->dump();
1108 MBB.dump();
1109 });
1110 report_fatal_error("Cannot harden a conditional entry to a target of "
1111 "an indirect branch!");
1112 }
1113
1114 // Now we need to compute the address of this block and install it as a
1115 // synthetic target in the predecessor. We do this at the bottom of the
1116 // predecessor.
1117 auto InsertPt = Pred->getFirstTerminator();
1118 Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1119 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1120 !Subtarget->isPositionIndependent()) {
1121 // Directly materialize it into an immediate.
1122 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1123 TII->get(X86::MOV64ri32), TargetReg)
1124 .addMBB(&MBB);
1125 ++NumInstsInserted;
1126 (void)AddrI;
1127 LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1128 dbgs() << "\n");
1129 } else {
1130 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1131 TargetReg)
1132 .addReg(/*Base*/ X86::RIP)
1133 .addImm(/*Scale*/ 1)
1134 .addReg(/*Index*/ 0)
1135 .addMBB(&MBB)
1136 .addReg(/*Segment*/ 0);
1137 ++NumInstsInserted;
1138 (void)AddrI;
1139 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1140 dbgs() << "\n");
1141 }
1142 // And make this available.
1143 TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1144 }
1145
1146 // Materialize the needed SSA value of the target. Note that we need the
1147 // middle of the block as this block might at the bottom have an indirect
1148 // branch back to itself. We can do this here because at this point, every
1149 // predecessor of this block has an available value. This is basically just
1150 // automating the construction of a PHI node for this target.
1151 Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1152
1153 // Insert a comparison of the incoming target register with this block's
1154 // address. This also requires us to mark the block as having its address
1155 // taken explicitly.
1156 MBB.setMachineBlockAddressTaken();
1157 auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1158 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1159 !Subtarget->isPositionIndependent()) {
1160 // Check directly against a relocated immediate when we can.
1161 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1162 .addReg(TargetReg, RegState::Kill)
1163 .addMBB(&MBB);
1164 ++NumInstsInserted;
1165 (void)CheckI;
1166 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1167 } else {
1168 // Otherwise compute the address into a register first.
1169 Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1170 auto AddrI =
1171 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1172 .addReg(/*Base*/ X86::RIP)
1173 .addImm(/*Scale*/ 1)
1174 .addReg(/*Index*/ 0)
1175 .addMBB(&MBB)
1176 .addReg(/*Segment*/ 0);
1177 ++NumInstsInserted;
1178 (void)AddrI;
1179 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1180 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1181 .addReg(TargetReg, RegState::Kill)
1182 .addReg(AddrReg, RegState::Kill);
1183 ++NumInstsInserted;
1184 (void)CheckI;
1185 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1186 }
1187
1188 // Now cmov over the predicate if the comparison wasn't equal.
1189 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1190 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1191 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1192 auto CMovI =
1193 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1194 .addReg(PS->InitialReg)
1195 .addReg(PS->PoisonReg)
1196 .addImm(X86::COND_NE);
1197 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
1198 ->setIsKill(true);
1199 ++NumInstsInserted;
1200 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1201 CMovs.push_back(&*CMovI);
1202
1203 // And put the new value into the available values for SSA form of our
1204 // predicate state.
1205 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1206 }
1207
1208 // Return all the newly inserted cmov instructions of the predicate state.
1209 return CMovs;
1210}
1211
1212// Returns true if the MI has EFLAGS as a register def operand and it's live;
1213// otherwise it returns false.
1214static bool isEFLAGSDefLive(const MachineInstr &MI) {
1215 if (const MachineOperand *DefOp =
1216 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1217 return !DefOp->isDead();
1218 }
1219 return false;
1220}
1221
1223 const TargetRegisterInfo &TRI) {
1224 // Check if EFLAGS are alive by seeing if there is a def of them or they
1225 // live-in, and then seeing if that def is in turn used.
1226 for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1227 if (MachineOperand *DefOp =
1228 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1229 // If the def is dead, then EFLAGS is not live.
1230 if (DefOp->isDead())
1231 return false;
1232
1233 // Otherwise we've def'ed it, and it is live.
1234 return true;
1235 }
1236 // While at this instruction, also check if we use and kill EFLAGS
1237 // which means it isn't live.
1238 if (MI.killsRegister(X86::EFLAGS, &TRI))
1239 return false;
1240 }
1241
1242 // If we didn't find anything conclusive (neither definitely alive or
1243 // definitely dead) return whether it lives into the block.
1244 return MBB.isLiveIn(X86::EFLAGS);
1245}
1246
1247/// Trace the predicate state through each of the blocks in the function,
1248/// hardening everything necessary along the way.
1249///
1250/// We call this routine once the initial predicate state has been established
1251/// for each basic block in the function in the SSA updater. This routine traces
1252/// it through the instructions within each basic block, and for non-returning
1253/// blocks informs the SSA updater about the final state that lives out of the
1254/// block. Along the way, it hardens any vulnerable instruction using the
1255/// currently valid predicate state. We have to do these two things together
1256/// because the SSA updater only works across blocks. Within a block, we track
1257/// the current predicate state directly and update it as it changes.
1258///
1259/// This operates in two passes over each block. First, we analyze the loads in
1260/// the block to determine which strategy will be used to harden them: hardening
1261/// the address or hardening the loaded value when loaded into a register
1262/// amenable to hardening. We have to process these first because the two
1263/// strategies may interact -- later hardening may change what strategy we wish
1264/// to use. We also will analyze data dependencies between loads and avoid
1265/// hardening those loads that are data dependent on a load with a hardened
1266/// address. We also skip hardening loads already behind an LFENCE as that is
1267/// sufficient to harden them against misspeculation.
1268///
1269/// Second, we actively trace the predicate state through the block, applying
1270/// the hardening steps we determined necessary in the first pass as we go.
1271///
1272/// These two passes are applied to each basic block. We operate one block at a
1273/// time to simplify reasoning about reachability and sequencing.
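///
/// For orientation, the two per-load strategies reduce to the following shapes
/// (editor's illustration; %rax holds the predicate state, which is all-ones
/// exactly when we are misspeculating):
/// ```
///   # Address hardening: poison the pointer before the load.
///   orq  %rax, %rcx
///   movq (%rcx), %rdx
///
///   # Post-load (value) hardening: flush the loaded bits to ones afterwards.
///   movq (%rcx), %rdx
///   orq  %rax, %rdx
/// ```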
1274void X86SpeculativeLoadHardeningImpl::tracePredStateThroughBlocksAndHarden(
1275 MachineFunction &MF) {
1276 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1277 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1278
1279 SmallSet<Register, 16> HardenedAddrRegs;
1280
1281 SmallDenseMap<Register, Register, 32> AddrRegToHardenedReg;
1282
1283 // Track the set of load-dependent registers through the basic block. Because
1284 // the values of these registers have an existing data dependency on a loaded
1285 // value which we would have checked, we can omit any checks on them.
1286 SparseBitVector<> LoadDepRegs;
1287
1288 for (MachineBasicBlock &MBB : MF) {
1289 // The first pass over the block: collect all the loads which can have their
1290 // loaded value hardened and all the loads that instead need their address
1291 // hardened. During this walk we propagate load dependence for address
1292 // hardened loads and also look for LFENCE to stop hardening wherever
1293 // possible. When deciding whether or not to harden the loaded value,
1294 // we check to see if any registers used in the address will have been
1295 // hardened at this point and if so, harden any remaining address registers
1296 // as that often successfully re-uses hardened addresses and minimizes
1297 // instructions.
1298 //
1299 // FIXME: We should consider an aggressive mode where we continue to keep as
1300 // many loads value hardened even when some address register hardening would
1301 // be free (due to reuse).
1302 //
1303 // Note that we only need this pass if we are actually hardening loads.
1304 if (HardenLoads)
1305 for (MachineInstr &MI : MBB) {
1306 // We naively assume that all def'ed registers of an instruction have
1307 // a data dependency on all of their operands.
1308 // FIXME: Do a more careful analysis of x86 to build a conservative
1309 // model here.
1310 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1311 return Op.isReg() && LoadDepRegs.test(Op.getReg().id());
1312 }))
1313 for (MachineOperand &Def : MI.defs())
1314 if (Def.isReg())
1315 LoadDepRegs.set(Def.getReg().id());
1316
1317 // Both Intel and AMD are guiding that they will change the semantics of
1318 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1319 // no more need to guard things in this block.
1320 if (MI.getOpcode() == X86::LFENCE)
1321 break;
1322
1323 // If this instruction cannot load, nothing to do.
1324 if (!MI.mayLoad())
1325 continue;
1326
1327 // Some instructions which "load" are trivially safe or unimportant.
1328 if (MI.getOpcode() == X86::MFENCE)
1329 continue;
1330
1331 // Extract the memory operand information about this instruction.
1332 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1333 if (MemRefBeginIdx < 0) {
1334 LLVM_DEBUG(dbgs()
1335 << "WARNING: unable to harden loading instruction: ";
1336 MI.dump());
1337 continue;
1338 }
1339
1340 MachineOperand &BaseMO =
1341 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1342 MachineOperand &IndexMO =
1343 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1344
1345 // If we have at least one (non-frame-index, non-RIP) register operand,
1346 // and neither operand is load-dependent, we need to check the load.
1347 Register BaseReg, IndexReg;
1348 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1349 BaseMO.getReg().isValid())
1350 BaseReg = BaseMO.getReg();
1351 if (IndexMO.getReg().isValid())
1352 IndexReg = IndexMO.getReg();
1353
1354 if (!BaseReg && !IndexReg)
1355 // No register operands!
1356 continue;
1357
1358 // If any register operand is dependent, this load is dependent and we
1359 // needn't check it.
1360 // FIXME: Is this true in the case where we are hardening loads after
1361 // they complete? Unclear, need to investigate.
1362 if ((BaseReg && LoadDepRegs.test(BaseReg.id())) ||
1363 (IndexReg && LoadDepRegs.test(IndexReg.id())))
1364 continue;
1365
1366 // If post-load hardening is enabled, this load is compatible with
1367 // post-load hardening, and we aren't already going to harden one of the
1368 // address registers, queue it up to be hardened post-load. Notably,
1369 // even once hardened this won't introduce a useful dependency that
1370 // could prune out subsequent loads.
1371 if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
1372 !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1373 MI.getOperand(0).isReg() &&
1374 canHardenRegister(MI.getOperand(0).getReg()) &&
1375 !HardenedAddrRegs.count(BaseReg) &&
1376 !HardenedAddrRegs.count(IndexReg)) {
1377 HardenPostLoad.insert(&MI);
1378 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1379 continue;
1380 }
1381
1382 // Record this instruction for address hardening and record its register
1383 // operands as being address-hardened.
1384 HardenLoadAddr.insert(&MI);
1385 if (BaseReg)
1386 HardenedAddrRegs.insert(BaseReg);
1387 if (IndexReg)
1388 HardenedAddrRegs.insert(IndexReg);
1389
1390 for (MachineOperand &Def : MI.defs())
1391 if (Def.isReg())
1392 LoadDepRegs.set(Def.getReg().id());
1393 }
1394
1395 // Now re-walk the instructions in the basic block, and apply whichever
1396 // hardening strategy we have elected. Note that we do this in a second
1397 // pass specifically so that we have the complete set of instructions for
1398 // which we will do post-load hardening and can defer it in certain
1399 // circumstances.
1400 for (MachineInstr &MI : MBB) {
1401 if (HardenLoads) {
1402 // We cannot both require hardening the def of a load and its address.
1403 assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1404 "Requested to harden both the address and def of a load!");
1405
1406 // Check if this is a load whose address needs to be hardened.
1407 if (HardenLoadAddr.erase(&MI)) {
1408 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1409 assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1410
1411 MachineOperand &BaseMO =
1412 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1413 MachineOperand &IndexMO =
1414 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1415 hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1416 continue;
1417 }
1418
1419 // Test if this instruction is one of our post load instructions (and
1420 // remove it from the set if so).
1421 if (HardenPostLoad.erase(&MI)) {
1422 assert(!MI.isCall() && "Must not try to post-load harden a call!");
1423
1424 // If this is a data-invariant load and there is no EFLAGS
1425 // interference, we want to try and sink any hardening as far as
1426 // possible.
1427 if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
1428 // Sink the instruction we'll need to harden as far as we can down
1429 // the graph.
1430 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1431
1432 // If we managed to sink this instruction, update everything so we
1433 // harden that instruction when we reach it in the instruction
1434 // sequence.
1435 if (SunkMI != &MI) {
1436 // If in sinking there was no instruction needing to be hardened,
1437 // we're done.
1438 if (!SunkMI)
1439 continue;
1440
1441 // Otherwise, add this to the set of defs we harden.
1442 HardenPostLoad.insert(SunkMI);
1443 continue;
1444 }
1445 }
1446
1447 Register HardenedReg = hardenPostLoad(MI);
1448
1449 // Mark the resulting hardened register as such so we don't re-harden.
1450 AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1451
1452 continue;
1453 }
1454
1455 // Check for an indirect call or branch that may need its input hardened
1456 // even if we couldn't find the specific load used, or were able to
1457 // avoid hardening it for some reason. Note that here we cannot break
1458 // out afterward as we may still need to handle any call aspect of this
1459 // instruction.
1460 if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1461 hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1462 }
1463
1464 // After we finish hardening loads we handle interprocedural hardening if
1465 // enabled and relevant for this instruction.
1466 if (!HardenInterprocedurally)
1467 continue;
1468 if (!MI.isCall() && !MI.isReturn())
1469 continue;
1470
1471 // If this is a direct return (IE, not a tail call) just directly harden
1472 // it.
1473 if (MI.isReturn() && !MI.isCall()) {
1474 hardenReturnInstr(MI);
1475 continue;
1476 }
1477
1478 // Otherwise we have a call. We need to handle transferring the predicate
1479 // state into a call and recovering it after the call returns (unless this
1480 // is a tail call).
1481 assert(MI.isCall() && "Should only reach here for calls!");
1482 tracePredStateThroughCall(MI);
1483 }
1484
1485 HardenPostLoad.clear();
1486 HardenLoadAddr.clear();
1487 HardenedAddrRegs.clear();
1488 AddrRegToHardenedReg.clear();
1489
1490 // Currently, we only track data-dependent loads within a basic block.
1491 // FIXME: We should see if this is necessary or if we could be more
1492 // aggressive here without opening up attack avenues.
1493 LoadDepRegs.clear();
1494 }
1495}
1496
1497/// Save EFLAGS into the returned GPR. This can in turn be restored with
1498/// `restoreEFLAGS`.
1499///
1500/// Note that LLVM can only lower very simple patterns of saved and restored
1501/// EFLAGS registers. The restore should always be within the same basic block
1502/// as the save so that no PHI nodes are inserted.
1503Register X86SpeculativeLoadHardeningImpl::saveEFLAGS(
1504 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1505 const DebugLoc &Loc) {
1506 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1507 // what instruction selection does.
1508 Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1509 // We directly copy the FLAGS register and rely on later lowering to clean
1510 // this up into the appropriate setCC instructions.
1511 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1512 ++NumInstsInserted;
1513 return Reg;
1514}
1515
1516/// Restore EFLAGS from the provided GPR. This should be produced by
1517/// `saveEFLAGS`.
1518///
1519/// This must be done within the same basic block as the save in order to
1520/// reliably lower.
1521void X86SpeculativeLoadHardeningImpl::restoreEFLAGS(
1522 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1523 const DebugLoc &Loc, Register Reg) {
1524 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1525 ++NumInstsInserted;
1526}
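
// When a flag-clobbering hardening instruction must be inserted while EFLAGS
// are live, the pass wraps it in the pair above. A minimal sketch of that
// pattern (the surrounding code is illustrative, not additional API):
//
//   Register Flags = saveEFLAGS(MBB, InsertPt, Loc);  // COPY %flags <- $eflags
//   ... insert OR/SHL hardening instructions that clobber EFLAGS ...
//   restoreEFLAGS(MBB, InsertPt, Loc, Flags);         // COPY $eflags <- %flags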
1527
1528/// Takes the current predicate state (in a register) and merges it into the
1529/// stack pointer. The state is essentially a single bit, but we merge this in
1530/// a way that won't form non-canonical pointers and also will be preserved
1531/// across normal stack adjustments.
1532void X86SpeculativeLoadHardeningImpl::mergePredStateIntoSP(
1533 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1534 const DebugLoc &Loc, Register PredStateReg) {
1535 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1536 // FIXME: This hard codes a shift distance based on the number of bits needed
1537 // to stay canonical on 64-bit. We should compute this somehow and support
1538 // 32-bit as part of that.
1539 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1540 .addReg(PredStateReg, RegState::Kill)
1541 .addImm(47);
1542 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1543 ++NumInstsInserted;
1544 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1545 .addReg(X86::RSP)
1546 .addReg(TmpReg, RegState::Kill);
1547 OrI->addRegisterDead(X86::EFLAGS, TRI);
1548 ++NumInstsInserted;
1549}
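
// Roughly, for the default 64-bit predicate state (all-ones when
// misspeculating, zero otherwise), the merge above lowers to the following
// x86 sequence; exact registers are chosen later by the allocator:
//
//   shlq $47, %state        # move the state into the high bits of the pointer
//   orq  %state, %rsp       # a no-op on the good path; poisons RSP's high bits
//                           # when misspeculating, without making RSP non-canonical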
1550
1551/// Extracts the predicate state stored in the high bits of the stack pointer.
1552Register X86SpeculativeLoadHardeningImpl::extractPredStateFromSP(
1553 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1554 const DebugLoc &Loc) {
1555 Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1556 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1557
1558 // We know that the stack pointer will have any preserved predicate state in
1559 // its high bit. We just want to smear this across the other bits. Turns out,
1560 // this is exactly what an arithmetic right shift does.
1561 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1562 .addReg(X86::RSP);
1563 auto ShiftI =
1564 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1565 .addReg(TmpReg, RegState::Kill)
1566 .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1567 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1568 ++NumInstsInserted;
1569
1570 return PredStateReg;
1571}
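
// Sketch of the extraction above for the 64-bit predicate state: the
// arithmetic shift smears RSP's (possibly poisoned) top bit across the whole
// register, recreating the all-ones/zero state value:
//
//   movq %rsp, %state
//   sarq $63, %state        # all-ones if the top bit was poisoned, else zero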
1572
1573void X86SpeculativeLoadHardeningImpl::hardenLoadAddr(
1574 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1575 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg) {
1576 MachineBasicBlock &MBB = *MI.getParent();
1577 const DebugLoc &Loc = MI.getDebugLoc();
1578
1579 // Check if EFLAGS are alive by seeing if there is a def of them or they are
1580 // live-in, and then seeing if that def is in turn used.
1581 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1582
1583 SmallVector<MachineOperand *, 2> HardenOpRegs;
1584
1585 if (BaseMO.isFI()) {
1586 // A frame index is never a dynamically controllable load, so only
1587 // harden it if we're covering fixed address loads as well.
1588 LLVM_DEBUG(
1589 dbgs() << " Skipping hardening base of explicit stack frame load: ";
1590 MI.dump(); dbgs() << "\n");
1591 } else if (BaseMO.getReg() == X86::RSP) {
1592 // Some idempotent atomic operations are lowered directly to a locked
1593 // OR with 0 to the top of the stack (or slightly offset from the top), which uses an
1594 // explicit RSP register as the base.
1595 assert(IndexMO.getReg() == X86::NoRegister &&
1596 "Explicit RSP access with dynamic index!");
1597 LLVM_DEBUG(
1598 dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1599 } else if (BaseMO.getReg() == X86::RIP ||
1600 BaseMO.getReg() == X86::NoRegister) {
1601 // For both RIP-relative addressed loads and absolute loads, we cannot
1602 // meaningfully harden them because the address being loaded has no
1603 // dynamic component.
1604 //
1605 // FIXME: When using a segment base (like TLS does) we end up with the
1606 // dynamic address being the base plus -1 because we can't mutate the
1607 // segment register here. This allows the signed 32-bit offset to point at
1608 // valid segment-relative addresses and load them successfully.
1609 LLVM_DEBUG(
1610 dbgs() << " Cannot harden base of "
1611 << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1612 << " address in a load!");
1613 } else {
1614 assert(BaseMO.isReg() &&
1615 "Only allowed to have a frame index or register base.");
1616 HardenOpRegs.push_back(&BaseMO);
1617 }
1618
1619 if (IndexMO.getReg() != X86::NoRegister &&
1620 (HardenOpRegs.empty() ||
1621 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1622 HardenOpRegs.push_back(&IndexMO);
1623
1624 assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1625 "Should have exactly one or two registers to harden!");
1626 assert((HardenOpRegs.size() == 1 ||
1627 HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1628 "Should not have two of the same registers!");
1629
1630 // Remove any registers that have already been checked.
1631 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1632 // See if this operand's register has already been checked.
1633 auto It = AddrRegToHardenedReg.find(Op->getReg());
1634 if (It == AddrRegToHardenedReg.end())
1635 // Not checked, so retain this one.
1636 return false;
1637
1638 // Otherwise, we can directly update this operand and remove it.
1639 Op->setReg(It->second);
1640 return true;
1641 });
1642 // If there are none left, we're done.
1643 if (HardenOpRegs.empty())
1644 return;
1645
1646 // Compute the current predicate state.
1647 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1648
1649 auto InsertPt = MI.getIterator();
1650
1651 // If EFLAGS are live and we don't have access to instructions that avoid
1652 // clobbering EFLAGS we need to save and restore them. This in turn makes
1653 // the EFLAGS no longer live.
1654 Register FlagsReg;
1655 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1656 EFLAGSLive = false;
1657 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1658 }
1659
1660 for (MachineOperand *Op : HardenOpRegs) {
1661 Register OpReg = Op->getReg();
1662 auto *OpRC = MRI->getRegClass(OpReg);
1663 Register TmpReg = MRI->createVirtualRegister(OpRC);
1664
1665 // If this is a vector register, we'll need somewhat custom logic to handle
1666 // hardening it.
1667 if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1668 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1669 assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1670 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1671
1672 // Move our state into a vector register.
1673 // FIXME: We could skip this at the cost of longer encodings with AVX-512
1674 // but that doesn't seem likely to be worth it.
1675 Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1676 auto MovI =
1677 BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1678 .addReg(StateReg);
1679 (void)MovI;
1680 ++NumInstsInserted;
1681 LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1682
1683 // Broadcast it across the vector register.
1684 Register VBStateReg = MRI->createVirtualRegister(OpRC);
1685 auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1686 TII->get(Is128Bit ? X86::VPBROADCASTQrr
1687 : X86::VPBROADCASTQYrr),
1688 VBStateReg)
1689 .addReg(VStateReg);
1690 (void)BroadcastI;
1691 ++NumInstsInserted;
1692 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1693 dbgs() << "\n");
1694
1695 // Merge our potential poison state into the value with a vector or.
1696 auto OrI =
1697 BuildMI(MBB, InsertPt, Loc,
1698 TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1699 .addReg(VBStateReg)
1700 .addReg(OpReg);
1701 (void)OrI;
1702 ++NumInstsInserted;
1703 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1704 } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1705 OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1706 OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1707 assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1708 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1709 bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1710 if (Is128Bit || Is256Bit)
1711 assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1712
1713 // Broadcast our state into a vector register.
1714 Register VStateReg = MRI->createVirtualRegister(OpRC);
1715 unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1716 : Is256Bit ? X86::VPBROADCASTQrZ256rr
1717 : X86::VPBROADCASTQrZrr;
1718 auto BroadcastI =
1719 BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1720 .addReg(StateReg);
1721 (void)BroadcastI;
1722 ++NumInstsInserted;
1723 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1724 dbgs() << "\n");
1725
1726 // Merge our potential poison state into the value with a vector or.
1727 unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1728 : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1729 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1730 .addReg(VStateReg)
1731 .addReg(OpReg);
1732 (void)OrI;
1733 ++NumInstsInserted;
1734 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1735 } else {
1736 // FIXME: Need to support GR32 here for 32-bit code.
1737 assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1738 "Not a supported register class for address hardening!");
1739
1740 if (!EFLAGSLive) {
1741 // Merge our potential poison state into the value with an or.
1742 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1743 .addReg(StateReg)
1744 .addReg(OpReg);
1745 OrI->addRegisterDead(X86::EFLAGS, TRI);
1746 ++NumInstsInserted;
1747 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1748 } else {
1749 // We need to avoid touching EFLAGS so shift out all but the least
1750 // significant bit using the instruction that doesn't update flags.
1751 auto ShiftI =
1752 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1753 .addReg(OpReg)
1754 .addReg(StateReg);
1755 (void)ShiftI;
1756 ++NumInstsInserted;
1757 LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1758 dbgs() << "\n");
1759 }
1760 }
1761
1762 // Record this register as checked and update the operand.
1763 assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1764 "Should not have checked this register yet!");
1765 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1766 Op->setReg(TmpReg);
1767 ++NumAddrRegsHardened;
1768 }
1769
1770 // And restore the flags if needed.
1771 if (FlagsReg)
1772 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1773}
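
// For the common 64-bit GPR case above, hardening an address register costs a
// single instruction. A sketch, assuming address register %addr and the usual
// all-ones/zero predicate state %state:
//
//   orq   %state, %addr            # EFLAGS dead: unchanged on the good path,
//                                  # forced to all-ones when misspeculating
//   shrxq %state, %addr, %addr     # EFLAGS live + BMI2: shift count is 0 or 63,
//                                  # destroying the address only when misspeculating
//
// Without BMI2 and with EFLAGS live, the flags are saved and restored around
// the plain `or`, as implemented above.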
1774
1775MachineInstr *X86SpeculativeLoadHardeningImpl::sinkPostLoadHardenedInst(
1776 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1778 "Cannot get here with a non-invariant load!");
1779 assert(!isEFLAGSDefLive(InitialMI) &&
1780 "Cannot get here with a data invariant load "
1781 "that interferes with EFLAGS!");
1782
1783 // See if we can sink hardening the loaded value.
1784 auto SinkCheckToSingleUse =
1785 [&](MachineInstr &MI) -> std::optional<MachineInstr *> {
1786 Register DefReg = MI.getOperand(0).getReg();
1787
1788 // We need to find a single use to which we can sink the check. We can
1789 // primarily do this because many uses may already end up checked on their
1790 // own.
1791 MachineInstr *SingleUseMI = nullptr;
1792 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1793 // If we're already going to harden this use, it is data invariant, it
1794 // does not interfere with EFLAGS, and within our block.
1795 if (HardenedInstrs.count(&UseMI)) {
1796 if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
1797 // If we've already decided to harden a non-load, we must have sunk
1798 // some other post-load hardened instruction to it and it must itself
1799 // be data-invariant.
1800 assert(X86InstrInfo::isDataInvariant(UseMI) &&
1801 "Data variant instruction being hardened!");
1802 continue;
1803 }
1804
1805 // Otherwise, this is a load and the load component can't be data
1806 // invariant so check how this register is being used.
1807 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(UseMI);
1808 assert(MemRefBeginIdx >= 0 &&
1809 "Should always have mem references here!");
1810
1811 MachineOperand &BaseMO =
1812 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1813 MachineOperand &IndexMO =
1814 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1815 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1816 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1817 // The load uses the register as part of its address making it not
1818 // invariant.
1819 return {};
1820
1821 continue;
1822 }
1823
1824 if (SingleUseMI)
1825 // We already have a single use, this would make two. Bail.
1826 return {};
1827
1828 // If this single use isn't data invariant, isn't in this block, or has
1829 // interfering EFLAGS, we can't sink the hardening to it.
1830 if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1831 isEFLAGSDefLive(UseMI))
1832 return {};
1833
1834 // If this instruction defines multiple registers bail as we won't harden
1835 // all of them.
1836 if (UseMI.getDesc().getNumDefs() > 1)
1837 return {};
1838
1839 // If this register isn't a virtual register, we can't sanely walk its uses,
1840 // so just bail. Also check that its register class is one of the ones we
1841 // can harden.
1842 Register UseDefReg = UseMI.getOperand(0).getReg();
1843 if (!canHardenRegister(UseDefReg))
1844 return {};
1845
1846 SingleUseMI = &UseMI;
1847 }
1848
1849 // If SingleUseMI is still null, there is no use that needs its own
1850 // checking. Otherwise, it is the single use that needs checking.
1851 return {SingleUseMI};
1852 };
1853
1854 MachineInstr *MI = &InitialMI;
1855 while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1856 // Update which MI we're checking now.
1857 MI = *SingleUse;
1858 if (!MI)
1859 break;
1860 }
1861
1862 return MI;
1863}
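
// Illustrative example of the sinking above (pseudo-MIR with hypothetical
// virtual registers):
//
//   %v:gr64 = MOV64rm ...       ; load selected for post-load hardening
//   %w:gr64 = ADD64rr %v, %x    ; sole use: data-invariant, in the same block,
//                               ; and its EFLAGS def is dead
//
// Instead of hardening %v, the check is sunk to %w so that the extra `or` with
// the predicate state is only applied to the value that actually escapes.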
1864
1865bool X86SpeculativeLoadHardeningImpl::canHardenRegister(Register Reg) {
1866 // We only support hardening virtual registers.
1867 if (!Reg.isVirtual())
1868 return false;
1869
1870 auto *RC = MRI->getRegClass(Reg);
1871 int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1872 if (RegBytes > 8)
1873 // We don't support post-load hardening of vectors.
1874 return false;
1875
1876 unsigned RegIdx = Log2_32(RegBytes);
1877 assert(RegIdx < 4 && "Unsupported register size");
1878
1879 // If this register class is explicitly constrained to a class that doesn't
1880 // require REX prefix, we may not be able to satisfy that constraint when
1881 // emitting the hardening instructions, so bail out here.
1882 // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1883 // end up with both a NOREX and a REX-only register as operands to the hardening
1884 // instructions. It would be better to fix that code to handle this situation
1885 // rather than hack around it in this way.
1886 const TargetRegisterClass *NOREXRegClasses[] = {
1887 &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1888 &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1889 if (RC == NOREXRegClasses[RegIdx])
1890 return false;
1891
1892 const TargetRegisterClass *GPRRegClasses[] = {
1893 &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1894 &X86::GR64RegClass};
1895 return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1896}
1897
1898/// Harden a value in a register.
1899///
1900/// This is the low-level logic to fully harden a value sitting in a register
1901/// against leaking during speculative execution.
1902///
1903/// Unlike hardening an address that is used by a load, this routine is required
1904/// to hide *all* incoming bits in the register.
1905///
1906/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1907/// larger than the predicate state register. FIXME: We should support vector
1908/// registers here by broadcasting the predicate state.
1909///
1910/// The new, hardened virtual register is returned. It will have the same
1911/// register class as `Reg`.
1912Register X86SpeculativeLoadHardeningImpl::hardenValueInRegister(
1913 Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1914 const DebugLoc &Loc) {
1915 assert(canHardenRegister(Reg) && "Cannot harden this register!");
1916
1917 auto *RC = MRI->getRegClass(Reg);
1918 int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1919 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1920 assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1921 "Unknown register size");
1922
1923 // FIXME: Need to teach this about 32-bit mode.
1924 if (Bytes != 8) {
1925 unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1926 unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1927 Register NarrowStateReg = MRI->createVirtualRegister(RC);
1928 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1929 .addReg(StateReg, 0, SubRegImm);
1930 StateReg = NarrowStateReg;
1931 }
1932
1933 Register FlagsReg;
1934 if (isEFLAGSLive(MBB, InsertPt, *TRI))
1935 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1936
1937 Register NewReg = MRI->createVirtualRegister(RC);
1938 unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1939 unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1940 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1941 .addReg(StateReg)
1942 .addReg(Reg);
1943 OrI->addRegisterDead(X86::EFLAGS, TRI);
1944 ++NumInstsInserted;
1945 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1946
1947 if (FlagsReg)
1948 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1949
1950 return NewReg;
1951}
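
// Sketch for a 32-bit value handled by the routine above: the 64-bit predicate
// state is narrowed with a subregister copy and merged with a flag-clobbering
// `or` (EFLAGS are saved and restored first if they are live):
//
//   %state32 = COPY %state64.sub_32bit
//   %hardened32 = OR32rr %state32, %val32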
1952
1953/// Harden a load by hardening the loaded value in the defined register.
1954///
1955/// We can harden a non-leaking load into a register without touching the
1956/// address by just hiding all of the loaded bits during misspeculation. We use
1957/// an `or` instruction to do this because we set up our poison value as all
1958 /// ones, and the goal is just for the loaded bits not to be exposed to
1959 /// speculative execution; coercing them all to one is sufficient.
1960///
1961/// Returns the newly hardened register.
1962Register X86SpeculativeLoadHardeningImpl::hardenPostLoad(MachineInstr &MI) {
1963 MachineBasicBlock &MBB = *MI.getParent();
1964 const DebugLoc &Loc = MI.getDebugLoc();
1965
1966 auto &DefOp = MI.getOperand(0);
1967 Register OldDefReg = DefOp.getReg();
1968 auto *DefRC = MRI->getRegClass(OldDefReg);
1969
1970 // Because we want to completely replace the uses of this def'ed value with
1971 // the hardened value, create a dedicated new register that will only be used
1972 // to communicate the unhardened value to the hardening.
1973 Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1974 DefOp.setReg(UnhardenedReg);
1975
1976 // Now harden this register's value, getting a hardened reg that is safe to
1977 // use. Note that we insert the instructions to compute this *after* the
1978 // defining instruction, not before it.
1979 Register HardenedReg = hardenValueInRegister(
1980 UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1981
1982 // Finally, replace the old register (which now only has the uses of the
1983 // original def) with the hardened register.
1984 MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1985
1986 ++NumPostLoadRegsHardened;
1987 return HardenedReg;
1988}
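
// Sketch of the rewrite above in pseudo-MIR (hypothetical registers):
//
//   before:  %old = MOV64rm ...           ; %old has some number of uses
//   after:   %unhardened = MOV64rm ...    ; the load now defines a fresh register
//            %hardened = OR64rr %state, %unhardened
//            ... every former use of %old now reads %hardened ...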
1989
1990/// Harden a return instruction.
1991///
1992/// Returns implicitly perform a load which we need to harden. Without hardening
1993 /// this load, an attacker may speculatively write over the return address to
1994/// steer speculation of the return to an attacker controlled address. This is
1995/// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1996/// this paper:
1997/// https://people.csail.mit.edu/vlk/spectre11.pdf
1998///
1999/// We can harden this by introducing an LFENCE that will delay any load of the
2000/// return address until prior instructions have retired (and thus are not being
2001/// speculated), or we can harden the address used by the implicit load: the
2002/// stack pointer.
2003///
2004/// If we are not using an LFENCE, hardening the stack pointer has an additional
2005/// benefit: it allows us to pass the predicate state accumulated in this
2006/// function back to the caller. In the absence of a BCBS attack on the return,
2007/// the caller will typically be resumed and speculatively executed due to the
2008/// Return Stack Buffer (RSB) prediction which is very accurate and has a high
2009/// priority. It is possible that some code from the caller will be executed
2010/// speculatively even during a BCBS-attacked return until the steering takes
2011/// effect. Whenever this happens, the caller can recover the (poisoned)
2012/// predicate state from the stack pointer and continue to harden loads.
2013void X86SpeculativeLoadHardeningImpl::hardenReturnInstr(MachineInstr &MI) {
2014 MachineBasicBlock &MBB = *MI.getParent();
2015 const DebugLoc &Loc = MI.getDebugLoc();
2016 auto InsertPt = MI.getIterator();
2017
2018 if (FenceCallAndRet)
2019 // No need to fence here as we'll fence at the return site itself. That
2020 // handles more cases than we can handle here.
2021 return;
2022
2023 // Take our predicate state, shift it to the high 17 bits (so that we keep
2024 // pointers canonical) and merge it into RSP. This will allow the caller to
2025 // extract it when we return (speculatively).
2026 mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2027}
2028
2029/// Trace the predicate state through a call.
2030///
2031/// There are several layers of this needed to handle the full complexity of
2032/// calls.
2033///
2034/// First, we need to send the predicate state into the called function. We do
2035/// this by merging it into the high bits of the stack pointer.
2036///
2037/// For tail calls, this is all we need to do.
2038///
2039/// For calls where we might return and resume the control flow, we need to
2040/// extract the predicate state from the high bits of the stack pointer after
2041/// control returns from the called function.
2042///
2043/// We also need to verify that we intended to return to this location in the
2044/// code. An attacker might arrange for the processor to mispredict the return
2045/// to this valid but incorrect return address in the program rather than the
2046/// correct one. See the paper on this attack, called "ret2spec" by the
2047/// researchers, here:
2048/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2049///
2050/// The way we verify that we returned to the correct location is by preserving
2051/// the expected return address across the call. One technique involves taking
2052 /// advantage of the red zone to load the return address from `-8(%rsp)`, where it
2053/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2054/// directly save the address into a register that will be preserved across the
2055/// call. We compare this intended return address against the address
2056/// immediately following the call (the observed return address). If these
2057/// mismatch, we have detected misspeculation and can poison our predicate
2058/// state.
2059void X86SpeculativeLoadHardeningImpl::tracePredStateThroughCall(
2060 MachineInstr &MI) {
2061 MachineBasicBlock &MBB = *MI.getParent();
2062 MachineFunction &MF = *MBB.getParent();
2063 auto InsertPt = MI.getIterator();
2064 const DebugLoc &Loc = MI.getDebugLoc();
2065
2066 if (FenceCallAndRet) {
2067 if (MI.isReturn())
2068 // Tail call, we don't return to this function.
2069 // FIXME: We should also handle noreturn calls.
2070 return;
2071
2072 // We don't need to fence before the call because the function should fence
2073 // in its entry. However, we do need to fence after the call returns.
2074 // Fencing before the return doesn't correctly handle cases where the return
2075 // itself is mispredicted.
2076 BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2077 ++NumInstsInserted;
2078 ++NumLFENCEsInserted;
2079 return;
2080 }
2081
2082 // First, we transfer the predicate state into the called function by merging
2083 // it into the stack pointer. This will kill the current def of the state.
2084 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2085 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2086
2087 // If this call is also a return, it is a tail call and we don't need anything
2088 // else to handle it so just return. Also, if there are no further
2089 // instructions and no successors, this call does not return so we can also
2090 // bail.
2091 if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2092 return;
2093
2094 // Create a symbol to track the return address and attach it to the call
2095 // machine instruction. We will lower extra symbols attached to call
2096 // instructions as labels immediately following the call.
2097 MCSymbol *RetSymbol =
2098 MF.getContext().createTempSymbol("slh_ret_addr",
2099 /*AlwaysAddSuffix*/ true);
2100 MI.setPostInstrSymbol(MF, RetSymbol);
2101
2102 const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2103 Register ExpectedRetAddrReg;
2104
2105 // If we have no red zones or if the function returns twice (possibly without
2106 // using the `ret` instruction) like setjmp, we need to save the expected
2107 // return address prior to the call.
2108 if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2109 MF.exposesReturnsTwice()) {
2110 // If we don't have red zones, we need to compute the expected return
2111 // address prior to the call and store it in a register that lives across
2112 // the call.
2113 //
2114 // In some ways, this is doubly satisfying as a mitigation because it will
2115 // also successfully detect stack smashing bugs in some cases (typically,
2116 // when a callee-saved register is used and the callee doesn't push it onto
2117 // the stack). But that isn't our primary goal, so we only use it as
2118 // a fallback.
2119 //
2120 // FIXME: It isn't clear that this is reliable in the face of
2121 // rematerialization in the register allocator. We somehow need to force
2122 // that to not occur for this particular instruction, and instead to spill
2123 // or otherwise preserve the value computed *prior* to the call.
2124 //
2125 // FIXME: It is even less clear why MachineCSE can't just fold this when we
2126 // end up having to use identical instructions both before and after the
2127 // call to feed the comparison.
2128 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2129 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2130 !Subtarget->isPositionIndependent()) {
2131 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2132 .addSym(RetSymbol);
2133 } else {
2134 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2135 .addReg(/*Base*/ X86::RIP)
2136 .addImm(/*Scale*/ 1)
2137 .addReg(/*Index*/ 0)
2138 .addSym(RetSymbol)
2139 .addReg(/*Segment*/ 0);
2140 }
2141 }
2142
2143 // Step past the call to handle when it returns.
2144 ++InsertPt;
2145
2146 // If we didn't pre-compute the expected return address into a register, then
2147 // red zones are enabled and the return address is still available on the
2148 // stack immediately after the call. As the very first instruction, we load it
2149 // into a register.
2150 if (!ExpectedRetAddrReg) {
2151 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2152 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2153 .addReg(/*Base*/ X86::RSP)
2154 .addImm(/*Scale*/ 1)
2155 .addReg(/*Index*/ 0)
2156 .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2157 // the return address is 8 bytes below it.
2158 .addReg(/*Segment*/ 0);
2159 }
2160
2161 // Now we extract the callee's predicate state from the stack pointer.
2162 Register NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2163
2164 // Test the expected return address against our actual address. If we can
2165 // form this basic block's address as an immediate, this is easy. Otherwise
2166 // we compute it.
2167 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2168 !Subtarget->isPositionIndependent()) {
2169 // FIXME: Could we fold this with the load? It would require careful EFLAGS
2170 // management.
2171 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2172 .addReg(ExpectedRetAddrReg, RegState::Kill)
2173 .addSym(RetSymbol);
2174 } else {
2175 Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2176 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2177 .addReg(/*Base*/ X86::RIP)
2178 .addImm(/*Scale*/ 1)
2179 .addReg(/*Index*/ 0)
2180 .addSym(RetSymbol)
2181 .addReg(/*Segment*/ 0);
2182 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2183 .addReg(ExpectedRetAddrReg, RegState::Kill)
2184 .addReg(ActualRetAddrReg, RegState::Kill);
2185 }
2186
2187 // Now conditionally update the predicate state we just extracted if we ended
2188 // up at a different return address than expected.
2189 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2190 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2191
2192 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2193 auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2194 .addReg(NewStateReg, RegState::Kill)
2195 .addReg(PS->PoisonReg)
2196 .addImm(X86::COND_NE);
2197 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
2198 ++NumInstsInserted;
2199 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2200
2201 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2202}
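
// End-to-end sketch of the non-LFENCE path above, assuming the small non-PIC
// code model and no red zone (registers and the label spelling are
// illustrative):
//
//   movq  $.Lslh_ret_addr0, %expected   # expected return address, precomputed
//   shlq  $47, %state
//   orq   %state, %rsp                  # pass the state to the callee
//   callq callee
//  .Lslh_ret_addr0:
//   movq  %rsp, %state
//   sarq  $63, %state                   # recover the state the callee passed back
//   cmpq  $.Lslh_ret_addr0, %expected   # did we actually return here?
//   cmovneq %poison, %state             # if not, poison the predicate state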
2203
2204/// An attacker may speculatively store over a value that is then speculatively
2205/// loaded and used as the target of an indirect call or jump instruction. This
2206/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2207/// in this paper:
2208/// https://people.csail.mit.edu/vlk/spectre11.pdf
2209///
2210/// When this happens, the speculative execution of the call or jump will end up
2211/// being steered to this attacker controlled address. While most such loads
2212/// will be adequately hardened already, we want to ensure that they are
2213/// definitively treated as needing post-load hardening. While address hardening
2214/// is sufficient to prevent secret data from leaking to the attacker, it may
2215/// not be sufficient to prevent an attacker from steering speculative
2216/// execution. We forcibly unfolded all relevant loads above and so will always
2217 /// have an opportunity to post-load harden here; we just need to scan for cases
2218/// not already flagged and add them.
2219void X86SpeculativeLoadHardeningImpl::hardenIndirectCallOrJumpInstr(
2220 MachineInstr &MI,
2221 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg) {
2222 switch (MI.getOpcode()) {
2223 case X86::FARCALL16m:
2224 case X86::FARCALL32m:
2225 case X86::FARCALL64m:
2226 case X86::FARJMP16m:
2227 case X86::FARJMP32m:
2228 case X86::FARJMP64m:
2229 // We don't need to harden either far calls or far jumps as they are
2230 // safe from Spectre.
2231 return;
2232
2233 default:
2234 break;
2235 }
2236
2237 // We should never see a loading instruction at this point, as those should
2238 // have been unfolded.
2239 assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2240
2241 // If the first operand isn't a register, this is a branch or call
2242 // instruction with an immediate operand which doesn't need to be hardened.
2243 if (!MI.getOperand(0).isReg())
2244 return;
2245
2246 // For all of these, the target register is the first operand of the
2247 // instruction.
2248 auto &TargetOp = MI.getOperand(0);
2249 Register OldTargetReg = TargetOp.getReg();
2250
2251 // Try to lookup a hardened version of this register. We retain a reference
2252 // here as we want to update the map to track any newly computed hardened
2253 // register.
2254 Register &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2255
2256 // If we don't have a hardened register yet, compute one. Otherwise, just use
2257 // the already hardened register.
2258 //
2259 // FIXME: It is a little suspect that we use partially hardened registers that
2260 // only feed addresses. The complexity of partial hardening with SHRX
2261 // continues to pile up. Should definitively measure its value and consider
2262 // eliminating it.
2263 if (!HardenedTargetReg)
2264 HardenedTargetReg = hardenValueInRegister(
2265 OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2266
2267 // Set the target operand to the hardened register.
2268 TargetOp.setReg(HardenedTargetReg);
2269
2270 ++NumCallsOrJumpsHardened;
2271}
2272
2273INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2274 "X86 speculative load hardener", false, false)
2275INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2276 "X86 speculative load hardener", false, false)
2277
2278 FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2279 return new X86SpeculativeLoadHardeningPass();
2280}