LLVM 23.0.0git
X86SpeculativeLoadHardening.cpp
Go to the documentation of this file.
1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Provide a pass which mitigates speculative execution attacks which operate
11/// by speculating incorrectly past some predicate (a type check, bounds check,
12/// or other condition) to reach a load with invalid inputs and leak the data
13/// accessed by that load using a side channel out of the speculative domain.
14///
15/// For details on the attacks, see the first variant in both the Project Zero
16/// writeup and the Spectre paper:
17/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18/// https://spectreattack.com/spectre.pdf
19///
20//===----------------------------------------------------------------------===//
21
22#include "X86.h"
23#include "X86InstrInfo.h"
24#include "X86Subtarget.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
32#include "llvm/ADT/Statistic.h"
47#include "llvm/IR/DebugLoc.h"
48#include "llvm/MC/MCSchedule.h"
49#include "llvm/Pass.h"
51#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <iterator>
56#include <optional>
57
58using namespace llvm;
59
60#define PASS_KEY "x86-slh"
61#define DEBUG_TYPE PASS_KEY
62
63STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
64STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
65STATISTIC(NumAddrRegsHardened,
66 "Number of address mode used registers hardened");
67STATISTIC(NumPostLoadRegsHardened,
68 "Number of post-load register values hardened");
69STATISTIC(NumCallsOrJumpsHardened,
70 "Number of calls or jumps requiring extra hardening");
71STATISTIC(NumInstsInserted, "Number of instructions inserted");
72STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
73
// NOTE(review): the opening line of each command-line option declaration
// below appears to be missing from this copy (the embedded listing numbers
// skip 74, 77, 79, 81, 86, 93, 99, 106 and 111), so only the flag strings,
// descriptions and defaults are visible here. Confirm the option variable
// names and types against the original file.

// Flag "x86-speculative-load-hardening": forces the pass on; defaults to
// false.
75 "x86-speculative-load-hardening",
76 cl::desc("Force enable speculative load hardening"), cl::init(false),
78

// Flag PASS_KEY "-lfence": selects LFENCE-per-conditional-edge hardening
// instead of cmovs and poisoned pointers; defaults to false, hidden.
80 PASS_KEY "-lfence",
82 "Use LFENCE along each conditional edge to harden against speculative "
83 "loads rather than conditional movs and poisoned pointers."),
84 cl::init(false), cl::Hidden);
85

// Flag PASS_KEY "-post-load": harden loaded values after the load; defaults
// to true, hidden.
87 PASS_KEY "-post-load",
88 cl::desc("Harden the value loaded *after* it is loaded by "
89 "flushing the loaded bits to 1. This is hard to do "
90 "in general but can be done easily for GPRs."),
91 cl::init(true), cl::Hidden);
92

// Flag PASS_KEY "-fence-call-and-ret": use a full speculation fence on call
// and ret edges; defaults to false, hidden.
94 PASS_KEY "-fence-call-and-ret",
95 cl::desc("Use a full speculation fence to harden both call and ret edges "
96 "rather than a lighter weight mitigation."),
97 cl::init(false), cl::Hidden);
98

// Flag PASS_KEY "-ip": pass the predicate state between functions through the
// high bits of the stack pointer; defaults to true, hidden.
100 PASS_KEY "-ip",
101 cl::desc("Harden interprocedurally by passing our state in and out of "
102 "functions in the high bits of the stack pointer."),
103 cl::init(true), cl::Hidden);
104

// Boolean option controlling whether loads are sanitized; defaults to true,
// hidden. NOTE(review): the line carrying its name and flag string (listing
// line 106) is absent here.
105static cl::opt<bool>
107 cl::desc("Sanitize loads from memory. When disable, no "
108 "significant security is provided."),
109 cl::init(true), cl::Hidden);
110

// Flag PASS_KEY "-indirect": harden indirect calls and jumps; defaults to
// true, hidden.
112 PASS_KEY "-indirect",
113 cl::desc("Harden indirect calls and jumps against using speculatively "
114 "stored attacker controlled addresses. This is designed to "
115 "mitigate Spectre v1.2 style attacks."),
116 cl::init(true), cl::Hidden);
117
118namespace {
119
120constexpr StringRef X86SLHPassName = "X86 speculative load hardening";
121
122class X86SpeculativeLoadHardeningLegacy : public MachineFunctionPass {
123public:
124 X86SpeculativeLoadHardeningLegacy() : MachineFunctionPass(ID) {}
125
126 StringRef getPassName() const override { return X86SLHPassName; }
127 bool runOnMachineFunction(MachineFunction &MF) override;
128 void getAnalysisUsage(AnalysisUsage &AU) const override;
129
130 /// Pass identification, replacement for typeid.
131 static char ID;
132};
133
// Pass-manager-independent implementation of the hardening. The legacy pass
// wrapper in this file constructs one of these per function and calls run().
//
// NOTE(review): several declaration lines inside this class are absent from
// this copy (listing numbers 176, 181, 187, 194, 197 and 209 are skipped), so
// some member declarations below are missing their return type or one
// parameter. Confirm against the original file.
134class X86SpeculativeLoadHardeningImpl {
135public:
136 X86SpeculativeLoadHardeningImpl() = default;
137
// Entry point: harden MF. The bool result is what the legacy wrapper returns
// as its "changed" flag.
138 bool run(MachineFunction &MF);
139
140private:
141 /// The information about a block's conditional terminators needed to trace
142 /// our predicate state through the exiting edges.
143 struct BlockCondInfo {
// The block whose terminators are summarized.
144 MachineBasicBlock *MBB;
145
146 // We mostly have one conditional branch, and in extremely rare cases have
147 // two. Three and more are so rare as to be unimportant for compile time.
148 SmallVector<MachineInstr *, 2> CondBrs;
149
// The trailing unconditional (or unanalyzable) branch, or nullptr when the
// block falls through.
150 MachineInstr *UncondBr;
151 };
152
153 /// Manages the predicate state traced through the program.
154 struct PredState {
// Register holding the predicate state on function entry; run() either
// extracts it from the stack pointer or zeroes it.
155 Register InitialReg;
// Register holding the all-ones poison value materialized in the entry block.
156 Register PoisonReg;
157
// Register class used for every predicate-state virtual register.
158 const TargetRegisterClass *RC;
// SSA updater used to rewrite uses of InitialReg once all per-block values
// have been registered.
159 MachineSSAUpdater SSA;
160
161 PredState(MachineFunction &MF, const TargetRegisterClass *RC)
162 : RC(RC), SSA(MF) {}
163 };
164
// Per-function context, (re)initialized at the start of run().
165 const X86Subtarget *Subtarget = nullptr;
166 MachineRegisterInfo *MRI = nullptr;
167 const X86InstrInfo *TII = nullptr;
168 const TargetRegisterInfo *TRI = nullptr;
169
// Emplaced by run() for the function being processed.
170 std::optional<PredState> PS;
171
172 void hardenEdgesWithLFENCE(MachineFunction &MF);
173
174 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
175
// NOTE(review): the return-type line (listing line 176) is absent here.
177 tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
178
179 void unfoldCallAndJumpLoads(MachineFunction &MF);
180
// NOTE(review): the return-type line (listing line 181) is absent here.
182 tracePredStateThroughIndirectBranches(MachineFunction &MF);
183
184 void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
185
// NOTE(review): in the four declarations below that take an MBB and a
// DebugLoc, one parameter line between them (listing lines 187, 194, 197 and
// 209) is absent; call sites in this file pass an insertion iterator there.
186 Register saveEFLAGS(MachineBasicBlock &MBB,
188 const DebugLoc &Loc);
189 void restoreEFLAGS(MachineBasicBlock &MBB,
190 MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
191 Register Reg);
192
193 void mergePredStateIntoSP(MachineBasicBlock &MBB,
195 const DebugLoc &Loc, Register PredStateReg);
196 Register extractPredStateFromSP(MachineBasicBlock &MBB,
198 const DebugLoc &Loc);
199
200 void
201 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
202 MachineOperand &IndexMO,
203 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg);
204 MachineInstr *
205 sinkPostLoadHardenedInst(MachineInstr &MI,
206 SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
207 bool canHardenRegister(Register Reg);
208 Register hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
210 const DebugLoc &Loc);
211 Register hardenPostLoad(MachineInstr &MI);
212 void hardenReturnInstr(MachineInstr &MI);
213 void tracePredStateThroughCall(MachineInstr &MI);
214 void hardenIndirectCallOrJumpInstr(
215 MachineInstr &MI,
216 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg);
217};
218
219} // end anonymous namespace
220
221bool X86SpeculativeLoadHardeningLegacy::runOnMachineFunction(
222 MachineFunction &MF) {
223 X86SpeculativeLoadHardeningImpl Impl;
224 bool Changed = Impl.run(MF);
225 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
226 dbgs() << "\n"; MF.verify(this));
227 return Changed;
228}
229
230char X86SpeculativeLoadHardeningLegacy::ID = 0;
231
// Override of the pass's getAnalysisUsage hook (declared `override` in the
// class above).
// NOTE(review): listing line 234, between the signature and the closing
// brace, is absent from this copy, so the body appears empty here — confirm
// against the original file.
232void X86SpeculativeLoadHardeningLegacy::getAnalysisUsage(
233 AnalysisUsage &AU) const {
235}
236
// Splits the CFG edge from MBB to Succ: NewMBB is inserted directly after MBB
// in layout, the branch Br (if any) is retargeted to it, and the successor
// lists, the PHIs in Succ and the live-ins are updated. Returns NewMBB.
//
// SuccCount is the number of edges from MBB to Succ still present; when it is
// 1 the existing successor/PHI entry is replaced, otherwise a new one is
// added. UncondBr is updated in place if a new unconditional JMP_1 has to be
// appended to MBB to preserve the old fallthrough.
//
// NOTE(review): listing lines 237 (start of the signature), 245 (presumably
// where NewMBB is created) and 276 (presumably where Cond is declared) are
// absent from this copy — confirm against the original file.
238 MachineBasicBlock &Succ, int SuccCount,
239 MachineInstr *Br, MachineInstr *&UncondBr,
240 const X86InstrInfo &TII) {
241 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
242
243 MachineFunction &MF = *MBB.getParent();
244
246
247 // We have to insert the new block immediately after the current one as we
248 // don't know what layout-successor relationships the successor has and we
249 // may not be able to (and generally don't want to) try to fix those up.
250 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
251
252 // Update the branch instruction if necessary.
253 if (Br) {
254 assert(Br->getOperand(0).getMBB() == &Succ &&
255 "Didn't start with the right target!");
256 Br->getOperand(0).setMBB(&NewMBB);
257
258 // If this successor was reached through a branch rather than fallthrough,
259 // we might have *broken* fallthrough and so need to inject a new
260 // unconditional branch.
261 if (!UncondBr) {
// NewMBB now sits between MBB and its former layout successor, so the block
// after NewMBB is the one MBB used to fall through to.
262 MachineBasicBlock &OldLayoutSucc =
263 *std::next(MachineFunction::iterator(&NewMBB));
264 assert(MBB.isSuccessor(&OldLayoutSucc) &&
265 "Without an unconditional branch, the old layout successor should "
266 "be an actual successor!");
267 auto BrBuilder =
268 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
269 // Update the unconditional branch now that we've added one.
270 UncondBr = &*BrBuilder;
271 }
272
273 // Insert unconditional "jump Succ" instruction in the new block if
274 // necessary.
275 if (!NewMBB.isLayoutSuccessor(&Succ)) {
277 TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
278 }
279 } else {
280 assert(!UncondBr &&
281 "Cannot have a branchless successor and an unconditional branch!");
282 assert(NewMBB.isLayoutSuccessor(&Succ) &&
283 "A non-branch successor must have been a layout successor before "
284 "and now is a layout successor of the new block.");
285 }
286
287 // If this is the only edge to the successor, we can just replace it in the
288 // CFG. Otherwise we need to add a new entry in the CFG for the new
289 // successor.
290 if (SuccCount == 1) {
291 MBB.replaceSuccessor(&Succ, &NewMBB);
292 } else {
293 MBB.splitSuccessor(&Succ, &NewMBB);
294 }
295
296 // Hook up the edge from the new basic block to the old successor in the CFG.
297 NewMBB.addSuccessor(&Succ);
298
299 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
300 for (MachineInstr &MI : Succ) {
301 if (!MI.isPHI())
302 break;
// PHI operands after the def come in (value, block) pairs, hence the stride
// of two starting at operand 1.
303 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
304 OpIdx += 2) {
305 MachineOperand &OpV = MI.getOperand(OpIdx);
306 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
307 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
308 if (OpMBB.getMBB() != &MBB)
309 continue;
310
311 // If this is the last edge to the successor, just replace MBB in the PHI
312 if (SuccCount == 1) {
313 OpMBB.setMBB(&NewMBB);
314 break;
315 }
316
317 // Otherwise, append a new pair of operands for the new incoming edge.
318 MI.addOperand(MF, OpV);
319 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
320 break;
321 }
322 }
323
324 // Inherit live-ins from the successor
325 for (auto &LI : Succ.liveins())
326 NewMBB.addLiveIn(LI);
327
328 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
329 << Succ.getName() << "'.\n");
330 return NewMBB;
331}
332
333/// Removing duplicate PHI operands to leave the PHI in a canonical and
334/// predictable form.
335///
336/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
337/// isn't what you might expect. We may have multiple entries in PHI nodes for
338/// a single predecessor. This makes CFG-updating extremely complex, so here we
339/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
340/// one entry per predecessor, regardless of how many edges there are.
// NOTE(review): listing lines 341-342 (the function signature and, presumably,
// the declaration of the `Preds` set used below) are absent from this copy —
// confirm against the original file.
343 SmallVector<int, 4> DupIndices;
344 for (auto &MBB : MF)
345 for (auto &MI : MBB) {
// PHIs are grouped at the top of a block, so stop at the first non-PHI.
346 if (!MI.isPHI())
347 break;
348
349 // First we scan the operands of the PHI looking for duplicate entries
350 // for a particular predecessor. We retain the operand index of each
351 // duplicate entry found.
352 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
353 OpIdx += 2)
354 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
355 DupIndices.push_back(OpIdx);
356
357 // Now walk the duplicate indices, removing both the block and value. Note
358 // that these are stored as a vector making this element-wise removal
359 // potentially quadratic.
360 //
361 // FIXME: It is really frustrating that we have to use a quadratic
362 // removal algorithm here. There should be a better way, but the use-def
363 // updates required make that impossible using the public API.
364 //
365 // Note that we have to process these backwards so that we don't
366 // invalidate other indices with each removal.
367 while (!DupIndices.empty()) {
368 int OpIdx = DupIndices.pop_back_val();
369 // Remove both the block and value operand, again in reverse order to
370 // preserve indices.
371 MI.removeOperand(OpIdx + 1);
372 MI.removeOperand(OpIdx);
373 }
374
// Reset the per-PHI predecessor set before examining the next PHI.
375 Preds.clear();
376 }
377}
378
379/// Helper to scan a function for loads vulnerable to misspeculation that we
380/// want to harden.
381///
382/// We use this to avoid making changes to functions where there is nothing we
383/// need to do to harden against misspeculation.
385 for (MachineBasicBlock &MBB : MF) {
386 for (MachineInstr &MI : MBB) {
387 // Loads within this basic block after an LFENCE are not at risk of
388 // speculatively executing with invalid predicates from prior control
389 // flow. So break out of this block but continue scanning the function.
390 if (MI.getOpcode() == X86::LFENCE)
391 break;
392
393 // Looking for loads only.
394 if (!MI.mayLoad())
395 continue;
396
397 // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
398 if (MI.getOpcode() == X86::MFENCE)
399 continue;
400
401 // We found a load.
402 return true;
403 }
404 }
405
406 // No loads found.
407 return false;
408}
409
// Runs speculative load hardening over MF.
//
// Returns false when the pass is not requested for this function or the
// function has no blocks; returns true on every other path, including the
// early exit taken when there are neither loads nor conditional branches to
// handle.
//
// NOTE(review): listing lines 416, 433, 484, 510, 527 and 540 are absent from
// this copy. Each gap is marked below; most are the opening `if` of the
// brace-delimited region that follows. Confirm against the original file.
410bool X86SpeculativeLoadHardeningImpl::run(MachineFunction &MF) {
411 LLVM_DEBUG(dbgs() << "********** " << X86SLHPassName << " : " << MF.getName()
412 << " **********\n");
413
414 // Only run if this pass is forced enabled or we detect the relevant function
415 // attribute requesting SLH.
// NOTE(review): the first half of this condition (listing line 416) is absent.
417 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
418 return false;
419
// Cache the per-function target objects used by the helpers.
420 Subtarget = &MF.getSubtarget<X86Subtarget>();
421 MRI = &MF.getRegInfo();
422 TII = Subtarget->getInstrInfo();
423 TRI = Subtarget->getRegisterInfo();
424
425 // FIXME: Support for 32-bit.
426 PS.emplace(MF, &X86::GR64_NOSPRegClass);
427
428 if (MF.begin() == MF.end())
429 // Nothing to do for a degenerate empty function...
430 return false;
431
432 // We support an alternative hardening technique based on a debug flag.
// NOTE(review): the `if` opening this branch (listing line 433) is absent.
434 hardenEdgesWithLFENCE(MF);
435 return true;
436 }
437
438 // Create a dummy debug loc to use for all the generated code here.
439 DebugLoc Loc;
440
441 MachineBasicBlock &Entry = *MF.begin();
442 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
443
444 // Do a quick scan to see if we have any checkable loads.
445 bool HasVulnerableLoad = hasVulnerableLoad(MF);
446
447 // See if we have any conditional branching blocks that we will need to trace
448 // predicate state through.
449 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
450
451 // If we have no interesting conditions or loads, nothing to do here.
452 if (!HasVulnerableLoad && Infos.empty())
453 return true;
454
455 // The poison value is required to be an all-ones value for many aspects of
456 // this mitigation.
457 const int PoisonVal = -1;
458 PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
459 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
460 .addImm(PoisonVal);
461 ++NumInstsInserted;
462
463 // If we have loads being hardened and we've asked for call and ret edges to
464 // get a full fence-based mitigation, inject that fence.
465 if (HasVulnerableLoad && FenceCallAndRet) {
466 // We need to insert an LFENCE at the start of the function to suspend any
467 // incoming misspeculation from the caller. This helps two-fold: the caller
468 // may not have been protected as this code has been, and this code gets to
469 // not take any specific action to protect across calls.
470 // FIXME: We could skip this for functions which unconditionally return
471 // a constant.
472 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
473 ++NumInstsInserted;
474 ++NumLFENCEsInserted;
475 }
476
477 // If we guarded the entry with an LFENCE and have no conditionals to protect
478 // in blocks, then we're done.
479 if (FenceCallAndRet && Infos.empty())
480 // We may have changed the function's code at this point to insert fences.
481 return true;
482
483 // For every basic block in the function which can b
// NOTE(review): the comment above is cut off mid-word, and the `if` opening
// the branch below (listing line 484) is absent from this copy.
485 // Set up the predicate state by extracting it from the incoming stack
486 // pointer so we pick up any misspeculation in our caller.
487 PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
488 } else {
489 // Otherwise, just build the predicate state itself by zeroing a register
490 // as we don't need any initial state.
491 PS->InitialReg = MRI->createVirtualRegister(PS->RC);
492 Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
493 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
494 PredStateSubReg);
495 ++NumInstsInserted;
// The zeroing instruction's implicit EFLAGS def is unused here, so flag it
// dead.
496 MachineOperand *ZeroEFLAGSDefOp =
497 ZeroI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
498 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
499 "Must have an implicit def of EFLAGS!");
500 ZeroEFLAGSDefOp->setIsDead(true);
// Widen the zeroed 32-bit register into the 64-bit predicate-state register.
501 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
502 PS->InitialReg)
503 .addReg(PredStateSubReg)
504 .addImm(X86::sub_32bit);
505 }
506
507 // We're going to need to trace predicate state throughout the function's
508 // CFG. Prepare for this by setting up our initial state of PHIs with unique
509 // predecessor entries and all the initial predicate state.
// NOTE(review): the statement this comment describes (listing line 510) is
// absent from this copy.
511
512 // Track the updated values in an SSA updater to rewrite into SSA form at the
513 // end.
514 PS->SSA.Initialize(PS->InitialReg);
515 PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
516
517 // Trace through the CFG.
518 auto CMovs = tracePredStateThroughCFG(MF, Infos);
519
520 // We may also enter basic blocks in this function via exception handling
521 // control flow. Here, if we are hardening interprocedurally, we need to
522 // re-capture the predicate state from the throwing code. In the Itanium ABI,
523 // the throw will always look like a call to __cxa_throw and will have the
524 // predicate state in the stack pointer, so extract fresh predicate state from
525 // the stack pointer and make it available in SSA.
526 // FIXME: Handle non-itanium ABI EH models.
// NOTE(review): the `if` opening this region (listing line 527) is absent.
528 for (MachineBasicBlock &MBB : MF) {
529 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
530 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
531 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
532 if (!MBB.isEHPad())
533 continue;
534 PS->SSA.AddAvailableValue(
535 &MBB,
536 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
537 }
538 }
539
// NOTE(review): the `if` opening this region (listing line 540) is absent.
541 // If we are going to harden calls and jumps we need to unfold their memory
542 // operands.
543 unfoldCallAndJumpLoads(MF);
544
545 // Then we trace predicate state through the indirect branches.
546 auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
547 CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
548 }
549
550 // Now that we have the predicate state available at the start of each block
551 // in the CFG, trace it through each block, hardening vulnerable instructions
552 // as we go.
553 tracePredStateThroughBlocksAndHarden(MF);
554
555 // Now rewrite all the uses of the pred state using the SSA updater to insert
556 // PHIs connecting the state between blocks along the CFG edges.
557 for (MachineInstr *CMovI : CMovs)
558 for (MachineOperand &Op : CMovI->operands()) {
// Only operands still naming the placeholder InitialReg need rewriting.
559 if (!Op.isReg() || Op.getReg() != PS->InitialReg)
560 continue;
561
562 PS->SSA.RewriteUse(Op);
563 }
564
565 return true;
566}
567
568/// Implements the naive hardening approach of putting an LFENCE after every
569/// potentially mis-predicted control flow construct.
570///
571/// We include this as an alternative mostly for the purpose of comparison. The
572/// performance impact of this is expected to be extremely severe and not
573/// practical for any real-world users.
574void X86SpeculativeLoadHardeningImpl::hardenEdgesWithLFENCE(
575 MachineFunction &MF) {
576 // First, we scan the function looking for blocks that are reached along edges
577 // that we might want to harden.
578 SmallSetVector<MachineBasicBlock *, 8> Blocks;
579 for (MachineBasicBlock &MBB : MF) {
580 // If there are no or only one successor, nothing to do here.
581 if (MBB.succ_size() <= 1)
582 continue;
583
584 // Skip blocks unless their terminators start with a branch. Other
585 // terminators don't seem interesting for guarding against misspeculation.
586 auto TermIt = MBB.getFirstTerminator();
587 if (TermIt == MBB.end() || !TermIt->isBranch())
588 continue;
589
590 // Add all the non-EH-pad succossors to the blocks we want to harden. We
591 // skip EH pads because there isn't really a condition of interest on
592 // entering.
593 for (MachineBasicBlock *SuccMBB : MBB.successors())
594 if (!SuccMBB->isEHPad())
595 Blocks.insert(SuccMBB);
596 }
597
598 for (MachineBasicBlock *MBB : Blocks) {
599 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
600 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
601 ++NumInstsInserted;
602 ++NumLFENCEsInserted;
603 }
604}
605
// Builds one BlockCondInfo for every block of MF that has more than one
// successor and whose terminators yield at least one conditional branch to
// trace. Blocks for which no conditional branch can be collected are counted
// in NumBranchesUntraced and skipped.
//
// NOTE(review): listing lines 606 (return type), 608 (presumably the
// declaration of `Infos`) and 669 (the `if` opening the invalid-condition
// branch) are absent from this copy — confirm against the original file.
607X86SpeculativeLoadHardeningImpl::collectBlockCondInfo(MachineFunction &MF) {
609
610 // Walk the function and build up a summary for each block's conditions that
611 // we need to trace through.
612 for (MachineBasicBlock &MBB : MF) {
613 // If there are no or only one successor, nothing to do here.
614 if (MBB.succ_size() <= 1)
615 continue;
616
617 // We want to reliably handle any conditional branch terminators in the
618 // MBB, so we manually analyze the branch. We can handle all of the
619 // permutations here, including ones that analyze branch cannot.
620 //
621 // The approach is to walk backwards across the terminators, resetting at
622 // any unconditional non-indirect branch, and track all conditional edges
623 // to basic blocks as well as the fallthrough or unconditional successor
624 // edge. For each conditional edge, we track the target and the opposite
625 // condition code in order to inject a "no-op" cmov into that successor
626 // that will harden the predicate. For the fallthrough/unconditional
627 // edge, we inject a separate cmov for each conditional branch with
628 // matching condition codes. This effectively implements an "and" of the
629 // condition flags, even if there isn't a single condition flag that would
630 // directly implement that. We don't bother trying to optimize either of
631 // these cases because if such an optimization is possible, LLVM should
632 // have optimized the conditional *branches* in that way already to reduce
633 // instruction count. This late, we simply assume the minimal number of
634 // branch instructions is being emitted and use that to guide our cmov
635 // insertion.
636
// Start with no conditional branches and no unconditional branch recorded.
637 BlockCondInfo Info = {&MBB, {}, nullptr};
638
639 // Now walk backwards through the terminators and build up successors they
640 // reach and the conditions.
641 for (MachineInstr &MI : llvm::reverse(MBB)) {
642 // Once we've handled all the terminators, we're done.
643 if (!MI.isTerminator())
644 break;
645
646 // If we see a non-branch terminator, we can't handle anything so bail.
647 if (!MI.isBranch()) {
648 Info.CondBrs.clear();
649 break;
650 }
651
652 // If we see an unconditional branch, reset our state, clear any
653 // fallthrough, and set this as the "else" successor.
654 if (MI.getOpcode() == X86::JMP_1) {
655 Info.CondBrs.clear();
656 Info.UncondBr = &MI;
657 continue;
658 }
659
660 // If we get an invalid condition, we have an indirect branch or some
661 // other unanalyzable "fallthrough" case. We model this as a nullptr for
662 // the destination so we can still guard any conditional successors.
663 // Consider code sequences like:
664 // ```
665 // jCC L1
666 // jmpq *%rax
667 // ```
668 // We still want to harden the edge to `L1`.
// NOTE(review): the `if` testing for the invalid condition (listing line 669)
// is absent here.
670 Info.CondBrs.clear();
671 Info.UncondBr = &MI;
672 continue;
673 }
674
675 // We have a vanilla conditional branch, add it to our list.
676 Info.CondBrs.push_back(&MI);
677 }
678 if (Info.CondBrs.empty()) {
679 ++NumBranchesUntraced;
680 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
681 MBB.dump());
682 continue;
683 }
684
685 Infos.push_back(Info);
686 }
687
688 return Infos;
689}
690
691/// Trace the predicate state through the CFG, instrumenting each conditional
692/// branch such that misspeculation through an edge will poison the predicate
693/// state.
694///
695/// Returns the list of inserted CMov instructions so that they can have their
696/// uses of the predicate state rewritten into proper SSA form once it is
697/// complete.
// NOTE(review): listing lines 698 (return type), 703 (presumably the
// declaration of `CMovs`), 736 (the lambda's last parameter, used below as
// `Conds`), 799-800 (presumably the definitions of `Cond` and `InvCond`) and
// 816 (the statement described by the "normalize the probabilities" comment)
// are absent from this copy — confirm against the original file.
699X86SpeculativeLoadHardeningImpl::tracePredStateThroughCFG(
700 MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
701 // Collect the inserted cmov instructions so we can rewrite their uses of the
702 // predicate state into SSA form.
704
705 // Now walk all of the basic blocks looking for ones that end in conditional
706 // jumps where we need to update this register along each edge.
707 for (const BlockCondInfo &Info : Infos) {
708 MachineBasicBlock &MBB = *Info.MBB;
709 const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
710 MachineInstr *UncondBr = Info.UncondBr;
711
712 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
713 << "\n");
714 ++NumCondBranchesTraced;
715
716 // Compute the non-conditional successor as either the target of any
717 // unconditional branch or the layout successor.
// A recorded UncondBr that is not a JMP_1 has no block target we can read, so
// the successor is left null in that case.
718 MachineBasicBlock *UncondSucc =
719 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
720 ? UncondBr->getOperand(0).getMBB()
721 : nullptr)
722 : &*std::next(MachineFunction::iterator(&MBB));
723
724 // Count how many edges there are to any given successor.
725 SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
726 if (UncondSucc)
727 ++SuccCounts[UncondSucc];
728 for (auto *CondBr : CondBrs)
729 ++SuccCounts[CondBr->getOperand(0).getMBB()];
730
731 // A lambda to insert cmov instructions into a block checking all of the
732 // condition codes in a sequence.
733 auto BuildCheckingBlockForSuccAndConds =
734 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
735 MachineInstr *Br, MachineInstr *&UncondBr,
737 // First, we split the edge to insert the checking block into a safe
738 // location.
// When this is the only edge into Succ, Succ itself can host the checks and
// no split is needed.
739 auto &CheckingMBB =
740 (SuccCount == 1 && Succ.pred_size() == 1)
741 ? Succ
742 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
743
// The cmovs read EFLAGS, so it must be live into the checking block.
744 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
745 if (!LiveEFLAGS)
746 CheckingMBB.addLiveIn(X86::EFLAGS);
747
748 // Now insert the cmovs to implement the checks.
749 auto InsertPt = CheckingMBB.begin();
750 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
751 "Should never have a PHI in the initial checking block as it "
752 "always has a single predecessor!");
753
754 // We will wire each cmov to each other, but need to start with the
755 // incoming pred state.
756 Register CurStateReg = PS->InitialReg;
757
758 for (X86::CondCode Cond : Conds) {
759 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
760 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
761
762 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
763 // Note that we intentionally use an empty debug location so that
764 // this picks up the preceding location.
765 auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
766 TII->get(CMovOp), UpdatedStateReg)
767 .addReg(CurStateReg)
768 .addReg(PS->PoisonReg)
769 .addImm(Cond);
770 // If this is the last cmov and the EFLAGS weren't originally
771 // live-in, mark them as killed.
772 if (!LiveEFLAGS && Cond == Conds.back())
773 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
774 ->setIsKill(true);
775
776 ++NumInstsInserted;
777 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
778 dbgs() << "\n");
779
780 // The first one of the cmovs will be using the top level
781 // `PredStateReg` and need to get rewritten into SSA form.
782 if (CurStateReg == PS->InitialReg)
783 CMovs.push_back(&*CMovI);
784
785 // The next cmov should start from this one's def.
786 CurStateReg = UpdatedStateReg;
787 }
788
789 // And put the last one into the available values for SSA form of our
790 // predicate state.
791 PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
792 };
793
// Condition codes of the conditional branches; reused below to guard the
// unconditional/fallthrough edge.
794 std::vector<X86::CondCode> UncondCodeSeq;
795 for (auto *CondBr : CondBrs) {
796 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
797 int &SuccCount = SuccCounts[&Succ];
798
801 UncondCodeSeq.push_back(Cond);
802
803 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
804 {InvCond});
805
806 // Decrement the successor count now that we've split one of the edges.
807 // We need to keep the count of edges to the successor accurate in order
808 // to know above when to *replace* the successor in the CFG vs. just
809 // adding the new successor.
810 --SuccCount;
811 }
812
813 // Since we may have split edges and changed the number of successors,
814 // normalize the probabilities. This avoids doing it each time we split an
815 // edge.
817
818 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
819 // need to intersect the other condition codes. We can do this by just
820 // doing a cmov for each one.
821 if (!UncondSucc)
822 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
823 // just skip this and continue.
824 continue;
825
826 assert(SuccCounts[UncondSucc] == 1 &&
827 "We should never have more than one edge to the unconditional "
828 "successor at this point because every other edge must have been "
829 "split above!");
830
831 // Sort and unique the codes to minimize them.
832 llvm::sort(UncondCodeSeq);
833 UncondCodeSeq.erase(llvm::unique(UncondCodeSeq), UncondCodeSeq.end());
834
835 // Build a checking version of the successor.
836 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
837 UncondBr, UncondBr, UncondCodeSeq);
838 }
839
840 return CMovs;
841}
842
843/// Compute the register class for the unfolded load.
844///
845/// FIXME: This should probably live in X86InstrInfo, potentially by adding
846/// a way to unfold into a newly created vreg rather than requiring a register
847/// input.
848static const TargetRegisterClass *
850 unsigned Index;
851 unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
852 Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
853 const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
854 return TII.getRegClass(MCID, Index);
855}
856
// Rewrites every call or branch in MF that loads its target from memory into
// separate instructions produced by the target's unfoldMemoryOperand, using a
// fresh virtual register for the loaded value. Far calls/jumps are left
// untouched; any other unexpected loading call or branch is a fatal error.
//
// NOTE(review): listing lines 872 and 912 (the openers of the two debug
// statements whose tails are visible below) are absent from this copy —
// confirm against the original file.
857void X86SpeculativeLoadHardeningImpl::unfoldCallAndJumpLoads(
858 MachineFunction &MF) {
859 for (MachineBasicBlock &MBB : MF)
860 // We use make_early_inc_range here so we can remove instructions if needed
861 // without disturbing the iteration.
862 for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) {
863 // Must either be a call or a branch.
864 if (!MI.isCall() && !MI.isBranch())
865 continue;
866 // We only care about loading variants of these instructions.
867 if (!MI.mayLoad())
868 continue;
869
870 switch (MI.getOpcode()) {
871 default: {
873 dbgs() << "ERROR: Found an unexpected loading branch or call "
874 "instruction:\n";
875 MI.dump(); dbgs() << "\n");
876 report_fatal_error("Unexpected loading branch or call!");
877 }
878
879 case X86::FARCALL16m:
880 case X86::FARCALL32m:
881 case X86::FARCALL64m:
882 case X86::FARJMP16m:
883 case X86::FARJMP32m:
884 case X86::FARJMP64m:
885 // We cannot mitigate far jumps or calls, but we also don't expect them
886 // to be vulnerable to Spectre v1.2 style attacks.
887 continue;
888
889 case X86::CALL16m:
890 case X86::CALL16m_NT:
891 case X86::CALL32m:
892 case X86::CALL32m_NT:
893 case X86::CALL64m:
894 case X86::CALL64m_NT:
895 case X86::JMP16m:
896 case X86::JMP16m_NT:
897 case X86::JMP32m:
898 case X86::JMP32m_NT:
899 case X86::JMP64m:
900 case X86::JMP64m_NT:
901 case X86::TAILJMPm64:
902 case X86::TAILJMPm64_REX:
903 case X86::TAILJMPm:
904 case X86::TCRETURNmi64:
905 case X86::TCRETURN_WINmi64:
906 case X86::TCRETURNmi: {
907 // Use the generic unfold logic now that we know we're dealing with
908 // expected instructions.
909 // FIXME: We don't have test coverage for all of these!
910 auto *UnfoldedRC = getRegClassForUnfoldedLoad(*TII, MI.getOpcode());
911 if (!UnfoldedRC) {
913 << "ERROR: Unable to unfold load from instruction:\n";
914 MI.dump(); dbgs() << "\n");
915 report_fatal_error("Unable to unfold load!");
916 }
917 Register Reg = MRI->createVirtualRegister(UnfoldedRC);
918 SmallVector<MachineInstr *, 2> NewMIs;
919 // If we were able to compute an unfolded reg class, any failure here
920 // is just a programming error so just assert.
921 bool Unfolded =
922 TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
923 /*UnfoldStore*/ false, NewMIs);
// Keeps `Unfolded` referenced when the assert below is compiled out.
924 (void)Unfolded;
925 assert(Unfolded &&
926 "Computed unfolded register class but failed to unfold");
927 // Now stitch the new instructions into place and erase the old one.
928 for (auto *NewMI : NewMIs)
929 MBB.insert(MI.getIterator(), NewMI);
930
931 // Update the call info.
932 if (MI.isCandidateForAdditionalCallInfo())
933 MF.eraseAdditionalCallInfo(&MI);
934
935 MI.eraseFromParent();
936 LLVM_DEBUG({
937 dbgs() << "Unfolded load successfully into:\n";
938 for (auto *NewMI : NewMIs) {
939 NewMI->dump();
940 dbgs() << "\n";
941 }
942 });
943 continue;
944 }
945 }
// Every switch case above either continues the loop or aborts, so control
// never reaches this point.
946 llvm_unreachable("Escaped switch with default!");
947 }
948}
949
950/// Trace the predicate state through indirect branches, instrumenting them to
951/// poison the state if a target is reached that does not match the expected
952/// target.
953///
954/// This is designed to mitigate Spectre variant 1 attacks where an indirect
955/// branch is trained to predict a particular target and then mispredicts that
956/// target in a way that can leak data. Despite using an indirect branch, this
957/// is really a variant 1 style attack: it does not steer execution to an
958/// arbitrary or attacker controlled address, and it does not require any
959/// special code executing next to the victim. This attack can also be mitigated
960/// through retpolines, but those require either replacing indirect branches
961/// with conditional direct branches or lowering them through a device that
962/// blocks speculation. This mitigation can replace these retpoline-style
963/// mitigations for jump tables and other indirect branches within a function
964/// when variant 2 isn't a risk while allowing limited speculation. Indirect
965/// calls, however, cannot be mitigated through this technique without changing
966/// the ABI in a fundamental way.
///
/// The function works in two walks over \p MF: the first records the target
/// register of every block ending in a `JMP64r` and collects those blocks'
/// successors; the second inserts, at the top of each collected successor, a
/// compare of the incoming target address against the block's own address
/// followed by a cmov that produces the updated predicate state. The inserted
/// cmov instructions are collected in `CMovs` and returned.
     // NOTE(review): listing line 967 (presumably the return type) is missing
     // from this extract.
968X86SpeculativeLoadHardeningImpl::tracePredStateThroughIndirectBranches(
969 MachineFunction &MF) {
970 // We use the SSAUpdater to insert PHI nodes for the target addresses of
971 // indirect branches. We don't actually need the full power of the SSA updater
972 // in this particular case as we always have immediately available values, but
973 // this avoids us having to re-implement the PHI construction logic.
974 MachineSSAUpdater TargetAddrSSA(MF);
975 TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
976
977 // Track which blocks were terminated with an indirect branch.
978 SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
979
980 // We need to know what blocks end up reached via indirect branches. We
981 // expect this to be a subset of those whose address is taken and so track it
982 // directly via the CFG.
983 SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
984
985 // Walk all the blocks which end in an indirect branch and make the
986 // target address available.
987 for (MachineBasicBlock &MBB : MF) {
988 // Find the last terminator.
     // This walks backwards from the end of the block, skipping debug
     // instructions; a block with nothing else is skipped entirely.
989 auto MII = MBB.instr_rbegin();
990 while (MII != MBB.instr_rend() && MII->isDebugInstr())
991 ++MII;
992 if (MII == MBB.instr_rend())
993 continue;
994 MachineInstr &TI = *MII;
995 if (!TI.isTerminator() || !TI.isBranch())
996 // No terminator or non-branch terminator.
997 continue;
998
     // Only assigned by the JMP64r case below, which is the only case that
     // falls out of the switch rather than continuing or aborting.
999 Register TargetReg;
1000
1001 switch (TI.getOpcode()) {
1002 default:
1003 // Direct branch or conditional branch (leading to fallthrough).
1004 continue;
1005
1006 case X86::FARJMP16m:
1007 case X86::FARJMP32m:
1008 case X86::FARJMP64m:
1009 // We cannot mitigate far jumps or calls, but we also don't expect them
1010 // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1011 continue;
1012
1013 case X86::JMP16m:
1014 case X86::JMP16m_NT:
1015 case X86::JMP32m:
1016 case X86::JMP32m_NT:
1017 case X86::JMP64m:
1018 case X86::JMP64m_NT:
1019 // Mostly as documentation.
1020 report_fatal_error("Memory operand jumps should have been unfolded!");
1021
1022 case X86::JMP16r:
     // NOTE(review): listing line 1023 is missing from this extract; the string
     // below is presumably the argument of a fatal-error call like the one
     // above -- confirm.
1024 "Support for 16-bit indirect branches is not implemented.");
1025 case X86::JMP32r:
     // NOTE(review): listing line 1026 is missing from this extract; same
     // presumed shape as the 16-bit case.
1027 "Support for 32-bit indirect branches is not implemented.");
1028
1029 case X86::JMP64r:
     // The branch target is the register in operand 0 of the jump.
1030 TargetReg = TI.getOperand(0).getReg();
1031 }
1032
1033 // We have definitely found an indirect branch. Verify that there are no
1034 // preceding conditional branches as we don't yet support that.
     // Concretely: any non-debug terminator other than TI itself is rejected.
1035 if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1036 return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1037 })) {
1038 LLVM_DEBUG({
1039 dbgs() << "ERROR: Found other terminators in a block with an indirect "
1040 "branch! This is not yet supported! Terminator sequence:\n";
1041 for (MachineInstr &MI : MBB.terminators()) {
1042 MI.dump();
1043 dbgs() << '\n';
1044 }
1045 });
1046 report_fatal_error("Unimplemented terminator sequence!");
1047 }
1048
1049 // Make the target register an available value for this block.
1050 TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1051 IndirectTerminatedMBBs.insert(&MBB);
1052
1053 // Add all the successors to our target candidates.
1054 IndirectTargetMBBs.insert_range(MBB.successors());
1055 }
1056
1057 // Keep track of the cmov instructions we insert so we can return them.
     // NOTE(review): listing line 1058 is missing from this extract; it
     // presumably declares the `CMovs` container used and returned below.
1059
1060 // If we didn't find any indirect branches with targets, nothing to do here.
1061 if (IndirectTargetMBBs.empty())
1062 return CMovs;
1063
1064 // We found indirect branches and targets that need to be instrumented to
1065 // harden loads within them. Walk the blocks of the function (to get a stable
1066 // ordering) and instrument each target of an indirect branch.
1067 for (MachineBasicBlock &MBB : MF) {
1068 // Skip the blocks that aren't candidate targets.
1069 if (!IndirectTargetMBBs.count(&MBB))
1070 continue;
1071
1072 // We don't expect EH pads to ever be reached via an indirect branch. If
1073 // this is desired for some reason, we could simply skip them here rather
1074 // than asserting.
1075 assert(!MBB.isEHPad() &&
1076 "Unexpected EH pad as target of an indirect branch!");
1077
1078 // We should never end up threading EFLAGS into a block to harden
1079 // conditional jumps as there would be an additional successor via the
1080 // indirect branch. As a consequence, all such edges would be split before
1081 // reaching here, and the inserted block will handle the EFLAGS-based
1082 // hardening.
1083 assert(!MBB.isLiveIn(X86::EFLAGS) &&
1084 "Cannot check within a block that already has live-in EFLAGS!");
1085
1086 // We can't handle having non-indirect edges into this block unless this is
1087 // the only successor and we can synthesize the necessary target address.
1088 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1089 // If we've already handled this by extracting the target directly,
1090 // nothing to do.
1091 if (IndirectTerminatedMBBs.count(Pred))
1092 continue;
1093
1094 // Otherwise, we have to be the only successor. We generally expect this
1095 // to be true as conditional branches should have had a critical edge
1096 // split already. We don't however need to worry about EH pad successors
1097 // as they'll happily ignore the target and their hardening strategy is
1098 // resilient to all ways in which they could be reached speculatively.
1099 if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1100 return Succ->isEHPad() || Succ == &MBB;
1101 })) {
1102 LLVM_DEBUG({
1103 dbgs() << "ERROR: Found conditional entry to target of indirect "
1104 "branch!\n";
1105 Pred->dump();
1106 MBB.dump();
1107 });
1108 report_fatal_error("Cannot harden a conditional entry to a target of "
1109 "an indirect branch!");
1110 }
1111
1112 // Now we need to compute the address of this block and install it as a
1113 // synthetic target in the predecessor. We do this at the bottom of the
1114 // predecessor.
1115 auto InsertPt = Pred->getFirstTerminator();
1116 Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
     // Small code model without position independence uses an immediate move;
     // every other configuration uses a RIP-relative LEA.
1117 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1118 !Subtarget->isPositionIndependent()) {
1119 // Directly materialize it into an immediate.
1120 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1121 TII->get(X86::MOV64ri32), TargetReg)
1122 .addMBB(&MBB);
1123 ++NumInstsInserted;
1124 (void)AddrI;
1125 LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1126 dbgs() << "\n");
1127 } else {
1128 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1129 TargetReg)
1130 .addReg(/*Base*/ X86::RIP)
1131 .addImm(/*Scale*/ 1)
1132 .addReg(/*Index*/ 0)
1133 .addMBB(&MBB)
1134 .addReg(/*Segment*/ 0);
1135 ++NumInstsInserted;
1136 (void)AddrI;
1137 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1138 dbgs() << "\n");
1139 }
1140 // And make this available.
1141 TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1142 }
1143
1144 // Materialize the needed SSA value of the target. Note that we need the
1145 // middle of the block as this block might at the bottom have an indirect
1146 // branch back to itself. We can do this here because at this point, every
1147 // predecessor of this block has an available value. This is basically just
1148 // automating the construction of a PHI node for this target.
1149 Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1150
1151 // Insert a comparison of the incoming target register with this block's
1152 // address. This also requires us to mark the block as having its address
1153 // taken explicitly.
     // NOTE(review): listing line 1154 is missing from this extract; it is
     // presumably the statement that marks the block as address-taken, as the
     // comment above describes -- confirm.
1155 auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1156 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1157 !Subtarget->isPositionIndependent()) {
1158 // Check directly against a relocated immediate when we can.
1159 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1160 .addReg(TargetReg, RegState::Kill)
1161 .addMBB(&MBB);
1162 ++NumInstsInserted;
1163 (void)CheckI;
1164 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1165 } else {
1166 // Otherwise compute the address into a register first.
1167 Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1168 auto AddrI =
1169 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1170 .addReg(/*Base*/ X86::RIP)
1171 .addImm(/*Scale*/ 1)
1172 .addReg(/*Index*/ 0)
1173 .addMBB(&MBB)
1174 .addReg(/*Segment*/ 0);
1175 ++NumInstsInserted;
1176 (void)AddrI;
1177 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1178 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1179 .addReg(TargetReg, RegState::Kill)
1180 .addReg(AddrReg, RegState::Kill);
1181 ++NumInstsInserted;
1182 (void)CheckI;
1183 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1184 }
1185
1186 // Now cmov over the predicate if the comparison wasn't equal.
     // The cmov opcode is selected from the byte width of the predicate state
     // register class.
1187 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1188 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1189 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1190 auto CMovI =
1191 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1192 .addReg(PS->InitialReg)
1193 .addReg(PS->PoisonReg)
     // NOTE(review): listing line 1194 is missing from this extract; it
     // presumably supplies the remaining operand (the "not equal" condition
     // mentioned above) and terminates the statement -- confirm.
     // The EFLAGS use on the cmov is then marked as a kill.
1195 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
1196 ->setIsKill(true);
1197 ++NumInstsInserted;
1198 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1199 CMovs.push_back(&*CMovI);
1200
1201 // And put the new value into the available values for SSA form of our
1202 // predicate state.
1203 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1204 }
1205
1206 // Return all the newly inserted cmov instructions of the predicate state.
1207 return CMovs;
1208}
1209
1210// Returns true if MI has a register def operand for EFLAGS and that def is
1211// not marked dead. Returns false when the def is dead or when MI has no EFLAGS
     // def operand at all.
1212static bool isEFLAGSDefLive(const MachineInstr &MI) {
1213 if (const MachineOperand *DefOp =
1214 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1215 return !DefOp->isDead();
1216 }
1217 return false;
1218}
1219
// NOTE(review): listing line 1220 (the start of this signature) is missing
// from this extract. The call site in this file invokes it as
// `isEFLAGSLive(MBB, MI.getIterator(), *TRI)`.
//
// Scans backwards over the instructions in [MBB.begin(), I) -- the instruction
// at `I` itself is not examined -- and decides whether EFLAGS is live at `I`:
// the nearest EFLAGS def decides by its dead flag, an instruction that kills
// EFLAGS means not live, and if neither is found the block's live-in set is
// consulted.
1221 const TargetRegisterInfo &TRI) {
1222 // Check if EFLAGS are alive by seeing if there is a def of them or they
1223 // live-in, and then seeing if that def is in turn used.
1224 for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1225 if (MachineOperand *DefOp =
1226 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1227 // If the def is dead, then EFLAGS is not live.
1228 if (DefOp->isDead())
1229 return false;
1230
1231 // Otherwise we've def'ed it, and it is live.
1232 return true;
1233 }
1234 // While at this instruction, also check if we use and kill EFLAGS
1235 // which means it isn't live.
1236 if (MI.killsRegister(X86::EFLAGS, &TRI))
1237 return false;
1238 }
1239
1240 // If we didn't find anything conclusive (neither definitely alive or
1241 // definitely dead) return whether it lives into the block.
1242 return MBB.isLiveIn(X86::EFLAGS);
1243}
1244
1245/// Trace the predicate state through each of the blocks in the function,
1246/// hardening everything necessary along the way.
1247///
1248/// We call this routine once the initial predicate state has been established
1249/// for each basic block in the function in the SSA updater. This routine traces
1250/// it through the instructions within each basic block, and for non-returning
1251/// blocks informs the SSA updater about the final state that lives out of the
1252/// block. Along the way, it hardens any vulnerable instruction using the
1253/// currently valid predicate state. We have to do these two things together
1254/// because the SSA updater only works across blocks. Within a block, we track
1255/// the current predicate state directly and update it as it changes.
1256///
1257/// This operates in two passes over each block. First, we analyze the loads in
1258/// the block to determine which strategy will be used to harden them: hardening
1259/// the address or hardening the loaded value when loaded into a register
1260/// amenable to hardening. We have to process these first because the two
1261/// strategies may interact -- later hardening may change what strategy we wish
1262/// to use. We also will analyze data dependencies between loads and avoid
1263/// hardening those loads that are data dependent on a load with a hardened
1264/// address. We also skip hardening loads already behind an LFENCE as that is
1265/// sufficient to harden them against misspeculation.
1266///
1267/// Second, we actively trace the predicate state through the block, applying
1268/// the hardening steps we determined necessary in the first pass as we go.
1269///
1270/// These two passes are applied to each basic block. We operate one block at a
1271/// time to simplify reasoning about reachability and sequencing.
1272void X86SpeculativeLoadHardeningImpl::tracePredStateThroughBlocksAndHarden(
1273 MachineFunction &MF) {
     // Loads selected in the first pass to have their loaded value hardened.
1274 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
     // Loads selected in the first pass to have their address hardened.
1275 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1276
     // Registers recorded during the first pass as address-hardened, plus the
     // result registers of loads queued for post-load hardening.
1277 SmallSet<Register, 16> HardenedAddrRegs;
1278
     // Map handed to the hardening helpers in the second pass; hardenLoadAddr
     // looks up an operand's register here and substitutes the mapped register.
1279 SmallDenseMap<Register, Register, 32> AddrRegToHardenedReg;
1280
1281 // Track the set of load-dependent registers through the basic block. Because
1282 // the values of these registers have an existing data dependency on a loaded
1283 // value which we would have checked, we can omit any checks on them.
1284 SparseBitVector<> LoadDepRegs;
1285
     // All five containers above are cleared at the bottom of each iteration,
     // so no state carries over from one block to the next.
1286 for (MachineBasicBlock &MBB : MF) {
1287 // The first pass over the block: collect all the loads which can have their
1288 // loaded value hardened and all the loads that instead need their address
1289 // hardened. During this walk we propagate load dependence for address
1290 // hardened loads and also look for LFENCE to stop hardening wherever
1291 // possible. When deciding whether or not to harden the loaded value or not,
1292 // we check to see if any registers used in the address will have been
1293 // hardened at this point and if so, harden any remaining address registers
1294 // as that often successfully re-uses hardened addresses and minimizes
1295 // instructions.
1296 //
1297 // FIXME: We should consider an aggressive mode where we continue to keep as
1298 // many loads value hardened even when some address register hardening would
1299 // be free (due to reuse).
1300 //
1301 // Note that we only need this pass if we are actually hardening loads.
1302 if (HardenLoads)
1303 for (MachineInstr &MI : MBB) {
1304 // We naively assume that all def'ed registers of an instruction have
1305 // a data dependency on all of their operands.
1306 // FIXME: Do a more careful analysis of x86 to build a conservative
1307 // model here.
1308 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1309 return Op.isReg() && LoadDepRegs.test(Op.getReg().id());
1310 }))
1311 for (MachineOperand &Def : MI.defs())
1312 if (Def.isReg())
1313 LoadDepRegs.set(Def.getReg().id());
1314
1315 // Both Intel and AMD are guiding that they will change the semantics of
1316 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1317 // no more need to guard things in this block.
1318 if (MI.getOpcode() == X86::LFENCE)
1319 break;
1320
1321 // If this instruction cannot load, nothing to do.
1322 if (!MI.mayLoad())
1323 continue;
1324
1325 // Some instructions which "load" are trivially safe or unimportant.
1326 if (MI.getOpcode() == X86::MFENCE)
1327 continue;
1328
1329 // Extract the memory operand information about this instruction.
1330 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1331 if (MemRefBeginIdx < 0) {
     // NOTE(review): listing line 1332 is missing from this extract; it
     // presumably opens the debug-output macro closed two lines below.
1333 << "WARNING: unable to harden loading instruction: ";
1334 MI.dump());
1335 continue;
1336 }
1337
1338 MachineOperand &BaseMO =
1339 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1340 MachineOperand &IndexMO =
1341 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1342
1343 // If we have at least one (non-frame-index, non-RIP) register operand,
1344 // and neither operand is load-dependent, we need to check the load.
     // BaseReg / IndexReg stay default-constructed when the corresponding
     // operand does not qualify.
1345 Register BaseReg, IndexReg;
1346 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1347 BaseMO.getReg().isValid())
1348 BaseReg = BaseMO.getReg();
1349 if (IndexMO.getReg().isValid())
1350 IndexReg = IndexMO.getReg();
1351
1352 if (!BaseReg && !IndexReg)
1353 // No register operands!
1354 continue;
1355
1356 // If any register operand is dependent, this load is dependent and we
1357 // needn't check it.
1358 // FIXME: Is this true in the case where we are hardening loads after
1359 // they complete? Unclear, need to investigate.
1360 if ((BaseReg && LoadDepRegs.test(BaseReg.id())) ||
1361 (IndexReg && LoadDepRegs.test(IndexReg.id())))
1362 continue;
1363
1364 // If post-load hardening is enabled, this load is compatible with
1365 // post-load hardening, and we aren't already going to harden one of the
1366 // address registers, queue it up to be hardened post-load. Notably,
1367 // even once hardened this won't introduce a useful dependency that
1368 // could prune out subsequent loads.
     // NOTE(review): listing line 1369 is missing from this extract; it
     // presumably opens the `if (` whose remaining conditions follow.
1370 !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1371 MI.getOperand(0).isReg() &&
1372 canHardenRegister(MI.getOperand(0).getReg()) &&
1373 !HardenedAddrRegs.count(BaseReg) &&
1374 !HardenedAddrRegs.count(IndexReg)) {
1375 HardenPostLoad.insert(&MI);
     // Recording the loaded register here makes the check above fail for later
     // loads addressed through it, so those take the address-hardening path.
1376 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1377 continue;
1378 }
1379
1380 // Record this instruction for address hardening and record its register
1381 // operands as being address-hardened.
1382 HardenLoadAddr.insert(&MI);
1383 if (BaseReg)
1384 HardenedAddrRegs.insert(BaseReg);
1385 if (IndexReg)
1386 HardenedAddrRegs.insert(IndexReg);
1387
     // Values defined by an address-hardened load become load-dependent.
1388 for (MachineOperand &Def : MI.defs())
1389 if (Def.isReg())
1390 LoadDepRegs.set(Def.getReg().id());
1391 }
1392
1393 // Now re-walk the instructions in the basic block, and apply whichever
1394 // hardening strategy we have elected. Note that we do this in a second
1395 // pass specifically so that we have the complete set of instructions for
1396 // which we will do post-load hardening and can defer it in certain
1397 // circumstances.
1398 for (MachineInstr &MI : MBB) {
1399 if (HardenLoads) {
1400 // We cannot both require hardening the def of a load and its address.
1401 assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1402 "Requested to harden both the address and def of a load!");
1403
1404 // Check if this is a load whose address needs to be hardened.
     // erase() doubles as the membership test and removes the entry.
1405 if (HardenLoadAddr.erase(&MI)) {
1406 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1407 assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1408
1409 MachineOperand &BaseMO =
1410 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1411 MachineOperand &IndexMO =
1412 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1413 hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1414 continue;
1415 }
1416
1417 // Test if this instruction is one of our post load instructions (and
1418 // remove it from the set if so).
1419 if (HardenPostLoad.erase(&MI)) {
1420 assert(!MI.isCall() && "Must not try to post-load harden a call!");
1421
1422 // If this is a data-invariant load and there is no EFLAGS
1423 // interference, we want to try and sink any hardening as far as
1424 // possible.
     // NOTE(review): listing line 1425 is missing from this extract; it
     // presumably opens the `if (...) {` for the condition described above,
     // closed by the brace after the sinking logic.
1426 // Sink the instruction we'll need to harden as far as we can down
1427 // the graph.
1428 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1429
1430 // If we managed to sink this instruction, update everything so we
1431 // harden that instruction when we reach it in the instruction
1432 // sequence.
1433 if (SunkMI != &MI) {
1434 // If in sinking there was no instruction needing to be hardened,
1435 // we're done.
1436 if (!SunkMI)
1437 continue;
1438
1439 // Otherwise, add this to the set of defs we harden.
1440 HardenPostLoad.insert(SunkMI);
1441 continue;
1442 }
1443 }
1444
1445 Register HardenedReg = hardenPostLoad(MI);
1446
1447 // Mark the resulting hardened register as such so we don't re-harden.
1448 AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1449
1450 continue;
1451 }
1452
1453 // Check for an indirect call or branch that may need its input hardened
1454 // even if we couldn't find the specific load used, or were able to
1455 // avoid hardening it for some reason. Note that here we cannot break
1456 // out afterward as we may still need to handle any call aspect of this
1457 // instruction.
1458 if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1459 hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1460 }
1461
1462 // After we finish hardening loads we handle interprocedural hardening if
1463 // enabled and relevant for this instruction.
     // NOTE(review): listing line 1464 is missing from this extract; it
     // presumably holds the `if` that guards the `continue` below on the
     // interprocedural-hardening setting -- confirm.
1465 continue;
1466 if (!MI.isCall() && !MI.isReturn())
1467 continue;
1468
1469 // If this is a direct return (IE, not a tail call) just directly harden
1470 // it.
1471 if (MI.isReturn() && !MI.isCall()) {
1472 hardenReturnInstr(MI);
1473 continue;
1474 }
1475
1476 // Otherwise we have a call. We need to handle transferring the predicate
1477 // state into a call and recovering it after the call returns (unless this
1478 // is a tail call).
1479 assert(MI.isCall() && "Should only reach here for calls!");
1480 tracePredStateThroughCall(MI);
1481 }
1482
     // Reset the per-block bookkeeping before moving to the next block.
1483 HardenPostLoad.clear();
1484 HardenLoadAddr.clear();
1485 HardenedAddrRegs.clear();
1486 AddrRegToHardenedReg.clear();
1487
1488 // Currently, we only track data-dependent loads within a basic block.
1489 // FIXME: We should see if this is necessary or if we could be more
1490 // aggressive here without opening up attack avenues.
1491 LoadDepRegs.clear();
1492 }
1493}
1494
1495/// Save EFLAGS into the returned GPR. This can in turn be restored with
1496/// `restoreEFLAGS`.
1497///
1498/// Note that LLVM can only lower very simple patterns of saved and restored
1499/// EFLAGS registers. The restore should always be within the same basic block
1500/// as the save so that no PHI nodes are inserted.
///
/// \param MBB      Block that receives the copy.
/// \param InsertPt Position in \p MBB handed to BuildMI for the copy.
/// \param Loc      Debug location attached to the copy.
/// \returns A newly created GR32 virtual register holding the copied flags.
1501Register X86SpeculativeLoadHardeningImpl::saveEFLAGS(
1502 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1503 const DebugLoc &Loc) {
1504 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1505 // what instruction selection does.
1506 Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1507 // We directly copy the FLAGS register and rely on later lowering to clean
1508 // this up into the appropriate setCC instructions.
1509 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
     // The copy counts toward the pass's inserted-instruction statistic.
1510 ++NumInstsInserted;
1511 return Reg;
1512}
1513
1514/// Restore EFLAGS from the provided GPR. This should be produced by
1515/// `saveEFLAGS`.
1516///
1517/// This must be done within the same basic block as the save in order to
1518/// reliably lower.
///
/// \param MBB      Block that receives the copy.
/// \param InsertPt Position in \p MBB handed to BuildMI for the copy.
/// \param Loc      Debug location attached to the copy.
/// \param Reg      Register whose value is copied back into EFLAGS.
1519void X86SpeculativeLoadHardeningImpl::restoreEFLAGS(
1520 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1521 const DebugLoc &Loc, Register Reg) {
     // A plain COPY into EFLAGS, mirroring the COPY out of EFLAGS in saveEFLAGS.
1522 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1523 ++NumInstsInserted;
1524}
1525
1526/// Takes the current predicate state (in a register) and merges it into the
1527/// stack pointer. The state is essentially a single bit, but we merge this in
1528/// a way that won't form non-canonical pointers and also will be preserved
1529/// across normal stack adjustments.
///
/// Emits two instructions at \p InsertPt: a left shift of \p PredStateReg by
/// 47 into a temporary, then an OR of that temporary into RSP. Both have their
/// EFLAGS def marked dead.
///
/// \param MBB          Block that receives the instructions.
/// \param InsertPt     Position in \p MBB handed to BuildMI.
/// \param Loc          Debug location attached to both instructions.
/// \param PredStateReg Register holding the predicate state; it is used with
///                     the kill flag set by the shift.
1530void X86SpeculativeLoadHardeningImpl::mergePredStateIntoSP(
1531 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1532 const DebugLoc &Loc, Register PredStateReg) {
1533 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1534 // FIXME: This hard codes a shift distance based on the number of bits needed
1535 // to stay canonical on 64-bit. We should compute this somehow and support
1536 // 32-bit as part of that.
1537 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1538 .addReg(PredStateReg, RegState::Kill)
1539 .addImm(47);
1540 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1541 ++NumInstsInserted;
     // RSP is both the destination and the first source of the OR.
1542 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1543 .addReg(X86::RSP)
1544 .addReg(TmpReg, RegState::Kill);
1545 OrI->addRegisterDead(X86::EFLAGS, TRI);
1546 ++NumInstsInserted;
1547}
1548
1549/// Extracts the predicate state stored in the high bits of the stack pointer.
///
/// Copies RSP into a temporary and arithmetic-right-shifts it by one less than
/// the bit width of the predicate state register class, producing the result
/// in a fresh virtual register of that class.
///
/// \param MBB      Block that receives the instructions.
/// \param InsertPt Position in \p MBB handed to BuildMI.
/// \param Loc      Debug location attached to both instructions.
/// \returns The virtual register defined by the shift.
1550Register X86SpeculativeLoadHardeningImpl::extractPredStateFromSP(
1551 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1552 const DebugLoc &Loc) {
1553 Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1554 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1555
1556 // We know that the stack pointer will have any preserved predicate state in
1557 // its high bit. We just want to smear this across the other bits. Turns out,
1558 // this is exactly what an arithmetic right shift does.
1559 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1560 .addReg(X86::RSP);
     // NOTE(review): the COPY above is not counted in NumInstsInserted, unlike
     // the COPYs in saveEFLAGS/restoreEFLAGS -- confirm whether intentional.
1561 auto ShiftI =
1562 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1563 .addReg(TmpReg, RegState::Kill)
1564 .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
     // The shift's EFLAGS def is marked dead.
1565 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1566 ++NumInstsInserted;
1567
1568 return PredStateReg;
1569}
1570
1571void X86SpeculativeLoadHardeningImpl::hardenLoadAddr(
1572 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1573 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg) {
1574 MachineBasicBlock &MBB = *MI.getParent();
1575 const DebugLoc &Loc = MI.getDebugLoc();
1576
1577 // Check if EFLAGS are alive by seeing if there is a def of them or they
1578 // live-in, and then seeing if that def is in turn used.
1579 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1580
1582
1583 if (BaseMO.isFI()) {
1584 // A frame index is never a dynamically controllable load, so only
1585 // harden it if we're covering fixed address loads as well.
1586 LLVM_DEBUG(
1587 dbgs() << " Skipping hardening base of explicit stack frame load: ";
1588 MI.dump(); dbgs() << "\n");
1589 } else if (BaseMO.getReg() == X86::RSP) {
1590 // Some idempotent atomic operations are lowered directly to a locked
1591 // OR with 0 to the top of stack(or slightly offset from top) which uses an
1592 // explicit RSP register as the base.
1593 assert(IndexMO.getReg() == X86::NoRegister &&
1594 "Explicit RSP access with dynamic index!");
1595 LLVM_DEBUG(
1596 dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1597 } else if (BaseMO.getReg() == X86::RIP ||
1598 BaseMO.getReg() == X86::NoRegister) {
1599 // For both RIP-relative addressed loads or absolute loads, we cannot
1600 // meaningfully harden them because the address being loaded has no
1601 // dynamic component.
1602 //
1603 // FIXME: When using a segment base (like TLS does) we end up with the
1604 // dynamic address being the base plus -1 because we can't mutate the
1605 // segment register here. This allows the signed 32-bit offset to point at
1606 // valid segment-relative addresses and load them successfully.
1607 LLVM_DEBUG(
1608 dbgs() << " Cannot harden base of "
1609 << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1610 << " address in a load!");
1611 } else {
1612 assert(BaseMO.isReg() &&
1613 "Only allowed to have a frame index or register base.");
1614 HardenOpRegs.push_back(&BaseMO);
1615 }
1616
1617 if (IndexMO.getReg() != X86::NoRegister &&
1618 (HardenOpRegs.empty() ||
1619 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1620 HardenOpRegs.push_back(&IndexMO);
1621
1622 assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1623 "Should have exactly one or two registers to harden!");
1624 assert((HardenOpRegs.size() == 1 ||
1625 HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1626 "Should not have two of the same registers!");
1627
1628 // Remove any registers that have already been checked.
1629 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1630 // See if this operand's register has already been checked.
1631 auto It = AddrRegToHardenedReg.find(Op->getReg());
1632 if (It == AddrRegToHardenedReg.end())
1633 // Not checked, so retain this one.
1634 return false;
1635
1636 // Otherwise, we can directly update this operand and remove it.
1637 Op->setReg(It->second);
1638 return true;
1639 });
1640 // If there are none left, we're done.
1641 if (HardenOpRegs.empty())
1642 return;
1643
1644 // Compute the current predicate state.
1645 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1646
1647 auto InsertPt = MI.getIterator();
1648
1649 // If EFLAGS are live and we don't have access to instructions that avoid
1650 // clobbering EFLAGS we need to save and restore them. This in turn makes
1651 // the EFLAGS no longer live.
1652 Register FlagsReg;
1653 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1654 EFLAGSLive = false;
1655 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1656 }
1657
1658 for (MachineOperand *Op : HardenOpRegs) {
1659 Register OpReg = Op->getReg();
1660 auto *OpRC = MRI->getRegClass(OpReg);
1661 Register TmpReg = MRI->createVirtualRegister(OpRC);
1662
1663 // If this is a vector register, we'll need somewhat custom logic to handle
1664 // hardening it.
1665 if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1666 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1667 assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1668 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1669
1670 // Move our state into a vector register.
1671 // FIXME: We could skip this at the cost of longer encodings with AVX-512
1672 // but that doesn't seem likely worth it.
1673 Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1674 auto MovI =
1675 BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1676 .addReg(StateReg);
1677 (void)MovI;
1678 ++NumInstsInserted;
1679 LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1680
1681 // Broadcast it across the vector register.
1682 Register VBStateReg = MRI->createVirtualRegister(OpRC);
1683 auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1684 TII->get(Is128Bit ? X86::VPBROADCASTQrr
1685 : X86::VPBROADCASTQYrr),
1686 VBStateReg)
1687 .addReg(VStateReg);
1688 (void)BroadcastI;
1689 ++NumInstsInserted;
1690 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1691 dbgs() << "\n");
1692
1693 // Merge our potential poison state into the value with a vector or.
1694 auto OrI =
1695 BuildMI(MBB, InsertPt, Loc,
1696 TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1697 .addReg(VBStateReg)
1698 .addReg(OpReg);
1699 (void)OrI;
1700 ++NumInstsInserted;
1701 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1702 } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1703 OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1704 OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1705 assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1706 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1707 bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1708 if (Is128Bit || Is256Bit)
1709 assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1710
1711 // Broadcast our state into a vector register.
1712 Register VStateReg = MRI->createVirtualRegister(OpRC);
1713 unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1714 : Is256Bit ? X86::VPBROADCASTQrZ256rr
1715 : X86::VPBROADCASTQrZrr;
1716 auto BroadcastI =
1717 BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1718 .addReg(StateReg);
1719 (void)BroadcastI;
1720 ++NumInstsInserted;
1721 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1722 dbgs() << "\n");
1723
1724 // Merge our potential poison state into the value with a vector or.
1725 unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1726 : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1727 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1728 .addReg(VStateReg)
1729 .addReg(OpReg);
1730 (void)OrI;
1731 ++NumInstsInserted;
1732 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1733 } else {
1734 // FIXME: Need to support GR32 here for 32-bit code.
1735 assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1736 "Not a supported register class for address hardening!");
1737
1738 if (!EFLAGSLive) {
1739 // Merge our potential poison state into the value with an or.
1740 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1741 .addReg(StateReg)
1742 .addReg(OpReg);
1743 OrI->addRegisterDead(X86::EFLAGS, TRI);
1744 ++NumInstsInserted;
1745 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1746 } else {
1747 // We need to avoid touching EFLAGS so shift out all but the least
1748 // significant bit using the instruction that doesn't update flags.
1749 auto ShiftI =
1750 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1751 .addReg(OpReg)
1752 .addReg(StateReg);
1753 (void)ShiftI;
1754 ++NumInstsInserted;
1755 LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1756 dbgs() << "\n");
1757 }
1758 }
1759
1760 // Record this register as checked and update the operand.
1761 assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1762 "Should not have checked this register yet!");
1763 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1764 Op->setReg(TmpReg);
1765 ++NumAddrRegsHardened;
1766 }
1767
1768 // And restore the flags if needed.
1769 if (FlagsReg)
1770 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1771}
1772
1773MachineInstr *X86SpeculativeLoadHardeningImpl::sinkPostLoadHardenedInst(
1774 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1776 "Cannot get here with a non-invariant load!");
1777 assert(!isEFLAGSDefLive(InitialMI) &&
1778 "Cannot get here with a data invariant load "
1779 "that interferes with EFLAGS!");
1780
1781 // See if we can sink hardening the loaded value.
1782 auto SinkCheckToSingleUse =
1783 [&](MachineInstr &MI) -> std::optional<MachineInstr *> {
1784 Register DefReg = MI.getOperand(0).getReg();
1785
1786 // We need to find a single use which we can sink the check. We can
1787 // primarily do this because many uses may already end up checked on their
1788 // own.
1789 MachineInstr *SingleUseMI = nullptr;
1790 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1791 // If we're already going to harden this use, it is data invariant, it
1792 // does not interfere with EFLAGS, and within our block.
1793 if (HardenedInstrs.count(&UseMI)) {
1795 // If we've already decided to harden a non-load, we must have sunk
1796 // some other post-load hardened instruction to it and it must itself
1797 // be data-invariant.
1799 "Data variant instruction being hardened!");
1800 continue;
1801 }
1802
1803 // Otherwise, this is a load and the load component can't be data
1804 // invariant so check how this register is being used.
1805 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(UseMI);
1806 assert(MemRefBeginIdx >= 0 &&
1807 "Should always have mem references here!");
1808
1809 MachineOperand &BaseMO =
1810 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1811 MachineOperand &IndexMO =
1812 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1813 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1814 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1815 // The load uses the register as part of its address making it not
1816 // invariant.
1817 return {};
1818
1819 continue;
1820 }
1821
1822 if (SingleUseMI)
1823 // We already have a single use, this would make two. Bail.
1824 return {};
1825
1826 // If this single use isn't data invariant, isn't in this block, or has
1827 // interfering EFLAGS, we can't sink the hardening to it.
1828 if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1830 return {};
1831
1832 // If this instruction defines multiple registers bail as we won't harden
1833 // all of them.
1834 if (UseMI.getDesc().getNumDefs() > 1)
1835 return {};
1836
1837 // If this register isn't a virtual register we can't walk uses of sanely,
1838 // just bail. Also check that its register class is one of the ones we
1839 // can harden.
1840 Register UseDefReg = UseMI.getOperand(0).getReg();
1841 if (!canHardenRegister(UseDefReg))
1842 return {};
1843
1844 SingleUseMI = &UseMI;
1845 }
1846
1847 // If SingleUseMI is still null, there is no use that needs its own
1848 // checking. Otherwise, it is the single use that needs checking.
1849 return {SingleUseMI};
1850 };
1851
1852 MachineInstr *MI = &InitialMI;
1853 while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1854 // Update which MI we're checking now.
1855 MI = *SingleUse;
1856 if (!MI)
1857 break;
1858 }
1859
1860 return MI;
1861}
1862
1863bool X86SpeculativeLoadHardeningImpl::canHardenRegister(Register Reg) {
1864 // We only support hardening virtual registers.
1865 if (!Reg.isVirtual())
1866 return false;
1867
1868 auto *RC = MRI->getRegClass(Reg);
1869 int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1870 if (RegBytes > 8)
1871 // We don't support post-load hardening of vectors.
1872 return false;
1873
1874 unsigned RegIdx = Log2_32(RegBytes);
1875 assert(RegIdx < 4 && "Unsupported register size");
1876
1877 // If this register class is explicitly constrained to a class that doesn't
1878 // require REX prefix, we may not be able to satisfy that constraint when
1879 // emitting the hardening instructions, so bail out here.
1880 // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1881 // end up both with a NOREX and REX-only register as operands to the hardening
1882 // instructions. It would be better to fix that code to handle this situation
1883 // rather than hack around it in this way.
1884 const TargetRegisterClass *NOREXRegClasses[] = {
1885 &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1886 &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1887 if (RC == NOREXRegClasses[RegIdx])
1888 return false;
1889
1890 const TargetRegisterClass *GPRRegClasses[] = {
1891 &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1892 &X86::GR64RegClass};
1893 return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1894}
1895
1896/// Harden a value in a register.
1897///
1898/// This is the low-level logic to fully harden a value sitting in a register
1899/// against leaking during speculative execution.
1900///
1901/// Unlike hardening an address that is used by a load, this routine is required
1902/// to hide *all* incoming bits in the register.
1903///
1904/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1905/// larger than the predicate state register. FIXME: We should support vector
1906/// registers here by broadcasting the predicate state.
1907///
1908/// The new, hardened virtual register is returned. It will have the same
1909/// register class as `Reg`.
1910Register X86SpeculativeLoadHardeningImpl::hardenValueInRegister(
1911 Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1912 const DebugLoc &Loc) {
1913 assert(canHardenRegister(Reg) && "Cannot harden this register!");
1914
1915 auto *RC = MRI->getRegClass(Reg);
1916 int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1917 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1918 assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1919 "Unknown register size");
1920
1921 // FIXME: Need to teach this about 32-bit mode.
1922 if (Bytes != 8) {
1923 unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1924 unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1925 Register NarrowStateReg = MRI->createVirtualRegister(RC);
1926 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1927 .addReg(StateReg, {}, SubRegImm);
1928 StateReg = NarrowStateReg;
1929 }
1930
1931 Register FlagsReg;
1932 if (isEFLAGSLive(MBB, InsertPt, *TRI))
1933 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1934
1935 Register NewReg = MRI->createVirtualRegister(RC);
1936 unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1937 unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1938 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1939 .addReg(StateReg)
1940 .addReg(Reg);
1941 OrI->addRegisterDead(X86::EFLAGS, TRI);
1942 ++NumInstsInserted;
1943 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1944
1945 if (FlagsReg)
1946 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1947
1948 return NewReg;
1949}
1950
1951/// Harden a load by hardening the loaded value in the defined register.
1952///
1953/// We can harden a non-leaking load into a register without touching the
1954/// address by just hiding all of the loaded bits during misspeculation. We use
1955/// an `or` instruction to do this because we set up our poison value as all
1956/// ones. And the goal is just for the loaded bits to not be exposed to
1957/// execution and coercing them to one is sufficient.
1958///
1959/// Returns the newly hardened register.
1960Register X86SpeculativeLoadHardeningImpl::hardenPostLoad(MachineInstr &MI) {
1961 MachineBasicBlock &MBB = *MI.getParent();
1962 const DebugLoc &Loc = MI.getDebugLoc();
1963
1964 auto &DefOp = MI.getOperand(0);
1965 Register OldDefReg = DefOp.getReg();
1966 auto *DefRC = MRI->getRegClass(OldDefReg);
1967
1968 // Because we want to completely replace the uses of this def'ed value with
1969 // the hardened value, create a dedicated new register that will only be used
1970 // to communicate the unhardened value to the hardening.
1971 Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1972 DefOp.setReg(UnhardenedReg);
1973
1974 // Now harden this register's value, getting a hardened reg that is safe to
1975 // use. Note that we insert the instructions to compute this *after* the
1976 // defining instruction, not before it.
1977 Register HardenedReg = hardenValueInRegister(
1978 UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1979
1980 // Finally, replace the old register (which now only has the uses of the
1981 // original def) with the hardened register.
1982 MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1983
1984 ++NumPostLoadRegsHardened;
1985 return HardenedReg;
1986}
1987
1988/// Harden a return instruction.
1989///
1990/// Returns implicitly perform a load which we need to harden. Without hardening
1991/// this load, an attacker my speculatively write over the return address to
1992/// steer speculation of the return to an attacker controlled address. This is
1993/// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1994/// this paper:
1995/// https://people.csail.mit.edu/vlk/spectre11.pdf
1996///
1997/// We can harden this by introducing an LFENCE that will delay any load of the
1998/// return address until prior instructions have retired (and thus are not being
1999/// speculated), or we can harden the address used by the implicit load: the
2000/// stack pointer.
2001///
2002/// If we are not using an LFENCE, hardening the stack pointer has an additional
2003/// benefit: it allows us to pass the predicate state accumulated in this
2004/// function back to the caller. In the absence of a BCBS attack on the return,
2005/// the caller will typically be resumed and speculatively executed due to the
2006/// Return Stack Buffer (RSB) prediction which is very accurate and has a high
2007/// priority. It is possible that some code from the caller will be executed
2008/// speculatively even during a BCBS-attacked return until the steering takes
2009/// effect. Whenever this happens, the caller can recover the (poisoned)
2010/// predicate state from the stack pointer and continue to harden loads.
2011void X86SpeculativeLoadHardeningImpl::hardenReturnInstr(MachineInstr &MI) {
2012 MachineBasicBlock &MBB = *MI.getParent();
2013 const DebugLoc &Loc = MI.getDebugLoc();
2014 auto InsertPt = MI.getIterator();
2015
2016 if (FenceCallAndRet)
2017 // No need to fence here as we'll fence at the return site itself. That
2018 // handles more cases than we can handle here.
2019 return;
2020
2021 // Take our predicate state, shift it to the high 17 bits (so that we keep
2022 // pointers canonical) and merge it into RSP. This will allow the caller to
2023 // extract it when we return (speculatively).
2024 mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2025}
2026
2027/// Trace the predicate state through a call.
2028///
2029/// There are several layers of this needed to handle the full complexity of
2030/// calls.
2031///
2032/// First, we need to send the predicate state into the called function. We do
2033/// this by merging it into the high bits of the stack pointer.
2034///
2035/// For tail calls, this is all we need to do.
2036///
2037/// For calls where we might return and resume the control flow, we need to
2038/// extract the predicate state from the high bits of the stack pointer after
2039/// control returns from the called function.
2040///
2041/// We also need to verify that we intended to return to this location in the
2042/// code. An attacker might arrange for the processor to mispredict the return
2043/// to this valid but incorrect return address in the program rather than the
2044/// correct one. See the paper on this attack, called "ret2spec" by the
2045/// researchers, here:
2046/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2047///
2048/// The way we verify that we returned to the correct location is by preserving
2049/// the expected return address across the call. One technique involves taking
2050/// advantage of the red-zone to load the return address from `8(%rsp)` where it
2051/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2052/// directly save the address into a register that will be preserved across the
2053/// call. We compare this intended return address against the address
2054/// immediately following the call (the observed return address). If these
2055/// mismatch, we have detected misspeculation and can poison our predicate
2056/// state.
2057void X86SpeculativeLoadHardeningImpl::tracePredStateThroughCall(
2058 MachineInstr &MI) {
2059 MachineBasicBlock &MBB = *MI.getParent();
2060 MachineFunction &MF = *MBB.getParent();
2061 auto InsertPt = MI.getIterator();
2062 const DebugLoc &Loc = MI.getDebugLoc();
2063
2064 if (FenceCallAndRet) {
2065 if (MI.isReturn())
2066 // Tail call, we don't return to this function.
2067 // FIXME: We should also handle noreturn calls.
2068 return;
2069
2070 // We don't need to fence before the call because the function should fence
2071 // in its entry. However, we do need to fence after the call returns.
2072 // Fencing before the return doesn't correctly handle cases where the return
2073 // itself is mispredicted.
2074 BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2075 ++NumInstsInserted;
2076 ++NumLFENCEsInserted;
2077 return;
2078 }
2079
2080 // First, we transfer the predicate state into the called function by merging
2081 // it into the stack pointer. This will kill the current def of the state.
2082 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2083 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2084
2085 // If this call is also a return, it is a tail call and we don't need anything
2086 // else to handle it so just return. Also, if there are no further
2087 // instructions and no successors, this call does not return so we can also
2088 // bail.
2089 if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2090 return;
2091
2092 // Create a symbol to track the return address and attach it to the call
2093 // machine instruction. We will lower extra symbols attached to call
2094 // instructions as label immediately following the call.
2095 MCSymbol *RetSymbol =
2096 MF.getContext().createTempSymbol("slh_ret_addr",
2097 /*AlwaysAddSuffix*/ true);
2098 MI.setPostInstrSymbol(MF, RetSymbol);
2099
2100 const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2101 Register ExpectedRetAddrReg;
2102
2103 // If we have no red zones or if the function returns twice (possibly without
2104 // using the `ret` instruction) like setjmp, we need to save the expected
2105 // return address prior to the call.
2106 if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2107 MF.exposesReturnsTwice()) {
2108 // If we don't have red zones, we need to compute the expected return
2109 // address prior to the call and store it in a register that lives across
2110 // the call.
2111 //
2112 // In some ways, this is doubly satisfying as a mitigation because it will
2113 // also successfully detect stack smashing bugs in some cases (typically,
2114 // when a callee-saved register is used and the callee doesn't push it onto
2115 // the stack). But that isn't our primary goal, so we only use it as
2116 // a fallback.
2117 //
2118 // FIXME: It isn't clear that this is reliable in the face of
2119 // rematerialization in the register allocator. We somehow need to force
2120 // that to not occur for this particular instruction, and instead to spill
2121 // or otherwise preserve the value computed *prior* to the call.
2122 //
2123 // FIXME: It is even less clear why MachineCSE can't just fold this when we
2124 // end up having to use identical instructions both before and after the
2125 // call to feed the comparison.
2126 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2127 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2128 !Subtarget->isPositionIndependent()) {
2129 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2130 .addSym(RetSymbol);
2131 } else {
2132 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2133 .addReg(/*Base*/ X86::RIP)
2134 .addImm(/*Scale*/ 1)
2135 .addReg(/*Index*/ 0)
2136 .addSym(RetSymbol)
2137 .addReg(/*Segment*/ 0);
2138 }
2139 }
2140
2141 // Step past the call to handle when it returns.
2142 ++InsertPt;
2143
2144 // If we didn't pre-compute the expected return address into a register, then
2145 // red zones are enabled and the return address is still available on the
2146 // stack immediately after the call. As the very first instruction, we load it
2147 // into a register.
2148 if (!ExpectedRetAddrReg) {
2149 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2150 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2151 .addReg(/*Base*/ X86::RSP)
2152 .addImm(/*Scale*/ 1)
2153 .addReg(/*Index*/ 0)
2154 .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2155 // the return address is 8-bytes past it.
2156 .addReg(/*Segment*/ 0);
2157 }
2158
2159 // Now we extract the callee's predicate state from the stack pointer.
2160 Register NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2161
2162 // Test the expected return address against our actual address. If we can
2163 // form this basic block's address as an immediate, this is easy. Otherwise
2164 // we compute it.
2165 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2166 !Subtarget->isPositionIndependent()) {
2167 // FIXME: Could we fold this with the load? It would require careful EFLAGS
2168 // management.
2169 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2170 .addReg(ExpectedRetAddrReg, RegState::Kill)
2171 .addSym(RetSymbol);
2172 } else {
2173 Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2174 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2175 .addReg(/*Base*/ X86::RIP)
2176 .addImm(/*Scale*/ 1)
2177 .addReg(/*Index*/ 0)
2178 .addSym(RetSymbol)
2179 .addReg(/*Segment*/ 0);
2180 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2181 .addReg(ExpectedRetAddrReg, RegState::Kill)
2182 .addReg(ActualRetAddrReg, RegState::Kill);
2183 }
2184
2185 // Now conditionally update the predicate state we just extracted if we ended
2186 // up at a different return address than expected.
2187 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2188 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2189
2190 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2191 auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2192 .addReg(NewStateReg, RegState::Kill)
2193 .addReg(PS->PoisonReg)
2195 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
2196 ++NumInstsInserted;
2197 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2198
2199 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2200}
2201
2202/// An attacker may speculatively store over a value that is then speculatively
2203/// loaded and used as the target of an indirect call or jump instruction. This
2204/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2205/// in this paper:
2206/// https://people.csail.mit.edu/vlk/spectre11.pdf
2207///
2208/// When this happens, the speculative execution of the call or jump will end up
2209/// being steered to this attacker controlled address. While most such loads
2210/// will be adequately hardened already, we want to ensure that they are
2211/// definitively treated as needing post-load hardening. While address hardening
2212/// is sufficient to prevent secret data from leaking to the attacker, it may
2213/// not be sufficient to prevent an attacker from steering speculative
2214/// execution. We forcibly unfolded all relevant loads above and so will always
2215/// have an opportunity to post-load harden here, we just need to scan for cases
2216/// not already flagged and add them.
2217void X86SpeculativeLoadHardeningImpl::hardenIndirectCallOrJumpInstr(
2218 MachineInstr &MI,
2219 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg) {
2220 switch (MI.getOpcode()) {
2221 case X86::FARCALL16m:
2222 case X86::FARCALL32m:
2223 case X86::FARCALL64m:
2224 case X86::FARJMP16m:
2225 case X86::FARJMP32m:
2226 case X86::FARJMP64m:
2227 // We don't need to harden either far calls or far jumps as they are
2228 // safe from Spectre.
2229 return;
2230
2231 default:
2232 break;
2233 }
2234
2235 // We should never see a loading instruction at this point, as those should
2236 // have been unfolded.
2237 assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2238
2239 // If the first operand isn't a register, this is a branch or call
2240 // instruction with an immediate operand which doesn't need to be hardened.
2241 if (!MI.getOperand(0).isReg())
2242 return;
2243
2244 // For all of these, the target register is the first operand of the
2245 // instruction.
2246 auto &TargetOp = MI.getOperand(0);
2247 Register OldTargetReg = TargetOp.getReg();
2248
2249 // Try to lookup a hardened version of this register. We retain a reference
2250 // here as we want to update the map to track any newly computed hardened
2251 // register.
2252 Register &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2253
2254 // If we don't have a hardened register yet, compute one. Otherwise, just use
2255 // the already hardened register.
2256 //
2257 // FIXME: It is a little suspect that we use partially hardened registers that
2258 // only feed addresses. The complexity of partial hardening with SHRX
2259 // continues to pile up. Should definitively measure its value and consider
2260 // eliminating it.
2261 if (!HardenedTargetReg)
2262 HardenedTargetReg = hardenValueInRegister(
2263 OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2264
2265 // Set the target operand to the hardened register.
2266 TargetOp.setReg(HardenedTargetReg);
2267
2268 ++NumCallsOrJumpsHardened;
2269}
2270
2271PreservedAnalyses
2274 X86SpeculativeLoadHardeningImpl Impl;
2275 const bool Changed = Impl.run(MF);
2276 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
2277 dbgs() << "\n"; MF.verify(MFAM));
2281}
2282
// Legacy pass manager initialization boilerplate for
// X86SpeculativeLoadHardeningLegacy, keyed by PASS_KEY ("x86-slh"). Nothing
// sits between the BEGIN and END macros, so no dependencies are listed here.
// The two trailing `false` arguments fill the macros' `cfg` and `analysis`
// parameters.
2283INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningLegacy, PASS_KEY,
2284 "X86 speculative load hardener", false, false)
2285INITIALIZE_PASS_END(X86SpeculativeLoadHardeningLegacy, PASS_KEY,
2286 "X86 speculative load hardener", false, false)
2287
2289 return new X86SpeculativeLoadHardeningLegacy();
2290}
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > HardenLoads("aarch64-slh-loads", cl::Hidden, cl::desc("Sanitize loads from memory."), cl::init(true))
MachineBasicBlock & MBB
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the SparseBitVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define PASS_KEY
static MachineBasicBlock & splitEdge(MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount, MachineInstr *Br, MachineInstr *&UncondBr, const X86InstrInfo &TII)
static cl::opt< bool > HardenLoads(PASS_KEY "-loads", cl::desc("Sanitize loads from memory. When disable, no " "significant security is provided."), cl::init(true), cl::Hidden)
static void canonicalizePHIOperands(MachineFunction &MF)
Removing duplicate PHI operands to leave the PHI in a canonical and predictable form.
static cl::opt< bool > HardenInterprocedurally(PASS_KEY "-ip", cl::desc("Harden interprocedurally by passing our state in and out of " "functions in the high bits of the stack pointer."), cl::init(true), cl::Hidden)
static cl::opt< bool > FenceCallAndRet(PASS_KEY "-fence-call-and-ret", cl::desc("Use a full speculation fence to harden both call and ret edges " "rather than a lighter weight mitigation."), cl::init(false), cl::Hidden)
static cl::opt< bool > EnablePostLoadHardening(PASS_KEY "-post-load", cl::desc("Harden the value loaded *after* it is loaded by " "flushing the loaded bits to 1. This is hard to do " "in general but can be done easily for GPRs."), cl::init(true), cl::Hidden)
static cl::opt< bool > HardenEdgesWithLFENCE(PASS_KEY "-lfence", cl::desc("Use LFENCE along each conditional edge to harden against speculative " "loads rather than conditional movs and poisoned pointers."), cl::init(false), cl::Hidden)
static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo &TRI)
static cl::opt< bool > EnableSpeculativeLoadHardening("x86-speculative-load-hardening", cl::desc("Force enable speculative load hardening"), cl::init(false), cl::Hidden)
static const TargetRegisterClass * getRegClassForUnfoldedLoad(const X86InstrInfo &TII, unsigned Opcode)
Compute the register class for the unfolded load.
static bool hasVulnerableLoad(MachineFunction &MF)
Helper to scan a function for loads vulnerable to misspeculation that we want to harden.
static bool isEFLAGSDefLive(const MachineInstr &MI)
static cl::opt< bool > HardenIndirectCallsAndJumps(PASS_KEY "-indirect", cl::desc("Harden indirect calls and jumps against using speculatively " "stored attacker controlled addresses. This is designed to " "mitigate Spectre v1.2 style attacks."), cl::init(true), cl::Hidden)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
iterator end()
Definition DenseMap.h:143
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
LLVM_ABI MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
Describe properties that are true of each instruction in the target description file.
void normalizeSuccProbs()
Normalize probabilities of all successors so that the sum of them becomes one.
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
LLVM_ABI iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg=Register(), bool SkipPseudoOp=true)
Return the first instruction in MBB after I that is not a PHI, label or debug.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
bool isEHScopeEntry() const
Returns true if this is the entry block of an EH scope, i.e., the block that used to have a catchpad ...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
void dump() const
Definition Pass.cpp:146
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr unsigned id() const
Definition Register.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void set(unsigned Idx)
bool test(unsigned Idx) const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value l...
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value o...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const X86InstrInfo * getInstrInfo() const override
bool hasAVX512() const
bool isPositionIndependent() const
const X86RegisterInfo * getRegisterInfo() const override
const X86FrameLowering * getFrameLowering() const override
bool hasAVX2() const
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
CondCode getCondFromBranch(const MachineInstr &MI)
int getFirstAddrOperandIdx(const MachineInstr &MI)
Return the index of the instruction's first address operand, if it has a memory reference,...
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false, bool HasNDD=false)
Return a cmov opcode for the given register size in bytes, and operand type.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
BBIterator iterator
Definition BasicBlock.h:87
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2133
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FunctionPass * createX86SpeculativeLoadHardeningLegacyPass()
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191