1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Provide a pass which mitigates speculative execution attacks which operate
11/// by speculating incorrectly past some predicate (a type check, bounds check,
12/// or other condition) to reach a load with invalid inputs and leak the data
13/// accessed by that load using a side channel out of the speculative domain.
14///
15/// For details on the attacks, see the first variant in both the Project Zero
16/// writeup and the Spectre paper:
17/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18/// https://spectreattack.com/spectre.pdf
19///
20//===----------------------------------------------------------------------===//
21
22#include "X86.h"
23#include "X86InstrInfo.h"
24#include "X86Subtarget.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
32#include "llvm/ADT/Statistic.h"
47#include "llvm/IR/DebugLoc.h"
48#include "llvm/MC/MCSchedule.h"
49#include "llvm/Pass.h"
51#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <iterator>
56#include <optional>
57#include <utility>
58
59using namespace llvm;
60
61#define PASS_KEY "x86-slh"
62#define DEBUG_TYPE PASS_KEY
63
64STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
65STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
66STATISTIC(NumAddrRegsHardened,
67 "Number of address mode used registers hardaned");
68STATISTIC(NumPostLoadRegsHardened,
69 "Number of post-load register values hardened");
70STATISTIC(NumCallsOrJumpsHardened,
71 "Number of calls or jumps requiring extra hardening");
72STATISTIC(NumInstsInserted, "Number of instructions inserted");
73STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
74
75static cl::opt<bool> EnableSpeculativeLoadHardening(
76 "x86-speculative-load-hardening",
77 cl::desc("Force enable speculative load hardening"), cl::init(false),
78 cl::Hidden);
79
80static cl::opt<bool> HardenEdgesWithLFENCE(
81 PASS_KEY "-lfence",
82 cl::desc(
83 "Use LFENCE along each conditional edge to harden against speculative "
84 "loads rather than conditional movs and poisoned pointers."),
85 cl::init(false), cl::Hidden);
86
87static cl::opt<bool> EnablePostLoadHardening(
88 PASS_KEY "-post-load",
89 cl::desc("Harden the value loaded *after* it is loaded by "
90 "flushing the loaded bits to 1. This is hard to do "
91 "in general but can be done easily for GPRs."),
92 cl::init(true), cl::Hidden);
93
94static cl::opt<bool> FenceCallAndRet(
95 PASS_KEY "-fence-call-and-ret",
96 cl::desc("Use a full speculation fence to harden both call and ret edges "
97 "rather than a lighter weight mitigation."),
98 cl::init(false), cl::Hidden);
99
100static cl::opt<bool> HardenInterprocedurally(
101 PASS_KEY "-ip",
102 cl::desc("Harden interprocedurally by passing our state in and out of "
103 "functions in the high bits of the stack pointer."),
104 cl::init(true), cl::Hidden);
105
106static cl::opt<bool>
107 HardenLoads(PASS_KEY "-loads",
108 cl::desc("Sanitize loads from memory. When disabled, no "
109 "significant security is provided."),
110 cl::init(true), cl::Hidden);
111
112static cl::opt<bool> HardenIndirectCallsAndJumps(
113 PASS_KEY "-indirect",
114 cl::desc("Harden indirect calls and jumps against using speculatively "
115 "stored attacker controlled addresses. This is designed to "
116 "mitigate Spectre v1.2 style attacks."),
117 cl::init(true), cl::Hidden);
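// Editorial note: with PASS_KEY defined as "x86-slh" above, these options
// surface as -x86-slh-lfence, -x86-slh-post-load, -x86-slh-fence-call-and-ret,
// -x86-slh-ip, -x86-slh-loads, and -x86-slh-indirect. For example (a sketch;
// the flag spellings follow directly from the PASS_KEY concatenations above):
//
//   llc -x86-speculative-load-hardening -x86-slh-lfence foo.ll
//
// force-enables the pass and switches it to the LFENCE-based strategy.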
118
119namespace {
120
121class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
122public:
123 X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
124
125 StringRef getPassName() const override {
126 return "X86 speculative load hardening";
127 }
128 bool runOnMachineFunction(MachineFunction &MF) override;
129 void getAnalysisUsage(AnalysisUsage &AU) const override;
130
131 /// Pass identification, replacement for typeid.
132 static char ID;
133
134private:
135 /// The information about a block's conditional terminators needed to trace
136 /// our predicate state through the exiting edges.
137 struct BlockCondInfo {
138 MachineBasicBlock *MBB;
139
140 // We mostly have one conditional branch, and in extremely rare cases have
141 // two. Three and more are so rare as to be unimportant for compile time.
142 SmallVector<MachineInstr *, 2> CondBrs;
143
144 MachineInstr *UncondBr;
145 };
146
147 /// Manages the predicate state traced through the program.
148 struct PredState {
149 unsigned InitialReg = 0;
150 unsigned PoisonReg = 0;
151
152 const TargetRegisterClass *RC;
153 MachineSSAUpdater SSA;
154
155 PredState(MachineFunction &MF, const TargetRegisterClass *RC)
156 : RC(RC), SSA(MF) {}
157 };
158
159 const X86Subtarget *Subtarget = nullptr;
160 MachineRegisterInfo *MRI = nullptr;
161 const X86InstrInfo *TII = nullptr;
162 const TargetRegisterInfo *TRI = nullptr;
163
164 std::optional<PredState> PS;
165
166 void hardenEdgesWithLFENCE(MachineFunction &MF);
167
168 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
169
170 SmallVector<MachineInstr *, 16>
171 tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
172
173 void unfoldCallAndJumpLoads(MachineFunction &MF);
174
175 SmallVector<MachineInstr *, 16>
176 tracePredStateThroughIndirectBranches(MachineFunction &MF);
177
178 void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
179
180 unsigned saveEFLAGS(MachineBasicBlock &MBB,
181 MachineBasicBlock::iterator InsertPt,
182 const DebugLoc &Loc);
183 void restoreEFLAGS(MachineBasicBlock &MBB,
184 MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
185 Register Reg);
186
187 void mergePredStateIntoSP(MachineBasicBlock &MBB,
188 MachineBasicBlock::iterator InsertPt,
189 const DebugLoc &Loc, unsigned PredStateReg);
190 unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
191 MachineBasicBlock::iterator InsertPt,
192 const DebugLoc &Loc);
193
194 void
195 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
196 MachineOperand &IndexMO,
197 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
198 MachineInstr *
199 sinkPostLoadHardenedInst(MachineInstr &MI,
200 SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
201 bool canHardenRegister(Register Reg);
202 unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
203 MachineBasicBlock::iterator InsertPt,
204 const DebugLoc &Loc);
205 unsigned hardenPostLoad(MachineInstr &MI);
206 void hardenReturnInstr(MachineInstr &MI);
207 void tracePredStateThroughCall(MachineInstr &MI);
208 void hardenIndirectCallOrJumpInstr(
209 MachineInstr &MI,
210 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
211};
212
213} // end anonymous namespace
214
215char X86SpeculativeLoadHardeningPass::ID = 0;
216
217void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
218 AnalysisUsage &AU) const {
219 MachineFunctionPass::getAnalysisUsage(AU);
220}
221
222static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
223 MachineBasicBlock &Succ, int SuccCount,
224 MachineInstr *Br, MachineInstr *&UncondBr,
225 const X86InstrInfo &TII) {
226 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
227
228 MachineFunction &MF = *MBB.getParent();
229
230 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock(Succ.getBasicBlock());
231
232 // We have to insert the new block immediately after the current one as we
233 // don't know what layout-successor relationships the successor has and we
234 // may not be able to (and generally don't want to) try to fix those up.
235 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
236
237 // Update the branch instruction if necessary.
238 if (Br) {
239 assert(Br->getOperand(0).getMBB() == &Succ &&
240 "Didn't start with the right target!");
241 Br->getOperand(0).setMBB(&NewMBB);
242
243 // If this successor was reached through a branch rather than fallthrough,
244 // we might have *broken* fallthrough and so need to inject a new
245 // unconditional branch.
246 if (!UncondBr) {
247 MachineBasicBlock &OldLayoutSucc =
248 *std::next(MachineFunction::iterator(&NewMBB));
249 assert(MBB.isSuccessor(&OldLayoutSucc) &&
250 "Without an unconditional branch, the old layout successor should "
251 "be an actual successor!");
252 auto BrBuilder =
253 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
254 // Update the unconditional branch now that we've added one.
255 UncondBr = &*BrBuilder;
256 }
257
258 // Insert unconditional "jump Succ" instruction in the new block if
259 // necessary.
260 if (!NewMBB.isLayoutSuccessor(&Succ)) {
261 SmallVector<MachineOperand, 4> Cond;
262 TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
263 }
264 } else {
265 assert(!UncondBr &&
266 "Cannot have a branchless successor and an unconditional branch!");
267 assert(NewMBB.isLayoutSuccessor(&Succ) &&
268 "A non-branch successor must have been a layout successor before "
269 "and now is a layout successor of the new block.");
270 }
271
272 // If this is the only edge to the successor, we can just replace it in the
273 // CFG. Otherwise we need to add a new entry in the CFG for the new
274 // successor.
275 if (SuccCount == 1) {
276 MBB.replaceSuccessor(&Succ, &NewMBB);
277 } else {
278 MBB.splitSuccessor(&Succ, &NewMBB);
279 }
280
281 // Hook up the edge from the new basic block to the old successor in the CFG.
282 NewMBB.addSuccessor(&Succ);
283
284 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
285 for (MachineInstr &MI : Succ) {
286 if (!MI.isPHI())
287 break;
288 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
289 OpIdx += 2) {
290 MachineOperand &OpV = MI.getOperand(OpIdx);
291 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
292 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
293 if (OpMBB.getMBB() != &MBB)
294 continue;
295
296 // If this is the last edge to the successor, just replace MBB in the PHI.
297 if (SuccCount == 1) {
298 OpMBB.setMBB(&NewMBB);
299 break;
300 }
301
302 // Otherwise, append a new pair of operands for the new incoming edge.
303 MI.addOperand(MF, OpV);
304 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
305 break;
306 }
307 }
308
309 // Inherit live-ins from the successor
310 for (auto &LI : Succ.liveins())
311 NewMBB.addLiveIn(LI);
312
313 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
314 << Succ.getName() << "'.\n");
315 return NewMBB;
316}
317
318/// Remove duplicate PHI operands to leave the PHI in a canonical and
319/// predictable form.
320///
321/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
322/// isn't what you might expect. We may have multiple entries in PHI nodes for
323/// a single predecessor. This makes CFG-updating extremely complex, so here we
324/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
325/// one entry per predecessor, regardless of how many edges there are.
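///
/// A sketch (hypothetical MIR, purely illustrative): a PHI such as
///
///   %v:gr64 = PHI %a:gr64, %bb.1, %a:gr64, %bb.1, %b:gr64, %bb.2
///
/// where %bb.1 contributes two identical entries is rewritten to
///
///   %v:gr64 = PHI %a:gr64, %bb.1, %b:gr64, %bb.2
///
/// so that the CFG updates below can assume one entry per predecessor.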
326static void canonicalizePHIOperands(MachineFunction &MF) {
327 SmallPtrSet<MachineBasicBlock *, 4> Preds;
328 SmallVector<int, 4> DupIndices;
329 for (auto &MBB : MF)
330 for (auto &MI : MBB) {
331 if (!MI.isPHI())
332 break;
333
334 // First we scan the operands of the PHI looking for duplicate entries
335 // for a particular predecessor. We retain the operand index of each duplicate
336 // entry found.
337 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
338 OpIdx += 2)
339 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
340 DupIndices.push_back(OpIdx);
341
342 // Now walk the duplicate indices, removing both the block and value. Note
343 // that these are stored as a vector making this element-wise removal
345 // potentially quadratic.
346 //
347 // FIXME: It is really frustrating that we have to use a quadratic
348 // removal algorithm here. There should be a better way, but the use-def
349 // updates required make that impossible using the public API.
350 //
351 // Note that we have to process these backwards so that we don't
352 // invalidate other indices with each removal.
353 while (!DupIndices.empty()) {
354 int OpIdx = DupIndices.pop_back_val();
355 // Remove both the block and value operand, again in reverse order to
356 // preserve indices.
357 MI.removeOperand(OpIdx + 1);
358 MI.removeOperand(OpIdx);
359 }
360
361 Preds.clear();
362 }
363}
364
365/// Helper to scan a function for loads vulnerable to misspeculation that we
366/// want to harden.
367///
368/// We use this to avoid making changes to functions where there is nothing we
369/// need to do to harden against misspeculation.
370static bool hasVulnerableLoad(MachineFunction &MF) {
371 for (MachineBasicBlock &MBB : MF) {
372 for (MachineInstr &MI : MBB) {
373 // Loads within this basic block after an LFENCE are not at risk of
374 // speculatively executing with invalid predicates from prior control
375 // flow. So break out of this block but continue scanning the function.
376 if (MI.getOpcode() == X86::LFENCE)
377 break;
378
379 // Looking for loads only.
380 if (!MI.mayLoad())
381 continue;
382
383 // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
384 if (MI.getOpcode() == X86::MFENCE)
385 continue;
386
387 // We found a load.
388 return true;
389 }
390 }
391
392 // No loads found.
393 return false;
394}
395
396bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
397 MachineFunction &MF) {
398 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
399 << " **********\n");
400
401 // Only run if this pass is force-enabled or we detect the relevant function
402 // attribute requesting SLH.
403 if (!EnableSpeculativeLoadHardening &&
404 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
405 return false;
406
407 Subtarget = &MF.getSubtarget<X86Subtarget>();
408 MRI = &MF.getRegInfo();
409 TII = Subtarget->getInstrInfo();
410 TRI = Subtarget->getRegisterInfo();
411
412 // FIXME: Support for 32-bit.
413 PS.emplace(MF, &X86::GR64_NOSPRegClass);
414
415 if (MF.begin() == MF.end())
416 // Nothing to do for a degenerate empty function...
417 return false;
418
419 // We support an alternative hardening technique based on a debug flag.
420 if (HardenEdgesWithLFENCE) {
421 hardenEdgesWithLFENCE(MF);
422 return true;
423 }
424
425 // Create a dummy debug loc to use for all the generated code here.
426 DebugLoc Loc;
427
428 MachineBasicBlock &Entry = *MF.begin();
429 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
430
431 // Do a quick scan to see if we have any checkable loads.
432 bool HasVulnerableLoad = hasVulnerableLoad(MF);
433
434 // See if we have any conditional branching blocks that we will need to trace
435 // predicate state through.
436 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
437
438 // If we have no interesting conditions or loads, nothing to do here.
439 if (!HasVulnerableLoad && Infos.empty())
440 return true;
441
442 // The poison value is required to be an all-ones value for many aspects of
443 // this mitigation.
444 const int PoisonVal = -1;
445 PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
446 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
447 .addImm(PoisonVal);
448 ++NumInstsInserted;
449
450 // If we have loads being hardened and we've asked for call and ret edges to
451 // get a full fence-based mitigation, inject that fence.
452 if (HasVulnerableLoad && FenceCallAndRet) {
453 // We need to insert an LFENCE at the start of the function to suspend any
454 // incoming misspeculation from the caller. This helps two-fold: the caller
455 // may not have been protected as this code has been, and this code gets to
456 // not take any specific action to protect across calls.
457 // FIXME: We could skip this for functions which unconditionally return
458 // a constant.
459 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
460 ++NumInstsInserted;
461 ++NumLFENCEsInserted;
462 }
463
464 // If we guarded the entry with an LFENCE and have no conditionals to protect
465 // in blocks, then we're done.
466 if (FenceCallAndRet && Infos.empty())
467 // We may have changed the function's code at this point to insert fences.
468 return true;
469
470 // Decide how the initial predicate state for the entry block is seeded.
471 if (HardenInterprocedurally && !FenceCallAndRet) {
472 // Set up the predicate state by extracting it from the incoming stack
473 // pointer so we pick up any misspeculation in our caller.
474 PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
475 } else {
476 // Otherwise, just build the predicate state itself by zeroing a register
477 // as we don't need any initial state.
478 PS->InitialReg = MRI->createVirtualRegister(PS->RC);
479 Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
480 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
481 PredStateSubReg);
482 ++NumInstsInserted;
483 MachineOperand *ZeroEFLAGSDefOp =
484 ZeroI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
485 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
486 "Must have an implicit def of EFLAGS!");
487 ZeroEFLAGSDefOp->setIsDead(true);
488 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
489 PS->InitialReg)
490 .addImm(0)
491 .addReg(PredStateSubReg)
492 .addImm(X86::sub_32bit);
493 }
494
495 // We're going to need to trace predicate state throughout the function's
496 // CFG. Prepare for this by setting up our initial state of PHIs with unique
497 // predecessor entries and all the initial predicate state.
498 canonicalizePHIOperands(MF);
499
500 // Track the updated values in an SSA updater to rewrite into SSA form at the
501 // end.
502 PS->SSA.Initialize(PS->InitialReg);
503 PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
504
505 // Trace through the CFG.
506 auto CMovs = tracePredStateThroughCFG(MF, Infos);
507
508 // We may also enter basic blocks in this function via exception handling
509 // control flow. Here, if we are hardening interprocedurally, we need to
510 // re-capture the predicate state from the throwing code. In the Itanium ABI,
511 // the throw will always look like a call to __cxa_throw and will have the
512 // predicate state in the stack pointer, so extract fresh predicate state from
513 // the stack pointer and make it available in SSA.
514 // FIXME: Handle non-itanium ABI EH models.
515 if (HardenInterprocedurally) {
516 for (MachineBasicBlock &MBB : MF) {
517 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
518 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
519 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
520 if (!MBB.isEHPad())
521 continue;
522 PS->SSA.AddAvailableValue(
523 &MBB,
524 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
525 }
526 }
527
528 if (HardenIndirectCallsAndJumps) {
529 // If we are going to harden calls and jumps we need to unfold their memory
530 // operands.
531 unfoldCallAndJumpLoads(MF);
532
533 // Then we trace predicate state through the indirect branches.
534 auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
535 CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
536 }
537
538 // Now that we have the predicate state available at the start of each block
539 // in the CFG, trace it through each block, hardening vulnerable instructions
540 // as we go.
541 tracePredStateThroughBlocksAndHarden(MF);
542
543 // Now rewrite all the uses of the pred state using the SSA updater to insert
544 // PHIs connecting the state between blocks along the CFG edges.
545 for (MachineInstr *CMovI : CMovs)
546 for (MachineOperand &Op : CMovI->operands()) {
547 if (!Op.isReg() || Op.getReg() != PS->InitialReg)
548 continue;
549
550 PS->SSA.RewriteUse(Op);
551 }
552
553 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
554 dbgs() << "\n"; MF.verify(this));
555 return true;
556}
557
558/// Implements the naive hardening approach of putting an LFENCE after every
559/// potentially mis-predicted control flow construct.
560///
561/// We include this as an alternative mostly for the purpose of comparison. The
562/// performance impact of this is expected to be extremely severe and not
563/// practical for any real-world users.
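///
/// Sketch of the resulting shape (illustrative block names): for a block ending
/// in
///
///   jne .LBB0_2     # conditional edge
///   # fall through to .LBB0_1
///
/// an `lfence` is inserted at the start of both .LBB0_1 and .LBB0_2, so no load
/// in either successor can speculatively execute before the branch resolves.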
564void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
565 MachineFunction &MF) {
566 // First, we scan the function looking for blocks that are reached along edges
567 // that we might want to harden.
568 SmallSetVector<MachineBasicBlock *, 8> Blocks;
569 for (MachineBasicBlock &MBB : MF) {
570 // If there are no or only one successor, nothing to do here.
571 if (MBB.succ_size() <= 1)
572 continue;
573
574 // Skip blocks unless their terminators start with a branch. Other
575 // terminators don't seem interesting for guarding against misspeculation.
576 auto TermIt = MBB.getFirstTerminator();
577 if (TermIt == MBB.end() || !TermIt->isBranch())
578 continue;
579
580 // Add all the non-EH-pad successors to the blocks we want to harden. We
581 // skip EH pads because there isn't really a condition of interest on
582 // entering.
583 for (MachineBasicBlock *SuccMBB : MBB.successors())
584 if (!SuccMBB->isEHPad())
585 Blocks.insert(SuccMBB);
586 }
587
588 for (MachineBasicBlock *MBB : Blocks) {
589 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
590 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
591 ++NumInstsInserted;
592 ++NumLFENCEsInserted;
593 }
594}
595
596SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
597X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
598 SmallVector<BlockCondInfo, 16> Infos;
599
600 // Walk the function and build up a summary for each block's conditions that
601 // we need to trace through.
602 for (MachineBasicBlock &MBB : MF) {
603 // If there are no or only one successor, nothing to do here.
604 if (MBB.succ_size() <= 1)
605 continue;
606
607 // We want to reliably handle any conditional branch terminators in the
608 // MBB, so we manually analyze the branch. We can handle all of the
609 // permutations here, including ones that analyze branch cannot.
610 //
611 // The approach is to walk backwards across the terminators, resetting at
612 // any unconditional non-indirect branch, and track all conditional edges
613 // to basic blocks as well as the fallthrough or unconditional successor
614 // edge. For each conditional edge, we track the target and the opposite
615 // condition code in order to inject a "no-op" cmov into that successor
616 // that will harden the predicate. For the fallthrough/unconditional
617 // edge, we inject a separate cmov for each conditional branch with
618 // matching condition codes. This effectively implements an "and" of the
619 // condition flags, even if there isn't a single condition flag that would
620 // directly implement that. We don't bother trying to optimize either of
621 // these cases because if such an optimization is possible, LLVM should
622 // have optimized the conditional *branches* in that way already to reduce
623 // instruction count. This late, we simply assume the minimal number of
624 // branch instructions is being emitted and use that to guide our cmov
625 // insertion.
626
627 BlockCondInfo Info = {&MBB, {}, nullptr};
628
629 // Now walk backwards through the terminators and build up successors they
630 // reach and the conditions.
631 for (MachineInstr &MI : llvm::reverse(MBB)) {
632 // Once we've handled all the terminators, we're done.
633 if (!MI.isTerminator())
634 break;
635
636 // If we see a non-branch terminator, we can't handle anything so bail.
637 if (!MI.isBranch()) {
638 Info.CondBrs.clear();
639 break;
640 }
641
642 // If we see an unconditional branch, reset our state, clear any
643 // fallthrough, and set this as the "else" successor.
644 if (MI.getOpcode() == X86::JMP_1) {
645 Info.CondBrs.clear();
646 Info.UncondBr = &MI;
647 continue;
648 }
649
650 // If we get an invalid condition, we have an indirect branch or some
651 // other unanalyzable "fallthrough" case. We model this as a nullptr for
652 // the destination so we can still guard any conditional successors.
653 // Consider code sequences like:
654 // ```
655 // jCC L1
656 // jmpq *%rax
657 // ```
658 // We still want to harden the edge to `L1`.
659 if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
660 Info.CondBrs.clear();
661 Info.UncondBr = &MI;
662 continue;
663 }
664
665 // We have a vanilla conditional branch, add it to our list.
666 Info.CondBrs.push_back(&MI);
667 }
668 if (Info.CondBrs.empty()) {
669 ++NumBranchesUntraced;
670 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
671 MBB.dump());
672 continue;
673 }
674
675 Infos.push_back(Info);
676 }
677
678 return Infos;
679}
680
681/// Trace the predicate state through the CFG, instrumenting each conditional
682/// branch such that misspeculation through an edge will poison the predicate
683/// state.
684///
685/// Returns the list of inserted CMov instructions so that they can have their
686/// uses of the predicate state rewritten into proper SSA form once it is
687/// complete.
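///
/// As an illustrative sketch (not literal pass output), for a block ending in
/// `jne .Ltarget`, the taken edge receives `cmoveq %poison, %state` (poison the
/// state if ZF is set, i.e. the branch should not have been taken) and the
/// fallthrough edge receives `cmovneq %poison, %state`, using the opposite
/// condition on each edge so that any misspeculated edge ends up with an
/// all-ones predicate state.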
688SmallVector<MachineInstr *, 16>
689X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
690 MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
691 // Collect the inserted cmov instructions so we can rewrite their uses of the
692 // predicate state into SSA form.
693 SmallVector<MachineInstr *, 16> CMovs;
694
695 // Now walk all of the basic blocks looking for ones that end in conditional
696 // jumps where we need to update this register along each edge.
697 for (const BlockCondInfo &Info : Infos) {
698 MachineBasicBlock &MBB = *Info.MBB;
699 const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
700 MachineInstr *UncondBr = Info.UncondBr;
701
702 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
703 << "\n");
704 ++NumCondBranchesTraced;
705
706 // Compute the non-conditional successor as either the target of any
707 // unconditional branch or the layout successor.
708 MachineBasicBlock *UncondSucc =
709 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
710 ? UncondBr->getOperand(0).getMBB()
711 : nullptr)
712 : &*std::next(MachineFunction::iterator(&MBB));
713
714 // Count how many edges there are to any given successor.
715 SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
716 if (UncondSucc)
717 ++SuccCounts[UncondSucc];
718 for (auto *CondBr : CondBrs)
719 ++SuccCounts[CondBr->getOperand(0).getMBB()];
720
721 // A lambda to insert cmov instructions into a block checking all of the
722 // condition codes in a sequence.
723 auto BuildCheckingBlockForSuccAndConds =
724 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
725 MachineInstr *Br, MachineInstr *&UncondBr,
726 ArrayRef<X86::CondCode> Conds) {
727 // First, we split the edge to insert the checking block into a safe
728 // location.
729 auto &CheckingMBB =
730 (SuccCount == 1 && Succ.pred_size() == 1)
731 ? Succ
732 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
733
734 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
735 if (!LiveEFLAGS)
736 CheckingMBB.addLiveIn(X86::EFLAGS);
737
738 // Now insert the cmovs to implement the checks.
739 auto InsertPt = CheckingMBB.begin();
740 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
741 "Should never have a PHI in the initial checking block as it "
742 "always has a single predecessor!");
743
744 // We will wire each cmov to each other, but need to start with the
745 // incoming pred state.
746 unsigned CurStateReg = PS->InitialReg;
747
748 for (X86::CondCode Cond : Conds) {
749 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
750 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
751
752 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
753 // Note that we intentionally use an empty debug location so that
754 // this picks up the preceding location.
755 auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
756 TII->get(CMovOp), UpdatedStateReg)
757 .addReg(CurStateReg)
758 .addReg(PS->PoisonReg)
759 .addImm(Cond);
760 // If this is the last cmov and the EFLAGS weren't originally
761 // live-in, mark them as killed.
762 if (!LiveEFLAGS && Cond == Conds.back())
763 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
764 ->setIsKill(true);
765
766 ++NumInstsInserted;
767 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
768 dbgs() << "\n");
769
770 // The first one of the cmovs will be using the top level
771 // `PredStateReg` and need to get rewritten into SSA form.
772 if (CurStateReg == PS->InitialReg)
773 CMovs.push_back(&*CMovI);
774
775 // The next cmov should start from this one's def.
776 CurStateReg = UpdatedStateReg;
777 }
778
779 // And put the last one into the available values for SSA form of our
780 // predicate state.
781 PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
782 };
783
784 std::vector<X86::CondCode> UncondCodeSeq;
785 for (auto *CondBr : CondBrs) {
786 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
787 int &SuccCount = SuccCounts[&Succ];
788
789 X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
790 X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
791 UncondCodeSeq.push_back(Cond);
792
793 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
794 {InvCond});
795
796 // Decrement the successor count now that we've split one of the edges.
797 // We need to keep the count of edges to the successor accurate in order
798 // to know above when to *replace* the successor in the CFG vs. just
799 // adding the new successor.
800 --SuccCount;
801 }
802
803 // Since we may have split edges and changed the number of successors,
804 // normalize the probabilities. This avoids doing it each time we split an
805 // edge.
806 MBB.normalizeSuccProbs();
807
808 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
809 // need to intersect the other condition codes. We can do this by just
810 // doing a cmov for each one.
811 if (!UncondSucc)
812 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
813 // just skip this and continue.
814 continue;
815
816 assert(SuccCounts[UncondSucc] == 1 &&
817 "We should never have more than one edge to the unconditional "
818 "successor at this point because every other edge must have been "
819 "split above!");
820
821 // Sort and unique the codes to minimize them.
822 llvm::sort(UncondCodeSeq);
823 UncondCodeSeq.erase(llvm::unique(UncondCodeSeq), UncondCodeSeq.end());
824
825 // Build a checking version of the successor.
826 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
827 UncondBr, UncondBr, UncondCodeSeq);
828 }
829
830 return CMovs;
831}
832
833/// Compute the register class for the unfolded load.
834///
835/// FIXME: This should probably live in X86InstrInfo, potentially by adding
836/// a way to unfold into a newly created vreg rather than requiring a register
837/// input.
838static const TargetRegisterClass *
839getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
840 unsigned Opcode) {
841 unsigned Index;
842 unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
843 Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
844 const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
845 return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
846}
847
848void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
849 MachineFunction &MF) {
850 for (MachineBasicBlock &MBB : MF)
851 // We use make_early_inc_range here so we can remove instructions if needed
852 // without disturbing the iteration.
853 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
854 // Must either be a call or a branch.
855 if (!MI.isCall() && !MI.isBranch())
856 continue;
857 // We only care about loading variants of these instructions.
858 if (!MI.mayLoad())
859 continue;
860
861 switch (MI.getOpcode()) {
862 default: {
863 LLVM_DEBUG(
864 dbgs() << "ERROR: Found an unexpected loading branch or call "
865 "instruction:\n";
866 MI.dump(); dbgs() << "\n");
867 report_fatal_error("Unexpected loading branch or call!");
868 }
869
870 case X86::FARCALL16m:
871 case X86::FARCALL32m:
872 case X86::FARCALL64m:
873 case X86::FARJMP16m:
874 case X86::FARJMP32m:
875 case X86::FARJMP64m:
876 // We cannot mitigate far jumps or calls, but we also don't expect them
877 // to be vulnerable to Spectre v1.2 style attacks.
878 continue;
879
880 case X86::CALL16m:
881 case X86::CALL16m_NT:
882 case X86::CALL32m:
883 case X86::CALL32m_NT:
884 case X86::CALL64m:
885 case X86::CALL64m_NT:
886 case X86::JMP16m:
887 case X86::JMP16m_NT:
888 case X86::JMP32m:
889 case X86::JMP32m_NT:
890 case X86::JMP64m:
891 case X86::JMP64m_NT:
892 case X86::TAILJMPm64:
893 case X86::TAILJMPm64_REX:
894 case X86::TAILJMPm:
895 case X86::TCRETURNmi64:
896 case X86::TCRETURNmi: {
897 // Use the generic unfold logic now that we know we're dealing with
898 // expected instructions.
899 // FIXME: We don't have test coverage for all of these!
900 auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
901 if (!UnfoldedRC) {
902 LLVM_DEBUG(dbgs()
903 << "ERROR: Unable to unfold load from instruction:\n";
904 MI.dump(); dbgs() << "\n");
905 report_fatal_error("Unable to unfold load!");
906 }
907 Register Reg = MRI->createVirtualRegister(UnfoldedRC);
908 SmallVector<MachineInstr *, 2> NewMIs;
909 // If we were able to compute an unfolded reg class, any failure here
910 // is just a programming error so just assert.
911 bool Unfolded =
912 TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
913 /*UnfoldStore*/ false, NewMIs);
914 (void)Unfolded;
915 assert(Unfolded &&
916 "Computed unfolded register class but failed to unfold");
917 // Now stitch the new instructions into place and erase the old one.
918 for (auto *NewMI : NewMIs)
919 MBB.insert(MI.getIterator(), NewMI);
920
921 // Update the call site info.
922 if (MI.isCandidateForCallSiteEntry())
923 MF.eraseCallSiteInfo(&MI);
924
925 MI.eraseFromParent();
926 LLVM_DEBUG({
927 dbgs() << "Unfolded load successfully into:\n";
928 for (auto *NewMI : NewMIs) {
929 NewMI->dump();
930 dbgs() << "\n";
931 }
932 });
933 continue;
934 }
935 }
936 llvm_unreachable("Escaped switch with default!");
937 }
938}
939
940/// Trace the predicate state through indirect branches, instrumenting them to
941/// poison the state if a target is reached that does not match the expected
942/// target.
943///
944/// This is designed to mitigate Spectre variant 1 attacks where an indirect
945/// branch is trained to predict a particular target and then mispredicts that
946/// target in a way that can leak data. Despite using an indirect branch, this
947/// is really a variant 1 style attack: it does not steer execution to an
948/// arbitrary or attacker controlled address, and it does not require any
949/// special code executing next to the victim. This attack can also be mitigated
950/// through retpolines, but those require either replacing indirect branches
951/// with conditional direct branches or lowering them through a device that
952/// blocks speculation. This mitigation can replace these retpoline-style
953/// mitigations for jump tables and other indirect branches within a function
954/// when variant 2 isn't a risk while allowing limited speculation. Indirect
955/// calls, however, cannot be mitigated through this technique without changing
956/// the ABI in a fundamental way.
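///
/// Concretely, each indirect-branch target ends up beginning with roughly the
/// following sequence (illustrative registers; the exact opcodes depend on the
/// code model, see below):
///
///   leaq .Ltarget(%rip), %rcx   # materialize this block's own address
///   cmpq %rcx, %rdx             # %rdx = target address forwarded from the
///                               # predecessors via the SSA updater
///   cmovneq %poison, %state     # poison the predicate state on a mismatch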
957SmallVector<MachineInstr *, 16>
958X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
959 MachineFunction &MF) {
960 // We use the SSAUpdater to insert PHI nodes for the target addresses of
961 // indirect branches. We don't actually need the full power of the SSA updater
962 // in this particular case as we always have immediately available values, but
963 // this avoids us having to re-implement the PHI construction logic.
964 MachineSSAUpdater TargetAddrSSA(MF);
965 TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
966
967 // Track which blocks were terminated with an indirect branch.
968 SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
969
970 // We need to know what blocks end up reached via indirect branches. We
971 // expect this to be a subset of those whose address is taken and so track it
972 // directly via the CFG.
973 SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
974
975 // Walk all the blocks which end in an indirect branch and make the
976 // target address available.
977 for (MachineBasicBlock &MBB : MF) {
978 // Find the last terminator.
979 auto MII = MBB.instr_rbegin();
980 while (MII != MBB.instr_rend() && MII->isDebugInstr())
981 ++MII;
982 if (MII == MBB.instr_rend())
983 continue;
984 MachineInstr &TI = *MII;
985 if (!TI.isTerminator() || !TI.isBranch())
986 // No terminator or non-branch terminator.
987 continue;
988
989 unsigned TargetReg;
990
991 switch (TI.getOpcode()) {
992 default:
993 // Direct branch or conditional branch (leading to fallthrough).
994 continue;
995
996 case X86::FARJMP16m:
997 case X86::FARJMP32m:
998 case X86::FARJMP64m:
999 // We cannot mitigate far jumps or calls, but we also don't expect them
1000 // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1001 continue;
1002
1003 case X86::JMP16m:
1004 case X86::JMP16m_NT:
1005 case X86::JMP32m:
1006 case X86::JMP32m_NT:
1007 case X86::JMP64m:
1008 case X86::JMP64m_NT:
1009 // Mostly as documentation.
1010 report_fatal_error("Memory operand jumps should have been unfolded!");
1011
1012 case X86::JMP16r:
1014 "Support for 16-bit indirect branches is not implemented.");
1015 case X86::JMP32r:
1017 "Support for 32-bit indirect branches is not implemented.");
1018
1019 case X86::JMP64r:
1020 TargetReg = TI.getOperand(0).getReg();
1021 }
1022
1023 // We have definitely found an indirect branch. Verify that there are no
1024 // preceding conditional branches as we don't yet support that.
1025 if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1026 return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1027 })) {
1028 LLVM_DEBUG({
1029 dbgs() << "ERROR: Found other terminators in a block with an indirect "
1030 "branch! This is not yet supported! Terminator sequence:\n";
1031 for (MachineInstr &MI : MBB.terminators()) {
1032 MI.dump();
1033 dbgs() << '\n';
1034 }
1035 });
1036 report_fatal_error("Unimplemented terminator sequence!");
1037 }
1038
1039 // Make the target register an available value for this block.
1040 TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1041 IndirectTerminatedMBBs.insert(&MBB);
1042
1043 // Add all the successors to our target candidates.
1044 for (MachineBasicBlock *Succ : MBB.successors())
1045 IndirectTargetMBBs.insert(Succ);
1046 }
1047
1048 // Keep track of the cmov instructions we insert so we can return them.
1049 SmallVector<MachineInstr *, 16> CMovs;
1050
1051 // If we didn't find any indirect branches with targets, nothing to do here.
1052 if (IndirectTargetMBBs.empty())
1053 return CMovs;
1054
1055 // We found indirect branches and targets that need to be instrumented to
1056 // harden loads within them. Walk the blocks of the function (to get a stable
1057 // ordering) and instrument each target of an indirect branch.
1058 for (MachineBasicBlock &MBB : MF) {
1059 // Skip the blocks that aren't candidate targets.
1060 if (!IndirectTargetMBBs.count(&MBB))
1061 continue;
1062
1063 // We don't expect EH pads to ever be reached via an indirect branch. If
1064 // this is desired for some reason, we could simply skip them here rather
1065 // than asserting.
1066 assert(!MBB.isEHPad() &&
1067 "Unexpected EH pad as target of an indirect branch!");
1068
1069 // We should never end up threading EFLAGS into a block to harden
1070 // conditional jumps as there would be an additional successor via the
1071 // indirect branch. As a consequence, all such edges would be split before
1072 // reaching here, and the inserted block will handle the EFLAGS-based
1073 // hardening.
1074 assert(!MBB.isLiveIn(X86::EFLAGS) &&
1075 "Cannot check within a block that already has live-in EFLAGS!");
1076
1077 // We can't handle having non-indirect edges into this block unless this is
1078 // the only successor and we can synthesize the necessary target address.
1079 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1080 // If we've already handled this by extracting the target directly,
1081 // nothing to do.
1082 if (IndirectTerminatedMBBs.count(Pred))
1083 continue;
1084
1085 // Otherwise, we have to be the only successor. We generally expect this
1086 // to be true as conditional branches should have had a critical edge
1087 // split already. We don't however need to worry about EH pad successors
1088 // as they'll happily ignore the target and their hardening strategy is
1089 // resilient to all ways in which they could be reached speculatively.
1090 if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1091 return Succ->isEHPad() || Succ == &MBB;
1092 })) {
1093 LLVM_DEBUG({
1094 dbgs() << "ERROR: Found conditional entry to target of indirect "
1095 "branch!\n";
1096 Pred->dump();
1097 MBB.dump();
1098 });
1099 report_fatal_error("Cannot harden a conditional entry to a target of "
1100 "an indirect branch!");
1101 }
1102
1103 // Now we need to compute the address of this block and install it as a
1104 // synthetic target in the predecessor. We do this at the bottom of the
1105 // predecessor.
1106 auto InsertPt = Pred->getFirstTerminator();
1107 Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1108 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1109 !Subtarget->isPositionIndependent()) {
1110 // Directly materialize it into an immediate.
1111 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1112 TII->get(X86::MOV64ri32), TargetReg)
1113 .addMBB(&MBB);
1114 ++NumInstsInserted;
1115 (void)AddrI;
1116 LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1117 dbgs() << "\n");
1118 } else {
1119 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1120 TargetReg)
1121 .addReg(/*Base*/ X86::RIP)
1122 .addImm(/*Scale*/ 1)
1123 .addReg(/*Index*/ 0)
1124 .addMBB(&MBB)
1125 .addReg(/*Segment*/ 0);
1126 ++NumInstsInserted;
1127 (void)AddrI;
1128 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1129 dbgs() << "\n");
1130 }
1131 // And make this available.
1132 TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1133 }
1134
1135 // Materialize the needed SSA value of the target. Note that we need the
1136 // middle of the block as this block might at the bottom have an indirect
1137 // branch back to itself. We can do this here because at this point, every
1138 // predecessor of this block has an available value. This is basically just
1139 // automating the construction of a PHI node for this target.
1140 Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1141
1142 // Insert a comparison of the incoming target register with this block's
1143 // address. This also requires us to mark the block as having its address
1144 // taken explicitly.
1145 MBB.setMachineBlockAddressTaken();
1146 auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1147 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1148 !Subtarget->isPositionIndependent()) {
1149 // Check directly against a relocated immediate when we can.
1150 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1151 .addReg(TargetReg, RegState::Kill)
1152 .addMBB(&MBB);
1153 ++NumInstsInserted;
1154 (void)CheckI;
1155 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1156 } else {
1157 // Otherwise compute the address into a register first.
1158 Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1159 auto AddrI =
1160 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1161 .addReg(/*Base*/ X86::RIP)
1162 .addImm(/*Scale*/ 1)
1163 .addReg(/*Index*/ 0)
1164 .addMBB(&MBB)
1165 .addReg(/*Segment*/ 0);
1166 ++NumInstsInserted;
1167 (void)AddrI;
1168 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1169 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1170 .addReg(TargetReg, RegState::Kill)
1171 .addReg(AddrReg, RegState::Kill);
1172 ++NumInstsInserted;
1173 (void)CheckI;
1174 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1175 }
1176
1177 // Now cmov over the predicate if the comparison wasn't equal.
1178 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1179 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1180 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1181 auto CMovI =
1182 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1183 .addReg(PS->InitialReg)
1184 .addReg(PS->PoisonReg)
1185 .addImm(X86::COND_NE);
1186 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
1187 ->setIsKill(true);
1188 ++NumInstsInserted;
1189 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1190 CMovs.push_back(&*CMovI);
1191
1192 // And put the new value into the available values for SSA form of our
1193 // predicate state.
1194 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1195 }
1196
1197 // Return all the newly inserted cmov instructions of the predicate state.
1198 return CMovs;
1199}
1200
1201// Returns true if the MI has EFLAGS as a register def operand and it's live,
1202// otherwise it returns false
1203static bool isEFLAGSDefLive(const MachineInstr &MI) {
1204 if (const MachineOperand *DefOp =
1205 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1206 return !DefOp->isDead();
1207 }
1208 return false;
1209}
1210
1211static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1212 const TargetRegisterInfo &TRI) {
1213 // Check if EFLAGS are alive by seeing if there is a def of them or they
1214 // live-in, and then seeing if that def is in turn used.
1215 for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1216 if (MachineOperand *DefOp =
1217 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1218 // If the def is dead, then EFLAGS is not live.
1219 if (DefOp->isDead())
1220 return false;
1221
1222 // Otherwise we've def'ed it, and it is live.
1223 return true;
1224 }
1225 // While at this instruction, also check if we use and kill EFLAGS
1226 // which means it isn't live.
1227 if (MI.killsRegister(X86::EFLAGS, &TRI))
1228 return false;
1229 }
1230
1231 // If we didn't find anything conclusive (neither definitely alive or
1232 // definitely dead) return whether it lives into the block.
1233 return MBB.isLiveIn(X86::EFLAGS);
1234}
1235
1236/// Trace the predicate state through each of the blocks in the function,
1237/// hardening everything necessary along the way.
1238///
1239/// We call this routine once the initial predicate state has been established
1240/// for each basic block in the function in the SSA updater. This routine traces
1241/// it through the instructions within each basic block, and for non-returning
1242/// blocks informs the SSA updater about the final state that lives out of the
1243/// block. Along the way, it hardens any vulnerable instruction using the
1244/// currently valid predicate state. We have to do these two things together
1245/// because the SSA updater only works across blocks. Within a block, we track
1246/// the current predicate state directly and update it as it changes.
1247///
1248/// This operates in two passes over each block. First, we analyze the loads in
1249/// the block to determine which strategy will be used to harden them: hardening
1250/// the address or hardening the loaded value when loaded into a register
1251/// amenable to hardening. We have to process these first because the two
1252/// strategies may interact -- later hardening may change what strategy we wish
1253/// to use. We also will analyze data dependencies between loads and avoid
1254/// hardening those loads that are data dependent on a load with a hardened
1255/// address. We also skip hardening loads already behind an LFENCE as that is
1256/// sufficient to harden them against misspeculation.
1257///
1258/// Second, we actively trace the predicate state through the block, applying
1259/// the hardening steps we determined necessary in the first pass as we go.
1260///
1261/// These two passes are applied to each basic block. We operate one block at a
1262/// time to simplify reasoning about reachability and sequencing.
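///
/// In outline (an editorial summary of the code below, not additional logic):
///
/// \code
///   for each basic block BB:
///     // Pass 1: classify loads into HardenPostLoad / HardenLoadAddr,
///     //         propagating load-dependence and stopping at LFENCE.
///     // Pass 2: re-walk BB, hardening addresses or loaded values as decided,
///     //         and handling calls, returns and indirect branches en route.
/// \endcode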
1263void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1264 MachineFunction &MF) {
1265 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1266 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1267
1268 SmallSet<unsigned, 16> HardenedAddrRegs;
1269
1270 SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1271
1272 // Track the set of load-dependent registers through the basic block. Because
1273 // the values of these registers have an existing data dependency on a loaded
1274 // value which we would have checked, we can omit any checks on them.
1275 SparseBitVector<> LoadDepRegs;
1276
1277 for (MachineBasicBlock &MBB : MF) {
1278 // The first pass over the block: collect all the loads which can have their
1279 // loaded value hardened and all the loads that instead need their address
1280 // hardened. During this walk we propagate load dependence for address
1281 // hardened loads and also look for LFENCE to stop hardening wherever
1282 // possible. When deciding whether or not to harden the loaded value,
1283 // we check to see if any registers used in the address will have been
1284 // hardened at this point and if so, harden any remaining address registers
1285 // as that often successfully re-uses hardened addresses and minimizes
1286 // instructions.
1287 //
1288 // FIXME: We should consider an aggressive mode where we continue to keep as
1289 // many loads value hardened even when some address register hardening would
1290 // be free (due to reuse).
1291 //
1292 // Note that we only need this pass if we are actually hardening loads.
1293 if (HardenLoads)
1294 for (MachineInstr &MI : MBB) {
1295 // We naively assume that all def'ed registers of an instruction have
1296 // a data dependency on all of their operands.
1297 // FIXME: Do a more careful analysis of x86 to build a conservative
1298 // model here.
1299 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1300 return Op.isReg() && LoadDepRegs.test(Op.getReg());
1301 }))
1302 for (MachineOperand &Def : MI.defs())
1303 if (Def.isReg())
1304 LoadDepRegs.set(Def.getReg());
1305
1306 // Both Intel and AMD are guiding that they will change the semantics of
1307 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1308 // no more need to guard things in this block.
1309 if (MI.getOpcode() == X86::LFENCE)
1310 break;
1311
1312 // If this instruction cannot load, nothing to do.
1313 if (!MI.mayLoad())
1314 continue;
1315
1316 // Some instructions which "load" are trivially safe or unimportant.
1317 if (MI.getOpcode() == X86::MFENCE)
1318 continue;
1319
1320 // Extract the memory operand information about this instruction.
1321 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1322 if (MemRefBeginIdx < 0) {
1323 LLVM_DEBUG(dbgs()
1324 << "WARNING: unable to harden loading instruction: ";
1325 MI.dump());
1326 continue;
1327 }
1328
1329 MachineOperand &BaseMO =
1330 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1331 MachineOperand &IndexMO =
1332 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1333
1334 // If we have at least one (non-frame-index, non-RIP) register operand,
1335 // and neither operand is load-dependent, we need to check the load.
1336 unsigned BaseReg = 0, IndexReg = 0;
1337 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1338 BaseMO.getReg() != X86::NoRegister)
1339 BaseReg = BaseMO.getReg();
1340 if (IndexMO.getReg() != X86::NoRegister)
1341 IndexReg = IndexMO.getReg();
1342
1343 if (!BaseReg && !IndexReg)
1344 // No register operands!
1345 continue;
1346
1347 // If any register operand is dependent, this load is dependent and we
1348 // needn't check it.
1349 // FIXME: Is this true in the case where we are hardening loads after
1350 // they complete? Unclear, need to investigate.
1351 if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1352 (IndexReg && LoadDepRegs.test(IndexReg)))
1353 continue;
1354
1355 // If post-load hardening is enabled, this load is compatible with
1356 // post-load hardening, and we aren't already going to harden one of the
1357 // address registers, queue it up to be hardened post-load. Notably,
1358 // even once hardened this won't introduce a useful dependency that
1359 // could prune out subsequent loads.
1360 if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
1361 !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1362 MI.getOperand(0).isReg() &&
1363 canHardenRegister(MI.getOperand(0).getReg()) &&
1364 !HardenedAddrRegs.count(BaseReg) &&
1365 !HardenedAddrRegs.count(IndexReg)) {
1366 HardenPostLoad.insert(&MI);
1367 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1368 continue;
1369 }
1370
1371 // Record this instruction for address hardening and record its register
1372 // operands as being address-hardened.
1373 HardenLoadAddr.insert(&MI);
1374 if (BaseReg)
1375 HardenedAddrRegs.insert(BaseReg);
1376 if (IndexReg)
1377 HardenedAddrRegs.insert(IndexReg);
1378
1379 for (MachineOperand &Def : MI.defs())
1380 if (Def.isReg())
1381 LoadDepRegs.set(Def.getReg());
1382 }
1383
1384 // Now re-walk the instructions in the basic block, and apply whichever
1385 // hardening strategy we have elected. Note that we do this in a second
1386 // pass specifically so that we have the complete set of instructions for
1387 // which we will do post-load hardening and can defer it in certain
1388 // circumstances.
1389 for (MachineInstr &MI : MBB) {
1390 if (HardenLoads) {
1391 // We cannot both require hardening the def of a load and its address.
1392 assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1393 "Requested to harden both the address and def of a load!");
1394
1395 // Check if this is a load whose address needs to be hardened.
1396 if (HardenLoadAddr.erase(&MI)) {
1397 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1398 assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1399
1400 MachineOperand &BaseMO =
1401 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1402 MachineOperand &IndexMO =
1403 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1404 hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1405 continue;
1406 }
1407
1408 // Test if this instruction is one of our post load instructions (and
1409 // remove it from the set if so).
1410 if (HardenPostLoad.erase(&MI)) {
1411 assert(!MI.isCall() && "Must not try to post-load harden a call!");
1412
1413 // If this is a data-invariant load and there is no EFLAGS
1414 // interference, we want to try and sink any hardening as far as
1415 // possible.
1416 if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
1417 // Sink the instruction we'll need to harden as far as we can down
1418 // the graph.
1419 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1420
1421 // If we managed to sink this instruction, update everything so we
1422 // harden that instruction when we reach it in the instruction
1423 // sequence.
1424 if (SunkMI != &MI) {
1425 // If in sinking there was no instruction needing to be hardened,
1426 // we're done.
1427 if (!SunkMI)
1428 continue;
1429
1430 // Otherwise, add this to the set of defs we harden.
1431 HardenPostLoad.insert(SunkMI);
1432 continue;
1433 }
1434 }
1435
1436 unsigned HardenedReg = hardenPostLoad(MI);
1437
1438 // Mark the resulting hardened register as such so we don't re-harden.
1439 AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1440
1441 continue;
1442 }
1443
1444 // Check for an indirect call or branch that may need its input hardened
1445 // even if we couldn't find the specific load used, or were able to
1446 // avoid hardening it for some reason. Note that here we cannot break
1447 // out afterward as we may still need to handle any call aspect of this
1448 // instruction.
1449 if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1450 hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1451 }
1452
1453 // After we finish hardening loads we handle interprocedural hardening if
1454 // enabled and relevant for this instruction.
1455 if (!HardenInterprocedurally)
1456 continue;
1457 if (!MI.isCall() && !MI.isReturn())
1458 continue;
1459
1460 // If this is a direct return (IE, not a tail call) just directly harden
1461 // it.
1462 if (MI.isReturn() && !MI.isCall()) {
1463 hardenReturnInstr(MI);
1464 continue;
1465 }
1466
1467 // Otherwise we have a call. We need to handle transferring the predicate
1468 // state into a call and recovering it after the call returns (unless this
1469 // is a tail call).
1470 assert(MI.isCall() && "Should only reach here for calls!");
1471 tracePredStateThroughCall(MI);
1472 }
1473
1474 HardenPostLoad.clear();
1475 HardenLoadAddr.clear();
1476 HardenedAddrRegs.clear();
1477 AddrRegToHardenedReg.clear();
1478
1479 // Currently, we only track data-dependent loads within a basic block.
1480 // FIXME: We should see if this is necessary or if we could be more
1481 // aggressive here without opening up attack avenues.
1482 LoadDepRegs.clear();
1483 }
1484}
1485
1486/// Save EFLAGS into the returned GPR. This can in turn be restored with
1487/// `restoreEFLAGS`.
1488///
1489/// Note that LLVM can only lower very simple patterns of saved and restored
1490/// EFLAGS registers. The restore should always be within the same basic block
1491/// as the save so that no PHI nodes are inserted.
1492unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1493 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1494 const DebugLoc &Loc) {
1495 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1496 // what instruction selection does.
1497 Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1498 // We directly copy the FLAGS register and rely on later lowering to clean
1499 // this up into the appropriate setCC instructions.
1500 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1501 ++NumInstsInserted;
1502 return Reg;
1503}
1504
1505/// Restore EFLAGS from the provided GPR. This should be produced by
1506/// `saveEFLAGS`.
1507///
1508/// This must be done within the same basic block as the save in order to
1509/// reliably lower.
1510void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1511 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1512 const DebugLoc &Loc, Register Reg) {
1513 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1514 ++NumInstsInserted;
1515}
1516
1517/// Takes the current predicate state (in a register) and merges it into the
1518/// stack pointer. The state is essentially a single bit, but we merge this in
1519/// a way that won't form non-canonical pointers and also will be preserved
1520/// across normal stack adjustments.
1521void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1522 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1523 const DebugLoc &Loc, unsigned PredStateReg) {
1524 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1525 // FIXME: This hard codes a shift distance based on the number of bits needed
1526 // to stay canonical on 64-bit. We should compute this somehow and support
1527 // 32-bit as part of that.
1528 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1529 .addReg(PredStateReg, RegState::Kill)
1530 .addImm(47);
1531 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1532 ++NumInstsInserted;
1533 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1534 .addReg(X86::RSP)
1535 .addReg(TmpReg, RegState::Kill);
1536 OrI->addRegisterDead(X86::EFLAGS, TRI);
1537 ++NumInstsInserted;
1538}
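// Roughly, assuming the 64-bit predicate state register is all-ones when
// misspeculating and zero otherwise, the two instructions built above are:
//
//   shlq $47, %state      # park the state in the high bits (stays canonical)
//   orq %state, %rsp      # merge it into the stack pointer
//
// With a zero state RSP is unchanged; with an all-ones state the high bits of
// RSP carry the poison, where ordinary stack adjustments will not disturb it.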
1539
1540/// Extracts the predicate state stored in the high bits of the stack pointer.
1541unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1542 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1543 const DebugLoc &Loc) {
1544 Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1545 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1546
1547 // We know that the stack pointer will have any preserved predicate state in
1548 // its high bit. We just want to smear this across the other bits. Turns out,
1549 // this is exactly what an arithmetic right shift does.
1550 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1551 .addReg(X86::RSP);
1552 auto ShiftI =
1553 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1554 .addReg(TmpReg, RegState::Kill)
1555 .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1556 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1557 ++NumInstsInserted;
1558
1559 return PredStateReg;
1560}
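// For example, with a 64-bit predicate state the shift above is effectively
// `sarq $63, %state`: if the high bit of RSP is clear (no poison was merged
// in) the result is zero, and if it is set the arithmetic shift smears it into
// an all-ones value, reconstituting the poisoned state.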
1561
1562void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1563 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1564 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1565 MachineBasicBlock &MBB = *MI.getParent();
1566 const DebugLoc &Loc = MI.getDebugLoc();
1567
1568 // Check if EFLAGS are alive by seeing if there is a def of them or they
1569 // live-in, and then seeing if that def is in turn used.
1570 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1571
1572 SmallVector<MachineOperand *, 2> HardenOpRegs;
1573
1574 if (BaseMO.isFI()) {
1575 // A frame index is never a dynamically controllable load, so only
1576 // harden it if we're covering fixed address loads as well.
1577 LLVM_DEBUG(
1578 dbgs() << " Skipping hardening base of explicit stack frame load: ";
1579 MI.dump(); dbgs() << "\n");
1580 } else if (BaseMO.getReg() == X86::RSP) {
1581 // Some idempotent atomic operations are lowered directly to a locked
1582 // OR with 0 to the top of stack (or slightly offset from top) which uses an
1583 // explicit RSP register as the base.
1584 assert(IndexMO.getReg() == X86::NoRegister &&
1585 "Explicit RSP access with dynamic index!");
1586 LLVM_DEBUG(
1587 dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1588 } else if (BaseMO.getReg() == X86::RIP ||
1589 BaseMO.getReg() == X86::NoRegister) {
1590 // For both RIP-relative addressed loads or absolute loads, we cannot
1591 // meaningfully harden them because the address being loaded has no
1592 // dynamic component.
1593 //
1594 // FIXME: When using a segment base (like TLS does) we end up with the
1595 // dynamic address being the base plus -1 because we can't mutate the
1596 // segment register here. This allows the signed 32-bit offset to point at
1597 // valid segment-relative addresses and load them successfully.
1598 LLVM_DEBUG(
1599 dbgs() << " Cannot harden base of "
1600 << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1601 << " address in a load!");
1602 } else {
1603 assert(BaseMO.isReg() &&
1604 "Only allowed to have a frame index or register base.");
1605 HardenOpRegs.push_back(&BaseMO);
1606 }
1607
1608 if (IndexMO.getReg() != X86::NoRegister &&
1609 (HardenOpRegs.empty() ||
1610 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1611 HardenOpRegs.push_back(&IndexMO);
1612
1613 assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1614 "Should have exactly one or two registers to harden!");
1615 assert((HardenOpRegs.size() == 1 ||
1616 HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1617 "Should not have two of the same registers!");
1618
1619 // Remove any registers that have already been checked.
1620 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1621 // See if this operand's register has already been checked.
1622 auto It = AddrRegToHardenedReg.find(Op->getReg());
1623 if (It == AddrRegToHardenedReg.end())
1624 // Not checked, so retain this one.
1625 return false;
1626
1627 // Otherwise, we can directly update this operand and remove it.
1628 Op->setReg(It->second);
1629 return true;
1630 });
1631 // If there are none left, we're done.
1632 if (HardenOpRegs.empty())
1633 return;
1634
1635 // Compute the current predicate state.
1636 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1637
1638 auto InsertPt = MI.getIterator();
1639
1640 // If EFLAGS are live and we don't have access to instructions that avoid
1641 // clobbering EFLAGS, we need to save and restore them. This in turn makes
1642 // the EFLAGS no longer live.
1643 unsigned FlagsReg = 0;
1644 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1645 EFLAGSLive = false;
1646 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1647 }
1648
1649 for (MachineOperand *Op : HardenOpRegs) {
1650 Register OpReg = Op->getReg();
1651 auto *OpRC = MRI->getRegClass(OpReg);
1652 Register TmpReg = MRI->createVirtualRegister(OpRC);
1653
1654 // If this is a vector register, we'll need somewhat custom logic to handle
1655 // hardening it.
1656 if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1657 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1658 assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1659 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1660
1661 // Move our state into a vector register.
1662 // FIXME: We could skip this at the cost of longer encodings with AVX-512
1663 // but that doesn't seem likely worth it.
1664 Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1665 auto MovI =
1666 BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1667 .addReg(StateReg);
1668 (void)MovI;
1669 ++NumInstsInserted;
1670 LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1671
1672 // Broadcast it across the vector register.
1673 Register VBStateReg = MRI->createVirtualRegister(OpRC);
1674 auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1675 TII->get(Is128Bit ? X86::VPBROADCASTQrr
1676 : X86::VPBROADCASTQYrr),
1677 VBStateReg)
1678 .addReg(VStateReg);
1679 (void)BroadcastI;
1680 ++NumInstsInserted;
1681 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1682 dbgs() << "\n");
1683
1684 // Merge our potential poison state into the value with a vector or.
1685 auto OrI =
1686 BuildMI(MBB, InsertPt, Loc,
1687 TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1688 .addReg(VBStateReg)
1689 .addReg(OpReg);
1690 (void)OrI;
1691 ++NumInstsInserted;
1692 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1693 } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1694 OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1695 OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1696 assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1697 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1698 bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1699 if (Is128Bit || Is256Bit)
1700 assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1701
1702 // Broadcast our state into a vector register.
1703 Register VStateReg = MRI->createVirtualRegister(OpRC);
1704 unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1705 : Is256Bit ? X86::VPBROADCASTQrZ256rr
1706 : X86::VPBROADCASTQrZrr;
1707 auto BroadcastI =
1708 BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1709 .addReg(StateReg);
1710 (void)BroadcastI;
1711 ++NumInstsInserted;
1712 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1713 dbgs() << "\n");
1714
1715 // Merge our potential poison state into the value with a vector or.
1716 unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1717 : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1718 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1719 .addReg(VStateReg)
1720 .addReg(OpReg);
1721 (void)OrI;
1722 ++NumInstsInserted;
1723 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1724 } else {
1725 // FIXME: Need to support GR32 here for 32-bit code.
1726 assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1727 "Not a supported register class for address hardening!");
1728
1729 if (!EFLAGSLive) {
1730 // Merge our potential poison state into the value with an or.
1731 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1732 .addReg(StateReg)
1733 .addReg(OpReg);
1734 OrI->addRegisterDead(X86::EFLAGS, TRI);
1735 ++NumInstsInserted;
1736 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1737 } else {
1738 // We need to avoid touching EFLAGS, so instead of an OR we use SHRX (which
1739 // does not update flags) to shift the address right by the state value.
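// Concretely: the poison state is all-ones, so the shift count (taken modulo
// 64) becomes 63 and the result is `OpReg >> 63`, destroying the
// attacker-controlled address; with a zero state the shift count is 0 and the
// address passes through unchanged.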
1740 auto ShiftI =
1741 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1742 .addReg(OpReg)
1743 .addReg(StateReg);
1744 (void)ShiftI;
1745 ++NumInstsInserted;
1746 LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1747 dbgs() << "\n");
1748 }
1749 }
1750
1751 // Record this register as checked and update the operand.
1752 assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1753 "Should not have checked this register yet!");
1754 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1755 Op->setReg(TmpReg);
1756 ++NumAddrRegsHardened;
1757 }
1758
1759 // And restore the flags if needed.
1760 if (FlagsReg)
1761 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1762}
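// As a simplified sketch (hypothetical registers, GR64 operands, dead EFLAGS),
// a load such as `movq (%rbx,%rcx,8), %rax` has both of its address registers
// checked before the access; the pass actually ORs into fresh virtual
// registers and rewrites the operands, but the effect is:
//
//   orq %state, %rbx
//   orq %state, %rcx
//   movq (%rbx,%rcx,8), %rax
//
// so on a misspeculated path the all-ones state collapses the address to a
// known-poisoned value instead of an attacker-chosen one.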
1763
1764MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1765 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1767 "Cannot get here with a non-invariant load!");
1768 assert(!isEFLAGSDefLive(InitialMI) &&
1769 "Cannot get here with a data invariant load "
1770 "that interferes with EFLAGS!");
1771
1772 // See if we can sink hardening the loaded value.
1773 auto SinkCheckToSingleUse =
1774 [&](MachineInstr &MI) -> std::optional<MachineInstr *> {
1775 Register DefReg = MI.getOperand(0).getReg();
1776
1777 // We need to find a single use to which we can sink the check. We can
1778 // primarily do this because many uses may already end up checked on their
1779 // own.
1780 MachineInstr *SingleUseMI = nullptr;
1781 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1782 // If we're already going to harden this use, it is data invariant, it
1783 // does not interfere with EFLAGS, and within our block.
1784 if (HardenedInstrs.count(&UseMI)) {
1785 if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
1786 // If we've already decided to harden a non-load, we must have sunk
1787 // some other post-load hardened instruction to it and it must itself
1788 // be data-invariant.
1789 assert(X86InstrInfo::isDataInvariant(UseMI) &&
1790 "Data variant instruction being hardened!");
1791 continue;
1792 }
1793
1794 // Otherwise, this is a load and the load component can't be data
1795 // invariant so check how this register is being used.
1796 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(UseMI);
1797 assert(MemRefBeginIdx >= 0 &&
1798 "Should always have mem references here!");
1799
1800 MachineOperand &BaseMO =
1801 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1802 MachineOperand &IndexMO =
1803 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1804 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1805 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1806 // The load uses the register as part of its address making it not
1807 // invariant.
1808 return {};
1809
1810 continue;
1811 }
1812
1813 if (SingleUseMI)
1814 // We already have a single use, this would make two. Bail.
1815 return {};
1816
1817 // If this single use isn't data invariant, isn't in this block, or has
1818 // interfering EFLAGS, we can't sink the hardening to it.
1819 if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1820 isEFLAGSDefLive(UseMI))
1821 return {};
1822
1823 // If this instruction defines multiple registers bail as we won't harden
1824 // all of them.
1825 if (UseMI.getDesc().getNumDefs() > 1)
1826 return {};
1827
1828 // If this register isn't a virtual register, we can't sanely walk its uses,
1829 // so just bail. Also check that its register class is one of the ones we
1830 // can harden.
1831 Register UseDefReg = UseMI.getOperand(0).getReg();
1832 if (!canHardenRegister(UseDefReg))
1833 return {};
1834
1835 SingleUseMI = &UseMI;
1836 }
1837
1838 // If SingleUseMI is still null, there is no use that needs its own
1839 // checking. Otherwise, it is the single use that needs checking.
1840 return {SingleUseMI};
1841 };
1842
1843 MachineInstr *MI = &InitialMI;
1844 while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1845 // Update which MI we're checking now.
1846 MI = *SingleUse;
1847 if (!MI)
1848 break;
1849 }
1850
1851 return MI;
1852}
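// For example (hypothetical virtual registers): if %v0 is loaded and its only
// use is a data-invariant `%v1 = LEA64r ...` in the same block whose def we
// can harden, the check is sunk from %v0 to %v1, so a single hardening `or`
// covers the value that later uses actually consume.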
1853
1854bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
1855 // We only support hardening virtual registers.
1856 if (!Reg.isVirtual())
1857 return false;
1858
1859 auto *RC = MRI->getRegClass(Reg);
1860 int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1861 if (RegBytes > 8)
1862 // We don't support post-load hardening of vectors.
1863 return false;
1864
1865 unsigned RegIdx = Log2_32(RegBytes);
1866 assert(RegIdx < 4 && "Unsupported register size");
1867
1868 // If this register class is explicitly constrained to a class that doesn't
1869 // require REX prefix, we may not be able to satisfy that constraint when
1870 // emitting the hardening instructions, so bail out here.
1871 // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1872 // end up with both a NOREX and a REX-only register as operands to the hardening
1873 // instructions. It would be better to fix that code to handle this situation
1874 // rather than hack around it in this way.
1875 const TargetRegisterClass *NOREXRegClasses[] = {
1876 &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1877 &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1878 if (RC == NOREXRegClasses[RegIdx])
1879 return false;
1880
1881 const TargetRegisterClass *GPRRegClasses[] = {
1882 &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1883 &X86::GR64RegClass};
1884 return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1885}
1886
1887/// Harden a value in a register.
1888///
1889/// This is the low-level logic to fully harden a value sitting in a register
1890/// against leaking during speculative execution.
1891///
1892/// Unlike hardening an address that is used by a load, this routine is required
1893/// to hide *all* incoming bits in the register.
1894///
1895/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1896/// larger than the predicate state register. FIXME: We should support vector
1897/// registers here by broadcasting the predicate state.
1898///
1899/// The new, hardened virtual register is returned. It will have the same
1900/// register class as `Reg`.
1901unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1902 Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1903 const DebugLoc &Loc) {
1904 assert(canHardenRegister(Reg) && "Cannot harden this register!");
1905
1906 auto *RC = MRI->getRegClass(Reg);
1907 int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1908 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1909 assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1910 "Unknown register size");
1911
1912 // FIXME: Need to teach this about 32-bit mode.
1913 if (Bytes != 8) {
1914 unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1915 unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1916 Register NarrowStateReg = MRI->createVirtualRegister(RC);
1917 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1918 .addReg(StateReg, 0, SubRegImm);
1919 StateReg = NarrowStateReg;
1920 }
1921
1922 unsigned FlagsReg = 0;
1923 if (isEFLAGSLive(MBB, InsertPt, *TRI))
1924 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1925
1926 Register NewReg = MRI->createVirtualRegister(RC);
1927 unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1928 unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1929 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1930 .addReg(StateReg)
1931 .addReg(Reg);
1932 OrI->addRegisterDead(X86::EFLAGS, TRI);
1933 ++NumInstsInserted;
1934 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1935
1936 if (FlagsReg)
1937 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1938
1939 return NewReg;
1940}
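// The emitted pattern is a single flag-dead `or` against the predicate state,
// e.g. `orq %state, %val` for a 64-bit value; for narrower GPRs the state is
// first copied through the matching subregister (sub_8bit/sub_16bit/sub_32bit)
// so the `or` has same-width operands, and the whole sequence is bracketed by
// the EFLAGS save/restore above when the flags are live.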
1941
1942/// Harden a load by hardening the loaded value in the defined register.
1943///
1944/// We can harden a non-leaking load into a register without touching the
1945/// address by just hiding all of the loaded bits during misspeculation. We use
1946/// an `or` instruction to do this because we set up our poison value as all
1947/// ones. And the goal is just for the loaded bits to not be exposed to
1948/// execution and coercing them to one is sufficient.
1949///
1950/// Returns the newly hardened register.
1951unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1952 MachineBasicBlock &MBB = *MI.getParent();
1953 const DebugLoc &Loc = MI.getDebugLoc();
1954
1955 auto &DefOp = MI.getOperand(0);
1956 Register OldDefReg = DefOp.getReg();
1957 auto *DefRC = MRI->getRegClass(OldDefReg);
1958
1959 // Because we want to completely replace the uses of this def'ed value with
1960 // the hardened value, create a dedicated new register that will only be used
1961 // to communicate the unhardened value to the hardening.
1962 Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1963 DefOp.setReg(UnhardenedReg);
1964
1965 // Now harden this register's value, getting a hardened reg that is safe to
1966 // use. Note that we insert the instructions to compute this *after* the
1967 // defining instruction, not before it.
1968 unsigned HardenedReg = hardenValueInRegister(
1969 UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1970
1971 // Finally, replace the old register (which now only has the uses of the
1972 // original def) with the hardened register.
1973 MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1974
1975 ++NumPostLoadRegsHardened;
1976 return HardenedReg;
1977}
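// For instance (hypothetical virtual registers), a load `%v0 = MOV64rm ...`
// becomes:
//
//   %v1 = MOV64rm ...          ; def redirected to a fresh register
//   %v2 = OR64rr %state, %v1   ; hardening inserted *after* the load
//
// and every original use of %v0 is rewritten to use the hardened %v2.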
1978
1979/// Harden a return instruction.
1980///
1981 /// Return instructions implicitly perform a load which we need to harden. Without
1982 /// hardening this load, an attacker may speculatively write over the return address to
1983/// steer speculation of the return to an attacker controlled address. This is
1984/// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1985/// this paper:
1986/// https://people.csail.mit.edu/vlk/spectre11.pdf
1987///
1988/// We can harden this by introducing an LFENCE that will delay any load of the
1989/// return address until prior instructions have retired (and thus are not being
1990/// speculated), or we can harden the address used by the implicit load: the
1991/// stack pointer.
1992///
1993/// If we are not using an LFENCE, hardening the stack pointer has an additional
1994/// benefit: it allows us to pass the predicate state accumulated in this
1995/// function back to the caller. In the absence of a BCBS attack on the return,
1996/// the caller will typically be resumed and speculatively executed due to the
1997/// Return Stack Buffer (RSB) prediction which is very accurate and has a high
1998/// priority. It is possible that some code from the caller will be executed
1999/// speculatively even during a BCBS-attacked return until the steering takes
2000/// effect. Whenever this happens, the caller can recover the (poisoned)
2001/// predicate state from the stack pointer and continue to harden loads.
2002void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2003 MachineBasicBlock &MBB = *MI.getParent();
2004 const DebugLoc &Loc = MI.getDebugLoc();
2005 auto InsertPt = MI.getIterator();
2006
2007 if (FenceCallAndRet)
2008 // No need to fence here as we'll fence at the return site itself. That
2009 // handles more cases than we can handle here.
2010 return;
2011
2012 // Take our predicate state, shift it to the high 17 bits (so that we keep
2013 // pointers canonical) and merge it into RSP. This will allow the caller to
2014 // extract it when we return (speculatively).
2015 mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2016}
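// When we are not fencing calls and returns, a hardened return is thus,
// schematically:
//
//   shlq $47, %state
//   orq %state, %rsp
//   retq
//
// so a (possibly poisoned) copy of the predicate state rides back to the
// caller in the high bits of the stack pointer.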
2017
2018/// Trace the predicate state through a call.
2019///
2020/// There are several layers of this needed to handle the full complexity of
2021/// calls.
2022///
2023/// First, we need to send the predicate state into the called function. We do
2024/// this by merging it into the high bits of the stack pointer.
2025///
2026/// For tail calls, this is all we need to do.
2027///
2028/// For calls where we might return and resume the control flow, we need to
2029/// extract the predicate state from the high bits of the stack pointer after
2030/// control returns from the called function.
2031///
2032/// We also need to verify that we intended to return to this location in the
2033/// code. An attacker might arrange for the processor to mispredict the return
2034/// to this valid but incorrect return address in the program rather than the
2035/// correct one. See the paper on this attack, called "ret2spec" by the
2036/// researchers, here:
2037/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2038///
2039/// The way we verify that we returned to the correct location is by preserving
2040/// the expected return address across the call. One technique involves taking
2041 /// advantage of the red-zone to load the return address from `-8(%rsp)` where it
2042/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2043/// directly save the address into a register that will be preserved across the
2044/// call. We compare this intended return address against the address
2045/// immediately following the call (the observed return address). If these
2046/// mismatch, we have detected misspeculation and can poison our predicate
2047/// state.
2048void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2049 MachineInstr &MI) {
2050 MachineBasicBlock &MBB = *MI.getParent();
2051 MachineFunction &MF = *MBB.getParent();
2052 auto InsertPt = MI.getIterator();
2053 const DebugLoc &Loc = MI.getDebugLoc();
2054
2055 if (FenceCallAndRet) {
2056 if (MI.isReturn())
2057 // Tail call, we don't return to this function.
2058 // FIXME: We should also handle noreturn calls.
2059 return;
2060
2061 // We don't need to fence before the call because the function should fence
2062 // in its entry. However, we do need to fence after the call returns.
2063 // Fencing before the return doesn't correctly handle cases where the return
2064 // itself is mispredicted.
2065 BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2066 ++NumInstsInserted;
2067 ++NumLFENCEsInserted;
2068 return;
2069 }
2070
2071 // First, we transfer the predicate state into the called function by merging
2072 // it into the stack pointer. This will kill the current def of the state.
2073 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2074 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2075
2076 // If this call is also a return, it is a tail call and we don't need anything
2077 // else to handle it, so just return. Also, if there are no further
2078 // instructions and no successors, this call does not return so we can also
2079 // bail.
2080 if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2081 return;
2082
2083 // Create a symbol to track the return address and attach it to the call
2084 // machine instruction. We will lower extra symbols attached to call
2085 // instructions as labels immediately following the call.
2086 MCSymbol *RetSymbol =
2087 MF.getContext().createTempSymbol("slh_ret_addr",
2088 /*AlwaysAddSuffix*/ true);
2089 MI.setPostInstrSymbol(MF, RetSymbol);
2090
2091 const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2092 unsigned ExpectedRetAddrReg = 0;
2093
2094 // If we have no red zones or if the function returns twice (possibly without
2095 // using the `ret` instruction) like setjmp, we need to save the expected
2096 // return address prior to the call.
2097 if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2098 MF.exposesReturnsTwice()) {
2099 // If we don't have red zones, we need to compute the expected return
2100 // address prior to the call and store it in a register that lives across
2101 // the call.
2102 //
2103 // In some ways, this is doubly satisfying as a mitigation because it will
2104 // also successfully detect stack smashing bugs in some cases (typically,
2105 // when a callee-saved register is used and the callee doesn't push it onto
2106 // the stack). But that isn't our primary goal, so we only use it as
2107 // a fallback.
2108 //
2109 // FIXME: It isn't clear that this is reliable in the face of
2110 // rematerialization in the register allocator. We somehow need to force
2111 // that to not occur for this particular instruction, and instead to spill
2112 // or otherwise preserve the value computed *prior* to the call.
2113 //
2114 // FIXME: It is even less clear why MachineCSE can't just fold this when we
2115 // end up having to use identical instructions both before and after the
2116 // call to feed the comparison.
2117 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2118 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2119 !Subtarget->isPositionIndependent()) {
2120 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2121 .addSym(RetSymbol);
2122 } else {
2123 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2124 .addReg(/*Base*/ X86::RIP)
2125 .addImm(/*Scale*/ 1)
2126 .addReg(/*Index*/ 0)
2127 .addSym(RetSymbol)
2128 .addReg(/*Segment*/ 0);
2129 }
2130 }
2131
2132 // Step past the call to handle when it returns.
2133 ++InsertPt;
2134
2135 // If we didn't pre-compute the expected return address into a register, then
2136 // red zones are enabled and the return address is still available on the
2137 // stack immediately after the call. As the very first instruction, we load it
2138 // into a register.
2139 if (!ExpectedRetAddrReg) {
2140 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2141 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2142 .addReg(/*Base*/ X86::RSP)
2143 .addImm(/*Scale*/ 1)
2144 .addReg(/*Index*/ 0)
2145 .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2146 // the return address is 8 bytes past it.
2147 .addReg(/*Segment*/ 0);
2148 }
2149
2150 // Now we extract the callee's predicate state from the stack pointer.
2151 unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2152
2153 // Test the expected return address against our actual address. If we can
2154 // form this basic block's address as an immediate, this is easy. Otherwise
2155 // we compute it.
2156 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2157 !Subtarget->isPositionIndependent()) {
2158 // FIXME: Could we fold this with the load? It would require careful EFLAGS
2159 // management.
2160 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2161 .addReg(ExpectedRetAddrReg, RegState::Kill)
2162 .addSym(RetSymbol);
2163 } else {
2164 Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2165 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2166 .addReg(/*Base*/ X86::RIP)
2167 .addImm(/*Scale*/ 1)
2168 .addReg(/*Index*/ 0)
2169 .addSym(RetSymbol)
2170 .addReg(/*Segment*/ 0);
2171 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2172 .addReg(ExpectedRetAddrReg, RegState::Kill)
2173 .addReg(ActualRetAddrReg, RegState::Kill);
2174 }
2175
2176 // Now conditionally update the predicate state we just extracted if we ended
2177 // up at a different return address than expected.
2178 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2179 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2180
2181 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2182 auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2183 .addReg(NewStateReg, RegState::Kill)
2184 .addReg(PS->PoisonReg)
2185 .addImm(X86::COND_NE);
2186 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
2187 ++NumInstsInserted;
2188 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2189
2190 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2191}
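// Putting the pieces together, a hardened non-tail call is roughly the
// following (small code model, no red zone, hypothetical registers):
//
//   shlq $47, %state
//   orq %state, %rsp                   # pass the state to the callee
//   movq $.Lslh_ret_addr, %expected
//   callq callee
// .Lslh_ret_addr:
//   movq %rsp, %tmp
//   sarq $63, %tmp                     # recover the state from RSP
//   cmpq $.Lslh_ret_addr, %expected
//   cmovneq %poison, %tmp              # poison it if we returned elsewhere
//
// with %tmp becoming the predicate state for the code following the call.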
2192
2193/// An attacker may speculatively store over a value that is then speculatively
2194/// loaded and used as the target of an indirect call or jump instruction. This
2195/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2196/// in this paper:
2197/// https://people.csail.mit.edu/vlk/spectre11.pdf
2198///
2199/// When this happens, the speculative execution of the call or jump will end up
2200/// being steered to this attacker controlled address. While most such loads
2201/// will be adequately hardened already, we want to ensure that they are
2202/// definitively treated as needing post-load hardening. While address hardening
2203/// is sufficient to prevent secret data from leaking to the attacker, it may
2204/// not be sufficient to prevent an attacker from steering speculative
2205 /// execution. We forcibly unfolded all relevant loads above, so we will always
2206 /// have an opportunity to post-load harden here; we just need to scan for cases
2207/// not already flagged and add them.
2208void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2209 MachineInstr &MI,
2210 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
2211 switch (MI.getOpcode()) {
2212 case X86::FARCALL16m:
2213 case X86::FARCALL32m:
2214 case X86::FARCALL64m:
2215 case X86::FARJMP16m:
2216 case X86::FARJMP32m:
2217 case X86::FARJMP64m:
2218 // We don't need to harden either far calls or far jumps as they are
2219 // safe from Spectre.
2220 return;
2221
2222 default:
2223 break;
2224 }
2225
2226 // We should never see a loading instruction at this point, as those should
2227 // have been unfolded.
2228 assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2229
2230 // If the first operand isn't a register, this is a branch or call
2231 // instruction with an immediate operand which doesn't need to be hardened.
2232 if (!MI.getOperand(0).isReg())
2233 return;
2234
2235 // For all of these, the target register is the first operand of the
2236 // instruction.
2237 auto &TargetOp = MI.getOperand(0);
2238 Register OldTargetReg = TargetOp.getReg();
2239
2240 // Try to lookup a hardened version of this register. We retain a reference
2241 // here as we want to update the map to track any newly computed hardened
2242 // register.
2243 unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2244
2245 // If we don't have a hardened register yet, compute one. Otherwise, just use
2246 // the already hardened register.
2247 //
2248 // FIXME: It is a little suspect that we use partially hardened registers that
2249 // only feed addresses. The complexity of partial hardening with SHRX
2250 // continues to pile up. Should definitively measure its value and consider
2251 // eliminating it.
2252 if (!HardenedTargetReg)
2253 HardenedTargetReg = hardenValueInRegister(
2254 OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2255
2256 // Set the target operand to the hardened register.
2257 TargetOp.setReg(HardenedTargetReg);
2258
2259 ++NumCallsOrJumpsHardened;
2260}
2261
2262INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2263 "X86 speculative load hardener", false, false)
2264INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2265 "X86 speculative load hardener", false, false)
2266
2267FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2268 return new X86SpeculativeLoadHardeningPass();
2269}