Bug Summary

File: lib/Target/X86/X86SpeculativeLoadHardening.cpp
Warning: line 1867, column 3
Assigned value is garbage or undefined
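For readers unfamiliar with this checker: the warning means the analyzer found a path on which a variable is read and propagated before it was ever given a value. A minimal, hypothetical illustration of the pattern (unrelated to the flagged line 1867, which is outside the excerpt below):

    int example(bool flag) {
      int x;          // not initialized on the 'flag == false' path
      if (flag)
        x = 42;
      int y = x;      // "Assigned value is garbage or undefined" on that path
      return y;
    }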

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86SpeculativeLoadHardening.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn337657/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-7~svn337657/lib/Target/X86 -I /build/llvm-toolchain-snapshot-7~svn337657/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn337657/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/lib/gcc/x86_64-linux-gnu/8/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn337657/build-llvm/lib/Target/X86 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-07-23-043044-26795-1 -x c++ /build/llvm-toolchain-snapshot-7~svn337657/lib/Target/X86/X86SpeculativeLoadHardening.cpp -faddrsig
1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10///
11/// Provide a pass which mitigates speculative execution attacks which operate
12/// by speculating incorrectly past some predicate (a type check, bounds check,
13/// or other condition) to reach a load with invalid inputs and leak the data
14/// accessed by that load using a side channel out of the speculative domain.
15///
16/// For details on the attacks, see the first variant in both the Project Zero
17/// writeup and the Spectre paper:
18/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
19/// https://spectreattack.com/spectre.pdf
20///
21//===----------------------------------------------------------------------===//
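// Illustration (not part of the analyzed file): the variant-1 pattern described
// above is, roughly, a bounds check the CPU may speculatively skip, followed by
// loads whose addresses depend on the out-of-bounds value. The names below are
// illustrative only and follow the cited papers:
//
//   if (x < array1_size)                // may be mispredicted as taken
//     y = array2[array1[x] * 4096];     // secret-dependent load leaks via cache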
22
23#include "X86.h"
24#include "X86InstrBuilder.h"
25#include "X86InstrInfo.h"
26#include "X86Subtarget.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/Optional.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/ScopeExit.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/ADT/SparseBitVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/MachineBasicBlock.h"
38#include "llvm/CodeGen/MachineConstantPool.h"
39#include "llvm/CodeGen/MachineFunction.h"
40#include "llvm/CodeGen/MachineFunctionPass.h"
41#include "llvm/CodeGen/MachineInstr.h"
42#include "llvm/CodeGen/MachineInstrBuilder.h"
43#include "llvm/CodeGen/MachineModuleInfo.h"
44#include "llvm/CodeGen/MachineOperand.h"
45#include "llvm/CodeGen/MachineRegisterInfo.h"
46#include "llvm/CodeGen/MachineSSAUpdater.h"
47#include "llvm/CodeGen/TargetInstrInfo.h"
48#include "llvm/CodeGen/TargetRegisterInfo.h"
49#include "llvm/CodeGen/TargetSchedule.h"
50#include "llvm/CodeGen/TargetSubtargetInfo.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/MC/MCSchedule.h"
53#include "llvm/Pass.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/raw_ostream.h"
57#include <algorithm>
58#include <cassert>
59#include <iterator>
60#include <utility>
61
62using namespace llvm;
63
64#define PASS_KEY "x86-speculative-load-hardening"
65#define DEBUG_TYPE PASS_KEY
66
67STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
68STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
69STATISTIC(NumAddrRegsHardened,
70 "Number of address mode used registers hardaned");
71STATISTIC(NumPostLoadRegsHardened,
72 "Number of post-load register values hardened");
73STATISTIC(NumInstsInserted, "Number of instructions inserted");
74STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
75
76static cl::opt<bool> HardenEdgesWithLFENCE(
77 PASS_KEY "-lfence",
78 cl::desc(
79 "Use LFENCE along each conditional edge to harden against speculative "
80 "loads rather than conditional movs and poisoned pointers."),
81 cl::init(false), cl::Hidden);
82
83static cl::opt<bool> EnablePostLoadHardening(
84 PASS_KEY "-post-load",
85 cl::desc("Harden the value loaded *after* it is loaded by "
86 "flushing the loaded bits to 1. This is hard to do "
87 "in general but can be done easily for GPRs."),
88 cl::init(true), cl::Hidden);
89
90static cl::opt<bool> FenceCallAndRet(
91 PASS_KEY "-fence-call-and-ret",
92 cl::desc("Use a full speculation fence to harden both call and ret edges "
93 "rather than a lighter weight mitigation."),
94 cl::init(false), cl::Hidden);
95
96static cl::opt<bool> HardenInterprocedurally(
97 PASS_KEY "-ip",
98 cl::desc("Harden interprocedurally by passing our state in and out of "
99 "functions in the high bits of the stack pointer."),
100 cl::init(true), cl::Hidden);
101
102static cl::opt<bool>
103 HardenLoads(PASS_KEY "-loads",
104 cl::desc("Sanitize loads from memory. When disable, no "
105 "significant security is provided."),
106 cl::init(true), cl::Hidden);
107
108namespace llvm {
109
110void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
111
112} // end namespace llvm
113
114namespace {
115
116class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
117public:
118 X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) {
119 initializeX86SpeculativeLoadHardeningPassPass(
120 *PassRegistry::getPassRegistry());
121 }
122
123 StringRef getPassName() const override {
124 return "X86 speculative load hardening";
125 }
126 bool runOnMachineFunction(MachineFunction &MF) override;
127 void getAnalysisUsage(AnalysisUsage &AU) const override;
128
129 /// Pass identification, replacement for typeid.
130 static char ID;
131
132private:
133 /// The information about a block's conditional terminators needed to trace
134 /// our predicate state through the exiting edges.
135 struct BlockCondInfo {
136 MachineBasicBlock *MBB;
137
138 // We mostly have one conditional branch, and in extremely rare cases have
139 // two. Three and more are so rare as to be unimportant for compile time.
140 SmallVector<MachineInstr *, 2> CondBrs;
141
142 MachineInstr *UncondBr;
143 };
144
145 /// Manages the predicate state traced through the program.
146 struct PredState {
147 unsigned InitialReg;
148 unsigned PoisonReg;
149
150 const TargetRegisterClass *RC;
151 MachineSSAUpdater SSA;
152
153 PredState(MachineFunction &MF, const TargetRegisterClass *RC)
154 : RC(RC), SSA(MF) {}
155 };
156
157 const X86Subtarget *Subtarget;
158 MachineRegisterInfo *MRI;
159 const X86InstrInfo *TII;
160 const TargetRegisterInfo *TRI;
161
162 Optional<PredState> PS;
163
164 void hardenEdgesWithLFENCE(MachineFunction &MF);
165
166 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
167
168 SmallVector<MachineInstr *, 16>
169 tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
170
171 void checkAllLoads(MachineFunction &MF);
172
173 unsigned saveEFLAGS(MachineBasicBlock &MBB,
174 MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
175 void restoreEFLAGS(MachineBasicBlock &MBB,
176 MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
177 unsigned OFReg);
178
179 void mergePredStateIntoSP(MachineBasicBlock &MBB,
180 MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
181 unsigned PredStateReg);
182 unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
183 MachineBasicBlock::iterator InsertPt,
184 DebugLoc Loc);
185
186 void
187 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
188 MachineOperand &IndexMO,
189 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
190 MachineInstr *
191 sinkPostLoadHardenedInst(MachineInstr &MI,
192 SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
193 bool canHardenRegister(unsigned Reg);
194 void hardenPostLoad(MachineInstr &MI);
195 void hardenReturnInstr(MachineInstr &MI);
196};
197
198} // end anonymous namespace
199
200char X86SpeculativeLoadHardeningPass::ID = 0;
201
202void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
203 AnalysisUsage &AU) const {
204 MachineFunctionPass::getAnalysisUsage(AU);
205}
206
207static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
208 MachineBasicBlock &Succ, int SuccCount,
209 MachineInstr *Br, MachineInstr *&UncondBr,
210 const X86InstrInfo &TII) {
211 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
212
213 MachineFunction &MF = *MBB.getParent();
214
215 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
216
217 // We have to insert the new block immediately after the current one as we
218 // don't know what layout-successor relationships the successor has and we
219 // may not be able to (and generally don't want to) try to fix those up.
220 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
221
222 // Update the branch instruction if necessary.
223 if (Br) {
224 assert(Br->getOperand(0).getMBB() == &Succ &&
225 "Didn't start with the right target!");
226 Br->getOperand(0).setMBB(&NewMBB);
227
228 // If this successor was reached through a branch rather than fallthrough,
229 // we might have *broken* fallthrough and so need to inject a new
230 // unconditional branch.
231 if (!UncondBr) {
232 MachineBasicBlock &OldLayoutSucc =
233 *std::next(MachineFunction::iterator(&NewMBB));
234 assert(MBB.isSuccessor(&OldLayoutSucc) &&
235 "Without an unconditional branch, the old layout successor should "
236 "be an actual successor!");
237 auto BrBuilder =
238 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
239 // Update the unconditional branch now that we've added one.
240 UncondBr = &*BrBuilder;
241 }
242
243 // Insert unconditional "jump Succ" instruction in the new block if
244 // necessary.
245 if (!NewMBB.isLayoutSuccessor(&Succ)) {
246 SmallVector<MachineOperand, 4> Cond;
247 TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
248 }
249 } else {
250 assert(!UncondBr &&
251 "Cannot have a branchless successor and an unconditional branch!");
252 assert(NewMBB.isLayoutSuccessor(&Succ) &&
253 "A non-branch successor must have been a layout successor before "
254 "and now is a layout successor of the new block.");
255 }
256
257 // If this is the only edge to the successor, we can just replace it in the
258 // CFG. Otherwise we need to add a new entry in the CFG for the new
259 // successor.
260 if (SuccCount == 1) {
261 MBB.replaceSuccessor(&Succ, &NewMBB);
262 } else {
263 MBB.splitSuccessor(&Succ, &NewMBB);
264 }
265
266 // Hook up the edge from the new basic block to the old successor in the CFG.
267 NewMBB.addSuccessor(&Succ);
268
269 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
270 for (MachineInstr &MI : Succ) {
271 if (!MI.isPHI())
272 break;
273 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
274 OpIdx += 2) {
275 MachineOperand &OpV = MI.getOperand(OpIdx);
276 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
277 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
278 if (OpMBB.getMBB() != &MBB)
279 continue;
280
281 // If this is the last edge to the succesor, just replace MBB in the PHI
282 if (SuccCount == 1) {
283 OpMBB.setMBB(&NewMBB);
284 break;
285 }
286
287 // Otherwise, append a new pair of operands for the new incoming edge.
288 MI.addOperand(MF, OpV);
289 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
290 break;
291 }
292 }
293
294 // Inherit live-ins from the successor
295 for (auto &LI : Succ.liveins())
296 NewMBB.addLiveIn(LI);
297
298 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
299 << Succ.getName() << "'.\n");
300 return NewMBB;
301}
302
303/// Removing duplicate PHI operands to leave the PHI in a canonical and
304/// predictable form.
305///
306/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
307/// isn't what you might expect. We may have multiple entries in PHI nodes for
308/// a single predecessor. This makes CFG-updating extremely complex, so here we
309/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
310/// one entry per predecessor, regardless of how many edges there are.
311static void canonicalizePHIOperands(MachineFunction &MF) {
312 SmallPtrSet<MachineBasicBlock *, 4> Preds;
313 SmallVector<int, 4> DupIndices;
314 for (auto &MBB : MF)
315 for (auto &MI : MBB) {
316 if (!MI.isPHI())
317 break;
318
319 // First we scan the operands of the PHI looking for duplicate entries
320 // a particular predecessor. We retain the operand index of each duplicate
321 // entry found.
322 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
323 OpIdx += 2)
324 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
325 DupIndices.push_back(OpIdx);
326
327 // Now walk the duplicate indices, removing both the block and value. Note
328 // that these are stored as a vector making this element-wise removal
329 // :w
330 // potentially quadratic.
331 //
332 // FIXME: It is really frustrating that we have to use a quadratic
333 // removal algorithm here. There should be a better way, but the use-def
334 // updates required make that impossible using the public API.
335 //
336 // Note that we have to process these backwards so that we don't
337 // invalidate other indices with each removal.
338 while (!DupIndices.empty()) {
339 int OpIdx = DupIndices.pop_back_val();
340 // Remove both the block and value operand, again in reverse order to
341 // preserve indices.
342 MI.RemoveOperand(OpIdx + 1);
343 MI.RemoveOperand(OpIdx);
344 }
345
346 Preds.clear();
347 }
348}
349
350/// Helper to scan a function for loads vulnerable to misspeculation that we
351/// want to harden.
352///
353/// We use this to avoid making changes to functions where there is nothing we
354/// need to do to harden against misspeculation.
355static bool hasVulnerableLoad(MachineFunction &MF) {
356 for (MachineBasicBlock &MBB : MF) {
357 for (MachineInstr &MI : MBB) {
358 // Loads within this basic block after an LFENCE are not at risk of
359 // speculatively executing with invalid predicates from prior control
360 // flow. So break out of this block but continue scanning the function.
361 if (MI.getOpcode() == X86::LFENCE)
362 break;
363
364 // Looking for loads only.
365 if (!MI.mayLoad())
366 continue;
367
368 // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
369 if (MI.getOpcode() == X86::MFENCE)
370 continue;
371
372 // We found a load.
373 return true;
374 }
375 }
376
377 // No loads found.
378 return false;
379}
380
381bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
382 MachineFunction &MF) {
383 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
384 << " **********\n");
385
386 Subtarget = &MF.getSubtarget<X86Subtarget>();
387 MRI = &MF.getRegInfo();
388 TII = Subtarget->getInstrInfo();
389 TRI = Subtarget->getRegisterInfo();
390
391 // FIXME: Support for 32-bit.
392 PS.emplace(MF, &X86::GR64_NOSPRegClass);
393
394 if (MF.begin() == MF.end())
1. Taking false branch
395 // Nothing to do for a degenerate empty function...
396 return false;
397
398 // We support an alternative hardening technique based on a debug flag.
399 if (HardenEdgesWithLFENCE) {
2. Assuming the condition is false
3. Taking false branch
400 hardenEdgesWithLFENCE(MF);
401 return true;
402 }
403
404 // Create a dummy debug loc to use for all the generated code here.
405 DebugLoc Loc;
406
407 MachineBasicBlock &Entry = *MF.begin();
408 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
409
410 // Do a quick scan to see if we have any checkable loads.
411 bool HasVulnerableLoad = hasVulnerableLoad(MF);
412
413 // See if we have any conditional branching blocks that we will need to trace
414 // predicate state through.
415 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
416
417 // If we have no interesting conditions or loads, nothing to do here.
418 if (!HasVulnerableLoad && Infos.empty())
4. Taking false branch
419 return true;
420
421 // The poison value is required to be an all-ones value for many aspects of
422 // this mitigation.
423 const int PoisonVal = -1;
424 PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
425 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
426 .addImm(PoisonVal);
427 ++NumInstsInserted;
428
429 // If we have loads being hardened and we've asked for call and ret edges to
430 // get a full fence-based mitigation, inject that fence.
431 if (HasVulnerableLoad && FenceCallAndRet) {
432 // We need to insert an LFENCE at the start of the function to suspend any
433 // incoming misspeculation from the caller. This helps two-fold: the caller
434 // may not have been protected as this code has been, and this code gets to
435 // not take any specific action to protect across calls.
436 // FIXME: We could skip this for functions which unconditionally return
437 // a constant.
438 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
439 ++NumInstsInserted;
440 ++NumLFENCEsInserted;
441 }
442
443 // If we guarded the entry with an LFENCE and have no conditionals to protect
444 // in blocks, then we're done.
445 if (FenceCallAndRet && Infos.empty())
5. Assuming the condition is false
446 // We may have changed the function's code at this point to insert fences.
447 return true;
448
449 // For every basic block in the function which can b
450 if (HardenInterprocedurally && !FenceCallAndRet) {
6. Assuming the condition is true
7. Taking true branch
451 // Set up the predicate state by extracting it from the incoming stack
452 // pointer so we pick up any misspeculation in our caller.
453 PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
454 } else {
455 // Otherwise, just build the predicate state itself by zeroing a register
456 // as we don't need any initial state.
457 PS->InitialReg = MRI->createVirtualRegister(PS->RC);
458 unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
459 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
460 PredStateSubReg);
461 ++NumInstsInserted;
462 MachineOperand *ZeroEFLAGSDefOp =
463 ZeroI->findRegisterDefOperand(X86::EFLAGS);
464 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
465 "Must have an implicit def of EFLAGS!");
466 ZeroEFLAGSDefOp->setIsDead(true);
467 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
468 PS->InitialReg)
469 .addImm(0)
470 .addReg(PredStateSubReg)
471 .addImm(X86::sub_32bit);
472 }
473
474 // We're going to need to trace predicate state throughout the function's
475 // CFG. Prepare for this by setting up our initial state of PHIs with unique
476 // predecessor entries and all the initial predicate state.
477 canonicalizePHIOperands(MF);
478
479 // Track the updated values in an SSA updater to rewrite into SSA form at the
480 // end.
481 PS->SSA.Initialize(PS->InitialReg);
482 PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
483
484 // Trace through the CFG.
485 auto CMovs = tracePredStateThroughCFG(MF, Infos);
486
487 // We may also enter basic blocks in this function via exception handling
488 // control flow. Here, if we are hardening interprocedurally, we need to
489 // re-capture the predicate state from the throwing code. In the Itanium ABI,
490 // the throw will always look like a call to __cxa_throw and will have the
491 // predicate state in the stack pointer, so extract fresh predicate state from
492 // the stack pointer and make it available in SSA.
493 // FIXME: Handle non-itanium ABI EH models.
494 if (HardenInterprocedurally) {
8. Assuming the condition is false
9. Taking false branch
495 for (MachineBasicBlock &MBB : MF) {
496 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
497 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
498 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
499 if (!MBB.isEHPad())
500 continue;
501 PS->SSA.AddAvailableValue(
502 &MBB,
503 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
504 }
505 }
506
507 // Now check all of the loads using the predicate state.
508 checkAllLoads(MF);
10. Calling 'X86SpeculativeLoadHardeningPass::checkAllLoads'
509
510 // Now rewrite all the uses of the pred state using the SSA updater so that
511 // we track updates through the CFG.
512 for (MachineInstr *CMovI : CMovs)
513 for (MachineOperand &Op : CMovI->operands()) {
514 if (!Op.isReg() || Op.getReg() != PS->InitialReg)
515 continue;
516
517 PS->SSA.RewriteUse(Op);
518 }
519
520 // If we are hardening interprocedurally, find each returning block and
521 // protect the caller from being returned to through misspeculation.
522 if (HardenInterprocedurally)
523 for (MachineBasicBlock &MBB : MF) {
524 if (MBB.empty())
525 continue;
526
527 MachineInstr &MI = MBB.back();
528 if (!MI.isReturn())
529 continue;
530
531 hardenReturnInstr(MI);
532 }
533
534 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
535 dbgs() << "\n"; MF.verify(this));
536 return true;
537}
538
539/// Implements the naive hardening approach of putting an LFENCE after every
540/// potentially mis-predicted control flow construct.
541///
542/// We include this as an alternative mostly for the purpose of comparison. The
543/// performance impact of this is expected to be extremely severe and not
544/// practical for any real-world users.
545void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
546 MachineFunction &MF) {
547 // First, we scan the function looking for blocks that are reached along edges
548 // that we might want to harden.
549 SmallSetVector<MachineBasicBlock *, 8> Blocks;
550 for (MachineBasicBlock &MBB : MF) {
551 // If there are no or only one successor, nothing to do here.
552 if (MBB.succ_size() <= 1)
553 continue;
554
555 // Skip blocks unless their terminators start with a branch. Other
556 // terminators don't seem interesting for guarding against misspeculation.
557 auto TermIt = MBB.getFirstTerminator();
558 if (TermIt == MBB.end() || !TermIt->isBranch())
559 continue;
560
561 // Add all the non-EH-pad succossors to the blocks we want to harden. We
562 // skip EH pads because there isn't really a condition of interest on
563 // entering.
564 for (MachineBasicBlock *SuccMBB : MBB.successors())
565 if (!SuccMBB->isEHPad())
566 Blocks.insert(SuccMBB);
567 }
568
569 for (MachineBasicBlock *MBB : Blocks) {
570 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
571 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
572 ++NumInstsInserted;
573 ++NumLFENCEsInserted;
574 }
575}
576
577SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
578X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
579 SmallVector<BlockCondInfo, 16> Infos;
580
581 // Walk the function and build up a summary for each block's conditions that
582 // we need to trace through.
583 for (MachineBasicBlock &MBB : MF) {
584 // If there are no or only one successor, nothing to do here.
585 if (MBB.succ_size() <= 1)
586 continue;
587
588 // We want to reliably handle any conditional branch terminators in the
589 // MBB, so we manually analyze the branch. We can handle all of the
590 // permutations here, including ones that analyze branch cannot.
591 //
592 // The approach is to walk backwards across the terminators, resetting at
593 // any unconditional non-indirect branch, and track all conditional edges
594 // to basic blocks as well as the fallthrough or unconditional successor
595 // edge. For each conditional edge, we track the target and the opposite
596 // condition code in order to inject a "no-op" cmov into that successor
597 // that will harden the predicate. For the fallthrough/unconditional
598 // edge, we inject a separate cmov for each conditional branch with
599 // matching condition codes. This effectively implements an "and" of the
600 // condition flags, even if there isn't a single condition flag that would
601 // directly implement that. We don't bother trying to optimize either of
602 // these cases because if such an optimization is possible, LLVM should
603 // have optimized the conditional *branches* in that way already to reduce
604 // instruction count. This late, we simply assume the minimal number of
605 // branch instructions is being emitted and use that to guide our cmov
606 // insertion.
607
608 BlockCondInfo Info = {&MBB, {}, nullptr};
609
610 // Now walk backwards through the terminators and build up successors they
611 // reach and the conditions.
612 for (MachineInstr &MI : llvm::reverse(MBB)) {
613 // Once we've handled all the terminators, we're done.
614 if (!MI.isTerminator())
615 break;
616
617 // If we see a non-branch terminator, we can't handle anything so bail.
618 if (!MI.isBranch()) {
619 Info.CondBrs.clear();
620 break;
621 }
622
623 // If we see an unconditional branch, reset our state, clear any
624 // fallthrough, and set this is the "else" successor.
625 if (MI.getOpcode() == X86::JMP_1) {
626 Info.CondBrs.clear();
627 Info.UncondBr = &MI;
628 continue;
629 }
630
631 // If we get an invalid condition, we have an indirect branch or some
632 // other unanalyzable "fallthrough" case. We model this as a nullptr for
633 // the destination so we can still guard any conditional successors.
634 // Consider code sequences like:
635 // ```
636 // jCC L1
637 // jmpq *%rax
638 // ```
639 // We still want to harden the edge to `L1`.
640 if (X86::getCondFromBranchOpc(MI.getOpcode()) == X86::COND_INVALID) {
641 Info.CondBrs.clear();
642 Info.UncondBr = &MI;
643 continue;
644 }
645
646 // We have a vanilla conditional branch, add it to our list.
647 Info.CondBrs.push_back(&MI);
648 }
649 if (Info.CondBrs.empty()) {
650 ++NumBranchesUntraced;
651 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
652 MBB.dump());
653 continue;
654 }
655
656 Infos.push_back(Info);
657 }
658
659 return Infos;
660}
661
662/// Trace the predicate state through the CFG, instrumenting each conditional
663/// branch such that misspeculation through an edge will poison the predicate
664/// state.
665///
666/// Returns the list of inserted CMov instructions so that they can have their
667/// uses of the predicate state rewritten into proper SSA form once it is
668/// complete.
669SmallVector<MachineInstr *, 16>
670X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
671 MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
672 // Collect the inserted cmov instructions so we can rewrite their uses of the
673 // predicate state into SSA form.
674 SmallVector<MachineInstr *, 16> CMovs;
675
676 // Now walk all of the basic blocks looking for ones that end in conditional
677 // jumps where we need to update this register along each edge.
678 for (const BlockCondInfo &Info : Infos) {
679 MachineBasicBlock &MBB = *Info.MBB;
680 const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
681 MachineInstr *UncondBr = Info.UncondBr;
682
683 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
684 << "\n");
685 ++NumCondBranchesTraced;
686
687 // Compute the non-conditional successor as either the target of any
688 // unconditional branch or the layout successor.
689 MachineBasicBlock *UncondSucc =
690 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
691 ? UncondBr->getOperand(0).getMBB()
692 : nullptr)
693 : &*std::next(MachineFunction::iterator(&MBB));
694
695 // Count how many edges there are to any given successor.
696 SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
697 if (UncondSucc)
698 ++SuccCounts[UncondSucc];
699 for (auto *CondBr : CondBrs)
700 ++SuccCounts[CondBr->getOperand(0).getMBB()];
701
702 // A lambda to insert cmov instructions into a block checking all of the
703 // condition codes in a sequence.
704 auto BuildCheckingBlockForSuccAndConds =
705 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
706 MachineInstr *Br, MachineInstr *&UncondBr,
707 ArrayRef<X86::CondCode> Conds) {
708 // First, we split the edge to insert the checking block into a safe
709 // location.
710 auto &CheckingMBB =
711 (SuccCount == 1 && Succ.pred_size() == 1)
712 ? Succ
713 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
714
715 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
716 if (!LiveEFLAGS)
717 CheckingMBB.addLiveIn(X86::EFLAGS);
718
719 // Now insert the cmovs to implement the checks.
720 auto InsertPt = CheckingMBB.begin();
721 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
722 "Should never have a PHI in the initial checking block as it "
723 "always has a single predecessor!");
724
725 // We will wire each cmov to each other, but need to start with the
726 // incoming pred state.
727 unsigned CurStateReg = PS->InitialReg;
728
729 for (X86::CondCode Cond : Conds) {
730 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
731 auto CMovOp = X86::getCMovFromCond(Cond, PredStateSizeInBytes);
732
733 unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
734 // Note that we intentionally use an empty debug location so that
735 // this picks up the preceding location.
736 auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
737 TII->get(CMovOp), UpdatedStateReg)
738 .addReg(CurStateReg)
739 .addReg(PS->PoisonReg);
740 // If this is the last cmov and the EFLAGS weren't originally
741 // live-in, mark them as killed.
742 if (!LiveEFLAGS && Cond == Conds.back())
743 CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
744
745 ++NumInstsInserted;
746 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
747 dbgs() << "\n");
748
749 // The first one of the cmovs will be using the top level
750 // `PredStateReg` and need to get rewritten into SSA form.
751 if (CurStateReg == PS->InitialReg)
752 CMovs.push_back(&*CMovI);
753
754 // The next cmov should start from this one's def.
755 CurStateReg = UpdatedStateReg;
756 }
757
758 // And put the last one into the available values for SSA form of our
759 // predicate state.
760 PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
761 };
762
763 std::vector<X86::CondCode> UncondCodeSeq;
764 for (auto *CondBr : CondBrs) {
765 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
766 int &SuccCount = SuccCounts[&Succ];
767
768 X86::CondCode Cond = X86::getCondFromBranchOpc(CondBr->getOpcode());
769 X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
770 UncondCodeSeq.push_back(Cond);
771
772 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
773 {InvCond});
774
775 // Decrement the successor count now that we've split one of the edges.
776 // We need to keep the count of edges to the successor accurate in order
777 // to know above when to *replace* the successor in the CFG vs. just
778 // adding the new successor.
779 --SuccCount;
780 }
781
782 // Since we may have split edges and changed the number of successors,
783 // normalize the probabilities. This avoids doing it each time we split an
784 // edge.
785 MBB.normalizeSuccProbs();
786
787 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
788 // need to intersect the other condition codes. We can do this by just
789 // doing a cmov for each one.
790 if (!UncondSucc)
791 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
792 // just skip this and continue.
793 continue;
794
795 assert(SuccCounts[UncondSucc] == 1 &&
796 "We should never have more than one edge to the unconditional "
797 "successor at this point because every other edge must have been "
798 "split above!");
799
800 // Sort and unique the codes to minimize them.
801 llvm::sort(UncondCodeSeq.begin(), UncondCodeSeq.end());
802 UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
803 UncondCodeSeq.end());
804
805 // Build a checking version of the successor.
806 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
807 UncondBr, UncondBr, UncondCodeSeq);
808 }
809
810 return CMovs;
811}
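// Illustration (not part of the analyzed file): a rough C-level model of what
// the predicate-state tracing above achieves, under the assumption that the
// all-ones PoisonReg value is later folded into load addresses (see the
// hardenLoadAddr declaration earlier in this file). All names are hypothetical:
//
//   uint64_t State = 0;                      // becomes all-ones once mis-speculated
//   if (Idx < Size) {                        // branch may be mispredicted
//     State = (Idx < Size) ? State : ~0ULL;  // the inserted cmov, in C terms
//     uintptr_t P = (uintptr_t)&Array[Idx];
//     P |= State;                            // poisoned address on the wrong path
//     Value = *(const char *)P;              // load can no longer leak secrets
//   }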
812
813/// Returns true if the instruction has no behavior (specified or otherwise)
814/// that is based on the value of any of its register operands
815///
816/// A classical example of something that is inherently not data invariant is an
817/// indirect jump -- the destination is loaded into icache based on the bits set
818/// in the jump destination register.
819///
820/// FIXME: This should become part of our instruction tables.
821static bool isDataInvariant(MachineInstr &MI) {
822 switch (MI.getOpcode()) {
823 default:
824 // By default, assume that the instruction is not data invariant.
825 return false;
826
827 // Some target-independent operations that trivially lower to data-invariant
828 // instructions.
829 case TargetOpcode::COPY:
830 case TargetOpcode::INSERT_SUBREG:
831 case TargetOpcode::SUBREG_TO_REG:
832 return true;
833
834 // On x86 it is believed that imul is constant time w.r.t. the loaded data.
835 // However, they set flags and are perhaps the most surprisingly constant
836 // time operations so we call them out here separately.
837 case X86::IMUL16rr:
838 case X86::IMUL16rri8:
839 case X86::IMUL16rri:
840 case X86::IMUL32rr:
841 case X86::IMUL32rri8:
842 case X86::IMUL32rri:
843 case X86::IMUL64rr:
844 case X86::IMUL64rri32:
845 case X86::IMUL64rri8:
846
847 // Bit scanning and counting instructions that are somewhat surprisingly
848 // constant time as they scan across bits and do other fairly complex
849 // operations like popcnt, but are believed to be constant time on x86.
850 // However, these set flags.
851 case X86::BSF16rr:
852 case X86::BSF32rr:
853 case X86::BSF64rr:
854 case X86::BSR16rr:
855 case X86::BSR32rr:
856 case X86::BSR64rr:
857 case X86::LZCNT16rr:
858 case X86::LZCNT32rr:
859 case X86::LZCNT64rr:
860 case X86::POPCNT16rr:
861 case X86::POPCNT32rr:
862 case X86::POPCNT64rr:
863 case X86::TZCNT16rr:
864 case X86::TZCNT32rr:
865 case X86::TZCNT64rr:
866
867 // Bit manipulation instructions are effectively combinations of basic
868 // arithmetic ops, and should still execute in constant time. These also
869 // set flags.
870 case X86::BLCFILL32rr:
871 case X86::BLCFILL64rr:
872 case X86::BLCI32rr:
873 case X86::BLCI64rr:
874 case X86::BLCIC32rr:
875 case X86::BLCIC64rr:
876 case X86::BLCMSK32rr:
877 case X86::BLCMSK64rr:
878 case X86::BLCS32rr:
879 case X86::BLCS64rr:
880 case X86::BLSFILL32rr:
881 case X86::BLSFILL64rr:
882 case X86::BLSI32rr:
883 case X86::BLSI64rr:
884 case X86::BLSIC32rr:
885 case X86::BLSIC64rr:
886 case X86::BLSMSK32rr:
887 case X86::BLSMSK64rr:
888 case X86::BLSR32rr:
889 case X86::BLSR64rr:
890 case X86::TZMSK32rr:
891 case X86::TZMSK64rr:
892
893 // Bit extracting and clearing instructions should execute in constant time,
894 // and set flags.
895 case X86::BEXTR32rr:
896 case X86::BEXTR64rr:
897 case X86::BEXTRI32ri:
898 case X86::BEXTRI64ri:
899 case X86::BZHI32rr:
900 case X86::BZHI64rr:
901
902 // Shift and rotate.
903 case X86::ROL8r1: case X86::ROL16r1: case X86::ROL32r1: case X86::ROL64r1:
904 case X86::ROL8rCL: case X86::ROL16rCL: case X86::ROL32rCL: case X86::ROL64rCL:
905 case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
906 case X86::ROR8r1: case X86::ROR16r1: case X86::ROR32r1: case X86::ROR64r1:
907 case X86::ROR8rCL: case X86::ROR16rCL: case X86::ROR32rCL: case X86::ROR64rCL:
908 case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
909 case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1: case X86::SAR64r1:
910 case X86::SAR8rCL: case X86::SAR16rCL: case X86::SAR32rCL: case X86::SAR64rCL:
911 case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
912 case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1: case X86::SHL64r1:
913 case X86::SHL8rCL: case X86::SHL16rCL: case X86::SHL32rCL: case X86::SHL64rCL:
914 case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri:
915 case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1: case X86::SHR64r1:
916 case X86::SHR8rCL: case X86::SHR16rCL: case X86::SHR32rCL: case X86::SHR64rCL:
917 case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
918 case X86::SHLD16rrCL: case X86::SHLD32rrCL: case X86::SHLD64rrCL:
919 case X86::SHLD16rri8: case X86::SHLD32rri8: case X86::SHLD64rri8:
920 case X86::SHRD16rrCL: case X86::SHRD32rrCL: case X86::SHRD64rrCL:
921 case X86::SHRD16rri8: case X86::SHRD32rri8: case X86::SHRD64rri8:
922
923 // Basic arithmetic is constant time on the input but does set flags.
924 case X86::ADC8rr: case X86::ADC8ri:
925 case X86::ADC16rr: case X86::ADC16ri: case X86::ADC16ri8:
926 case X86::ADC32rr: case X86::ADC32ri: case X86::ADC32ri8:
927 case X86::ADC64rr: case X86::ADC64ri8: case X86::ADC64ri32:
928 case X86::ADD8rr: case X86::ADD8ri:
929 case X86::ADD16rr: case X86::ADD16ri: case X86::ADD16ri8:
930 case X86::ADD32rr: case X86::ADD32ri: case X86::ADD32ri8:
931 case X86::ADD64rr: case X86::ADD64ri8: case X86::ADD64ri32:
932 case X86::AND8rr: case X86::AND8ri:
933 case X86::AND16rr: case X86::AND16ri: case X86::AND16ri8:
934 case X86::AND32rr: case X86::AND32ri: case X86::AND32ri8:
935 case X86::AND64rr: case X86::AND64ri8: case X86::AND64ri32:
936 case X86::OR8rr: case X86::OR8ri:
937 case X86::OR16rr: case X86::OR16ri: case X86::OR16ri8:
938 case X86::OR32rr: case X86::OR32ri: case X86::OR32ri8:
939 case X86::OR64rr: case X86::OR64ri8: case X86::OR64ri32:
940 case X86::SBB8rr: case X86::SBB8ri:
941 case X86::SBB16rr: case X86::SBB16ri: case X86::SBB16ri8:
942 case X86::SBB32rr: case X86::SBB32ri: case X86::SBB32ri8:
943 case X86::SBB64rr: case X86::SBB64ri8: case X86::SBB64ri32:
944 case X86::SUB8rr: case X86::SUB8ri:
945 case X86::SUB16rr: case X86::SUB16ri: case X86::SUB16ri8:
946 case X86::SUB32rr: case X86::SUB32ri: case X86::SUB32ri8:
947 case X86::SUB64rr: case X86::SUB64ri8: case X86::SUB64ri32:
948 case X86::XOR8rr: case X86::XOR8ri:
949 case X86::XOR16rr: case X86::XOR16ri: case X86::XOR16ri8:
950 case X86::XOR32rr: case X86::XOR32ri: case X86::XOR32ri8:
951 case X86::XOR64rr: case X86::XOR64ri8: case X86::XOR64ri32:
952 // Arithmetic with just 32-bit and 64-bit variants and no immediates.
953 case X86::ADCX32rr: case X86::ADCX64rr:
954 case X86::ADOX32rr: case X86::ADOX64rr:
955 case X86::ANDN32rr: case X86::ANDN64rr:
956 // Unary arithmetic operations.
957 case X86::DEC8r: case X86::DEC16r: case X86::DEC32r: case X86::DEC64r:
958 case X86::INC8r: case X86::INC16r: case X86::INC32r: case X86::INC64r:
959 case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
960 // Check whether the EFLAGS implicit-def is dead. We assume that this will
961 // always find the implicit-def because this code should only be reached
962 // for instructions that do in fact implicitly def this.
963 if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) {
964 // If we would clobber EFLAGS that are used, just bail for now.
965 LLVM_DEBUG(dbgs() << " Unable to harden post-load due to EFLAGS: ";
966 MI.dump(); dbgs() << "\n");
967 return false;
968 }
969
970 // Otherwise, fallthrough to handle these the same as instructions that
971 // don't set EFLAGS.
972 LLVM_FALLTHROUGH;
973
974 // Unlike other arithmetic, NOT doesn't set EFLAGS.
975 case X86::NOT8r: case X86::NOT16r: case X86::NOT32r: case X86::NOT64r:
976
977 // Various move instructions used to zero or sign extend things. Note that we
978 // intentionally don't support the _NOREX variants as we can't handle that
979 // register constraint anyways.
980 case X86::MOVSX16rr8:
981 case X86::MOVSX32rr8: case X86::MOVSX32rr16:
982 case X86::MOVSX64rr8: case X86::MOVSX64rr16: case X86::MOVSX64rr32:
983 case X86::MOVZX16rr8:
984 case X86::MOVZX32rr8: case X86::MOVZX32rr16:
985 case X86::MOVZX64rr8: case X86::MOVZX64rr16:
986 case X86::MOV32rr:
987
988 // Arithmetic instructions that are both constant time and don't set flags.
989 case X86::RORX32ri:
990 case X86::RORX64ri:
991 case X86::SARX32rr:
992 case X86::SARX64rr:
993 case X86::SHLX32rr:
994 case X86::SHLX64rr:
995 case X86::SHRX32rr:
996 case X86::SHRX64rr:
997
998 // LEA doesn't actually access memory, and its arithmetic is constant time.
999 case X86::LEA16r:
1000 case X86::LEA32r:
1001 case X86::LEA64_32r:
1002 case X86::LEA64r:
1003 return true;
1004 }
1005}
1006
1007/// Returns true if the instruction has no behavior (specified or otherwise)
1008/// that is based on the value loaded from memory or the value of any
1009/// non-address register operands.
1010///
1011 /// For example, an instruction is not data invariant if its latency depends on
1012 /// the particular bits set in any of the registers *or* on any of the bits
1013 /// loaded from memory.
1014///
1015/// A classical example of something that is inherently not data invariant is an
1016/// indirect jump -- the destination is loaded into icache based on the bits set
1017/// in the jump destination register.
1018///
1019/// FIXME: This should become part of our instruction tables.
1020static bool isDataInvariantLoad(MachineInstr &MI) {
1021 switch (MI.getOpcode()) {
1022 default:
1023 // By default, assume that the load will immediately leak.
1024 return false;
1025
1026  // On x86 it is believed that imul is constant time w.r.t. the loaded data.
1027  // However, these instructions set flags and are perhaps the most surprisingly
1028  // constant-time operations, so we call them out here separately.
1029 case X86::IMUL16rm:
1030 case X86::IMUL16rmi8:
1031 case X86::IMUL16rmi:
1032 case X86::IMUL32rm:
1033 case X86::IMUL32rmi8:
1034 case X86::IMUL32rmi:
1035 case X86::IMUL64rm:
1036 case X86::IMUL64rmi32:
1037 case X86::IMUL64rmi8:
1038
1039  // Bit scanning and counting instructions are somewhat surprising here: they
1040  // scan across bits and do other fairly complex operations like popcnt, yet
1041  // are believed to be constant time on x86.
1042  // However, these set flags.
1043 case X86::BSF16rm:
1044 case X86::BSF32rm:
1045 case X86::BSF64rm:
1046 case X86::BSR16rm:
1047 case X86::BSR32rm:
1048 case X86::BSR64rm:
1049 case X86::LZCNT16rm:
1050 case X86::LZCNT32rm:
1051 case X86::LZCNT64rm:
1052 case X86::POPCNT16rm:
1053 case X86::POPCNT32rm:
1054 case X86::POPCNT64rm:
1055 case X86::TZCNT16rm:
1056 case X86::TZCNT32rm:
1057 case X86::TZCNT64rm:
1058
1059 // Bit manipulation instructions are effectively combinations of basic
1060 // arithmetic ops, and should still execute in constant time. These also
1061 // set flags.
1062 case X86::BLCFILL32rm:
1063 case X86::BLCFILL64rm:
1064 case X86::BLCI32rm:
1065 case X86::BLCI64rm:
1066 case X86::BLCIC32rm:
1067 case X86::BLCIC64rm:
1068 case X86::BLCMSK32rm:
1069 case X86::BLCMSK64rm:
1070 case X86::BLCS32rm:
1071 case X86::BLCS64rm:
1072 case X86::BLSFILL32rm:
1073 case X86::BLSFILL64rm:
1074 case X86::BLSI32rm:
1075 case X86::BLSI64rm:
1076 case X86::BLSIC32rm:
1077 case X86::BLSIC64rm:
1078 case X86::BLSMSK32rm:
1079 case X86::BLSMSK64rm:
1080 case X86::BLSR32rm:
1081 case X86::BLSR64rm:
1082 case X86::TZMSK32rm:
1083 case X86::TZMSK64rm:
1084
1085 // Bit extracting and clearing instructions should execute in constant time,
1086 // and set flags.
1087 case X86::BEXTR32rm:
1088 case X86::BEXTR64rm:
1089 case X86::BEXTRI32mi:
1090 case X86::BEXTRI64mi:
1091 case X86::BZHI32rm:
1092 case X86::BZHI64rm:
1093
1094 // Basic arithmetic is constant time on the input but does set flags.
1095 case X86::ADC8rm:
1096 case X86::ADC16rm:
1097 case X86::ADC32rm:
1098 case X86::ADC64rm:
1099 case X86::ADCX32rm:
1100 case X86::ADCX64rm:
1101 case X86::ADD8rm:
1102 case X86::ADD16rm:
1103 case X86::ADD32rm:
1104 case X86::ADD64rm:
1105 case X86::ADOX32rm:
1106 case X86::ADOX64rm:
1107 case X86::AND8rm:
1108 case X86::AND16rm:
1109 case X86::AND32rm:
1110 case X86::AND64rm:
1111 case X86::ANDN32rm:
1112 case X86::ANDN64rm:
1113 case X86::OR8rm:
1114 case X86::OR16rm:
1115 case X86::OR32rm:
1116 case X86::OR64rm:
1117 case X86::SBB8rm:
1118 case X86::SBB16rm:
1119 case X86::SBB32rm:
1120 case X86::SBB64rm:
1121 case X86::SUB8rm:
1122 case X86::SUB16rm:
1123 case X86::SUB32rm:
1124 case X86::SUB64rm:
1125 case X86::XOR8rm:
1126 case X86::XOR16rm:
1127 case X86::XOR32rm:
1128 case X86::XOR64rm:
1129 // Check whether the EFLAGS implicit-def is dead. We assume that this will
1130 // always find the implicit-def because this code should only be reached
1131 // for instructions that do in fact implicitly def this.
1132 if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) {
1133 // If we would clobber EFLAGS that are used, just bail for now.
1134      LLVM_DEBUG(dbgs() << "    Unable to harden post-load due to EFLAGS: ";
1135                 MI.dump(); dbgs() << "\n");
1136 return false;
1137 }
1138
1139 // Otherwise, fallthrough to handle these the same as instructions that
1140 // don't set EFLAGS.
1141    LLVM_FALLTHROUGH;
1142
1143 // Integer multiply w/o affecting flags is still believed to be constant
1144 // time on x86. Called out separately as this is among the most surprising
1145 // instructions to exhibit that behavior.
1146 case X86::MULX32rm:
1147 case X86::MULX64rm:
1148
1149 // Arithmetic instructions that are both constant time and don't set flags.
1150 case X86::RORX32mi:
1151 case X86::RORX64mi:
1152 case X86::SARX32rm:
1153 case X86::SARX64rm:
1154 case X86::SHLX32rm:
1155 case X86::SHLX64rm:
1156 case X86::SHRX32rm:
1157 case X86::SHRX64rm:
1158
1159 // Conversions are believed to be constant time and don't set flags.
1160 case X86::CVTTSD2SI64rm: case X86::VCVTTSD2SI64rm: case X86::VCVTTSD2SI64Zrm:
1161 case X86::CVTTSD2SIrm: case X86::VCVTTSD2SIrm: case X86::VCVTTSD2SIZrm:
1162 case X86::CVTTSS2SI64rm: case X86::VCVTTSS2SI64rm: case X86::VCVTTSS2SI64Zrm:
1163 case X86::CVTTSS2SIrm: case X86::VCVTTSS2SIrm: case X86::VCVTTSS2SIZrm:
1164 case X86::CVTSI2SDrm: case X86::VCVTSI2SDrm: case X86::VCVTSI2SDZrm:
1165 case X86::CVTSI2SSrm: case X86::VCVTSI2SSrm: case X86::VCVTSI2SSZrm:
1166 case X86::CVTSI642SDrm: case X86::VCVTSI642SDrm: case X86::VCVTSI642SDZrm:
1167 case X86::CVTSI642SSrm: case X86::VCVTSI642SSrm: case X86::VCVTSI642SSZrm:
1168 case X86::CVTSS2SDrm: case X86::VCVTSS2SDrm: case X86::VCVTSS2SDZrm:
1169 case X86::CVTSD2SSrm: case X86::VCVTSD2SSrm: case X86::VCVTSD2SSZrm:
1170 // AVX512 added unsigned integer conversions.
1171 case X86::VCVTTSD2USI64Zrm:
1172 case X86::VCVTTSD2USIZrm:
1173 case X86::VCVTTSS2USI64Zrm:
1174 case X86::VCVTTSS2USIZrm:
1175 case X86::VCVTUSI2SDZrm:
1176 case X86::VCVTUSI642SDZrm:
1177 case X86::VCVTUSI2SSZrm:
1178 case X86::VCVTUSI642SSZrm:
1179
1180 // Loads to register don't set flags.
1181 case X86::MOV8rm:
1182 case X86::MOV8rm_NOREX:
1183 case X86::MOV16rm:
1184 case X86::MOV32rm:
1185 case X86::MOV64rm:
1186 case X86::MOVSX16rm8:
1187 case X86::MOVSX32rm16:
1188 case X86::MOVSX32rm8:
1189 case X86::MOVSX32rm8_NOREX:
1190 case X86::MOVSX64rm16:
1191 case X86::MOVSX64rm32:
1192 case X86::MOVSX64rm8:
1193 case X86::MOVZX16rm8:
1194 case X86::MOVZX32rm16:
1195 case X86::MOVZX32rm8:
1196 case X86::MOVZX32rm8_NOREX:
1197 case X86::MOVZX64rm16:
1198 case X86::MOVZX64rm8:
1199 return true;
1200 }
1201}
1202
1203static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1204 const TargetRegisterInfo &TRI) {
1205  // Check if EFLAGS are alive by seeing if there is a def of them or they are
1206  // live-in, and then seeing if that def is in turn used.
1207 for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1208 if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1209 // If the def is dead, then EFLAGS is not live.
1210 if (DefOp->isDead())
1211 return false;
1212
1213 // Otherwise we've def'ed it, and it is live.
1214 return true;
1215 }
1216 // While at this instruction, also check if we use and kill EFLAGS
1217 // which means it isn't live.
1218 if (MI.killsRegister(X86::EFLAGS, &TRI))
1219 return false;
1220 }
1221
1222  // If we didn't find anything conclusive (neither definitely alive nor
1223  // definitely dead), return whether it lives into the block.
1224 return MBB.isLiveIn(X86::EFLAGS);
1225}
1226
1227void X86SpeculativeLoadHardeningPass::checkAllLoads(MachineFunction &MF) {
1228 // If the actual checking of loads is disabled, skip doing anything here.
1229 if (!HardenLoads)
11. Assuming the condition is false
12. Taking false branch
1230 return;
1231
1232 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1233 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1234
1235 SmallSet<unsigned, 16> HardenedAddrRegs;
1236
1237 SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1238
1239 // Track the set of load-dependent registers through the basic block. Because
1240 // the values of these registers have an existing data dependency on a loaded
1241 // value which we would have checked, we can omit any checks on them.
1242 SparseBitVector<> LoadDepRegs;
1243
1244 for (MachineBasicBlock &MBB : MF) {
1245 // We harden the loads of a basic block in several passes:
1246 //
1247 // 1) Collect all the loads which can have their loaded value hardened
1248 // and all the loads that instead need their address hardened. During
1249 // this walk we propagate load dependence for address hardened loads and
1250 // also look for LFENCE to stop hardening wherever possible. When
1252    // deciding whether or not to harden the loaded value, we check
1252 // to see if any registers used in the address will have been hardened
1253 // at this point and if so, harden any remaining address registers as
1254 // that often successfully re-uses hardened addresses and minimizes
1255 // instructions. FIXME: We should consider an aggressive mode where we
1256    // continue to value-harden as many loads as possible even when some address
1257 // register hardening would be free (due to reuse).
1258 for (MachineInstr &MI : MBB) {
1259 // We naively assume that all def'ed registers of an instruction have
1260 // a data dependency on all of their operands.
1261 // FIXME: Do a more careful analysis of x86 to build a conservative model
1262 // here.
1263 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1264 return Op.isReg() && LoadDepRegs.test(Op.getReg());
1265 }))
1266 for (MachineOperand &Def : MI.defs())
1267 if (Def.isReg())
1268 LoadDepRegs.set(Def.getReg());
1269
1270      // Both Intel and AMD have indicated that they will change the semantics of
1271 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1272 // no more need to guard things in this block.
1273 if (MI.getOpcode() == X86::LFENCE)
1274 break;
1275
1276 // If this instruction cannot load, nothing to do.
1277 if (!MI.mayLoad())
1278 continue;
1279
1280 // Some instructions which "load" are trivially safe or unimportant.
1281 if (MI.getOpcode() == X86::MFENCE)
1282 continue;
1283
1284 // Extract the memory operand information about this instruction.
1285 // FIXME: This doesn't handle loading pseudo instructions which we often
1286 // could handle with similarly generic logic. We probably need to add an
1287 // MI-layer routine similar to the MC-layer one we use here which maps
1288 // pseudos much like this maps real instructions.
1289 const MCInstrDesc &Desc = MI.getDesc();
1290 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1291 if (MemRefBeginIdx < 0) {
1292        LLVM_DEBUG(dbgs() << "WARNING: unable to harden loading instruction: ";
1293                   MI.dump());
1294 continue;
1295 }
1296
1297 MemRefBeginIdx += X86II::getOperandBias(Desc);
1298
1299 MachineOperand &BaseMO = MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1300 MachineOperand &IndexMO =
1301 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1302
1303 // If we have at least one (non-frame-index, non-RIP) register operand,
1304 // and neither operand is load-dependent, we need to check the load.
1305 unsigned BaseReg = 0, IndexReg = 0;
1306 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1307 BaseMO.getReg() != X86::NoRegister)
1308 BaseReg = BaseMO.getReg();
1309 if (IndexMO.getReg() != X86::NoRegister)
1310 IndexReg = IndexMO.getReg();
1311
1312 if (!BaseReg && !IndexReg)
1313 // No register operands!
1314 continue;
1315
1316 // If any register operand is dependent, this load is dependent and we
1317 // needn't check it.
1318 // FIXME: Is this true in the case where we are hardening loads after
1319 // they complete? Unclear, need to investigate.
1320 if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1321 (IndexReg && LoadDepRegs.test(IndexReg)))
1322 continue;
1323
1324 // If post-load hardening is enabled, this load is compatible with
1325 // post-load hardening, and we aren't already going to harden one of the
1326 // address registers, queue it up to be hardened post-load. Notably, even
1327 // once hardened this won't introduce a useful dependency that could prune
1328 // out subsequent loads.
1329 if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
1330 MI.getDesc().getNumDefs() == 1 && MI.getOperand(0).isReg() &&
1331 canHardenRegister(MI.getOperand(0).getReg()) &&
1332 !HardenedAddrRegs.count(BaseReg) &&
1333 !HardenedAddrRegs.count(IndexReg)) {
1334 HardenPostLoad.insert(&MI);
1335 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1336 continue;
1337 }
1338
1339 // Record this instruction for address hardening and record its register
1340 // operands as being address-hardened.
1341 HardenLoadAddr.insert(&MI);
1342 if (BaseReg)
1343 HardenedAddrRegs.insert(BaseReg);
1344 if (IndexReg)
1345 HardenedAddrRegs.insert(IndexReg);
1346
1347 for (MachineOperand &Def : MI.defs())
1348 if (Def.isReg())
1349 LoadDepRegs.set(Def.getReg());
1350 }
1351
1352 // Now re-walk the instructions in the basic block, and apply whichever
1353 // hardening strategy we have elected. Note that we do this in a second
1354 // pass specifically so that we have the complete set of instructions for
1355 // which we will do post-load hardening and can defer it in certain
1356 // circumstances.
1357 //
1358 // FIXME: This could probably be made even more effective by doing it
1359 // across the entire function. Rather than just walking the flat list
1360 // backwards here, we could walk the function in PO and each block bottom
1361    // up, allowing us in some cases to sink hardening across basic blocks. As
1362 // long as the in-block predicate state is used at the eventual hardening
1363 // site, this remains safe.
1364 for (MachineInstr &MI : MBB) {
1365      // We cannot require hardening both the def of a load and its address.
1366      assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1367             "Requested to harden both the address and def of a load!");
1368
1369 // Check if this is a load whose address needs to be hardened.
1370 if (HardenLoadAddr.erase(&MI)) {
13. Assuming the condition is false
14. Taking false branch
1371 const MCInstrDesc &Desc = MI.getDesc();
1372 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1373        assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1374
1375 MemRefBeginIdx += X86II::getOperandBias(Desc);
1376
1377 MachineOperand &BaseMO =
1378 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1379 MachineOperand &IndexMO =
1380 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1381 hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1382 continue;
1383 }
1384
1385 // Test if this instruction is one of our post load instructions (and
1386 // remove it from the set if so).
1387 if (HardenPostLoad.erase(&MI)) {
15. Assuming the condition is true
16. Taking true branch
1388        assert(!MI.isCall() && "Must not try to post-load harden a call!");
1389
1390 // If this is a data-invariant load, we want to try and sink any
1391 // hardening as far as possible.
1392 if (isDataInvariantLoad(MI)) {
17. Assuming the condition is false
18. Taking false branch
1393 // Sink the instruction we'll need to harden as far as we can down the
1394 // graph.
1395 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1396
1397 // If we managed to sink this instruction, update everything so we
1398 // harden that instruction when we reach it in the instruction
1399 // sequence.
1400 if (SunkMI != &MI) {
1401 // If in sinking there was no instruction needing to be hardened,
1402 // we're done.
1403 if (!SunkMI)
1404 continue;
1405
1406 // Otherwise, add this to the set of defs we harden.
1407 HardenPostLoad.insert(SunkMI);
1408 continue;
1409 }
1410 }
1411
1412 // The register def'ed by this instruction is trivially hardened so map
1413 // it to itself.
1414 AddrRegToHardenedReg[MI.getOperand(0).getReg()] =
1415 MI.getOperand(0).getReg();
1416
1417 hardenPostLoad(MI);
19. Calling 'X86SpeculativeLoadHardeningPass::hardenPostLoad'
1418 continue;
1419 }
1420
1421 // After we finish processing the instruction and doing any hardening
1422 // necessary for it, we need to handle transferring the predicate state
1423 // into a call and recovering it after the call returns (if it returns).
1424 if (!MI.isCall())
1425 continue;
1426
1427 // If we're not hardening interprocedurally, we can just skip calls.
1428 if (!HardenInterprocedurally)
1429 continue;
1430
1431 auto InsertPt = MI.getIterator();
1432 DebugLoc Loc = MI.getDebugLoc();
1433
1434 // First, we transfer the predicate state into the called function by
1435 // merging it into the stack pointer. This will kill the current def of
1436 // the state.
1437 unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1438 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
1439
1440 // If this call is also a return (because it is a tail call) we're done.
1441 if (MI.isReturn())
1442 continue;
1443
1444 // Otherwise we need to step past the call and recover the predicate
1445 // state from SP after the return, and make this new state available.
1446 ++InsertPt;
1447 unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
1448 PS->SSA.AddAvailableValue(&MBB, NewStateReg);
1449 }
1450
1451 HardenPostLoad.clear();
1452 HardenLoadAddr.clear();
1453 HardenedAddrRegs.clear();
1454 AddrRegToHardenedReg.clear();
1455
1456 // Currently, we only track data-dependent loads within a basic block.
1457 // FIXME: We should see if this is necessary or if we could be more
1458 // aggressive here without opening up attack avenues.
1459 LoadDepRegs.clear();
1460 }
1461}
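The first pass above (around source lines 1258-1268) propagates load dependence: if any used register of an instruction is already load-dependent, every register it defines becomes load-dependent too, so later loads addressed through those registers need no extra check. Below is a minimal standalone sketch of that propagation, not part of the pass; it uses hypothetical virtual register numbers and a plain std::set in place of SparseBitVector.

#include <cstdio>
#include <set>
#include <vector>

// Hypothetical instruction: lists of used and defined virtual registers.
struct Inst { std::vector<unsigned> Uses, Defs; };

int main() {
  // %100 was produced by a load whose address we already hardened.
  std::set<unsigned> LoadDepRegs = {100};
  std::vector<Inst> Block = {
      {{100}, {101}}, // %101 = add %100, ...  -> becomes load-dependent
      {{200}, {201}}, // %201 = add %200, ...  -> stays independent
      {{101}, {102}}, // %102 = shl %101, ...  -> dependent transitively
  };
  for (const Inst &I : Block) {
    bool AnyDepUse = false;
    for (unsigned U : I.Uses)
      AnyDepUse |= LoadDepRegs.count(U) != 0;
    if (AnyDepUse)
      for (unsigned D : I.Defs)
        LoadDepRegs.insert(D);
  }
  for (unsigned R : LoadDepRegs)
    std::printf("load-dependent: %%%u\n", R);
  return 0;
}

Running this marks %100, %101 and %102 as load-dependent while %201 stays independent, mirroring why a load through %102 would be skipped by the checks above.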
1462
1463/// Save EFLAGS into the returned GPR. This can in turn be restored with
1464/// `restoreEFLAGS`.
1465///
1466/// Note that LLVM can only lower very simple patterns of saved and restored
1467/// EFLAGS registers. The restore should always be within the same basic block
1468/// as the save so that no PHI nodes are inserted.
1469unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1470 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1471 DebugLoc Loc) {
1472 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1473 // what instruction selection does.
1474 unsigned Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1475 // We directly copy the FLAGS register and rely on later lowering to clean
1476 // this up into the appropriate setCC instructions.
1477 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1478 ++NumInstsInserted;
1479 return Reg;
1480}
1481
1482/// Restore EFLAGS from the provided GPR. This should be produced by
1483/// `saveEFLAGS`.
1484///
1485/// This must be done within the same basic block as the save in order to
1486/// reliably lower.
1487void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1488 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1489 unsigned Reg) {
1490 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1491 ++NumInstsInserted;
1492}
1493
1494/// Takes the current predicate state (in a register) and merges it into the
1495/// stack pointer. The state is essentially a single bit, but we merge this in
1496/// a way that won't form non-canonical pointers and also will be preserved
1497/// across normal stack adjustments.
1498void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1499 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1500 unsigned PredStateReg) {
1501 unsigned TmpReg = MRI->createVirtualRegister(PS->RC);
1502 // FIXME: This hard codes a shift distance based on the number of bits needed
1503 // to stay canonical on 64-bit. We should compute this somehow and support
1504 // 32-bit as part of that.
1505 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1506 .addReg(PredStateReg, RegState::Kill)
1507 .addImm(47);
1508 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1509 ++NumInstsInserted;
1510 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1511 .addReg(X86::RSP)
1512 .addReg(TmpReg, RegState::Kill);
1513 OrI->addRegisterDead(X86::EFLAGS, TRI);
1514 ++NumInstsInserted;
1515}
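A standalone arithmetic sketch, not part of the pass, of what the SHL-by-47 plus OR above does to RSP: with a zero predicate state the stack pointer is untouched, and with the all-ones state the high 17 bits become one, which keeps the pointer canonical (as the comment above notes) while parking the poison in bits the callee can recover. The RSP value is purely illustrative.

#include <cstdint>
#include <cstdio>
#include <initializer_list>

int main() {
  const uint64_t RSP = 0x00007fffffffe000ULL; // hypothetical user-space stack pointer
  for (uint64_t PredState : {UINT64_C(0), ~UINT64_C(0)}) {
    uint64_t Merged = RSP | (PredState << 47); // SHL64ri 47 followed by OR64rr into RSP
    std::printf("state=%016llx  merged rsp=%016llx\n",
                (unsigned long long)PredState, (unsigned long long)Merged);
  }
  return 0;
}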
1516
1517/// Extracts the predicate state stored in the high bits of the stack pointer.
1518unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1519 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1520 DebugLoc Loc) {
1521 unsigned PredStateReg = MRI->createVirtualRegister(PS->RC);
1522 unsigned TmpReg = MRI->createVirtualRegister(PS->RC);
1523
1524 // We know that the stack pointer will have any preserved predicate state in
1525 // its high bit. We just want to smear this across the other bits. Turns out,
1526 // this is exactly what an arithmetic right shift does.
1527 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1528 .addReg(X86::RSP);
1529 auto ShiftI =
1530 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1531 .addReg(TmpReg, RegState::Kill)
1532 .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1533 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1534 ++NumInstsInserted;
1535
1536 return PredStateReg;
1537}
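A matching sketch, again not part of the pass, of the extraction: the top bit of the possibly poisoned stack pointer is smeared across all 64 bits by an arithmetic right shift, recovering an all-zeros or all-ones predicate state. The signed right shift is assumed to be arithmetic here, which is what the generated SAR64ri guarantees; the SP values are the two the merge sketch above would produce.

#include <cstdint>
#include <cstdio>
#include <initializer_list>

int main() {
  // Clean SP (top bit clear) and the poisoned SP produced by the merge above.
  for (uint64_t SP : {UINT64_C(0x00007fffffffe000), UINT64_C(0xffffffffffffe000)}) {
    uint64_t PredState = (uint64_t)((int64_t)SP >> 63); // mimics SAR64ri 63
    std::printf("sp=%016llx  recovered state=%016llx\n",
                (unsigned long long)SP, (unsigned long long)PredState);
  }
  return 0;
}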
1538
1539void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1540 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1541 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1542 MachineBasicBlock &MBB = *MI.getParent();
1543 DebugLoc Loc = MI.getDebugLoc();
1544
1545  // Check if EFLAGS are alive by seeing if there is a def of them or they are
1546  // live-in, and then seeing if that def is in turn used.
1547 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1548
1549 SmallVector<MachineOperand *, 2> HardenOpRegs;
1550
1551 if (BaseMO.isFI()) {
1552 // A frame index is never a dynamically controllable load, so only
1553 // harden it if we're covering fixed address loads as well.
1554    LLVM_DEBUG(
1555        dbgs() << "  Skipping hardening base of explicit stack frame load: ";
1556        MI.dump(); dbgs() << "\n");
1557 } else if (BaseMO.getReg() == X86::RIP ||
1558 BaseMO.getReg() == X86::NoRegister) {
1559 // For both RIP-relative addressed loads or absolute loads, we cannot
1560 // meaningfully harden them because the address being loaded has no
1561 // dynamic component.
1562 //
1563 // FIXME: When using a segment base (like TLS does) we end up with the
1564 // dynamic address being the base plus -1 because we can't mutate the
1565 // segment register here. This allows the signed 32-bit offset to point at
1566 // valid segment-relative addresses and load them successfully.
1567    LLVM_DEBUG(
1568        dbgs() << "  Cannot harden base of "
1569               << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1570               << " address in a load!");
1571 } else {
1572    assert(BaseMO.isReg() &&
1573           "Only allowed to have a frame index or register base.");
1574 HardenOpRegs.push_back(&BaseMO);
1575 }
1576
1577 if (IndexMO.getReg() != X86::NoRegister &&
1578 (HardenOpRegs.empty() ||
1579 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1580 HardenOpRegs.push_back(&IndexMO);
1581
1582  assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1583         "Should have exactly one or two registers to harden!");
1584  assert((HardenOpRegs.size() == 1 ||
1585          HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1586         "Should not have two of the same registers!");
1587
1588  // Remove any registers that have already been checked.
1589 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1590 // See if this operand's register has already been checked.
1591 auto It = AddrRegToHardenedReg.find(Op->getReg());
1592 if (It == AddrRegToHardenedReg.end())
1593 // Not checked, so retain this one.
1594 return false;
1595
1596 // Otherwise, we can directly update this operand and remove it.
1597 Op->setReg(It->second);
1598 return true;
1599 });
1600 // If there are none left, we're done.
1601 if (HardenOpRegs.empty())
1602 return;
1603
1604 // Compute the current predicate state.
1605 unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1606
1607 auto InsertPt = MI.getIterator();
1608
1609 // If EFLAGS are live and we don't have access to instructions that avoid
1610 // clobbering EFLAGS we need to save and restore them. This in turn makes
1611 // the EFLAGS no longer live.
1612 unsigned FlagsReg = 0;
1613 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1614 EFLAGSLive = false;
1615 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1616 }
1617
1618 for (MachineOperand *Op : HardenOpRegs) {
1619 unsigned OpReg = Op->getReg();
1620 auto *OpRC = MRI->getRegClass(OpReg);
1621 unsigned TmpReg = MRI->createVirtualRegister(OpRC);
1622
1623 // If this is a vector register, we'll need somewhat custom logic to handle
1624 // hardening it.
1625 if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1626 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1627      assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1628 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1629
1630 // Move our state into a vector register.
1631 // FIXME: We could skip this at the cost of longer encodings with AVX-512
1632 // but that doesn't seem likely worth it.
1633 unsigned VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1634 auto MovI =
1635 BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1636 .addReg(StateReg);
1637 (void)MovI;
1638 ++NumInstsInserted;
1639      LLVM_DEBUG(dbgs() << "  Inserting mov: "; MovI->dump(); dbgs() << "\n");
1640
1641 // Broadcast it across the vector register.
1642 unsigned VBStateReg = MRI->createVirtualRegister(OpRC);
1643 auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1644 TII->get(Is128Bit ? X86::VPBROADCASTQrr
1645 : X86::VPBROADCASTQYrr),
1646 VBStateReg)
1647 .addReg(VStateReg);
1648 (void)BroadcastI;
1649 ++NumInstsInserted;
1650      LLVM_DEBUG(dbgs() << "  Inserting broadcast: "; BroadcastI->dump();
1651                 dbgs() << "\n");
1652
1653 // Merge our potential poison state into the value with a vector or.
1654 auto OrI =
1655 BuildMI(MBB, InsertPt, Loc,
1656 TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1657 .addReg(VBStateReg)
1658 .addReg(OpReg);
1659 (void)OrI;
1660 ++NumInstsInserted;
1661      LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
1662 } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1663 OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1664 OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1665      assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1666 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1667 bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1668 if (Is128Bit || Is256Bit)
1669        assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1670
1671 // Broadcast our state into a vector register.
1672 unsigned VStateReg = MRI->createVirtualRegister(OpRC);
1673 unsigned BroadcastOp =
1674 Is128Bit ? X86::VPBROADCASTQrZ128r
1675 : Is256Bit ? X86::VPBROADCASTQrZ256r : X86::VPBROADCASTQrZr;
1676 auto BroadcastI =
1677 BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1678 .addReg(StateReg);
1679 (void)BroadcastI;
1680 ++NumInstsInserted;
1681      LLVM_DEBUG(dbgs() << "  Inserting broadcast: "; BroadcastI->dump();
1682                 dbgs() << "\n");
1683
1684 // Merge our potential poison state into the value with a vector or.
1685 unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1686 : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1687 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1688 .addReg(VStateReg)
1689 .addReg(OpReg);
1690 (void)OrI;
1691 ++NumInstsInserted;
1692      LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
1693 } else {
1694 // FIXME: Need to support GR32 here for 32-bit code.
1695      assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1696             "Not a supported register class for address hardening!");
1697
1698 if (!EFLAGSLive) {
1699 // Merge our potential poison state into the value with an or.
1700 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1701 .addReg(StateReg)
1702 .addReg(OpReg);
1703 OrI->addRegisterDead(X86::EFLAGS, TRI);
1704 ++NumInstsInserted;
1705        LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
1706 } else {
1707 // We need to avoid touching EFLAGS so shift out all but the least
1708 // significant bit using the instruction that doesn't update flags.
1709 auto ShiftI =
1710 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1711 .addReg(OpReg)
1712 .addReg(StateReg);
1713 (void)ShiftI;
1714 ++NumInstsInserted;
1715        LLVM_DEBUG(dbgs() << "  Inserting shrx: "; ShiftI->dump();
1716                   dbgs() << "\n");
1717 }
1718 }
1719
1720 // Record this register as checked and update the operand.
1721    assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1722           "Should not have checked this register yet!");
1723 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1724 Op->setReg(TmpReg);
1725 ++NumAddrRegsHardened;
1726 }
1727
1728 // And restore the flags if needed.
1729 if (FlagsReg)
1730 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1731}
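A small sketch, not part of the pass, of the scalar case above: OR-ing the predicate state into a base or index register leaves the address intact on the correct path and collapses it to all ones under misspeculation, so the speculative load can no longer target an attacker-chosen location. The register value is hypothetical; the SHRX variant used when EFLAGS are live instead shifts away all but one address bit, with the same effect of destroying any attacker-controlled address.

#include <cstdint>
#include <cstdio>

// OR64rr of the predicate state into an address register, as inserted above.
static uint64_t hardenAddrReg(uint64_t AddrReg, uint64_t PredState) {
  return AddrReg | PredState;
}

int main() {
  const uint64_t Base = 0x00005555deadb000ULL; // hypothetical attacker-influenced base
  std::printf("correct path:       %016llx\n",
              (unsigned long long)hardenAddrReg(Base, 0));
  std::printf("misspeculated path: %016llx\n",
              (unsigned long long)hardenAddrReg(Base, ~UINT64_C(0)));
  return 0;
}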
1732
1733MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1734 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1735  assert(isDataInvariantLoad(InitialMI) &&
1736         "Cannot get here with a non-invariant load!");
1737
1738  // See if we can sink the hardening of the loaded value.
1739 auto SinkCheckToSingleUse =
1740 [&](MachineInstr &MI) -> Optional<MachineInstr *> {
1741 unsigned DefReg = MI.getOperand(0).getReg();
1742
1743    // We need to find a single use to which we can sink the check. We can
1744    // primarily do this because many uses may already end up checked on their
1745    // own.
1746 MachineInstr *SingleUseMI = nullptr;
1747 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1748 // If we're already going to harden this use, it is data invariant and
1749 // within our block.
1750 if (HardenedInstrs.count(&UseMI)) {
1751 if (!isDataInvariantLoad(UseMI)) {
1752 // If we've already decided to harden a non-load, we must have sunk
1753 // some other post-load hardened instruction to it and it must itself
1754 // be data-invariant.
1755          assert(isDataInvariant(UseMI) &&
1756                 "Data variant instruction being hardened!");
1757 continue;
1758 }
1759
1760 // Otherwise, this is a load and the load component can't be data
1761 // invariant so check how this register is being used.
1762 const MCInstrDesc &Desc = UseMI.getDesc();
1763 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1764        assert(MemRefBeginIdx >= 0 &&
1765               "Should always have mem references here!");
1766 MemRefBeginIdx += X86II::getOperandBias(Desc);
1767
1768 MachineOperand &BaseMO =
1769 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1770 MachineOperand &IndexMO =
1771 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1772 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1773 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1774 // The load uses the register as part of its address making it not
1775 // invariant.
1776 return {};
1777
1778 continue;
1779 }
1780
1781 if (SingleUseMI)
1782 // We already have a single use, this would make two. Bail.
1783 return {};
1784
1785 // If this single use isn't data invariant, isn't in this block, or has
1786 // interfering EFLAGS, we can't sink the hardening to it.
1787 if (!isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent())
1788 return {};
1789
1790 // If this instruction defines multiple registers bail as we won't harden
1791 // all of them.
1792 if (UseMI.getDesc().getNumDefs() > 1)
1793 return {};
1794
1795      // If this register isn't a virtual register we can't sanely walk its uses,
1796      // so just bail. Also check that its register class is one of the ones we
1797      // can harden.
1798 unsigned UseDefReg = UseMI.getOperand(0).getReg();
1799 if (!TRI->isVirtualRegister(UseDefReg) ||
1800 !canHardenRegister(UseDefReg))
1801 return {};
1802
1803 SingleUseMI = &UseMI;
1804 }
1805
1806 // If SingleUseMI is still null, there is no use that needs its own
1807 // checking. Otherwise, it is the single use that needs checking.
1808 return {SingleUseMI};
1809 };
1810
1811 MachineInstr *MI = &InitialMI;
1812 while (Optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1813 // Update which MI we're checking now.
1814 MI = *SingleUse;
1815 if (!MI)
1816 break;
1817 }
1818
1819 return MI;
1820}
1821
1822bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) {
1823 auto *RC = MRI->getRegClass(Reg);
1824 int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1825 if (RegBytes > 8)
1826 // We don't support post-load hardening of vectors.
1827 return false;
1828
1829 // If this register class is explicitly constrained to a class that doesn't
1830 // require REX prefix, we may not be able to satisfy that constraint when
1831 // emitting the hardening instructions, so bail out here.
1832 // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1833 // end up both with a NOREX and REX-only register as operands to the hardening
1834 // instructions. It would be better to fix that code to handle this situation
1835 // rather than hack around it in this way.
1836 const TargetRegisterClass *NOREXRegClasses[] = {
1837 &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1838 &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1839 if (RC == NOREXRegClasses[Log2_32(RegBytes)])
1840 return false;
1841
1842 const TargetRegisterClass *GPRRegClasses[] = {
1843 &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1844 &X86::GR64RegClass};
1845 return RC->hasSuperClassEq(GPRRegClasses[Log2_32(RegBytes)]);
1846}
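The size-to-class lookups above index fixed four-entry arrays with Log2_32 of the register size in bytes, so 1-, 2-, 4- and 8-byte registers map to indices 0 through 3; anything wider is rejected earlier by the `RegBytes > 8` check, which is likely the interprocedural context the analyzer lacks when it flags the similar OrOpCodes lookup at line 1867. A standalone sketch of the mapping, with a portable stand-in for llvm::Log2_32, follows; it is illustrative only.

#include <cstdio>
#include <initializer_list>

// Portable stand-in for llvm::Log2_32 on the small sizes used here.
static unsigned log2u(unsigned V) {
  unsigned Log = 0;
  while (V >>= 1)
    ++Log;
  return Log;
}

int main() {
  const char *GPRClasses[] = {"GR8", "GR16", "GR32", "GR64"};
  for (unsigned RegBytes : {1u, 2u, 4u, 8u})
    std::printf("%u-byte register -> index %u -> %s\n", RegBytes,
                log2u(RegBytes), GPRClasses[log2u(RegBytes)]);
  return 0;
}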
1847
1848// We can harden non-leaking loads into a register without touching the address
1849// by just hiding all of the loaded bits. We use an `or` instruction to do
1850// this because having the poison value be all ones allows us to use the same
1851// value below. The goal is just to keep the loaded bits from being exposed to
1852// speculative execution; coercing them all to one is sufficient.
1853void X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1854 MachineBasicBlock &MBB = *MI.getParent();
1855 DebugLoc Loc = MI.getDebugLoc();
1856
1857 // For all of these, the def'ed register operand is operand zero.
1858 auto &DefOp = MI.getOperand(0);
1859 unsigned OldDefReg = DefOp.getReg();
1860  assert(canHardenRegister(OldDefReg) &&
1861         "Cannot harden this instruction's defined register!");
1862
1863 auto *DefRC = MRI->getRegClass(OldDefReg);
1864 int DefRegBytes = TRI->getRegSizeInBits(*DefRC) / 8;
1865
1866 unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1867 unsigned OrOpCode = OrOpCodes[Log2_32(DefRegBytes)];
20. Assigned value is garbage or undefined
1868
1869 unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1870
1871 auto GetStateRegInRC = [&](const TargetRegisterClass &RC) {
1872 unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1873
1874 int Bytes = TRI->getRegSizeInBits(RC) / 8;
1875 // FIXME: Need to teach this about 32-bit mode.
1876 if (Bytes != 8) {
1877 unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1878 unsigned NarrowStateReg = MRI->createVirtualRegister(&RC);
1879 BuildMI(MBB, MI.getIterator(), Loc, TII->get(TargetOpcode::COPY),
1880 NarrowStateReg)
1881 .addReg(StateReg, 0, SubRegImm);
1882 StateReg = NarrowStateReg;
1883 }
1884 return StateReg;
1885 };
1886
1887 auto InsertPt = std::next(MI.getIterator());
1888 unsigned FlagsReg = 0;
1889 bool EFLAGSLive = isEFLAGSLive(MBB, InsertPt, *TRI);
1890 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1891 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1892 EFLAGSLive = false;
1893 }
1894
1895 if (!EFLAGSLive) {
1896 unsigned StateReg = GetStateRegInRC(*DefRC);
1897 unsigned NewDefReg = MRI->createVirtualRegister(DefRC);
1898 DefOp.setReg(NewDefReg);
1899 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), OldDefReg)
1900 .addReg(StateReg)
1901 .addReg(NewDefReg);
1902 OrI->addRegisterDead(X86::EFLAGS, TRI);
1903 ++NumInstsInserted;
1904    LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
1905 } else {
1906    assert(Subtarget->hasBMI2() &&
1907           "Cannot harden loads and preserve EFLAGS without BMI2!");
1908
1909 unsigned ShiftOpCode = DefRegBytes < 4 ? X86::SHRX32rr : X86::SHRX64rr;
1910 auto &ShiftRC =
1911 DefRegBytes < 4 ? X86::GR32_NOSPRegClass : X86::GR64_NOSPRegClass;
1912 int ShiftRegBytes = TRI->getRegSizeInBits(ShiftRC) / 8;
1913 unsigned DefSubRegImm = SubRegImms[Log2_32(DefRegBytes)];
1914
1915 unsigned StateReg = GetStateRegInRC(ShiftRC);
1916
1917 // First have the def instruction def a temporary register.
1918 unsigned TmpReg = MRI->createVirtualRegister(DefRC);
1919 DefOp.setReg(TmpReg);
1920 // Now copy it into a register of the shift RC.
1921 unsigned ShiftInputReg = TmpReg;
1922 if (DefRegBytes != ShiftRegBytes) {
1923 unsigned UndefReg = MRI->createVirtualRegister(&ShiftRC);
1924 BuildMI(MBB, InsertPt, Loc, TII->get(X86::IMPLICIT_DEF), UndefReg);
1925 ShiftInputReg = MRI->createVirtualRegister(&ShiftRC);
1926 BuildMI(MBB, InsertPt, Loc, TII->get(X86::INSERT_SUBREG), ShiftInputReg)
1927 .addReg(UndefReg)
1928 .addReg(TmpReg)
1929 .addImm(DefSubRegImm);
1930 }
1931
1932 // We shift this once if the shift is wider than the def and thus we can
1933 // shift *all* of the def'ed bytes out. Otherwise we need to do two shifts.
1934
1935 unsigned ShiftedReg = MRI->createVirtualRegister(&ShiftRC);
1936 auto Shift1I =
1937 BuildMI(MBB, InsertPt, Loc, TII->get(ShiftOpCode), ShiftedReg)
1938 .addReg(ShiftInputReg)
1939 .addReg(StateReg);
1940 (void)Shift1I;
1941 ++NumInstsInserted;
1942    LLVM_DEBUG(dbgs() << "  Inserting shrx: "; Shift1I->dump(); dbgs() << "\n");
1943
1944 // The only way we have a bit left is if all 8 bytes were defined. Do an
1945 // extra shift to get the last bit in this case.
1946 if (DefRegBytes == ShiftRegBytes) {
1947      // We can just directly def the old def register as it's the same size.
1948 ShiftInputReg = ShiftedReg;
1949 auto Shift2I =
1950 BuildMI(MBB, InsertPt, Loc, TII->get(ShiftOpCode), OldDefReg)
1951 .addReg(ShiftInputReg)
1952 .addReg(StateReg);
1953 (void)Shift2I;
1954 ++NumInstsInserted;
1955      LLVM_DEBUG(dbgs() << "  Inserting shrx: "; Shift2I->dump();
1956                 dbgs() << "\n");
1957 } else {
1958 // When we have different size shift register we need to fix up the
1959 // class. We can do that as we copy into the old def register.
1960 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), OldDefReg)
1961 .addReg(ShiftedReg, 0, DefSubRegImm);
1962 }
1963 }
1964
1965 if (FlagsReg)
1966 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1967
1968 ++NumPostLoadRegsHardened;
1969}
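A standalone sketch, not part of the pass, of the flag-free path above: the freshly loaded value is OR-ed with the predicate state, so on the architecturally correct path (state zero) it is unchanged, and on a misspeculated path (state all ones) every loaded bit is forced to one before any dependent instruction can leak it. The loaded value is purely illustrative; the BMI2 SHRX path reaches the same end without clobbering EFLAGS by shifting the loaded bits out instead.

#include <cstdint>
#include <cstdio>

// OR of the predicate state into the freshly loaded value, as inserted above.
static uint64_t hardenLoadedValue(uint64_t Loaded, uint64_t PredState) {
  return Loaded | PredState;
}

int main() {
  const uint64_t Loaded = 0x00000000deadbeefULL; // hypothetical (possibly secret) loaded value
  std::printf("correct path:       %016llx\n",
              (unsigned long long)hardenLoadedValue(Loaded, 0));
  std::printf("misspeculated path: %016llx\n",
              (unsigned long long)hardenLoadedValue(Loaded, ~UINT64_C(0)));
  return 0;
}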
1970
1971/// Harden a return instruction.
1972///
1973/// Returns implicitly perform a load which we need to harden. Without hardening
1974/// this load, an attacker may speculatively write over the return address to
1975/// steer speculation of the return to an attacker-controlled address. This is
1976/// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1977/// this paper:
1978/// https://people.csail.mit.edu/vlk/spectre11.pdf
1979///
1980/// We can harden this by introducing an LFENCE that will delay any load of the
1981/// return address until prior instructions have retired (and thus are not being
1982/// speculated), or we can harden the address used by the implicit load: the
1983/// stack pointer.
1984///
1985/// If we are not using an LFENCE, hardening the stack pointer has an additional
1986/// benefit: it allows us to pass the predicate state accumulated in this
1987/// function back to the caller. In the absence of a BCBS attack on the return,
1988/// the caller will typically be resumed and speculatively executed due to the
1989/// Return Stack Buffer (RSB) prediction which is very accurate and has a high
1990/// priority. It is possible that some code from the caller will be executed
1991/// speculatively even during a BCBS-attacked return until the steering takes
1992/// effect. Whenever this happens, the caller can recover the (poisoned)
1993/// predicate state from the stack pointer and continue to harden loads.
1994void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
1995 MachineBasicBlock &MBB = *MI.getParent();
1996 DebugLoc Loc = MI.getDebugLoc();
1997 auto InsertPt = MI.getIterator();
1998
1999 if (FenceCallAndRet) {
2000 // Simply forcibly block speculation of loads out of the function by using
2001 // an LFENCE. This is potentially a heavy-weight mitigation strategy, but
2002 // should be secure, is simple from an ABI perspective, and the cost can be
2003 // minimized through inlining.
2004 //
2005 // FIXME: We should investigate ways to establish a strong data-dependency
2006 // on the return. However, poisoning the stack pointer is unlikely to work
2007 // because the return is *predicted* rather than relying on the load of the
2008 // return address to actually resolve.
2009 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE));
2010 ++NumInstsInserted;
2011 ++NumLFENCEsInserted;
2012 return;
2013 }
2014
2015 // Take our predicate state, shift it to the high 17 bits (so that we keep
2016 // pointers canonical) and merge it into RSP. This will allow the caller to
2017 // extract it when we return (speculatively).
2018 mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2019}
2020
2021 INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
2022                       "X86 speculative load hardener", false, false)
2023 INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
2024                     "X86 speculative load hardener", false, false)
2025
2026FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2027 return new X86SpeculativeLoadHardeningPass();
2028}