File: lib/Target/X86/X86SpeculativeLoadHardening.cpp
Warning: line 1877, column 7: Assigned value is garbage or undefined
//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Provide a pass which mitigates speculative execution attacks which operate
/// by speculating incorrectly past some predicate (a type check, bounds check,
/// or other condition) to reach a load with invalid inputs and leak the data
/// accessed by that load using a side channel out of the speculative domain.
///
/// For details on the attacks, see the first variant in both the Project Zero
/// writeup and the Spectre paper:
/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
/// https://spectreattack.com/spectre.pdf
///
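/// As a purely illustrative sketch (not code from this pass), the classic
/// vulnerable pattern looks like:
///
/// ```
/// if (idx < array1_size)                  // predicate the CPU may mispredict
///   value = array2[array1[idx] * 4096];   // dependent load leaks array1[idx]
/// ```
///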
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-speculative-load-hardening"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
STATISTIC(NumAddrRegsHardened,
          "Number of address mode used registers hardened");
STATISTIC(NumPostLoadRegsHardened,
          "Number of post-load register values hardened");
STATISTIC(NumInstsInserted, "Number of instructions inserted");
STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");

static cl::opt<bool> HardenEdgesWithLFENCE(
    PASS_KEY "-lfence",
    cl::desc(
        "Use LFENCE along each conditional edge to harden against speculative "
        "loads rather than conditional movs and poisoned pointers."),
    cl::init(false), cl::Hidden);

static cl::opt<bool> EnablePostLoadHardening(
    PASS_KEY "-post-load",
    cl::desc("Harden the value loaded *after* it is loaded by "
             "flushing the loaded bits to 1. This is hard to do "
             "in general but can be done easily for GPRs."),
    cl::init(true), cl::Hidden);
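
// As an illustrative sketch of post-load hardening (assuming the predicate
// state lives in %r15: zero on the architectural path, all-ones when
// misspeculating), a hardened GPR load looks roughly like:
//
// ```
//   movq (%rsi), %rax
//   orq  %r15, %rax    # flushes the loaded bits to 1 under misspeculation
// ```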

static cl::opt<bool> FenceCallAndRet(
    PASS_KEY "-fence-call-and-ret",
    cl::desc("Use a full speculation fence to harden both call and ret edges "
             "rather than a lighter weight mitigation."),
    cl::init(false), cl::Hidden);

static cl::opt<bool> HardenInterprocedurally(
    PASS_KEY "-ip",
    cl::desc("Harden interprocedurally by passing our state in and out of "
             "functions in the high bits of the stack pointer."),
    cl::init(true), cl::Hidden);
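
// Illustrative sketch of that interprocedural convention (hypothetical
// registers, not a verbatim excerpt): the zero/all-ones state is merged into
// the otherwise-unused high bits of %rsp around calls and re-derived after:
//
// ```
//   shlq $47, %rax      # move the state into the top bits
//   orq  %rax, %rsp     # merge it into the stack pointer
//   callq callee
//   movq %rsp, %rax
//   sarq $63, %rax      # arithmetic shift re-extracts 0 or all-ones
// ```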

static cl::opt<bool>
    HardenLoads(PASS_KEY "-loads",
                cl::desc("Sanitize loads from memory. When disabled, no "
                         "significant security is provided."),
                cl::init(true), cl::Hidden);

namespace llvm {

void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);

} // end namespace llvm

namespace {

class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
public:
  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) {
    initializeX86SpeculativeLoadHardeningPassPass(
        *PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "X86 speculative load hardening";
  }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  /// The information about a block's conditional terminators needed to trace
  /// our predicate state through the exiting edges.
  struct BlockCondInfo {
    MachineBasicBlock *MBB;

    // We mostly have one conditional branch, and in extremely rare cases have
    // two. Three and more are so rare as to be unimportant for compile time.
    SmallVector<MachineInstr *, 2> CondBrs;

    MachineInstr *UncondBr;
  };

  /// Manages the predicate state traced through the program.
  struct PredState {
    unsigned InitialReg;
    unsigned PoisonReg;

    const TargetRegisterClass *RC;
    MachineSSAUpdater SSA;

    PredState(MachineFunction &MF, const TargetRegisterClass *RC)
        : RC(RC), SSA(MF) {}
  };

  const X86Subtarget *Subtarget;
  MachineRegisterInfo *MRI;
  const X86InstrInfo *TII;
  const TargetRegisterInfo *TRI;

  Optional<PredState> PS;

  void hardenEdgesWithLFENCE(MachineFunction &MF);

  SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);

  SmallVector<MachineInstr *, 16>
  tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);

  void checkAllLoads(MachineFunction &MF);

  unsigned saveEFLAGS(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
  void restoreEFLAGS(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
                     unsigned OFReg);

  void mergePredStateIntoSP(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
                            unsigned PredStateReg);
  unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator InsertPt,
                                  DebugLoc Loc);

  void
  hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
                 MachineOperand &IndexMO,
                 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
  MachineInstr *
  sinkPostLoadHardenedInst(MachineInstr &MI,
                           SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
  bool canHardenRegister(unsigned Reg);
  void hardenPostLoad(MachineInstr &MI);
  void hardenReturnInstr(MachineInstr &MI);
};

} // end anonymous namespace

char X86SpeculativeLoadHardeningPass::ID = 0;

void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
}

static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
                                    MachineBasicBlock &Succ, int SuccCount,
                                    MachineInstr *Br, MachineInstr *&UncondBr,
                                    const X86InstrInfo &TII) {
  assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");

  MachineFunction &MF = *MBB.getParent();

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // We have to insert the new block immediately after the current one as we
  // don't know what layout-successor relationships the successor has and we
  // may not be able to (and generally don't want to) try to fix those up.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Update the branch instruction if necessary.
  if (Br) {
    assert(Br->getOperand(0).getMBB() == &Succ &&
           "Didn't start with the right target!");
    Br->getOperand(0).setMBB(&NewMBB);

    // If this successor was reached through a branch rather than fallthrough,
    // we might have *broken* fallthrough and so need to inject a new
    // unconditional branch.
    if (!UncondBr) {
      MachineBasicBlock &OldLayoutSucc =
          *std::next(MachineFunction::iterator(&NewMBB));
      assert(MBB.isSuccessor(&OldLayoutSucc) &&
             "Without an unconditional branch, the old layout successor should "
             "be an actual successor!");
      auto BrBuilder =
          BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
      // Update the unconditional branch now that we've added one.
      UncondBr = &*BrBuilder;
    }

    // Insert unconditional "jump Succ" instruction in the new block if
    // necessary.
    if (!NewMBB.isLayoutSuccessor(&Succ)) {
      SmallVector<MachineOperand, 4> Cond;
      TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
    }
  } else {
    assert(!UncondBr &&
           "Cannot have a branchless successor and an unconditional branch!");
    assert(NewMBB.isLayoutSuccessor(&Succ) &&
           "A non-branch successor must have been a layout successor before "
           "and now is a layout successor of the new block.");
  }

  // If this is the only edge to the successor, we can just replace it in the
  // CFG. Otherwise we need to add a new entry in the CFG for the new
  // successor.
  if (SuccCount == 1) {
    MBB.replaceSuccessor(&Succ, &NewMBB);
  } else {
    MBB.splitSuccessor(&Succ, &NewMBB);
  }

  // Hook up the edge from the new basic block to the old successor in the CFG.
  NewMBB.addSuccessor(&Succ);

  // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
  for (MachineInstr &MI : Succ) {
    if (!MI.isPHI())
      break;
    for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
         OpIdx += 2) {
      MachineOperand &OpV = MI.getOperand(OpIdx);
      MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
      assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
      if (OpMBB.getMBB() != &MBB)
        continue;

      // If this is the last edge to the successor, just replace MBB in the
      // PHI.
      if (SuccCount == 1) {
        OpMBB.setMBB(&NewMBB);
        break;
      }

      // Otherwise, append a new pair of operands for the new incoming edge.
      MI.addOperand(MF, OpV);
      MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
      break;
    }
  }

  // Inherit live-ins from the successor
  for (auto &LI : Succ.liveins())
    NewMBB.addLiveIn(LI);

  LLVM_DEBUG(dbgs() << "  Split edge from '" << MBB.getName() << "' to '"
                    << Succ.getName() << "'.\n");
  return NewMBB;
}

/// Remove duplicate PHI operands to leave the PHI in a canonical and
/// predictable form.
///
/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
/// isn't what you might expect. We may have multiple entries in PHI nodes for
/// a single predecessor. This makes CFG-updating extremely complex, so here we
/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
/// one entry per predecessor, regardless of how many edges there are.
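///
/// For instance (illustrative MIR, not taken from a real function), we may
/// see a PHI such as:
///
/// ```
///   %v = PHI %a, %bb.0, %a, %bb.0, %c, %bb.1
/// ```
///
/// and canonicalize it so that %bb.0 contributes exactly one entry.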
static void canonicalizePHIOperands(MachineFunction &MF) {
  SmallPtrSet<MachineBasicBlock *, 4> Preds;
  SmallVector<int, 4> DupIndices;
  for (auto &MBB : MF)
    for (auto &MI : MBB) {
      if (!MI.isPHI())
        break;

      // First we scan the operands of the PHI looking for duplicate entries
      // for a particular predecessor. We retain the operand index of each
      // duplicate entry found.
      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2)
        if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
          DupIndices.push_back(OpIdx);

      // Now walk the duplicate indices, removing both the block and value.
      // Note that these are stored as a vector, making this element-wise
      // removal potentially quadratic.
      //
      // FIXME: It is really frustrating that we have to use a quadratic
      // removal algorithm here. There should be a better way, but the use-def
      // updates required make that impossible using the public API.
      //
      // Note that we have to process these backwards so that we don't
      // invalidate other indices with each removal.
      while (!DupIndices.empty()) {
        int OpIdx = DupIndices.pop_back_val();
        // Remove both the block and value operand, again in reverse order to
        // preserve indices.
        MI.RemoveOperand(OpIdx + 1);
        MI.RemoveOperand(OpIdx);
      }

      Preds.clear();
    }
}

/// Helper to scan a function for loads vulnerable to misspeculation that we
/// want to harden.
///
/// We use this to avoid making changes to functions where there is nothing we
/// need to do to harden against misspeculation.
static bool hasVulnerableLoad(MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // Loads within this basic block after an LFENCE are not at risk of
      // speculatively executing with invalid predicates from prior control
      // flow. So break out of this block but continue scanning the function.
      if (MI.getOpcode() == X86::LFENCE)
        break;

      // Looking for loads only.
      if (!MI.mayLoad())
        continue;

      // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
      if (MI.getOpcode() == X86::MFENCE)
        continue;

      // We found a load.
      return true;
    }
  }

  // No loads found.
  return false;
}

bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
    MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();

  // FIXME: Support for 32-bit.
  PS.emplace(MF, &X86::GR64_NOSPRegClass);

  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  // We support an alternative hardening technique based on a debug flag.
  if (HardenEdgesWithLFENCE) {
    hardenEdgesWithLFENCE(MF);
    return true;
  }

  // Create a dummy debug loc to use for all the generated code here.
  DebugLoc Loc;

  MachineBasicBlock &Entry = *MF.begin();
  auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());

  // Do a quick scan to see if we have any checkable loads.
  bool HasVulnerableLoad = hasVulnerableLoad(MF);

  // See if we have any conditional branching blocks that we will need to trace
  // predicate state through.
  SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);

  // If we have no interesting conditions or loads, nothing to do here.
  if (!HasVulnerableLoad && Infos.empty())
    return true;

  // The poison value is required to be an all-ones value for many aspects of
  // this mitigation.
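  // (For example, because the poison is all-ones, OR-ing it into a pointer or
  // a loaded value pins every bit to 1, so a misspeculated access cannot be
  // steered at attacker-chosen secrets.)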
  const int PoisonVal = -1;
  PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
      .addImm(PoisonVal);
  ++NumInstsInserted;

  // If we have loads being hardened and we've asked for call and ret edges to
  // get a full fence-based mitigation, inject that fence.
  if (HasVulnerableLoad && FenceCallAndRet) {
    // We need to insert an LFENCE at the start of the function to suspend any
    // incoming misspeculation from the caller. This helps two-fold: the caller
    // may not have been protected as this code has been, and this code gets to
    // not take any specific action to protect across calls.
    // FIXME: We could skip this for functions which unconditionally return
    // a constant.
    BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
  }

  // If we guarded the entry with an LFENCE and have no conditionals to protect
  // in blocks, then we're done.
  if (FenceCallAndRet && Infos.empty())
    // We may have changed the function's code at this point to insert fences.
    return true;

  if (HardenInterprocedurally && !FenceCallAndRet) {
    // Set up the predicate state by extracting it from the incoming stack
    // pointer so we pick up any misspeculation in our caller.
    PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
  } else {
    // Otherwise, just build the predicate state itself by zeroing a register
    // as we don't need any initial state.
    PS->InitialReg = MRI->createVirtualRegister(PS->RC);
    unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
    auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
                         PredStateSubReg);
    ++NumInstsInserted;
    MachineOperand *ZeroEFLAGSDefOp =
        ZeroI->findRegisterDefOperand(X86::EFLAGS);
    assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
           "Must have an implicit def of EFLAGS!");
    ZeroEFLAGSDefOp->setIsDead(true);
    BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
            PS->InitialReg)
        .addImm(0)
        .addReg(PredStateSubReg)
        .addImm(X86::sub_32bit);
  }

  // We're going to need to trace predicate state throughout the function's
  // CFG. Prepare for this by setting up our initial state of PHIs with unique
  // predecessor entries and all the initial predicate state.
  canonicalizePHIOperands(MF);

  // Track the updated values in an SSA updater to rewrite into SSA form at the
  // end.
  PS->SSA.Initialize(PS->InitialReg);
  PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);

  // Trace through the CFG.
  auto CMovs = tracePredStateThroughCFG(MF, Infos);

  // We may also enter basic blocks in this function via exception handling
  // control flow. Here, if we are hardening interprocedurally, we need to
  // re-capture the predicate state from the throwing code. In the Itanium ABI,
  // the throw will always look like a call to __cxa_throw and will have the
  // predicate state in the stack pointer, so extract fresh predicate state from
  // the stack pointer and make it available in SSA.
  // FIXME: Handle non-Itanium ABI EH models.
  if (HardenInterprocedurally) {
    for (MachineBasicBlock &MBB : MF) {
      assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
      assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
      assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
      if (!MBB.isEHPad())
        continue;
      PS->SSA.AddAvailableValue(
          &MBB,
          extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
    }
  }

  // Now check all of the loads using the predicate state.
  checkAllLoads(MF);

  // Now rewrite all the uses of the pred state using the SSA updater so that
  // we track updates through the CFG.
  for (MachineInstr *CMovI : CMovs)
    for (MachineOperand &Op : CMovI->operands()) {
      if (!Op.isReg() || Op.getReg() != PS->InitialReg)
        continue;

      PS->SSA.RewriteUse(Op);
    }

  // If we are hardening interprocedurally, find each returning block and
  // protect the caller from being returned to through misspeculation.
  if (HardenInterprocedurally)
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.empty())
        continue;

      MachineInstr &MI = MBB.back();
      if (!MI.isReturn())
        continue;

      hardenReturnInstr(MI);
    }

  LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
             dbgs() << "\n"; MF.verify(this));
  return true;
}

/// Implements the naive hardening approach of putting an LFENCE after every
/// potentially mis-predicted control flow construct.
///
/// We include this as an alternative mostly for the purpose of comparison. The
/// performance impact of this is expected to be extremely severe and not
/// practical for any real-world users.
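///
/// Illustratively (a sketch with hypothetical labels, not output of the
/// pass), every interesting successor simply gains a fence at its start:
///
/// ```
///   jne .LBB0_2
/// .LBB0_1:
///   lfence            # cuts speculation along the fallthrough edge
///   ...
/// .LBB0_2:
///   lfence            # and along the taken edge
///   ...
/// ```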
void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
    MachineFunction &MF) {
  // First, we scan the function looking for blocks that are reached along edges
  // that we might want to harden.
  SmallSetVector<MachineBasicBlock *, 8> Blocks;
  for (MachineBasicBlock &MBB : MF) {
    // If there are zero or one successors, nothing to do here.
    if (MBB.succ_size() <= 1)
      continue;

    // Skip blocks unless their terminators start with a branch. Other
    // terminators don't seem interesting for guarding against misspeculation.
    auto TermIt = MBB.getFirstTerminator();
    if (TermIt == MBB.end() || !TermIt->isBranch())
      continue;

    // Add all the non-EH-pad successors to the blocks we want to harden. We
    // skip EH pads because there isn't really a condition of interest on
    // entering.
    for (MachineBasicBlock *SuccMBB : MBB.successors())
      if (!SuccMBB->isEHPad())
        Blocks.insert(SuccMBB);
  }

  for (MachineBasicBlock *MBB : Blocks) {
    auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
    BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
  }
}

SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
  SmallVector<BlockCondInfo, 16> Infos;

  // Walk the function and build up a summary for each block's conditions that
  // we need to trace through.
  for (MachineBasicBlock &MBB : MF) {
    // If there are zero or one successors, nothing to do here.
    if (MBB.succ_size() <= 1)
      continue;

    // We want to reliably handle any conditional branch terminators in the
    // MBB, so we manually analyze the branch. We can handle all of the
    // permutations here, including ones that analyzeBranch cannot.
    //
    // The approach is to walk backwards across the terminators, resetting at
    // any unconditional non-indirect branch, and track all conditional edges
    // to basic blocks as well as the fallthrough or unconditional successor
    // edge. For each conditional edge, we track the target and the opposite
    // condition code in order to inject a "no-op" cmov into that successor
    // that will harden the predicate. For the fallthrough/unconditional
    // edge, we inject a separate cmov for each conditional branch with
    // matching condition codes. This effectively implements an "and" of the
    // condition flags, even if there isn't a single condition flag that would
    // directly implement that. We don't bother trying to optimize either of
    // these cases because if such an optimization is possible, LLVM should
    // have optimized the conditional *branches* in that way already to reduce
    // instruction count. This late, we simply assume the minimal number of
    // branch instructions is being emitted and use that to guide our cmov
    // insertion.
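    //
    // As a concrete sketch (illustrative labels, not real output): for
    //
    // ```
    //   jne .Ltaken
    //   jmp .Lfall
    // ```
    //
    // the edge to `.Ltaken` is hardened with a cmov on the inverted code
    // (`e`) and the edge to `.Lfall` with a cmov on the matching code (`ne`),
    // so each cmov fires exactly when the flags contradict the edge that was
    // (mis)speculated.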

    BlockCondInfo Info = {&MBB, {}, nullptr};

    // Now walk backwards through the terminators and build up successors they
    // reach and the conditions.
    for (MachineInstr &MI : llvm::reverse(MBB)) {
      // Once we've handled all the terminators, we're done.
      if (!MI.isTerminator())
        break;

      // If we see a non-branch terminator, we can't handle anything so bail.
      if (!MI.isBranch()) {
        Info.CondBrs.clear();
        break;
      }

      // If we see an unconditional branch, reset our state, clear any
      // fallthrough, and set this as the "else" successor.
      if (MI.getOpcode() == X86::JMP_1) {
        Info.CondBrs.clear();
        Info.UncondBr = &MI;
        continue;
      }

      // If we get an invalid condition, we have an indirect branch or some
      // other unanalyzable "fallthrough" case. We model this as a nullptr for
      // the destination so we can still guard any conditional successors.
      // Consider code sequences like:
      // ```
      //   jCC L1
      //   jmpq *%rax
      // ```
      // We still want to harden the edge to `L1`.
      if (X86::getCondFromBranchOpc(MI.getOpcode()) == X86::COND_INVALID) {
        Info.CondBrs.clear();
        Info.UncondBr = &MI;
        continue;
      }

      // We have a vanilla conditional branch, add it to our list.
      Info.CondBrs.push_back(&MI);
    }
    if (Info.CondBrs.empty()) {
      ++NumBranchesUntraced;
      LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
                 MBB.dump());
      continue;
    }

    Infos.push_back(Info);
  }

  return Infos;
}

/// Trace the predicate state through the CFG, instrumenting each conditional
/// branch such that misspeculation through an edge will poison the predicate
/// state.
///
/// Returns the list of inserted CMov instructions so that they can have their
/// uses of the predicate state rewritten into proper SSA form once it is
/// complete.
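///
/// Illustratively (a sketch with hypothetical registers): along the taken
/// edge of a `jne`, with the poison value in %rcx and the current predicate
/// state in %r15, the inserted check looks like:
///
/// ```
///   cmoveq %rcx, %r15   # flags contradict the edge => poison the state
/// ```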
SmallVector<MachineInstr *, 16>
X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
    MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
  // Collect the inserted cmov instructions so we can rewrite their uses of the
  // predicate state into SSA form.
  SmallVector<MachineInstr *, 16> CMovs;

  // Now walk all of the basic blocks looking for ones that end in conditional
  // jumps where we need to update this register along each edge.
  for (const BlockCondInfo &Info : Infos) {
    MachineBasicBlock &MBB = *Info.MBB;
    const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
    MachineInstr *UncondBr = Info.UncondBr;

    LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
                      << "\n");
    ++NumCondBranchesTraced;

    // Compute the non-conditional successor as either the target of any
    // unconditional branch or the layout successor.
    MachineBasicBlock *UncondSucc =
        UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
                        ? UncondBr->getOperand(0).getMBB()
                        : nullptr)
                 : &*std::next(MachineFunction::iterator(&MBB));

    // Count how many edges there are to any given successor.
    SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
    if (UncondSucc)
      ++SuccCounts[UncondSucc];
    for (auto *CondBr : CondBrs)
      ++SuccCounts[CondBr->getOperand(0).getMBB()];

    // A lambda to insert cmov instructions into a block checking all of the
    // condition codes in a sequence.
    auto BuildCheckingBlockForSuccAndConds =
        [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
            MachineInstr *Br, MachineInstr *&UncondBr,
            ArrayRef<X86::CondCode> Conds) {
          // First, we split the edge to insert the checking block into a safe
          // location.
          auto &CheckingMBB =
              (SuccCount == 1 && Succ.pred_size() == 1)
                  ? Succ
                  : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);

          bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
          if (!LiveEFLAGS)
            CheckingMBB.addLiveIn(X86::EFLAGS);

          // Now insert the cmovs to implement the checks.
          auto InsertPt = CheckingMBB.begin();
          assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
                 "Should never have a PHI in the initial checking block as it "
                 "always has a single predecessor!");

          // We will wire each cmov to each other, but need to start with the
          // incoming pred state.
          unsigned CurStateReg = PS->InitialReg;

          for (X86::CondCode Cond : Conds) {
            int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
            auto CMovOp = X86::getCMovFromCond(Cond, PredStateSizeInBytes);

            unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
            // Note that we intentionally use an empty debug location so that
            // this picks up the preceding location.
            auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
                                 TII->get(CMovOp), UpdatedStateReg)
                             .addReg(CurStateReg)
                             .addReg(PS->PoisonReg);
            // If this is the last cmov and the EFLAGS weren't originally
            // live-in, mark them as killed.
            if (!LiveEFLAGS && Cond == Conds.back())
              CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);

            ++NumInstsInserted;
            LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump();
                       dbgs() << "\n");

            // The first one of the cmovs will be using the top level
            // `PredStateReg` and need to get rewritten into SSA form.
            if (CurStateReg == PS->InitialReg)
              CMovs.push_back(&*CMovI);

            // The next cmov should start from this one's def.
            CurStateReg = UpdatedStateReg;
          }

          // And put the last one into the available values for SSA form of our
          // predicate state.
          PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
        };

    std::vector<X86::CondCode> UncondCodeSeq;
    for (auto *CondBr : CondBrs) {
      MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
      int &SuccCount = SuccCounts[&Succ];

      X86::CondCode Cond = X86::getCondFromBranchOpc(CondBr->getOpcode());
      X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
      UncondCodeSeq.push_back(Cond);

      BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
                                        {InvCond});

      // Decrement the successor count now that we've split one of the edges.
      // We need to keep the count of edges to the successor accurate in order
      // to know above when to *replace* the successor in the CFG vs. just
      // adding the new successor.
      --SuccCount;
    }

    // Since we may have split edges and changed the number of successors,
    // normalize the probabilities. This avoids doing it each time we split an
    // edge.
    MBB.normalizeSuccProbs();

    // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
    // need to intersect the other condition codes. We can do this by just
    // doing a cmov for each one.
    if (!UncondSucc)
      // If we have no fallthrough to protect (perhaps it is an indirect jump?)
      // just skip this and continue.
      continue;

    assert(SuccCounts[UncondSucc] == 1 &&
           "We should never have more than one edge to the unconditional "
           "successor at this point because every other edge must have been "
           "split above!");

    // Sort and unique the codes to minimize them.
    llvm::sort(UncondCodeSeq.begin(), UncondCodeSeq.end());
    UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
                        UncondCodeSeq.end());

    // Build a checking version of the successor.
    BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
                                      UncondBr, UncondBr, UncondCodeSeq);
  }

  return CMovs;
}

/// Returns true if the instruction has no behavior (specified or otherwise)
/// that is based on the value of any of its register operands.
///
/// A classical example of something that is inherently not data invariant is
/// an indirect jump -- the destination is loaded into icache based on the
/// bits set in the jump destination register.
///
/// FIXME: This should become part of our instruction tables.
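///
/// Illustratively (a sketch, not exhaustive):
///
/// ```
///   addq %rbx, %rax    # data invariant: timing independent of operand values
///   jmpq *%rax         # not data invariant: the operand steers instruction
///                      # fetch and thus observable icache state
/// ```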
static bool isDataInvariant(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    // By default, assume that the instruction is not data invariant.
    return false;

  // Some target-independent operations that trivially lower to data-invariant
  // instructions.
  case TargetOpcode::COPY:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
    return true;

  // On x86 it is believed that imul is constant time w.r.t. the loaded data.
  // However, these instructions set flags and are perhaps the most
  // surprisingly constant-time operations, so we call them out here
  // separately.
837 | case X86::IMUL16rr: | |||
838 | case X86::IMUL16rri8: | |||
839 | case X86::IMUL16rri: | |||
840 | case X86::IMUL32rr: | |||
841 | case X86::IMUL32rri8: | |||
842 | case X86::IMUL32rri: | |||
843 | case X86::IMUL64rr: | |||
844 | case X86::IMUL64rri32: | |||
845 | case X86::IMUL64rri8: | |||
846 | ||||
847 | // Bit scanning and counting instructions that are somewhat surprisingly | |||
848 | // constant time as they scan across bits and do other fairly complex | |||
849 | // operations like popcnt, but are believed to be constant time on x86. | |||
850 | // However, these set flags. | |||
851 | case X86::BSF16rr: | |||
852 | case X86::BSF32rr: | |||
853 | case X86::BSF64rr: | |||
854 | case X86::BSR16rr: | |||
855 | case X86::BSR32rr: | |||
856 | case X86::BSR64rr: | |||
857 | case X86::LZCNT16rr: | |||
858 | case X86::LZCNT32rr: | |||
859 | case X86::LZCNT64rr: | |||
860 | case X86::POPCNT16rr: | |||
861 | case X86::POPCNT32rr: | |||
862 | case X86::POPCNT64rr: | |||
863 | case X86::TZCNT16rr: | |||
864 | case X86::TZCNT32rr: | |||
865 | case X86::TZCNT64rr: | |||
866 | ||||
867 | // Bit manipulation instructions are effectively combinations of basic | |||
868 | // arithmetic ops, and should still execute in constant time. These also | |||
869 | // set flags. | |||
870 | case X86::BLCFILL32rr: | |||
871 | case X86::BLCFILL64rr: | |||
872 | case X86::BLCI32rr: | |||
873 | case X86::BLCI64rr: | |||
874 | case X86::BLCIC32rr: | |||
875 | case X86::BLCIC64rr: | |||
876 | case X86::BLCMSK32rr: | |||
877 | case X86::BLCMSK64rr: | |||
878 | case X86::BLCS32rr: | |||
879 | case X86::BLCS64rr: | |||
880 | case X86::BLSFILL32rr: | |||
881 | case X86::BLSFILL64rr: | |||
882 | case X86::BLSI32rr: | |||
883 | case X86::BLSI64rr: | |||
884 | case X86::BLSIC32rr: | |||
885 | case X86::BLSIC64rr: | |||
886 | case X86::BLSMSK32rr: | |||
887 | case X86::BLSMSK64rr: | |||
888 | case X86::BLSR32rr: | |||
889 | case X86::BLSR64rr: | |||
890 | case X86::TZMSK32rr: | |||
891 | case X86::TZMSK64rr: | |||
892 | ||||
893 | // Bit extracting and clearing instructions should execute in constant time, | |||
894 | // and set flags. | |||
895 | case X86::BEXTR32rr: | |||
896 | case X86::BEXTR64rr: | |||
897 | case X86::BEXTRI32ri: | |||
898 | case X86::BEXTRI64ri: | |||
899 | case X86::BZHI32rr: | |||
900 | case X86::BZHI64rr: | |||
901 | ||||
902 | // Shift and rotate. | |||
903 | case X86::ROL8r1: case X86::ROL16r1: case X86::ROL32r1: case X86::ROL64r1: | |||
904 | case X86::ROL8rCL: case X86::ROL16rCL: case X86::ROL32rCL: case X86::ROL64rCL: | |||
905 | case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri: | |||
906 | case X86::ROR8r1: case X86::ROR16r1: case X86::ROR32r1: case X86::ROR64r1: | |||
907 | case X86::ROR8rCL: case X86::ROR16rCL: case X86::ROR32rCL: case X86::ROR64rCL: | |||
908 | case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri: | |||
909 | case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1: case X86::SAR64r1: | |||
910 | case X86::SAR8rCL: case X86::SAR16rCL: case X86::SAR32rCL: case X86::SAR64rCL: | |||
911 | case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri: | |||
912 | case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1: case X86::SHL64r1: | |||
913 | case X86::SHL8rCL: case X86::SHL16rCL: case X86::SHL32rCL: case X86::SHL64rCL: | |||
914 | case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: | |||
915 | case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1: case X86::SHR64r1: | |||
916 | case X86::SHR8rCL: case X86::SHR16rCL: case X86::SHR32rCL: case X86::SHR64rCL: | |||
917 | case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri: | |||
918 | case X86::SHLD16rrCL: case X86::SHLD32rrCL: case X86::SHLD64rrCL: | |||
919 | case X86::SHLD16rri8: case X86::SHLD32rri8: case X86::SHLD64rri8: | |||
920 | case X86::SHRD16rrCL: case X86::SHRD32rrCL: case X86::SHRD64rrCL: | |||
921 | case X86::SHRD16rri8: case X86::SHRD32rri8: case X86::SHRD64rri8: | |||
922 | ||||
923 | // Basic arithmetic is constant time on the input but does set flags. | |||
924 | case X86::ADC8rr: case X86::ADC8ri: | |||
925 | case X86::ADC16rr: case X86::ADC16ri: case X86::ADC16ri8: | |||
926 | case X86::ADC32rr: case X86::ADC32ri: case X86::ADC32ri8: | |||
927 | case X86::ADC64rr: case X86::ADC64ri8: case X86::ADC64ri32: | |||
928 | case X86::ADD8rr: case X86::ADD8ri: | |||
929 | case X86::ADD16rr: case X86::ADD16ri: case X86::ADD16ri8: | |||
930 | case X86::ADD32rr: case X86::ADD32ri: case X86::ADD32ri8: | |||
931 | case X86::ADD64rr: case X86::ADD64ri8: case X86::ADD64ri32: | |||
932 | case X86::AND8rr: case X86::AND8ri: | |||
933 | case X86::AND16rr: case X86::AND16ri: case X86::AND16ri8: | |||
934 | case X86::AND32rr: case X86::AND32ri: case X86::AND32ri8: | |||
935 | case X86::AND64rr: case X86::AND64ri8: case X86::AND64ri32: | |||
936 | case X86::OR8rr: case X86::OR8ri: | |||
937 | case X86::OR16rr: case X86::OR16ri: case X86::OR16ri8: | |||
938 | case X86::OR32rr: case X86::OR32ri: case X86::OR32ri8: | |||
939 | case X86::OR64rr: case X86::OR64ri8: case X86::OR64ri32: | |||
940 | case X86::SBB8rr: case X86::SBB8ri: | |||
941 | case X86::SBB16rr: case X86::SBB16ri: case X86::SBB16ri8: | |||
942 | case X86::SBB32rr: case X86::SBB32ri: case X86::SBB32ri8: | |||
943 | case X86::SBB64rr: case X86::SBB64ri8: case X86::SBB64ri32: | |||
944 | case X86::SUB8rr: case X86::SUB8ri: | |||
945 | case X86::SUB16rr: case X86::SUB16ri: case X86::SUB16ri8: | |||
946 | case X86::SUB32rr: case X86::SUB32ri: case X86::SUB32ri8: | |||
947 | case X86::SUB64rr: case X86::SUB64ri8: case X86::SUB64ri32: | |||
948 | case X86::XOR8rr: case X86::XOR8ri: | |||
949 | case X86::XOR16rr: case X86::XOR16ri: case X86::XOR16ri8: | |||
950 | case X86::XOR32rr: case X86::XOR32ri: case X86::XOR32ri8: | |||
951 | case X86::XOR64rr: case X86::XOR64ri8: case X86::XOR64ri32: | |||
952 | // Arithmetic with just 32-bit and 64-bit variants and no immediates. | |||
953 | case X86::ADCX32rr: case X86::ADCX64rr: | |||
954 | case X86::ADOX32rr: case X86::ADOX64rr: | |||
955 | case X86::ANDN32rr: case X86::ANDN64rr: | |||
956 | // Unary arithmetic operations. | |||
957 | case X86::DEC8r: case X86::DEC16r: case X86::DEC32r: case X86::DEC64r: | |||
958 | case X86::INC8r: case X86::INC16r: case X86::INC32r: case X86::INC64r: | |||
959 | case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r: | |||
960 | // Check whether the EFLAGS implicit-def is dead. We assume that this will | |||
961 | // always find the implicit-def because this code should only be reached | |||
962 | // for instructions that do in fact implicitly def this. | |||
963 | if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) { | |||
964 | // If we would clobber EFLAGS that are used, just bail for now. | |||
965 | LLVM_DEBUG(dbgs() << " Unable to harden post-load due to EFLAGS: "; | |||
966 | MI.dump(); dbgs() << "\n"); | |||
967 | return false; | |||
968 | } | |||
969 | ||||
970 | // Otherwise, fallthrough to handle these the same as instructions that | |||
971 | // don't set EFLAGS. | |||
972 | LLVM_FALLTHROUGH; | |||
973 | ||||
974 | // Unlike other arithmetic, NOT doesn't set EFLAGS. | |||
975 | case X86::NOT8r: case X86::NOT16r: case X86::NOT32r: case X86::NOT64r: | |||
976 | ||||
977 | // Various move instructions used to zero or sign extend things. Note that we | |||
978 | // intentionally don't support the _NOREX variants as we can't handle that | |||
979 | // register constraint anyway. | |||
980 | case X86::MOVSX16rr8: | |||
981 | case X86::MOVSX32rr8: case X86::MOVSX32rr16: | |||
982 | case X86::MOVSX64rr8: case X86::MOVSX64rr16: case X86::MOVSX64rr32: | |||
983 | case X86::MOVZX16rr8: | |||
984 | case X86::MOVZX32rr8: case X86::MOVZX32rr16: | |||
985 | case X86::MOVZX64rr8: case X86::MOVZX64rr16: | |||
986 | case X86::MOV32rr: | |||
987 | ||||
988 | // Arithmetic instructions that are both constant time and don't set flags. | |||
989 | case X86::RORX32ri: | |||
990 | case X86::RORX64ri: | |||
991 | case X86::SARX32rr: | |||
992 | case X86::SARX64rr: | |||
993 | case X86::SHLX32rr: | |||
994 | case X86::SHLX64rr: | |||
995 | case X86::SHRX32rr: | |||
996 | case X86::SHRX64rr: | |||
997 | ||||
998 | // LEA doesn't actually access memory, and its arithmetic is constant time. | |||
999 | case X86::LEA16r: | |||
1000 | case X86::LEA32r: | |||
1001 | case X86::LEA64_32r: | |||
1002 | case X86::LEA64r: | |||
1003 | return true; | |||
1004 | } | |||
1005 | } | |||
1006 | ||||
1007 | /// Returns true if the instruction has no behavior (specified or otherwise) | |||
1008 | /// that is based on the value loaded from memory or the value of any | |||
1009 | /// non-address register operands. | |||
1010 | /// | |||
1011 | /// For example, an instruction is not data invariant if its latency depends | |||
1012 | /// on the particular bits set in any of the registers *or* on any of the bits | |||
1013 | /// loaded from memory. | |||
1014 | /// | |||
1015 | /// A classical example of something that is inherently not data invariant is an | |||
1016 | /// indirect jump -- the destination is loaded into icache based on the bits set | |||
1017 | /// in the jump destination register. | |||
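/// Another example, offered for illustration: integer division is deliberately
/// absent from the opcode lists below, as its latency is widely believed to
/// depend on the operand values.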
1018 | /// | |||
1019 | /// FIXME: This should become part of our instruction tables. | |||
1020 | static bool isDataInvariantLoad(MachineInstr &MI) { | |||
1021 | switch (MI.getOpcode()) { | |||
1022 | default: | |||
1023 | // By default, assume that the load will immediately leak. | |||
1024 | return false; | |||
1025 | ||||
1026 | // On x86, the imul instructions are believed to be constant time w.r.t. the | |||
1027 | // loaded data. However, they set flags and are perhaps the most surprisingly | |||
1028 | // constant-time operations, so we call them out here separately. | |||
1029 | case X86::IMUL16rm: | |||
1030 | case X86::IMUL16rmi8: | |||
1031 | case X86::IMUL16rmi: | |||
1032 | case X86::IMUL32rm: | |||
1033 | case X86::IMUL32rmi8: | |||
1034 | case X86::IMUL32rmi: | |||
1035 | case X86::IMUL64rm: | |||
1036 | case X86::IMUL64rmi32: | |||
1037 | case X86::IMUL64rmi8: | |||
1038 | ||||
1039 | // Bit scanning and counting instructions that are somewhat surprisingly | |||
1040 | // constant time as they scan across bits and do other fairly complex | |||
1041 | // operations like popcnt, but are believed to be constant time on x86. | |||
1042 | // However, these set flags. | |||
1043 | case X86::BSF16rm: | |||
1044 | case X86::BSF32rm: | |||
1045 | case X86::BSF64rm: | |||
1046 | case X86::BSR16rm: | |||
1047 | case X86::BSR32rm: | |||
1048 | case X86::BSR64rm: | |||
1049 | case X86::LZCNT16rm: | |||
1050 | case X86::LZCNT32rm: | |||
1051 | case X86::LZCNT64rm: | |||
1052 | case X86::POPCNT16rm: | |||
1053 | case X86::POPCNT32rm: | |||
1054 | case X86::POPCNT64rm: | |||
1055 | case X86::TZCNT16rm: | |||
1056 | case X86::TZCNT32rm: | |||
1057 | case X86::TZCNT64rm: | |||
1058 | ||||
1059 | // Bit manipulation instructions are effectively combinations of basic | |||
1060 | // arithmetic ops, and should still execute in constant time. These also | |||
1061 | // set flags. | |||
1062 | case X86::BLCFILL32rm: | |||
1063 | case X86::BLCFILL64rm: | |||
1064 | case X86::BLCI32rm: | |||
1065 | case X86::BLCI64rm: | |||
1066 | case X86::BLCIC32rm: | |||
1067 | case X86::BLCIC64rm: | |||
1068 | case X86::BLCMSK32rm: | |||
1069 | case X86::BLCMSK64rm: | |||
1070 | case X86::BLCS32rm: | |||
1071 | case X86::BLCS64rm: | |||
1072 | case X86::BLSFILL32rm: | |||
1073 | case X86::BLSFILL64rm: | |||
1074 | case X86::BLSI32rm: | |||
1075 | case X86::BLSI64rm: | |||
1076 | case X86::BLSIC32rm: | |||
1077 | case X86::BLSIC64rm: | |||
1078 | case X86::BLSMSK32rm: | |||
1079 | case X86::BLSMSK64rm: | |||
1080 | case X86::BLSR32rm: | |||
1081 | case X86::BLSR64rm: | |||
1082 | case X86::TZMSK32rm: | |||
1083 | case X86::TZMSK64rm: | |||
1084 | ||||
1085 | // Bit extracting and clearing instructions should execute in constant time, | |||
1086 | // and set flags. | |||
1087 | case X86::BEXTR32rm: | |||
1088 | case X86::BEXTR64rm: | |||
1089 | case X86::BEXTRI32mi: | |||
1090 | case X86::BEXTRI64mi: | |||
1091 | case X86::BZHI32rm: | |||
1092 | case X86::BZHI64rm: | |||
1093 | ||||
1094 | // Basic arithmetic is constant time on the input but does set flags. | |||
1095 | case X86::ADC8rm: | |||
1096 | case X86::ADC16rm: | |||
1097 | case X86::ADC32rm: | |||
1098 | case X86::ADC64rm: | |||
1099 | case X86::ADCX32rm: | |||
1100 | case X86::ADCX64rm: | |||
1101 | case X86::ADD8rm: | |||
1102 | case X86::ADD16rm: | |||
1103 | case X86::ADD32rm: | |||
1104 | case X86::ADD64rm: | |||
1105 | case X86::ADOX32rm: | |||
1106 | case X86::ADOX64rm: | |||
1107 | case X86::AND8rm: | |||
1108 | case X86::AND16rm: | |||
1109 | case X86::AND32rm: | |||
1110 | case X86::AND64rm: | |||
1111 | case X86::ANDN32rm: | |||
1112 | case X86::ANDN64rm: | |||
1113 | case X86::OR8rm: | |||
1114 | case X86::OR16rm: | |||
1115 | case X86::OR32rm: | |||
1116 | case X86::OR64rm: | |||
1117 | case X86::SBB8rm: | |||
1118 | case X86::SBB16rm: | |||
1119 | case X86::SBB32rm: | |||
1120 | case X86::SBB64rm: | |||
1121 | case X86::SUB8rm: | |||
1122 | case X86::SUB16rm: | |||
1123 | case X86::SUB32rm: | |||
1124 | case X86::SUB64rm: | |||
1125 | case X86::XOR8rm: | |||
1126 | case X86::XOR16rm: | |||
1127 | case X86::XOR32rm: | |||
1128 | case X86::XOR64rm: | |||
1129 | // Check whether the EFLAGS implicit-def is dead. We assume that this will | |||
1130 | // always find the implicit-def because this code should only be reached | |||
1131 | // for instructions that do in fact implicitly def this. | |||
1132 | if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) { | |||
1133 | // If we would clobber EFLAGS that are used, just bail for now. | |||
1134 | LLVM_DEBUG(dbgs() << " Unable to harden post-load due to EFLAGS: "; | |||
1135 | MI.dump(); dbgs() << "\n"); | |||
1136 | return false; | |||
1137 | } | |||
1138 | ||||
1139 | // Otherwise, fallthrough to handle these the same as instructions that | |||
1140 | // don't set EFLAGS. | |||
1141 | LLVM_FALLTHROUGH; | |||
1142 | ||||
1143 | // Integer multiply w/o affecting flags is still believed to be constant | |||
1144 | // time on x86. Called out separately as this is among the most surprising | |||
1145 | // instructions to exhibit that behavior. | |||
1146 | case X86::MULX32rm: | |||
1147 | case X86::MULX64rm: | |||
1148 | ||||
1149 | // Arithmetic instructions that are both constant time and don't set flags. | |||
1150 | case X86::RORX32mi: | |||
1151 | case X86::RORX64mi: | |||
1152 | case X86::SARX32rm: | |||
1153 | case X86::SARX64rm: | |||
1154 | case X86::SHLX32rm: | |||
1155 | case X86::SHLX64rm: | |||
1156 | case X86::SHRX32rm: | |||
1157 | case X86::SHRX64rm: | |||
1158 | ||||
1159 | // Conversions are believed to be constant time and don't set flags. | |||
1160 | case X86::CVTTSD2SI64rm: case X86::VCVTTSD2SI64rm: case X86::VCVTTSD2SI64Zrm: | |||
1161 | case X86::CVTTSD2SIrm: case X86::VCVTTSD2SIrm: case X86::VCVTTSD2SIZrm: | |||
1162 | case X86::CVTTSS2SI64rm: case X86::VCVTTSS2SI64rm: case X86::VCVTTSS2SI64Zrm: | |||
1163 | case X86::CVTTSS2SIrm: case X86::VCVTTSS2SIrm: case X86::VCVTTSS2SIZrm: | |||
1164 | case X86::CVTSI2SDrm: case X86::VCVTSI2SDrm: case X86::VCVTSI2SDZrm: | |||
1165 | case X86::CVTSI2SSrm: case X86::VCVTSI2SSrm: case X86::VCVTSI2SSZrm: | |||
1166 | case X86::CVTSI642SDrm: case X86::VCVTSI642SDrm: case X86::VCVTSI642SDZrm: | |||
1167 | case X86::CVTSI642SSrm: case X86::VCVTSI642SSrm: case X86::VCVTSI642SSZrm: | |||
1168 | case X86::CVTSS2SDrm: case X86::VCVTSS2SDrm: case X86::VCVTSS2SDZrm: | |||
1169 | case X86::CVTSD2SSrm: case X86::VCVTSD2SSrm: case X86::VCVTSD2SSZrm: | |||
1170 | // AVX512 added unsigned integer conversions. | |||
1171 | case X86::VCVTTSD2USI64Zrm: | |||
1172 | case X86::VCVTTSD2USIZrm: | |||
1173 | case X86::VCVTTSS2USI64Zrm: | |||
1174 | case X86::VCVTTSS2USIZrm: | |||
1175 | case X86::VCVTUSI2SDZrm: | |||
1176 | case X86::VCVTUSI642SDZrm: | |||
1177 | case X86::VCVTUSI2SSZrm: | |||
1178 | case X86::VCVTUSI642SSZrm: | |||
1179 | ||||
1180 | // Loads to register don't set flags. | |||
1181 | case X86::MOV8rm: | |||
1182 | case X86::MOV8rm_NOREX: | |||
1183 | case X86::MOV16rm: | |||
1184 | case X86::MOV32rm: | |||
1185 | case X86::MOV64rm: | |||
1186 | case X86::MOVSX16rm8: | |||
1187 | case X86::MOVSX32rm16: | |||
1188 | case X86::MOVSX32rm8: | |||
1189 | case X86::MOVSX32rm8_NOREX: | |||
1190 | case X86::MOVSX64rm16: | |||
1191 | case X86::MOVSX64rm32: | |||
1192 | case X86::MOVSX64rm8: | |||
1193 | case X86::MOVZX16rm8: | |||
1194 | case X86::MOVZX32rm16: | |||
1195 | case X86::MOVZX32rm8: | |||
1196 | case X86::MOVZX32rm8_NOREX: | |||
1197 | case X86::MOVZX64rm16: | |||
1198 | case X86::MOVZX64rm8: | |||
1199 | return true; | |||
1200 | } | |||
1201 | } | |||
1202 | ||||
1203 | static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | |||
1204 | const TargetRegisterInfo &TRI) { | |||
1205 | // Check if EFLAGS are alive by seeing if there is a def of them or they | |||
1206 | // are live-in, and then seeing if that def is in turn used. | |||
1207 | for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) { | |||
1208 | if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) { | |||
1209 | // If the def is dead, then EFLAGS is not live. | |||
1210 | if (DefOp->isDead()) | |||
1211 | return false; | |||
1212 | ||||
1213 | // Otherwise we've def'ed it, and it is live. | |||
1214 | return true; | |||
1215 | } | |||
1216 | // While at this instruction, also check if we use and kill EFLAGS | |||
1217 | // which means it isn't live. | |||
1218 | if (MI.killsRegister(X86::EFLAGS, &TRI)) | |||
1219 | return false; | |||
1220 | } | |||
1221 | ||||
1222 | // If we didn't find anything conclusive (neither definitely alive nor | |||
1223 | // definitely dead), return whether it lives into the block. | |||
1224 | return MBB.isLiveIn(X86::EFLAGS); | |||
1225 | } | |||
1226 | ||||
1227 | void X86SpeculativeLoadHardeningPass::checkAllLoads(MachineFunction &MF) { | |||
1228 | // If the actual checking of loads is disabled, skip doing anything here. | |||
1229 | if (!HardenLoads) | |||
1230 | return; | |||
1231 | ||||
1232 | SmallPtrSet<MachineInstr *, 16> HardenPostLoad; | |||
1233 | SmallPtrSet<MachineInstr *, 16> HardenLoadAddr; | |||
1234 | ||||
1235 | SmallSet<unsigned, 16> HardenedAddrRegs; | |||
1236 | ||||
1237 | SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg; | |||
1238 | ||||
1239 | // Track the set of load-dependent registers through the basic block. Because | |||
1240 | // the values of these registers have an existing data dependency on a loaded | |||
1241 | // value which we would have checked, we can omit any checks on them. | |||
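// For example (a hypothetical sketch, not code from this function):
//   %a = MOV64rm %ptr, ...   ; address %ptr gets hardened, so %a carries a
//                            ; data dependency on a checked load
//   %b = MOV64rm %a, ...     ; %a is in LoadDepRegs: no separate check needed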
1242 | SparseBitVector<> LoadDepRegs; | |||
1243 | ||||
1244 | for (MachineBasicBlock &MBB : MF) { | |||
1245 | // We harden the loads of a basic block in several passes: | |||
1246 | // | |||
1247 | // 1) Collect all the loads which can have their loaded value hardened | |||
1248 | // and all the loads that instead need their address hardened. During | |||
1249 | // this walk we propagate load dependence for address hardened loads and | |||
1250 | // also look for LFENCE to stop hardening wherever possible. When | |||
1251 | // deciding whether or not to harden the loaded value or not, we check | |||
1252 | // to see if any registers used in the address will have been hardened | |||
1253 | // at this point and if so, harden any remaining address registers as | |||
1254 | // that often successfully re-uses hardened addresses and minimizes | |||
1255 | // instructions. FIXME: We should consider an aggressive mode where we | |||
1256 | // continue to keep as many loads value hardened even when some address | |||
1257 | // register hardening would be free (due to reuse). | |||
1258 | for (MachineInstr &MI : MBB) { | |||
1259 | // We naively assume that all def'ed registers of an instruction have | |||
1260 | // a data dependency on all of their operands. | |||
1261 | // FIXME: Do a more careful analysis of x86 to build a conservative model | |||
1262 | // here. | |||
1263 | if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) { | |||
1264 | return Op.isReg() && LoadDepRegs.test(Op.getReg()); | |||
1265 | })) | |||
1266 | for (MachineOperand &Def : MI.defs()) | |||
1267 | if (Def.isReg()) | |||
1268 | LoadDepRegs.set(Def.getReg()); | |||
1269 | ||||
1270 | // Both Intel and AMD have issued guidance that LFENCE will act as a | |||
1271 | // speculation barrier, so if we see an LFENCE, there is no more need to | |||
1272 | // guard things in this block. | |||
1273 | if (MI.getOpcode() == X86::LFENCE) | |||
1274 | break; | |||
1275 | ||||
1276 | // If this instruction cannot load, nothing to do. | |||
1277 | if (!MI.mayLoad()) | |||
1278 | continue; | |||
1279 | ||||
1280 | // Some instructions which "load" are trivially safe or unimportant. | |||
1281 | if (MI.getOpcode() == X86::MFENCE) | |||
1282 | continue; | |||
1283 | ||||
1284 | // Extract the memory operand information about this instruction. | |||
1285 | // FIXME: This doesn't handle loading pseudo instructions which we often | |||
1286 | // could handle with similarly generic logic. We probably need to add an | |||
1287 | // MI-layer routine similar to the MC-layer one we use here which maps | |||
1288 | // pseudos much like this maps real instructions. | |||
1289 | const MCInstrDesc &Desc = MI.getDesc(); | |||
1290 | int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); | |||
1291 | if (MemRefBeginIdx < 0) { | |||
1292 | LLVM_DEBUG(dbgs() << "WARNING: unable to harden loading instruction: "; | |||
1293 | MI.dump()); | |||
1294 | continue; | |||
1295 | } | |||
1296 | ||||
1297 | MemRefBeginIdx += X86II::getOperandBias(Desc); | |||
1298 | ||||
1299 | MachineOperand &BaseMO = MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); | |||
1300 | MachineOperand &IndexMO = | |||
1301 | MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); | |||
1302 | ||||
1303 | // If we have at least one (non-frame-index, non-RIP) register operand, | |||
1304 | // and neither operand is load-dependent, we need to check the load. | |||
1305 | unsigned BaseReg = 0, IndexReg = 0; | |||
1306 | if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP && | |||
1307 | BaseMO.getReg() != X86::NoRegister) | |||
1308 | BaseReg = BaseMO.getReg(); | |||
1309 | if (IndexMO.getReg() != X86::NoRegister) | |||
1310 | IndexReg = IndexMO.getReg(); | |||
1311 | ||||
1312 | if (!BaseReg && !IndexReg) | |||
1313 | // No register operands! | |||
1314 | continue; | |||
1315 | ||||
1316 | // If any register operand is dependent, this load is dependent and we | |||
1317 | // needn't check it. | |||
1318 | // FIXME: Is this true in the case where we are hardening loads after | |||
1319 | // they complete? Unclear, need to investigate. | |||
1320 | if ((BaseReg && LoadDepRegs.test(BaseReg)) || | |||
1321 | (IndexReg && LoadDepRegs.test(IndexReg))) | |||
1322 | continue; | |||
1323 | ||||
1324 | // If post-load hardening is enabled, this load is compatible with | |||
1325 | // post-load hardening, and we aren't already going to harden one of the | |||
1326 | // address registers, queue it up to be hardened post-load. Notably, even | |||
1327 | // once hardened this won't introduce a useful dependency that could prune | |||
1328 | // out subsequent loads. | |||
1329 | if (EnablePostLoadHardening && isDataInvariantLoad(MI) && | |||
1330 | MI.getDesc().getNumDefs() == 1 && MI.getOperand(0).isReg() && | |||
1331 | canHardenRegister(MI.getOperand(0).getReg()) && | |||
1332 | !HardenedAddrRegs.count(BaseReg) && | |||
1333 | !HardenedAddrRegs.count(IndexReg)) { | |||
1334 | HardenPostLoad.insert(&MI); | |||
1335 | HardenedAddrRegs.insert(MI.getOperand(0).getReg()); | |||
1336 | continue; | |||
1337 | } | |||
1338 | ||||
1339 | // Record this instruction for address hardening and record its register | |||
1340 | // operands as being address-hardened. | |||
1341 | HardenLoadAddr.insert(&MI); | |||
1342 | if (BaseReg) | |||
1343 | HardenedAddrRegs.insert(BaseReg); | |||
1344 | if (IndexReg) | |||
1345 | HardenedAddrRegs.insert(IndexReg); | |||
1346 | ||||
1347 | for (MachineOperand &Def : MI.defs()) | |||
1348 | if (Def.isReg()) | |||
1349 | LoadDepRegs.set(Def.getReg()); | |||
1350 | } | |||
1351 | ||||
1352 | // Now re-walk the instructions in the basic block, and apply whichever | |||
1353 | // hardening strategy we have elected. Note that we do this in a second | |||
1354 | // pass specifically so that we have the complete set of instructions for | |||
1355 | // which we will do post-load hardening and can defer it in certain | |||
1356 | // circumstances. | |||
1357 | // | |||
1358 | // FIXME: This could probably be made even more effective by doing it | |||
1359 | // across the entire function. Rather than just walking the flat list | |||
1360 | // backwards here, we could walk the function in PO and each block bottom | |||
1361 | // up, allowing us in some cases to sink hardening across basic blocks. As | |||
1362 | // long as the in-block predicate state is used at the eventual hardening | |||
1363 | // site, this remains safe. | |||
1364 | for (MachineInstr &MI : MBB) { | |||
1365 | // We cannot both require hardening the def of a load and its address. | |||
1366 | assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) && | |||
1367 | "Requested to harden both the address and def of a load!"); | |||
1368 | ||||
1369 | // Check if this is a load whose address needs to be hardened. | |||
1370 | if (HardenLoadAddr.erase(&MI)) { | |||
1371 | const MCInstrDesc &Desc = MI.getDesc(); | |||
1372 | int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); | |||
1373 | assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!"); | |||
1374 | ||||
1375 | MemRefBeginIdx += X86II::getOperandBias(Desc); | |||
1376 | ||||
1377 | MachineOperand &BaseMO = | |||
1378 | MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); | |||
1379 | MachineOperand &IndexMO = | |||
1380 | MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); | |||
1381 | hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg); | |||
1382 | continue; | |||
1383 | } | |||
1384 | ||||
1385 | // Test if this instruction is one of our post load instructions (and | |||
1386 | // remove it from the set if so). | |||
1387 | if (HardenPostLoad.erase(&MI)) { | |||
1388 | assert(!MI.isCall() && "Must not try to post-load harden a call!"); | |||
1389 | ||||
1390 | // If this is a data-invariant load, we want to try and sink any | |||
1391 | // hardening as far as possible. | |||
1392 | if (isDataInvariantLoad(MI)) { | |||
1393 | // Sink the instruction we'll need to harden as far as we can down the | |||
1394 | // graph. | |||
1395 | MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad); | |||
1396 | ||||
1397 | // If we managed to sink this instruction, update everything so we | |||
1398 | // harden that instruction when we reach it in the instruction | |||
1399 | // sequence. | |||
1400 | if (SunkMI != &MI) { | |||
1401 | // If in sinking there was no instruction needing to be hardened, | |||
1402 | // we're done. | |||
1403 | if (!SunkMI) | |||
1404 | continue; | |||
1405 | ||||
1406 | // Otherwise, add this to the set of defs we harden. | |||
1407 | HardenPostLoad.insert(SunkMI); | |||
1408 | continue; | |||
1409 | } | |||
1410 | } | |||
1411 | ||||
1412 | // The register def'ed by this instruction is trivially hardened so map | |||
1413 | // it to itself. | |||
1414 | AddrRegToHardenedReg[MI.getOperand(0).getReg()] = | |||
1415 | MI.getOperand(0).getReg(); | |||
1416 | ||||
1417 | hardenPostLoad(MI); | |||
1418 | continue; | |||
1419 | } | |||
1420 | ||||
1421 | // After we finish processing the instruction and doing any hardening | |||
1422 | // necessary for it, we need to handle transferring the predicate state | |||
1423 | // into a call and recovering it after the call returns (if it returns). | |||
1424 | if (!MI.isCall()) | |||
1425 | continue; | |||
1426 | ||||
1427 | // If we're not hardening interprocedurally, we can just skip calls. | |||
1428 | if (!HardenInterprocedurally) | |||
1429 | continue; | |||
1430 | ||||
1431 | auto InsertPt = MI.getIterator(); | |||
1432 | DebugLoc Loc = MI.getDebugLoc(); | |||
1433 | ||||
1434 | // First, we transfer the predicate state into the called function by | |||
1435 | // merging it into the stack pointer. This will kill the current def of | |||
1436 | // the state. | |||
1437 | unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB); | |||
1438 | mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg); | |||
1439 | ||||
1440 | // If this call is also a return (because it is a tail call) we're done. | |||
1441 | if (MI.isReturn()) | |||
1442 | continue; | |||
1443 | ||||
1444 | // Otherwise we need to step past the call and recover the predicate | |||
1445 | // state from SP after the return, and make this new state available. | |||
1446 | ++InsertPt; | |||
1447 | unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc); | |||
1448 | PS->SSA.AddAvailableValue(&MBB, NewStateReg); | |||
1449 | } | |||
1450 | ||||
1451 | HardenPostLoad.clear(); | |||
1452 | HardenLoadAddr.clear(); | |||
1453 | HardenedAddrRegs.clear(); | |||
1454 | AddrRegToHardenedReg.clear(); | |||
1455 | ||||
1456 | // Currently, we only track data-dependent loads within a basic block. | |||
1457 | // FIXME: We should see if this is necessary or if we could be more | |||
1458 | // aggressive here without opening up attack avenues. | |||
1459 | LoadDepRegs.clear(); | |||
1460 | } | |||
1461 | } | |||
1462 | ||||
1463 | /// Save EFLAGS into the returned GPR. This can in turn be restored with | |||
1464 | /// `restoreEFLAGS`. | |||
1465 | /// | |||
1466 | /// Note that LLVM can only lower very simple patterns of saved and restored | |||
1467 | /// EFLAGS registers. The restore should always be within the same basic block | |||
1468 | /// as the save so that no PHI nodes are inserted. | |||
1469 | unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS( | |||
1470 | MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, | |||
1471 | DebugLoc Loc) { | |||
1472 | // FIXME: Hard coding this to a 32-bit register class seems weird, but matches | |||
1473 | // what instruction selection does. | |||
1474 | unsigned Reg = MRI->createVirtualRegister(&X86::GR32RegClass); | |||
1475 | // We directly copy the FLAGS register and rely on later lowering to clean | |||
1476 | // this up into the appropriate setCC instructions. | |||
1477 | BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS); | |||
1478 | ++NumInstsInserted; | |||
1479 | return Reg; | |||
1480 | } | |||
1481 | ||||
1482 | /// Restore EFLAGS from the provided GPR. This should be produced by | |||
1483 | /// `saveEFLAGS`. | |||
1484 | /// | |||
1485 | /// This must be done within the same basic block as the save in order to | |||
1486 | /// reliably lower. | |||
1487 | void X86SpeculativeLoadHardeningPass::restoreEFLAGS( | |||
1488 | MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc, | |||
1489 | unsigned Reg) { | |||
1490 | BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg); | |||
1491 | ++NumInstsInserted; | |||
1492 | } | |||
1493 | ||||
1494 | /// Takes the current predicate state (in a register) and merges it into the | |||
1495 | /// stack pointer. The state is essentially a single bit, but we merge this in | |||
1496 | /// a way that won't form non-canonical pointers and also will be preserved | |||
1497 | /// across normal stack adjustments. | |||
1498 | void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( | |||
1499 | MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc, | |||
1500 | unsigned PredStateReg) { | |||
1501 | unsigned TmpReg = MRI->createVirtualRegister(PS->RC); | |||
1502 | // FIXME: This hard codes a shift distance based on the number of bits needed | |||
1503 | // to stay canonical on 64-bit. We should compute this somehow and support | |||
1504 | // 32-bit as part of that. | |||
1505 | auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg) | |||
1506 | .addReg(PredStateReg, RegState::Kill) | |||
1507 | .addImm(47); | |||
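// Worked example, for illustration only, assuming the usual all-ones poison
// state: -1 << 47 == 0xFFFF800000000000, so the OR below sets bits 47-63 of
// RSP. The pointer stays canonical (as the comment above requires), but its
// sign bit now carries the state, where extractPredStateFromSP can recover
// it; with a state of 0 the OR leaves RSP unchanged.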
1508 | ShiftI->addRegisterDead(X86::EFLAGS, TRI); | |||
1509 | ++NumInstsInserted; | |||
1510 | auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP) | |||
1511 | .addReg(X86::RSP) | |||
1512 | .addReg(TmpReg, RegState::Kill); | |||
1513 | OrI->addRegisterDead(X86::EFLAGS, TRI); | |||
1514 | ++NumInstsInserted; | |||
1515 | } | |||
1516 | ||||
1517 | /// Extracts the predicate state stored in the high bits of the stack pointer. | |||
1518 | unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP( | |||
1519 | MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, | |||
1520 | DebugLoc Loc) { | |||
1521 | unsigned PredStateReg = MRI->createVirtualRegister(PS->RC); | |||
1522 | unsigned TmpReg = MRI->createVirtualRegister(PS->RC); | |||
1523 | ||||
1524 | // We know that the stack pointer will have any preserved predicate state in | |||
1525 | // its high bit. We just want to smear this across the other bits. Turns out, | |||
1526 | // this is exactly what an arithmetic right shift does. | |||
1527 | BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg) | |||
1528 | .addReg(X86::RSP); | |||
1529 | auto ShiftI = | |||
1530 | BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg) | |||
1531 | .addReg(TmpReg, RegState::Kill) | |||
1532 | .addImm(TRI->getRegSizeInBits(*PS->RC) - 1); | |||
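// Illustrative example, assuming the 64-bit predicate state used here: the
// shift amount is 63, so if RSP's sign bit is 1 (poisoned) the result is
// all ones, and if it is 0 the result is zero -- the single bit is smeared
// across the whole register.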
1533 | ShiftI->addRegisterDead(X86::EFLAGS, TRI); | |||
1534 | ++NumInstsInserted; | |||
1535 | ||||
1536 | return PredStateReg; | |||
1537 | } | |||
1538 | ||||
1539 | void X86SpeculativeLoadHardeningPass::hardenLoadAddr( | |||
1540 | MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO, | |||
1541 | SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) { | |||
1542 | MachineBasicBlock &MBB = *MI.getParent(); | |||
1543 | DebugLoc Loc = MI.getDebugLoc(); | |||
1544 | ||||
1545 | // Check if EFLAGS are alive by seeing if there is a def of them or they | |||
1546 | // are live-in, and then seeing if that def is in turn used. | |||
1547 | bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI); | |||
1548 | ||||
1549 | SmallVector<MachineOperand *, 2> HardenOpRegs; | |||
1550 | ||||
1551 | if (BaseMO.isFI()) { | |||
1552 | // A frame index is never a dynamically controllable load, so only | |||
1553 | // harden it if we're covering fixed address loads as well. | |||
1554 | LLVM_DEBUG( | |||
1555 | dbgs() << " Skipping hardening base of explicit stack frame load: "; | |||
1556 | MI.dump(); dbgs() << "\n"); | |||
1557 | } else if (BaseMO.getReg() == X86::RIP || | |||
1558 | BaseMO.getReg() == X86::NoRegister) { | |||
1559 | // For both RIP-relative addressed loads and absolute loads, we cannot | |||
1560 | // meaningfully harden them because the address being loaded has no | |||
1561 | // dynamic component. | |||
1562 | // | |||
1563 | // FIXME: When using a segment base (like TLS does) we end up with the | |||
1564 | // dynamic address being the base plus -1 because we can't mutate the | |||
1565 | // segment register here. This allows the signed 32-bit offset to point at | |||
1566 | // valid segment-relative addresses and load them successfully. | |||
1567 | LLVM_DEBUG( | |||
1568 | dbgs() << " Cannot harden base of " | |||
1569 | << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base") | |||
1570 | << " address in a load!"); | |||
1571 | } else { | |||
1572 | assert(BaseMO.isReg() && | |||
1573 | "Only allowed to have a frame index or register base."); | |||
1574 | HardenOpRegs.push_back(&BaseMO); | |||
1575 | } | |||
1576 | ||||
1577 | if (IndexMO.getReg() != X86::NoRegister && | |||
1578 | (HardenOpRegs.empty() || | |||
1579 | HardenOpRegs.front()->getReg() != IndexMO.getReg())) | |||
1580 | HardenOpRegs.push_back(&IndexMO); | |||
1581 | ||||
1582 | assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) && | |||
1583 | "Should have exactly one or two registers to harden!"); | |||
1584 | assert((HardenOpRegs.size() == 1 || | |||
1585 | HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) && | |||
1586 | "Should not have two of the same registers!"); | |||
1587 | ||||
1588 | // Remove any registers that have already been checked. | |||
1589 | llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) { | |||
1590 | // See if this operand's register has already been checked. | |||
1591 | auto It = AddrRegToHardenedReg.find(Op->getReg()); | |||
1592 | if (It == AddrRegToHardenedReg.end()) | |||
1593 | // Not checked, so retain this one. | |||
1594 | return false; | |||
1595 | ||||
1596 | // Otherwise, we can directly update this operand and remove it. | |||
1597 | Op->setReg(It->second); | |||
1598 | return true; | |||
1599 | }); | |||
1600 | // If there are none left, we're done. | |||
1601 | if (HardenOpRegs.empty()) | |||
1602 | return; | |||
1603 | ||||
1604 | // Compute the current predicate state. | |||
1605 | unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB); | |||
1606 | ||||
1607 | auto InsertPt = MI.getIterator(); | |||
1608 | ||||
1609 | // If EFLAGS are live and we don't have access to instructions that avoid | |||
1610 | // clobbering EFLAGS we need to save and restore them. This in turn makes | |||
1611 | // the EFLAGS no longer live. | |||
1612 | unsigned FlagsReg = 0; | |||
1613 | if (EFLAGSLive && !Subtarget->hasBMI2()) { | |||
1614 | EFLAGSLive = false; | |||
1615 | FlagsReg = saveEFLAGS(MBB, InsertPt, Loc); | |||
1616 | } | |||
1617 | ||||
1618 | for (MachineOperand *Op : HardenOpRegs) { | |||
1619 | unsigned OpReg = Op->getReg(); | |||
1620 | auto *OpRC = MRI->getRegClass(OpReg); | |||
1621 | unsigned TmpReg = MRI->createVirtualRegister(OpRC); | |||
1622 | ||||
1623 | // If this is a vector register, we'll need somewhat custom logic to handle | |||
1624 | // hardening it. | |||
1625 | if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) || | |||
1626 | OpRC->hasSuperClassEq(&X86::VR256RegClass))) { | |||
1627 | assert(Subtarget->hasAVX2() && "AVX2-specific register classes!"); | |||
1628 | bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass); | |||
1629 | ||||
1630 | // Move our state into a vector register. | |||
1631 | // FIXME: We could skip this at the cost of longer encodings with AVX-512 | |||
1632 | // but that doesn't seem likely to be worth it. | |||
1633 | unsigned VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass); | |||
1634 | auto MovI = | |||
1635 | BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg) | |||
1636 | .addReg(StateReg); | |||
1637 | (void)MovI; | |||
1638 | ++NumInstsInserted; | |||
1639 | LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n"); | |||
1640 | ||||
1641 | // Broadcast it across the vector register. | |||
1642 | unsigned VBStateReg = MRI->createVirtualRegister(OpRC); | |||
1643 | auto BroadcastI = BuildMI(MBB, InsertPt, Loc, | |||
1644 | TII->get(Is128Bit ? X86::VPBROADCASTQrr | |||
1645 | : X86::VPBROADCASTQYrr), | |||
1646 | VBStateReg) | |||
1647 | .addReg(VStateReg); | |||
1648 | (void)BroadcastI; | |||
1649 | ++NumInstsInserted; | |||
1650 | LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump(); | |||
1651 | dbgs() << "\n"); | |||
1652 | ||||
1653 | // Merge our potential poison state into the value with a vector or. | |||
1654 | auto OrI = | |||
1655 | BuildMI(MBB, InsertPt, Loc, | |||
1656 | TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg) | |||
1657 | .addReg(VBStateReg) | |||
1658 | .addReg(OpReg); | |||
1659 | (void)OrI; | |||
1660 | ++NumInstsInserted; | |||
1661 | LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n"); | |||
1662 | } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) || | |||
1663 | OpRC->hasSuperClassEq(&X86::VR256XRegClass) || | |||
1664 | OpRC->hasSuperClassEq(&X86::VR512RegClass)) { | |||
1665 | assert(Subtarget->hasAVX512() && "AVX512-specific register classes!"); | |||
1666 | bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass); | |||
1667 | bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass); | |||
1668 | if (Is128Bit || Is256Bit) | |||
1669 | assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!"); | |||
1670 | ||||
1671 | // Broadcast our state into a vector register. | |||
1672 | unsigned VStateReg = MRI->createVirtualRegister(OpRC); | |||
1673 | unsigned BroadcastOp = | |||
1674 | Is128Bit ? X86::VPBROADCASTQrZ128r | |||
1675 | : Is256Bit ? X86::VPBROADCASTQrZ256r : X86::VPBROADCASTQrZr; | |||
1676 | auto BroadcastI = | |||
1677 | BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg) | |||
1678 | .addReg(StateReg); | |||
1679 | (void)BroadcastI; | |||
1680 | ++NumInstsInserted; | |||
1681 | LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump(); | |||
1682 | dbgs() << "\n"); | |||
1683 | ||||
1684 | // Merge our potential poison state into the value with a vector or. | |||
1685 | unsigned OrOp = Is128Bit ? X86::VPORQZ128rr | |||
1686 | : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr; | |||
1687 | auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg) | |||
1688 | .addReg(VStateReg) | |||
1689 | .addReg(OpReg); | |||
1690 | (void)OrI; | |||
1691 | ++NumInstsInserted; | |||
1692 | LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n"); | |||
1693 | } else { | |||
1694 | // FIXME: Need to support GR32 here for 32-bit code. | |||
1695 | assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) && | |||
1696 | "Not a supported register class for address hardening!"); | |||
1697 | ||||
1698 | if (!EFLAGSLive) { | |||
1699 | // Merge our potential poison state into the value with an or. | |||
1700 | auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg) | |||
1701 | .addReg(StateReg) | |||
1702 | .addReg(OpReg); | |||
1703 | OrI->addRegisterDead(X86::EFLAGS, TRI); | |||
1704 | ++NumInstsInserted; | |||
1705 | LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n"); | |||
1706 | } else { | |||
1707 | // We need to avoid touching EFLAGS so shift out all but the least | |||
1708 | // significant bit using the instruction that doesn't update flags. | |||
1709 | auto ShiftI = | |||
1710 | BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg) | |||
1711 | .addReg(OpReg) | |||
1712 | .addReg(StateReg); | |||
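// Illustrative note: SHRX masks its 64-bit shift count to 6 bits, so an
// all-ones poison state shifts OpReg right by 63, collapsing the address to
// its top bit, while a zero state leaves OpReg unchanged -- and neither
// case touches EFLAGS.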
1713 | (void)ShiftI; | |||
1714 | ++NumInstsInserted; | |||
1715 | LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump(); | |||
1716 | dbgs() << "\n"); | |||
1717 | } | |||
1718 | } | |||
1719 | ||||
1720 | // Record this register as checked and update the operand. | |||
1721 | assert(!AddrRegToHardenedReg.count(Op->getReg()) && | |||
1722 | "Should not have checked this register yet!"); | |||
1723 | AddrRegToHardenedReg[Op->getReg()] = TmpReg; | |||
1724 | Op->setReg(TmpReg); | |||
1725 | ++NumAddrRegsHardened; | |||
1726 | } | |||
1727 | ||||
1728 | // And restore the flags if needed. | |||
1729 | if (FlagsReg) | |||
1730 | restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg); | |||
1731 | } | |||
1732 | ||||
1733 | MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst( | |||
1734 | MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) { | |||
1735 | assert(isDataInvariantLoad(InitialMI) && | |||
1736 | "Cannot get here with a non-invariant load!"); | |||
1737 | ||||
1738 | // See if we can sink hardening the loaded value. | |||
1739 | auto SinkCheckToSingleUse = | |||
1740 | [&](MachineInstr &MI) -> Optional<MachineInstr *> { | |||
1741 | unsigned DefReg = MI.getOperand(0).getReg(); | |||
1742 | ||||
1743 | // We need to find a single use to which we can sink the check. We can | |||
1744 | // primarily do this because many uses may already end up checked on their | |||
1745 | // own. | |||
1746 | MachineInstr *SingleUseMI = nullptr; | |||
1747 | for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) { | |||
1748 | // If we're already going to harden this use, it is data invariant and | |||
1749 | // within our block. | |||
1750 | if (HardenedInstrs.count(&UseMI)) { | |||
1751 | if (!isDataInvariantLoad(UseMI)) { | |||
1752 | // If we've already decided to harden a non-load, we must have sunk | |||
1753 | // some other post-load hardened instruction to it and it must itself | |||
1754 | // be data-invariant. | |||
1755 | assert(isDataInvariant(UseMI) && | |||
1756 | "Data variant instruction being hardened!"); | |||
1757 | continue; | |||
1758 | } | |||
1759 | ||||
1760 | // Otherwise, this is a load and the load component can't be data | |||
1761 | // invariant so check how this register is being used. | |||
1762 | const MCInstrDesc &Desc = UseMI.getDesc(); | |||
1763 | int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); | |||
1764 | assert(MemRefBeginIdx >= 0 && | |||
1765 | "Should always have mem references here!"); | |||
1766 | MemRefBeginIdx += X86II::getOperandBias(Desc); | |||
1767 | ||||
1768 | MachineOperand &BaseMO = | |||
1769 | UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); | |||
1770 | MachineOperand &IndexMO = | |||
1771 | UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); | |||
1772 | if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) || | |||
1773 | (IndexMO.isReg() && IndexMO.getReg() == DefReg)) | |||
1774 | // The load uses the register as part of its address making it not | |||
1775 | // invariant. | |||
1776 | return {}; | |||
1777 | ||||
1778 | continue; | |||
1779 | } | |||
1780 | ||||
1781 | if (SingleUseMI) | |||
1782 | // We already have a single use, this would make two. Bail. | |||
1783 | return {}; | |||
1784 | ||||
1785 | // If this single use isn't data invariant, isn't in this block, or has | |||
1786 | // interfering EFLAGS, we can't sink the hardening to it. | |||
1787 | if (!isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent()) | |||
1788 | return {}; | |||
1789 | ||||
1790 | // If this instruction defines multiple registers bail as we won't harden | |||
1791 | // all of them. | |||
1792 | if (UseMI.getDesc().getNumDefs() > 1) | |||
1793 | return {}; | |||
1794 | ||||
1795 | // If this register isn't a virtual register, we can't sanely walk its uses; | |||
1796 | // just bail. Also check that its register class is one of the ones we | |||
1797 | // can harden. | |||
1798 | unsigned UseDefReg = UseMI.getOperand(0).getReg(); | |||
1799 | if (!TRI->isVirtualRegister(UseDefReg) || | |||
1800 | !canHardenRegister(UseDefReg)) | |||
1801 | return {}; | |||
1802 | ||||
1803 | SingleUseMI = &UseMI; | |||
1804 | } | |||
1805 | ||||
1806 | // If SingleUseMI is still null, there is no use that needs its own | |||
1807 | // checking. Otherwise, it is the single use that needs checking. | |||
1808 | return {SingleUseMI}; | |||
1809 | }; | |||
1810 |
1811 |   MachineInstr *MI = &InitialMI;
1812 |   while (Optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1813 |     // Update which MI we're checking now.
1814 |     MI = *SingleUse;
1815 |     if (!MI)
1816 |       break;
1817 |   }
1818 |
1819 |   return MI;
1820 | }
1821 |
1822 | bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) {
1823 |   auto *RC = MRI->getRegClass(Reg);
1824 |   int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1825 |   if (RegBytes > 8)
1826 |     // We don't support post-load hardening of vectors.
1827 |     return false;
1828 |
1829 |   // If this register class is explicitly constrained to a class that doesn't
1830 |   // require a REX prefix, we may not be able to satisfy that constraint when
1831 |   // emitting the hardening instructions, so bail out here.
1832 |   // FIXME: This seems like a pretty lame hack. The way this comes up is when
1833 |   // we end up with both a NOREX and a REX-only register as operands to the
1834 |   // hardening instructions. It would be better to fix that code to handle
1835 |   // this situation rather than hack around it in this way.
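     |   //
     |   // Illustrative detail (x86 encoding facts, not from this file): the
     |   // GR8_NOREX class holds only bytes encodable without a REX prefix (%al,
     |   // %ah, etc.), while %sil, %dil, %bpl, %spl, and %r8b-%r15b require REX,
     |   // and %ah/%bh/%ch/%dh become unencodable once REX is present. An `or`
     |   // mixing the two kinds of operand has no valid encoding, hence the
     |   // conservative bail-out below.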
1836 |   const TargetRegisterClass *NOREXRegClasses[] = {
1837 |       &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1838 |       &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1839 |   if (RC == NOREXRegClasses[Log2_32(RegBytes)])
1840 |     return false;
1841 |
1842 |   const TargetRegisterClass *GPRRegClasses[] = {
1843 |       &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1844 |       &X86::GR64RegClass};
1845 |   return RC->hasSuperClassEq(GPRRegClasses[Log2_32(RegBytes)]);
1846 | }
1847 |
1848 | // We can harden non-leaking loads into a register without touching the
1849 | // address by simply hiding all of the loaded bits. We use an `or` to do
1850 | // this because a poison value of all ones lets us reuse the same
1851 | // predicate-state value below. The goal is only that the loaded bits not
1852 | // be exposed to speculative execution; coercing them all to one suffices.
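     | //
     | // A minimal sketch of the effect (illustrative; the pass emits MIR, not
     | // this exact assembly):
     | //   orq %state, %val    # %state == 0 on the architectural path: no-op
     | //                       # %state == -1 when misspeculating: %val -> -1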
1853 | void X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1854 |   MachineBasicBlock &MBB = *MI.getParent();
1855 |   DebugLoc Loc = MI.getDebugLoc();
1856 |
1857 |   // For all of these, the def'ed register operand is operand zero.
1858 |   auto &DefOp = MI.getOperand(0);
1859 |   unsigned OldDefReg = DefOp.getReg();
1860 |   assert(canHardenRegister(OldDefReg) &&
1861 |          "Cannot harden this instruction's defined register!");
1862 |
1863 |   auto *DefRC = MRI->getRegClass(OldDefReg);
1864 |   int DefRegBytes = TRI->getRegSizeInBits(*DefRC) / 8;
1865 |
1866 |   unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1867 |   unsigned OrOpCode = OrOpCodes[Log2_32(DefRegBytes)];
1868 |
1869 |   unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
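     |   // Both tables are indexed by Log2_32 of the byte width: 1, 2, 4, and 8
     |   // bytes map to indices 0-3, so a 4-byte def selects X86::OR32rr and
     |   // X86::sub_32bit. Note that SubRegImms has only three entries; the
     |   // 8-byte case must be handled by the Bytes == 8 path below.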
1870 |
1871 |   auto GetStateRegInRC = [&](const TargetRegisterClass &RC) {
1872 |     unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1873 |
1874 |     int Bytes = TRI->getRegSizeInBits(RC) / 8;
1875 |     // FIXME: Need to teach this about 32-bit mode.
1876 |     if (Bytes != 8) {
1877 |       unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
     |       ^ Assigned value is garbage or undefined [line 1877, column 7]
1878 |       unsigned NarrowStateReg = MRI->createVirtualRegister(&RC);
1879 |       BuildMI(MBB, MI.getIterator(), Loc, TII->get(TargetOpcode::COPY),
1880 |               NarrowStateReg)
1881 |           .addReg(StateReg, 0, SubRegImm);
1882 |       StateReg = NarrowStateReg;
1883 |     }
1884 |     return StateReg;
1885 |   };
1886 |
1887 |   auto InsertPt = std::next(MI.getIterator());
1888 |   unsigned FlagsReg = 0;
1889 |   bool EFLAGSLive = isEFLAGSLive(MBB, InsertPt, *TRI);
1890 |   if (EFLAGSLive && !Subtarget->hasBMI2()) {
1891 |     FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1892 |     EFLAGSLive = false;
1893 |   }
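     |   // Why BMI2 matters here: `or` clobbers EFLAGS, but BMI2's SHRX shifts
     |   // without writing any flags, so when the flags are live and SHRX is
     |   // available we can harden below without the save/restore pair.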
1894 |
1895 |   if (!EFLAGSLive) {
1896 |     unsigned StateReg = GetStateRegInRC(*DefRC);
1897 |     unsigned NewDefReg = MRI->createVirtualRegister(DefRC);
1898 |     DefOp.setReg(NewDefReg);
1899 |     auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), OldDefReg)
1900 |                    .addReg(StateReg)
1901 |                    .addReg(NewDefReg);
1902 |     OrI->addRegisterDead(X86::EFLAGS, TRI);
1903 |     ++NumInstsInserted;
1904 |     LLVM_DEBUG(dbgs() << "    Inserting or: "; OrI->dump(); dbgs() << "\n");
1905 |   } else {
1906 |     assert(Subtarget->hasBMI2() &&
1907 |            "Cannot harden loads and preserve EFLAGS without BMI2!");
1908 |
1909 |     unsigned ShiftOpCode = DefRegBytes < 4 ? X86::SHRX32rr : X86::SHRX64rr;
1910 |     auto &ShiftRC =
1911 |         DefRegBytes < 4 ? X86::GR32_NOSPRegClass : X86::GR64_NOSPRegClass;
1912 |     int ShiftRegBytes = TRI->getRegSizeInBits(ShiftRC) / 8;
1913 |     unsigned DefSubRegImm = SubRegImms[Log2_32(DefRegBytes)];
1914 |
1915 |     unsigned StateReg = GetStateRegInRC(ShiftRC);
1916 |
1917 |     // First have the def instruction def a temporary register.
1918 |     unsigned TmpReg = MRI->createVirtualRegister(DefRC);
1919 |     DefOp.setReg(TmpReg);
1920 |     // Now copy it into a register of the shift RC.
1921 |     unsigned ShiftInputReg = TmpReg;
1922 |     if (DefRegBytes != ShiftRegBytes) {
1923 |       unsigned UndefReg = MRI->createVirtualRegister(&ShiftRC);
1924 |       BuildMI(MBB, InsertPt, Loc, TII->get(X86::IMPLICIT_DEF), UndefReg);
1925 |       ShiftInputReg = MRI->createVirtualRegister(&ShiftRC);
1926 |       BuildMI(MBB, InsertPt, Loc, TII->get(X86::INSERT_SUBREG), ShiftInputReg)
1927 |           .addReg(UndefReg)
1928 |           .addReg(TmpReg)
1929 |           .addImm(DefSubRegImm);
1930 |     }
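     |     // IMPLICIT_DEF + INSERT_SUBREG is the usual MIR idiom for widening:
     |     // the wide register's upper bits are left undef and the narrow def
     |     // is inserted as a subregister, e.g. a 16-bit TmpReg dropped into
     |     // the low lane of a fresh 32- or 64-bit shift input.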
1931 |
1932 |     // We shift this once if the shift is wider than the def and thus we can
1933 |     // shift *all* of the def'ed bytes out. Otherwise we need to do two shifts.
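     |     // Key detail (x86 semantics, not stated above): SHRX masks its count
     |     // to 5 bits (32-bit) or 6 bits (64-bit), so an all-ones state shifts
     |     // by 31 or 63. A 64-bit value shifted right by 63 still keeps its top
     |     // bit, which is why the 8-byte case below shifts a second time;
     |     // narrower defs are fully cleared by one shift.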
1934 |
1935 |     unsigned ShiftedReg = MRI->createVirtualRegister(&ShiftRC);
1936 |     auto Shift1I =
1937 |         BuildMI(MBB, InsertPt, Loc, TII->get(ShiftOpCode), ShiftedReg)
1938 |             .addReg(ShiftInputReg)
1939 |             .addReg(StateReg);
1940 |     (void)Shift1I;
1941 |     ++NumInstsInserted;
1942 |     LLVM_DEBUG(dbgs() << "    Inserting shrx: "; Shift1I->dump(); dbgs() << "\n");
1943 |
1944 |     // The only way we have a bit left is if all 8 bytes were defined. Do an
1945 |     // extra shift to get the last bit in this case.
1946 |     if (DefRegBytes == ShiftRegBytes) {
1947 |       // We can just directly def the old def register, as it's the same size.
1948 |       ShiftInputReg = ShiftedReg;
1949 |       auto Shift2I =
1950 |           BuildMI(MBB, InsertPt, Loc, TII->get(ShiftOpCode), OldDefReg)
1951 |               .addReg(ShiftInputReg)
1952 |               .addReg(StateReg);
1953 |       (void)Shift2I;
1954 |       ++NumInstsInserted;
1955 |       LLVM_DEBUG(dbgs() << "    Inserting shrx: "; Shift2I->dump();
1956 |                  dbgs() << "\n");
1957 |     } else {
1958 |       // When we have a different-size shift register we need to fix up the
1959 |       // register class. We can do that as we copy into the old def register.
1960 |       BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), OldDefReg)
1961 |           .addReg(ShiftedReg, 0, DefSubRegImm);
1962 |     }
1963 |   }
1964 |
1965 |   if (FlagsReg)
1966 |     restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1967 |
1968 |   ++NumPostLoadRegsHardened;
1969 | }
1970 |
1971 | /// Harden a return instruction.
1972 | ///
1973 | /// Returns implicitly perform a load which we need to harden. Without hardening
1974 | /// this load, an attacker may speculatively write over the return address to
1975 | /// steer speculation of the return to an attacker-controlled address. This is
1976 | /// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1977 | /// this paper:
1978 | /// https://people.csail.mit.edu/vlk/spectre11.pdf
1979 | ///
1980 | /// We can harden this by introducing an LFENCE that will delay any load of the
1981 | /// return address until prior instructions have retired (and thus are not being
1982 | /// speculated), or we can harden the address used by the implicit load: the
1983 | /// stack pointer.
1984 | ///
1985 | /// If we are not using an LFENCE, hardening the stack pointer has an additional
1986 | /// benefit: it allows us to pass the predicate state accumulated in this
1987 | /// function back to the caller. In the absence of a BCBS attack on the return,
1988 | /// the caller will typically be resumed and speculatively executed due to the
1989 | /// Return Stack Buffer (RSB) prediction, which is very accurate and has a high
1990 | /// priority. It is possible that some code from the caller will be executed
1991 | /// speculatively even during a BCBS-attacked return until the steering takes
1992 | /// effect. Whenever this happens, the caller can recover the (poisoned)
1993 | /// predicate state from the stack pointer and continue to harden loads.
1994 | void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
1995 |   MachineBasicBlock &MBB = *MI.getParent();
1996 |   DebugLoc Loc = MI.getDebugLoc();
1997 |   auto InsertPt = MI.getIterator();
1998 |
1999 |   if (FenceCallAndRet) {
2000 |     // Simply forcibly block speculation of loads out of the function by using
2001 |     // an LFENCE. This is potentially a heavy-weight mitigation strategy, but
2002 |     // should be secure, is simple from an ABI perspective, and the cost can be
2003 |     // minimized through inlining.
2004 |     //
2005 |     // FIXME: We should investigate ways to establish a strong data-dependency
2006 |     // on the return. However, poisoning the stack pointer is unlikely to work
2007 |     // because the return is *predicted* rather than relying on the load of the
2008 |     // return address to actually resolve.
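     |     //
     |     // The emitted sequence is effectively `lfence; ret`. LFENCE does not
     |     // execute until all prior instructions have completed, and no later
     |     // instruction starts executing until the LFENCE completes, so the
     |     // return (and its implicit load) cannot run down a misspeculated path.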
2009 |     BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE));
2010 |     ++NumInstsInserted;
2011 |     ++NumLFENCEsInserted;
2012 |     return;
2013 |   }
2014 |
2015 |   // Take our predicate state, shift it to the high 17 bits (so that we keep
2016 |   // pointers canonical) and merge it into RSP. This will allow the caller to
2017 |   // extract it when we return (speculatively).
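     |   //
     |   // Roughly (illustrative; the exact MIR is built by mergePredStateIntoSP):
     |   //   shlq $47, %state    # all-ones state sets the top 17 bits
     |   //   orq  %state, %rsp   # zero state leaves RSP unchanged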
2018 |   mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2019 | }
2020 |
2021 | INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
2022 |                       "X86 speculative load hardener", false, false)
2023 | INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
2024 |                     "X86 speculative load hardener", false, false)
2025 |
2026 | FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2027 |   return new X86SpeculativeLoadHardeningPass();
2028 | }