LLVM  16.0.0git
CFGuard.cpp
Go to the documentation of this file.
1 //===-- CFGuard.cpp - Control Flow Guard checks -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the IR transform to add Microsoft's Control Flow Guard
11 /// checks on Windows targets.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/IR/CallingConv.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/InitializePasses.h"
23 #include "llvm/Pass.h"
24 
25 using namespace llvm;
26 
28 
29 #define DEBUG_TYPE "cfguard"
30 
31 STATISTIC(CFGuardCounter, "Number of Control Flow Guard checks added");
32 
33 namespace {
34 
35 /// Adds Control Flow Guard (CFG) checks on indirect function calls/invokes.
36 /// These checks ensure that the target address corresponds to the start of an
37 /// address-taken function. X86_64 targets use the CF_Dispatch mechanism. X86,
38 /// ARM, and AArch64 targets use the CF_Check mechanism.
39 class CFGuard : public FunctionPass {
40 public:
41  static char ID;
42 
43  enum Mechanism { CF_Check, CF_Dispatch };
44 
45  // Default constructor required for the INITIALIZE_PASS macro.
46  CFGuard() : FunctionPass(ID) {
48  // By default, use the guard check mechanism.
49  GuardMechanism = CF_Check;
50  }
51 
52  // Recommended constructor used to specify the type of guard mechanism.
53  CFGuard(Mechanism Var) : FunctionPass(ID) {
55  GuardMechanism = Var;
56  }
57 
58  /// Inserts a Control Flow Guard (CFG) check on an indirect call using the CFG
59  /// check mechanism. When the image is loaded, the loader puts the appropriate
60  /// guard check function pointer in the __guard_check_icall_fptr global
61  /// symbol. This checks that the target address is a valid address-taken
62  /// function. The address of the target function is passed to the guard check
63  /// function in an architecture-specific register (e.g. ECX on 32-bit X86,
64  /// X15 on AArch64, and R0 on ARM). The guard check function has no return
65  /// value (if the target is invalid, the guard check function will raise an
66  /// error).
67  ///
68  /// For example, the following LLVM IR:
69  /// \code
70  /// %func_ptr = alloca i32 ()*, align 8
71  /// store i32 ()* @target_func, i32 ()** %func_ptr, align 8
72  /// %0 = load i32 ()*, i32 ()** %func_ptr, align 8
73  /// %1 = call i32 %0()
74  /// \endcode
75  ///
76  /// is transformed to:
77  /// \code
78  /// %func_ptr = alloca i32 ()*, align 8
79  /// store i32 ()* @target_func, i32 ()** %func_ptr, align 8
80  /// %0 = load i32 ()*, i32 ()** %func_ptr, align 8
81  /// %1 = load void (i8*)*, void (i8*)** @__guard_check_icall_fptr
82  /// %2 = bitcast i32 ()* %0 to i8*
83  /// call cfguard_checkcc void %1(i8* %2)
84  /// %3 = call i32 %0()
85  /// \endcode
86  ///
87  /// For example, the following X86 assembly code:
88  /// \code
89  /// movl $_target_func, %eax
90  /// calll *%eax
91  /// \endcode
92  ///
93  /// is transformed to:
94  /// \code
95  /// movl $_target_func, %ecx
96  /// calll *___guard_check_icall_fptr
97  /// calll *%ecx
98  /// \endcode
99  ///
100  /// \param CB indirect call to instrument.
101  void insertCFGuardCheck(CallBase *CB);
102 
103  /// Inserts a Control Flow Guard (CFG) check on an indirect call using the CFG
104  /// dispatch mechanism. When the image is loaded, the loader puts the
105  /// appropriate guard check function pointer in the
106  /// __guard_dispatch_icall_fptr global symbol. This checks that the target
107  /// address is a valid address-taken function and, if so, tail calls the
108  /// target. The target address is passed in an architecture-specific register
109  /// (e.g. RAX on X86_64), with all other arguments for the target function
110  /// passed as usual.
111  ///
112  /// For example, the following LLVM IR:
113  /// \code
114  /// %func_ptr = alloca i32 ()*, align 8
115  /// store i32 ()* @target_func, i32 ()** %func_ptr, align 8
116  /// %0 = load i32 ()*, i32 ()** %func_ptr, align 8
117  /// %1 = call i32 %0()
118  /// \endcode
119  ///
120  /// is transformed to:
121  /// \code
122  /// %func_ptr = alloca i32 ()*, align 8
123  /// store i32 ()* @target_func, i32 ()** %func_ptr, align 8
124  /// %0 = load i32 ()*, i32 ()** %func_ptr, align 8
125  /// %1 = load i32 ()*, i32 ()** @__guard_dispatch_icall_fptr
126  /// %2 = call i32 %1() [ "cfguardtarget"(i32 ()* %0) ]
127  /// \endcode
128  ///
129  /// For example, the following X86_64 assembly code:
130  /// \code
131  /// leaq target_func(%rip), %rax
132  /// callq *%rax
133  /// \endcode
134  ///
135  /// is transformed to:
136  /// \code
137  /// leaq target_func(%rip), %rax
138  /// callq *__guard_dispatch_icall_fptr(%rip)
139  /// \endcode
140  ///
141  /// \param CB indirect call to instrument.
142  void insertCFGuardDispatch(CallBase *CB);
143 
144  bool doInitialization(Module &M) override;
145  bool runOnFunction(Function &F) override;
146 
147 private:
148  // Only add checks if the module has the cfguard=2 flag.
149  int cfguard_module_flag = 0;
150  Mechanism GuardMechanism = CF_Check;
151  FunctionType *GuardFnType = nullptr;
152  PointerType *GuardFnPtrType = nullptr;
153  Constant *GuardFnGlobal = nullptr;
154 };
155 
156 } // end anonymous namespace
157 
158 void CFGuard::insertCFGuardCheck(CallBase *CB) {
159 
161  "Only applicable for Windows targets");
162  assert(CB->isIndirectCall() &&
163  "Control Flow Guard checks can only be added to indirect calls");
164 
165  IRBuilder<> B(CB);
166  Value *CalledOperand = CB->getCalledOperand();
167 
168  // If the indirect call is called within catchpad or cleanuppad,
169  // we need to copy "funclet" bundle of the call.
171  if (auto Bundle = CB->getOperandBundle(LLVMContext::OB_funclet))
172  Bundles.push_back(OperandBundleDef(*Bundle));
173 
174  // Load the global symbol as a pointer to the check function.
175  LoadInst *GuardCheckLoad = B.CreateLoad(GuardFnPtrType, GuardFnGlobal);
176 
177  // Create new call instruction. The CFGuard check should always be a call,
178  // even if the original CallBase is an Invoke or CallBr instruction.
179  CallInst *GuardCheck =
180  B.CreateCall(GuardFnType, GuardCheckLoad,
181  {B.CreateBitCast(CalledOperand, B.getInt8PtrTy())}, Bundles);
182 
183  // Ensure that the first argument is passed in the correct register
184  // (e.g. ECX on 32-bit X86 targets).
186 }
187 
188 void CFGuard::insertCFGuardDispatch(CallBase *CB) {
189 
191  "Only applicable for Windows targets");
192  assert(CB->isIndirectCall() &&
193  "Control Flow Guard checks can only be added to indirect calls");
194 
195  IRBuilder<> B(CB);
196  Value *CalledOperand = CB->getCalledOperand();
197  Type *CalledOperandType = CalledOperand->getType();
198 
199  // Cast the guard dispatch global to the type of the called operand.
200  PointerType *PTy = PointerType::get(CalledOperandType, 0);
201  if (GuardFnGlobal->getType() != PTy)
202  GuardFnGlobal = ConstantExpr::getBitCast(GuardFnGlobal, PTy);
203 
204  // Load the global as a pointer to a function of the same type.
205  LoadInst *GuardDispatchLoad = B.CreateLoad(CalledOperandType, GuardFnGlobal);
206 
207  // Add the original call target as a cfguardtarget operand bundle.
209  CB->getOperandBundlesAsDefs(Bundles);
210  Bundles.emplace_back("cfguardtarget", CalledOperand);
211 
212  // Create a copy of the call/invoke instruction and add the new bundle.
213  assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
214  "Unknown indirect call type");
215  CallBase *NewCB = CallBase::Create(CB, Bundles, CB);
216 
217  // Change the target of the call to be the guard dispatch function.
218  NewCB->setCalledOperand(GuardDispatchLoad);
219 
220  // Replace the original call/invoke with the new instruction.
221  CB->replaceAllUsesWith(NewCB);
222 
223  // Delete the original call/invoke.
224  CB->eraseFromParent();
225 }
226 
/// Per-module setup for the pass.
///
/// Reads the "cfguard" module flag into cfguard_module_flag. Unless the value
/// is 2, returns false without creating any types or globals. Otherwise it
/// builds the guard function type and its pointer type, gets or inserts the
/// global that matches GuardMechanism, caches all three in the pass members,
/// and returns true.
///
/// \param M module being initialized.
/// \returns true when the guard types/global were set up, false when the
/// module is skipped.
227 bool CFGuard::doInitialization(Module &M) {
228 
229  // Check if this module has the cfguard flag and read its value. If the
     // lookup yields no ConstantInt, cfguard_module_flag keeps its prior value.
230  if (auto *MD =
231  mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard")))
232  cfguard_module_flag = MD->getZExtValue();
233 
234  // Skip modules for which CFGuard checks have been disabled, i.e. any flag
     // value other than 2.
235  if (cfguard_module_flag != 2)
236  return false;
237 
238  // Set up prototypes for the guard check and dispatch functions: a
     // non-variadic function returning void and taking a single i8* argument,
     // plus a pointer to that function type in address space 0.
239  GuardFnType = FunctionType::get(Type::getVoidTy(M.getContext()),
240  {Type::getInt8PtrTy(M.getContext())}, false);
241  GuardFnPtrType = PointerType::get(GuardFnType, 0);
242 
243  // Get or insert the guard check or dispatch global symbol, depending on
     // the mechanism chosen when the pass was constructed.
244  if (GuardMechanism == CF_Check) {
245  GuardFnGlobal =
246  M.getOrInsertGlobal("__guard_check_icall_fptr", GuardFnPtrType);
247  } else {
     // CF_Dispatch is the only other valid mechanism.
248  assert(GuardMechanism == CF_Dispatch && "Invalid CFGuard mechanism");
249  GuardFnGlobal =
250  M.getOrInsertGlobal("__guard_dispatch_icall_fptr", GuardFnPtrType);
251  }
252 
253  return true;
254 }
255 
257 
258  // Skip modules for which CFGuard checks have been disabled.
259  if (cfguard_module_flag != 2)
260  return false;
261 
262  SmallVector<CallBase *, 8> IndirectCalls;
263 
264  // Iterate over the instructions to find all indirect call/invoke/callbr
265  // instructions. Make a separate list of pointers to indirect
266  // call/invoke/callbr instructions because the original instructions will be
267  // deleted as the checks are added.
268  for (BasicBlock &BB : F.getBasicBlockList()) {
269  for (Instruction &I : BB.getInstList()) {
270  auto *CB = dyn_cast<CallBase>(&I);
271  if (CB && CB->isIndirectCall() && !CB->hasFnAttr("guard_nocf")) {
272  IndirectCalls.push_back(CB);
273  CFGuardCounter++;
274  }
275  }
276  }
277 
278  // If no checks are needed, return early.
279  if (IndirectCalls.empty()) {
280  return false;
281  }
282 
283  // For each indirect call/invoke, add the appropriate dispatch or check.
284  if (GuardMechanism == CF_Dispatch) {
285  for (CallBase *CB : IndirectCalls) {
286  insertCFGuardDispatch(CB);
287  }
288  } else {
289  for (CallBase *CB : IndirectCalls) {
290  insertCFGuardCheck(CB);
291  }
292  }
293 
294  return true;
295 }
296 
297 char CFGuard::ID = 0;
298 INITIALIZE_PASS(CFGuard, "CFGuard", "CFGuard", false, false)
299 
301  return new CFGuard(CFGuard::CF_Check);
302 }
303 
305  return new CFGuard(CFGuard::CF_Dispatch);
306 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::CallBase::getOperandBundlesAsDefs
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Definition: Instructions.cpp:396
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::CallBase::getOperandBundle
Optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Definition: InstrTypes.h:2027
llvm::Function
Definition: Function.h:60
Pass.h
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:727
llvm::LLVMContext::OB_funclet
@ OB_funclet
Definition: LLVMContext.h:90
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1183
Statistic.h
llvm::IRBuilder<>
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2199
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:361
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::CallBase::hasFnAttr
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1483
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::OperandBundleDefT
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: AutoUpgrade.h:32
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
Instruction.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:42
llvm::createCFGuardDispatchPass
FunctionPass * createCFGuardDispatchPass()
Insert Control Flow Guard dispatches on indirect function calls.
Definition: CFGuard.cpp:304
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::OperandBundleDef
OperandBundleDefT< Value * > OperandBundleDef
Definition: AutoUpgrade.h:33
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::initializeCFGuardPass
void initializeCFGuardPass(PassRegistry &)
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
CFGuard.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::createCFGuardCheckPass
FunctionPass * createCFGuardCheckPass()
Insert Control Flow Guard checks on indirect function calls.
Definition: CFGuard.cpp:300
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::CallBase::Create
static CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, Instruction *InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
Definition: Instructions.cpp:255
Triple.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::Triple::isOSWindows
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:571
CallingConv.h
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::CallBase::isIndirectCall
bool isIndirectCall() const
Return true if the callsite is an indirect call.
Definition: Instructions.cpp:289
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1389
llvm::CallBase::setCalledOperand
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1432
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:222
SmallVector.h
llvm::Module::getTargetTriple
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
Definition: Module.h:258
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::CallBase::setCallingConv
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1459
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:925
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38