LLVM  4.0.0
FunctionComparator.h
Go to the documentation of this file.
1 //===- FunctionComparator.h - Function Comparator ---------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the FunctionComparator and GlobalNumberState classes which
11 // are used by the MergeFunctions pass for comparing functions.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
16 #define LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/ValueMap.h"
23 #include "llvm/IR/Operator.h"
25 #include "llvm/Support/Casting.h"
26 #include <cstdint>
27 #include <tuple>
28 
29 namespace llvm {
30 
31 class GetElementPtrInst;
32 
33 /// GlobalNumberState assigns an integer to each global value in the program,
34 /// which is used by the comparison routine to order references to globals. This
35 /// state must be preserved throughout the pass, because Functions and other
36 /// globals need to maintain their relative order. Globals are assigned a number
37 /// when they are first visited. This order is deterministic, and so the
38 /// assigned numbers are as well. When two functions are merged, neither number
39 /// is updated. If the symbols are weak, this would be incorrect. If they are
40 /// strong, then one will be replaced at all references to the other, and so
41 /// direct callsites will now see one or the other symbol, and no update is
42 /// necessary. Note that if we were guaranteed unique names, we could just
43 /// compare those, but this would not work for stripped bitcodes or for those
44 /// few symbols without a name.
46  struct Config : ValueMapConfig<GlobalValue*> {
47  enum { FollowRAUW = false };
48  };
49  // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
50  // occurs, the mapping does not change. Tracking changes is unnecessary, and
51  // also problematic for weak symbols (which may be overwritten).
53  ValueNumberMap GlobalNumbers;
54  // The next unused serial number to assign to a global.
55  uint64_t NextNumber = 0;
56 
57 public:
58  GlobalNumberState() = default;
59 
60  uint64_t getNumber(GlobalValue* Global) {
62  bool Inserted;
63  std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
64  if (Inserted)
65  NextNumber++;
66  return MapIter->second;
67  }
68 
69  void clear() {
70  GlobalNumbers.clear();
71  }
72 };
73 
74 /// FunctionComparator - Compares two functions to determine whether or not
75 /// they will generate machine code with the same behaviour. DataLayout is
76 /// used if available. The comparator always fails conservatively (erring on the
77 /// side of claiming that two functions are different).
79 public:
80  FunctionComparator(const Function *F1, const Function *F2,
82  : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
83 
84  /// Test whether the two functions have equivalent behaviour.
85  int compare();
86  /// Hash a function. Equivalent functions will have the same hash, and unequal
87  /// functions will have different hashes with high probability.
88  typedef uint64_t FunctionHash;
90 
91 protected:
92  /// Start the comparison.
93  void beginCompare() {
94  sn_mapL.clear();
95  sn_mapR.clear();
96  }
97 
98  /// Compares the signature and other general attributes of the two functions.
99  int compareSignature() const;
100 
101  /// Test whether two basic blocks have equivalent behaviour.
102  int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const;
103 
104  /// Constants comparison.
105  /// Its analog to lexicographical comparison between hypothetical numbers
106  /// of next format:
107  /// <bitcastability-trait><raw-bit-contents>
108  ///
109  /// 1. Bitcastability.
110  /// Check whether L's type could be losslessly bitcasted to R's type.
111  /// On this stage method, in case when lossless bitcast is not possible
112  /// method returns -1 or 1, thus also defining which type is greater in
113  /// context of bitcastability.
114  /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight
115  /// to the contents comparison.
116  /// If types differ, remember types comparison result and check
117  /// whether we still can bitcast types.
118  /// Stage 1: Types that satisfies isFirstClassType conditions are always
119  /// greater then others.
120  /// Stage 2: Vector is greater then non-vector.
121  /// If both types are vectors, then vector with greater bitwidth is
122  /// greater.
123  /// If both types are vectors with the same bitwidth, then types
124  /// are bitcastable, and we can skip other stages, and go to contents
125  /// comparison.
126  /// Stage 3: Pointer types are greater than non-pointers. If both types are
127  /// pointers of the same address space - go to contents comparison.
128  /// Different address spaces: pointer with greater address space is
129  /// greater.
130  /// Stage 4: Types are neither vectors, nor pointers. And they differ.
131  /// We don't know how to bitcast them. So, we better don't do it,
132  /// and return types comparison result (so it determines the
133  /// relationship among constants we don't know how to bitcast).
134  ///
135  /// Just for clearance, let's see how the set of constants could look
136  /// on single dimension axis:
137  ///
138  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
139  /// Where: NFCT - Not a FirstClassType
140  /// FCT - FirstClassTyp:
141  ///
142  /// 2. Compare raw contents.
143  /// It ignores types on this stage and only compares bits from L and R.
144  /// Returns 0, if L and R has equivalent contents.
145  /// -1 or 1 if values are different.
146  /// Pretty trivial:
147  /// 2.1. If contents are numbers, compare numbers.
148  /// Ints with greater bitwidth are greater. Ints with same bitwidths
149  /// compared by their contents.
150  /// 2.2. "And so on". Just to avoid discrepancies with comments
151  /// perhaps it would be better to read the implementation itself.
152  /// 3. And again about overall picture. Let's look back at how the ordered set
153  /// of constants will look like:
154  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
155  ///
156  /// Now look, what could be inside [FCT, "others"], for example:
157  /// [FCT, "others"] =
158  /// [
159  /// [double 0.1], [double 1.23],
160  /// [i32 1], [i32 2],
161  /// { double 1.0 }, ; StructTyID, NumElements = 1
162  /// { i32 1 }, ; StructTyID, NumElements = 1
163  /// { double 1, i32 1 }, ; StructTyID, NumElements = 2
164  /// { i32 1, double 1 } ; StructTyID, NumElements = 2
165  /// ]
166  ///
167  /// Let's explain the order. Float numbers will be less than integers, just
168  /// because of cmpType terms: FloatTyID < IntegerTyID.
169  /// Floats (with same fltSemantics) are sorted according to their value.
170  /// Then you can see integers, and they are, like a floats,
171  /// could be easy sorted among each others.
172  /// The structures. Structures are grouped at the tail, again because of their
173  /// TypeID: StructTyID > IntegerTyID > FloatTyID.
174  /// Structures with greater number of elements are greater. Structures with
175  /// greater elements going first are greater.
176  /// The same logic with vectors, arrays and other possible complex types.
177  ///
178  /// Bitcastable constants.
179  /// Let's assume, that some constant, belongs to some group of
180  /// "so-called-equal" values with different types, and at the same time
181  /// belongs to another group of constants with equal types
182  /// and "really" equal values.
183  ///
184  /// Now, prove that this is impossible:
185  ///
186  /// If constant A with type TyA is bitcastable to B with type TyB, then:
187  /// 1. All constants with equal types to TyA, are bitcastable to B. Since
188  /// those should be vectors (if TyA is vector), pointers
189  /// (if TyA is pointer), or else (if TyA equal to TyB), those types should
190  /// be equal to TyB.
191  /// 2. All constants with non-equal, but bitcastable types to TyA, are
192  /// bitcastable to B.
193  /// Once again, just because we allow it to vectors and pointers only.
194  /// This statement could be expanded as below:
195  /// 2.1. All vectors with equal bitwidth to vector A, has equal bitwidth to
196  /// vector B, and thus bitcastable to B as well.
197  /// 2.2. All pointers of the same address space, no matter what they point to,
198  /// bitcastable. So if C is pointer, it could be bitcasted to A and to B.
199  /// So any constant equal or bitcastable to A is equal or bitcastable to B.
200  /// QED.
201  ///
202  /// In another words, for pointers and vectors, we ignore top-level type and
203  /// look at their particular properties (bit-width for vectors, and
204  /// address space for pointers).
205  /// If these properties are equal - compare their contents.
206  int cmpConstants(const Constant *L, const Constant *R) const;
207 
208  /// Compares two global values by number. Uses the GlobalNumbersState to
209  /// identify the same gobals across function calls.
210  int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const;
211 
212  /// Assign or look up previously assigned numbers for the two values, and
213  /// return whether the numbers are equal. Numbers are assigned in the order
214  /// visited.
215  /// Comparison order:
216  /// Stage 0: Value that is function itself is always greater then others.
217  /// If left and right values are references to their functions, then
218  /// they are equal.
219  /// Stage 1: Constants are greater than non-constants.
220  /// If both left and right are constants, then the result of
221  /// cmpConstants is used as cmpValues result.
222  /// Stage 2: InlineAsm instances are greater than others. If both left and
223  /// right are InlineAsm instances, InlineAsm* pointers casted to
224  /// integers and compared as numbers.
225  /// Stage 3: For all other cases we compare order we meet these values in
226  /// their functions. If right value was met first during scanning,
227  /// then left value is greater.
228  /// In another words, we compare serial numbers, for more details
229  /// see comments for sn_mapL and sn_mapR.
230  int cmpValues(const Value *L, const Value *R) const;
231 
232  /// Compare two Instructions for equivalence, similar to
233  /// Instruction::isSameOperationAs.
234  ///
235  /// Stages are listed in "most significant stage first" order:
236  /// On each stage below, we do comparison between some left and right
237  /// operation parts. If parts are non-equal, we assign parts comparison
238  /// result to the operation comparison result and exit from method.
239  /// Otherwise we proceed to the next stage.
240  /// Stages:
241  /// 1. Operations opcodes. Compared as numbers.
242  /// 2. Number of operands.
243  /// 3. Operation types. Compared with cmpType method.
244  /// 4. Compare operation subclass optional data as stream of bytes:
245  /// just convert it to integers and call cmpNumbers.
246  /// 5. Compare in operation operand types with cmpType in
247  /// most significant operand first order.
248  /// 6. Last stage. Check operations for some specific attributes.
249  /// For example, for Load it would be:
250  /// 6.1.Load: volatile (as boolean flag)
251  /// 6.2.Load: alignment (as integer numbers)
252  /// 6.3.Load: ordering (as underlying enum class value)
253  /// 6.4.Load: synch-scope (as integer numbers)
254  /// 6.5.Load: range metadata (as integer ranges)
255  /// On this stage its better to see the code, since its not more than 10-15
256  /// strings for particular instruction, and could change sometimes.
257  ///
258  /// Sets \p needToCmpOperands to true if the operands of the instructions
259  /// still must be compared afterwards. In this case it's already guaranteed
260  /// that both instructions have the same number of operands.
261  int cmpOperations(const Instruction *L, const Instruction *R,
262  bool &needToCmpOperands) const;
263 
264  /// cmpType - compares two types,
265  /// defines total ordering among the types set.
266  ///
267  /// Return values:
268  /// 0 if types are equal,
269  /// -1 if Left is less than Right,
270  /// +1 if Left is greater than Right.
271  ///
272  /// Description:
273  /// Comparison is broken onto stages. Like in lexicographical comparison
274  /// stage coming first has higher priority.
275  /// On each explanation stage keep in mind total ordering properties.
276  ///
277  /// 0. Before comparison we coerce pointer types of 0 address space to
278  /// integer.
279  /// We also don't bother with same type at left and right, so
280  /// just return 0 in this case.
281  ///
282  /// 1. If types are of different kind (different type IDs).
283  /// Return result of type IDs comparison, treating them as numbers.
284  /// 2. If types are integers, check that they have the same width. If they
285  /// are vectors, check that they have the same count and subtype.
286  /// 3. Types have the same ID, so check whether they are one of:
287  /// * Void
288  /// * Float
289  /// * Double
290  /// * X86_FP80
291  /// * FP128
292  /// * PPC_FP128
293  /// * Label
294  /// * Metadata
295  /// We can treat these types as equal whenever their IDs are same.
296  /// 4. If Left and Right are pointers, return result of address space
297  /// comparison (numbers comparison). We can treat pointer types of same
298  /// address space as equal.
299  /// 5. If types are complex.
300  /// Then both Left and Right are to be expanded and their element types will
301  /// be checked with the same way. If we get Res != 0 on some stage, return it.
302  /// Otherwise return 0.
303  /// 6. For all other cases put llvm_unreachable.
304  int cmpTypes(Type *TyL, Type *TyR) const;
305 
306  int cmpNumbers(uint64_t L, uint64_t R) const;
307  int cmpAPInts(const APInt &L, const APInt &R) const;
308  int cmpAPFloats(const APFloat &L, const APFloat &R) const;
309  int cmpMem(StringRef L, StringRef R) const;
310 
311  // The two functions undergoing comparison.
312  const Function *FnL, *FnR;
313 
314 private:
315  int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const;
316  int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
317  int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
318  int cmpRangeMetadata(const MDNode *L, const MDNode *R) const;
319  int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
320 
321  /// Compare two GEPs for equivalent pointer arithmetic.
322  /// Parts to be compared for each comparison stage,
323  /// most significant stage first:
324  /// 1. Address space. As numbers.
325  /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
326  /// 3. Pointer operand type (using cmpType method).
327  /// 4. Number of operands.
328  /// 5. Compare operands, using cmpValues method.
329  int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const;
330  int cmpGEPs(const GetElementPtrInst *GEPL,
331  const GetElementPtrInst *GEPR) const {
332  return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
333  }
334 
335  /// Assign serial numbers to values from left function, and values from
336  /// right function.
337  /// Explanation:
338  /// Being comparing functions we need to compare values we meet at left and
339  /// right sides.
340  /// Its easy to sort things out for external values. It just should be
341  /// the same value at left and right.
342  /// But for local values (those were introduced inside function body)
343  /// we have to ensure they were introduced at exactly the same place,
344  /// and plays the same role.
345  /// Let's assign serial number to each value when we meet it first time.
346  /// Values that were met at same place will be with same serial numbers.
347  /// In this case it would be good to explain few points about values assigned
348  /// to BBs and other ways of implementation (see below).
349  ///
350  /// 1. Safety of BB reordering.
351  /// It's safe to change the order of BasicBlocks in function.
352  /// Relationship with other functions and serial numbering will not be
353  /// changed in this case.
354  /// As follows from FunctionComparator::compare(), we do CFG walk: we start
355  /// from the entry, and then take each terminator. So it doesn't matter how in
356  /// fact BBs are ordered in function. And since cmpValues are called during
357  /// this walk, the numbering depends only on how BBs located inside the CFG.
358  /// So the answer is - yes. We will get the same numbering.
359  ///
360  /// 2. Impossibility to use dominance properties of values.
361  /// If we compare two instruction operands: first is usage of local
362  /// variable AL from function FL, and second is usage of local variable AR
363  /// from FR, we could compare their origins and check whether they are
364  /// defined at the same place.
365  /// But, we are still not able to compare operands of PHI nodes, since those
366  /// could be operands from further BBs we didn't scan yet.
367  /// So it's impossible to use dominance properties in general.
368  mutable DenseMap<const Value*, int> sn_mapL, sn_mapR;
369 
370  // The global state we will use
371  GlobalNumberState* GlobalNumbers;
372 };
373 
374 } // end namespace llvm
375 
376 #endif // LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
MachineLoop * L
void clear()
Definition: ValueMap.h:148
int cmpValues(const Value *L, const Value *R) const
Assign or look up previously assigned numbers for the two values, and return whether the numbers are ...
Atomic ordering constants.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: ValueMap.h:175
This class defines the default behavior for configurable aspects of ValueMap<>.
Definition: ValueMap.h:59
Metadata node.
Definition: Metadata.h:830
int cmpAPInts(const APInt &L, const APInt &R) const
int cmpOperations(const Instruction *L, const Instruction *R, bool &needToCmpOperands) const
Compare two Instructions for equivalence, similar to Instruction::isSameOperationAs.
int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const
Compares two global values by number.
FunctionComparator(const Function *F1, const Function *F2, GlobalNumberState *GN)
AtomicOrdering
Atomic ordering for LLVM's memory model.
void beginCompare()
Start the comparison.
uint64_t getNumber(GlobalValue *Global)
static FunctionHash functionHash(Function &)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:830
FunctionComparator - Compares two functions to determine whether or not they will generate machine co...
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important base class in LLVM.
Definition: Constant.h:42
This file declares a class to represent arbitrary precision floating point values and provide a varie...
int cmpNumbers(uint64_t L, uint64_t R) const
int cmpMem(StringRef L, StringRef R) const
uint64_t FunctionHash
Hash a function.
int compareSignature() const
Compares the signature and other general attributes of the two functions.
int cmpTypes(Type *TyL, Type *TyR) const
cmpType - compares two types, defines total ordering among the types set.
GlobalNumberState assigns an integer to each global value in the program, which is used by the compar...
Class for arbitrary precision integers.
Definition: APInt.h:77
int compare()
Test whether the two functions have equivalent behaviour.
int cmpConstants(const Constant *L, const Constant *R) const
Constants comparison.
int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const
Test whether two basic blocks have equivalent behaviour.
LLVM Value Representation.
Definition: Value.h:71
int cmpAPFloats(const APFloat &L, const APFloat &R) const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47