LLVM 17.0.0git
FunctionComparator.h
Go to the documentation of this file.
1//===- FunctionComparator.h - Function Comparator ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the FunctionComparator and GlobalNumberState classes which
10// are used by the MergeFunctions pass for comparing functions.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
15#define LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
16
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/StringRef.h"
20#include "llvm/IR/Operator.h"
21#include "llvm/IR/ValueMap.h"
24#include <cstdint>
25#include <tuple>
26
27namespace llvm {
28
29class APFloat;
30class AttributeList;
31class APInt;
32class BasicBlock;
33class Constant;
34class Function;
35class GlobalValue;
36class InlineAsm;
37class Instruction;
38class MDNode;
39class Type;
40class Value;
41
42/// GlobalNumberState assigns an integer to each global value in the program,
43/// which is used by the comparison routine to order references to globals. This
44/// state must be preserved throughout the pass, because Functions and other
45/// globals need to maintain their relative order. Globals are assigned a number
46/// when they are first visited. This order is deterministic, and so the
47/// assigned numbers are as well. When two functions are merged, neither number
48/// is updated. If the symbols are weak, this would be incorrect. If they are
49/// strong, then one will be replaced at all references to the other, and so
50/// direct callsites will now see one or the other symbol, and no update is
51/// necessary. Note that if we were guaranteed unique names, we could just
52/// compare those, but this would not work for stripped bitcodes or for those
53/// few symbols without a name.
55 struct Config : ValueMapConfig<GlobalValue *> {
56 enum { FollowRAUW = false };
57 };
58
59 // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
60 // occurs, the mapping does not change. Tracking changes is unnecessary, and
61 // also problematic for weak symbols (which may be overwritten).
63 ValueNumberMap GlobalNumbers;
64
65 // The next unused serial number to assign to a global.
66 uint64_t NextNumber = 0;
67
68public:
69 GlobalNumberState() = default;
70
73 bool Inserted;
74 std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
75 if (Inserted)
76 NextNumber++;
77 return MapIter->second;
78 }
79
81 GlobalNumbers.erase(Global);
82 }
83
84 void clear() {
85 GlobalNumbers.clear();
86 }
87};
88
89/// FunctionComparator - Compares two functions to determine whether or not
90/// they will generate machine code with the same behaviour. DataLayout is
91/// used if available. The comparator always fails conservatively (erring on the
92/// side of claiming that two functions are different).
94public:
95 FunctionComparator(const Function *F1, const Function *F2,
97 : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
98
99 /// Test whether the two functions have equivalent behaviour.
100 int compare();
101
102 /// Hash a function. Equivalent functions will have the same hash, and unequal
103 /// functions will have different hashes with high probability.
106
107protected:
108 /// Start the comparison.
110 sn_mapL.clear();
111 sn_mapR.clear();
112 }
113
114 /// Compares the signature and other general attributes of the two functions.
115 int compareSignature() const;
116
117 /// Test whether two basic blocks have equivalent behaviour.
118 int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const;
119
120 /// Constants comparison.
121 /// Its analog to lexicographical comparison between hypothetical numbers
122 /// of next format:
123 /// <bitcastability-trait><raw-bit-contents>
124 ///
125 /// 1. Bitcastability.
126 /// Check whether L's type could be losslessly bitcasted to R's type.
127 /// On this stage method, in case when lossless bitcast is not possible
128 /// method returns -1 or 1, thus also defining which type is greater in
129 /// context of bitcastability.
130 /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight
131 /// to the contents comparison.
132 /// If types differ, remember types comparison result and check
133 /// whether we still can bitcast types.
134 /// Stage 1: Types that satisfies isFirstClassType conditions are always
135 /// greater then others.
136 /// Stage 2: Vector is greater then non-vector.
137 /// If both types are vectors, then vector with greater bitwidth is
138 /// greater.
139 /// If both types are vectors with the same bitwidth, then types
140 /// are bitcastable, and we can skip other stages, and go to contents
141 /// comparison.
142 /// Stage 3: Pointer types are greater than non-pointers. If both types are
143 /// pointers of the same address space - go to contents comparison.
144 /// Different address spaces: pointer with greater address space is
145 /// greater.
146 /// Stage 4: Types are neither vectors, nor pointers. And they differ.
147 /// We don't know how to bitcast them. So, we better don't do it,
148 /// and return types comparison result (so it determines the
149 /// relationship among constants we don't know how to bitcast).
150 ///
151 /// Just for clearance, let's see how the set of constants could look
152 /// on single dimension axis:
153 ///
154 /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
155 /// Where: NFCT - Not a FirstClassType
156 /// FCT - FirstClassTyp:
157 ///
158 /// 2. Compare raw contents.
159 /// It ignores types on this stage and only compares bits from L and R.
160 /// Returns 0, if L and R has equivalent contents.
161 /// -1 or 1 if values are different.
162 /// Pretty trivial:
163 /// 2.1. If contents are numbers, compare numbers.
164 /// Ints with greater bitwidth are greater. Ints with same bitwidths
165 /// compared by their contents.
166 /// 2.2. "And so on". Just to avoid discrepancies with comments
167 /// perhaps it would be better to read the implementation itself.
168 /// 3. And again about overall picture. Let's look back at how the ordered set
169 /// of constants will look like:
170 /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
171 ///
172 /// Now look, what could be inside [FCT, "others"], for example:
173 /// [FCT, "others"] =
174 /// [
175 /// [double 0.1], [double 1.23],
176 /// [i32 1], [i32 2],
177 /// { double 1.0 }, ; StructTyID, NumElements = 1
178 /// { i32 1 }, ; StructTyID, NumElements = 1
179 /// { double 1, i32 1 }, ; StructTyID, NumElements = 2
180 /// { i32 1, double 1 } ; StructTyID, NumElements = 2
181 /// ]
182 ///
183 /// Let's explain the order. Float numbers will be less than integers, just
184 /// because of cmpType terms: FloatTyID < IntegerTyID.
185 /// Floats (with same fltSemantics) are sorted according to their value.
186 /// Then you can see integers, and they are, like a floats,
187 /// could be easy sorted among each others.
188 /// The structures. Structures are grouped at the tail, again because of their
189 /// TypeID: StructTyID > IntegerTyID > FloatTyID.
190 /// Structures with greater number of elements are greater. Structures with
191 /// greater elements going first are greater.
192 /// The same logic with vectors, arrays and other possible complex types.
193 ///
194 /// Bitcastable constants.
195 /// Let's assume, that some constant, belongs to some group of
196 /// "so-called-equal" values with different types, and at the same time
197 /// belongs to another group of constants with equal types
198 /// and "really" equal values.
199 ///
200 /// Now, prove that this is impossible:
201 ///
202 /// If constant A with type TyA is bitcastable to B with type TyB, then:
203 /// 1. All constants with equal types to TyA, are bitcastable to B. Since
204 /// those should be vectors (if TyA is vector), pointers
205 /// (if TyA is pointer), or else (if TyA equal to TyB), those types should
206 /// be equal to TyB.
207 /// 2. All constants with non-equal, but bitcastable types to TyA, are
208 /// bitcastable to B.
209 /// Once again, just because we allow it to vectors and pointers only.
210 /// This statement could be expanded as below:
211 /// 2.1. All vectors with equal bitwidth to vector A, has equal bitwidth to
212 /// vector B, and thus bitcastable to B as well.
213 /// 2.2. All pointers of the same address space, no matter what they point to,
214 /// bitcastable. So if C is pointer, it could be bitcasted to A and to B.
215 /// So any constant equal or bitcastable to A is equal or bitcastable to B.
216 /// QED.
217 ///
218 /// In another words, for pointers and vectors, we ignore top-level type and
219 /// look at their particular properties (bit-width for vectors, and
220 /// address space for pointers).
221 /// If these properties are equal - compare their contents.
222 int cmpConstants(const Constant *L, const Constant *R) const;
223
224 /// Compares two global values by number. Uses the GlobalNumbersState to
225 /// identify the same gobals across function calls.
226 int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const;
227
228 /// Assign or look up previously assigned numbers for the two values, and
229 /// return whether the numbers are equal. Numbers are assigned in the order
230 /// visited.
231 /// Comparison order:
232 /// Stage 0: Value that is function itself is always greater then others.
233 /// If left and right values are references to their functions, then
234 /// they are equal.
235 /// Stage 1: Constants are greater than non-constants.
236 /// If both left and right are constants, then the result of
237 /// cmpConstants is used as cmpValues result.
238 /// Stage 2: InlineAsm instances are greater than others. If both left and
239 /// right are InlineAsm instances, InlineAsm* pointers casted to
240 /// integers and compared as numbers.
241 /// Stage 3: For all other cases we compare order we meet these values in
242 /// their functions. If right value was met first during scanning,
243 /// then left value is greater.
244 /// In another words, we compare serial numbers, for more details
245 /// see comments for sn_mapL and sn_mapR.
246 int cmpValues(const Value *L, const Value *R) const;
247
248 /// Compare two Instructions for equivalence, similar to
249 /// Instruction::isSameOperationAs.
250 ///
251 /// Stages are listed in "most significant stage first" order:
252 /// On each stage below, we do comparison between some left and right
253 /// operation parts. If parts are non-equal, we assign parts comparison
254 /// result to the operation comparison result and exit from method.
255 /// Otherwise we proceed to the next stage.
256 /// Stages:
257 /// 1. Operations opcodes. Compared as numbers.
258 /// 2. Number of operands.
259 /// 3. Operation types. Compared with cmpType method.
260 /// 4. Compare operation subclass optional data as stream of bytes:
261 /// just convert it to integers and call cmpNumbers.
262 /// 5. Compare in operation operand types with cmpType in
263 /// most significant operand first order.
264 /// 6. Last stage. Check operations for some specific attributes.
265 /// For example, for Load it would be:
266 /// 6.1.Load: volatile (as boolean flag)
267 /// 6.2.Load: alignment (as integer numbers)
268 /// 6.3.Load: ordering (as underlying enum class value)
269 /// 6.4.Load: synch-scope (as integer numbers)
270 /// 6.5.Load: range metadata (as integer ranges)
271 /// On this stage its better to see the code, since its not more than 10-15
272 /// strings for particular instruction, and could change sometimes.
273 ///
274 /// Sets \p needToCmpOperands to true if the operands of the instructions
275 /// still must be compared afterwards. In this case it's already guaranteed
276 /// that both instructions have the same number of operands.
277 int cmpOperations(const Instruction *L, const Instruction *R,
278 bool &needToCmpOperands) const;
279
280 /// cmpType - compares two types,
281 /// defines total ordering among the types set.
282 ///
283 /// Return values:
284 /// 0 if types are equal,
285 /// -1 if Left is less than Right,
286 /// +1 if Left is greater than Right.
287 ///
288 /// Description:
289 /// Comparison is broken onto stages. Like in lexicographical comparison
290 /// stage coming first has higher priority.
291 /// On each explanation stage keep in mind total ordering properties.
292 ///
293 /// 0. Before comparison we coerce pointer types of 0 address space to
294 /// integer.
295 /// We also don't bother with same type at left and right, so
296 /// just return 0 in this case.
297 ///
298 /// 1. If types are of different kind (different type IDs).
299 /// Return result of type IDs comparison, treating them as numbers.
300 /// 2. If types are integers, check that they have the same width. If they
301 /// are vectors, check that they have the same count and subtype.
302 /// 3. Types have the same ID, so check whether they are one of:
303 /// * Void
304 /// * Float
305 /// * Double
306 /// * X86_FP80
307 /// * FP128
308 /// * PPC_FP128
309 /// * Label
310 /// * Metadata
311 /// We can treat these types as equal whenever their IDs are same.
312 /// 4. If Left and Right are pointers, return result of address space
313 /// comparison (numbers comparison). We can treat pointer types of same
314 /// address space as equal.
315 /// 5. If types are complex.
316 /// Then both Left and Right are to be expanded and their element types will
317 /// be checked with the same way. If we get Res != 0 on some stage, return it.
318 /// Otherwise return 0.
319 /// 6. For all other cases put llvm_unreachable.
320 int cmpTypes(Type *TyL, Type *TyR) const;
321
322 int cmpNumbers(uint64_t L, uint64_t R) const;
323 int cmpAligns(Align L, Align R) const;
324 int cmpAPInts(const APInt &L, const APInt &R) const;
325 int cmpAPFloats(const APFloat &L, const APFloat &R) const;
326 int cmpMem(StringRef L, StringRef R) const;
327
328 // The two functions undergoing comparison.
329 const Function *FnL, *FnR;
330
331private:
332 int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const;
333 int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
334 int cmpAttrs(const AttributeList L, const AttributeList R) const;
335 int cmpRangeMetadata(const MDNode *L, const MDNode *R) const;
336 int cmpOperandBundlesSchema(const CallBase &LCS, const CallBase &RCS) const;
337
338 /// Compare two GEPs for equivalent pointer arithmetic.
339 /// Parts to be compared for each comparison stage,
340 /// most significant stage first:
341 /// 1. Address space. As numbers.
342 /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
343 /// 3. Pointer operand type (using cmpType method).
344 /// 4. Number of operands.
345 /// 5. Compare operands, using cmpValues method.
346 int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const;
347 int cmpGEPs(const GetElementPtrInst *GEPL,
348 const GetElementPtrInst *GEPR) const {
349 return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
350 }
351
352 /// Assign serial numbers to values from left function, and values from
353 /// right function.
354 /// Explanation:
355 /// Being comparing functions we need to compare values we meet at left and
356 /// right sides.
357 /// Its easy to sort things out for external values. It just should be
358 /// the same value at left and right.
359 /// But for local values (those were introduced inside function body)
360 /// we have to ensure they were introduced at exactly the same place,
361 /// and plays the same role.
362 /// Let's assign serial number to each value when we meet it first time.
363 /// Values that were met at same place will be with same serial numbers.
364 /// In this case it would be good to explain few points about values assigned
365 /// to BBs and other ways of implementation (see below).
366 ///
367 /// 1. Safety of BB reordering.
368 /// It's safe to change the order of BasicBlocks in function.
369 /// Relationship with other functions and serial numbering will not be
370 /// changed in this case.
371 /// As follows from FunctionComparator::compare(), we do CFG walk: we start
372 /// from the entry, and then take each terminator. So it doesn't matter how in
373 /// fact BBs are ordered in function. And since cmpValues are called during
374 /// this walk, the numbering depends only on how BBs located inside the CFG.
375 /// So the answer is - yes. We will get the same numbering.
376 ///
377 /// 2. Impossibility to use dominance properties of values.
378 /// If we compare two instruction operands: first is usage of local
379 /// variable AL from function FL, and second is usage of local variable AR
380 /// from FR, we could compare their origins and check whether they are
381 /// defined at the same place.
382 /// But, we are still not able to compare operands of PHI nodes, since those
383 /// could be operands from further BBs we didn't scan yet.
384 /// So it's impossible to use dominance properties in general.
385 mutable DenseMap<const Value*, int> sn_mapL, sn_mapR;
386
387 // The global state we will use
388 GlobalNumberState* GlobalNumbers;
389};
390
391} // end namespace llvm
392
393#endif // LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
Atomic ordering constants.
RelocType Type
Definition: COFFYAML.cpp:390
This file defines the DenseMap class.
Class for arbitrary precision integers.
Definition: APInt.h:75
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1184
This is an important base class in LLVM.
Definition: Constant.h:41
FunctionComparator - Compares two functions to determine whether or not they will generate machine co...
FunctionComparator(const Function *F1, const Function *F2, GlobalNumberState *GN)
int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const
Test whether two basic blocks have equivalent behaviour.
int compareSignature() const
Compares the signature and other general attributes of the two functions.
int cmpMem(StringRef L, StringRef R) const
int compare()
Test whether the two functions have equivalent behaviour.
int cmpAPFloats(const APFloat &L, const APFloat &R) const
static FunctionHash functionHash(Function &)
int cmpTypes(Type *TyL, Type *TyR) const
cmpType - compares two types, defines total ordering among the types set.
int cmpOperations(const Instruction *L, const Instruction *R, bool &needToCmpOperands) const
Compare two Instructions for equivalence, similar to Instruction::isSameOperationAs.
int cmpNumbers(uint64_t L, uint64_t R) const
int cmpAligns(Align L, Align R) const
void beginCompare()
Start the comparison.
int cmpValues(const Value *L, const Value *R) const
Assign or look up previously assigned numbers for the two values, and return whether the numbers are ...
int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const
Compares two global values by number.
int cmpConstants(const Constant *L, const Constant *R) const
Constants comparison.
int cmpAPInts(const APInt &L, const APInt &R) const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:940
GlobalNumberState assigns an integer to each global value in the program, which is used by the compar...
void erase(GlobalValue *Global)
uint64_t getNumber(GlobalValue *Global)
Metadata node.
Definition: Metadata.h:943
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void clear()
Definition: ValueMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: ValueMap.h:172
bool erase(const KeyT &Val)
Definition: ValueMap.h:190
LLVM Value Representation.
Definition: Value.h:74
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Global
Append to llvm.global_dtors.
AtomicOrdering
Atomic ordering for LLVM's memory model.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This class defines the default behavior for configurable aspects of ValueMap<>.
Definition: ValueMap.h:56