LLVM 20.0.0git
FunctionComparator.h
Go to the documentation of this file.
1//===- FunctionComparator.h - Function Comparator ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the FunctionComparator and GlobalNumberState classes which
10// are used by the MergeFunctions pass for comparing functions.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
15#define LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
16
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/StringRef.h"
20#include "llvm/IR/Operator.h"
21#include "llvm/IR/ValueMap.h"
24#include <cstdint>
25#include <tuple>
26
27namespace llvm {
28
29class APFloat;
30class AttributeList;
31class APInt;
32class BasicBlock;
33class Constant;
34class Function;
35class GlobalValue;
36class InlineAsm;
37class Instruction;
38class MDNode;
39class Type;
40class Value;
41
42/// GlobalNumberState assigns an integer to each global value in the program,
43/// which is used by the comparison routine to order references to globals. This
44/// state must be preserved throughout the pass, because Functions and other
45/// globals need to maintain their relative order. Globals are assigned a number
46/// when they are first visited. This order is deterministic, and so the
47/// assigned numbers are as well. When two functions are merged, neither number
48/// is updated. If the symbols are weak, this would be incorrect. If they are
49/// strong, then one will be replaced at all references to the other, and so
50/// direct callsites will now see one or the other symbol, and no update is
51/// necessary. Note that if we were guaranteed unique names, we could just
52/// compare those, but this would not work for stripped bitcodes or for those
53/// few symbols without a name.
55 struct Config : ValueMapConfig<GlobalValue *> {
56 enum { FollowRAUW = false };
57 };
58
59 // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
60 // occurs, the mapping does not change. Tracking changes is unnecessary, and
61 // also problematic for weak symbols (which may be overwritten).
63 ValueNumberMap GlobalNumbers;
64
65 // The next unused serial number to assign to a global.
66 uint64_t NextNumber = 0;
67
68public:
69 GlobalNumberState() = default;
70
73 bool Inserted;
74 std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
75 if (Inserted)
76 NextNumber++;
77 return MapIter->second;
78 }
79
81 GlobalNumbers.erase(Global);
82 }
83
84 void clear() {
85 GlobalNumbers.clear();
86 }
87};
88
89/// FunctionComparator - Compares two functions to determine whether or not
90/// they will generate machine code with the same behaviour. DataLayout is
91/// used if available. The comparator always fails conservatively (erring on the
92/// side of claiming that two functions are different).
94public:
95 FunctionComparator(const Function *F1, const Function *F2,
97 : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
98
99 /// Test whether the two functions have equivalent behaviour.
100 int compare();
101
102protected:
103 /// Start the comparison.
105 sn_mapL.clear();
106 sn_mapR.clear();
107 }
108
109 /// Compares the signature and other general attributes of the two functions.
110 int compareSignature() const;
111
112 /// Test whether two basic blocks have equivalent behaviour.
113 int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const;
114
115 /// Constants comparison.
116 /// Its analog to lexicographical comparison between hypothetical numbers
117 /// of next format:
118 /// <bitcastability-trait><raw-bit-contents>
119 ///
120 /// 1. Bitcastability.
121 /// Check whether L's type could be losslessly bitcasted to R's type.
122 /// On this stage method, in case when lossless bitcast is not possible
123 /// method returns -1 or 1, thus also defining which type is greater in
124 /// context of bitcastability.
125 /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight
126 /// to the contents comparison.
127 /// If types differ, remember types comparison result and check
128 /// whether we still can bitcast types.
129 /// Stage 1: Types that satisfies isFirstClassType conditions are always
130 /// greater then others.
131 /// Stage 2: Vector is greater then non-vector.
132 /// If both types are vectors, then vector with greater bitwidth is
133 /// greater.
134 /// If both types are vectors with the same bitwidth, then types
135 /// are bitcastable, and we can skip other stages, and go to contents
136 /// comparison.
137 /// Stage 3: Pointer types are greater than non-pointers. If both types are
138 /// pointers of the same address space - go to contents comparison.
139 /// Different address spaces: pointer with greater address space is
140 /// greater.
141 /// Stage 4: Types are neither vectors, nor pointers. And they differ.
142 /// We don't know how to bitcast them. So, we better don't do it,
143 /// and return types comparison result (so it determines the
144 /// relationship among constants we don't know how to bitcast).
145 ///
146 /// Just for clearance, let's see how the set of constants could look
147 /// on single dimension axis:
148 ///
149 /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
150 /// Where: NFCT - Not a FirstClassType
151 /// FCT - FirstClassTyp:
152 ///
153 /// 2. Compare raw contents.
154 /// It ignores types on this stage and only compares bits from L and R.
155 /// Returns 0, if L and R has equivalent contents.
156 /// -1 or 1 if values are different.
157 /// Pretty trivial:
158 /// 2.1. If contents are numbers, compare numbers.
159 /// Ints with greater bitwidth are greater. Ints with same bitwidths
160 /// compared by their contents.
161 /// 2.2. "And so on". Just to avoid discrepancies with comments
162 /// perhaps it would be better to read the implementation itself.
163 /// 3. And again about overall picture. Let's look back at how the ordered set
164 /// of constants will look like:
165 /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
166 ///
167 /// Now look, what could be inside [FCT, "others"], for example:
168 /// [FCT, "others"] =
169 /// [
170 /// [double 0.1], [double 1.23],
171 /// [i32 1], [i32 2],
172 /// { double 1.0 }, ; StructTyID, NumElements = 1
173 /// { i32 1 }, ; StructTyID, NumElements = 1
174 /// { double 1, i32 1 }, ; StructTyID, NumElements = 2
175 /// { i32 1, double 1 } ; StructTyID, NumElements = 2
176 /// ]
177 ///
178 /// Let's explain the order. Float numbers will be less than integers, just
179 /// because of cmpType terms: FloatTyID < IntegerTyID.
180 /// Floats (with same fltSemantics) are sorted according to their value.
181 /// Then you can see integers, and they are, like a floats,
182 /// could be easy sorted among each others.
183 /// The structures. Structures are grouped at the tail, again because of their
184 /// TypeID: StructTyID > IntegerTyID > FloatTyID.
185 /// Structures with greater number of elements are greater. Structures with
186 /// greater elements going first are greater.
187 /// The same logic with vectors, arrays and other possible complex types.
188 ///
189 /// Bitcastable constants.
190 /// Let's assume, that some constant, belongs to some group of
191 /// "so-called-equal" values with different types, and at the same time
192 /// belongs to another group of constants with equal types
193 /// and "really" equal values.
194 ///
195 /// Now, prove that this is impossible:
196 ///
197 /// If constant A with type TyA is bitcastable to B with type TyB, then:
198 /// 1. All constants with equal types to TyA, are bitcastable to B. Since
199 /// those should be vectors (if TyA is vector), pointers
200 /// (if TyA is pointer), or else (if TyA equal to TyB), those types should
201 /// be equal to TyB.
202 /// 2. All constants with non-equal, but bitcastable types to TyA, are
203 /// bitcastable to B.
204 /// Once again, just because we allow it to vectors and pointers only.
205 /// This statement could be expanded as below:
206 /// 2.1. All vectors with equal bitwidth to vector A, has equal bitwidth to
207 /// vector B, and thus bitcastable to B as well.
208 /// 2.2. All pointers of the same address space, no matter what they point to,
209 /// bitcastable. So if C is pointer, it could be bitcasted to A and to B.
210 /// So any constant equal or bitcastable to A is equal or bitcastable to B.
211 /// QED.
212 ///
213 /// In another words, for pointers and vectors, we ignore top-level type and
214 /// look at their particular properties (bit-width for vectors, and
215 /// address space for pointers).
216 /// If these properties are equal - compare their contents.
217 int cmpConstants(const Constant *L, const Constant *R) const;
218
219 /// Compares two global values by number. Uses the GlobalNumbersState to
220 /// identify the same gobals across function calls.
221 int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const;
222
223 /// Assign or look up previously assigned numbers for the two values, and
224 /// return whether the numbers are equal. Numbers are assigned in the order
225 /// visited.
226 /// Comparison order:
227 /// Stage 0: Value that is function itself is always greater then others.
228 /// If left and right values are references to their functions, then
229 /// they are equal.
230 /// Stage 1: Constants are greater than non-constants.
231 /// If both left and right are constants, then the result of
232 /// cmpConstants is used as cmpValues result.
233 /// Stage 2: InlineAsm instances are greater than others. If both left and
234 /// right are InlineAsm instances, InlineAsm* pointers casted to
235 /// integers and compared as numbers.
236 /// Stage 3: For all other cases we compare order we meet these values in
237 /// their functions. If right value was met first during scanning,
238 /// then left value is greater.
239 /// In another words, we compare serial numbers, for more details
240 /// see comments for sn_mapL and sn_mapR.
241 int cmpValues(const Value *L, const Value *R) const;
242
243 /// Compare two Instructions for equivalence, similar to
244 /// Instruction::isSameOperationAs.
245 ///
246 /// Stages are listed in "most significant stage first" order:
247 /// On each stage below, we do comparison between some left and right
248 /// operation parts. If parts are non-equal, we assign parts comparison
249 /// result to the operation comparison result and exit from method.
250 /// Otherwise we proceed to the next stage.
251 /// Stages:
252 /// 1. Operations opcodes. Compared as numbers.
253 /// 2. Number of operands.
254 /// 3. Operation types. Compared with cmpType method.
255 /// 4. Compare operation subclass optional data as stream of bytes:
256 /// just convert it to integers and call cmpNumbers.
257 /// 5. Compare in operation operand types with cmpType in
258 /// most significant operand first order.
259 /// 6. Last stage. Check operations for some specific attributes.
260 /// For example, for Load it would be:
261 /// 6.1.Load: volatile (as boolean flag)
262 /// 6.2.Load: alignment (as integer numbers)
263 /// 6.3.Load: ordering (as underlying enum class value)
264 /// 6.4.Load: synch-scope (as integer numbers)
265 /// 6.5.Load: range metadata (as integer ranges)
266 /// On this stage its better to see the code, since its not more than 10-15
267 /// strings for particular instruction, and could change sometimes.
268 ///
269 /// Sets \p needToCmpOperands to true if the operands of the instructions
270 /// still must be compared afterwards. In this case it's already guaranteed
271 /// that both instructions have the same number of operands.
272 int cmpOperations(const Instruction *L, const Instruction *R,
273 bool &needToCmpOperands) const;
274
275 /// cmpType - compares two types,
276 /// defines total ordering among the types set.
277 ///
278 /// Return values:
279 /// 0 if types are equal,
280 /// -1 if Left is less than Right,
281 /// +1 if Left is greater than Right.
282 ///
283 /// Description:
284 /// Comparison is broken onto stages. Like in lexicographical comparison
285 /// stage coming first has higher priority.
286 /// On each explanation stage keep in mind total ordering properties.
287 ///
288 /// 0. Before comparison we coerce pointer types of 0 address space to
289 /// integer.
290 /// We also don't bother with same type at left and right, so
291 /// just return 0 in this case.
292 ///
293 /// 1. If types are of different kind (different type IDs).
294 /// Return result of type IDs comparison, treating them as numbers.
295 /// 2. If types are integers, check that they have the same width. If they
296 /// are vectors, check that they have the same count and subtype.
297 /// 3. Types have the same ID, so check whether they are one of:
298 /// * Void
299 /// * Float
300 /// * Double
301 /// * X86_FP80
302 /// * FP128
303 /// * PPC_FP128
304 /// * Label
305 /// * Metadata
306 /// We can treat these types as equal whenever their IDs are same.
307 /// 4. If Left and Right are pointers, return result of address space
308 /// comparison (numbers comparison). We can treat pointer types of same
309 /// address space as equal.
310 /// 5. If types are complex.
311 /// Then both Left and Right are to be expanded and their element types will
312 /// be checked with the same way. If we get Res != 0 on some stage, return it.
313 /// Otherwise return 0.
314 /// 6. For all other cases put llvm_unreachable.
315 int cmpTypes(Type *TyL, Type *TyR) const;
316
317 int cmpNumbers(uint64_t L, uint64_t R) const;
318 int cmpAligns(Align L, Align R) const;
319 int cmpAPInts(const APInt &L, const APInt &R) const;
320 int cmpConstantRanges(const ConstantRange &L, const ConstantRange &R) const;
321 int cmpAPFloats(const APFloat &L, const APFloat &R) const;
322 int cmpMem(StringRef L, StringRef R) const;
323
324 // The two functions undergoing comparison.
325 const Function *FnL, *FnR;
326
327private:
328 int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const;
329 int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
330 int cmpAttrs(const AttributeList L, const AttributeList R) const;
331 int cmpMDNode(const MDNode *L, const MDNode *R) const;
332 int cmpMetadata(const Metadata *L, const Metadata *R) const;
333 int cmpInstMetadata(Instruction const *L, Instruction const *R) const;
334 int cmpOperandBundlesSchema(const CallBase &LCS, const CallBase &RCS) const;
335
336 /// Compare two GEPs for equivalent pointer arithmetic.
337 /// Parts to be compared for each comparison stage,
338 /// most significant stage first:
339 /// 1. Address space. As numbers.
340 /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
341 /// 3. Pointer operand type (using cmpType method).
342 /// 4. Number of operands.
343 /// 5. Compare operands, using cmpValues method.
344 int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const;
345 int cmpGEPs(const GetElementPtrInst *GEPL,
346 const GetElementPtrInst *GEPR) const {
347 return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
348 }
349
350 /// Assign serial numbers to values from left function, and values from
351 /// right function.
352 /// Explanation:
353 /// Being comparing functions we need to compare values we meet at left and
354 /// right sides.
355 /// Its easy to sort things out for external values. It just should be
356 /// the same value at left and right.
357 /// But for local values (those were introduced inside function body)
358 /// we have to ensure they were introduced at exactly the same place,
359 /// and plays the same role.
360 /// Let's assign serial number to each value when we meet it first time.
361 /// Values that were met at same place will be with same serial numbers.
362 /// In this case it would be good to explain few points about values assigned
363 /// to BBs and other ways of implementation (see below).
364 ///
365 /// 1. Safety of BB reordering.
366 /// It's safe to change the order of BasicBlocks in function.
367 /// Relationship with other functions and serial numbering will not be
368 /// changed in this case.
369 /// As follows from FunctionComparator::compare(), we do CFG walk: we start
370 /// from the entry, and then take each terminator. So it doesn't matter how in
371 /// fact BBs are ordered in function. And since cmpValues are called during
372 /// this walk, the numbering depends only on how BBs located inside the CFG.
373 /// So the answer is - yes. We will get the same numbering.
374 ///
375 /// 2. Impossibility to use dominance properties of values.
376 /// If we compare two instruction operands: first is usage of local
377 /// variable AL from function FL, and second is usage of local variable AR
378 /// from FR, we could compare their origins and check whether they are
379 /// defined at the same place.
380 /// But, we are still not able to compare operands of PHI nodes, since those
381 /// could be operands from further BBs we didn't scan yet.
382 /// So it's impossible to use dominance properties in general.
383 mutable DenseMap<const Value*, int> sn_mapL, sn_mapR;
384
385 // The global state we will use
386 GlobalNumberState* GlobalNumbers;
387};
388
389} // end namespace llvm
390
391#endif // LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
Atomic ordering constants.
RelocType Type
Definition: COFFYAML.cpp:410
This file defines the DenseMap class.
RelaxConfig Config
Definition: ELF_riscv.cpp:506
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
This class represents a range of values.
Definition: ConstantRange.h:47
This is an important base class in LLVM.
Definition: Constant.h:42
FunctionComparator - Compares two functions to determine whether or not they will generate machine co...
FunctionComparator(const Function *F1, const Function *F2, GlobalNumberState *GN)
int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const
Test whether two basic blocks have equivalent behaviour.
int cmpConstantRanges(const ConstantRange &L, const ConstantRange &R) const
int compareSignature() const
Compares the signature and other general attributes of the two functions.
int cmpMem(StringRef L, StringRef R) const
int compare()
Test whether the two functions have equivalent behaviour.
int cmpAPFloats(const APFloat &L, const APFloat &R) const
int cmpTypes(Type *TyL, Type *TyR) const
cmpType - compares two types, defines total ordering among the types set.
int cmpOperations(const Instruction *L, const Instruction *R, bool &needToCmpOperands) const
Compare two Instructions for equivalence, similar to Instruction::isSameOperationAs.
int cmpNumbers(uint64_t L, uint64_t R) const
int cmpAligns(Align L, Align R) const
void beginCompare()
Start the comparison.
int cmpValues(const Value *L, const Value *R) const
Assign or look up previously assigned numbers for the two values, and return whether the numbers are ...
int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const
Compares two global values by number.
int cmpConstants(const Constant *L, const Constant *R) const
Constants comparison.
int cmpAPInts(const APInt &L, const APInt &R) const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
GlobalNumberState assigns an integer to each global value in the program, which is used by the compar...
void erase(GlobalValue *Global)
uint64_t getNumber(GlobalValue *Global)
Metadata node.
Definition: Metadata.h:1069
Root of the metadata hierarchy.
Definition: Metadata.h:62
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
void clear()
Definition: ValueMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: ValueMap.h:172
bool erase(const KeyT &Val)
Definition: ValueMap.h:190
LLVM Value Representation.
Definition: Value.h:74
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Global
Append to llvm.global_dtors.
AtomicOrdering
Atomic ordering for LLVM's memory model.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This class defines the default behavior for configurable aspects of ValueMap<>.
Definition: ValueMap.h:56