LLVM 20.0.0git
StructuralHash.cpp
Go to the documentation of this file.
1//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/IR/Function.h"
12#include "llvm/IR/InstrTypes.h"
15#include "llvm/IR/Module.h"
16
17using namespace llvm;
18
19namespace {
20
21// Basic hashing mechanism to detect structural change to the IR, used to verify
22// pass return status consistency with actual change. In addition to being used
23// by the MergeFunctions pass.
24
25class StructuralHashImpl {
26 stable_hash Hash = 4;
27
28 bool DetailedHash;
29
30 // This random value acts as a block header, as otherwise the partition of
31 // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
32 static constexpr stable_hash BlockHeaderHash = 45798;
33 static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
34 static constexpr stable_hash GlobalHeaderHash = 23456;
35
36 /// IgnoreOp is a function that returns true if the operand should be ignored.
37 IgnoreOperandFunc IgnoreOp = nullptr;
38 /// A mapping from instruction indices to instruction pointers.
39 /// The index represents the position of an instruction based on the order in
40 /// which it is first encountered.
41 std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
42 /// A mapping from pairs of instruction indices and operand indices
43 /// to the hashes of the operands.
44 std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
45
46 /// Assign a unique ID to each Value in the order they are first seen.
48
49 static stable_hash hashType(Type *ValueType) {
51 Hashes.emplace_back(ValueType->getTypeID());
52 if (ValueType->isIntegerTy())
53 Hashes.emplace_back(ValueType->getIntegerBitWidth());
54 return stable_hash_combine(Hashes);
55 }
56
57public:
58 StructuralHashImpl() = delete;
59 explicit StructuralHashImpl(bool DetailedHash,
60 IgnoreOperandFunc IgnoreOp = nullptr)
61 : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
62 if (IgnoreOp) {
63 IndexInstruction = std::make_unique<IndexInstrMap>();
64 IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
65 }
66 }
67
68 static stable_hash hashAPInt(const APInt &I) {
70 Hashes.emplace_back(I.getBitWidth());
71 auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
72 Hashes.append(RawVals.begin(), RawVals.end());
73 return stable_hash_combine(Hashes);
74 }
75
76 static stable_hash hashAPFloat(const APFloat &F) {
77 return hashAPInt(F.bitcastToAPInt());
78 }
79
80 static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
81 if (!GVar.hasInitializer())
82 return hashGlobalValue(&GVar);
83
84 // Hash the contents of a string.
85 if (GVar.getName().starts_with(".str")) {
86 auto *C = GVar.getInitializer();
87 if (const auto *Seq = dyn_cast<ConstantDataSequential>(C))
88 if (Seq->isString())
89 return stable_hash_name(Seq->getAsString());
90 }
91
92 // Hash structural contents of Objective-C metadata in specific sections.
93 // This can be extended to other metadata if needed.
94 static constexpr const char *SectionNames[] = {
95 "__cfstring", "__cstring", "__objc_classrefs",
96 "__objc_methname", "__objc_selrefs",
97 };
98 if (GVar.hasSection()) {
100 for (const char *Name : SectionNames)
101 if (SectionName.contains(Name))
102 return hashConstant(GVar.getInitializer());
103 }
104
105 return hashGlobalValue(&GVar);
106 }
107
108 static stable_hash hashGlobalValue(const GlobalValue *GV) {
109 if (!GV->hasName())
110 return 0;
111 return stable_hash_name(GV->getName());
112 }
113
114 // Compute a hash for a Constant. This function is logically similar to
115 // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
116 // we're interested in computing a hash rather than comparing two Constants.
117 // Some of the logic is simplified, e.g, we don't expand GEPOperator.
118 static stable_hash hashConstant(const Constant *C) {
120
121 Type *Ty = C->getType();
122 Hashes.emplace_back(hashType(Ty));
123
124 if (C->isNullValue()) {
125 Hashes.emplace_back(static_cast<stable_hash>('N'));
126 return stable_hash_combine(Hashes);
127 }
128
129 if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
130 Hashes.emplace_back(hashGlobalVariable(*GVar));
131 return stable_hash_combine(Hashes);
132 }
133
134 if (auto *G = dyn_cast<GlobalValue>(C)) {
135 Hashes.emplace_back(hashGlobalValue(G));
136 return stable_hash_combine(Hashes);
137 }
138
139 if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
140 if (Seq->isString()) {
141 Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
142 return stable_hash_combine(Hashes);
143 }
144 }
145
146 switch (C->getValueID()) {
147 case Value::ConstantIntVal: {
148 const APInt &Int = cast<ConstantInt>(C)->getValue();
149 Hashes.emplace_back(hashAPInt(Int));
150 return stable_hash_combine(Hashes);
151 }
152 case Value::ConstantFPVal: {
153 const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
154 Hashes.emplace_back(hashAPFloat(APF));
155 return stable_hash_combine(Hashes);
156 }
157 case Value::ConstantArrayVal:
158 case Value::ConstantStructVal:
159 case Value::ConstantVectorVal:
160 case Value::ConstantExprVal: {
161 for (const auto &Op : C->operands()) {
162 auto H = hashConstant(cast<Constant>(Op));
163 Hashes.emplace_back(H);
164 }
165 return stable_hash_combine(Hashes);
166 }
167 case Value::BlockAddressVal: {
168 const BlockAddress *BA = cast<BlockAddress>(C);
169 auto H = hashGlobalValue(BA->getFunction());
170 Hashes.emplace_back(H);
171 return stable_hash_combine(Hashes);
172 }
173 case Value::DSOLocalEquivalentVal: {
174 const auto *Equiv = cast<DSOLocalEquivalent>(C);
175 auto H = hashGlobalValue(Equiv->getGlobalValue());
176 Hashes.emplace_back(H);
177 return stable_hash_combine(Hashes);
178 }
179 default:
180 // Skip other types of constants for simplicity.
181 return stable_hash_combine(Hashes);
182 }
183 }
184
185 stable_hash hashValue(Value *V) {
186 // Check constant and return its hash.
187 Constant *C = dyn_cast<Constant>(V);
188 if (C)
189 return hashConstant(C);
190
191 // Hash argument number.
193 if (Argument *Arg = dyn_cast<Argument>(V))
194 Hashes.emplace_back(Arg->getArgNo());
195
196 // Get an index (an insertion order) for the non-constant value.
197 auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
198 Hashes.emplace_back(It->second);
199
200 return stable_hash_combine(Hashes);
201 }
202
203 stable_hash hashOperand(Value *Operand) {
205 Hashes.emplace_back(hashType(Operand->getType()));
206 Hashes.emplace_back(hashValue(Operand));
207 return stable_hash_combine(Hashes);
208 }
209
210 stable_hash hashInstruction(const Instruction &Inst) {
212 Hashes.emplace_back(Inst.getOpcode());
213
214 if (!DetailedHash)
215 return stable_hash_combine(Hashes);
216
217 Hashes.emplace_back(hashType(Inst.getType()));
218
219 // Handle additional properties of specific instructions that cause
220 // semantic differences in the IR.
221 if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
222 Hashes.emplace_back(ComparisonInstruction->getPredicate());
223
224 unsigned InstIdx = 0;
225 if (IndexInstruction) {
226 InstIdx = IndexInstruction->size();
227 IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
228 }
229
230 for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
231 auto OpndHash = hashOperand(Op);
232 if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
233 assert(IndexOperandHashMap);
234 IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
235 } else
236 Hashes.emplace_back(OpndHash);
237 }
238
239 return stable_hash_combine(Hashes);
240 }
241
242 // A function hash is calculated by considering only the number of arguments
243 // and whether a function is varargs, the order of basic blocks (given by the
244 // successors of each basic block in depth first order), and the order of
245 // opcodes of each instruction within each of these basic blocks. This mirrors
246 // the strategy FunctionComparator::compare() uses to compare functions by
247 // walking the BBs in depth first order and comparing each instruction in
248 // sequence. Because this hash currently does not look at the operands, it is
249 // insensitive to things such as the target of calls and the constants used in
250 // the function, which makes it useful when possibly merging functions which
251 // are the same modulo constants and call targets.
252 //
253 // Note that different users of StructuralHash will want different behavior
254 // out of it (i.e., MergeFunctions will want something different from PM
255 // expensive checks for pass modification status). When modifying this
256 // function, most changes should be gated behind an option and enabled
257 // selectively.
258 void update(const Function &F) {
259 // Declarations don't affect analyses.
260 if (F.isDeclaration())
261 return;
262
264 Hashes.emplace_back(Hash);
265 Hashes.emplace_back(FunctionHeaderHash);
266
267 Hashes.emplace_back(F.isVarArg());
268 Hashes.emplace_back(F.arg_size());
269
272
273 // Walk the blocks in the same order as
274 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
275 // function "structure." (BB and opcode sequence)
276 BBs.push_back(&F.getEntryBlock());
277 VisitedBBs.insert(BBs[0]);
278 while (!BBs.empty()) {
279 const BasicBlock *BB = BBs.pop_back_val();
280
281 Hashes.emplace_back(BlockHeaderHash);
282 for (auto &Inst : *BB)
283 Hashes.emplace_back(hashInstruction(Inst));
284
285 for (const BasicBlock *Succ : successors(BB))
286 if (VisitedBBs.insert(Succ).second)
287 BBs.push_back(Succ);
288 }
289
290 // Update the combined hash in place.
291 Hash = stable_hash_combine(Hashes);
292 }
293
294 void update(const GlobalVariable &GV) {
295 // Declarations and used/compiler.used don't affect analyses.
296 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
297 // we ignore anything with the `.llvm` prefix
298 if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
299 return;
301 Hashes.emplace_back(Hash);
302 Hashes.emplace_back(GlobalHeaderHash);
303 Hashes.emplace_back(GV.getValueType()->getTypeID());
304
305 // Update the combined hash in place.
306 Hash = stable_hash_combine(Hashes);
307 }
308
309 void update(const Module &M) {
310 for (const GlobalVariable &GV : M.globals())
311 update(GV);
312 for (const Function &F : M)
313 update(F);
314 }
315
316 uint64_t getHash() const { return Hash; }
317
318 std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
319 return std::move(IndexInstruction);
320 }
321
322 std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
323 return std::move(IndexOperandHashMap);
324 }
325};
326
327} // namespace
328
329stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
330 StructuralHashImpl H(DetailedHash);
331 H.update(F);
332 return H.getHash();
333}
334
336 return StructuralHashImpl::hashGlobalVariable(GVar);
337}
338
339stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
340 StructuralHashImpl H(DetailedHash);
341 H.update(M);
342 return H.getHash();
343}
344
347 IgnoreOperandFunc IgnoreOp) {
348 StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
349 H.update(F);
350 return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
351 H.getIndexPairOpndHashMap());
352}
std::string Name
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
#define H(x, y, z)
Definition: MD5.cpp:57
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Class for arbitrary precision integers.
Definition: APInt.h:78
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
The address of a basic block.
Definition: Constants.h:893
Function * getFunction() const
Definition: Constants.h:923
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:226
unsigned size() const
Definition: DenseMap.h:99
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:117
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:109
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:296
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
op_range operands()
Definition: User.h:288
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
static constexpr StringLiteral SectionNames[SectionKindsNum]
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
FunctionHashInfo StructuralHashWithDifferences(const Function &F, IgnoreOperandFunc IgnoreOp)
Computes a structural hash of a given function, considering the structure and content of the function...
auto successors(const MachineBasicBlock *BB)
stable_hash stable_hash_name(StringRef Name)
Definition: StableHashing.h:72
stable_hash stable_hash_combine(ArrayRef< stable_hash > Buffer)
Definition: StableHashing.h:30
uint64_t stable_hash
An opaque object representing a stable hash code.
Definition: StableHashing.h:28
stable_hash StructuralHash(const Function &F, bool DetailedHash=false)
Returns a hash of the function F.
std::function< bool(const Instruction *, unsigned)> IgnoreOperandFunc
A function that takes an instruction and an operand index and returns true if the operand should be i...