-
Notifications
You must be signed in to change notification settings - Fork 13.3k
/
Copy pathHashUtilities.cpp
205 lines (173 loc) · 6.64 KB
/
HashUtilities.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Computation of hash values over BinaryFunction and BinaryBasicBlock.
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Utils/NameResolver.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
namespace bolt {
std::string hashInteger(uint64_t Value) {
std::string HashString;
if (Value == 0)
HashString.push_back(0);
while (Value) {
uint8_t LSB = Value & 0xff;
HashString.push_back(LSB);
Value >>= 8;
}
return HashString;
}
std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
std::string HashString;
// Ignore function references.
if (BC.getFunctionForSymbol(&Symbol))
return HashString;
llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
if (!ErrorOrValue)
return HashString;
// Ignore jump table references.
if (BC.getJumpTableContainingAddress(*ErrorOrValue))
return HashString;
return HashString.append(hashInteger(*ErrorOrValue));
}
std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
switch (Expr.getKind()) {
case MCExpr::Constant:
return hashInteger(cast<MCConstantExpr>(Expr).getValue());
case MCExpr::SymbolRef:
return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
case MCExpr::Unary: {
const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
return hashInteger(UnaryExpr.getOpcode())
.append(hashExpr(BC, *UnaryExpr.getSubExpr()));
}
case MCExpr::Binary: {
const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
return hashExpr(BC, *BinaryExpr.getLHS())
.append(hashInteger(BinaryExpr.getOpcode()))
.append(hashExpr(BC, *BinaryExpr.getRHS()));
}
case MCExpr::Target:
return std::string();
}
llvm_unreachable("invalid expression kind");
}
std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
if (Operand.isImm())
return hashInteger(Operand.getImm());
if (Operand.isReg())
return hashInteger(Operand.getReg());
if (Operand.isExpr())
return hashExpr(BC, *Operand.getExpr());
return std::string();
}
std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
OperandHashFuncTy OperandHashFunc) {
const bool IsX86 = BC.isX86();
// The hash is computed by creating a string of all instruction opcodes and
// possibly their operands and then hashing that string with std::hash.
std::string HashString;
for (const MCInst &Inst : BB) {
if (BC.MIB->isPseudo(Inst))
continue;
unsigned Opcode = Inst.getOpcode();
// Ignore unconditional jumps since we check CFG consistency by processing
// basic blocks in order and do not rely on branches to be in-sync with
// CFG. Note that we still use condition code of conditional jumps.
if (BC.MIB->isUnconditionalBranch(Inst))
continue;
if (IsX86 && BC.MIB->isConditionalBranch(Inst))
Opcode = BC.MIB->getShortBranchOpcode(Opcode);
if (Opcode == 0) {
HashString.push_back(0);
} else {
StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
HashString.append(OpcodeName.str());
}
for (const MCOperand &Op : MCPlus::primeOperands(Inst))
HashString.append(OperandHashFunc(Op));
}
return HashString;
}
/// A "loose" hash of a basic block to use with the stale profile matching. The
/// computed value will be the same for blocks with minor changes (such as
/// reordering of instructions or using different operands) but may result in
/// collisions that need to be resolved by a stronger hashing.
std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
// The hash is computed by creating a string of all lexicographically ordered
// instruction opcodes, which is then hashed with std::hash.
std::set<std::string> Opcodes;
for (const MCInst &Inst : BB) {
// Skip pseudo instructions and nops.
if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
continue;
// Ignore unconditional jumps, as they can be added / removed as a result
// of basic block reordering.
if (BC.MIB->isUnconditionalBranch(Inst))
continue;
// Do not distinguish different types of conditional jumps.
if (BC.MIB->isConditionalBranch(Inst)) {
Opcodes.insert("JMP");
continue;
}
std::string Mnemonic = BC.InstPrinter->getMnemonic(Inst).first;
llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
Opcodes.insert(Mnemonic);
}
std::string HashString;
for (const std::string &Opcode : Opcodes)
HashString.append(Opcode);
return HashString;
}
/// An even looser hash level relative to $ hashBlockLoose to use with stale
/// profile matching, composed of the names of a block's called functions in
/// lexicographic order.
std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
// The hash is computed by creating a string of all lexicographically ordered
// called function names.
std::vector<std::string> FunctionNames;
for (const MCInst &Instr : BB) {
// Skip non-call instructions.
if (!BC.MIB->isCall(Instr))
continue;
const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
if (!CallSymbol)
continue;
FunctionNames.push_back(std::string(CallSymbol->getName()));
}
std::sort(FunctionNames.begin(), FunctionNames.end());
std::string HashString;
for (const std::string &FunctionName : FunctionNames)
HashString.append(FunctionName);
return HashString;
}
/// The same as the $hashBlockCalls function, but for profiled functions.
std::string
hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>
&IdToYamlFunction,
const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
std::vector<std::string> FunctionNames;
for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) {
auto It = IdToYamlFunction.find(CallSiteInfo.DestId);
if (It == IdToYamlFunction.end())
continue;
StringRef Name = NameResolver::dropNumNames(It->second->Name);
FunctionNames.push_back(std::string(Name));
}
std::sort(FunctionNames.begin(), FunctionNames.end());
std::string HashString;
for (const std::string &FunctionName : FunctionNames)
HashString.append(FunctionName);
return HashString;
}
} // namespace bolt
} // namespace llvm