|
33 | 33 |
|
34 | 34 | #include <queue>
|
35 | 35 |
|
| 36 | +#undef DEBUG_TYPE |
| 37 | +#define DEBUG_TYPE "bolt-prof" |
| 38 | + |
36 | 39 | using namespace llvm;
|
37 | 40 |
|
38 | 41 | namespace opts {
|
@@ -133,6 +136,176 @@ cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
|
133 | 136 | namespace llvm {
|
134 | 137 | namespace bolt {
|
135 | 138 |
|
/// Bundles the individual components of a basic block hash. The components
/// are kept as separate 16-bit fields; the blended form packs all four of
/// them into a single uint64_t.
struct BlendedBlockHash {
private:
  /// Pack four 16-bit values into one 64-bit word. Hash1 lands in the lowest
  /// 16 bits and Hash4 in the highest 16 bits.
  static uint64_t combineHashes(uint16_t Hash1, uint16_t Hash2, uint16_t Hash3,
                                uint16_t Hash4) {
    return (uint64_t(Hash4) << 48) | (uint64_t(Hash3) << 32) |
           (uint64_t(Hash2) << 16) | uint64_t(Hash1);
  }

  /// Inverse of combineHashes(): split a 64-bit word into four 16-bit values,
  /// lowest 16 bits first.
  static void parseHashes(uint64_t Hash, uint16_t &Hash1, uint16_t &Hash2,
                          uint16_t &Hash3, uint16_t &Hash4) {
    Hash1 = static_cast<uint16_t>(Hash & 0xffff);
    Hash2 = static_cast<uint16_t>((Hash >> 16) & 0xffff);
    Hash3 = static_cast<uint16_t>((Hash >> 32) & 0xffff);
    Hash4 = static_cast<uint16_t>((Hash >> 48) & 0xffff);
  }

public:
  /// Create a hash with every component set to zero.
  explicit BlendedBlockHash() {}

  /// Unpack a previously combined 64-bit hash into its components.
  explicit BlendedBlockHash(uint64_t CombinedHash) {
    parseHashes(CombinedHash, Offset, OpcodeHash, InstrHash, NeighborHash);
  }

  /// Pack the components into a single uint64_t. From the lowest to the
  /// highest 16 bits: Offset, OpcodeHash, InstrHash, NeighborHash.
  uint64_t combine() const {
    return combineHashes(Offset, OpcodeHash, InstrHash, NeighborHash);
  }

  /// Measure how dissimilar two blended hashes are; a smaller value means the
  /// blocks are more alike, and zero means all compared components agree.
  /// Both hashes are expected to share the same OpcodeHash (asserted).
  ///
  /// The result is layered so that a NeighborHash mismatch (bit 32) outweighs
  /// an InstrHash mismatch (bit 16), which in turn outweighs any absolute
  /// difference of the offsets (low 16 bits).
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    const uint64_t NeighborPenalty = NeighborHash != BBH.NeighborHash ? 1 : 0;
    const uint64_t InstrPenalty = InstrHash != BBH.InstrHash ? 1 : 0;
    // The absolute offset difference is at most 0xffff, so it never carries
    // into the InstrHash bit.
    const uint64_t OffsetGap =
        Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset;
    return (NeighborPenalty << 32) + (InstrPenalty << 16) + OffsetGap;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// Hash of the (loose) basic block together with (loose) hashes of its
  /// successors and predecessors.
  uint16_t NeighborHash{0};
};
| 218 | + |
| 219 | +/// The object is used to identify and match basic blocks in a BinaryFunction |
| 220 | +/// given their hashes computed on a binary built from several revisions behind |
| 221 | +/// release. |
| 222 | +class StaleMatcher { |
| 223 | +public: |
| 224 | + /// Initialize stale matcher. |
| 225 | + void init(const std::vector<FlowBlock *> &Blocks, |
| 226 | + const std::vector<BlendedBlockHash> &Hashes) { |
| 227 | + assert(Blocks.size() == Hashes.size() && |
| 228 | + "incorrect matcher initialization"); |
| 229 | + for (size_t I = 0; I < Blocks.size(); I++) { |
| 230 | + FlowBlock *Block = Blocks[I]; |
| 231 | + uint16_t OpHash = Hashes[I].OpcodeHash; |
| 232 | + OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block)); |
| 233 | + } |
| 234 | + } |
| 235 | + |
| 236 | + /// Find the most similar block for a given hash. |
| 237 | + const FlowBlock *matchBlock(BlendedBlockHash BlendedHash) const { |
| 238 | + auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash); |
| 239 | + if (BlockIt == OpHashToBlocks.end()) { |
| 240 | + return nullptr; |
| 241 | + } |
| 242 | + FlowBlock *BestBlock = nullptr; |
| 243 | + uint64_t BestDist = std::numeric_limits<uint64_t>::max(); |
| 244 | + for (auto It : BlockIt->second) { |
| 245 | + FlowBlock *Block = It.second; |
| 246 | + BlendedBlockHash Hash = It.first; |
| 247 | + uint64_t Dist = Hash.distance(BlendedHash); |
| 248 | + if (BestBlock == nullptr || Dist < BestDist) { |
| 249 | + BestDist = Dist; |
| 250 | + BestBlock = Block; |
| 251 | + } |
| 252 | + } |
| 253 | + return BestBlock; |
| 254 | + } |
| 255 | + |
| 256 | +private: |
| 257 | + using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>; |
| 258 | + std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks; |
| 259 | +}; |
| 260 | + |
| 261 | +void BinaryFunction::computeBlockHashes() const { |
| 262 | + if (size() == 0) |
| 263 | + return; |
| 264 | + |
| 265 | + assert(hasCFG() && "the function is expected to have CFG"); |
| 266 | + |
| 267 | + std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size()); |
| 268 | + std::vector<uint64_t> OpcodeHashes(BasicBlocks.size()); |
| 269 | + // Initialize hash components |
| 270 | + for (size_t I = 0; I < BasicBlocks.size(); I++) { |
| 271 | + const BinaryBasicBlock *BB = BasicBlocks[I]; |
| 272 | + assert(BB->getIndex() == I && "incorrect block index"); |
| 273 | + BlendedHashes[I].Offset = BB->getOffset(); |
| 274 | + // Hashing complete instructions |
| 275 | + std::string InstrHashStr = hashBlock( |
| 276 | + BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); }); |
| 277 | + uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr); |
| 278 | + BlendedHashes[I].InstrHash = hash_64_to_16(InstrHash); |
| 279 | + // Hashing opcodes |
| 280 | + std::string OpcodeHashStr = |
| 281 | + hashBlock(BC, *BB, [](const MCOperand &Op) { return std::string(); }); |
| 282 | + OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr); |
| 283 | + BlendedHashes[I].OpcodeHash = hash_64_to_16(OpcodeHashes[I]); |
| 284 | + } |
| 285 | + |
| 286 | + // Initialize neighbor hash |
| 287 | + for (size_t I = 0; I < BasicBlocks.size(); I++) { |
| 288 | + const BinaryBasicBlock *BB = BasicBlocks[I]; |
| 289 | + uint64_t Hash = OpcodeHashes[I]; |
| 290 | + // Append hashes of successors |
| 291 | + for (BinaryBasicBlock *SuccBB : BB->successors()) { |
| 292 | + uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()]; |
| 293 | + Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); |
| 294 | + } |
| 295 | + // Append hashes of predecessors |
| 296 | + for (BinaryBasicBlock *PredBB : BB->predecessors()) { |
| 297 | + uint64_t PredHash = OpcodeHashes[PredBB->getIndex()]; |
| 298 | + Hash = hashing::detail::hash_16_bytes(Hash, PredHash); |
| 299 | + } |
| 300 | + BlendedHashes[I].NeighborHash = hash_64_to_16(Hash); |
| 301 | + } |
| 302 | + |
| 303 | + // Assign hashes |
| 304 | + for (size_t I = 0; I < BasicBlocks.size(); I++) { |
| 305 | + const BinaryBasicBlock *BB = BasicBlocks[I]; |
| 306 | + BB->setHash(BlendedHashes[I].combine()); |
| 307 | + } |
| 308 | +} |
136 | 309 | /// Create a wrapper flow function to use with the profile inference algorithm,
|
137 | 310 | /// and initialize its jumps and metadata.
|
138 | 311 | FlowFunction
|
@@ -224,23 +397,38 @@ void matchWeightsByHashes(const BinaryFunction::BasicBlockOrderType &BlockOrder,
|
224 | 397 | const yaml::bolt::BinaryFunctionProfile &YamlBF,
|
225 | 398 | FlowFunction &Func) {
|
226 | 399 | assert(Func.Blocks.size() == BlockOrder.size() + 1);
|
227 |
| - // Initialize stale matcher |
228 |
| - DenseMap<uint64_t, std::vector<FlowBlock *>> HashToBlocks; |
| 400 | + |
| 401 | + std::vector<FlowBlock *> Blocks; |
| 402 | + std::vector<BlendedBlockHash> BlendedHashes; |
229 | 403 | for (uint64_t I = 0; I < BlockOrder.size(); I++) {
|
230 | 404 | const BinaryBasicBlock *BB = BlockOrder[I];
|
231 | 405 | assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
|
232 |
| - HashToBlocks[BB->getHash()].push_back(&Func.Blocks[I + 1]); |
| 406 | + Blocks.push_back(&Func.Blocks[I + 1]); |
| 407 | + BlendedBlockHash BlendedHash(BB->getHash()); |
| 408 | + BlendedHashes.push_back(BlendedHash); |
| 409 | + LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = " |
| 410 | + << Twine::utohexstr(BB->getHash()) << "\n"); |
233 | 411 | }
|
| 412 | + StaleMatcher Matcher; |
| 413 | + Matcher.init(Blocks, BlendedHashes); |
234 | 414 |
|
235 | 415 | // Index in yaml profile => corresponding (matched) block
|
236 | 416 | DenseMap<uint64_t, const FlowBlock *> MatchedBlocks;
|
237 | 417 | // Match blocks from the profile to the blocks in CFG
|
238 | 418 | for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
|
239 | 419 | assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
|
240 |
| - auto It = HashToBlocks.find(YamlBB.Hash); |
241 |
| - if (It != HashToBlocks.end()) { |
242 |
| - const FlowBlock *MatchedBlock = It->second.front(); |
| 420 | + BlendedBlockHash BlendedHash(YamlBB.Hash); |
| 421 | + const FlowBlock *MatchedBlock = Matcher.matchBlock(BlendedHash); |
| 422 | + if (MatchedBlock != nullptr) { |
243 | 423 | MatchedBlocks[YamlBB.Index] = MatchedBlock;
|
| 424 | + LLVM_DEBUG(dbgs() << "Matched yaml block with bid = " << YamlBB.Index |
| 425 | + << " and hash = " << Twine::utohexstr(YamlBB.Hash) |
| 426 | + << " to BB with index = " << MatchedBlock->Index - 1 |
| 427 | + << "\n"); |
| 428 | + } else { |
| 429 | + LLVM_DEBUG( |
| 430 | + dbgs() << "Couldn't match yaml block with bid = " << YamlBB.Index |
| 431 | + << " and hash = " << Twine::utohexstr(YamlBB.Hash) << "\n"); |
244 | 432 | }
|
245 | 433 | }
|
246 | 434 |
|
|
0 commit comments