Skip to content

Commit 76ff3af

Browse files
authored
wasm-decompile: overhauled name filtering. (#1272)
The previous implementation was too simplistic: it didn't do the renaming at the correct location (where it could catch all occurrences), and it was also very ineffective at cutting down gigantic STL signatures to something manageable. This version creates more usable identifiers in almost all cases.
1 parent dff75b2 commit 76ff3af

File tree

8 files changed

+168
-15
lines changed

8 files changed

+168
-15
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ add_library(wabt STATIC
248248
src/decompiler.h
249249
src/decompiler-ast.h
250250
src/decompiler-ls.h
251+
src/decompiler-naming.h
251252
src/decompiler.cc
252253
src/error-formatter.h
253254
src/error-formatter.cc

src/decompiler-naming.h

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/*
2+
* Copyright 2019 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef WABT_DECOMPILER_NAMING_H_
18+
#define WABT_DECOMPILER_NAMING_H_
19+
20+
#include "src/decompiler-ast.h"
21+
22+
namespace wabt {
23+
24+
inline void RenameToIdentifier(std::string& name, Index i,
25+
BindingHash& bh,
26+
const std::set<string_view>* filter) {
27+
// Filter out non-identifier characters, and try to reduce the size of
28+
// gigantic C++ signature names.
29+
std::string s;
30+
size_t nesting = 0;
31+
size_t read = 0;
32+
size_t word_start = 0;
33+
for (auto c : name) {
34+
read++;
35+
// We most certainly don't want to parse the entirety of C++ signatures,
36+
// but these names are sometimes several lines long, so would be great
37+
// to trim down. One quick way to do that is to remove anything between
38+
// nested (), which usually means the parameter list.
39+
if (c == '(') {
40+
nesting++;
41+
}
42+
if (c == ')') {
43+
nesting--;
44+
}
45+
if (nesting) {
46+
continue;
47+
}
48+
if (!isalnum(static_cast<unsigned char>(c))) {
49+
c = '_';
50+
}
51+
if (c == '_') {
52+
if (s.empty()) {
53+
continue; // Skip leading.
54+
}
55+
if (s.back() == '_') {
56+
continue; // Consecutive.
57+
}
58+
}
59+
s += c;
60+
if (filter && (c == '_' || read == name.size())) {
61+
// We found a "word" inside a snake_case identifier.
62+
auto word_end = s.size();
63+
if (c == '_') {
64+
word_end--;
65+
}
66+
assert(word_end > word_start);
67+
auto word = string_view(s.c_str() + word_start, word_end - word_start);
68+
if (filter->find(word) != filter->end()) {
69+
s.resize(word_start);
70+
}
71+
word_start = s.size();
72+
}
73+
}
74+
if (!s.empty() && s.back() == '_') {
75+
s.pop_back(); // Trailing.
76+
}
77+
// If after all this culling, we're still gigantic (STL identifier can
78+
// easily be hundreds of chars in size), just cut the identifier
79+
// down, it will be disambiguated below, if needed.
80+
const size_t max_identifier_length = 100;
81+
if (s.size() > max_identifier_length) {
82+
s.resize(max_identifier_length);
83+
}
84+
// Remove original binding first, such that it doesn't match with our
85+
// new name.
86+
bh.erase(name);
87+
// Find a unique name.
88+
Index disambiguator = 0;
89+
auto base_len = s.size();
90+
for (;;) {
91+
if (bh.count(s) == 0) {
92+
break;
93+
}
94+
disambiguator++;
95+
s.resize(base_len);
96+
s += '_';
97+
s += std::to_string(disambiguator);
98+
}
99+
// Replace name in bindings.
100+
name = s;
101+
bh.emplace(s, Binding(i));
102+
}
103+
104+
template<typename T>
105+
void RenameToIdentifiers(std::vector<T*>& things, BindingHash& bh,
106+
const std::set<string_view>* filter) {
107+
Index i = 0;
108+
for (auto thing : things) {
109+
RenameToIdentifier(thing->name, i++, bh, filter);
110+
}
111+
}
112+
113+
// Function names may contain arbitrary C++ syntax, so we want to
114+
// filter those to look like identifiers. A function name may be set
115+
// by a name section (applied in ReadBinaryIr, called before this function)
116+
// or by an export (applied by GenerateNames, called before this function),
117+
// to both the Func and func_bindings.
118+
// Those names then further perculate down the IR in ApplyNames (called after
119+
// this function).
120+
// To not have to add too many decompiler-specific code into those systems
121+
// (using a callback??) we instead rename everything here.
122+
void RenameAll(Module& module) {
123+
// We also filter common C++ keywords/STL idents that make for huge
124+
// identifiers.
125+
// FIXME: this can obviously give bad results if the input is not C++..
126+
std::set<string_view> filter = {
127+
{ "const" },
128+
{ "std" },
129+
{ "allocator" },
130+
{ "char" },
131+
{ "basic" },
132+
{ "traits" },
133+
{ "wchar" },
134+
{ "t" },
135+
{ "void" },
136+
{ "int" },
137+
{ "unsigned" },
138+
{ "2" },
139+
{ "cxxabiv1" },
140+
{ "short" },
141+
{ "4096ul" },
142+
};
143+
RenameToIdentifiers(module.funcs, module.func_bindings, &filter);
144+
// Also do this for some other kinds of names.
145+
RenameToIdentifiers(module.globals, module.global_bindings, nullptr);
146+
RenameToIdentifiers(module.tables, module.table_bindings, nullptr);
147+
}
148+
149+
} // namespace wabt
150+
151+
#endif // WABT_DECOMPILER_NAMING_H_

src/decompiler.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include "src/decompiler-ast.h"
2020
#include "src/decompiler-ls.h"
21+
#include "src/decompiler-naming.h"
2122

2223
#include "src/stream.h"
2324

@@ -68,8 +69,10 @@ struct Decompiler {
6869
return std::string(amount, ' ');
6970
}
7071

71-
string_view OpcodeToToken(Opcode opcode) {
72-
return opcode.GetDecomp();
72+
std::string OpcodeToToken(Opcode opcode) {
73+
std::string s = opcode.GetDecomp();
74+
std::replace(s.begin(), s.end(), '.', '_');
75+
return s;
7376
}
7477

7578
void IndentValue(Value &val, size_t amount, string_view first_indent) {

src/decompiler.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ class Stream;
2727
struct DecompileOptions {
2828
};
2929

30+
void RenameAll(Module&);
31+
3032
std::string Decompile(const Module&, const DecompileOptions&);
3133

3234
} // namespace wabt

src/generate-names.cc

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,7 @@ void NameGenerator::GenerateName(const char* prefix,
122122
std::string* str) {
123123
str->clear();
124124
if (!(opts_ & NameOpts::NoDollar)) *str = "$";
125-
if (opts_ & NameOpts::OnlyAlphaNum) {
126-
for (auto p = prefix; *p; p++) *str += isalnum(*p) ? *p : '_';
127-
} else {
128-
*str += prefix;
129-
}
125+
*str += prefix;
130126
if (index != kInvalidIndex) {
131127
if (opts_ & NameOpts::AlphaNames) {
132128
// For params and locals, do not use a prefix char.

src/generate-names.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ enum NameOpts {
2727
None = 0,
2828
AlphaNames = 1 << 0,
2929
NoDollar = 1 << 1,
30-
OnlyAlphaNum = 1 << 2,
3130
};
3231

3332
Result GenerateNames(struct Module*, NameOpts opts = NameOpts::None);

src/tools/wasm-decompile.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,12 @@ int ProgramMain(int argc, char** argv) {
8787
ValidateOptions options(features);
8888
result = ValidateModule(&module, &errors, options);
8989
}
90-
// FIXME: do we need these?
9190
result = GenerateNames(&module,
9291
static_cast<NameOpts>(NameOpts::AlphaNames |
93-
NameOpts::NoDollar |
94-
NameOpts::OnlyAlphaNum));
92+
NameOpts::NoDollar));
93+
// Must be called after ReadBinaryIr & GenerateNames, and before
94+
// ApplyNames, see comments at definition.
95+
RenameAll(module);
9596
if (Succeeded(result)) {
9697
/* TODO(binji): This shouldn't fail; if a name can't be applied
9798
* (because the index is invalid, say) it should just be skipped. */

test/decompile/basic.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@
5757
)
5858
;; LLD outputs a name section with de-mangled C++ function signatures as names,
5959
;; so have to make sure special chars get removed.
60-
(func (export "signature-&<>()") (param) (result))
61-
(func (export "signature-&[]()") (param) (result))
60+
(func (export "void signature-&<>(int a)") (param) (result))
61+
(func (export "void signature-&[](int a)") (param) (result))
6262
(func $not-exported (param) (result))
6363
(export "f" (func $f))
6464
)
@@ -94,10 +94,10 @@ export function f(a:int, b:int):int {
9494
return 1;
9595
}
9696

97-
function signature______() {
97+
function signature() {
9898
}
9999

100-
function signature_______1() {
100+
function signature_1() {
101101
}
102102

103103
function f_e() {

0 commit comments

Comments
 (0)