Skip to content

Commit 6bdb15f

Browse files
committed
[pseudo] Reorganize CXX.h enums
- Place rules under rule::lhs::rhs__rhs__rhs.
- Change mangling of keywords to ALL_CAPS (needed to turn keywords that appear alone on RHS into valid identifiers).
- Make enums implicitly convertible to underlying type (though still scoped, using alias tricks).

In principle this lets us exhaustively write a switch over all rules of a NT:
  switch ((rule::declarator)N->rule()) { case rule::declarator::noptr_declarator: ... }
In practice we don't do this anywhere yet, as we're often switching over multiple nonterminal kinds at once.

Differential Revision: https://reviews.llvm.org/D130414
1 parent aeeb174 commit 6bdb15f

File tree

8 files changed

+225
-166
lines changed

8 files changed

+225
-166
lines changed

clang-tools-extra/pseudo/gen/Main.cpp

+57-8
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,50 @@ std::string readOrDie(llvm::StringRef Path) {
5858
}
5959
} // namespace
6060

61+
namespace clang {
62+
namespace pseudo {
63+
namespace {
64+
65+
// Mangles a symbol name into a valid identifier.
//
// These follow names in the grammar fairly closely:
//   nonterminal: `ptr-declarator` becomes `ptr_declarator`;
//   punctuator: `,` becomes `COMMA`;
//   keyword: `INT` becomes `INT`;
//   terminal: `IDENTIFIER` becomes `IDENTIFIER`;
//
// SID is the symbol to mangle; G is consulted only for nonterminals, whose
// names come from G.symbolName(). Returns the mangled name by value.
std::string mangleSymbol(SymbolID SID, const Grammar &G) {
  // One upper-cased name per TOK/KEYWORD entry of TokenKinds.def, in the
  // order the entries appear, indexed below by symbolToToken(SID).
  //
  // This is a plain static array rather than `new std::string[]{...}`:
  // omitting the bound in a new-expression and deducing it from the braced
  // initializer is only well-formed since C++20 (P1009R2), whereas the bound
  // of an array *declaration* has always been deducible.
  static const std::string TokNames[] = {
#define TOK(X) llvm::StringRef(#X).upper(),
#define KEYWORD(Keyword, Condition) llvm::StringRef(#Keyword).upper(),
#include "clang/Basic/TokenKinds.def"
  };
  if (isToken(SID))
    return TokNames[symbolToToken(SID)];
  std::string Name = G.symbolName(SID).str();
  // '-' is not valid in an identifier: translation-unit -> translation_unit
  std::replace(Name.begin(), Name.end(), '-', '_');
  return Name;
}
85+
86+
// Mangles the RHS of a rule definition into a valid identifier.
87+
//
88+
// These are unique only for a fixed LHS.
89+
// e.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
90+
// it is `ptr_operator__ptr_declarator`.
91+
std::string mangleRule(RuleID RID, const Grammar &G) {
92+
const auto &R = G.lookupRule(RID);
93+
std::string MangleName = mangleSymbol(R.seq().front(), G);
94+
for (SymbolID S : R.seq().drop_front()) {
95+
MangleName.append("__");
96+
MangleName.append(mangleSymbol(S, G));
97+
}
98+
return MangleName;
99+
}
100+
101+
} // namespace
102+
} // namespace pseudo
103+
} // namespace clang
104+
61105
int main(int argc, char *argv[]) {
62106
llvm::cl::ParseCommandLineOptions(argc, argv, "");
63107

@@ -81,21 +125,26 @@ int main(int argc, char *argv[]) {
81125
case EmitSymbolList:
82126
Out.os() << R"cpp(
83127
#ifndef NONTERMINAL
84-
#define NONTERMINAL(X, Y)
128+
#define NONTERMINAL(NAME, ID)
85129
#endif
86130
#ifndef RULE
87-
#define RULE(X, Y)
131+
#define RULE(LHS, RHS, ID)
88132
#endif
89133
#ifndef EXTENSION
90-
#define EXTENSION(X, Y)
134+
#define EXTENSION(NAME, ID)
91135
#endif
92136
)cpp";
93137
for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
94-
++ID)
95-
Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", G.mangleSymbol(ID),
96-
ID);
97-
for (clang::pseudo::RuleID RID = 0; RID < G.table().Rules.size(); ++RID)
98-
Out.os() << llvm::formatv("RULE({0}, {1})\n", G.mangleRule(RID), RID);
138+
++ID) {
139+
Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n",
140+
clang::pseudo::mangleSymbol(ID, G), ID);
141+
for (const clang::pseudo::Rule &R : G.rulesFor(ID)) {
142+
clang::pseudo::RuleID RID = &R - G.table().Rules.data();
143+
Out.os() << llvm::formatv("RULE({0}, {1}, {2})\n",
144+
clang::pseudo::mangleSymbol(R.Target, G),
145+
clang::pseudo::mangleRule(RID, G), RID);
146+
}
147+
}
99148
for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
100149
EID < G.table().AttributeValues.size(); ++EID) {
101150
llvm::StringRef Name = G.table().AttributeValues[EID];

clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h

+40-9
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,56 @@
2929
namespace clang {
3030
namespace pseudo {
3131
namespace cxx {
32-
// Symbol represents nonterminal symbols in the C++ grammar.
33-
// It provides a simple uniform way to access a particular nonterminal.
34-
enum class Symbol : SymbolID {
32+
33+
// We want enums to be scoped but implicitly convertible to RuleID etc.
34+
// So create regular (unscoped) enums inside subnamespaces of `detail`.
35+
// Then add aliases for them outside `detail`.
36+
namespace detail {
37+
namespace symbols {
38+
enum Symbol : SymbolID {
3539
#define NONTERMINAL(X, Y) X = Y,
3640
#include "CXXSymbols.inc"
3741
#undef NONTERMINAL
3842
};
43+
} // namespace symbols
3944

40-
enum class Rule : RuleID {
41-
#define RULE(X, Y) X = Y,
45+
namespace extensions {
46+
enum Extension : ExtensionID {
47+
#define EXTENSION(X, Y) X = Y,
4248
#include "CXXSymbols.inc"
43-
#undef RULE
49+
#undef EXTENSION
4450
};
51+
} // namespace extensions
4552

46-
enum class Extension : ExtensionID {
47-
#define EXTENSION(X, Y) X = Y,
53+
namespace rules {
54+
// For each symbol we close the last symbol's enum+namespace and open new ones.
55+
// We need a dummy namespace+enum so that this works for the first rule.
56+
namespace dummy {
57+
enum Dummy {
58+
//clang-format off
59+
#define NONTERMINAL(NAME, ID) \
60+
}; \
61+
} \
62+
namespace NAME { \
63+
enum Rule : RuleID {
64+
//clang-format on
65+
#define RULE(LHS, RHS, ID) RHS = ID,
4866
#include "CXXSymbols.inc"
49-
#undef EXTENSION
5067
};
68+
}
69+
} // namespace rules
70+
} // namespace detail
71+
72+
// Symbol represents nonterminal symbols in the C++ grammar.
73+
// It provides a simple uniform way to access a particular nonterminal.
74+
using Symbol = detail::symbols::Symbol;
75+
76+
using Extension = detail::extensions::Extension;
77+
78+
namespace rule {
79+
#define NONTERMINAL(NAME, ID) using NAME = detail::rules::NAME::Rule;
80+
#include "CXXSymbols.inc"
81+
} // namespace rule
5182

5283
// Returns the Language for the cxx.bnf grammar.
5384
const Language &getLanguage();

clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h

-15
Original file line numberDiff line numberDiff line change
@@ -162,21 +162,6 @@ class Grammar {
162162
// Terminals have names like "," (kw_comma) or "OPERATOR" (kw_operator).
163163
llvm::StringRef symbolName(SymbolID) const;
164164

165-
// Gets the mangled name for a terminal/nonterminal.
166-
// Compared to names in the grammar,
167-
// nonterminals `ptr-declartor` becomes `ptr_declarator`;
168-
// terminal `,` becomes `comma`;
169-
// terminal `IDENTIFIER` becomes `identifier`;
170-
// terminal `INT` becomes `int`;
171-
// NOTE: for nonterminals, the mangled name is the same as the cxx::Symbol
172-
// enum class; for terminals, we deliberately stripped the `kw_` prefix in
173-
// favor of the simplicity.
174-
std::string mangleSymbol(SymbolID) const;
175-
// Gets the mangled name for the rule.
176-
// E.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
177-
// it is `ptr_declarator_0ptr_operator_1ptr_declarator`.
178-
std::string mangleRule(RuleID) const;
179-
180165
// Lookup the SymbolID of the nonterminal symbol by Name.
181166
llvm::Optional<SymbolID> findNonterminal(llvm::StringRef Name) const;
182167

0 commit comments

Comments
 (0)