Skip to content

Commit 018be41

Browse files
committed
Implement 'disallowed_script_idents' lint
1 parent b286b38 commit 018be41

File tree

8 files changed

+152
-2
lines changed

8 files changed

+152
-2
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -2487,6 +2487,7 @@ Released 2018-09-13
24872487
[`derive_hash_xor_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_hash_xor_eq
24882488
[`derive_ord_xor_partial_ord`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_ord_xor_partial_ord
24892489
[`disallowed_method`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_method
2490+
[`disallowed_script_idents`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_script_idents
24902491
[`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type
24912492
[`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression
24922493
[`doc_markdown`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_markdown

clippy_lints/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ serde = { version = "1.0", features = ["derive"] }
2323
serde_json = { version = "1.0", optional = true }
2424
toml = "0.5.3"
2525
unicode-normalization = "0.1"
26+
unicode-script = { version = "0.5.3", default-features = false }
2627
semver = "0.11"
2728
rustc-semver = "1.1.0"
2829
# NOTE: cargo requires serde feat in its url dep
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
use clippy_utils::diagnostics::span_lint;
2+
use rustc_ast::ast;
3+
use rustc_data_structures::fx::FxHashSet;
4+
use rustc_lint::{EarlyContext, EarlyLintPass, Level};
5+
use rustc_session::{declare_tool_lint, impl_lint_pass};
6+
use unicode_script::{Script, UnicodeScript};
7+
8+
declare_clippy_lint! {
9+
/// **What it does:** Checks for usage of unicode scripts other than those explicitly allowed
10+
/// by the lint config.
11+
///
12+
/// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`.
13+
/// It also ignores the `Common` script type.
14+
/// While configuring, be sure to use official script name [aliases] from
15+
/// [the list of supported scripts][supported_scripts].
16+
///
17+
/// See also: [`non_ascii_idents`].
18+
///
19+
/// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases
20+
/// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html
21+
///
22+
/// **Why is this bad?** It may not be desirable to have many different scripts for
23+
/// identifiers in the codebase.
24+
///
25+
/// Note that if you only want to allow plain English, you might want to use
26+
/// built-in [`non_ascii_idents`] lint instead.
27+
///
28+
/// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents
29+
///
30+
/// **Known problems:** None.
31+
///
32+
/// **Example:**
33+
/// ```rust
34+
/// // Assuming that `clippy.toml` contains the following line:
35+
/// // allowed-scripts = ["Latin", "Cyrillic"]
36+
/// let counter = 10; // OK, latin is allowed.
37+
/// let счётчик = 10; // OK, cyrillic is allowed.
38+
/// let zähler = 10; // OK, it's still latin.
39+
/// let カウンタ = 10; // Will spawn the lint.
40+
/// ```
41+
pub DISALLOWED_SCRIPT_IDENTS,
42+
restriction,
43+
"usage of non-allowed Unicode scripts"
44+
}
45+
46+
#[derive(Clone, Debug)]
47+
pub struct DisallowedScriptIdents {
48+
whitelist: FxHashSet<Script>,
49+
}
50+
51+
impl DisallowedScriptIdents {
52+
pub fn new(whitelist: &[String]) -> Self {
53+
let whitelist = whitelist
54+
.iter()
55+
.map(String::as_str)
56+
.filter_map(Script::from_full_name)
57+
.collect();
58+
Self { whitelist }
59+
}
60+
}
61+
62+
impl_lint_pass!(DisallowedScriptIdents => [DISALLOWED_SCRIPT_IDENTS]);
63+
64+
impl EarlyLintPass for DisallowedScriptIdents {
65+
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
66+
// Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint:
67+
// https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs
68+
69+
let check_disallowed_script_idents = cx.builder.lint_level(DISALLOWED_SCRIPT_IDENTS).0 != Level::Allow;
70+
if !check_disallowed_script_idents {
71+
return;
72+
}
73+
74+
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
75+
// Sort by `Span` so that error messages make sense with respect to the
76+
// order of identifier locations in the code.
77+
let mut symbols: Vec<_> = symbols.iter().collect();
78+
symbols.sort_unstable_by_key(|k| k.1);
79+
80+
for (symbol, &span) in &symbols {
81+
// Note: `symbol.as_str()` is an expensive operation, thus should not be called
82+
// more than once for a single symbol.
83+
let symbol_str = symbol.as_str();
84+
if symbol_str.is_ascii() {
85+
continue;
86+
}
87+
88+
for c in symbol_str.chars() {
89+
// We want to iterate through all the scripts associated with this character
90+
// and find the first one (if any) that is not in the whitelist.
91+
let forbidden_script = c
92+
.script_extension()
93+
.iter()
94+
.find(|script| !self.whitelist.contains(script));
95+
if let Some(script) = forbidden_script {
96+
span_lint(
97+
cx,
98+
DISALLOWED_SCRIPT_IDENTS,
99+
span,
100+
&format!(
101+
"identifier `{}` has a Unicode script that is not allowed by configuration: {}",
102+
symbol_str,
103+
script.full_name()
104+
),
105+
);
106+
// We don't want to emit the warning multiple times for a single identifier.
107+
break;
108+
}
109+
}
110+
}
111+
}
112+
}

clippy_lints/src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ mod default_numeric_fallback;
187187
mod dereference;
188188
mod derive;
189189
mod disallowed_method;
190+
mod disallowed_script_idents;
190191
mod disallowed_type;
191192
mod doc;
192193
mod double_comparison;
@@ -585,6 +586,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
585586
derive::EXPL_IMPL_CLONE_ON_COPY,
586587
derive::UNSAFE_DERIVE_DESERIALIZE,
587588
disallowed_method::DISALLOWED_METHOD,
589+
disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS,
588590
disallowed_type::DISALLOWED_TYPE,
589591
doc::DOC_MARKDOWN,
590592
doc::MISSING_ERRORS_DOC,
@@ -995,6 +997,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
995997
LintId::of(create_dir::CREATE_DIR),
996998
LintId::of(dbg_macro::DBG_MACRO),
997999
LintId::of(default_numeric_fallback::DEFAULT_NUMERIC_FALLBACK),
1000+
LintId::of(disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS),
9981001
LintId::of(else_if_without_else::ELSE_IF_WITHOUT_ELSE),
9991002
LintId::of(exhaustive_items::EXHAUSTIVE_ENUMS),
10001003
LintId::of(exhaustive_items::EXHAUSTIVE_STRUCTS),
@@ -2082,7 +2085,8 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
20822085
store.register_late_pass(move || box disallowed_type::DisallowedType::new(&disallowed_types));
20832086
let import_renames = conf.enforced_import_renames.clone();
20842087
store.register_late_pass(move || box missing_enforced_import_rename::ImportRename::new(import_renames.clone()));
2085-
2088+
let scripts = conf.allowed_scripts.clone();
2089+
store.register_early_pass(move || box disallowed_script_idents::DisallowedScriptIdents::new(&scripts));
20862090
}
20872091

20882092
#[rustfmt::skip]

clippy_lints/src/utils/conf.rs

+2
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,8 @@ define_Conf! {
212212
(standard_macro_braces: Vec<crate::nonstandard_macro_braces::MacroMatcher> = Vec::new()),
213213
/// Lint: MISSING_ENFORCED_IMPORT_RENAMES. The list of imports to always rename, a fully qualified path followed by the rename.
214214
(enforced_import_renames: Vec<crate::utils::conf::Rename> = Vec::new()),
215+
/// Lint: DISALLOWED_SCRIPT_IDENTS. The list of Unicode scripts allowed to be used in identifiers.
216+
(allowed_scripts: Vec<String> = vec!["Latin".to_string()]),
215217
}
216218

217219
/// Search for the configuration file.
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `third-party` at line 5 column 1
1+
error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `allowed-scripts`, `third-party` at line 5 column 1
22

33
error: aborting due to previous error
44

tests/ui/disallowed_script_idents.rs

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#![deny(clippy::disallowed_script_idents)]
2+
#![allow(dead_code)]
3+
4+
fn main() {
5+
let counter = 10; // OK, latin is allowed.
6+
let zähler = 10; // OK, it's still latin.
7+
8+
let счётчик = 10; // Cyrillic is not allowed by default.
9+
let カウンタ = 10; // Same for japanese.
10+
}
+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
error: identifier `счётчик` has a Unicode script that is not allowed by configuration: Cyrillic
2+
--> $DIR/disallowed_script_idents.rs:8:9
3+
|
4+
LL | let счётчик = 10; // Cyrillic is not allowed by default.
5+
| ^^^^^^^
6+
|
7+
note: the lint level is defined here
8+
--> $DIR/disallowed_script_idents.rs:1:9
9+
|
10+
LL | #![deny(clippy::disallowed_script_idents)]
11+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
12+
13+
error: identifier `カウンタ` has a Unicode script that is not allowed by configuration: Katakana
14+
--> $DIR/disallowed_script_idents.rs:9:9
15+
|
16+
LL | let カウンタ = 10; // Same for japanese.
17+
| ^^^^^^^^
18+
19+
error: aborting due to 2 previous errors
20+

0 commit comments

Comments
 (0)