Skip to content

Commit d4dd96d

Browse files
authored
red-knot: source_text, line_index, and parsed_module queries (#11822)
1 parent efbf7b1 commit d4dd96d

File tree

9 files changed

+193
-15
lines changed

9 files changed

+193
-15
lines changed

Cargo.lock

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/ruff_db/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ repository = { workspace = true }
1111
license = { workspace = true }
1212

1313
[dependencies]
14+
ruff_python_ast = { workspace = true }
15+
ruff_python_parser = { workspace = true }
16+
ruff_source_file = { workspace = true }
17+
ruff_text_size = { workspace = true }
18+
1419
camino = { workspace = true }
1520
countme = { workspace = true }
1621
dashmap = { workspace = true }

crates/ruff_db/src/file_system.rs

+27-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,31 @@ impl FileSystemPath {
4848
unsafe { &*(path as *const Utf8Path as *const FileSystemPath) }
4949
}
5050

51+
/// Extracts the file extension, if possible.
52+
///
53+
/// The extension is:
54+
///
55+
/// * [`None`], if there is no file name;
56+
/// * [`None`], if there is no embedded `.`;
57+
/// * [`None`], if the file name begins with `.` and has no other `.`s within;
58+
/// * Otherwise, the portion of the file name after the final `.`
59+
///
60+
/// # Examples
61+
///
62+
/// ```
63+
/// use ruff_db::file_system::FileSystemPath;
64+
///
65+
/// assert_eq!("rs", FileSystemPath::new("foo.rs").extension().unwrap());
66+
/// assert_eq!("gz", FileSystemPath::new("foo.tar.gz").extension().unwrap());
67+
/// ```
68+
///
69+
/// See [`Path::extension`] for more details.
70+
#[inline]
71+
#[must_use]
72+
pub fn extension(&self) -> Option<&str> {
73+
self.0.extension()
74+
}
75+
5176
/// Converts the path to an owned [`FileSystemPathBuf`].
5277
pub fn to_path_buf(&self) -> FileSystemPathBuf {
5378
FileSystemPathBuf(self.0.to_path_buf())
@@ -251,9 +276,10 @@ impl FileType {
251276

252277
#[cfg(test)]
253278
mod tests {
254-
use crate::file_system::FileRevision;
255279
use filetime::FileTime;
256280

281+
use crate::file_system::FileRevision;
282+
257283
#[test]
258284
fn revision_from_file_time() {
259285
let file_time = FileTime::now();

crates/ruff_db/src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,19 @@ use rustc_hash::FxHasher;
44
use salsa::DbWithJar;
55

66
use crate::file_system::{FileSystem, FileSystemPath};
7+
use crate::parsed::parsed_module;
8+
use crate::source::{line_index, source_text};
79
use crate::vfs::{VendoredPath, Vfs, VfsFile};
810

911
pub mod file_system;
12+
pub mod parsed;
13+
pub mod source;
1014
pub mod vfs;
1115

1216
pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
1317

1418
#[salsa::jar(db=Db)]
15-
pub struct Jar(VfsFile);
19+
pub struct Jar(VfsFile, source_text, line_index, parsed_module);
1620

1721
/// Database that gives access to the virtual filesystem, source code, and parsed AST.
1822
pub trait Db: DbWithJar<Jar> {

crates/ruff_db/src/parsed.rs

+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
use std::fmt::Formatter;
2+
use std::ops::Deref;
3+
use std::sync::Arc;
4+
5+
use ruff_python_ast::{ModModule, PySourceType};
6+
use ruff_python_parser::{parse_unchecked_source, Parsed};
7+
8+
use crate::source::source_text;
9+
use crate::vfs::{VfsFile, VfsPath};
10+
use crate::Db;
11+
12+
/// Returns the parsed AST of `file`, including its token stream.
13+
///
14+
/// The query uses Ruff's error-resilient parser. That means that the parser always succeeds in producing an
15+
/// AST even if the file contains syntax errors. The parse errors
16+
/// are then accessible through [`Parsed::errors`].
17+
///
18+
/// The query is only cached when the [`source_text()`] hasn't changed. This is because
19+
/// comparing two ASTs is a non-trivial operation and every offset change is directly
20+
/// reflected in the changed AST offsets.
21+
/// The other reason is that Ruff's AST doesn't implement `Eq` which Salsa requires
22+
/// for determining if a query result is unchanged.
23+
#[salsa::tracked(return_ref, no_eq)]
24+
pub fn parsed_module(db: &dyn Db, file: VfsFile) -> ParsedModule {
25+
let source = source_text(db, file);
26+
let path = file.path(db);
27+
28+
let ty = match path {
29+
VfsPath::FileSystem(path) => path
30+
.extension()
31+
.map_or(PySourceType::Python, PySourceType::from_extension),
32+
VfsPath::Vendored(_) => PySourceType::Stub,
33+
};
34+
35+
ParsedModule {
36+
inner: Arc::new(parse_unchecked_source(&source, ty)),
37+
}
38+
}
39+
40+
/// Cheaply cloneable wrapper around the parsed module.
41+
#[derive(Clone, PartialEq)]
42+
pub struct ParsedModule {
43+
inner: Arc<Parsed<ModModule>>,
44+
}
45+
46+
impl ParsedModule {
47+
/// Consumes `self` and returns the Arc storing the parsed module.
48+
pub fn into_arc(self) -> Arc<Parsed<ModModule>> {
49+
self.inner
50+
}
51+
}
52+
53+
impl Deref for ParsedModule {
54+
type Target = Parsed<ModModule>;
55+
56+
fn deref(&self) -> &Self::Target {
57+
&self.inner
58+
}
59+
}
60+
61+
impl std::fmt::Debug for ParsedModule {
62+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
63+
f.debug_tuple("ParsedModule").field(&self.inner).finish()
64+
}
65+
}
66+
67+
#[cfg(test)]
68+
mod tests {
69+
use crate::file_system::FileSystemPath;
70+
use crate::parsed::parsed_module;
71+
use crate::tests::TestDb;
72+
use crate::vfs::VendoredPath;
73+
use crate::Db;
74+
75+
#[test]
76+
fn python_file() {
77+
let mut db = TestDb::new();
78+
let path = FileSystemPath::new("test.py");
79+
80+
db.file_system_mut().write_file(path, "x = 10".to_string());
81+
82+
let file = db.file(path);
83+
84+
let parsed = parsed_module(&db, file);
85+
86+
assert!(parsed.is_valid());
87+
}
88+
89+
#[test]
90+
fn python_ipynb_file() {
91+
let mut db = TestDb::new();
92+
let path = FileSystemPath::new("test.ipynb");
93+
94+
db.file_system_mut()
95+
.write_file(path, "%timeit a = b".to_string());
96+
97+
let file = db.file(path);
98+
99+
let parsed = parsed_module(&db, file);
100+
101+
assert!(parsed.is_valid());
102+
}
103+
104+
#[test]
105+
fn vendored_file() {
106+
let mut db = TestDb::new();
107+
db.vfs_mut().stub_vendored([(
108+
"path.pyi",
109+
r#"
110+
import sys
111+
112+
if sys.platform == "win32":
113+
from ntpath import *
114+
from ntpath import __all__ as __all__
115+
else:
116+
from posixpath import *
117+
from posixpath import __all__ as __all__"#,
118+
)]);
119+
120+
let file = db.vendored_file(VendoredPath::new("path.pyi")).unwrap();
121+
122+
let parsed = parsed_module(&db, file);
123+
124+
assert!(parsed.is_valid());
125+
}
126+
}

crates/ruff_db/src/source.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,10 @@ mod tests {
9696
// Change the file permission only
9797
file.set_permissions(&mut db).to(Some(0o777));
9898

99-
db.events().lock().unwrap().clear();
99+
db.clear_salsa_events();
100100
assert_eq!(&*source_text(&db, file), "x = 10");
101101

102-
let events = db.events();
103-
let events = events.lock().unwrap();
102+
let events = db.take_salsa_events();
104103

105104
assert!(!events
106105
.iter()

crates/ruff_python_ast/src/lib.rs

+19-6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::ffi::OsStr;
12
use std::path::Path;
23

34
pub use expression::*;
@@ -80,13 +81,25 @@ pub enum PySourceType {
8081
Ipynb,
8182
}
8283

84+
impl PySourceType {
85+
/// Infers the source type from the file extension.
86+
///
87+
/// Falls back to `Python` if the extension is not recognized.
88+
pub fn from_extension(extension: &str) -> Self {
89+
match extension {
90+
"py" => Self::Python,
91+
"pyi" => Self::Stub,
92+
"ipynb" => Self::Ipynb,
93+
_ => Self::Python,
94+
}
95+
}
96+
}
97+
8398
impl<P: AsRef<Path>> From<P> for PySourceType {
8499
fn from(path: P) -> Self {
85-
match path.as_ref().extension() {
86-
Some(ext) if ext == "py" => PySourceType::Python,
87-
Some(ext) if ext == "pyi" => PySourceType::Stub,
88-
Some(ext) if ext == "ipynb" => PySourceType::Ipynb,
89-
_ => PySourceType::Python,
90-
}
100+
path.as_ref()
101+
.extension()
102+
.and_then(OsStr::to_str)
103+
.map_or(Self::Python, Self::from_extension)
91104
}
92105
}

crates/ruff_python_parser/src/lib.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed
234234
}
235235

236236
/// Represents the parsed source code.
237-
#[derive(Debug, Clone)]
237+
#[derive(Debug, PartialEq, Clone)]
238238
pub struct Parsed<T> {
239239
syntax: T,
240240
tokens: Tokens,
@@ -361,7 +361,7 @@ impl Parsed<ModExpression> {
361361
}
362362

363363
/// Tokens represents a vector of lexed [`Token`].
364-
#[derive(Debug, Clone)]
364+
#[derive(Debug, Clone, PartialEq, Eq)]
365365
pub struct Tokens {
366366
raw: Vec<Token>,
367367

crates/ruff_source_file/src/line_index.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ use crate::SourceLocation;
1414
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
1515
///
1616
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
17-
#[derive(Clone)]
17+
#[derive(Clone, Eq, PartialEq)]
1818
pub struct LineIndex {
1919
inner: Arc<LineIndexInner>,
2020
}
2121

22+
#[derive(Eq, PartialEq)]
2223
struct LineIndexInner {
2324
line_starts: Vec<TextSize>,
2425
kind: IndexKind,
@@ -268,7 +269,7 @@ impl Debug for LineIndex {
268269
}
269270
}
270271

271-
#[derive(Debug, Clone, Copy)]
272+
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
272273
enum IndexKind {
273274
/// Optimized index for an ASCII only document
274275
Ascii,

0 commit comments

Comments
 (0)