Skip to content

Commit ef9e796

Browse files
authored
Fix span relocation (#14)
* Fix span relocation. Previously this was not handled correctly: when relocation encounters a grapheme cluster, it should find the floor/ceil position of the continuation byte in order to index into the span string. This uses copies of two unstable functions from std, `str::floor_char_boundary` and `str::ceil_char_boundary`, which are tracked here: rust-lang/rust#93743. When they become stable we can just rely on std, but for now the copies are fine (even if they are obviously inefficient). * Bump version.
1 parent f8d58d1 commit ef9e796

File tree

4 files changed

+61
-7
lines changed

4 files changed

+61
-7
lines changed

Cargo.lock

Lines changed: 9 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ccs"
3-
version = "0.9.0"
3+
version = "0.9.1"
44
edition = "2021"
55
license = "0BSD"
66
repository = "https://github.com/museun/ccs"
@@ -14,4 +14,5 @@ indoc = "2.0.5"
1414
owo-colors = { version = "3.5.0", features = [ "supports-colors" ] }
1515
serde = { version = "1.0.213", features = [ "derive" ] }
1616
serde_json = "1.0.132"
17+
str_indices = "0.4.4"
1718
toml = "0.7.8"

src/parse/span.rs

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,13 @@ impl Span {
2222
if matches!(render_options.render, RenderStyle::Full) {
2323
use owo_colors::OwoColorize as _;
2424
self.relocate().try_for_each(|(start, end, text)| {
25+
let start = floor_char_boundary(text, start);
26+
let end = ceil_char_boundary(text, end);
27+
2528
let head = &text[..start];
2629
let mid = &text[start..end];
2730
let tail = &text[end..];
31+
2832
writeln!(
2933
out,
3034
" {head}{mid}{tail}",
@@ -68,14 +72,56 @@ impl Span {
6872
left_pad = span.text.len() - s.len();
6973
}
7074

75+
let start = span.highlight_start.saturating_sub(left_pad + 1);
76+
let end = span.highlight_end.saturating_sub(left_pad + 1);
77+
78+
let start = str_indices::chars::from_byte_idx(&span.text, start);
79+
let end = str_indices::chars::from_byte_idx(&span.text, end);
80+
7181
// error messages are 1 indexed
7282
break Some((
73-
span.highlight_start.saturating_sub(left_pad + 1),
74-
span.highlight_end.saturating_sub(left_pad + 1),
83+
start,
84+
end,
7585
// TODO use unicode-segmentation here
86+
// what does this mean? how would segmentation be applicable here?
7687
&span.text[left_pad..],
7788
));
7889
}
7990
})
8091
}
8192
}
93+
94+
// NOTE this is taken from <https://github.com/rust-lang/rust/issues/93743>
95+
// TODO it's currently unstable in std, but this copy is fine for what we need; rely on std once it is stabilized
96+
fn floor_char_boundary(str: &str, index: usize) -> usize {
97+
if index >= str.len() {
98+
str.len()
99+
} else {
100+
let lower_bound = index.saturating_sub(3);
101+
let new_index = str.as_bytes()[lower_bound..=index]
102+
.iter()
103+
.rposition(|&b| is_utf8_char_boundary(b));
104+
105+
lower_bound + new_index.unwrap()
106+
}
107+
}
108+
109+
// NOTE this is taken from <https://github.com/rust-lang/rust/issues/93743>
110+
// TODO it's currently unstable in std, but this copy is fine for what we need; rely on std once it is stabilized
111+
fn ceil_char_boundary(str: &str, index: usize) -> usize {
112+
if index > str.len() {
113+
str.len()
114+
} else {
115+
let upper_bound = Ord::min(index + 4, str.len());
116+
str.as_bytes()[index..upper_bound]
117+
.iter()
118+
.position(|&b| is_utf8_char_boundary(b))
119+
.map_or(upper_bound, |pos| pos + index)
120+
}
121+
}
122+
123+
// NOTE impl detail of `u8::is_utf8_char_boundary` used by `floor_char_boundary` and `ceil_char_boundary`
124+
/// Reports whether `byte` can start a UTF-8 encoded character.
///
/// Returns `false` only for bytes in `0x80..=0xBF`, the UTF-8 continuation
/// bytes (bit pattern `10xxxxxx`), and `true` for every other value. This is
/// equivalent to the bit trick `(byte as i8) >= -0x40`.
const fn is_utf8_char_boundary(byte: u8) -> bool {
    !matches!(byte, 0x80..=0xBF)
}

src/parse/text.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#[derive(Debug, serde::Deserialize)]
22
pub struct Text {
3-
pub highlight_start: usize,
4-
pub highlight_end: usize,
3+
pub highlight_start: usize, // are these byte or grapheme indices?
4+
pub highlight_end: usize, // are these byte or grapheme indices?
55
pub text: String,
66
}

0 commit comments

Comments
 (0)