Skip to content

Commit fd85ca0

Browse files
committed
Auto merge of #80550 - bugadani:markdown-refactor, r=jyn514
Cleanup markdown span handling, take 2 This PR includes the cleanups made in #80244 except for the removal of `locate()`. While the biggest conceptual part in #80244 was the removal of `locate()`, it introduced a diagnostic regression. Additional cleanup: - Use `RefCell` to avoid building two separate vectors for the links Work to do: - [ ] Decide if `locate()` can be simplified by assuming `s` is always in `md` - [ ] Should probably add some tests that still provide the undesired diagnostics causing #80381 cc `@jyn514` This is the best I can do without patching Pulldown to provide multiple ranges for reference-style links. Also, since `locate` is probably more efficient than `rfind` (at least it's constant time), I decided to not check the link type and just cover every &str as it was before.
2 parents 90ccf4f + f073543 commit fd85ca0

File tree

2 files changed

+111
-103
lines changed

2 files changed

+111
-103
lines changed

src/librustdoc/html/markdown.rs

+84-71
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use rustc_session::lint;
2525
use rustc_span::edition::Edition;
2626
use rustc_span::Span;
2727
use std::borrow::Cow;
28+
use std::cell::RefCell;
2829
use std::collections::VecDeque;
2930
use std::default::Default;
3031
use std::fmt::Write;
@@ -414,11 +415,13 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
414415
}
415416
}
416417

418+
type SpannedEvent<'a> = (Event<'a>, Range<usize>);
419+
417420
/// Make headings links with anchor IDs and build up TOC.
418421
struct HeadingLinks<'a, 'b, 'ids, I> {
419422
inner: I,
420423
toc: Option<&'b mut TocBuilder>,
421-
buf: VecDeque<Event<'a>>,
424+
buf: VecDeque<SpannedEvent<'a>>,
422425
id_map: &'ids mut IdMap,
423426
}
424427

@@ -428,48 +431,48 @@ impl<'a, 'b, 'ids, I> HeadingLinks<'a, 'b, 'ids, I> {
428431
}
429432
}
430433

431-
impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a, 'b, 'ids, I> {
432-
type Item = Event<'a>;
434+
impl<'a, 'b, 'ids, I: Iterator<Item = SpannedEvent<'a>>> Iterator
435+
for HeadingLinks<'a, 'b, 'ids, I>
436+
{
437+
type Item = SpannedEvent<'a>;
433438

434439
fn next(&mut self) -> Option<Self::Item> {
435440
if let Some(e) = self.buf.pop_front() {
436441
return Some(e);
437442
}
438443

439444
let event = self.inner.next();
440-
if let Some(Event::Start(Tag::Heading(level))) = event {
445+
if let Some((Event::Start(Tag::Heading(level)), _)) = event {
441446
let mut id = String::new();
442447
for event in &mut self.inner {
443-
match &event {
448+
match &event.0 {
444449
Event::End(Tag::Heading(..)) => break,
450+
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
445451
Event::Text(text) | Event::Code(text) => {
446452
id.extend(text.chars().filter_map(slugify));
453+
self.buf.push_back(event);
447454
}
448-
_ => {}
449-
}
450-
match event {
451-
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
452-
event => self.buf.push_back(event),
455+
_ => self.buf.push_back(event),
453456
}
454457
}
455458
let id = self.id_map.derive(id);
456459

457460
if let Some(ref mut builder) = self.toc {
458461
let mut html_header = String::new();
459-
html::push_html(&mut html_header, self.buf.iter().cloned());
462+
html::push_html(&mut html_header, self.buf.iter().map(|(ev, _)| ev.clone()));
460463
let sec = builder.push(level as u32, html_header, id.clone());
461-
self.buf.push_front(Event::Html(format!("{} ", sec).into()));
464+
self.buf.push_front((Event::Html(format!("{} ", sec).into()), 0..0));
462465
}
463466

464-
self.buf.push_back(Event::Html(format!("</a></h{}>", level).into()));
467+
self.buf.push_back((Event::Html(format!("</a></h{}>", level).into()), 0..0));
465468

466469
let start_tags = format!(
467470
"<h{level} id=\"{id}\" class=\"section-header\">\
468471
<a href=\"#{id}\">",
469472
id = id,
470473
level = level
471474
);
472-
return Some(Event::Html(start_tags.into()));
475+
return Some((Event::Html(start_tags.into()), 0..0));
473476
}
474477
event
475478
}
@@ -555,23 +558,23 @@ impl<'a, I> Footnotes<'a, I> {
555558
}
556559
}
557560

558-
impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
559-
type Item = Event<'a>;
561+
impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for Footnotes<'a, I> {
562+
type Item = SpannedEvent<'a>;
560563

561564
fn next(&mut self) -> Option<Self::Item> {
562565
loop {
563566
match self.inner.next() {
564-
Some(Event::FootnoteReference(ref reference)) => {
567+
Some((Event::FootnoteReference(ref reference), range)) => {
565568
let entry = self.get_entry(&reference);
566569
let reference = format!(
567570
"<sup id=\"fnref{0}\"><a href=\"#fn{0}\">{0}</a></sup>",
568571
(*entry).1
569572
);
570-
return Some(Event::Html(reference.into()));
573+
return Some((Event::Html(reference.into()), range));
571574
}
572-
Some(Event::Start(Tag::FootnoteDefinition(def))) => {
575+
Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
573576
let mut content = Vec::new();
574-
for event in &mut self.inner {
577+
for (event, _) in &mut self.inner {
575578
if let Event::End(Tag::FootnoteDefinition(..)) = event {
576579
break;
577580
}
@@ -602,7 +605,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
602605
ret.push_str("</li>");
603606
}
604607
ret.push_str("</ol></div>");
605-
return Some(Event::Html(ret.into()));
608+
return Some((Event::Html(ret.into()), 0..0));
606609
} else {
607610
return None;
608611
}
@@ -912,13 +915,14 @@ impl Markdown<'_> {
912915
};
913916

914917
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer));
918+
let p = p.into_offset_iter();
915919

916920
let mut s = String::with_capacity(md.len() * 3 / 2);
917921

918922
let p = HeadingLinks::new(p, None, &mut ids);
919-
let p = LinkReplacer::new(p, links);
920-
let p = CodeBlocks::new(p, codes, edition, playground);
921923
let p = Footnotes::new(p);
924+
let p = LinkReplacer::new(p.map(|(ev, _)| ev), links);
925+
let p = CodeBlocks::new(p, codes, edition, playground);
922926
html::push_html(&mut s, p);
923927

924928
s
@@ -929,16 +933,16 @@ impl MarkdownWithToc<'_> {
929933
crate fn into_string(self) -> String {
930934
let MarkdownWithToc(md, mut ids, codes, edition, playground) = self;
931935

932-
let p = Parser::new_ext(md, opts());
936+
let p = Parser::new_ext(md, opts()).into_offset_iter();
933937

934938
let mut s = String::with_capacity(md.len() * 3 / 2);
935939

936940
let mut toc = TocBuilder::new();
937941

938942
{
939943
let p = HeadingLinks::new(p, Some(&mut toc), &mut ids);
940-
let p = CodeBlocks::new(p, codes, edition, playground);
941944
let p = Footnotes::new(p);
945+
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
942946
html::push_html(&mut s, p);
943947
}
944948

@@ -954,19 +958,19 @@ impl MarkdownHtml<'_> {
954958
if md.is_empty() {
955959
return String::new();
956960
}
957-
let p = Parser::new_ext(md, opts());
961+
let p = Parser::new_ext(md, opts()).into_offset_iter();
958962

959963
// Treat inline HTML as plain text.
960-
let p = p.map(|event| match event {
961-
Event::Html(text) => Event::Text(text),
964+
let p = p.map(|event| match event.0 {
965+
Event::Html(text) => (Event::Text(text), event.1),
962966
_ => event,
963967
});
964968

965969
let mut s = String::with_capacity(md.len() * 3 / 2);
966970

967971
let p = HeadingLinks::new(p, None, &mut ids);
968-
let p = CodeBlocks::new(p, codes, edition, playground);
969972
let p = Footnotes::new(p);
973+
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
970974
html::push_html(&mut s, p);
971975

972976
s
@@ -1119,56 +1123,65 @@ crate fn plain_text_summary(md: &str) -> String {
11191123
s
11201124
}
11211125

1122-
crate fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
1126+
crate fn markdown_links(md: &str) -> Vec<(String, Range<usize>)> {
11231127
if md.is_empty() {
11241128
return vec![];
11251129
}
11261130

1127-
let mut links = vec![];
1128-
let mut shortcut_links = vec![];
1129-
1130-
{
1131-
let locate = |s: &str| unsafe {
1132-
let s_start = s.as_ptr();
1133-
let s_end = s_start.add(s.len());
1134-
let md_start = md.as_ptr();
1135-
let md_end = md_start.add(md.len());
1136-
if md_start <= s_start && s_end <= md_end {
1137-
let start = s_start.offset_from(md_start) as usize;
1138-
let end = s_end.offset_from(md_start) as usize;
1139-
Some(start..end)
1140-
} else {
1141-
None
1142-
}
1143-
};
1131+
let links = RefCell::new(vec![]);
1132+
1133+
// FIXME: remove this function once pulldown_cmark can provide spans for link definitions.
1134+
let locate = |s: &str, fallback: Range<usize>| unsafe {
1135+
let s_start = s.as_ptr();
1136+
let s_end = s_start.add(s.len());
1137+
let md_start = md.as_ptr();
1138+
let md_end = md_start.add(md.len());
1139+
if md_start <= s_start && s_end <= md_end {
1140+
let start = s_start.offset_from(md_start) as usize;
1141+
let end = s_end.offset_from(md_start) as usize;
1142+
start..end
1143+
} else {
1144+
fallback
1145+
}
1146+
};
1147+
1148+
let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1149+
// For diagnostics, we want to underline the link's definition but `span` will point at
1150+
// where the link is used. This is a problem for reference-style links, where the definition
1151+
// is separate from the usage.
1152+
match link {
1153+
// `Borrowed` variant means the string (the link's destination) may come directly from
1154+
// the markdown text and we can locate the original link destination.
1155+
// NOTE: LinkReplacer also provides `Borrowed` but possibly from other sources,
1156+
// so `locate()` can fall back to use `span`.
1157+
CowStr::Borrowed(s) => locate(s, span),
1158+
1159+
// For anything else, we can only use the provided range.
1160+
CowStr::Boxed(_) | CowStr::Inlined(_) => span,
1161+
}
1162+
};
11441163

1145-
let mut push = |link: BrokenLink<'_>| {
1146-
// FIXME: use `link.span` instead of `locate`
1147-
// (doing it now includes the `[]` as well as the text)
1148-
shortcut_links.push((link.reference.to_owned(), locate(link.reference)));
1149-
None
1150-
};
1151-
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push));
1152-
1153-
// There's no need to thread an IdMap through to here because
1154-
// the IDs generated aren't going to be emitted anywhere.
1155-
let mut ids = IdMap::new();
1156-
let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
1157-
1158-
for ev in iter {
1159-
if let Event::Start(Tag::Link(_, dest, _)) = ev {
1160-
debug!("found link: {}", dest);
1161-
links.push(match dest {
1162-
CowStr::Borrowed(s) => (s.to_owned(), locate(s)),
1163-
s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None),
1164-
});
1165-
}
1164+
let mut push = |link: BrokenLink<'_>| {
1165+
let span = span_for_link(&CowStr::Borrowed(link.reference), link.span);
1166+
links.borrow_mut().push((link.reference.to_owned(), span));
1167+
None
1168+
};
1169+
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter();
1170+
1171+
// There's no need to thread an IdMap through to here because
1172+
// the IDs generated aren't going to be emitted anywhere.
1173+
let mut ids = IdMap::new();
1174+
let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
1175+
1176+
for ev in iter {
1177+
if let Event::Start(Tag::Link(_, dest, _)) = ev.0 {
1178+
debug!("found link: {}", dest);
1179+
let span = span_for_link(&dest, ev.1);
1180+
links.borrow_mut().push((dest.into_string(), span));
11661181
}
11671182
}
11681183

1169-
links.append(&mut shortcut_links);
1170-
1171-
links
1184+
links.into_inner()
11721185
}
11731186

11741187
#[derive(Debug)]

0 commit comments

Comments
 (0)