Skip to content

Commit 4c6fe4b

Browse files
authored
Merge pull request #7134 from readthedocs/fix-permalink
Search: don't index permalinks
2 parents e108b70 + 5cd1931 commit 4c6fe4b

File tree

2 files changed

+15
-7
lines changed

2 files changed

+15
-7
lines changed

readthedocs/search/parse_json.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def generate_page_sections(page_title, body, fjson_storage_path):
6161
for head_level in range(1, 7):
6262
tags = body.css(f'.section > h{head_level}')
6363
for tag in tags:
64-
title = tag.text().replace('¶', '').strip()
64+
title = _parse_title(tag)
6565

6666
div = tag.parent
6767
section_id = div.attributes.get('id', '')
@@ -196,21 +196,29 @@ def _get_text_for_domain_data(desc):
196196
return docstrings
197197

198198

199-
def parse_content(content, remove_first_line=False):
199+
def parse_content(content):
200200
"""Removes new line characters and ¶."""
201201
content = content.replace('¶', '').strip()
202202
content = content.split('\n')
203203

204-
# removing the starting text of each
205-
if remove_first_line and len(content) > 1:
206-
content = content[1:]
207-
208204
# Convert all new lines to " "
209205
content = (text.strip() for text in content)
210206
content = ' '.join(text for text in content if text)
211207
return content
212208

213209

210+
def _parse_title(tag):
211+
"""
212+
Parses a Sphinx title tag.
213+
214+
- Removes the permalink value
215+
"""
216+
nodes_to_be_removed = tag.css('a.headerlink')
217+
for node in nodes_to_be_removed:
218+
node.decompose()
219+
return tag.text().strip()
220+
221+
214222
def process_mkdocs_index_file(json_storage_path, page):
215223
"""Reads the json index file and parses it into a structured dict."""
216224
log.debug('Processing JSON index file: %s', json_storage_path)

readthedocs/search/tests/data/sphinx/in/page.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ <h1>Title One<a class="headerlink" href="#title-one" title="Permalink to this he
1111
<p>This is another H1 title.</p>
1212

1313
<div class="section" id="sub-title-one">
14-
<h2>Sub-title one<a class="headerlink" href="#sub-title-one" title="Permalink to this headline"></a></h2>
14+
<h2>Sub-title one<a class="headerlink" href="#sub-title-one" title="Permalink to this headline">§</a></h2>
1515
<p>Sub title</p>
1616

1717
<div class="section" id="subsub-title">

0 commit comments

Comments
 (0)