Skip to content

Commit 5ca1db9

Browse files
committed
Split single sitemap into index and sub-sitemaps per starting character
1 parent 743bbf6 commit 5ca1db9

File tree

3 files changed

+53
-7
lines changed

3 files changed

+53
-7
lines changed

src/web/routes.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ pub(super) fn build_routes() -> Routes {
1616
// https://support.google.com/webmasters/answer/183668?hl=en
1717
routes.static_resource("/robots.txt", PermanentRedirect("/-/static/robots.txt"));
1818
routes.static_resource("/favicon.ico", PermanentRedirect("/-/static/favicon.ico"));
19-
routes.static_resource("/sitemap.xml", super::sitemap::sitemap_handler);
19+
routes.static_resource("/sitemap.xml", super::sitemap::sitemapindex_handler);
20+
routes.static_resource(
21+
"/-/sitemap/:which/sitemap.xml",
22+
super::sitemap::sitemap_handler,
23+
);
2024

2125
// This should not need to be served from the root as we reference the inner path in links,
2226
// but clients might have cached the url and need to update it.

src/web/sitemap.rs

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,27 @@ use iron::{
55
mime::{Mime, SubLevel, TopLevel},
66
IronResult, Request, Response,
77
};
8+
use router::Router;
89
use serde::Serialize;
910
use serde_json::Value;
1011

12+
/// Template context for the sitemap index page: the list of sub-sitemaps it links to.
13+
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
14+
struct SitemapIndexXml {
15+
sitemaps: Vec<char>, // one character per sub-sitemap; the template puts it into the sub-sitemap URL
16+
}
17+
18+
// NOTE(review): `impl_webpage!` is defined elsewhere; presumably it makes `SitemapIndexXml`
// renderable with the named template and the given content type (application/xml) — confirm.
impl_webpage! {
19+
SitemapIndexXml = "core/sitemapindex.xml",
20+
content_type = ContentType(Mime(TopLevel::Application, SubLevel::Xml, vec![])),
21+
}
22+
23+
/// Handler for the sitemap index: builds a `SitemapIndexXml` with one entry per ASCII
/// lowercase letter (`'a'..='z'`) and turns it into the response for `req`.
pub fn sitemapindex_handler(req: &mut Request) -> IronResult<Response> {
24+
let sitemaps: Vec<char> = (b'a'..=b'z').map(char::from).collect(); // byte range -> chars 'a'..='z'
25+
26+
SitemapIndexXml { sitemaps }.into_response(req)
27+
}
28+
1129
/// The sitemap
1230
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
1331
struct SitemapXml {
@@ -21,16 +39,30 @@ impl_webpage! {
2139
}
2240

2341
pub fn sitemap_handler(req: &mut Request) -> IronResult<Response> {
42+
let router = extension!(req, Router);
43+
let which = cexpect!(req, router.find("which")).to_lowercase();
44+
2445
let mut conn = extension!(req, Pool).get()?;
2546
let query = conn
2647
.query(
27-
"SELECT DISTINCT ON (crates.name)
28-
crates.name,
29-
releases.release_time
48+
"SELECT crates.name,
49+
MAX(releases.release_time) as release_time
3050
FROM crates
3151
INNER JOIN releases ON releases.crate_id = crates.id
32-
WHERE rustdoc_status = true",
33-
&[],
52+
WHERE
53+
rustdoc_status = true AND
54+
(
55+
crates.name like $1 OR
56+
crates.name like $2
57+
)
58+
GROUP BY crates.name
59+
",
60+
&[
61+
// this LIKE pattern has the '%' only at the end,
62+
// so postgres can use the index on `name`
63+
&format!("{}%", which),
64+
&format!("{}%", which.to_uppercase()),
65+
],
3466
)
3567
.unwrap();
3668

@@ -127,13 +159,15 @@ mod tests {
127159
wrapper(|env| {
128160
let web = env.frontend();
129161
assert_success("/sitemap.xml", web)?;
162+
assert_success("/-/sitemap/s/sitemap.xml", web)?;
130163

131164
env.fake_release().name("some_random_crate").create()?;
132165
env.fake_release()
133166
.name("some_random_crate_that_failed")
134167
.build_result_successful(false)
135168
.create()?;
136-
assert_success("/sitemap.xml", web)
169+
assert_success("/sitemap.xml", web)?;
170+
assert_success("/-/sitemap/s/sitemap.xml", web)
137171
})
138172
}
139173

templates/core/sitemapindex.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
3+
{% for which in sitemaps -%}
4+
<sitemap>
5+
<loc>https://docs.rs/-/sitemap/{{ which }}/sitemap.xml</loc>
6+
</sitemap>
7+
{%- endfor %}
8+
</sitemapindex>

0 commit comments

Comments
 (0)