diff --git a/src/web/routes.rs b/src/web/routes.rs
index 44b808471..dbe888bd1 100644
--- a/src/web/routes.rs
+++ b/src/web/routes.rs
@@ -16,7 +16,11 @@ pub(super) fn build_routes() -> Routes {
     // https://support.google.com/webmasters/answer/183668?hl=en
     routes.static_resource("/robots.txt", PermanentRedirect("/-/static/robots.txt"));
     routes.static_resource("/favicon.ico", PermanentRedirect("/-/static/favicon.ico"));
-    routes.static_resource("/sitemap.xml", super::sitemap::sitemap_handler);
+    routes.static_resource("/sitemap.xml", super::sitemap::sitemapindex_handler);
+    routes.static_resource(
+        "/-/sitemap/:letter/sitemap.xml",
+        super::sitemap::sitemap_handler,
+    );
 
     // This should not need to be served from the root as we reference the inner path in links,
     // but clients might have cached the url and need to update it.
diff --git a/src/web/sitemap.rs b/src/web/sitemap.rs
index 8c6ffa8c9..c08b167e0 100644
--- a/src/web/sitemap.rs
+++ b/src/web/sitemap.rs
@@ -1,13 +1,31 @@
-use crate::{db::Pool, docbuilder::Limits, impl_webpage, web::page::WebPage};
+use crate::{db::Pool, docbuilder::Limits, impl_webpage, web::error::Nope, web::page::WebPage};
 use chrono::{DateTime, Utc};
 use iron::{
     headers::ContentType,
     mime::{Mime, SubLevel, TopLevel},
     IronResult, Request, Response,
 };
+use router::Router;
 use serde::Serialize;
 use serde_json::Value;
 
+/// sitemap index
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+struct SitemapIndexXml {
+    sitemaps: Vec<char>,
+}
+
+impl_webpage! {
+    SitemapIndexXml = "core/sitemapindex.xml",
+    content_type = ContentType(Mime(TopLevel::Application, SubLevel::Xml, vec![])),
+}
+
+pub fn sitemapindex_handler(req: &mut Request) -> IronResult<Response> {
+    let sitemaps: Vec<char> = ('a'..='z').collect();
+
+    SitemapIndexXml { sitemaps }.into_response(req)
+}
+
 /// The sitemap
 #[derive(Debug, Clone, PartialEq, Eq, Serialize)]
 struct SitemapXml {
@@ -21,16 +39,30 @@ impl_webpage! {
 }
 
 pub fn sitemap_handler(req: &mut Request) -> IronResult<Response> {
+    let router = extension!(req, Router);
+    let letter = cexpect!(req, router.find("letter"));
+
+    if letter.len() != 1 {
+        return Err(Nope::ResourceNotFound.into());
+    } else if let Some(ch) = letter.chars().next() {
+        if !(ch.is_ascii_lowercase()) {
+            return Err(Nope::ResourceNotFound.into());
+        }
+    }
+
     let mut conn = extension!(req, Pool).get()?;
     let query = conn
         .query(
-            "SELECT DISTINCT ON (crates.name)
-                    crates.name,
-                    releases.release_time
+            "SELECT crates.name,
+                    MAX(releases.release_time) as release_time
              FROM crates
              INNER JOIN releases ON releases.crate_id = crates.id
-             WHERE rustdoc_status = true",
-            &[],
+             WHERE
+                rustdoc_status = true AND
+                crates.name ILIKE $1
+             GROUP BY crates.name
+             ",
+            &[&format!("{}%", letter)],
         )
         .unwrap();
 
@@ -119,19 +151,70 @@ pub fn about_handler(req: &mut Request) -> IronResult<Response> {
 #[cfg(test)]
 mod tests {
     use crate::test::{assert_success, wrapper};
+    use reqwest::StatusCode;
 
     #[test]
-    fn sitemap() {
+    fn sitemap_index() {
         wrapper(|env| {
             let web = env.frontend();
-            assert_success("/sitemap.xml", web)?;
+            assert_success("/sitemap.xml", web)
+        })
+    }
+
+    #[test]
+    fn sitemap_invalid_letters() {
+        wrapper(|env| {
+            let web = env.frontend();
+
+            // everything not length=1 and ascii-lowercase should fail
+            for invalid_letter in &["1", "aa", "A", ""] {
+                println!("trying to fail letter {}", invalid_letter);
+                assert_eq!(
+                    web.get(&format!("/-/sitemap/{}/sitemap.xml", invalid_letter))
+                        .send()?
+                        .status(),
+                    StatusCode::NOT_FOUND
+                );
+            }
+            Ok(())
+        })
+    }
+
+    #[test]
+    fn sitemap_letter() {
+        wrapper(|env| {
+            let web = env.frontend();
+
+            // letter-sitemaps always work, even without crates & releases
+            for letter in 'a'..='z' {
+                assert_success(&format!("/-/sitemap/{}/sitemap.xml", letter), web)?;
+            }
 
             env.fake_release().name("some_random_crate").create()?;
             env.fake_release()
                 .name("some_random_crate_that_failed")
                 .build_result_successful(false)
                 .create()?;
-            assert_success("/sitemap.xml", web)
+
+            // these fake crates appear only in the `s` sitemap
+            let response = web.get("/-/sitemap/s/sitemap.xml").send()?;
+            assert!(response.status().is_success());
+
+            let content = response.text()?;
+            assert!(content.contains(&"some_random_crate"));
+            assert!(!(content.contains(&"some_random_crate_that_failed")));
+
+            // and not in the others
+            for letter in ('a'..='z').filter(|&c| c != 's') {
+                let response = web
+                    .get(&format!("/-/sitemap/{}/sitemap.xml", letter))
+                    .send()?;
+
+                assert!(response.status().is_success());
+                assert!(!(response.text()?.contains("some_random_crate")));
+            }
+
+            Ok(())
         })
     }
 
diff --git a/templates/core/sitemapindex.xml b/templates/core/sitemapindex.xml
new file mode 100644
index 000000000..6c5c88184
--- /dev/null
+++ b/templates/core/sitemapindex.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+    {% for which in sitemaps -%}
+    <sitemap>
+        <loc>https://docs.rs/-/sitemap/{{ which }}/sitemap.xml</loc>
+    </sitemap>
+    {%- endfor %}
+</sitemapindex>