Skip to content

Commit d87f22e

Browse files
committed
Shell out to crates.io for search
This is a much smaller maintenance burden and fixes a lot of our bugs. TODO: fix tests
1 parent 279752c commit d87f22e

File tree

3 files changed

+79
-77
lines changed

src/utils/github_updater.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use super::APP_USER_AGENT;
12
use crate::error::Result;
23
use crate::{db::Pool, Config};
34
use chrono::{DateTime, Utc};
@@ -13,12 +14,6 @@ use serde::Deserialize;
1314
use std::collections::HashSet;
1415
use std::sync::Arc;
1516

16-
const APP_USER_AGENT: &str = concat!(
17-
env!("CARGO_PKG_NAME"),
18-
" ",
19-
include_str!(concat!(env!("OUT_DIR"), "/git_version"))
20-
);
21-
2217
const GRAPHQL_UPDATE: &str = "query($ids: [ID!]!) {
2318
nodes(ids: $ids) {
2419
... on Repository {

src/utils/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,9 @@ mod queue_builder;
2626
mod release_activity_updater;
2727
mod rustc_version;
2828
pub(crate) mod sized_buffer;
29+
30+
pub(crate) const APP_USER_AGENT: &str = concat!(
31+
env!("CARGO_PKG_NAME"),
32+
" ",
33+
include_str!(concat!(env!("OUT_DIR"), "/git_version"))
34+
);

src/web/releases.rs

Lines changed: 72 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ use iron::{
1414
modifiers::Redirect,
1515
status, IronResult, Request, Response, Url,
1616
};
17+
use log::debug;
1718
use postgres::Client;
1819
use router::Router;
19-
use serde::Serialize;
20+
use serde::{Deserialize, Serialize};
2021
use serde_json::Value;
2122

2223
/// Number of releases shown on the home page
@@ -215,84 +216,84 @@ fn get_releases_by_owner(
215216

216217
/// Get the search results for a crate search query
217218
///
218-
/// Retrieves crates which names have a levenshtein distance of less than or equal to 3,
219-
/// crates who fit into or otherwise are made up of the query or crates whose descriptions
220-
/// match the search query.
221-
///
222-
/// * `query`: The query string, unfiltered
223-
/// * `page`: The page of results to show (1-indexed)
224-
/// * `limit`: The number of results to return
225-
///
226-
/// Returns 0 and an empty Vec when no results are found or if a database error occurs
227-
///
219+
/// This delegates to the crates.io search API.
228220
fn get_search_results(
229221
conn: &mut Client,
230-
mut query: &str,
222+
query: &str,
231223
page: i64,
232224
limit: i64,
233-
) -> Result<(i64, Vec<Release>), failure::Error> {
234-
query = query.trim();
235-
if query.is_empty() {
236-
return Ok((0, Vec::new()));
225+
) -> Result<(u64, Vec<Release>), failure::Error> {
226+
#[derive(Deserialize)]
227+
struct CratesIoReleases {
228+
crates: Vec<CratesIoRelease>,
229+
meta: CratesIoMeta,
230+
}
231+
#[derive(Deserialize, Debug)]
232+
struct CratesIoRelease {
233+
name: String,
234+
max_version: String,
235+
description: Option<String>,
236+
updated_at: DateTime<Utc>,
237+
}
238+
#[derive(Deserialize)]
239+
struct CratesIoMeta {
240+
total: u64,
237241
}
238-
let offset = (page - 1) * limit;
239-
240-
let statement = "
241-
SELECT
242-
crates.name AS name,
243-
releases.version AS version,
244-
releases.description AS description,
245-
releases.target_name AS target_name,
246-
releases.release_time AS release_time,
247-
releases.rustdoc_status AS rustdoc_status,
248-
github_repos.stars AS github_stars,
249-
COUNT(*) OVER() as total
250-
FROM crates
251-
INNER JOIN (
252-
SELECT releases.id, releases.crate_id
253-
FROM (
254-
SELECT
255-
releases.id,
256-
releases.crate_id,
257-
RANK() OVER (PARTITION BY crate_id ORDER BY release_time DESC) as rank
258-
FROM releases
259-
WHERE releases.rustdoc_status AND NOT releases.yanked
260-
) AS releases
261-
WHERE releases.rank = 1
262-
) AS latest_release ON latest_release.crate_id = crates.id
263-
INNER JOIN releases ON latest_release.id = releases.id
264-
LEFT JOIN github_repos ON releases.github_repo = github_repos.id
265-
WHERE
266-
((char_length($1)::float - levenshtein(crates.name, $1)::float) / char_length($1)::float) >= 0.65
267-
OR crates.name ILIKE CONCAT('%', $1, '%')
268-
GROUP BY crates.id, releases.id, github_repos.stars
269-
ORDER BY
270-
levenshtein(crates.name, $1) ASC,
271-
crates.name ILIKE CONCAT('%', $1, '%'),
272-
releases.downloads DESC
273-
LIMIT $2 OFFSET $3";
274-
275-
let rows = conn.query(statement, &[&query, &limit, &offset])?;
276242

277-
// Each row contains the total number of possible/valid results, just get it once
278-
let total_results = rows
279-
.get(0)
280-
.map(|row| row.get::<_, i64>("total"))
281-
.unwrap_or_default();
282-
let packages: Vec<Release> = rows
243+
use crate::utils::APP_USER_AGENT;
244+
use once_cell::sync::Lazy;
245+
use reqwest::blocking::Client as HttpClient;
246+
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, USER_AGENT};
247+
248+
static HTTP_CLIENT: Lazy<HttpClient> = Lazy::new(|| {
249+
let mut headers = HeaderMap::new();
250+
headers.insert(USER_AGENT, HeaderValue::from_static(APP_USER_AGENT));
251+
headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
252+
HttpClient::builder()
253+
.default_headers(headers)
254+
.build()
255+
.unwrap()
256+
});
257+
258+
let url = format!(
259+
"https://crates.io/api/v1/crates?page={page}&per_page={limit}&q={query}",
260+
page = page,
261+
limit = limit,
262+
query = query
263+
);
264+
debug!("fetching search results from {}", url);
265+
let releases: CratesIoReleases = HTTP_CLIENT.get(&url).send()?.json()?;
266+
let query = conn.prepare(
267+
"SELECT github_repos.stars, releases.target_name, releases.rustdoc_status
268+
FROM crates INNER JOIN releases ON crates.id = releases.crate_id
269+
LEFT JOIN github_repos ON releases.github_repo = github_repos.id
270+
WHERE crates.name = $1 AND releases.version = $2",
271+
)?;
272+
let crates = releases
273+
.crates
283274
.into_iter()
284-
.map(|row| Release {
285-
name: row.get("name"),
286-
version: row.get("version"),
287-
description: row.get("description"),
288-
target_name: row.get("target_name"),
289-
release_time: row.get("release_time"),
290-
rustdoc_status: row.get("rustdoc_status"),
291-
stars: row.get::<_, Option<i32>>("github_stars").unwrap_or(0),
275+
.flat_map(|krate| {
276+
let rows = match conn.query(&query, &[&krate.name, &krate.max_version]) {
277+
Err(e) => return Some(Err(e)),
278+
Ok(rows) => rows,
279+
};
280+
debug!("looking up results for {:?}", krate);
281+
// crates.io could have a release that hasn't yet been added to the database.
282+
// If so, just skip it.
283+
let row = rows.get(0)?;
284+
let stars: Option<_> = row.get("stars");
285+
Some(Result::<_, postgres::Error>::Ok(Release {
286+
name: krate.name,
287+
version: krate.max_version,
288+
description: krate.description,
289+
release_time: krate.updated_at,
290+
target_name: row.get("target_name"),
291+
rustdoc_status: row.get("rustdoc_status"),
292+
stars: stars.unwrap_or(0),
293+
}))
292294
})
293-
.collect();
294-
295-
Ok((total_results, packages))
295+
.collect::<Result<_, _>>()?;
296+
Ok((releases.meta.total, crates))
296297
}
297298

298299
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]

0 commit comments

Comments
 (0)