Commit 1474a8c

Switch to https for urls (#23)
1 parent 2d19755 commit 1474a8c

7 files changed (+32, −32 lines)

README.md

Lines changed: 2 additions & 2 deletions
@@ -31,9 +31,9 @@ use url::Url;
 
 fn main() {
     let client = Client::new();
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
-    let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     assert!(robots_txt.can_fetch("*", &fetch_url));
 }
 ```
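For readers trying the change locally, here is a minimal offline sketch of the same idea without the reqwest client, built on the `parse_robots_txt` API that the src/parser.rs and tests/test_lib.rs diffs below exercise. The import path `robotparser::parser::parse_robots_txt` and the example rules are assumptions for illustration; they are not part of this commit.

```rust
// Sketch only: the module path below is assumed, not shown in this diff.
use robotparser::parser::parse_robots_txt;
use url::Url;

fn main() {
    // Parse a hand-written robots.txt against an https origin,
    // mirroring the https URLs this commit switches the examples to.
    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
    let doc = "User-agent: *\nDisallow: /private/";
    let robots_txt = parse_robots_txt(robots_txt_url.origin(), doc).get_result();

    // URLs checked with can_fetch use the same https origin.
    let allowed = Url::parse("https://www.python.org/about/").unwrap();
    let blocked = Url::parse("https://www.python.org/private/data").unwrap();
    assert!(robots_txt.can_fetch("*", &allowed));
    assert!(!robots_txt.can_fetch("*", &blocked));
}
```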

src/lib.rs

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 //! robots.txt parser for Rust
 //!
 //! The robots.txt Exclusion Protocol is implemented as specified in
-//! <http://www.robotstxt.org/norobots-rfc.txt>
+//! <https://www.robotstxt.org/norobots-rfc.txt>
 //!
 //! # Installation
 //!
@@ -23,9 +23,9 @@
 //!
 //! fn main() {
 //!     let client = Client::new();
-//!     let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+//!     let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
 //!     let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
-//!     let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+//!     let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
 //!     assert!(robots_txt.can_fetch("*", &fetch_url));
 //! }
 //! ```

src/parser.rs

Lines changed: 3 additions & 3 deletions
@@ -16,13 +16,13 @@
 //! use url::Url;
 //!
 //! fn main() {
-//!     let robots_txt_url = Url::parse("http://google.com/robots.txt").unwrap();
+//!     let robots_txt_url = Url::parse("https://google.com/robots.txt").unwrap();
 //!     let robots_txt = "User-agent: *\nDisallow: /search";
 //!     let robots_txt = parse_robots_txt(robots_txt_url.origin(), robots_txt);
 //!     assert_eq!(robots_txt.get_warnings().len(), 0);
 //!     let robots_txt = robots_txt.get_result();
-//!     let good_url = Url::parse("http://google.com/test").unwrap();
-//!     let bad_url = Url::parse("http://google.com/search/vvv").unwrap();
+//!     let good_url = Url::parse("https://google.com/test").unwrap();
+//!     let bad_url = Url::parse("https://google.com/search/vvv").unwrap();
 //!     assert_eq!(robots_txt.can_fetch("*", &bad_url), false);
 //!     assert_eq!(robots_txt.can_fetch("*", &good_url), true);
 //! }

tests/test_lib.rs

Lines changed: 19 additions & 19 deletions
@@ -6,15 +6,15 @@ use url::Url;
 const AGENT: &'static str = "test_robotparser";
 
 fn robot_test(doc: &str, good_urls: Vec<&str>, bad_urls: Vec<&str>, agent: &str) {
-    let url = Url::parse("http://www.baidu.com/robots.txt").unwrap();
+    let url = Url::parse("https://www.baidu.com/robots.txt").unwrap();
     let parser = parse_robots_txt(url.origin(), doc).get_result();
     for url in &good_urls {
-        let url = format!("http://www.baidu.com{}", url);
+        let url = format!("https://www.baidu.com{}", url);
         let url = Url::parse(&url).unwrap();
         assert!(parser.can_fetch(agent, &url));
     }
     for url in &bad_urls {
-        let url = format!("http://www.baidu.com{}", url);
+        let url = format!("https://www.baidu.com{}", url);
         let url = Url::parse(&url).unwrap();
         assert!(!parser.can_fetch(agent, &url));
     }
@@ -56,7 +56,7 @@ fn test_robots_txt_1() {
 #[test]
 fn test_robots_txt_2() {
     let doc = "\n\
-# robots.txt for http://www.example.com/\n\
+# robots.txt for https://www.example.com/\n\
 \n\
 User-agent: *\n\
 Disallow: /cyberworld/map/ # This is an infinite virtual URL space\n\
@@ -249,7 +249,7 @@ fn test_robots_txt_read() {
     use robotparser::http::{CreateRobotsTxtRequest, ParseRobotsTxtResponse};
     use reqwest::{Client, Request};
     let http_client = Client::new();
-    let url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let request = Request::create_robots_txt_request(url.origin());
     let mut response = http_client.execute(request).unwrap();
     let parser = response.parse_robots_txt_response().unwrap().get_result();
@@ -258,7 +258,7 @@ fn test_robots_txt_read() {
 
 #[test]
 fn test_robots_text_crawl_delay() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc = "User-agent: Yandex\n\
 Crawl-delay: 2.35\n\
 Disallow: /search/\n";
@@ -268,26 +268,26 @@ fn test_robots_text_crawl_delay() {
 
 #[test]
 fn test_robots_text_sitemaps() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc = "User-agent: Yandex\n\
-Sitemap \t : http://example.com/sitemap1.xml\n
-Sitemap: http://example.com/sitemap2.xml\n
-Sitemap: http://example.com/sitemap3.xml\n
+Sitemap \t : https://example.com/sitemap1.xml\n
+Sitemap: https://example.com/sitemap2.xml\n
+Sitemap: https://example.com/sitemap3.xml\n
 Disallow: /search/\n";
     let parser = parse_robots_txt(robots_txt_url.origin(), doc).get_result();
     assert_eq!(
         &[
-            Url::parse("http://example.com/sitemap1.xml").unwrap(),
-            Url::parse("http://example.com/sitemap2.xml").unwrap(),
-            Url::parse("http://example.com/sitemap3.xml").unwrap()
+            Url::parse("https://example.com/sitemap1.xml").unwrap(),
+            Url::parse("https://example.com/sitemap2.xml").unwrap(),
+            Url::parse("https://example.com/sitemap3.xml").unwrap()
         ],
         parser.get_sitemaps()
     );
 }
 
 #[test]
 fn test_robots_text_request_rate() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc =
         "User-agent: Yandex\n\
 Request-rate: 3/15\n\
@@ -313,15 +313,15 @@ Clean-param: gid\n\
 Clean-param: tm\n\
 Clean-param: amp\n\
 ";
-    let url = Url::parse("http://www.baidu.com/robots.txt").unwrap();
+    let url = Url::parse("https://www.baidu.com/robots.txt").unwrap();
     let parser = parse_robots_txt(url.origin(), doc).get_result();
-    let mut site_url = Url::parse("http://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1").unwrap();
+    let mut site_url = Url::parse("https://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1").unwrap();
     let was_updated = parser.normalize_url(&mut site_url);
     assert_eq!(was_updated, true);
-    assert_eq!(site_url.as_str(), "http://www.baidu.com/test?post_id=7777");
+    assert_eq!(site_url.as_str(), "https://www.baidu.com/test?post_id=7777");
 
-    let mut site_url = Url::parse("http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1").unwrap();
+    let mut site_url = Url::parse("https://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1").unwrap();
     let was_updated = parser.normalize_url(&mut site_url);
     assert_eq!(was_updated, false);
-    assert_eq!(site_url.as_str(), "http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1");
+    assert_eq!(site_url.as_str(), "https://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1");
 }

tests/test_reqwest_async.rs

Lines changed: 2 additions & 2 deletions
@@ -8,9 +8,9 @@ use tokio::runtime::Runtime;
 fn test_reqwest_async() {
     let mut runtime = Runtime::new().unwrap();
     let client = Client::new();
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()));
     let robots_txt = robots_txt_response.unwrap().get_result();
-    let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     assert!(robots_txt.can_fetch("*", &fetch_url));
 }

tests/test_reqwest_blocking.rs

Lines changed: 2 additions & 2 deletions
@@ -6,8 +6,8 @@ use url::Url;
 #[test]
 fn test_reqwest_blocking() {
     let client = Client::new();
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
-    let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     assert!(robots_txt.can_fetch("*", &fetch_url));
 }

tests/test_warnings.rs

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ fn test_warning_request_rate() {
 
 #[test]
 fn test_warning_parsing_url() {
-    let input = "User-Agent: *\nSitemap: http://python.org/sitemap.xml";
+    let input = "User-Agent: *\nSitemap: https://python.org/sitemap.xml";
     validate_warnings(input, &[]);
     let input = "User-Agent: *\nSitemap: http$$$://python.org/sitemap.xml";
    validate_warnings(input, &[WarningReasonKind::ParseUrl]);
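The warning behaviour this test relies on can also be reproduced through the public `get_warnings()` accessor shown in the src/parser.rs example above. Below is a small sketch doing just that; the module path is the same assumption as in the earlier sketch, and since the diff only shows `get_warnings().len()`, the sketch only counts warnings rather than inspecting their kind.

```rust
// Sketch only: module path assumed as in the earlier example.
use robotparser::parser::parse_robots_txt;
use url::Url;

fn main() {
    let origin = Url::parse("https://python.org/robots.txt").unwrap().origin();

    // A well-formed https sitemap URL parses without warnings...
    let ok = parse_robots_txt(origin.clone(), "User-Agent: *\nSitemap: https://python.org/sitemap.xml");
    assert_eq!(ok.get_warnings().len(), 0);

    // ...while a malformed scheme is reported as a parse warning.
    let bad = parse_robots_txt(origin, "User-Agent: *\nSitemap: http$$$://python.org/sitemap.xml");
    assert!(bad.get_warnings().len() > 0);
}
```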
