@@ -6,15 +6,15 @@ use url::Url;
 const AGENT: &'static str = "test_robotparser";

 fn robot_test(doc: &str, good_urls: Vec<&str>, bad_urls: Vec<&str>, agent: &str) {
-    let url = Url::parse("http://www.baidu.com/robots.txt").unwrap();
+    let url = Url::parse("https://www.baidu.com/robots.txt").unwrap();
     let parser = parse_robots_txt(url.origin(), doc).get_result();
     for url in &good_urls {
-        let url = format!("http://www.baidu.com{}", url);
+        let url = format!("https://www.baidu.com{}", url);
         let url = Url::parse(&url).unwrap();
         assert!(parser.can_fetch(agent, &url));
     }
     for url in &bad_urls {
-        let url = format!("http://www.baidu.com{}", url);
+        let url = format!("https://www.baidu.com{}", url);
         let url = Url::parse(&url).unwrap();
         assert!(!parser.can_fetch(agent, &url));
     }
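For orientation, each `test_robots_txt_*` case below feeds a robots.txt document plus the paths it expects to be allowed and blocked into this `robot_test` helper. A minimal sketch of such a call, using an illustrative document and paths rather than the ones from the actual tests (the test name is hypothetical):

```rust
#[test]
fn robot_test_usage_sketch() {
    // Hypothetical robots.txt body: any agent may fetch everything except /private/.
    let doc = "User-agent: *\n\
               Disallow: /private/\n";
    // Paths are appended to the https://www.baidu.com origin inside robot_test.
    robot_test(
        doc,
        vec!["/", "/public/index.html"],
        vec!["/private/", "/private/data.json"],
        AGENT,
    );
}
```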
@@ -56,7 +56,7 @@ fn test_robots_txt_1() {
 #[test]
 fn test_robots_txt_2() {
     let doc = "\n\
-    # robots.txt for http://www.example.com/\n\
+    # robots.txt for https://www.example.com/\n\
     \n\
     User-agent: *\n\
     Disallow: /cyberworld/map/ # This is an infinite virtual URL space\n\
@@ -249,7 +249,7 @@ fn test_robots_txt_read() {
     use robotparser::http::{CreateRobotsTxtRequest, ParseRobotsTxtResponse};
     use reqwest::{Client, Request};
     let http_client = Client::new();
-    let url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let request = Request::create_robots_txt_request(url.origin());
     let mut response = http_client.execute(request).unwrap();
     let parser = response.parse_robots_txt_response().unwrap().get_result();
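Taken together, the calls in this hunk form the crate's fetch-and-check flow over HTTP. A hedged sketch of that flow as a standalone helper, reusing only the trait methods shown above; the helper name is hypothetical and the blocking reqwest `Client` matches the one used in the test:

```rust
use reqwest::{Client, Request};
use robotparser::http::{CreateRobotsTxtRequest, ParseRobotsTxtResponse};
use url::Url;

// Sketch only: mirrors the blocking calls exercised by test_robots_txt_read.
fn is_allowed(page_url: &Url, agent: &str) -> bool {
    // Build a request for <origin>/robots.txt via the crate's request trait.
    let request = Request::create_robots_txt_request(page_url.origin());
    let mut response = Client::new().execute(request).unwrap();
    // Parse the response body into a robots.txt ruleset, then query it.
    let parser = response.parse_robots_txt_response().unwrap().get_result();
    parser.can_fetch(agent, page_url)
}
```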
@@ -258,7 +258,7 @@ fn test_robots_txt_read() {

 #[test]
 fn test_robots_text_crawl_delay() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc = "User-agent: Yandex\n\
     Crawl-delay: 2.35\n\
     Disallow: /search/\n";
@@ -268,26 +268,26 @@ fn test_robots_text_crawl_delay() {

 #[test]
 fn test_robots_text_sitemaps() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc = "User-agent: Yandex\n\
-    Sitemap \t : http://example.com/sitemap1.xml\n
-    Sitemap: http://example.com/sitemap2.xml\n
-    Sitemap: http://example.com/sitemap3.xml\n
+    Sitemap \t : https://example.com/sitemap1.xml\n
+    Sitemap: https://example.com/sitemap2.xml\n
+    Sitemap: https://example.com/sitemap3.xml\n
     Disallow: /search/\n";
     let parser = parse_robots_txt(robots_txt_url.origin(), doc).get_result();
     assert_eq!(
         &[
-            Url::parse("http://example.com/sitemap1.xml").unwrap(),
-            Url::parse("http://example.com/sitemap2.xml").unwrap(),
-            Url::parse("http://example.com/sitemap3.xml").unwrap()
+            Url::parse("https://example.com/sitemap1.xml").unwrap(),
+            Url::parse("https://example.com/sitemap2.xml").unwrap(),
+            Url::parse("https://example.com/sitemap3.xml").unwrap()
         ],
         parser.get_sitemaps()
     );
 }

 #[test]
 fn test_robots_text_request_rate() {
-    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     let doc =
         "User-agent: Yandex\n\
     Request-rate: 3/15\n\
@@ -313,15 +313,15 @@ Clean-param: gid\n\
 Clean-param: tm\n\
 Clean-param: amp\n\
 ";
-    let url = Url::parse("http://www.baidu.com/robots.txt").unwrap();
+    let url = Url::parse("https://www.baidu.com/robots.txt").unwrap();
     let parser = parse_robots_txt(url.origin(), doc).get_result();
-    let mut site_url = Url::parse("http://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap();
+    let mut site_url = Url::parse("https://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap();
     let was_updated = parser.normalize_url(&mut site_url);
     assert_eq!(was_updated, true);
-    assert_eq!(site_url.as_str(), "http://www.baidu.com/test?post_id=7777");
+    assert_eq!(site_url.as_str(), "https://www.baidu.com/test?post_id=7777");

-    let mut site_url = Url::parse("http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap();
+    let mut site_url = Url::parse("https://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap();
     let was_updated = parser.normalize_url(&mut site_url);
     assert_eq!(was_updated, false);
-    assert_eq!(site_url.as_str(), "http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1");
+    assert_eq!(site_url.as_str(), "https://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1");
 }
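This last hunk exercises `normalize_url`: for URLs on the robots.txt origin it strips the query parameters listed under Clean-param (leaving `post_id` intact) and returns `true`, while the google.com URL on a foreign origin is left untouched. A minimal standalone sketch of that behaviour; the module path for `parse_robots_txt` is assumed from the imports at the top of this test file, and the robots.txt body and candidate URLs are illustrative:

```rust
use std::collections::HashSet;
use url::Url;
// Module path assumed; the test file imports parse_robots_txt at the top.
use robotparser::parser::parse_robots_txt;

fn main() {
    // Illustrative robots.txt: gid is declared as a tracking parameter to drop.
    let doc = "User-agent: *\n\
               Clean-param: gid\n";
    let robots_txt_url = Url::parse("https://www.baidu.com/robots.txt").unwrap();
    let parser = parse_robots_txt(robots_txt_url.origin(), doc).get_result();

    // Two discovered links that differ only in the gid tracking parameter.
    let candidates = [
        "https://www.baidu.com/test?post_id=1&gid=88",
        "https://www.baidu.com/test?post_id=1&gid=99",
    ];
    let mut seen = HashSet::new();
    for &candidate in &candidates {
        let mut link = Url::parse(candidate).unwrap();
        // Strips Clean-param parameters for links on the robots.txt origin.
        parser.normalize_url(&mut link);
        seen.insert(link);
    }
    // Both candidates should collapse to https://www.baidu.com/test?post_id=1.
    println!("{} unique link(s) after normalization", seen.len());
}
```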