Skip to content

Commit 32def4b

Browse files
boyuuuun and cclauss authored
add crawl_google_scholar_citation.py (#3879)
* add crawl_google_scholar_citation.py * pass flack8 * pass isort * pass isort * change comment in main * modify main code * delete file * change how to build url * add a key 'hl' in params dict * Update crawl_google_scholar_citation.py * Create crawl_google_results.py * codespell: Mater Co-authored-by: Christian Clauss <[email protected]>
1 parent 9f6188c commit 32def4b

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

Diff for: .pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ repos:
4242
hooks:
4343
- id: codespell
4444
args:
45-
- --ignore-words-list=ans,fo,followings,hist,iff,secant,som,tim
45+
- --ignore-words-list=ans,fo,followings,hist,iff,mater,secant,som,tim
4646
- --skip="./.*,./other/dictionary.txt,./other/words,./project_euler/problem_022/p022_names.txt"
4747
- --quiet-level=2
4848
exclude: |

Diff for: web_programming/crawl_google_scholar_citation.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""
2+
Get the citation from google scholar
3+
using title and year of publication, and volume and pages of journal.
4+
"""
5+
6+
import requests
7+
from bs4 import BeautifulSoup
8+
9+
10+
def get_citation(base_url: str, params: dict, timeout: float = 10.0) -> str:
    """
    Return the text of the third footer link of the first search result.

    The page at ``base_url`` is fetched with ``params`` sent as the query string
    and parsed with BeautifulSoup.  The first ``<div class="gs_ri">`` is taken as
    the result entry, and the third ``<a>`` inside its first
    ``<div class="gs_fl">`` supplies the returned text.
    NOTE(review): presumably that anchor is the "Cited by N" link, so the value
    is a label rather than a bare number -- confirm against the live markup.

    :param base_url: URL of the lookup endpoint to query.
    :param params: query-string parameters describing the publication.
    :param timeout: seconds to wait for the server before giving up; without a
        timeout a stalled connection would block the caller indefinitely.
    :return: the text content of the selected anchor.
    :raises requests.exceptions.RequestException: on network failure or timeout.
    :raises AttributeError: if either expected ``div`` is absent from the page.
    :raises IndexError: if the footer holds fewer than three anchors.
    """
    response = requests.get(base_url, params=params, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    result = soup.find("div", attrs={"class": "gs_ri"})
    anchors = result.find("div", attrs={"class": "gs_fl"}).find_all("a")
    # The link is selected purely by position; a markup change moves or removes it.
    return anchors[2].get_text()
18+
19+
20+
if __name__ == "__main__":
    # Demo: look up a single article by its bibliographic details and print
    # whatever get_citation() extracts for it.
    lookup_url = "http://scholar.google.com/scholar_lookup"
    article_title = (
        "Precisely geometry controlled microsupercapacitors for ultrahigh areal "
        "capacitance, volumetric capacitance, and energy density"
    )
    params = dict(
        title=article_title,
        journal="Chem. Mater.",
        volume=30,
        pages="3979-3990",
        year=2018,
        hl="en",  # request the English-language page
    )
    print(get_citation(lookup_url, params=params))

0 commit comments

Comments
 (0)