
Commit d28ac64

Scrape anime and play episodes in the browser without ads, from the terminal (TheAlgorithms#5975)
* fetch anime
* formatted code
* fix format errors
* fix bot reviews
* pr review fixes
* remove unused exception
* change var name
* fix comments
1 parent b2a77cc commit d28ac64

File tree

1 file changed: +188 −0 lines changed
@@ -0,0 +1,188 @@
from xml.dom import NotFoundErr

import requests
from bs4 import BeautifulSoup, NavigableString
from fake_useragent import UserAgent

BASE_URL = "https://ww1.gogoanime2.org"


def search_scraper(anime_name: str) -> list:
    """
    Take an anime name and return a list of matching anime
    after scraping the site's search page.

    >>> type(search_scraper("demon_slayer"))
    <class 'list'>

    Args:
        anime_name (str): Name of the anime

    Raises:
        requests.HTTPError: If the search request fails

    Returns:
        list: List of anime as dicts with "title" and "url" keys
    """
    # Concatenate the name to form the search URL.
    search_url = f"{BASE_URL}/search/{anime_name}"

    # Request the URL with a browser-like User-Agent header.
    response = requests.get(search_url, headers={"User-Agent": UserAgent().chrome})

    # Raise an exception if the response is not OK.
    response.raise_for_status()

    # Parse the page.
    soup = BeautifulSoup(response.text, "html.parser")

    # Get the list of anime.
    anime_ul = soup.find("ul", {"class": "items"})
    anime_li = anime_ul.children

    # For each anime, append its name and URL to the list.
    anime_list = []
    for anime in anime_li:
        if not isinstance(anime, NavigableString):
            try:
                anime_url, anime_title = (
                    anime.find("a")["href"],
                    anime.find("a")["title"],
                )
                anime_list.append({"title": anime_title, "url": anime_url})
            except (NotFoundErr, KeyError):
                pass

    return anime_list


def search_anime_episode_list(episode_endpoint: str) -> list:
    """
    Take an anime endpoint and return the list of its episodes
    after scraping the anime's page.

    >>> type(search_anime_episode_list("/anime/kimetsu-no-yaiba"))
    <class 'list'>

    Args:
        episode_endpoint (str): Endpoint of the anime, e.g. "/anime/..."

    Raises:
        requests.HTTPError: If the page request fails

    Returns:
        list: List of episodes as dicts with "title" and "url" keys
    """
    request_url = f"{BASE_URL}{episode_endpoint}"

    response = requests.get(url=request_url, headers={"User-Agent": UserAgent().chrome})
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # The episode list lives in the ul with this id.
    episode_page_ul = soup.find("ul", {"id": "episode_related"})
    episode_page_li = episode_page_ul.children

    episode_list = []
    for episode in episode_page_li:
        try:
            if not isinstance(episode, NavigableString):
                episode_list.append(
                    {
                        "title": episode.find("div", {"class": "name"}).text.replace(
                            " ", ""
                        ),
                        "url": episode.find("a")["href"],
                    }
                )
        except (KeyError, NotFoundErr):
            pass

    return episode_list


def get_anime_episode(episode_endpoint: str) -> list:
    """
    Take an episode endpoint and return its watch URL and download URL.

    >>> type(get_anime_episode("/watch/kimetsu-no-yaiba/1"))
    <class 'list'>

    Args:
        episode_endpoint (str): Endpoint of the episode, e.g. "/watch/..."

    Raises:
        KeyError: If the player iframe or its src attribute is missing

    Returns:
        list: [watch URL, download URL]
    """
    episode_page_url = f"{BASE_URL}{episode_endpoint}"

    response = requests.get(
        url=episode_page_url, headers={"User-Agent": UserAgent().chrome}
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    try:
        # The stream is embedded in the player iframe; the .m3u8 playlist
        # lives at the matching /playlist/ path.
        episode_url = soup.find("iframe", {"id": "playerframe"})["src"]
        download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8"
    except (KeyError, NotFoundErr) as e:
        # Re-raise so the caller sees the scraping failure.
        raise e

    return [f"{BASE_URL}{episode_url}", f"{BASE_URL}{download_url}"]


if __name__ == "__main__":
    anime_name = input("Enter anime name: ").strip()
    anime_list = search_scraper(anime_name)
    print("\n")

    if len(anime_list) == 0:
        print("No anime found with this name")
    else:
        print(f"Found {len(anime_list)} results: ")
        for i, anime in enumerate(anime_list):
            anime_title = anime["title"]
            print(f"{i + 1}. {anime_title}")

        anime_choice = int(input("\nPlease choose from the following list: ").strip())
        chosen_anime = anime_list[anime_choice - 1]
        print(f"You chose {chosen_anime['title']}. Searching for episodes...")

        episode_list = search_anime_episode_list(chosen_anime["url"])
        if len(episode_list) == 0:
            print("No episode found for this anime")
        else:
            print(f"Found {len(episode_list)} results: ")
            for i, episode in enumerate(episode_list):
                print(f"{i + 1}. {episode['title']}")

            episode_choice = int(input("\nChoose an episode by serial no: ").strip())
            chosen_episode = episode_list[episode_choice - 1]
            print(f"You chose {chosen_episode['title']}. Searching...")

            episode_url, download_url = get_anime_episode(chosen_episode["url"])
            print(f"\nTo watch, ctrl+click on {episode_url}.")
            print(f"To download, ctrl+click on {download_url}.")
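For a quick end-to-end check without the interactive prompts, the three scrapers can be chained directly. A minimal sketch, assuming the site markup above is unchanged; the query "naruto" is only an example, and the mpv call in the comment is an assumption about the player, not part of this script:

# Minimal non-interactive sketch (assumes the gogoanime2 markup is unchanged;
# "naruto" is just an illustrative query).
results = search_scraper("naruto")
if results:
    episodes = search_anime_episode_list(results[0]["url"])
    if episodes:
        watch_url, playlist_url = get_anime_episode(episodes[0]["url"])
        print(watch_url)     # open in a browser to stream without ads
        print(playlist_url)  # an HLS-capable player should handle the .m3u8,
                             # e.g. (assumption) mpv "<playlist_url>"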
