
Scrape anime and play episodes in the browser without ads, from the terminal #5975


Merged: 9 commits, Feb 1, 2022
web_programming/fetch_anime_and_play.py: 203 additions, 0 deletions
@@ -0,0 +1,203 @@
from urllib.error import HTTPError

Member:
Suggested change:
-    from urllib.error import HTTPError
+    from requests.exceptions import HTTPError, RequestException

Contributor Author:
Changed to: from requests.exceptions import RequestException
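
For context (an aside, not part of the PR): requests raises its own HTTPError from response.raise_for_status(), and that class is a subclass of RequestException, so a single except clause covers both.

    from requests.exceptions import HTTPError, RequestException

    # HTTPError subclasses RequestException, so catching the base class
    # also catches failed raise_for_status() calls.
    assert issubclass(HTTPError, RequestException)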

from xml.dom import NotFoundErr

import requests
from bs4 import BeautifulSoup, NavigableString
from fake_useragent import UserAgent

BASE_URL = "https://ww1.gogoanime2.org"


def search_scraper(anime_name: str) -> list:

"""[summary]

This function will take an url and

Member:
Suggested change:
-    This function will take an url and
+    Take an url and

return list of anime after scraping the site.

>>> type(search_scraper("demon_slayer"))
<class 'list'>

Args:
anime_name (str): [Name of anime]

Raises:
e: [Raises exception on failure]

Returns:
[list]: [List of animes]
"""

try:

Member:
A try/except block should cover only the critical lines of code. It should not be 30 lines long!

Contributor Author:
Replaced the long try/except blocks with ones scoped to the critical code.
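
For illustration, a minimal sketch of the pattern being asked for, using the same names as the diff below: the try block wraps only the network call, and parsing stays outside it.

    search_url = f"{BASE_URL}/search/{anime_name}"
    try:
        # Only the request and the status check can raise here.
        response = requests.get(search_url, headers={"User-Agent": UserAgent().chrome})
        response.raise_for_status()
    except requests.exceptions.RequestException:
        raise
    # Parsing happens outside the try block, so a parsing failure
    # is never masked as a network error.
    soup = BeautifulSoup(response.text, "html.parser")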


# concat the name to form the search url.
search_url = f"{BASE_URL}/search/{anime_name}"
response = requests.get(
search_url, headers={"User-Agent": UserAgent().chrome}
) # request the url.

# Is the response ok?
response.raise_for_status()

# parse with soup.
soup = BeautifulSoup(response.text, "html.parser")

# get list of anime
items_ul = soup.find("ul", {"class": "items"})
items_li = items_ul.children

# for each anime, insert to list. the name and url.
anime_list = []
for li in items_li:
if not isinstance(li, NavigableString):
anime_url, anime_title = li.find("a")["href"], li.find("a")["title"]
anime_list.append(
{
"title": anime_title,
"url": anime_url,
}
)

return anime_list

except (requests.exceptions.RequestException, HTTPError, TypeError) as e:

Member:
Suggested change:
-    except (requests.exceptions.RequestException, HTTPError, TypeError) as e:
+    except (HTTPError, RequestException, TypeError) as e:

Contributor Author:
Changed to RequestException.

raise e


def search_anime_episode_list(episode_endpoint: str) -> list:

"""[summary]

This function will take an url and

Member:
Suggested change:
-    This function will take an url and
+    Take an url and

Contributor Author:
Changed them.

return list of episodes after scraping the site
for an url.

>>> type(search_anime_episode_list("/anime/kimetsu-no-yaiba"))
<class 'list'>

Args:
episode_endpoint (str): [Endpoint of episode]

Raises:
e: [description]

Returns:
[list]: [List of episodes]
"""

try:

request_url = f"{BASE_URL}{episode_endpoint}"
response = requests.get(
url=request_url, headers={"User-Agent": UserAgent().chrome}
)
soup = BeautifulSoup(response.text, "html.parser")

# With this id. get the episode list.
episode_page_ul = soup.find("ul", {"id": "episode_related"})
episode_page_li = episode_page_ul.children

episode_list = []
for children in episode_page_li:
try:
if not isinstance(children, NavigableString):
episode_list.append(
{
"title": children.find(
"div", {"class": "name"}
).text.replace(" ", ""),
"url": children.find("a")["href"],
}
)
except (KeyError, NotFoundErr, TypeError):
pass

return episode_list

except (requests.exceptions.RequestException) as e:
raise e


def get_anime_episode(episode_endpoint: str) -> list:

"""[summary]

Get click url and download url from episode url

Args:
episode_endpoint (str): [Endpoint of episode]

Raises:
e: [description]

Returns:
[list]: [List of download and watch url]
"""

try:

episode_page_url = f"{BASE_URL}{episode_endpoint}"

response = requests.get(
url=episode_page_url, headers={"User-Agent": UserAgent().chrome}
)
soup = BeautifulSoup(response.text, "lxml")

episode_url = soup.find("iframe", {"id": "playerframe"})["src"]
download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8"
return [f"{BASE_URL}{episode_url}", f"{BASE_URL}{download_url}"]

except (
KeyError,
NotFoundErr,
TypeError,
requests.exceptions.RequestException,

Member:
Long try/except blocks catch many different exceptions, which confuses the reader of the code. They also slow down debugging when one of those exceptions is raised.

Contributor Author:
Removed the multiple exceptions.

) as e:
raise e
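
One way to apply this advice (a sketch, not the code as merged): keep only the lookup that can actually fail inside the try block, and name the exceptions it can raise.

    response = requests.get(
        url=f"{BASE_URL}{episode_endpoint}", headers={"User-Agent": UserAgent().chrome}
    )
    soup = BeautifulSoup(response.text, "lxml")
    try:
        # find() returns None when the iframe is missing (TypeError on subscript);
        # an iframe without a src attribute raises KeyError instead.
        episode_url = soup.find("iframe", {"id": "playerframe"})["src"]
    except (KeyError, TypeError):
        raise
    download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8"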


if __name__ == "__main__":

try:

anime_name = input("Enter anime name: ").strip()
anime_list = search_scraper(anime_name)
print("\n")

if len(anime_list) == 0:
print("No anime found with this name")
else:

print(f"Found {len(anime_list)} results: ")
for (i, anime) in enumerate(anime_list):
anime_title = anime["title"]
print(f"{i+1}. {anime_title}")

anime_choice = int(
input("\nPlease choose from the following list: ").strip()
)
chosen_anime = anime_list[anime_choice - 1]
print(
"You chose {}. Searching for episodes...".format(chosen_anime["title"])

Review comment: As mentioned in the Contributing Guidelines, please do not use printf-style formatting or str.format(). Use an f-string instead; it is more readable and efficient.

)
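
For reference, the f-string versions of the three calls flagged in this review would read (a sketch of the suggested fix):

    print(f"You chose {chosen_anime['title']}. Searching for episodes...")
    print(f"{i + 1}. {episode['title']}")
    print(f"You chose {chosen_episode['title']}. Searching...")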

episode_list = search_anime_episode_list(chosen_anime["url"])
if len(episode_list) == 0:
print("No episode found for this anime")
else:
print(f"Found {len(episode_list)} results: ")
for (i, episode) in enumerate(episode_list):
print(("{}. {}").format(i + 1, episode["title"]))

Review comment: the same f-string note as above.


episode_choice = int(
input("\nChoose an episode by serial no: ").strip()
)
chosen_episode = episode_list[episode_choice - 1]
print("You chose {}. Searching...".format(chosen_episode["title"]))

Review comment: the same f-string note as above.


episode_url, download_url = get_anime_episode(chosen_episode["url"])
print(f"\nTo watch, ctrl+click on {episode_url}.")
print(f"To download, ctrl+click on {download_url}.")

except (ValueError, IndexError, TypeError) as e:
raise e

Member:
We can lose the try / except if we are not going to handle the errors.

Contributor Author:
Removed the block.
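
Since "except ... as e: raise e" only re-raises, dropping the wrapper preserves behavior exactly. A sketch of the simplification:

    # Before: the handler does nothing but re-raise, so it changes no behavior.
    try:
        anime_list = search_scraper(anime_name)
    except (ValueError, IndexError, TypeError) as e:
        raise e

    # After: identical behavior, less nesting.
    anime_list = search_scraper(anime_name)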