From 190afd4669c7c4fc3cfe82b7825913b07f27643b Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Mon, 7 Aug 2023 11:56:41 +0000 Subject: [PATCH 1/5] updating DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index fdcf0ceedf1f..e6a1ff356143 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -585,6 +585,7 @@ * [Hardy Ramanujanalgo](maths/hardy_ramanujanalgo.py) * [Hexagonal Number](maths/hexagonal_number.py) * [Integration By Simpson Approx](maths/integration_by_simpson_approx.py) + * [Interquartile Range](maths/interquartile_range.py) * [Is Int Palindrome](maths/is_int_palindrome.py) * [Is Ip V4 Address Valid](maths/is_ip_v4_address_valid.py) * [Is Square Free](maths/is_square_free.py) From 8bfdd14d09cb9c908ed0ace8c1282d89edb893d7 Mon Sep 17 00:00:00 2001 From: CaedenPH Date: Wed, 16 Aug 2023 16:54:57 +0100 Subject: [PATCH 2/5] fix(get-amazon-product-data): Remove whitespace in headers --- Amazon Product Data for headphones.csv | 1 + web_programming/get_amazon_product_data.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 Amazon Product Data for headphones.csv diff --git a/Amazon Product Data for headphones.csv b/Amazon Product Data for headphones.csv new file mode 100644 index 000000000000..42329c02b86e --- /dev/null +++ b/Amazon Product Data for headphones.csv @@ -0,0 +1 @@ +,Product Title,Product Link,Current Price of the product,Product Rating,MRP of the product,Discount diff --git a/web_programming/get_amazon_product_data.py b/web_programming/get_amazon_product_data.py index c796793f2205..9bd67f726a53 100644 --- a/web_programming/get_amazon_product_data.py +++ b/web_programming/get_amazon_product_data.py @@ -19,11 +19,13 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame: """ url = f"https://www.amazon.in/laptop/s?k={product}" header = { - "User-Agent": """Mozilla/5.0 (X11; Linux x86_64) 
AppleWebKit/537.36 - (KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36""", + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36" + "(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36" + ), "Accept-Language": "en-US, en;q=0.5", } - soup = BeautifulSoup(requests.get(url, headers=header).text) + soup = BeautifulSoup(requests.get(url, headers=header).text, features="lxml") # Initialize a Pandas dataframe with the column titles data_frame = DataFrame( columns=[ @@ -74,8 +76,8 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame: except ValueError: discount = float("nan") except AttributeError: - pass - data_frame.loc[len(data_frame.index)] = [ + continue + data_frame.loc[str(len(data_frame.index))] = [ product_title, product_link, product_price, @@ -97,4 +99,8 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame: if __name__ == "__main__": product = "headphones" - get_amazon_product_data(product).to_csv(f"Amazon Product Data for {product}.csv") + print( + get_amazon_product_data(product).to_csv( + f"Amazon Product Data for {product}.csv" + ) + ) From d8d44029c23b2d17e833bf07bfc33f95795446db Mon Sep 17 00:00:00 2001 From: CaedenPH Date: Wed, 16 Aug 2023 16:57:33 +0100 Subject: [PATCH 3/5] refactor(get-amazon-product-data): Don't print to_csv --- Amazon Product Data for headphones.csv | 1 - web_programming/get_amazon_product_data.py | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) delete mode 100644 Amazon Product Data for headphones.csv diff --git a/Amazon Product Data for headphones.csv b/Amazon Product Data for headphones.csv deleted file mode 100644 index 42329c02b86e..000000000000 --- a/Amazon Product Data for headphones.csv +++ /dev/null @@ -1 +0,0 @@ -,Product Title,Product Link,Current Price of the product,Product Rating,MRP of the product,Discount diff --git a/web_programming/get_amazon_product_data.py b/web_programming/get_amazon_product_data.py index 9bd67f726a53..a16175688667 100644 --- 
a/web_programming/get_amazon_product_data.py +++ b/web_programming/get_amazon_product_data.py @@ -99,8 +99,4 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame: if __name__ == "__main__": product = "headphones" - print( - get_amazon_product_data(product).to_csv( - f"Amazon Product Data for {product}.csv" - ) - ) + get_amazon_product_data(product).to_csv(f"Amazon Product Data for {product}.csv") From 79436251b8de73b061dca7ce3092fa7c17c33f31 Mon Sep 17 00:00:00 2001 From: CaedenPH Date: Wed, 16 Aug 2023 17:13:35 +0100 Subject: [PATCH 4/5] fix(fetch-anime-and-play): Unmark as BROKEN --- ...play.py.BROKEN => fetch_anime_and_play.py} | 73 ++++++++++--------- 1 file changed, 38 insertions(+), 35 deletions(-) rename web_programming/{fetch_anime_and_play.py.BROKEN => fetch_anime_and_play.py} (69%) diff --git a/web_programming/fetch_anime_and_play.py.BROKEN b/web_programming/fetch_anime_and_play.py similarity index 69% rename from web_programming/fetch_anime_and_play.py.BROKEN rename to web_programming/fetch_anime_and_play.py index 3bd4f704dd8d..62ce022bcb24 100644 --- a/web_programming/fetch_anime_and_play.py.BROKEN +++ b/web_programming/fetch_anime_and_play.py @@ -1,7 +1,5 @@ -from xml.dom import NotFoundErr - import requests -from bs4 import BeautifulSoup, NavigableString +from bs4 import BeautifulSoup, NavigableString, Tag from fake_useragent import UserAgent BASE_URL = "https://ww1.gogoanime2.org" @@ -13,7 +11,7 @@ def search_scraper(anime_name: str) -> list: Take an url and return list of anime after scraping the site. - >>> type(search_scraper("demon_slayer")) + >>> type(search_scraper("naruto")) Args: @@ -41,25 +39,23 @@ def search_scraper(anime_name: str) -> list: # get list of anime anime_ul = soup.find("ul", {"class": "items"}) + if anime_ul is None or isinstance(anime_ul, NavigableString): + msg = f"Could not find any anime with name {anime_name}" + raise ValueError(msg) anime_li = anime_ul.children # for each anime, insert to list.
the name and url. anime_list = [] for anime in anime_li: - if not isinstance(anime, NavigableString): - try: - anime_url, anime_title = ( - anime.find("a")["href"], - anime.find("a")["title"], - ) - anime_list.append( - { - "title": anime_title, - "url": anime_url, - } - ) - except (NotFoundErr, KeyError): - pass + if isinstance(anime, Tag): + anime_url = anime.find("a") + if anime_url is None or isinstance(anime_url, NavigableString): + continue + anime_title = anime.find("a") + if anime_title is None or isinstance(anime_title, NavigableString): + continue + + anime_list.append({"title": anime_title["title"], "url": anime_url["href"]}) return anime_list @@ -93,22 +89,24 @@ def search_anime_episode_list(episode_endpoint: str) -> list: # With this id. get the episode list. episode_page_ul = soup.find("ul", {"id": "episode_related"}) + if episode_page_ul is None or isinstance(episode_page_ul, NavigableString): + msg = f"Could not find any anime episodes for {episode_endpoint}" + raise ValueError(msg) episode_page_li = episode_page_ul.children episode_list = [] for episode in episode_page_li: - try: - if not isinstance(episode, NavigableString): - episode_list.append( - { - "title": episode.find("div", {"class": "name"}).text.replace( - " ", "" - ), - "url": episode.find("a")["href"], - } - ) - except (KeyError, NotFoundErr): - pass + if isinstance(episode, Tag): + url = episode.find("a") + if url is None or isinstance(url, NavigableString): + continue + title = episode.find("div", {"class": "name"}) + if title is None or isinstance(title, NavigableString): + continue + + episode_list.append( + {"title": title.text.replace(" ", ""), "url": url["href"]} + ) return episode_list @@ -140,11 +138,16 @@ def get_anime_episode(episode_endpoint: str) -> list: soup = BeautifulSoup(response.text, "html.parser") - try: - episode_url = soup.find("iframe", {"id": "playerframe"})["src"] - download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8" - except (KeyError,
NotFoundErr) as e: - raise e + url = soup.find("iframe", {"id": "playerframe"}) + if url is None or isinstance(url, NavigableString): + msg = f"Could not find url and download url from {episode_endpoint}" + raise RuntimeError(msg) + + episode_url = url["src"] + if not isinstance(episode_url, str): + msg = f"Could not find url and download url from {episode_endpoint}" + raise RuntimeError(msg) + download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8" return [f"{BASE_URL}{episode_url}", f"{BASE_URL}{download_url}"] From bc84328778aa4e0e0fed41f606ec7b7e1d31a938 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Wed, 16 Aug 2023 16:14:01 +0000 Subject: [PATCH 5/5] updating DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 8d1567465fbc..99170f0d5bfa 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -1212,6 +1212,7 @@ * [Daily Horoscope](web_programming/daily_horoscope.py) * [Download Images From Google Query](web_programming/download_images_from_google_query.py) * [Emails From Url](web_programming/emails_from_url.py) + * [Fetch Anime And Play](web_programming/fetch_anime_and_play.py) * [Fetch Bbc News](web_programming/fetch_bbc_news.py) * [Fetch Github Info](web_programming/fetch_github_info.py) * [Fetch Jobs](web_programming/fetch_jobs.py)