From daa5e3f9ab6c94824885a6b8dbbe9938edcd1838 Mon Sep 17 00:00:00 2001 From: Stephen0124 <18788802003@163.com> Date: Thu, 28 Mar 2024 22:11:11 +0800 Subject: [PATCH 1/4] Create qimai_data.py Auto crawl app information of Qimai Data --- web_programming/qimai_data.py | 93 +++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 web_programming/qimai_data.py diff --git a/web_programming/qimai_data.py b/web_programming/qimai_data.py new file mode 100644 index 000000000000..2cb529acc0fb --- /dev/null +++ b/web_programming/qimai_data.py @@ -0,0 +1,93 @@ +""" +Qimai Data (qimai.cn) is a mobile product intelligence and business analysis platform in China. + +Get a dict of the app information for a given typename (free, paid, grossing) from https://www.qimai.cn + +DrissionPage is a simple encapsulation of the selenium package, which is continuously updated, and some of the code could be +not compatible with the latest version of DrissionPage. + +p.s. Current version of DrissionPage : 4.0.4.17 +""" + +from DrissionPage import ChromiumPage + + +def login(page: ChromiumPage, username: str, password: str) -> bool: + page.get("https://www.qimai.cn/account/signin/r/%2F") + if page.url != "https://www.qimai.cn/": + try: + page.ele("@name=username").input(username) + page.ele("@name=password").input(password) + page.ele(".submit").click() + print("Login successfully") + return True + except: + print("Login failed") + page.close() + return False + else: + print("Already login") + return True + + +def get_app_info(page: ChromiumPage, username: str, password: str, typename: str) -> dict: + # Clear the cache to avoid the login failure + page.clear_cache(cookies=True) + if login(page, username, password): + # Get the data of default page + page.listen.start(f"brand={typename}&device=iphone&country=cn&genre=5000") + page.get( + f"https://www.qimai.cn/rank/index/brand/{typename}/device/iphone/country/cn/genre/5000" + ) + + # Get the response data + res = 
page.listen.wait().response.body + + dic = {} + for i in range(len(res["rankInfo"])): + font = res["rankInfo"][i]["appInfo"] + dic[font["appId"]] = { + "appId": font["appId"], + "appName": font["appName"], + "country": font["country"], + "file_size": font["file_size"], + "icon_path": font["icon"], + "price": font["price"], + "publisher": font["publisher"], + "subtitle": font["subtitle"], + } + + # Get the data of other pages + if res["maxPage"] > 1: + for page_num in range(1, res["maxPage"]): + # Target the page number + page.listen.start(f"page={page_num + 1}") + print(f"Getting page {page_num + 1}") + # Chrome will scroll to the bottom of the page to load more data + page.scroll.to_bottom() + page.wait.load_start() + + res = page.listen.wait().response.body + for i in range(len(res["rankInfo"])): + font = res["rankInfo"][i]["appInfo"] + dic[font["appId"]] = { + "appId": font["appId"], + "appName": font["appName"], + "country": font["country"], + "file_size": font["file_size"], + "icon_path": font["icon"], + "price": font["price"], + "publisher": font["publisher"], + "subtitle": font["subtitle"], + } + + else: + dic = {"error": "Login failed"} + + return dic + + +if __name__ == "__main__": + page = ChromiumPage() + print(get_app_info(page, "YOUR USERNAME", "YOUR PASSWORD", "free")) + page.close() From 0a1a3baf5a9e071449d4713f36b69365c561be8b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:25:32 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- web_programming/qimai_data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/web_programming/qimai_data.py b/web_programming/qimai_data.py index 2cb529acc0fb..a14fe8a446a3 100644 --- a/web_programming/qimai_data.py +++ b/web_programming/qimai_data.py @@ -30,7 +30,9 @@ def login(page: ChromiumPage, username: str, password: str) -> bool: 
return True -def get_app_info(page: ChromiumPage, username: str, password: str, typename: str) -> dict: +def get_app_info( + page: ChromiumPage, username: str, password: str, typename: str +) -> dict: # Clear the cache to avoid the login failure page.clear_cache(cookies=True) if login(page, username, password): From 4909f0c5a58897a6f9daf2b2b1a7666e994edbea Mon Sep 17 00:00:00 2001 From: Stephen0124 <18788802003@163.com> Date: Fri, 29 Mar 2024 17:03:37 +0800 Subject: [PATCH 3/4] Create qimai_data.py --- requirements.txt | 1 + web_programming/qimai_data.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index bb3d671393b9..641c368879c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ tweepy # yulewalker # uncomment once audio_filters/equal_loudness_filter.py is fixed typing_extensions xgboost +DrissionPage ; python_version > '4.0.4.12' \ No newline at end of file diff --git a/web_programming/qimai_data.py b/web_programming/qimai_data.py index 2cb529acc0fb..ac7981462fa0 100644 --- a/web_programming/qimai_data.py +++ b/web_programming/qimai_data.py @@ -1,12 +1,10 @@ """ -Qimai Data (qimai.cn) is a mobile product intelligence and business analysis platform in China. +Qimai Data (qimai.cn) is a mobile product business analysis platform in China. Get a dict of the app information for a given typename (free, paid, grossing) from https://www.qimai.cn -DrissionPage is a simple encapsulation of the selenium package, which is continuously updated, and some of the code could be -not compatible with the latest version of DrissionPage. - p.s. 
Current version of DrissionPage : 4.0.4.17 +https://github.com/g1879/DrissionPage """ from DrissionPage import ChromiumPage @@ -21,7 +19,7 @@ def login(page: ChromiumPage, username: str, password: str) -> bool: page.ele(".submit").click() print("Login successfully") return True - except: + except KeyError: print("Login failed") page.close() return False @@ -30,7 +28,8 @@ def login(page: ChromiumPage, username: str, password: str) -> bool: return True -def get_app_info(page: ChromiumPage, username: str, password: str, typename: str) -> dict: +def get_app_info(page: ChromiumPage, + username: str, password: str, typename: str) -> dict: # Clear the cache to avoid the login failure page.clear_cache(cookies=True) if login(page, username, password): @@ -82,11 +81,10 @@ def get_app_info(page: ChromiumPage, username: str, password: str, typename: str } else: - dic = {"error": "Login failed"} + return {'error': 'Login failed'} return dic - if __name__ == "__main__": page = ChromiumPage() print(get_app_info(page, "YOUR USERNAME", "YOUR PASSWORD", "free")) From 33511fd0e31cd57de56ef1f99c504707b28cd2b4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Mar 2024 09:06:54 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- requirements.txt | 2 +- web_programming/qimai_data.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 641c368879c2..11282f60cfa1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ beautifulsoup4 +DrissionPage>4.0.4.12 fake_useragent imageio keras ; python_version < '3.12' @@ -22,4 +23,3 @@ tweepy # yulewalker # uncomment once audio_filters/equal_loudness_filter.py is fixed typing_extensions xgboost -DrissionPage ; python_version > '4.0.4.12' \ No newline at end of file diff --git a/web_programming/qimai_data.py 
b/web_programming/qimai_data.py index a3e57e46b253..52606242146a 100644 --- a/web_programming/qimai_data.py +++ b/web_programming/qimai_data.py @@ -82,10 +82,11 @@ def get_app_info( } else: - return {'error': 'Login failed'} + return {"error": "Login failed"} return dic + if __name__ == "__main__": page = ChromiumPage() print(get_app_info(page, "YOUR USERNAME", "YOUR PASSWORD", "free"))