Skip to content

Commit daa5e3f

Browse files
committed
Create qimai_data.py
Auto crawl app information of Qimai Data
1 parent b5cb1fb commit daa5e3f

File tree

1 file changed

+93
-0
lines changed

1 file changed

+93
-0
lines changed

web_programming/qimai_data.py

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""
2+
Qimai Data (qimai.cn) is a mobile product intelligence and business analysis platform in China.
3+
4+
Get a dict of the app information for a given typename (free, paid, grossing) from https://www.qimai.cn
5+
6+
DrissionPage is a simple encapsulation of the selenium package. It is updated continuously, so some of this code may be
7+
incompatible with the latest version of DrissionPage.
8+
9+
p.s. Current version of DrissionPage : 4.0.4.17
10+
"""
11+
12+
from DrissionPage import ChromiumPage
13+
14+
15+
def login(page: ChromiumPage, username: str, password: str) -> bool:
    """
    Sign in to qimai.cn with the given credentials.

    The sign-in URL is opened first. If the browser is on the site root
    afterwards, the session is treated as already signed in and the form is
    not touched.

    Args:
        page: Browser page used to drive the sign-in.
        username: Value typed into the element named ``username``.
        password: Value typed into the element named ``password``.

    Returns:
        True if the form was submitted without raising, or if the session was
        already signed in. False if filling in or submitting the form raised;
        in that case ``page`` has already been closed.
    """
    page.get("https://www.qimai.cn/account/signin/r/%2F")
    if page.url == "https://www.qimai.cn/":
        print("Already login")
        return True

    try:
        page.ele("@name=username").input(username)
        page.ele("@name=password").input(password)
        page.ele(".submit").click()
    except Exception:
        # Not a bare ``except``: Ctrl-C (KeyboardInterrupt) and SystemExit must
        # propagate instead of being reported as a failed login.
        print("Login failed")
        page.close()
        return False

    # NOTE(review): "success" only means the click did not raise; the page
    # reached after submitting is not checked.
    print("Login successfully")
    return True
31+
32+
33+
def _extract_app_info(rank_info: list) -> dict:
    """Map each ranking entry's ``appId`` to the subset of its ``appInfo`` fields that is kept."""
    apps = {}
    for entry in rank_info:
        info = entry["appInfo"]
        apps[info["appId"]] = {
            "appId": info["appId"],
            "appName": info["appName"],
            "country": info["country"],
            "file_size": info["file_size"],
            "icon_path": info["icon"],
            "price": info["price"],
            "publisher": info["publisher"],
            "subtitle": info["subtitle"],
        }
    return apps


def get_app_info(page: ChromiumPage, username: str, password: str, typename: str) -> dict:
    """
    Collect app information from the qimai.cn ranking page for ``typename``.

    Clears the browser cache, signs in through ``login``, opens the ranking
    page and reads the captured response of the ranking request. Further pages
    are triggered by scrolling to the bottom of the page, once per page up to
    the ``maxPage`` value reported by the first response.

    Args:
        page: Browser page used for sign-in, navigation and request capture.
        username: Account name passed to ``login``.
        password: Password passed to ``login``.
        typename: Ranking brand inserted into the URL (the module docstring
            lists free, paid and grossing).

    Returns:
        A dict keyed by ``appId`` whose values hold ``appId``, ``appName``,
        ``country``, ``file_size``, ``icon_path``, ``price``, ``publisher``
        and ``subtitle``; or ``{"error": "Login failed"}`` when sign-in fails.
    """
    # Clear the cache to avoid the login failure
    page.clear_cache(cookies=True)
    if not login(page, username, password):
        return {"error": "Login failed"}

    # Start listening before navigating so the first ranking request is captured.
    page.listen.start(f"brand={typename}&device=iphone&country=cn&genre=5000")
    page.get(
        "https://www.qimai.cn/rank/index"
        f"/brand/{typename}/device/iphone/country/cn/genre/5000"
    )
    res = page.listen.wait().response.body
    dic = _extract_app_info(res["rankInfo"])

    # Page 1 is already collected; the range is empty when there is only one page.
    for page_number in range(2, res["maxPage"] + 1):
        # Target the request for this page number
        page.listen.start(f"page={page_number}")
        print(f"Getting page {page_number}")
        # Scrolling to the bottom makes the site load the next page of data
        page.scroll.to_bottom()
        page.wait.load_start()

        res = page.listen.wait().response.body
        dic.update(_extract_app_info(res["rankInfo"]))

    return dic
88+
89+
90+
if __name__ == "__main__":
    # Demo run: crawl the "free" ranking with placeholder credentials.
    page = ChromiumPage()
    try:
        print(get_app_info(page, "YOUR USERNAME", "YOUR PASSWORD", "free"))
    finally:
        # Close the page even when crawling raises, so the browser page
        # does not stay open.
        page.close()

0 commit comments

Comments
 (0)