# crawler.py -- forked from lzjun567/python_scripts
# encoding: utf-8
import csv
import requests
__author__ = 'liuzhijun'

# LeanCloud request headers sent with every API call below.
# The real values can be looked up after logging in to gold.xitu.io;
# the ones here are placeholders.
headers = {
    "X-LC-Id": "xxxx",
    "X-LC-Session": "xxxxxx",
    "X-LC-Sign": "xxx",
    "X-LC-UA": "xxxxx",
}
def fetch_tags():
    """Fetch all tags in one request and write them to ``tags.csv``.

    Issues a single GET against the LeanCloud ``Tag`` class (the URL asks for
    up to 1000 records), prints the raw response body, and writes one CSV row
    per tag with the columns listed in ``fieldnames``. Fields missing from a
    tag are written as empty cells.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        requests.Timeout: if the API does not answer within the timeout.
    """
    url = "https://api.leancloud.cn/1.1/classes/Tag?where=%7B%7D&limit=1000"
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    print(response.content)

    # Column meanings -- collections: bookmarks, subscribers: followers,
    # entries: articles, views: visits.
    fieldnames = ['title', 'collectionsCount', 'subscribersCount', 'entriesCount', "viewsCount"]

    # Decode before opening the file so a bad response does not truncate an
    # existing tags.csv; treat a missing "results" key as "no tags".
    tags = response.json().get("results") or []

    # newline='' is required by the csv module to avoid blank lines on
    # Windows; explicit UTF-8 keeps non-ASCII titles writable on any locale.
    with open('tags.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for tag in tags:
            writer.writerow({name: tag.get(name) for name in fieldnames})
def fetch_entites(max_page=29820, page_size=20):
    """Fetch articles page by page and write them to ``entities.csv``.

    Pages through the LeanCloud ``Entry`` class ordered by ``hotIndex`` and
    writes one CSV row per entry. The header row is written exactly once, at
    the top of the file.

    Args:
        max_page: Exclusive upper bound for the ``skip`` offset. Despite the
            name this is a record offset, not a page count: requests are made
            for ``skip = 0, page_size, 2 * page_size, ...`` below this value.
        page_size: Number of entries requested per call (the ``limit``
            query parameter) and the step between ``skip`` offsets.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        requests.Timeout: if a request does not complete within the timeout.
    """
    url = "https://api.leancloud.cn/1.1/classes/Entry"
    fieldnames = ("title", "viewsCount", "collectionCount", "commentsCount")

    # newline='' is required by the csv module to avoid blank lines on
    # Windows; explicit UTF-8 keeps non-ASCII titles writable on any locale.
    with open("entities.csv", 'w', newline='', encoding='utf-8') as csv_file:
        # Build the writer and emit the header once, not once per page.
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for skip in range(0, max_page, page_size):
            # Let requests build and percent-encode the query string.
            params = {
                "where": "{}",
                "limit": page_size,
                "order": "hotIndex",
                "skip": skip,
            }
            response = requests.get(url, headers=headers, params=params, timeout=30)
            response.raise_for_status()

            entries = response.json().get("results") or []
            if not entries:
                # An empty page means the offset is past the last entry, so
                # every later page would be empty as well.
                break

            for entry in entries:
                writer.writerow({name: entry.get(name) for name in fieldnames})
if __name__ == '__main__':
    # Only the article export runs when the script is executed directly;
    # the tag export is left disabled.
    # fetch_tags()
    fetch_entites()