Skip to content

Commit 4bed1de

Browse files
committed
update
1 parent 2fb7ce5 commit 4bed1de

15 files changed

Lines changed: 41560 additions & 30220 deletions

.DS_Store

0 Bytes
Binary file not shown.

juejin/a_new_hope.png

282 KB
Loading

juejin/anay.py

Lines changed: 0 additions & 33 deletions
This file was deleted.

juejin/crawler.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
# encoding: utf-8
2+
import csv
3+
4+
import requests
5+
6+
__author__ = 'liuzhijun'


# Request headers sent with every API call. The real values can be looked up
# after logging in to gold.xitu.io; the strings below appear to be
# placeholders that must be filled in before running the crawler.
headers = {
    "X-LC-Id": "xxxx",
    "X-LC-Session": "xxxxxx",
    "X-LC-Sign": "xxx",
    "X-LC-UA": "xxxxx",
}
14+
15+
16+
def fetch_tags():
    """Download every tag record and save it to ``tags.csv``.

    Sends one GET request to the ``Tag`` class endpoint (empty ``where``
    filter, ``limit=1000``) with the module-level ``headers``, prints the raw
    response body, and writes one CSV row per entry of the ``results`` list
    with the columns ``title``, ``collectionsCount``, ``subscribersCount``,
    ``entriesCount`` and ``viewsCount``. Keys missing from a record are
    written as empty cells.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    # Fetch all tags in a single request.
    url = "https://api.leancloud.cn/1.1/classes/Tag?where=%7B%7D&limit=1000"
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an error status instead of writing an empty CSV.
    response.raise_for_status()
    print(response.content)

    # Column meaning: collections = favourites, subscribers = followers,
    # entries = articles, views = visits.
    fieldnames = ['title', 'collectionsCount', 'subscribersCount', 'entriesCount', "viewsCount"]
    # A payload without "results" yields no rows rather than a TypeError.
    tags = response.json().get("results") or []

    # Open the file only after the response has been validated so a failed
    # request does not truncate an existing tags.csv. newline='' is what the
    # csv module requires; utf-8 keeps non-ASCII titles portable.
    with open('tags.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {name: tag.get(name) for name in fieldnames} for tag in tags
        )
33+
34+
35+
def fetch_entites(max_entries=29820, page_size=20):
    """Download article records page by page and save them to ``entities.csv``.

    Requests the ``Entry`` class endpoint ordered by ``hotIndex``, advancing
    the ``skip`` offset by ``page_size`` until ``max_entries`` is reached or a
    page comes back empty. Each record in a page's ``results`` list becomes
    one CSV row with the columns ``title``, ``viewsCount``,
    ``collectionCount`` and ``commentsCount``. The header row is written
    exactly once, at the top of the file.

    Args:
        max_entries: Upper bound (exclusive) for the ``skip`` offset, i.e. the
            maximum number of records to request. Defaults to 29820.
        page_size: Number of records requested per call. Defaults to 20.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if a request gets no answer within 30 seconds.
    """
    # Fetch all articles.
    url_template = ("https://api.leancloud.cn/1.1/classes/Entry"
                    "?where={}&limit=%s&order=hotIndex&skip=%s")
    # NOTE(review): "collectionCount" is singular here while the Tag export
    # uses "collectionsCount" - confirm against the Entry schema.
    fieldnames = ("title", "viewsCount", "collectionCount", "commentsCount")

    # newline='' is what the csv module requires; utf-8 keeps non-ASCII
    # titles portable across platforms.
    with open("entities.csv", 'w', newline='', encoding='utf-8') as csv_file:
        # Create the writer and emit the header once, not once per page.
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for skip in range(0, max_entries, page_size):
            response = requests.get(url_template % (page_size, skip),
                                    headers=headers, timeout=30)
            response.raise_for_status()
            entries = response.json().get("results") or []
            if not entries:
                # Past the last record: later offsets cannot return data.
                break
            writer.writerows(
                {name: entry.get(name) for name in fieldnames}
                for entry in entries
            )
51+
52+
53+
if __name__ == "__main__":
    # Script entry point: only the article crawl runs; the tag export call
    # is left commented out.
    # fetch_tags()
    fetch_entites()

juejin/entities.csv

Lines changed: 10814 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)