From b6e01323bb82fb22a6a6f9c6e52b6c8814442154 Mon Sep 17 00:00:00 2001 From: egrcc Date: Tue, 17 May 2016 11:59:05 +0800 Subject: [PATCH 1/8] _ --- ...02\347\232\204\345\233\236\347\255\224.md" | 19 ----------------- ...2\347\232\204\345\233\236\347\255\224.txt" | 21 ------------------- 2 files changed, 40 deletions(-) delete mode 100644 "markdown/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237--\351\202\223\345\262\202\347\232\204\345\233\236\347\255\224.md" delete mode 100644 "text/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237--\351\202\223\345\262\202\347\232\204\345\233\236\347\255\224.txt" diff --git "a/markdown/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237--\351\202\223\345\262\202\347\232\204\345\233\236\347\255\224.md" "b/markdown/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237--\351\202\223\345\262\202\347\232\204\345\233\236\347\255\224.md" deleted file mode 100644 index f058555..0000000 --- "a/markdown/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237--\351\202\223\345\262\202\347\232\204\345\233\236\347\255\224.md" +++ /dev/null @@ -1,19 +0,0 @@ -# 现实可以有多美好? -## 作者: 邓岂 赞同: 23 -最近高考失利啦,考的很差很差。高三辛苦弄到的加分全都没用上。 -心里想不开决定自杀。 -于是给已经有男朋友的她,发了一条长长的短信,告诉她我要去复读,请她以后不要再联系我了,把我忘了吧。其实现在的我也不再单身,有了一个女朋友。可我在决定去死的时 -候,选择的唯一一个告别的人却依然是她。 -那晚,我爬到了我家阳台上(24楼),准备一下子跳下去。 -可是她回了我的短信,大意是她明白我想说不仅仅是去复读,而是要做危险的事。她还说,她不舍得忘掉我。 -那一晚心情沉痛,关了手机,从家里骑了2个小时的自行车,到她家的楼下,看着她家的灯火一夜未息。直到天蒙蒙亮,我方才骑车回家。回家打开手机,112个未接电话,其 -中有98个来自她,另外来自我和她共同的朋友们。 -她们说,半夜收到她给她们发的短信,告诉她们她很担心我会做出很多出格的事情。 -我很偏执,但我看到、知道这一切的时候,我的心肺都融化了。 - -她拒绝过我。甚至我还亲自见证了她是怎样一步一步的爱上那个男生,并最终和她走到了一起。 -有人说,我是这世界上最傻最傻的备胎。可是我想告诉所有这样说的人,做这样一个备胎可是生命中最美好的事儿啊。 - -以上。 - -#### 原链接: http://www.zhihu.com/question/24269892/answer/27338490 \ No newline at end of file diff --git "a/text/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237--\351\202\223\345\262\202\347\232\204\345\233\236\347\255\224.txt" "b/text/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237--\351\202\223\345\262\202\347\232\204\345\233\236\347\255\224.txt" deleted file mode 100644 index b1e3921..0000000 --- "a/text/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237/\347\216\260\345\256\236\345\217\257\344\273\245\346\234\211\345\244\232\347\276\216\345\245\275\357\274\237--\351\202\223\345\262\202\347\232\204\345\233\236\347\255\224.txt" +++ /dev/null @@ -1,21 +0,0 @@ -现实可以有多美好? - -作者: 邓岂 赞同: 23 - -最近高考失利啦,考的很差很差。高三辛苦弄到的加分全都没用上。 -心里想不开决定自杀。 -于是给已经有男朋友的她,发了一条长长的短信,告诉她我要去复读,请她以后不要再联系我了,把我忘了吧。其实现在的我也不再单身,有了一个女朋友。可我在决定去死的时候,选择的唯一一个告别的人却依然是她。 -那晚,我爬到了我家阳台上(24楼),准备一下子跳下去。 -可是她回了我的短信,大意是她明白我想说不仅仅是去复读,而是要做危险的事。她还说,她不舍得忘掉我。 -那一晚心情沉痛,关了手机,从家里骑了2个小时的自行车,到她家的楼下,看着她家的灯火一夜未息。直到天蒙蒙亮,我方才骑车回家。回家打开手机,112个未接电话,其中有98个来自她,另外来自我和她共同的朋友们。 -她们说,半夜收到她给她们发的短信,告诉她们她很担心我会做出很多出格的事情。 -我很偏执,但我看到、知道这一切的时候,我的心肺都融化了。 - -她拒绝过我。甚至我还亲自见证了她是怎样一步一步的爱上那个男生,并最终和她走到了一起。 -有人说,我是这世界上最傻最傻的备胎。可是我想告诉所有这样说的人,做这样一个备胎可是生命中最美好的事儿啊。 - -以上。 - - - -原链接: http://www.zhihu.com/question/24269892/answer/27338490 \ No newline at end of file From 26f33cfe90f108b4cb2c11af217087649a451eea Mon Sep 17 00:00:00 2001 From: Enaunimes Date: Wed, 18 May 2016 00:04:41 +0800 Subject: [PATCH 2/8] =?UTF-8?q?=E7=AE=80=E5=8C=96=E5=B9=B3=E5=8F=B0?= =?UTF-8?q?=E5=88=A4=E6=96=AD=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- auth.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/auth.py b/auth.py index 2856c4e..ec4b4f0 100644 --- a/auth.py +++ b/auth.py @@ -82,15 +82,7 @@ def download_captcha(): elif platform.system() == "Darwin": Logging.info(u"Command: open %s &" % image_name ) os.system("open %s &" % image_name ) - elif platform.system() == "SunOS": - os.system("open %s &" % image_name ) - elif platform.system() == "FreeBSD": - os.system("open %s &" % image_name ) - elif platform.system() == "Unix": - os.system("open %s &" % image_name ) - elif platform.system() == "OpenBSD": - os.system("open %s &" % image_name ) - elif platform.system() == "NetBSD": + elif platform.system() in ("SunOS", "FreeBSD", "Unix", "OpenBSD", "NetBSD"): os.system("open %s &" % image_name ) elif platform.system() == "Windows": os.system("%s" % image_name ) From 9afc0b7981ac3b058904be70b9ea83f9266cc051 Mon Sep 17 00:00:00 2001 From: Enaunimes Date: Wed, 18 May 2016 00:19:04 +0800 Subject: [PATCH 3/8] =?UTF-8?q?=E4=BD=BF=E7=94=A8=20getpass.getpass=20?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E5=AF=86=E7=A0=81=EF=BC=8C=E9=81=BF=E5=85=8D?= =?UTF-8?q?=E8=BE=93=E5=85=A5=E6=97=B6=E5=9B=9E=E6=98=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://docs.python.org/2/library/getpass.html --- auth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/auth.py b/auth.py index ec4b4f0..ed51290 100644 --- a/auth.py +++ b/auth.py @@ -4,6 +4,7 @@ # Build-in / Std import os, sys, time, platform, random import re, json, cookielib +from getpass import getpass # requirements import requests, termcolor @@ -207,8 +208,7 @@ def login(account=None, password=None): if account == None: sys.stdout.write(u"请输入登录账号: ") account = raw_input() - sys.stdout.write(u"请输入登录密码: ") - password = raw_input() + password = getpass("请输入登录密码: ") form_data = build_form(account, password) """ From 0bd093b9a9751a81dcc824ba0e5ee1e5340ff51e Mon Sep 17 00:00:00 2001 From: egrcc Date: Wed, 18 May 2016 01:11:34 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=99=BB=E5=BD=95=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- auth.py | 8 ++-- zhihu.py | 142 +++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 125 insertions(+), 25 deletions(-) diff --git a/auth.py b/auth.py index adf6eaa..2856c4e 100644 --- a/auth.py +++ b/auth.py @@ -67,7 +67,7 @@ def __init__(self, message): def download_captcha(): url = "https://www.zhihu.com/captcha.gif" - r = requests.get(url, params={"r": random.random(), "type": "login"} ) + r = requests.get(url, params={"r": random.random(), "type": "login"}, verify=False) if int(r.status_code) != 200: raise NetworkError(u"验证码请求失败") image_name = u"verify." + r.headers['content-type'].split("/")[1] @@ -103,7 +103,7 @@ def download_captcha(): def search_xsrf(): url = "http://www.zhihu.com/" - r = requests.get(url) + r = requests.get(url, verify=False) if int(r.status_code) != 200: raise NetworkError(u"验证码请求失败") results = re.compile(r"\ 0: @@ -886,7 +958,7 @@ def get_likes(self): 'start': latest_data_time, '_xsrf': _xsrf, } - r = requests.post(post_url, data=data, headers=header) + r = requests.post(post_url, data=data, headers=header, verify=False) response_size = r.json()["msg"][0] response_html = r.json()["msg"][1] return @@ -912,7 +984,14 @@ def __init__(self, answer_url, question=None, author=None, upvote=None, content= self.content = content def parser(self): - r = requests.get(self.answer_url) + headers = { + 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36", + 'Host': "www.zhihu.com", + 'Origin': "http://www.zhihu.com", + 'Pragma': "no-cache", + 'Referer': "http://www.zhihu.com/" + } + r = requests.get(self.answer_url, headers=headers, verify=False) soup = BeautifulSoup(r.content, "lxml") self.soup = soup @@ -1157,7 +1236,14 @@ def get_voters(self): # create_session() # s = session # r = s.get(request_url, params={"params": "{\"answer_id\":\"%d\"}" % int(data_aid)}) - r = requests.get(request_url, params={"params": "{\"answer_id\":\"%d\"}" % int(data_aid)}) + headers = { + 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36", + 'Host': "www.zhihu.com", + 'Origin': "http://www.zhihu.com", + 'Pragma': "no-cache", + 'Referer': "http://www.zhihu.com/" + } + r = requests.get(request_url, params={"params": "{\"answer_id\":\"%d\"}" % int(data_aid)}, headers=headers, verify=False) soup = BeautifulSoup(r.content, "lxml") voters_info = soup.find_all("span")[1:-1] if len(voters_info) == 0: @@ -1192,7 +1278,14 @@ def __init__(self, url, name=None, creator=None): if creator != None: self.creator = creator def parser(self): - r = requests.get(self.url) + headers = { + 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36", + 'Host': "www.zhihu.com", + 'Origin': "http://www.zhihu.com", + 'Pragma': "no-cache", + 'Referer': "http://www.zhihu.com/" + } + r = requests.get(self.url, headers=headers, verify=False) soup = BeautifulSoup(r.content, "lxml") self.soup = soup @@ -1257,7 +1350,14 @@ def get_all_answers(self): yield Answer(answer_url, question, author) i = 2 while True: - r = requests.get(self.url + "?page=" + str(i)) + headers = { + 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36", + 'Host': "www.zhihu.com", + 'Origin': "http://www.zhihu.com", + 'Pragma': "no-cache", + 'Referer': "http://www.zhihu.com/" + } + r = requests.get(self.url + "?page=" + str(i), headers=headers, verify=False) answer_soup = BeautifulSoup(r.content, "lxml") answer_list = answer_soup.find_all("div", class_="zm-item") if len(answer_list) == 0: From d029066af0508b4d3cf13d5fbaeeac35467fe07e Mon Sep 17 00:00:00 2001 From: Enaunimes Date: Wed, 18 May 2016 01:32:28 +0800 Subject: [PATCH 5/8] =?UTF-8?q?=E9=98=B2=E6=AD=A2=E6=84=8F=E5=A4=96?= =?UTF-8?q?=E6=B3=84=E6=BC=8Fcookie?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index db4561e..f26d885 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,6 @@ docs/_build/ # PyBuilder target/ + +# Prevent accidental cookie leak +cookie From 2b433646f0afa6f0f3cddd9bf337e8b7510771e8 Mon Sep 17 00:00:00 2001 From: egrcc Date: Wed, 18 May 2016 16:25:05 +0800 Subject: [PATCH 6/8] topics --- test.py | 4 ++++ zhihu.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/test.py b/test.py index c7a9ceb..9e06300 100755 --- a/test.py +++ b/test.py @@ -128,6 +128,7 @@ def user_test(user_url): followees = user.get_followees() # 获取关注该用户的人 followers = user.get_followers() + topics = user.get_topics() # 获取该用户提的问题 asks = user.get_asks() # 获取该用户回答的问题的答案 @@ -166,6 +167,9 @@ def user_test(user_url): if i == 41: break + for topic in topics: + print topic + print asks # # 代表该用户提的所有问题的生成器对象 diff --git a/zhihu.py b/zhihu.py index 7046f87..e00e939 100755 --- a/zhihu.py +++ b/zhihu.py @@ -654,6 +654,22 @@ def get_followers_num(self): .find_all("a")[1].strong.string) return followers_num + def get_topics_num(self): + if self.user_url == None: + print "I'm anonymous user." + return 0 + else: + if self.soup == None: + self.parser() + soup = self.soup + topics_num = soup.find_all("div", class_="zm-profile-side-section-title")[1].strong.string.encode("utf-8") + I='' + for i in topics_num: + if i.isdigit(): + I=I+i + topics_num=int(I) + return topics_num + def get_agree_num(self): if self.user_url == None: print "I'm anonymous user." @@ -812,6 +828,55 @@ def get_followers(self): user_link = follower_soup.find("h2", class_="zm-list-content-title").a yield User(user_link["href"], user_link.string.encode("utf-8")) + def get_topics(self): + if self.user_url == None: + print "I'm anonymous user." + return + yield + else: + topics_num = self.get_topics_num() + # print topics_num + if topics_num == 0: + return + yield + else: + topics_url = self.user_url + "/topics" + headers = { + 'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36", + 'Host': "www.zhihu.com", + 'Origin': "http://www.zhihu.com", + 'Pragma': "no-cache", + 'Referer': "http://www.zhihu.com/" + } + r = requests.get(topics_url, headers=headers, verify=False) + soup = BeautifulSoup(r.content, "lxml") + for i in xrange((topics_num - 1) / 20 + 1): + if i == 0: + topic_list = soup.find_all("div", class_="zm-profile-section-item zg-clear") + for j in xrange(min(topics_num, 20)): + yield topic_list[j].find("strong").string.encode("utf-8") + else: + post_url = topics_url + _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"] + offset = i * 20 + data = { + '_xsrf': _xsrf, + 'offset': offset, + 'start': 0 + } + header = { + 'User-Agent': "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0", + 'Host': "www.zhihu.com", + 'Referer': topics_url + } + r_post = requests.post(post_url, data=data, headers=header, verify=False) + + topic_data = r_post.json()["msg"][1] + topic_soup = BeautifulSoup(topic_data, "lxml") + topic_list = topic_soup.find_all("div", class_="zm-profile-section-item zg-clear") + for j in xrange(min(topics_num - i * 20, 20)): + yield topic_list[j].find("strong").string.encode("utf-8") + def get_asks(self): """ By ecsys (https://github.com/ecsys) From 49bed784d99e6fe489ea0630601168f0b47eebbc Mon Sep 17 00:00:00 2001 From: egrcc Date: Wed, 18 May 2016 16:45:27 +0800 Subject: [PATCH 7/8] topic-num --- zhihu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zhihu.py b/zhihu.py index e00e939..68af702 100755 --- a/zhihu.py +++ b/zhihu.py @@ -662,7 +662,7 @@ def get_topics_num(self): if self.soup == None: self.parser() soup = self.soup - topics_num = soup.find_all("div", class_="zm-profile-side-section-title")[1].strong.string.encode("utf-8") + topics_num = soup.find_all("div", class_="zm-profile-side-section-title")[-1].strong.string.encode("utf-8") I='' for i in topics_num: if i.isdigit(): From 1e24d4dfa960eacddb566f00269eb3d1878a4e00 Mon Sep 17 00:00:00 2001 From: egrcc Date: Mon, 18 Jul 2016 19:32:34 +0800 Subject: [PATCH 8/8] deprecated --- README.rst | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/README.rst b/README.rst index e4fb77f..9b5ad28 100644 --- a/README.rst +++ b/README.rst @@ -1,10 +1,7 @@ zhihu-python:获取知乎信息 =============================== -:Author: `egrcc `_ ( `微博 `_ | `电邮 `_ ) -:Contributors: 参见 `Contributors `_ -:Update: 04/23 2016 - +**注意: 本项目不再维护更新!** .. contents:: @@ -875,11 +872,4 @@ zhihu.Post ---- 知乎专栏文章操作类 **Returns**:一个 Column 的实例对象 - -联系我 ----------- - -- 微博:http://weibo.com/u/2948739432 -- github:https://github.com/egrcc -- email:zhaolujun1994@gmail.com