forked from jinxin0924/Crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstack_spider.py
More file actions
25 lines (19 loc) · 721 Bytes
/
stack_spider.py
File metadata and controls
25 lines (19 loc) · 721 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
__author__ = 'Xing'
from scrapy import Spider
from scrapy.selector import Selector
from items import StackItem
class StackSpider(Spider):
    """Spider that collects question titles and links from Stack Overflow.

    Crawling starts at the newest-questions listing in ``start_urls``;
    ``parse`` yields one ``StackItem`` per question heading on the page.
    """

    name = "stack"
    allowed_domains = ["stackoverflow.com"]
    start_urls = [
        "http://stackoverflow.com/questions?pagesize=50&sort=newest",
    ]

    def parse(self, response):
        """Yield a ``StackItem`` for each question heading in *response*.

        Each item carries the anchor text as ``title`` and the anchor's
        ``href`` attribute, exactly as extracted, as ``url``.

        Headings that lack a ``question-hyperlink`` anchor (or its text /
        href) are skipped, so one unexpected heading does not raise
        ``IndexError`` and discard the remaining questions on the page.
        """
        questions = Selector(response).xpath('//div[@class="summary"]/h3')
        for question in questions:
            titles = question.xpath(
                'a[@class="question-hyperlink"]/text()').extract()
            urls = question.xpath(
                'a[@class="question-hyperlink"]/@href').extract()
            if not titles or not urls:
                # Heading without the expected anchor data: nothing to
                # record, move on to the next question.
                continue
            item = StackItem()
            item['title'] = titles[0]
            item['url'] = urls[0]
            yield item