BaseParser 为 Spider 的基类,用来定义任务下发与数据解析,是面向用户提供的接口。
class BaseParser(object):
    """
    @summary: Base class of Spider. Declares the user-facing hooks used to
              issue tasks and parse data. Every hook defined here is a no-op
              that returns None; subclasses override the ones they need.
    """

    def start_requests(self):
        """
        @summary: Add the initial URLs
        ---------
        ---------
        @result: yield Request()
        """
        pass

    def parse(self, request, response):
        """
        @summary: Default callback function
        ---------
        @param request:
        @param response:
        ---------
        @result:
        """
        pass

    def download_midware(self, request):
        """
        @summary: Download middleware; may modify some parameters of the request
        ---------
        @param request:
        ---------
        @result: return request / None (the original request is not modified)
        """
        pass

    def exception_request(self, request, response):
        """
        @summary: Receives a request for which the request itself, or the
                  parsing done in the parser, raised an exception
        ---------
        @param request:
        @param response:
        ---------
        @result: request / callback / None (the return value must be iterable)
        """
        pass

    def failed_request(self, request, response):
        """
        @summary: Receives a request that exceeded the maximum retry count.
                  A modified request may be returned. If no request is
                  returned, the request passed in is written to the redis
                  failed table as-is; otherwise the modified request is
                  written to the failed table.
        ---------
        @param request:
        @param response:
        ---------
        @result: request / item / callback / None (the return value must be iterable)
        """
        pass

    def start_callback(self):
        """
        @summary: Callback invoked when the program starts
        ---------
        ---------
        @result: None
        """
        pass

    def end_callback(self):
        """
        @summary: Callback invoked when the program ends
        ---------
        ---------
        @result: None
        """
        pass

    @property
    def name(self):
        """
        @summary: Name of the concrete class of this instance
        ---------
        ---------
        @result: str
        """
        return self.__class__.__name__

    def close(self):
        """
        @summary: No-op in the base class
                  NOTE(review): presumably a cleanup hook for subclasses - confirm against the caller
        ---------
        ---------
        @result: None
        """
        pass


# Example using the program start/end callbacks:
import feapder
class TestSpider(feapder.Spider):
    """
    @summary: Example spider that overrides the start and end callbacks so
              that each one prints a message.
    """

    def start_callback(self):
        """
        @summary: Print a message saying the spider has started
        ---------
        ---------
        @result: None
        """
        print("爬虫开始了")

    def end_callback(self):
        """
        @summary: Print a message saying the spider has finished
        ---------
        ---------
        @result: None
        """
        print("爬虫结束了")