forked from Boris-code/feapder
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_spider.py
More file actions
99 lines (79 loc) · 3.16 KB
/
create_spider.py
File metadata and controls
99 lines (79 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- coding: utf-8 -*-
"""
Created on 2018-08-28 17:38:43
---------
@summary: Create a spider file from a template
---------
@author: Boris
@email: boris_liu@foxmail.com
"""
import getpass
import os
import re
import feapder.utils.tools as tools
from .create_init import CreateInit
def deal_file_info(file):
    """Fill in the ``{DATE}`` and ``{USER}`` placeholders of a template text.

    Args:
        file: Template content as a string (the text itself, not a path or
            a file object).

    Returns:
        The text with every ``{DATE}`` replaced by the value returned from
        ``tools.get_current_date()`` and every ``{USER}`` replaced by the
        login name reported by ``getpass.getuser()``.
    """
    # The date is substituted first, then the user name, one step at a time.
    with_date = file.replace("{DATE}", tools.get_current_date())
    with_user = with_date.replace("{USER}", getpass.getuser())
    return with_user
class CreateSpider:
    """Generate a spider source file from one of the bundled templates.

    The workflow driven by :meth:`create` is: validate the spider name, load
    the template matching the requested spider type, fill in the template
    placeholders and write the result to ``<snake_case_name>.py`` (a path
    relative to the current working directory).
    """

    # Numeric spider type accepted by ``create`` -> template file name.
    _TEMPLATE_FILES = {
        1: "air_spider_template.tmpl",
        2: "spider_template.tmpl",
        3: "batch_spider_template.tmpl",
    }

    # A valid spider name: one ASCII letter followed by letters, digits or "_".
    # Used with ``fullmatch`` so the whole string has to conform; a ``$``
    # anchor would also accept a name ending in a newline.
    _NAME_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*")

    def __init__(self):
        # Helper whose ``create()`` is invoked after a spider file is written.
        self._create_init = CreateInit()

    def cover_to_underline(self, key):
        """Convert a CamelCase name to a lower-case, underscore-separated one.

        Every run of consecutive capital letters is lower-cased. A run that
        does not start the name is preceded by ``_``; a run longer than one
        letter is additionally followed by ``_``. For example ``TestSpider``
        and ``testSpider`` both become ``test_spider``, while ``TestAPI``
        becomes ``test_api_``.

        Args:
            key: The name to convert.

        Returns:
            The converted name; ``key`` unchanged when it has no capitals.
        """
        # "[A-Z]*" also yields empty matches at non-capital positions. They are
        # skipped below, but they keep ``index == 0`` meaning "the run that
        # starts at the very beginning of the name".
        for index, run in enumerate(re.findall("[A-Z]*", key)):
            if not run:
                continue
            prefix = "" if index == 0 else "_"
            suffix = "_" if len(run) > 1 else ""
            # Earlier runs are already lower-cased, so the first remaining
            # occurrence of ``run`` is the one currently being processed.
            key = key.replace(run, prefix + run.lower() + suffix, 1)
        return key

    def get_spider_template(self, spider_type):
        """Read and return the template text for the given spider type.

        Args:
            spider_type: ``1``, ``2`` or ``3``, selecting
                ``air_spider_template.tmpl``, ``spider_template.tmpl`` or
                ``batch_spider_template.tmpl`` respectively.

        Returns:
            The content of the template file as a string.

        Raises:
            ValueError: If ``spider_type`` is not one of the supported values.
        """
        try:
            template_name = self._TEMPLATE_FILES[spider_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable values that cannot be looked up.
            raise ValueError("spider type error, support 1 2 3") from None

        # The templates directory is resolved relative to this source file.
        template_path = os.path.abspath(
            os.path.join(__file__, "../../../templates", template_name)
        )
        with open(template_path, "r", encoding="utf-8") as file:
            return file.read()

    def create_spider(self, spider_template, spider_name):
        """Render a template into spider source code.

        Args:
            spider_template: Template text containing ``${spider_name}`` and
                the placeholders handled by ``deal_file_info``.
            spider_name: Name substituted for every ``${spider_name}``.

        Returns:
            The rendered source code as a string.
        """
        rendered = spider_template.replace("${spider_name}", spider_name)
        return deal_file_info(rendered)

    def save_spider_to_file(self, spider, spider_name):
        """Write the rendered spider to ``<snake_case_name>.py``.

        If the target file already exists the user is asked on stdin whether
        to overwrite it; any answer other than ``y`` cancels the operation and
        nothing is written.

        Args:
            spider: Rendered spider source code.
            spider_name: Spider name; the file name is derived from it with
                :meth:`cover_to_underline`.
        """
        spider_file = self.cover_to_underline(spider_name) + ".py"

        if os.path.exists(spider_file):
            confirm = input("%s 文件已存在 是否覆盖 (y/n). " % spider_file)
            if confirm != "y":
                print("取消覆盖 退出")
                return

        with open(spider_file, "w", encoding="utf-8") as file:
            file.write(spider)
            print("\n%s 生成成功" % spider_name)

        # NOTE(review): presumably refreshes the package's __init__ file;
        # confirm against CreateInit.
        self._create_init.create()

    def create(self, spider_name, spider_type):
        """Validate the name, render the matching template and save the file.

        Args:
            spider_name: Spider name in snake_case or CamelCase. It must start
                with an ASCII letter and contain only ASCII letters, digits
                and underscores.
            spider_type: Template selector, see :meth:`get_spider_template`.

        Raises:
            ValueError: If ``spider_name`` is not a valid name or
                ``spider_type`` is unsupported.
        """
        # Check spider_name: the whole string must match the pattern.
        if not self._NAME_PATTERN.fullmatch(spider_name):
            raise ValueError("爬虫名不符合命名规范,请用下划线命名或驼峰命名方式")

        if spider_name.islower():
            # All-lower-case names go through tools.key2hump
            # (presumably snake_case -> CamelCase; confirm against tools).
            spider_name = tools.key2hump(spider_name)

        spider_template = self.get_spider_template(spider_type)
        spider = self.create_spider(spider_template, spider_name)
        self.save_spider_to_file(spider, spider_name)