Skip to content

Commit 910a06d

Browse files
committed
修复有依赖爬虫时,依赖爬虫不结束,新批次开启不了的bug
1 parent 406b542 commit 910a06d

1 file changed

Lines changed: 31 additions & 33 deletions

File tree

feapder/core/spiders/batch_spider.py

Lines changed: 31 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -616,14 +616,14 @@ def check_batch(self, is_first_check=False):
616616
@result: 完成返回True 否则False
617617
"""
618618

619-
sql = 'select date_format(batch_date, "{date_format}"), total_count, done_count from {batch_record_table} order by id desc limit 1'.format(
619+
sql = 'select date_format(batch_date, "{date_format}"), total_count, done_count, is_done from {batch_record_table} order by id desc limit 1'.format(
620620
date_format=self._date_format.replace(":%M", ":%i"),
621621
batch_record_table=self._batch_record_table,
622622
)
623-
batch_info = self._mysqldb.find(sql) # (('2018-08-19', 49686, 0),)
623+
batch_info = self._mysqldb.find(sql) # (('批次时间', 总量, 完成量, 批次是否完成),)
624624

625625
if batch_info:
626-
batch_date, total_count, done_count = batch_info[0]
626+
batch_date, total_count, done_count, is_done = batch_info[0]
627627

628628
now_date = datetime.datetime.now()
629629
last_batch_date = datetime.datetime.strptime(batch_date, self._date_format)
@@ -639,39 +639,37 @@ def check_batch(self, is_first_check=False):
639639
done_count = task_count.get("done_count")
640640

641641
if total_count == done_count:
642-
# 检查相关联的爬虫是否完成
643-
releated_spider_is_done = self.related_spider_is_done()
644-
if releated_spider_is_done == False:
645-
msg = "《{}》本批次未完成, 正在等待依赖爬虫 {} 结束. 批次时间 {} 批次进度 {}/{}".format(
646-
self._batch_name,
647-
self._related_batch_record or self._related_task_tables,
648-
batch_date,
649-
done_count,
650-
total_count,
651-
)
652-
log.info(msg)
653-
# 检查是否超时 超时发出报警
654-
if time_difference >= datetime.timedelta(
655-
days=self._batch_interval
656-
): # 已经超时
657-
self.send_msg(
658-
msg,
659-
level="error",
660-
message_prefix="《{}》本批次未完成, 正在等待依赖爬虫 {} 结束".format(
661-
self._batch_name,
662-
self._related_batch_record or self._related_task_tables,
663-
),
642+
if not is_done:
643+
# 检查相关联的爬虫是否完成
644+
related_spider_is_done = self.related_spider_is_done()
645+
if related_spider_is_done is False:
646+
msg = "《{}》本批次未完成, 正在等待依赖爬虫 {} 结束. 批次时间 {} 批次进度 {}/{}".format(
647+
self._batch_name,
648+
self._related_batch_record or self._related_task_tables,
649+
batch_date,
650+
done_count,
651+
total_count,
664652
)
665-
self._batch_timeout = True
666-
667-
return False
653+
log.info(msg)
654+
# 检查是否超时 超时发出报警
655+
if time_difference >= datetime.timedelta(
656+
days=self._batch_interval
657+
): # 已经超时
658+
self.send_msg(
659+
msg,
660+
level="error",
661+
message_prefix="《{}》本批次未完成, 正在等待依赖爬虫 {} 结束".format(
662+
self._batch_name,
663+
self._related_batch_record
664+
or self._related_task_tables,
665+
),
666+
)
667+
self._batch_timeout = True
668668

669-
elif releated_spider_is_done == True:
670-
# 更新is_done 状态
671-
self.update_is_done()
669+
return False
672670

673-
else:
674-
self.update_is_done()
671+
else:
672+
self.update_is_done()
675673

676674
msg = "《{}》本批次完成 批次时间 {} 共处理 {} 条任务".format(
677675
self._batch_name, batch_date, done_count

0 commit comments

Comments
 (0)