From 8eec5c5b76108430bae36f0b408eb4109b13a32e Mon Sep 17 00:00:00 2001
From: shimeng
Date: Fri, 22 Sep 2017 14:02:54 +0800
Subject: [PATCH] =?UTF-8?q?=E5=B7=A5=E4=BD=9C=E5=87=BD=E6=95=B0=EF=BC=8C?=
 =?UTF-8?q?=E8=84=9A=E6=9C=AC=E4=BB=8E=E8=BF=99=E9=87=8C=E5=90=AF=E5=8A=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 work_spider.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 work_spider.py

diff --git a/work_spider.py b/work_spider.py
new file mode 100644
index 0000000..a963a33
--- /dev/null
+++ b/work_spider.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Created by shimeng on 17-9-21
+import sys
+
+# Replace this with the path on your own machine (presumably the framework checkout, so the `spider` imports below resolve)
+sys.path.append('/home/shimeng/code/spider_framework_github_responsity')
+
+from spider.tools import format_put_data
+from spider.data_save import pipeline
+from spider.html_parser import parser
+from spider.page_downloader import aispider
+from spider.threads import start, work_queue, save_queue
+from spider.log_format import logger
+from proxy_basic_config import url_parse_dict
+from _request import valid
+
+from get_proxies_base_spider import SpiderMain
+
+
+class WorkSpider(SpiderMain):
+    def __init__(self):
+        super(WorkSpider, self).__init__()
+
+    # Override run(): call start() from spider.threads, then self.craw().
+    # If the request function is a custom one, it can be set in the crawl function: request=your_request_function; the default is the framework's request
+    def run(self):
+        start()
+        self.craw()  # NOTE(review): the comment above says "crawl"; confirm the method name defined on SpiderMain
+
+
+if __name__ == '__main__':
+    work_spider = WorkSpider()
+
+    work_spider.run()
+
+    # Blocking: join() work_queue and save_queue before reporting completion
+    work_queue.join()
+    save_queue.join()
+
+    # Done
+    logger.info('All Job Finishing, Please Check!')