-
Notifications
You must be signed in to change notification settings - Fork 167
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
shimeng
committed
Sep 22, 2017
1 parent
e584675
commit 8eec5c5
Showing
1 changed file
with
42 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# Created by shimeng on 17-9-21 | ||
import sys | ||
|
||
# 这里写你自己的地址 | ||
sys.path.append('/home/shimeng/code/spider_framework_github_responsity') | ||
|
||
from spider.tools import format_put_data | ||
from spider.data_save import pipeline | ||
from spider.html_parser import parser | ||
from spider.page_downloader import aispider | ||
from spider.threads import start, work_queue, save_queue | ||
from spider.log_format import logger | ||
from proxy_basic_config import url_parse_dict | ||
from _request import valid | ||
|
||
from get_proxies_base_spider import SpiderMain | ||
|
||
|
||
class WorkSpider(SpiderMain):
    """Runnable spider that reuses ``SpiderMain`` and overrides only ``run``."""

    def __init__(self):
        # Explicit two-argument super() form, kept exactly as the original
        # wrote it; no extra state is set up here.
        super(WorkSpider, self).__init__()

    def run(self):
        """Call ``start()`` and then ``self.craw()``, in that order.

        Overrides the ``run`` method. According to the original author's
        note, a custom request function can be supplied by passing
        ``request=your_request_function`` to the crawl method; when omitted,
        the framework's own request function is used.

        NOTE(review): the author's note calls the method "crawl" while the
        call below is spelled ``craw`` -- confirm which name ``SpiderMain``
        actually defines.
        """
        # ``start`` comes from spider.threads; presumably it launches the
        # worker threads that consume the queues -- TODO confirm.
        start()
        self.craw()
|
||
|
||
if __name__ == '__main__':
    # Build the spider and kick off the run.
    work_spider = WorkSpider()
    work_spider.run()

    # Block here until both joins return. The queues come from
    # spider.threads; presumably join() waits for every queued item to be
    # marked done -- TODO confirm against that module.
    work_queue.join()
    save_queue.join()

    # Only reached once both joins have returned.
    logger.info('All Job Finishing, Please Check!')