-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch_dp_nb_shops.js
67 lines (59 loc) · 2.26 KB
/
fetch_dp_nb_shops.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// Generated by CoffeeScript 1.3.3
(function() {
// Names local to this module wrapper (single hoisted declaration, as emitted by the CoffeeScript compiler).
var $, jsdom, nb_search_url, page_size, parse_db_search_result, request;
// NOTE(review): jsdom is loaded here but not referenced anywhere in the visible source.
jsdom = require("jsdom");
// jQuery is used to query the fetched search-result HTML.
$ = require("jQuery");
// HTTP client used by fetch_shops to download the search page.
request = require("request");
// Base URL of the search page; fetch_shops appends the URI-encoded keyword to it.
nb_search_url = "http://www.dianping.com/search/keyword/11/0_";
// NOTE(review): not referenced in the visible source; presumably the number of results per page - confirm.
page_size = 15;
exports.fetch_shops = function(title, callback) {
var shops;
if (title === "") {
callback("lack of arg: 'title'!");
return;
}
shops = [];
return request({
url: nb_search_url + encodeURIComponent(title.trim()),
headers: {
"User-Agent": "Mozilla/5.0 (X11; U; Linux i686; zh-CN; rv:1.9.1.2) Gecko/20090803 Fedora/3.5.2-2.fc11 Firefox/3.5.2"
}
}, function(error, response, body) {
var jq_doc, searchList, shop, _i, _len;
if (!error && response.statusCode === 200) {
console.log("search title:" + title);
jq_doc = $(body);
searchList = $(jq_doc.find("#searchList dl")[0]).children("dd");
if (!searchList || searchList.length === 0) {
callback("page scraping error: no shop list elements!");
return;
}
for (_i = 0, _len = searchList.length; _i < _len; _i++) {
shop = searchList[_i];
shop = parse_db_search_result(shop);
shops.push(shop);
}
return callback(null, shops);
} else {
return callback(error);
}
});
};
/**
 * Extracts the fields of one search-result entry.
 *
 * @param {*} shop - One `dd` element of the search list (anything `$()` accepts).
 * @returns {{rate: number, average: number, address: string, phone: (string|undefined),
 *   shop_name: (string|undefined), title: string, fetch_id: (string|undefined)}}
 *   `rate` and `average` are NaN, and `fetch_id` is undefined, when the
 *   corresponding markup is missing or not in the expected form.
 */
parse_db_search_result = function(shop) {
  var RATE_MARKER, SHOP_PATH_MARKER, address_row, jq_shop, rate_class, rate_pos,
    result, shop_link, shopid_pos, shopid_text;
  RATE_MARKER = "irr-star";
  SHOP_PATH_MARKER = "shop/";
  result = {};
  jq_shop = $(shop);

  // The rating is encoded in the span's class name as "irr-star<NN>", where
  // NN is ten times the rating. A missing span or marker yields NaN rather
  // than a TypeError or a number parsed from unrelated text.
  rate_class = $(jq_shop.find(".remark")[0]).children("li").children("span").attr("class");
  rate_pos = typeof rate_class === "string" ? rate_class.indexOf(RATE_MARKER) : -1;
  result.rate = rate_pos === -1
    ? NaN
    : parseFloat(rate_class.slice(rate_pos + RATE_MARKER.length)) / 10;

  // Skip the first character of the text before parsing the number
  // (presumably a currency symbol - confirm against the live markup).
  result.average = parseInt($(jq_shop.find(".average")[0]).text().slice(1), 10);

  // Skip the first three characters (presumably a label prefix - confirm),
  // then split on spaces: first piece is the address, second the phone.
  address_row = $(jq_shop.find(".address")[0]).text().slice(3).split(" ");
  result.address = address_row[0];
  result.phone = address_row[1];

  // Look the link up once and reuse it for all three fields.
  shop_link = jq_shop.find(".shopname a");
  result.shop_name = shop_link.attr("title");
  result.title = shop_link.text();

  // The id is whatever follows "shop/" in the link target; leave it
  // undefined when the link or the marker is absent.
  shopid_text = shop_link.attr("href");
  shopid_pos = typeof shopid_text === "string" ? shopid_text.indexOf(SHOP_PATH_MARKER) : -1;
  result.fetch_id = shopid_pos === -1
    ? undefined
    : shopid_text.slice(shopid_pos + SHOP_PATH_MARKER.length);

  return result;
};
}).call(this);