# frenuscrape.py
# -*- coding: utf-8 -*-
import os
import shutil
from urllib.parse import urljoin

import requests
from lxml import html
# Forum scraper: walks the listing pages under ``baseurl``, follows every
# thread link on each page, follows every imgur.com link inside each thread,
# and saves every i.imgur link found on those imgur pages into ``images/``.

# Root URL of the forum listing. Placeholder: must be set to a real
# http(s) URL before the script is run.
baseurl = "to fill"
# Exclusive upper bound of the page index: "page-1" .. "page-<num - 1>".
num = 100

REQUEST_TIMEOUT = 30  # seconds; without it a stalled server hangs the run
CHUNK_SIZE = 1024  # bytes per streamed write
IMAGE_DIR = "images"

THREAD_XPATH = '//a/@href[starts-with(., "x/thread/")]'
IMGUR_XPATH = '//a/@href[starts-with(., "https://imgur.com/") or starts-with(., "http://imgur.com/")]'
IMAGE_XPATH = '//a/@href[contains(., "i.imgur")]'


def page_names(stop):
    """Return the listing page names ``page-1`` .. ``page-<stop - 1>``."""
    return ["page-%s" % n for n in range(1, stop)]


def resolve_links(base_url, hrefs):
    """Return *hrefs* as absolute URLs, resolved against *base_url*.

    Relative hrefs (``x/thread/...``) and protocol-relative hrefs
    (``//i.imgur.com/...``) cannot be passed to ``requests.get`` as-is;
    hrefs that are already absolute are returned unchanged.
    """
    return [urljoin(base_url, href) for href in hrefs]


def fetch_links(url, xpath_expr):
    """Download *url* and return the hrefs selected by *xpath_expr*.

    The hrefs are made absolute relative to the final (post-redirect) URL
    of the response. A failed or empty download is reported on stdout and
    yields an empty list so one bad page does not abort the whole crawl.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print("skipping %s: %s" % (url, exc))
        return []
    if not response.content.strip():
        # html.fromstring() raises on an empty document.
        return []
    tree = html.fromstring(response.content)
    return resolve_links(response.url, tree.xpath(xpath_expr))


def download_image(url, path):
    """Stream *url* into the file *path*; return True on success.

    The status is checked before the file is opened, so an HTTP error page
    is never saved as an image. The response is closed in every case.
    """
    try:
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
            print(response)
            response.raise_for_status()
            with open(path, "wb") as handle:
                for chunk in response.iter_content(CHUNK_SIZE):
                    handle.write(chunk)
    except requests.RequestException as exc:
        print("skipping %s: %s" % (url, exc))
        return False
    return True


def main():
    """Crawl every listing page and download all linked imgur images.

    Files are written as ``images/output_<idx>_<counter>.jpg`` where
    ``counter`` numbers the imgur links seen so far (across all threads)
    and ``idx`` numbers the images found on that imgur page.
    """
    if not baseurl.startswith(("http://", "https://")):
        raise SystemExit("Set `baseurl` to the forum's http(s) URL before running.")

    os.makedirs(IMAGE_DIR, exist_ok=True)

    pages = page_names(num)
    print(pages)

    counter = 0
    for topic in pages:
        page_url = "%s/%s" % (baseurl, topic)
        print(page_url)
        for thread_url in fetch_links(page_url, THREAD_XPATH):
            for imgur_url in fetch_links(thread_url, IMGUR_XPATH):
                counter += 1
                image_urls = fetch_links(imgur_url, IMAGE_XPATH)
                for idx, image_url in enumerate(image_urls):
                    target = os.path.join(
                        IMAGE_DIR, "output_%s_%s.jpg" % (idx, counter)
                    )
                    download_image(image_url, target)


if __name__ == "__main__":
    main()