Skip to content

Commit

Permalink
preps for 1.3.2
Browse files Browse the repository at this point in the history
  - fixes over MR #34
  - get_json method for SmartDL
  - better logging syntax
  - added tests
  - updated user agents
  - change test files host from hetzner.de to ovh
  • Loading branch information
iTaybb committed Jul 26, 2019
1 parent 925fc53 commit 4e71c18
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 40 deletions.
4 changes: 3 additions & 1 deletion ChangeLog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ Below you find a list with the added features, changes and fixes for
each version.

======================
(Version 1.3.2 beta; ??/04/19)
(Version 1.3.2 beta; 26/07/19)
- NEW: Added ability to parse json directly from the response.
- NEW: Allow passing of custom arguments to urllib Request object (Pull request #34)
- IMPROVE: Issue #29 - added timeout parameter to the constructor of SmartDL
- IMPROVE: Better tests and logging.
- FIX: Bug fixes.
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
Python Smart Download Manager -- pySmartDL
==========================================

``pySmartDL`` strives to be a full-pledged smart download manager for Python. Main features:
``pySmartDL`` strives to be a full-fledged smart download manager for Python. Main features:

* Built-in download acceleration (with the [multipart downloading technique](http://stackoverflow.com/questions/93642/how-do-download-accelerators-work)).
* Mirrors support.
* Pause/Unpause feature.
* Speed limiting feature.
* Hash checking.
* Non-blocking, shows progress bar, download speed and eta.
* Full support for custom headers and methods.
* Python 3 Support

Project Links
Expand Down
13 changes: 7 additions & 6 deletions docs/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,12 @@ Example 4: Passing custom options to urllib.request.Request()

from pySmartDL import SmartDL

request_args = {"headers": {"User-Agent": "smyPartDL/86.75.309"}}
download_obj = SmartDL("http://httpbin.org/get", "./headerinfo.json", request_args=request_args)
download_obj.start()
request_args = {"headers": {"User-Agent": "pySmartDL/1.3.2"}}
obj = SmartDL("http://httpbin.org/headers", request_args=request_args, progress_bar=False)
obj.start()

# headerinfo.json should contain the custom headers
data = obj.get_json()
print(data)


==================================================================
Expand All @@ -71,7 +72,7 @@ Example 5: Fetch data to memory instead of reading it from a file
obj = SmartDL(url, progress_bar=False)
obj.start()

data = obj.get_data() # HTML tags!
data = obj.get_data() # HTML tags!
# Do something with data
Expand All @@ -83,7 +84,7 @@ Example 6: Use the nonblocking flag and get information during the download proc
import time
from pySmartDL import SmartDL
url_100mb_file = ['https://speed.hetzner.de/100MB.bin']
url_100mb_file = ['http://www.ovh.net/files/100Mio.dat']
obj = SmartDL(url_100mb_file, progress_bar=False)
obj.start(blocking=False)

Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Python Smart Download Manager -- pySmartDL
* Speed limiting feature.
* Hash checking.
* Non-blocking, shows progress bar, download speed and eta.
* Full support for custom headers and methods
* Full support for custom headers and methods.
* Python 3 Support

=============
Expand Down
4 changes: 2 additions & 2 deletions pySmartDL/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def download(url, dest, requestArgs=None, startByte=0, endByte=None, timeout=4,
logger = logger or utils.DummyLogger()
req = urllib.request.Request(url, **requestArgs)
if endByte:
req.add_header('range', 'bytes=%d-%d' % (startByte, endByte))
req.add_header('Range', 'bytes={:.0f}-{:.0f}'.format(startByte, endByte))
logger.info("Downloading '{}' to '{}'...".format(url, dest))
try:
urlObj = urllib.request.urlopen(req, timeout=timeout)
Expand All @@ -26,7 +26,7 @@ def download(url, dest, requestArgs=None, startByte=0, endByte=None, timeout=4,
if retries > 0:
logger.warning("Thread didn't got the file it was expecting. Retrying ({} times left)...".format(retries-1))
time.sleep(5)
return download(url, dest, req, startByte, endByte, timeout, shared_var, thread_shared_cmds, logger, retries-1)
return download(url, dest, requestArgs, startByte, endByte, timeout, shared_var, thread_shared_cmds, logger, retries-1)
else:
raise
else:
Expand Down
16 changes: 14 additions & 2 deletions pySmartDL/pySmartDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from io import StringIO
import multiprocessing.dummy as multiprocessing
from ctypes import c_int
import json

from . import utils
from .control_thread import ControlThread
Expand Down Expand Up @@ -64,8 +65,8 @@ class SmartDL:
:type logger: `logging.Logger` instance
:param connect_default_logger: If true, connects a default logger to the class.
:type connect_default_logger: bool
:param requestArgs: Arguments to be passed to a new urllib.request.Request instance in dictionary form. See https://docs.python.org/3/library/urllib.request.html#urllib.request.Request for options.
:type requestArgs: dict
:param request_args: Arguments to be passed to a new urllib.request.Request instance in dictionary form. See `urllib.request docs <https://docs.python.org/3/library/urllib.request.html#urllib.request.Request>`_ for options.
:type request_args: dict
:rtype: `SmartDL` instance
.. NOTE::
Expand Down Expand Up @@ -608,6 +609,17 @@ def get_data_hash(self, algorithm):
'''
return hashlib.new(algorithm, self.get_data(binary=True)).hexdigest()

def get_json(self):
    '''
    Parses the downloaded data as JSON and returns the decoded result. Will raise
    `RuntimeError` if it's called before the download task has finished, and
    `json.decoder.JSONDecodeError` if the downloaded data is not valid JSON.

    :rtype: dict
    '''
    # get_data() is called with its default arguments; json.loads does the decoding.
    return json.loads(self.get_data())

def post_threadpool_actions(pool, args, expected_filesize, SmartDLObj):
"Run function after thread pool is done. Run this in a thread."
while not pool.done():
Expand Down
44 changes: 22 additions & 22 deletions pySmartDL/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,32 +130,32 @@ def get_filesize(url, timeout=15):
def get_random_useragent():
'''
Returns a random popular user-agent.
Taken from `here <http://techblog.willshouse.com/2012/01/03/most-common-user-agents/>`_, last updated on 2019/02/17.
Taken from `here <http://techblog.willshouse.com/2012/01/03/most-common-user-agents/>`_, last updated on 2019/07/26.
:returns: user-agent
:rtype: string
'''
l = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:65.0) Gecko/20100101 Firefox/65.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0"
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:67.0) Gecko/20100101 Firefox/67.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:68.0) Gecko/20100101 Firefox/68.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0',
]
return random.choice(l)

Expand Down Expand Up @@ -251,7 +251,7 @@ def create_debugging_logger():
t_log.setLevel(logging.DEBUG)
console = logging.StreamHandler()
console.setLevel(logging.DEBUG)
console.setFormatter(logging.Formatter('[%(levelname)s||%(thread)d] %(message)s'))
console.setFormatter(logging.Formatter('[%(levelname)s||%(thread)d@{%(pathname)s:%(lineno)d}] %(message)s'))
t_log.addHandler(console)
DEFAULT_LOGGER_CREATED = True

Expand Down
37 changes: 32 additions & 5 deletions test/test_pySmartDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def setUp(self):
"http://www.mirrorservice.org/sites/downloads.sourceforge.net/s/se/sevenzip/7-Zip/9.20/7za920.zip"
]
self.res_7za920_hash = '2a3afe19c180f8373fa02ff00254d5394fec0349f5804e0ad2f6067854ff28ac'
self.res_testfile_1gb = 'https://speed.hetzner.de/1GB.bin'
self.res_testfile_100mb = 'https://speed.hetzner.de/100MB.bin'
self.res_testfile_1gb = 'http://www.ovh.net/files/1Gio.dat'
self.res_testfile_100mb = 'http://www.ovh.net/files/100Mio.dat'
self.enable_logging = "-vvv" in sys.argv

def test_dependencies(self):
Expand All @@ -40,10 +40,15 @@ def test_dependencies(self):
def test_download(self):
    # Download the 7za920 archive from the configured mirrors into the test
    # directory, with the console progress bar disabled.
    obj = pySmartDL.SmartDL(self.res_7za920_mirrors, dest=self.dl_dir, progress_bar=False, connect_default_logger=self.enable_logging)
    obj.start()

    # After start() returns, the bar is expected to be completely filled.
    self.assertEqual(obj.get_progress_bar(), '[##################]')

    # Only the first two bytes are read; the target is a .zip file, so they
    # are expected to be the 'PK' signature.
    data = obj.get_data(binary=True, bytes=2)
    self.assertEqual(data, b'PK')

    # attempt to start a completed task
    # (the raised exception object is not inspected, so it is not bound)
    with self.assertRaises(RuntimeError):
        obj.start()

def test_mirrors(self):
urls = ["http://totally_fake_website/7za.zip", "https://github.com/iTaybb/pySmartDL/raw/master/test/7za920.zip"]
Expand Down Expand Up @@ -82,7 +87,7 @@ def test_pause_unpause(self, testfile=None):
# too bad, the file was too small and was downloaded completely before we stopped it.
# We should download a bigger file
if self.res_testfile_100mb == testfile:
self.fail("The download got completed before we could stop it, even though we've used a big file. Are we on a 100GB/s connection to the Internet or someting?")
self.fail("The download got completed before we could stop it, even though we've used a big file. Are we on a 100GB/s internet connection or somethin'?")
return self.test_pause_unpause(testfile=self.res_testfile_100mb)

dl_size = obj.get_dl_size()
Expand Down Expand Up @@ -117,11 +122,10 @@ def test_speed_limiting(self):

while not obj.get_dl_size():
time.sleep(0.1)

time.sleep(30)

expected_dl_size = 30 * 1024**2
allowed_delta = 0.6 # because we took only 30sec, the delta needs to be quite big, it we were to test 60sec the delta would probably be much smaller
allowed_delta = 0.6 # because we took only 30sec, the delta needs to be quite big, if we were to test 60sec the delta would probably be much smaller
diff = math.fabs(expected_dl_size - obj.get_dl_size()) / expected_dl_size

obj.stop()
Expand Down Expand Up @@ -149,6 +153,29 @@ def test_timeout(self):
obj.start(blocking=False)
obj.wait()
self.assertTrue(obj.isSuccessful())

def test_custom_headers(self):
    # sending custom user agent
    # The test reads the User-Agent back from the JSON body returned by
    # httpbin.org/headers and compares it with the one that was sent.
    ua = "pySmartDL/1.3.2"
    request_args = {"headers": {"User-Agent": ua}}
    obj = pySmartDL.SmartDL("http://httpbin.org/headers", request_args=request_args, progress_bar=False)
    obj.start()
    data = obj.get_json()
    # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
    self.assertEqual(data['headers']['User-Agent'], ua)

    # passing empty request_args
    # An empty dict must be accepted and still produce a successful download.
    obj = pySmartDL.SmartDL("http://httpbin.org/headers", request_args={}, progress_bar=False)
    obj.start()
    self.assertTrue(obj.isSuccessful())

def test_utils(self):
    # Spot-check the formatting helpers in pySmartDL.utils against fixed outputs.
    utils = pySmartDL.utils

    # progress_bar: a 0.6 ratio rendered at a total length of 42 characters.
    bar = utils.progress_bar(0.6, length=42)
    self.assertEqual(bar, '[########################----------------]')

    # sizeof_human: a large value and the zero edge case.
    self.assertEqual(utils.sizeof_human(175799789), '167.7 MB')
    self.assertEqual(utils.sizeof_human(0), '0 bytes')

    # time_human: long format, short format, and the zero edge case.
    self.assertEqual(utils.time_human(50), '50 seconds')
    self.assertEqual(utils.time_human(50, fmt_short=True), '50s')
    self.assertEqual(utils.time_human(0, fmt_short=True), '0s')


def test_suite():
suite = unittest.makeSuite(TestSmartDL)
Expand Down

0 comments on commit 4e71c18

Please sign in to comment.