From b1c6cf9eb660f38fd276374008e07868aedb7a81 Mon Sep 17 00:00:00 2001 From: Itay Brandes Date: Wed, 30 Oct 2019 01:22:55 +0200 Subject: [PATCH] move funcs to utils.py + prep for 1.3.3 --- ChangeLog.txt | 4 +++ docs/conf.py | 2 +- pySmartDL/pySmartDL.py | 42 ++++-------------------------- pySmartDL/utils.py | 58 +++++++++++++++++++++++++++++++++++++++++- test/test_pySmartDL.py | 17 +++++++++++++ 5 files changed, 84 insertions(+), 39 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index 8a39acd..b5cb45a 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -5,6 +5,10 @@ Below you find a list with the added features, changes and fixes for each version. ====================== +(Version 1.3.3 beta; 30/10/19) +- IMPROVE: Better tests and logging. +- FIX: Bug #35 (Thanks HawkLiking) + (Version 1.3.2 beta; 26/07/19) - NEW: Added ability to parse json directly from the response. - NEW: Allow passing of custom arguments to urllib Request object (Pull request #34) diff --git a/docs/conf.py b/docs/conf.py index 55d3d86..bb52129 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,7 +50,7 @@ # The short X.Y version. version = '1.3' # The full version, including alpha/beta/rc tags. -release = '1.3.1' +release = '1.3.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pySmartDL/pySmartDL.py b/pySmartDL/pySmartDL.py index d321b96..50fd7da 100644 --- a/pySmartDL/pySmartDL.py +++ b/pySmartDL/pySmartDL.py @@ -22,7 +22,7 @@ __all__ = ['SmartDL', 'utils'] __version_mjaor__ = 1 __version_minor__ = 3 -__version_micro__ = 2 +__version_micro__ = 3 __version__ = "{}.{}.{}".format(__version_mjaor__, __version_minor__, __version_micro__) class HashFailedException(Exception): @@ -157,7 +157,7 @@ def add_basic_authentication(self, username, password): :param password: Password. 
:type password: string ''' - auth_string = '%s:%s' % (username, password) + auth_string = '{}:{}'.format(username, password) base64string = base64.standard_b64encode(auth_string.encode('utf-8')) self.requestArgs['headers']['Authorization'] = b"Basic " + base64string @@ -246,7 +246,7 @@ def start(self, blocking=None): self.logger.info('One URL is loaded.') if self.verify_hash and os.path.exists(self.dest): - if _get_file_hash(self.hash_algorithm, self.dest) == self.hash_code: + if utils.get_file_hash(self.hash_algorithm, self.dest) == self.hash_code: self.logger.info("Destination '%s' already exists, and the hash matches. No need to download." % self.dest) self.status = 'finished' return @@ -277,7 +277,7 @@ def start(self, blocking=None): self.logger.warning("Server did not send Content-Length. Filesize is unknown.") self.filesize = 0 - args = _calc_chunk_size(self.filesize, self.threads_count, self.minChunkFile) + args = utils.calc_chunk_size(self.filesize, self.threads_count, self.minChunkFile) bytes_per_thread = args[0][1] - args[0][0] + 1 if len(args)>1: self.logger.info("Launching {} threads (downloads {}/thread).".format(len(args), utils.sizeof_human(bytes_per_thread))) @@ -655,42 +655,10 @@ def post_threadpool_actions(pool, args, expected_filesize, SmartDLObj): if SmartDLObj.verify_hash: dest_path = args[-1] - hash_ = _get_file_hash(SmartDLObj.hash_algorithm, dest_path) + hash_ = utils.get_file_hash(SmartDLObj.hash_algorithm, dest_path) if hash_ == SmartDLObj.hash_code: SmartDLObj.logger.info('Hash verification succeeded.') else: SmartDLObj.logger.warning('Hash verification failed.') SmartDLObj.try_next_mirror(HashFailedException(os.path.basename(dest_path), hash, SmartDLObj.hash_code)) - -def _get_file_hash(algorithm, path): - hashAlg = hashlib.new(algorithm) - block_sz = 1* 1024**2 # 1 MB - - with open(path, 'rb') as f: - data = f.read(block_sz) - while data: - hashAlg.update(data) - data = f.read(block_sz) - - return hashAlg.hexdigest() - -def 
_calc_chunk_size(filesize, threads, minChunkFile): - if not filesize: - return [(0, 0)] - - while math.ceil(filesize/threads) < minChunkFile and threads > 1: - threads -= 1 - - args = [] - pos = 0 - chunk = math.ceil(filesize/threads) - for i in range(threads): - startByte = pos - endByte = pos + chunk - if endByte > filesize-1: - endByte = filesize-1 - args.append((startByte, endByte)) - pos += chunk+1 - - return args diff --git a/pySmartDL/utils.py b/pySmartDL/utils.py index ac84dce..c5ea1f6 100644 --- a/pySmartDL/utils.py +++ b/pySmartDL/utils.py @@ -9,8 +9,9 @@ import random import logging import re +import hashlib from concurrent import futures -from math import log +from math import log, ceil import shutil DEFAULT_LOGGER_CREATED = False @@ -236,6 +237,61 @@ def time_human(duration, fmt_short=False, show_ms=False): if fmt_short: return "".join(["%s%s" % x for x in result]) return ", ".join(["%s %s" % x for x in result]) + +def get_file_hash(algorithm, path): + ''' + Calculates a file's hash. + + .. WARNING:: + The hashing algorithm must be supported on your system, as documented at `hashlib documentation page <https://docs.python.org/3/library/hashlib.html>`_. + + :param algorithm: Hashing algorithm. + :type algorithm: string + :param path: The file path + :type path: string + :rtype: string + ''' + hashAlg = hashlib.new(algorithm) + block_sz = 1*1024**2 # 1 MB + + with open(path, 'rb') as f: + data = f.read(block_sz) + while data: + hashAlg.update(data) + data = f.read(block_sz) + + return hashAlg.hexdigest() + +def calc_chunk_size(filesize, threads, minChunkFile): + ''' + Calculates the byte chunks to download. + + :param filesize: filesize in bytes.
+ :type filesize: int + :param threads: Number of threads + :type threads: int + :param minChunkFile: Minimum chunk size + :type minChunkFile: int + :rtype: Array of (startByte,endByte) tuples + ''' + if not filesize: + return [(0, 0)] + + while ceil(filesize/threads) < minChunkFile and threads > 1: + threads -= 1 + + args = [] + pos = 0 + chunk = ceil(filesize/threads) + for i in range(threads): + startByte = pos + endByte = pos + chunk + if endByte > filesize-1: + endByte = filesize-1 + args.append((startByte, endByte)) + pos += chunk+1 + + return args def create_debugging_logger(): ''' diff --git a/test/test_pySmartDL.py b/test/test_pySmartDL.py index 0c2e3ca..c9150f1 100644 --- a/test/test_pySmartDL.py +++ b/test/test_pySmartDL.py @@ -175,6 +175,23 @@ def test_utils(self): self.assertEqual(pySmartDL.utils.time_human(50), '50 seconds') self.assertEqual(pySmartDL.utils.time_human(50, fmt_short=True), '50s') self.assertEqual(pySmartDL.utils.time_human(0, fmt_short=True), '0s') + self._test_calc_chunk_size(10000, 10, 20) + self._test_calc_chunk_size(1906023034, 20, 20) + self._test_calc_chunk_size(261969919, 20, 32) + + def _test_calc_chunk_size(self, filesize, threads, minChunkFile): + chunks = pySmartDL.utils.calc_chunk_size(filesize, threads, 20) + self.assertEqual(chunks[0][0], 0) + self.assertIsInstance(chunks[0][0], int) + self.assertIsInstance(chunks[0][1], int) + + for i in range(1, len(chunks)): + self.assertIsInstance(chunks[i][0], int) + self.assertIsInstance(chunks[i][1], int) + self.assertTrue(chunks[i][0] <= chunks[i][1]) + self.assertEqual(chunks[i-1][1] + 1, chunks[i][0]) + + self.assertEqual(chunks[-1][1], filesize-1) def test_suite():