From 99b76628f7ba66fd19a11c454e23572164b55315 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 5 Dec 2023 20:49:51 +0100 Subject: [PATCH] implement '-e/--error-file' command-line option (#4732) copying per-URL options from regular, read-only input files does not currently work --- docs/options.md | 4 ++- gallery_dl/__init__.py | 68 ++++++++++++++++++++++++++++++++---------- gallery_dl/option.py | 5 ++++ 3 files changed, 61 insertions(+), 16 deletions(-) diff --git a/docs/options.md b/docs/options.md index 6d22062bd8..45ce7ecaa8 100644 --- a/docs/options.md +++ b/docs/options.md @@ -39,6 +39,7 @@ -E, --extractor-info Print extractor defaults and settings -K, --list-keywords Print a list of available keywords and example values for the given URLs + -e, --error-file FILE Add input URLs which returned an error to FILE --list-modules Print a list of available extractor modules --list-extractors Print a list of extractor classes with description, (sub)category and example URL @@ -51,7 +52,8 @@ ## Downloader Options: -r, --limit-rate RATE Maximum download rate (e.g. 500k or 2.5M) -R, --retries N Maximum number of retries for failed HTTP - requests or -1 for infinite retries (default: 4) + requests or -1 for infinite retries (default: + 4) --http-timeout SECONDS Timeout for HTTP connections (default: 30.0) --sleep SECONDS Number of seconds to wait before each download. 
This can be either a constant value or a range diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 287faf1876..0f9d1cae9f 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -249,6 +249,9 @@ def main(): input_log.error(exc) return getattr(exc, "code", 128) + if args.error_file: + input_manager.error_file(args.error_file) + pformat = config.get(("output",), "progress", True) if pformat and len(input_manager.urls) > 1 and \ args.loglevel < logging.ERROR: @@ -270,6 +273,7 @@ def main(): if status: retval |= status + input_manager.error() else: input_manager.success() @@ -281,6 +285,7 @@ def main(): except exception.NoExtractorError: log.error("Unsupported URL '%s'", url) retval |= 64 + input_manager.error() input_manager.next() return retval @@ -301,9 +306,12 @@ class InputManager(): def __init__(self): self.urls = [] self.files = () + + self._url = "" + self._item = None self._index = 0 - self._current = None self._pformat = None + self._error_fp = None def add_url(self, url): self.urls.append(url) @@ -428,6 +436,15 @@ def add_file(self, path, action=None): else: append(url) + def error_file(self, path): + try: + path = util.expand_path(path) + self._error_fp = open(path, "a", encoding="utf-8") + except Exception as exc: + self.log.warning( + "Unable to open error file (%s: %s)", + exc.__class__.__name__, exc) + def progress(self, pformat=True): if pformat is True: pformat = "[{current}/{total}] {url}\n" @@ -439,17 +456,37 @@ def next(self): self._index += 1 def success(self): - if self._current: - url, path, action, indicies = self._current - lines = self.files[path] - action(lines, indicies) + if self._item: + self._rewrite() + + def error(self): + if self._error_fp: + if self._item: + url, path, action, indicies = self._item + lines = self.files[path] + out = "".join(lines[i] for i in indicies) + self._rewrite() + else: + out = str(self._url) + "\n" + try: - with open(path, "w", encoding="utf-8") as fp: - fp.writelines(lines) + 
self._error_fp.write(out) except Exception as exc: self.log.warning( "Unable to update '%s' (%s: %s)", - path, exc.__class__.__name__, exc) + self._error_fp.name, exc.__class__.__name__, exc) + + def _rewrite(self): + url, path, action, indicies = self._item + lines = self.files[path] + action(lines, indicies) + try: + with open(path, "w", encoding="utf-8") as fp: + fp.writelines(lines) + except Exception as exc: + self.log.warning( + "Unable to update '%s' (%s: %s)", + path, exc.__class__.__name__, exc) @staticmethod def _action_comment(lines, indicies): @@ -467,23 +504,24 @@ def __iter__(self): def __next__(self): try: - item = self.urls[self._index] + url = self.urls[self._index] except IndexError: raise StopIteration - if isinstance(item, tuple): - self._current = item - item = item[0] + if isinstance(url, tuple): + self._item = url + url = url[0] else: - self._current = None + self._item = None + self._url = url if self._pformat: output.stderr_write(self._pformat({ "total" : len(self.urls), "current": self._index + 1, - "url" : item, + "url" : url, })) - return item + return url class ExtendedUrl(): diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 255d9f2998..5966f25352 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -286,6 +286,11 @@ def build_parser(): help=("Print a list of available keywords and example values " "for the given URLs"), ) + output.add_argument( + "-e", "--error-file", + dest="error_file", metavar="FILE", + help="Add input URLs which returned an error to FILE", + ) output.add_argument( "--list-modules", dest="list_modules", action="store_true",