From 8b434f412777871a0c12cd620f8b9822f3b64d6f Mon Sep 17 00:00:00 2001 From: Gustavo Sverzut Barbieri Date: Fri, 26 May 2023 23:31:47 -0300 Subject: [PATCH] xsInit --- .gitignore | 7 + .pre-commit-config.yaml | 44 + .python-version | 1 + README.rst | 190 ++++ pf_video_transcribe/__init__.py | 0 pf_video_transcribe/__main__.py | 40 + .../abstract_subtitles/__init__.py | 0 pf_video_transcribe/abstract_subtitles/cli.py | 43 + .../abstract_subtitles/converter.py | 18 + pf_video_transcribe/converter.py | 108 ++ pf_video_transcribe/html/__init__.py | 0 pf_video_transcribe/html/__main__.py | 3 + pf_video_transcribe/html/cli.py | 140 +++ pf_video_transcribe/html/converter.py | 98 ++ pf_video_transcribe/html/default.css | 74 ++ pf_video_transcribe/html/default.js | 141 +++ pf_video_transcribe/index_html/__init__.py | 0 pf_video_transcribe/index_html/__main__.py | 3 + pf_video_transcribe/index_html/cli.py | 68 ++ pf_video_transcribe/index_html/html_info.py | 79 ++ pf_video_transcribe/index_html/work.py | 97 ++ pf_video_transcribe/jsonl/__init__.py | 0 pf_video_transcribe/jsonl/reader.py | 91 ++ pf_video_transcribe/jsonl/writer.py | 144 +++ pf_video_transcribe/log.py | 57 ++ pf_video_transcribe/serve/__init__.py | 0 pf_video_transcribe/serve/__main__.py | 3 + pf_video_transcribe/serve/cli.py | 79 ++ pf_video_transcribe/serve/work.py | 162 +++ pf_video_transcribe/srt/__init__.py | 0 pf_video_transcribe/srt/__main__.py | 3 + pf_video_transcribe/srt/cli.py | 57 ++ pf_video_transcribe/srt/converter.py | 16 + pf_video_transcribe/templates/__init__.py | 11 + .../templates/index.html.jinja2 | 56 ++ .../templates/write.html.jinja2 | 41 + .../templates/write.srt.jinja2 | 5 + .../templates/write.vtt.jinja2 | 6 + pf_video_transcribe/thumbnail/__init__.py | 0 pf_video_transcribe/thumbnail/__main__.py | 3 + pf_video_transcribe/thumbnail/cli.py | 96 ++ pf_video_transcribe/thumbnail/converter.py | 57 ++ pf_video_transcribe/transcribe/__init__.py | 0 
pf_video_transcribe/transcribe/__main__.py | 3 + pf_video_transcribe/transcribe/cli.py | 127 +++ pf_video_transcribe/transcribe/work.py | 151 +++ pf_video_transcribe/types.py | 79 ++ pf_video_transcribe/utils.py | 135 +++ pf_video_transcribe/vtt/__init__.py | 0 pf_video_transcribe/vtt/__main__.py | 3 + pf_video_transcribe/vtt/cli.py | 58 ++ pf_video_transcribe/vtt/converter.py | 16 + poetry.lock | 942 ++++++++++++++++++ pyproject.toml | 48 + setup.cfg | 23 + 55 files changed, 3626 insertions(+) create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 .python-version create mode 100644 README.rst create mode 100644 pf_video_transcribe/__init__.py create mode 100644 pf_video_transcribe/__main__.py create mode 100644 pf_video_transcribe/abstract_subtitles/__init__.py create mode 100644 pf_video_transcribe/abstract_subtitles/cli.py create mode 100644 pf_video_transcribe/abstract_subtitles/converter.py create mode 100644 pf_video_transcribe/converter.py create mode 100644 pf_video_transcribe/html/__init__.py create mode 100644 pf_video_transcribe/html/__main__.py create mode 100644 pf_video_transcribe/html/cli.py create mode 100644 pf_video_transcribe/html/converter.py create mode 100644 pf_video_transcribe/html/default.css create mode 100644 pf_video_transcribe/html/default.js create mode 100644 pf_video_transcribe/index_html/__init__.py create mode 100644 pf_video_transcribe/index_html/__main__.py create mode 100644 pf_video_transcribe/index_html/cli.py create mode 100644 pf_video_transcribe/index_html/html_info.py create mode 100644 pf_video_transcribe/index_html/work.py create mode 100644 pf_video_transcribe/jsonl/__init__.py create mode 100644 pf_video_transcribe/jsonl/reader.py create mode 100644 pf_video_transcribe/jsonl/writer.py create mode 100644 pf_video_transcribe/log.py create mode 100644 pf_video_transcribe/serve/__init__.py create mode 100644 pf_video_transcribe/serve/__main__.py create mode 100644 
pf_video_transcribe/serve/cli.py create mode 100644 pf_video_transcribe/serve/work.py create mode 100644 pf_video_transcribe/srt/__init__.py create mode 100644 pf_video_transcribe/srt/__main__.py create mode 100644 pf_video_transcribe/srt/cli.py create mode 100644 pf_video_transcribe/srt/converter.py create mode 100644 pf_video_transcribe/templates/__init__.py create mode 100644 pf_video_transcribe/templates/index.html.jinja2 create mode 100644 pf_video_transcribe/templates/write.html.jinja2 create mode 100644 pf_video_transcribe/templates/write.srt.jinja2 create mode 100644 pf_video_transcribe/templates/write.vtt.jinja2 create mode 100644 pf_video_transcribe/thumbnail/__init__.py create mode 100644 pf_video_transcribe/thumbnail/__main__.py create mode 100644 pf_video_transcribe/thumbnail/cli.py create mode 100644 pf_video_transcribe/thumbnail/converter.py create mode 100644 pf_video_transcribe/transcribe/__init__.py create mode 100644 pf_video_transcribe/transcribe/__main__.py create mode 100644 pf_video_transcribe/transcribe/cli.py create mode 100644 pf_video_transcribe/transcribe/work.py create mode 100644 pf_video_transcribe/types.py create mode 100644 pf_video_transcribe/utils.py create mode 100644 pf_video_transcribe/vtt/__init__.py create mode 100644 pf_video_transcribe/vtt/__main__.py create mode 100644 pf_video_transcribe/vtt/cli.py create mode 100644 pf_video_transcribe/vtt/converter.py create mode 100644 poetry.lock create mode 100644 pyproject.toml create mode 100644 setup.cfg diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ac24be8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +/.venv/ +/dist/ +/build/ +__pycache__ +*.pyc +/videos/ +.DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..355f228 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,44 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks + 
+default_install_hook_types: [pre-commit, pre-push, pre-merge-commit] + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-executables-have-shebangs + - id: check-json + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + +- repo: https://github.com/python/black + rev: 23.3.0 + hooks: + - id: black + +- repo: https://github.com/sqlalchemyorg/zimports/ + rev: v0.4.5 + hooks: + - id: zimports + +- repo: https://github.com/pycqa/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + additional_dependencies: + - flake8-import-order + - flake8-import-single + - flake8-builtins + - flake8-future-annotations + - flake8-docstrings + - flake8-rst-docstrings + - pygments + +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.3.0 + hooks: + - id: mypy diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..49982d1 --- /dev/null +++ b/README.rst @@ -0,0 +1,190 @@ +ProFUSION Video Transcribe +========================== + +Install +------- + +Install the project using `Poetry `_: + +.. code-block:: console + + $ poetry install --with dev + Installing dependencies from lock file + ... + Installing the current project: pf-video-transcribe + +This project uses `Faster Whisper `_, +a faster implementation of `OpenAI's Whisper `_, +which in turn is built on top of `CTranslate2 `_ +hardware optimizations, that requires installation of **NVidia CUDA libraries**, see +`their installation instructions `_. + +Run +--- + +Run the command line tool: + +.. code-block:: console + + $ pf-video-transcribe --help + +All commands take ``--log=LEVEL`` or ``--log=DOMAIN:LEVEL`` to change the +log level of every package, such as ``pf_video_transcribe.transcribe``, +``faster_whisper`` and so on. 
If no domain is given, then the provided level +applies to all log domains. This is a global option and should be specified +before the subcommand. + +Subcommands are explained in the next sections. + +Transcription +============= + +Given that the transcription is a heavy process and takes a long time to load the model +and then to process each media file, it's implemented as a batch operation that +generates an intermediate file in the `JSON Lines <https://jsonlines.org/>`_ +(``".jsonl"``) format, with a ``"header"`` line followed by all the ``"segment"``, +ended by a ``"finished"`` line with a success or failure indicator. Each ``segment`` +carries the useful information extracted by +`OpenAI Whisper <https://github.com/openai/whisper>`_: + +.. code-block:: console + + $ pf-video-transcribe transcribe videos/my-video.mp4 videos/other-video.mp4 + + +This will generate ``videos/my-video.jsonl`` and ``videos/other-video.jsonl``. + +Note that the first time it will take a long time to download the model from the internet. +In the next iterations, the local model will be used, but first it will be checked +remotely -- which can also take time. Using the ``--local`` flag will skip that check. + +The language is auto-detected from the first 30 seconds of actual sound (silence is +ignored), but if you do know the language, use the ``--language=LANG`` flag. + +Automatic Speech Recognition (ASR) models work on slices of the media, producing segments +that are smaller than an actual human language sentence/phrase. +The ``--merge-threshold=SECONDS`` option will merge sibling segments if: +``next_segment.start - last_segment.end <= merge_threshold``. The default is 1 second. + +A more complex example: + +.. code-block:: console + + $ pf-video-transcribe \ + --log=DEBUG \ + transcribe \ + --local \ + --language=pt \ + --merge-threshold=5 \ + videos/my-video.mp4 videos/other-video.mp4 + +With the transcribed ``".jsonl"`` one can convert to more usable formats, +see the next sections. 
+ + +Convert to HTML +=============== + +This generates the HTML meant to easy viewing of the result, a ``