From 5b811fd66ae542148a979a97f8a181ec3d8ae1f1 Mon Sep 17 00:00:00 2001 From: lumin Date: Fri, 27 Dec 2024 22:56:32 +0900 Subject: [PATCH 1/2] feat(docker): restructure Dockerfile for multi-stage build Update the Dockerfile to implement a multi-stage build process. Introduce a dedicated FFmpeg stage and separate development, build, and production stages to optimize image size and improve build efficiency. Add necessary dependencies and configure the environment for better performance. Update the .dockerignore to exclude sensitive files and unnecessary directories. --- .dockerignore | 10 +++++++- Dockerfile | 65 +++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 59 insertions(+), 16 deletions(-) diff --git a/.dockerignore b/.dockerignore index f59ec20..cfac036 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,9 @@ -* \ No newline at end of file +**/coverage +**/.env +**/.aws +**/.ssh +Dockerfile +docker-compose.yml +**/.DS_Store +**/venv +**/env \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 0072d9e..cff96be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,23 +1,58 @@ -FROM python:3.13-slim-bullseye +# FFmpeg stage +FROM jrottenberg/ffmpeg:4.1-scratch AS ffmpeg -USER root +# Development stage +FROM python:3.13-bullseye AS development -ARG INSTALL_GIT=false -RUN if [ "$INSTALL_GIT" = "true" ]; then \ - apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*; \ - fi +COPY --from=ffmpeg / / -# Runtime dependency +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + +# Install build dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ - ffmpeg \ - && rm -rf /var/lib/apt/lists/* + build-essential \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* -RUN pip install markitdown +RUN pip install --no-cache-dir hatch -# Default USERID and GROUPID -ARG USERID=10000 -ARG GROUPID=10000 +WORKDIR /app +COPY . /app/ + +# Build stage +FROM python:3.13-bullseye AS build + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir hatch + +WORKDIR /app + +COPY pyproject.toml /app/ +COPY . /app/ -USER $USERID:$GROUPID +RUN hatch build + +# Production stage +FROM python:3.13-slim-bullseye AS production + +# Copy ffmpeg binaries +COPY --from=ffmpeg / / + +WORKDIR /app + +COPY --from=build /app/dist /tmp/dist + +RUN pip install --no-cache-dir /tmp/dist/markitdown-*.whl + +# Default USERID and GROUPID +RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app +USER appuser -ENTRYPOINT [ "markitdown" ] +# Entrypoint +ENTRYPOINT ["markitdown"] From 4050de78b691be1f929d825ae9c3af0d65868b4b Mon Sep 17 00:00:00 2001 From: lumin Date: Sat, 21 Dec 2024 11:52:21 +0900 Subject: [PATCH 2/2] refactor: update devcontainer configuration for clarity Remove unnecessary INSTALL_GIT argument and set target to development in the devcontainer.json file. This simplifies the configuration and aligns it with the intended development environment setup. --- .devcontainer/devcontainer.json | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index e13e299..b0494b2 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -7,9 +7,7 @@ "context": "..", // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename. "dockerfile": "../Dockerfile", - "args": { - "INSTALL_GIT": "true" - } + "target": "development" }, // Features to add to the dev container. More info: https://containers.dev/features. @@ -17,6 +15,14 @@ "features": { "ghcr.io/devcontainers-extra/features/hatch:2": {} }, + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "charliermarsh.ruff" + ] + } + }, // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [], @@ -28,5 +34,5 @@ // "customizations": {}, // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. - "remoteUser": "root" + // "remoteUser": "root" }