From dc9b743afac253f858eedbc944cfc944cc9ac959 Mon Sep 17 00:00:00 2001
From: Ben Knoll
Date: Mon, 3 Oct 2022 11:17:18 -0500
Subject: [PATCH] Downloads stanza models earlier

Also includes updated dockerfiles
---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit message and is ignored when the patch is applied):

- Both Dockerfiles use the exec form of ENTRYPOINT with an absolute path.
  The shell form wraps the script in `/bin/sh -c`, so the script is not
  PID 1 and does not receive the SIGTERM sent by `docker stop`.
- NOTE(review): `deploy` calls check_data(..., with_stanza=True), so
  stanza.download('en') runs on every deploy, including in an image that
  was built with `biomedicus download-data --with-stanza`. Confirm this is
  intended before relying on the containers without network access.

 Dockerfile                                    | 19 +++++++++----
 Dockerfile-dev                                | 28 +++++++++++++++++++
 python/biomedicus/cli.py                      |  1 +
 .../deployment/biomedicus_deploy_config.yml   |  1 +
 .../deployment/deploy_biomedicus.py           | 12 +++++---
 tools/docker/biomedicus.sh                    |  6 ++++
 6 files changed, 57 insertions(+), 10 deletions(-)
 create mode 100644 Dockerfile-dev
 create mode 100644 tools/docker/biomedicus.sh

diff --git a/Dockerfile b/Dockerfile
index afe0df58..03f7b634 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:latest
+FROM ubuntu:20.04
 
 ARG DEBIAN_FRONTEND=noninteractive
 
@@ -9,11 +9,18 @@ RUN apt-get -qy install openjdk-11-jre-headless
 RUN apt-get -qy install build-essential
 RUN apt-get -qy install python3 python3-pip
 
-COPY . /home/biomedicus3/biomedicus3
-
 WORKDIR /home/biomedicus3
 
-RUN pip3 install ./biomedicus3
-RUN biomedicus download-data
+RUN pip3 install --upgrade pip
+RUN python3 -m pip install --upgrade setuptools
+
+RUN pip3 install biomedicus3
+RUN biomedicus download-data --with-stanza
+
+COPY ./python/biomedicus/deployment/biomedicus_deploy_config.yml .
+COPY ./python/biomedicus/pipeline/biomedicus_default_pipeline.yml .
+COPY ./tools/docker/biomedicus.sh .
+
+RUN chmod +x biomedicus.sh
 
-CMD /bin/sh
+ENTRYPOINT ["/home/biomedicus3/biomedicus.sh"]
diff --git a/Dockerfile-dev b/Dockerfile-dev
new file mode 100644
index 00000000..13e77c3a
--- /dev/null
+++ b/Dockerfile-dev
@@ -0,0 +1,28 @@
+FROM ubuntu:20.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update
+
+RUN apt-get -qy install git
+RUN apt-get -qy install openjdk-11-jdk-headless
+RUN apt-get -qy install build-essential
+RUN apt-get -qy install python3 python3-pip
+
+WORKDIR /home/biomedicus3
+
+RUN pip3 install --upgrade pip
+RUN python3 -m pip install --upgrade setuptools
+
+COPY . biomedicus3
+
+RUN pip3 install ./biomedicus3
+RUN biomedicus download-data --with-stanza
+
+COPY ./python/biomedicus/deployment/biomedicus_deploy_config.yml .
+COPY ./python/biomedicus/pipeline/biomedicus_default_pipeline.yml .
+COPY ./tools/docker/biomedicus.sh .
+
+RUN chmod +x biomedicus.sh
+
+ENTRYPOINT ["/home/biomedicus3/biomedicus.sh"]
diff --git a/python/biomedicus/cli.py b/python/biomedicus/cli.py
index 8a3afc54..8fd2bcd7 100644
--- a/python/biomedicus/cli.py
+++ b/python/biomedicus/cli.py
@@ -83,6 +83,7 @@ def write_config_subparser(subparsers):
 def download_data_subparser(subparsers):
     from biomedicus.deployment.deploy_biomedicus import download_data
     sp = subparsers.add_parser('download-data', help="Just downloads the biomedicus data.")
+    sp.add_argument('--with-stanza', action='store_true', help="Also downloads stanza models.")
     sp.set_defaults(f=download_data)
 
 
diff --git a/python/biomedicus/deployment/biomedicus_deploy_config.yml b/python/biomedicus/deployment/biomedicus_deploy_config.yml
index c4fb83b3..8b1492fd 100644
--- a/python/biomedicus/deployment/biomedicus_deploy_config.yml
+++ b/python/biomedicus/deployment/biomedicus_deploy_config.yml
@@ -39,6 +39,7 @@ processors:
     entry_point: biomedicus.dependencies.stanza_selective_parser
     port: 50800
     startup_timeout: 180
+    additional_args: ['--offline']
   - implementation: python
     entry_point: biomedicus.negation.deepen
     port: 50900
diff --git a/python/biomedicus/deployment/deploy_biomedicus.py b/python/biomedicus/deployment/deploy_biomedicus.py
index 96a51fed..e32ccc44 100644
--- a/python/biomedicus/deployment/deploy_biomedicus.py
+++ b/python/biomedicus/deployment/deploy_biomedicus.py
@@ -38,13 +38,17 @@ def _listen(process: Popen) -> int:
     return process.wait()
 
 
-def check_data(download=False):
+def check_data(download=False, with_stanza=False):
     try:
         data = Path(os.environ['BIOMEDICUS_DATA'])
     except KeyError:
         data = Path.home() / '.biomedicus' / 'data'
         os.environ['BIOMEDICUS_DATA'] = str(data)
 
+    if with_stanza:
+        import stanza
+        stanza.download('en')
+
     config = load_config()
     download_url = config['data.data_url']
     data_version = config['data.version']
@@ -105,7 +109,7 @@ def attach_biomedicus_jar(deployment: Deployment, append_to: Optional[str] = Non
 
 def deploy(conf):
     try:
-        check_data(conf.download_data)
+        check_data(conf.download_data, with_stanza=True)
     except ValueError:
         return
     deployment = Deployment.from_yaml_file(conf.config)
@@ -118,9 +122,9 @@ def deploy(conf):
     deployment.run_servers()
 
 
-def download_data(_):
+def download_data(conf):
     try:
-        check_data(True)
+        check_data(True, conf.with_stanza)
     except ValueError:
         return
 
diff --git a/tools/docker/biomedicus.sh b/tools/docker/biomedicus.sh
new file mode 100644
index 00000000..63f09ae6
--- /dev/null
+++ b/tools/docker/biomedicus.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+exec 3< <(biomedicus deploy --config biomedicus_deploy_config.yml)
+sed '/Done deploying all servers.$/q' <&3 ; cat <&3 &
+
+biomedicus run /input --watch -o /output --config biomedicus_default_pipeline.yml