feat: EasyOCR (#3712)

bentoml · Apr 19, 2023 · b80d4b9 · b80d4b9
1 parent a6e17d9
commit b80d4b9
Show file tree

Hide file tree

Showing 12 changed files with 546 additions and 10 deletions.
diff --git a/.github/workflows/frameworks.yml b/.github/workflows/frameworks.yml
@@ -29,6 +29,7 @@ jobs:
       keras: ${{ steps.filter.outputs.keras }}
       lightgbm: ${{ steps.filter.outputs.lightgbm }}
       detectron: ${{ steps.filter.outputs.detectron }}
+      easyocr: ${{ steps.filter.outputs.easyocr }}
       mlflow: ${{ steps.filter.outputs.mlflow }}
       onnx: ${{ steps.filter.outputs.onnx }}
       picklable_model: ${{ steps.filter.outputs.picklable_model }}
@@ -75,6 +76,11 @@ jobs:
               - src/bentoml/lightgbm.py
               - src/bentoml/_internal/frameworks/lightgbm.py
               - tests/integration/frameworks/models/lightgbm.py
+            easyocr:
+              - *related
+              - src/bentoml/easyocr.py
+              - src/bentoml/_internal/frameworks/easyocr.py
+              - tests/integration/frameworks/models/easyocr.py
             mlflow:
               - *related
               - src/bentoml/mlflow.py
@@ -253,6 +259,37 @@ jobs:
         run: |
           OPTS=(--cov-config pyproject.toml --cov src/bentoml --cov-append --framework detectron)
           coverage run -m pytest tests/integration/frameworks/test_frameworks.py "${OPTS[@]}"
+  # EasyOCR framework integration tests.
+  # On pull requests this job runs only when the `diff` job reports a change to
+  # an EasyOCR-related path; on push events it always runs.
+  easyocr_integration_tests:
+    needs: diff
+    if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.easyocr == 'true') || github.event_name == 'push' }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0  # fetch all tags and branches
+      - name: Setup python
+        uses: actions/setup-python@v4
+        with:
+          # Quoted so the version is always read as a string, never a float.
+          python-version: "3.8"
+      - name: Get pip cache dir
+        id: cache-dir
+        # Write the step output through $GITHUB_OUTPUT; the `::set-output`
+        # workflow command is deprecated.
+        run: |
+          echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
+      - name: Cache pip dependencies
+        uses: actions/cache@v3
+        id: cache-pip
+        with:
+          path: ${{ steps.cache-dir.outputs.dir }}
+          key: ${{ runner.os }}-tests-${{ hashFiles('requirements/tests-requirements.txt') }}
+      - name: Install dependencies
+        run: |
+          pip install .
+          pip install easyocr torch requests Pillow
+          pip install -r requirements/tests-requirements.txt
+      - name: Run tests and generate coverage report
+        run: |
+          OPTS=(--cov-config pyproject.toml --cov src/bentoml --cov-append --framework easyocr)
+          coverage run -m pytest tests/integration/frameworks/test_frameworks.py "${OPTS[@]}"
   flax_integration_tests:
     needs: diff
     if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.flax == 'true') || github.event_name == 'push' }}

diff --git a/docs/source/frameworks/easyocr.rst b/docs/source/frameworks/easyocr.rst
@@ -0,0 +1,121 @@
+=======
+EasyOCR
+=======
+
+EasyOCR is a ready-to-use OCR with 80+ supported languages. It helps you to quickly convert and transcribe text from images. This guide provides an overview of using `EasyOCR <https://www.jaided.ai/easyocr/>`_ with BentoML.
+
+Compatibility
+-------------
+
+BentoML has been validated to work with EasyOCR version 1.6.2 and higher.
+
+Save/Load an EasyOCR Reader with BentoML
+----------------------------------------
+
+First, create a reader instance with the `language codes <https://www.jaided.ai/easyocr/>`_ for your use case.
+
+.. code-block:: python
+
+   import easyocr
+
+   reader = easyocr.Reader(['en'])
+
+Save this reader instance to the BentoML model store using :obj:`~bentoml.easyocr.save_model()`:
+
+.. code-block:: python
+
+   import bentoml
+
+   bento_model = bentoml.easyocr.save_model('en-reader', reader)
+
+To verify that the saved model is working, load it back with :obj:`~bentoml.easyocr.load_model()`:
+
+.. code-block:: python
+
+   loaded_model = bentoml.easyocr.load_model('en-reader')
+
+   rs = loaded_model.readtext('image.jpg')
+
+.. note:: GPU behaviour
+
+   GPU usage is enabled by passing ``gpu=True`` to the ``easyocr.Reader`` constructor. This means that in order to use a GPU, the reader instance must be created on a machine with a GPU before saving it to BentoML.
+
+Building a Service
+------------------
+
+.. seealso::
+
+   :ref:`Building a Service <concepts/service:Service and APIs>`: more information on creating a
+   prediction service with BentoML.
+
+Create a ``service.py`` file separate from your training code that will be used to define the
+BentoML service:
+
+.. code-block:: python
+
+   import bentoml
+   import PIL.Image
+   import numpy as np
+
+   # create a runner from the saved Reader
+   runner = bentoml.easyocr.get("en-reader").to_runner()
+
+   # create a BentoML service
+   svc = bentoml.Service("ocr", runners=[runner])
+
+   # define a new endpoint on the BentoML service
+   @svc.api(input=bentoml.io.Image(), output=bentoml.io.Text())
+   async def transcript_text(input: PIL.Image.Image) -> str:
+       # use 'runner.readtext.async_run(input)' instead of 'reader.readtext(input)'
+       return await runner.readtext.async_run(np.asarray(input))
+
+Take note of the name of the service (``svc`` in this example) and the name of the file.
+
+You should also have a ``bentofile.yaml`` alongside the service file that specifies that
+information, as well as the fact that it depends on EasyOCR. This can be done using either
+``python`` (if using pip), or ``conda``:
+
+.. tab-set::
+
+   .. tab-item:: pip
+
+      .. code-block:: yaml
+
+         service: "service:svc"
+         python:
+           packages:
+              - easyocr
+              - bentoml
+
+   .. tab-item:: conda
+
+      .. code-block:: yaml
+
+         service: "service:svc"
+         conda:
+           channels:
+           - conda-forge
+           dependencies:
+           - easyocr
+
+Using Runners
+~~~~~~~~~~~~~
+
+.. seealso::
+
+   :ref:`concepts/runner:Using Runners`: a general introduction to the Runner concept and its usage.
+
+A runner for a Reader is created like so:
+
+.. code-block:: python
+
+   bentoml.easyocr.get("model_name:model_version").to_runner()
+
+``runner.readtext.run`` is generally a drop-in replacement for ``reader.readtext``.
+
+Runners must be initialized in order for their ``run`` methods to work. This is done by BentoML
+internally when you serve a bento with ``bentoml serve``. See the :ref:`runner debugging guide
+<concepts/service:Debugging Runners>` for more information about initializing runners locally.
+
+
+.. currentmodule:: bentoml.easyocr
diff --git a/docs/source/frameworks/index.rst b/docs/source/frameworks/index.rst
@@ -63,6 +63,10 @@ projects in the `bentoml/examples <https://github.com/bentoml/BentoML/tree/main/
         :link: /frameworks/detectron
         :link-type: doc
 
+    .. grid-item-card:: :doc:`/frameworks/easyocr`
+        :link: /frameworks/easyocr
+        :link-type: doc
+
 
 Custom Models
 -------------
@@ -86,7 +90,6 @@ Roadmap
 
 The following frameworks are supported in pre-1.0 BentoML versions and are being migrated to the new 1.0 API. In the meantime, users may use :ref:`Custom Models <frameworks/index:Custom Models>` as a workaround.
 
-- EasyOCR
 - EvalML
 - FastText
 - Flax
@@ -125,6 +128,7 @@ The following frameworks are supported in pre-1.0 BentoML versions and are being
     pytorch
     pytorch_lightning
     sklearn
+    easyocr
     tensorflow
     transformers
     xgboost

diff --git a/docs/source/frameworks/xgboost.rst b/docs/source/frameworks/xgboost.rst
@@ -95,15 +95,16 @@ information, as well as the fact that it depends on XGBoost. This can be done us
 ``python`` (if using pip), or ``conda``:
 
 .. tab-set::
+
    .. tab-item:: pip
 
       .. code-block:: yaml
 
          service: "service:svc"
          description: "My XGBoost service"
          python:
-	   packages:
-	     - xgboost
+           packages:
+           - xgboost
 
    .. tab-item:: conda
 

diff --git a/docs/source/reference/frameworks/easyocr.rst b/docs/source/reference/frameworks/easyocr.rst
@@ -0,0 +1,18 @@
+=======
+EasyOCR
+=======
+
+.. admonition:: About this page
+
+   This is an API reference for EasyOCR in BentoML. Please refer to
+   :doc:`EasyOCR guide </frameworks/easyocr>` for more information about
+   how to use EasyOCR in BentoML.
+
+
+.. currentmodule:: bentoml.easyocr
+
+.. autofunction:: bentoml.easyocr.save_model
+
+.. autofunction:: bentoml.easyocr.load_model
+
+.. autofunction:: bentoml.easyocr.get
diff --git a/docs/source/reference/frameworks/index.rst b/docs/source/reference/frameworks/index.rst
@@ -69,6 +69,10 @@ Framework APIs
         :link: /reference/frameworks/detectron
         :link-type: doc
 
+    .. grid-item-card:: :doc:`/reference/frameworks/easyocr`
+        :link: /reference/frameworks/easyocr
+        :link-type: doc
+
     .. grid-item-card:: :doc:`/reference/frameworks/ray`
         :link: /reference/frameworks/ray
         :link-type: doc
@@ -89,6 +93,7 @@ Framework APIs
     mlflow
     catboost
     fastai
+    easyocr
     keras
     ray
     detectron