Skip to content

Commit

Permalink
Merge pull request #89 from speedfl/feature/base64
Browse files Browse the repository at this point in the history
Feature/base64
  • Loading branch information
tleyden authored May 8, 2017
2 parents d0292c9 + 4dda8fa commit eb6a931
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 8 deletions.
20 changes: 20 additions & 0 deletions ocr_request.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package ocrworker

import "fmt"
import "encoding/base64"

type OcrRequest struct {
ImgUrl string `json:"img_url"`
ImgBase64 string `json:"img_base64"`
EngineType OcrEngineType `json:"engine"`
ImgBytes []byte `json:"img_bytes"`
PreprocessorChain []string `json:"preprocessors"`
Expand All @@ -26,7 +28,25 @@ func (ocrRequest *OcrRequest) nextPreprocessor(processorRoutingKey string) strin
ocrRequest.PreprocessorChain = s
return x
}
}

// decodeBase64 decodes the standard-encoding base64 payload held in ImgBase64
// and stores the result in ImgBytes. On success ImgBase64 is reset to the
// empty string; on failure the request is left untouched and the decoding
// error is returned.
func (ocrRequest *OcrRequest) decodeBase64() error {
	raw, err := base64.StdEncoding.DecodeString(ocrRequest.ImgBase64)
	if err == nil {
		// Only mutate the request once the payload is known to be valid.
		ocrRequest.ImgBytes = raw
		ocrRequest.ImgBase64 = ""
	}
	return err
}

// hasBase64 reports whether the request carries a non-empty ImgBase64 value.
func (ocrRequest *OcrRequest) hasBase64() bool {
	return len(ocrRequest.ImgBase64) > 0
}

func (ocrRequest *OcrRequest) downloadImgUrl() error {
Expand Down
26 changes: 19 additions & 7 deletions ocr_rpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ import (
"encoding/json"
"fmt"
"time"

"github.com/couchbaselabs/logg"
"github.com/nu7hatch/gouuid"
"github.com/couchbaselabs/logg"
"github.com/streadway/amqp"
)

Expand Down Expand Up @@ -88,11 +87,24 @@ func (c *OcrRpcClient) DecodeImage(ocrRequest OcrRequest) (OcrResult, error) {
// as open-ocr, it will be expensive in terms of bandwidth
// to have image binary in messages
if ocrRequest.ImgBytes == nil {
// if we already have image bytes, ignore image url
err = ocrRequest.downloadImgUrl()
if err != nil {
logg.LogTo("OCR_CLIENT", "Error downloading img url: %v", err)
return OcrResult{}, err

// if we do not have bytes use base 64 file by converting it to bytes
if ocrRequest.hasBase64() {

logg.LogTo("OCR_CLIENT", "OCR request has base 64 convert it to bytes")

err = ocrRequest.decodeBase64()
if err != nil {
logg.LogTo("OCR_CLIENT", "Error decoding base64: %v", err)
return OcrResult{}, err
}
} else {
// if we do not have base 64 or bytes download the file
err = ocrRequest.downloadImgUrl()
if err != nil {
logg.LogTo("OCR_CLIENT", "Error downloading img url: %v", err)
return OcrResult{}, err
}
}
}

Expand Down
34 changes: 33 additions & 1 deletion tesseract_engine.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ocrworker

import (
"encoding/base64"
"fmt"
"io/ioutil"
"os"
Expand Down Expand Up @@ -97,7 +98,9 @@ func (t TesseractEngineArgs) Export() []string {
func (t TesseractEngine) ProcessRequest(ocrRequest OcrRequest) (OcrResult, error) {

tmpFileName, err := func() (string, error) {
if ocrRequest.ImgUrl != "" {
if ocrRequest.ImgBase64 != "" {
return t.tmpFileFromImageBase64(ocrRequest.ImgBase64)
} else if ocrRequest.ImgUrl != "" {
return t.tmpFileFromImageUrl(ocrRequest.ImgUrl)
} else {
return t.tmpFileFromImageBytes(ocrRequest.ImgBytes)
Expand Down Expand Up @@ -126,6 +129,8 @@ func (t TesseractEngine) ProcessRequest(ocrRequest OcrRequest) (OcrResult, error

func (t TesseractEngine) tmpFileFromImageBytes(imgBytes []byte) (string, error) {

logg.LogTo("OCR_TESSERACT", "Use tesseract with bytes image")

tmpFileName, err := createTempFileName()
if err != nil {
return "", err
Expand All @@ -142,8 +147,35 @@ func (t TesseractEngine) tmpFileFromImageBytes(imgBytes []byte) (string, error)

}

// tmpFileFromImageBase64 decodes base64Image (standard base64 encoding) and
// saves the decoded bytes under a freshly generated temporary file name.
//
// It returns that file name on success. If generating the name, decoding the
// input, or saving the bytes fails, it returns an empty name together with
// the corresponding error.
func (t TesseractEngine) tmpFileFromImageBase64(base64Image string) (string, error) {

	logg.LogTo("OCR_TESSERACT", "Use tesseract with base 64")

	tmpFileName, err := createTempFileName()
	if err != nil {
		return "", err
	}

	// Decode the base64 string into raw image bytes.
	decoded, decodeError := base64.StdEncoding.DecodeString(base64Image)
	if decodeError != nil {
		// Return the decode error itself: err is nil at this point, so
		// returning it would report success with an empty file name.
		return "", decodeError
	}

	err = saveBytesToFileName(decoded, tmpFileName)
	if err != nil {
		return "", err
	}

	return tmpFileName, nil

}

func (t TesseractEngine) tmpFileFromImageUrl(imgUrl string) (string, error) {

logg.LogTo("OCR_TESSERACT", "Use tesseract with url")

tmpFileName, err := createTempFileName()
if err != nil {
return "", err
Expand Down

0 comments on commit eb6a931

Please sign in to comment.