Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Thread-Limit to prevent blocking of tesseract processes #62

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions js/admin.elements.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,23 @@
var fts_tesseract_elements = {
tesseract_div: null,
tesseract_ocr: null,
tesseract_thread_limit: null,
tesseract_psm: null,
tesseract_lang: null,
tesseract_pdf: null,
tesseract_pdf_limit: null,

init: function () {
fts_tesseract_elements.tesseract_div = $('#files_ocr-tesseract');
fts_tesseract_elements.tesseract_thread_limit = $('#tesseract_thread_limit');
fts_tesseract_elements.tesseract_psm = $('#tesseract_psm');
fts_tesseract_elements.tesseract_lang = $('#tesseract_lang');
fts_tesseract_elements.tesseract_ocr = $('#tesseract_ocr');
fts_tesseract_elements.tesseract_pdf = $('#tesseract_pdf');
fts_tesseract_elements.tesseract_pdf_limit = $('#tesseract_pdf_limit');

fts_tesseract_elements.tesseract_ocr.on('change', fts_tesseract_elements.updateSettings);
fts_tesseract_elements.tesseract_thread_limit.on('change', fts_tesseract_elements.updateSettings);
fts_tesseract_elements.tesseract_psm.on('change', fts_tesseract_elements.updateSettings);
fts_tesseract_elements.tesseract_lang.on('change', fts_tesseract_elements.updateSettings);
fts_tesseract_elements.tesseract_pdf.on('change', fts_tesseract_elements.updateSettings);
Expand Down
2 changes: 2 additions & 0 deletions js/admin.settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ var fts_tesseract_settings = {

updateSettingPage: function (result) {
fts_tesseract_elements.tesseract_ocr.prop('checked', (result.tesseract_enabled === '1'));
fts_tesseract_elements.tesseract_thread_limit.val(result.tesseract_thread_limit);
fts_tesseract_elements.tesseract_psm.val(result.tesseract_psm);
fts_tesseract_elements.tesseract_lang.val(result.tesseract_lang);
fts_tesseract_elements.tesseract_pdf.prop('checked', (result.tesseract_pdf === '1'));
Expand All @@ -70,6 +71,7 @@ var fts_tesseract_settings = {

var data = {
tesseract_enabled: (fts_tesseract_elements.tesseract_ocr.is(':checked')) ? 1 : 0,
tesseract_thread_limit: fts_tesseract_elements.tesseract_thread_limit.val(),
tesseract_psm: fts_tesseract_elements.tesseract_psm.val(),
tesseract_lang: fts_tesseract_elements.tesseract_lang.val(),
tesseract_pdf: (fts_tesseract_elements.tesseract_pdf.is(':checked')) ? 1 : 0,
Expand Down
25 changes: 14 additions & 11 deletions lib/Service/ConfigService.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,19 @@ class ConfigService {


const TESSERACT_ENABLED = 'tesseract_enabled';
const TESSERACT_THREAD_LIMIT = 'tesseract_thread_limit';
const TESSERACT_PSM = 'tesseract_psm';
const TESSERACT_LANG = 'tesseract_lang';
const TESSERACT_PDF = 'tesseract_pdf';
const TESSERACT_PDF_LIMIT = 'tesseract_pdf_limit';

public $defaults = [
self::TESSERACT_ENABLED => '0',
self::TESSERACT_PSM => '4',
self::TESSERACT_LANG => 'eng',
self::TESSERACT_PDF => '0',
self::TESSERACT_PDF_LIMIT => '0'
self::TESSERACT_ENABLED => '0',
self::TESSERACT_THREAD_LIMIT => '1',
self::TESSERACT_PSM => '4',
self::TESSERACT_LANG => 'eng',
self::TESSERACT_PDF => '0',
self::TESSERACT_PDF_LIMIT => '0'
];


Expand All @@ -78,12 +80,13 @@ public function onGetConfig(GenericEvent $e) {
$config = $e->getArgument('config');
$config['files_fulltextsearch_tesseract'] =
[
'version' => $this->getAppValue('installed_version'),
'enabled' => $this->getAppValue(self::TESSERACT_ENABLED),
'psm' => $this->getAppValue(self::TESSERACT_PSM),
'lang' => $this->getAppValue(self::TESSERACT_LANG),
'pdf' => $this->getAppValue(self::TESSERACT_PDF),
'pdf_limit' => $this->getAppValue(self::TESSERACT_PDF_LIMIT),
'version' => $this->getAppValue('installed_version'),
'enabled' => $this->getAppValue(self::TESSERACT_ENABLED),
'thread_limit' => $this->getAppValue(self::TESSERACT_THREAD_LIMIT),
'psm' => $this->getAppValue(self::TESSERACT_PSM),
'lang' => $this->getAppValue(self::TESSERACT_LANG),
'pdf' => $this->getAppValue(self::TESSERACT_PDF),
'pdf_limit' => $this->getAppValue(self::TESSERACT_PDF_LIMIT),
];
$e->setArgument('config', $config);
}
Expand Down
1 change: 1 addition & 0 deletions lib/Service/TesseractService.php
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ private function ocrFileFromPath(string $path): string {
$this->logger->debug('generating the TesseractOCR wrapper', ['path' => $path]);

$ocr = new TesseractOCR($path);
$ocr->threadLimit($this->configService->getAppValue(ConfigService::TESSERACT_THREAD_LIMIT));
$ocr->psm($this->configService->getAppValue(ConfigService::TESSERACT_PSM));
$lang = explode(',', $this->configService->getAppValue(ConfigService::TESSERACT_LANG));
call_user_func_array([$ocr, 'lang'], array_map('trim', $lang));
Expand Down
11 changes: 11 additions & 0 deletions templates/settings.admin.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,17 @@
</div>
</div>

<!-- Settings row for the Tesseract thread limit; the input is looked up by its id (tesseract_thread_limit) from the admin scripts. -->
<div class="div-table-row tesseract_ocr_enabled">
<div class="div-table-col div-table-col-left">
<span class="leftcol">Thread Limit</span>
<br/>
<em>set the maximum number of threads</em>
</div>
<div class="div-table-col">
<input type="text" class="small" id="tesseract_thread_limit" value=""/>
</div>
</div>

<div class="div-table-row tesseract_ocr_enabled">
<div class="div-table-col div-table-col-left">
<span class="leftcol">Page Segmentation Method</span>
Expand Down