diff --git a/README.md b/README.md
index 39355b7..fbcfc45 100644
--- a/README.md
+++ b/README.md
@@ -1,43 +1,117 @@
-# Angelina braille reader
+# Angelina Braille Reader
-Draft version. Production version will be at https://github.com/IlyaOvodov/AngelinaReader (under construction)
+Angelina Braille Reader is an optical Braille recognition system that converts Braille text in photographs into plain text.
+## General description of the solution
-# requirements
+The solution is a web service.
- ubuntu, windows with GPU
+Users interact with it via a standard web browser on a smartphone or a desktop computer. Results are displayed on the screen as images and text and can be sent to the user's e-mail.
- CUDA
- Python 3.6
- PyTorch 1.4
- torchvision
- ignite
- numpy
- PIL
- albumentations
- cv2
- https://github.com/IlyaOvodov/pytorch-retinanet
- https://github.com/IlyaOvodov/OvoTools
- https://github.com/IlyaOvodov/labelme (for annotation)
+The solution can also be installed as a standalone program on a personal computer and used through a command-line interface.
-# installation
+Video presentation: https://youtu.be/_vcvxPtAzOM
-```
-git clone https://github.com/IlyaOvodov/BrailleV0.git
-edit BrailleV0/local_config.py to set data_path and global_3rd_party pointing to current dir
-git clone https://github.com/IlyaOvodov/OvoTools.git
-cd OvoTools
-python setup.py develop
-cd ..
-git clone https://github.com/IlyaOvodov/pytorch-retinanet.git pytorch_retinanet
-download model https://yadi.sk/d/GW0qEmA5rL0m0A into ./NN_saved/retina_chars_eced60/models/clr.008
-```
+The service is available at: http://angelina-reader.ru
+
+
+### Solution key features
+
+* Can handle images of deformed Braille pages
+* Can recognize both one-sided and two-sided Braille printouts
+* Can recognize both the recto and the verso side of a page from a single image
+* Can automatically find the correct orientation of an image
+* Can process:
+  * images taken with a smartphone camera directly from the application (mobile web version only)
+  * image files (jpg etc.)
+  * pdf files
+  * zip archives with images
+* Results can be sent to the user's e-mail
+* Can recognize Russian and English Braille texts
+
+### Limitations
+
+* The page must be photographed approximately from a top view
+* Light must fall from the upper side of the page, i.e. the shadow of an object placed on the page must point towards the bottom edge of the page. Top light, side light, and light from the bottom side of the page are not allowed.
+* Braille symbols must be neither too small nor too large. Optimally, an A4 page with standard Braille text should fill the whole image.
+
+### Approaches used in the project
+
+* Braille symbols are detected with an object detection CNN (RetinaNet, https://arxiv.org/abs/1708.02002)
+* Primary network training was done on the DSBI dataset
+* Additional training data were prepared in several rounds of manual correction of results produced by the CNN trained on the previous round's data
+* In the first rounds poetry texts were used, and errors were found by line-by-line comparison with the original text
+* At later stages, recognition errors were found with a spell checker
+* A new annotated dataset of 360 pages of single-sided handwritten and two-sided printed Braille texts has been prepared, including annotation of 76 pages from the dataset provided by the World AI&DATA Challenge contest. This dataset will be published later.
+* For the automatic search of the correct page orientation, the page is processed in all 4 possible orientations and the orientation with the highest count of the most widespread Braille characters is selected
+* To recognize the verso-side text we use the fact that dented points look visually convex on a brightness-inverted image: the image is inverted and flipped horizontally (a minimal sketch is given right after this list)
+* A heuristic algorithm is used to assemble the detected symbols into text lines
+* Braille symbols are translated into plain Russian or English text by an algorithm that encodes the Braille interpretation rules
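+
+A minimal sketch of the verso-side preparation described above (illustrative only; the OpenCV calls and the function name are assumptions, not the code used in the project):
+
+```
+import cv2
+
+def prepare_verso_image(path):
+    # Illustrative sketch: after brightness inversion the dented verso points
+    # look convex like regular dots; mirroring restores the reading order.
+    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
+    inverted = 255 - img
+    return cv2.flip(inverted, 1)  # 1 = flip around the vertical axis
+```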
+
+## Environment requirements
+
+A standalone workstation requires an NVIDIA GPU with at least 3 GB of memory (e.g. GeForce GTX 1050 3GB or better); the web server requires at least 4 GB of GPU memory (GeForce GTX 1050 Ti or better).
+
+OS: Ubuntu, Windows
+
+* CUDA 10.2
+* Python 3.6
+* Python packages: see requirements.txt
+
+The Python installation path should be added to PATH.
+
+The client requires a standard web browser (Chrome, Firefox).
+
+
+## Installation
-# usage
 ```
-cd BrailleV0/web-app
-python angelina_reader_app.py
-access it by 127.0.0.1:5000
+git clone --recursive git@github.com:IlyaOvodov/AngelinaReader.git
+cd AngelinaReader
+pip install --upgrade pip
+pip install -r requirements.txt
+wget -O weights/model.t7 http://angelina-reader.ovdv.ru/retina_chars_eced60.clr.008
+python run_web_app.py
 ```
+On Windows, the pip scripts directory (i.e. `\Scripts`) should also be added to Path.
+Make sure that `python` and `pip` start Python 3 if both Python 3 and Python 2.7 are installed.
+
+Open http://127.0.0.1:5000 in a browser. The main page of the application should be displayed.
+
+To access the application from the Internet, forward port 80 to port 5000 of the server. This is not required for testing the service locally (at http://127.0.0.1:5000).
+
+
+## Usage
+
+### Using as a web service
+
+Start the server: `python run_web_app.py`
+On Windows you can use the bat file `start_web_app.bat`.
+
+Open http://127.0.0.1:5000 in a browser and follow the instructions.
+
+If some Braille symbols cannot be interpreted by the application, they are displayed as `~?~`.
+
+Usage of the web application is demonstrated in the video presentation: https://youtu.be/_vcvxPtAzOM
+
+
+### Command-line interface
+
+`python run_local.py [-h] [-l LANG] [-o] [-2] input [results_dir]`
+or, on Windows:
+`start.bat [-h] [-l LANG] [-o] [-2] input [results_dir]`
+
+Parameters:
+* `input` - an image file (jpg, png etc.), a pdf file, a zip file with images, or a directory name. If a directory name or a zip file is supplied, all image and pdf files in it are processed.
+* `results_dir` - folder to place the results in. If not supplied, the folder of the input files is used. For every input file, a `.marked.txt` file with the results as plain text and a `.marked.jpg` file with the recognized text printed over the input image are created.
+* `-l LANG` - input document language (default is RU). Use `-l EN` for English texts.
+* `-o` - switch off the automatic orientation search. Auto orientation can occasionally fail (for non-typical texts or when there are many recognition errors); in such cases, adjust the image orientation manually and use the `-o` option.
+* `-2` - recognize both the recto and the verso side of two-sided printouts. Verso-side results are stored in `.rev.marked.txt` and `.rev.marked.jpg` files.
+* `-h` - print help.
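+
+For a single uniform run it is simplest to pass a directory directly to `run_local.py`. When per-file settings are needed, a small driver script can call it in a loop. The sketch below is illustrative only (the `scans` and `results` folder names are assumptions) and is not shipped with the repository:
+
+```
+import subprocess
+from pathlib import Path
+
+# Illustrative batch driver: recognize every jpg in ./scans as English Braille
+# and put the results into ./results.
+for img in sorted(Path("scans").glob("*.jpg")):
+    subprocess.run(
+        ["python", "run_local.py", "-l", "EN", str(img), "results"],
+        check=True,
+    )
+```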
+
+## Datasets being used
+
+Network weights: see the repository `./weights` folder.
+
+## Auxiliary instruments
-or edit `if __name__=="__main__"` section BrailleV0/NN/RetinaNet/infer_retinanet.py. Set img_filename_mask and results_dir to proper values and run it.
+None.
\ No newline at end of file
diff --git a/weights/info.txt b/weights/info.txt
new file mode 100644
index 0000000..f298c59
--- /dev/null
+++ b/weights/info.txt
@@ -0,0 +1 @@
+all_data_0.5_100_5_nocls_91b802\models 006
\ No newline at end of file
diff --git a/weights/param.txt b/weights/param.txt
new file mode 100644
index 0000000..4121cf4
--- /dev/null
+++ b/weights/param.txt
@@ -0,0 +1,63 @@
+{
+    'model_name': 'NN_results/all_data_{model_params.encoder_params.aspect_ratios[0]}_{model_params.loss_params.class_loss_scale}_{augmentation.rotate_limit}_nocls',
+    'data': {
+        'get_points': False,
+        'class_as_6pt': False,
+        'batch_size': 12,
+        'net_hw': ( 416, 416, ),
+        'rect_margin': 0.3,
+        'max_std': 0.1,
+        'train_list_file_names': [
+            'DSBI/data/my_train.txt',
+            'My/labeled/labeled2/train_books.txt',
+            'My/labeled/labeled2/train_withtext.txt',
+            ( 'My/labeled/labeled2/train_pupils.txt', 2, ),
+            'My/labeled/not_braille/_not_braille.txt',
+            'My/labeled/ASI/student_book_p1.txt',
+            ( 'My/labeled/ASI/turlom_c2.txt', 3, ),
+            ( 'web_uploaded/re-processed200823.txt', 0.125, { 'calc_cls': False, }, ),
+            ( 'ASI_results/braile_photos_and_scans.txt', 1, { 'calc_cls': False, }, ),
+        ],
+        'val_list_file_names': {
+            'val_2': [ 'My/labeled/labeled2/val_books.txt', 'My/labeled/labeled2/val_withtext.txt', 'My/labeled/labeled2/val_pupils.txt', ],
+            'val_3_asi': [ 'My/labeled/ASI/turlom_c15_photo_1p.txt', 'My/labeled/ASI/turlom_c15_photo_1p.txt', 'My/labeled/ASI/turlom_c15_photo_1p.txt', ],
+            'val_3_asi_scan': [ 'My/labeled/ASI/turlom_c15_scan_1p.txt', 'My/labeled/ASI/turlom_c15_scan_1p.txt', 'My/labeled/ASI/turlom_c15_scan_1p.txt', ],
+        },
+    },
+    'augmentation': {
+        'img_width_range': ( 614, 1840, ),
+        'stretch_limit': 0.1,
+        'rotate_limit': 5,
+    },
+    'model': 'retina',
+    'model_params': {
+        'encoder_params': {
+            'anchor_areas': [ 128.0, 288.0, 512.0, ],
+            'aspect_ratios': [ 0.5, ],
+            'iuo_fit_thr': 0,
+            'iuo_nofit_thr': 0,
+        },
+        'loss_params': {
+            'class_loss_scale': 100,
+        },
+    },
+    'load_model_from': 'NN_results/retina_chars3_0.5_100_5_090399/models/clr.009.t7',
+    'optim': 'torch.optim.SGD',
+    'optim_params': {
+        'lr': 0.0001,
+        'momentum': 0.9,
+    },
+    'lr_finder': {
+        'iters_num': 200,
+        'log_lr_start': -4,
+        'log_lr_end': -0.3,
+    },
+    'lr_scheduler': {
+        'type': 'clr',
+    },
+    'clr': {
+        'warmup_epochs': 10,
+        'min_lr': 0.0001,
+        'max_lr': 0.01,
+        'period_epochs': 500,
+        'scale_max_lr': 0.95,
+        'scale_min_lr': 0.95,
+    },
+}
+hash: 91b802
\ No newline at end of file
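
A note on `weights/param.txt`: the file stores the training configuration as a Python-style literal followed by a trailing `hash:` line. Assuming that format, it can be inspected with a few lines of Python (an illustrative sketch, not a loader shipped with the repository):

```
import ast
from pathlib import Path

# Illustrative: drop the trailing "hash: ..." line, then parse the literal.
text = Path("weights/param.txt").read_text()
params = ast.literal_eval(text.partition("hash:")[0].strip())
print(params["model"], params["model_params"]["encoder_params"]["anchor_areas"])
```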