diff --git a/README.md b/README.md index a92eba2..996cabd 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,9 @@ displayHTML(vis_html) ### Jupyter +To save the visualization as html, provide the export file path: `save_path='./export.html'` for each visualizer. + + #### Dependency Parser ```python from sparknlp_display import DependencyParserVisualizer @@ -64,7 +67,8 @@ pipeline_result = dp_pipeline.fullAnnotate(text) dependency_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe. pos_col = 'pos', #specify the pos column dependency_col = 'dependency', #specify the dependency column - dependency_type_col = 'dependency_type' #specify the dependency type column + dependency_type_col = 'dependency_type', #specify the dependency type column + save_path='./export.html' # optional - to save viz as html. (default: None) ) ``` @@ -82,8 +86,9 @@ pipeline_result = ner_light_pipeline.fullAnnotate(text) ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe label_col='entities', #specify the entity column - document_col='document' #specify the document column (default: 'document') - labels=['PER'] #only allow these labels to be displayed. (default: [] - all labels will be displayed) + document_col='document', #specify the document column (default: 'document') + labels=['PER'], #only allow these labels to be displayed. (default: [] - all labels will be displayed) + save_path='./export.html' # optional - to save viz as html. 
(default: None) ) ## To set custom label colors: @@ -104,8 +109,9 @@ pipeline_result = er_light_pipeline.fullAnnotate(text) er_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe label_col='entities', #specify the ner result column - resolution_col = 'resolution' - document_col='document' #specify the document column (default: 'document') + resolution_col = 'resolution', + document_col='document', #specify the document column (default: 'document') + save_path='./export.html' # optional - to save viz as html. (default: None) ) ## To set custom label colors: @@ -126,7 +132,8 @@ pipeline_result = re_light_pipeline.fullAnnotate(text) re_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe relation_col = 'relations', #specify relations column document_col = 'document', #specify document column - show_relations=True #display relation names on arrows (default: True) + show_relations=True, #display relation names on arrows (default: True) + save_path='./export.html' # optional - to save viz as html. (default: None) ) ``` @@ -143,8 +150,9 @@ pipeline_result = ner_assertion_light_pipeline.fullAnnotate(text) assertion_vis.display(pipeline_result[0], label_col = 'entities', #specify the ner result column - assertion_col = 'assertion' #specify assertion column - document_col = 'document' #specify the document column (default: 'document') + assertion_col = 'assertion', #specify assertion column + document_col = 'document', #specify the document column (default: 'document') + save_path='./export.html' # optional - to save viz as html. 
(default: None) ) ## To set custom label colors: diff --git a/build/lib/sparknlp_display/VERSION b/build/lib/sparknlp_display/VERSION index 4684374..872765e 100644 --- a/build/lib/sparknlp_display/VERSION +++ b/build/lib/sparknlp_display/VERSION @@ -1 +1 @@ -1.8 \ No newline at end of file +1.9 \ No newline at end of file diff --git a/build/lib/sparknlp_display/assertion.py b/build/lib/sparknlp_display/assertion.py index 6dabcb5..0f8889f 100644 --- a/build/lib/sparknlp_display/assertion.py +++ b/build/lib/sparknlp_display/assertion.py @@ -158,7 +158,7 @@ def __display_ner(self, result, label_col, resolution_col, document_col, origina return html_output - def display(self, result, label_col, assertion_col, document_col='document', raw_text=None, return_html=False): + def display(self, result, label_col, assertion_col, document_col='document', raw_text=None, return_html=False, save_path=None): """Displays Assertion visualization. Inputs: @@ -174,9 +174,14 @@ def display(self, result, label_col, assertion_col, document_col='document', raw #self.__verifyInput(result, label_col, document_col, raw_text) html_content = self.__display_ner(result, label_col, assertion_col, document_col, raw_text) + html_content_save = style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content_save) if return_html: - return style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + return html_content_save else: - return display(HTML(style_config.STYLE_CONFIG_ENTITIES+ " "+html_content)) + return display(HTML(html_content_save)) \ No newline at end of file diff --git a/build/lib/sparknlp_display/dependency_parser.py b/build/lib/sparknlp_display/dependency_parser.py index 553f3c7..dd4f310 100644 --- a/build/lib/sparknlp_display/dependency_parser.py +++ b/build/lib/sparknlp_display/dependency_parser.py @@ -219,7 +219,7 @@ def __generate_graph(self, result_df): return dwg.tostring() - def display(self, res, pos_col, 
dependency_col, dependency_type_col=None, return_html=False): + def display(self, res, pos_col, dependency_col, dependency_type_col=None, return_html=False, save_path=None): """Displays NER visualization. Inputs: @@ -254,6 +254,11 @@ def display(self, res, pos_col, dependency_col, dependency_type_col=None, return df['dependency_type'] = '' html_content = self.__generate_graph(df) + + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content) + if return_html: return html_content else: diff --git a/build/lib/sparknlp_display/entity_resolution.py b/build/lib/sparknlp_display/entity_resolution.py index a48b7ee..b9e2cf8 100644 --- a/build/lib/sparknlp_display/entity_resolution.py +++ b/build/lib/sparknlp_display/entity_resolution.py @@ -158,7 +158,7 @@ def __display_ner(self, result, label_col, resolution_col, document_col, origina return html_output - def display(self, result, label_col, resolution_col, document_col='document', raw_text=None, return_html=False): + def display(self, result, label_col, resolution_col, document_col='document', raw_text=None, return_html=False, save_path=None): """Displays NER visualization. 
Inputs: @@ -175,7 +175,13 @@ def display(self, result, label_col, resolution_col, document_col='document', ra html_content = self.__display_ner(result, label_col, resolution_col, document_col, raw_text) + html_content_save = style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content_save) + if return_html: - return style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + return html_content_save else: - return display(HTML(style_config.STYLE_CONFIG_ENTITIES+ " "+html_content)) + return display(HTML(html_content_save)) diff --git a/build/lib/sparknlp_display/ner.py b/build/lib/sparknlp_display/ner.py index a0e4238..3b276db 100644 --- a/build/lib/sparknlp_display/ner.py +++ b/build/lib/sparknlp_display/ner.py @@ -138,7 +138,7 @@ def __display_ner(self, result, label_col, document_col, original_text, labels_l return html_output - def display(self, result, label_col, document_col='document', raw_text=None, labels=None, return_html=False): + def display(self, result, label_col, document_col='document', raw_text=None, labels=None, return_html=False, save_path=None): """Displays NER visualization. Inputs: result -- A Dataframe or dictionary. 
@@ -153,8 +153,15 @@ def display(self, result, label_col, document_col='document', raw_text=None, lab html_content = self.__display_ner(result, label_col, document_col, raw_text, labels) + html_content_save = style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content_save) + if return_html: - return style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + return html_content_save else: - return display(HTML(style_config.STYLE_CONFIG_ENTITIES+ " "+html_content)) + return display(HTML(html_content_save)) + \ No newline at end of file diff --git a/build/lib/sparknlp_display/relation_extraction.py b/build/lib/sparknlp_display/relation_extraction.py index de32379..cfffcd0 100644 --- a/build/lib/sparknlp_display/relation_extraction.py +++ b/build/lib/sparknlp_display/relation_extraction.py @@ -375,7 +375,7 @@ def __gen_graph(self, rdf, selected_text, exclude_relations, show_relations): return dwg.tostring() - def display(self, result, relation_col, document_col='document', exclude_relations=['O'], show_relations=True, return_html=False): + def display(self, result, relation_col, document_col='document', exclude_relations=['O'], show_relations=True, return_html=False, save_path=None): """Displays Relation Extraction visualization. Inputs: result -- A Dataframe or dictionary. 
@@ -392,6 +392,10 @@ def display(self, result, relation_col, document_col='document', exclude_relatio html_content = self.__gen_graph(res, original_text, exclude_relations, show_relations) + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content) + if return_html: return html_content else: diff --git a/dist/spark-nlp-display-1.8.tar.gz b/dist/spark-nlp-display-1.8.tar.gz deleted file mode 100644 index 0a0bf62..0000000 Binary files a/dist/spark-nlp-display-1.8.tar.gz and /dev/null differ diff --git a/dist/spark-nlp-display-1.9.tar.gz b/dist/spark-nlp-display-1.9.tar.gz new file mode 100644 index 0000000..402e3aa Binary files /dev/null and b/dist/spark-nlp-display-1.9.tar.gz differ diff --git a/dist/spark_nlp_display-1.8-py3-none-any.whl b/dist/spark_nlp_display-1.9-py3-none-any.whl similarity index 79% rename from dist/spark_nlp_display-1.8-py3-none-any.whl rename to dist/spark_nlp_display-1.9-py3-none-any.whl index 469ec5d..43e10d2 100644 Binary files a/dist/spark_nlp_display-1.8-py3-none-any.whl and b/dist/spark_nlp_display-1.9-py3-none-any.whl differ diff --git a/spark_nlp_display.egg-info/PKG-INFO b/spark_nlp_display.egg-info/PKG-INFO index d40bde8..a914054 100644 --- a/spark_nlp_display.egg-info/PKG-INFO +++ b/spark_nlp_display.egg-info/PKG-INFO @@ -1,167 +1,11 @@ Metadata-Version: 2.1 Name: spark-nlp-display -Version: 1.8 +Version: 1.9 Summary: Visualization package for Spark NLP Home-page: http://nlp.johnsnowlabs.com Author: John Snow Labs Author-email: john@johnsnowlabs.com License: UNKNOWN -Description: # spark-nlp-display - A library for the simple visualization of different types of Spark NLP annotations. 
- - ## Supported Visualizations: - - Dependency Parser - - Named Entity Recognition - - Entity Resolution - - Relation Extraction - - Assertion Status - - ## Complete Tutorial - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-display/blob/main/tutorials/Spark_NLP_Display.ipynb) - - https://github.com/JohnSnowLabs/spark-nlp-display/blob/main/tutorials/Spark_NLP_Display.ipynb - - ### Requirements - - spark-nlp - - ipython - - svgwrite - - pandas - - numpy - - ### Installation - ```bash - pip install spark-nlp-display - ``` - - ### How to use - - ### Databricks - #### For all modules, pass in the additional parameter "return_html=True" in the display function and use Databrick's function displayHTML() to render visualization as explained below: - ```python - from sparknlp_display import NerVisualizer - - ner_vis = NerVisualizer() - - ## To set custom label colors: - ner_vis.set_label_colors({'LOC':'#800080', 'PER':'#77b5fe'}) #set label colors by specifying hex codes - - pipeline_result = ner_light_pipeline.fullAnnotate(text) ##light pipeline - #pipeline_result = ner_full_pipeline.transform(df).collect()##full pipeline - - vis_html = ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe - label_col='entities', #specify the entity column - document_col='document', #specify the document column (default: 'document') - labels=['PER'], #only allow these labels to be displayed. 
(default: [] - all labels will be displayed) - return_html=True) - - displayHTML(vis_html) - ``` - ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/ner_viz.png) - - ### Jupyter - - #### Dependency Parser - ```python - from sparknlp_display import DependencyParserVisualizer - - dependency_vis = DependencyParserVisualizer() - - pipeline_result = dp_pipeline.fullAnnotate(text) - #pipeline_result = dp_full_pipeline.transform(df).collect()##full pipeline - - dependency_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe. - pos_col = 'pos', #specify the pos column - dependency_col = 'dependency', #specify the dependency column - dependency_type_col = 'dependency_type' #specify the dependency type column - ) - ``` - - ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/dp_viz.png) - - #### Named Entity Recognition - - ```python - from sparknlp_display import NerVisualizer - - ner_vis = NerVisualizer() - - pipeline_result = ner_light_pipeline.fullAnnotate(text) - #pipeline_result = ner_full_pipeline.transform(df).collect()##full pipeline - - ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe - label_col='entities', #specify the entity column - document_col='document' #specify the document column (default: 'document') - labels=['PER'] #only allow these labels to be displayed. 
(default: [] - all labels will be displayed) - ) - - ## To set custom label colors: - ner_vis.set_label_colors({'LOC':'#800080', 'PER':'#77b5fe'}) #set label colors by specifying hex codes - - ``` - - ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/ner_viz.png) - - #### Entity Resolution - - ```python - from sparknlp_display import EntityResolverVisualizer - - er_vis = EntityResolverVisualizer() - - pipeline_result = er_light_pipeline.fullAnnotate(text) - - er_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe - label_col='entities', #specify the ner result column - resolution_col = 'resolution' - document_col='document' #specify the document column (default: 'document') - ) - - ## To set custom label colors: - er_vis.set_label_colors({'TREATMENT':'#800080', 'PROBLEM':'#77b5fe'}) #set label colors by specifying hex codes - - ``` - - ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/er_viz.png) - - #### Relation Extraction - ```python - from sparknlp_display import RelationExtractionVisualizer - - re_vis = RelationExtractionVisualizer() - - pipeline_result = re_light_pipeline.fullAnnotate(text) - - re_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe - relation_col = 'relations', #specify relations column - document_col = 'document', #specify document column - show_relations=True #display relation names on arrows (default: True) - ) - - ``` - - ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/re_viz.png) - - #### Assertion Status - ```python - from sparknlp_display import AssertionVisualizer - - assertion_vis = AssertionVisualizer() - - pipeline_result = ner_assertion_light_pipeline.fullAnnotate(text) - - assertion_vis.display(pipeline_result[0], - label_col = 'entities', #specify the ner result column - assertion_col = 'assertion' #specify assertion 
column - document_col = 'document' #specify the document column (default: 'document') - ) - - ## To set custom label colors: - assertion_vis.set_label_colors({'TREATMENT':'#008080', 'problem':'#800080'}) #set label colors by specifying hex codes - - ``` - - ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/assertion_viz.png) - Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 2 @@ -169,3 +13,170 @@ Classifier: License :: OSI Approved :: Apache Software License Classifier: Operating System :: OS Independent Requires-Python: >=2.7 Description-Content-Type: text/markdown +License-File: LICENSE + +# spark-nlp-display +A library for the simple visualization of different types of Spark NLP annotations. + +## Supported Visualizations: +- Dependency Parser +- Named Entity Recognition +- Entity Resolution +- Relation Extraction +- Assertion Status + +## Complete Tutorial +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-display/blob/main/tutorials/Spark_NLP_Display.ipynb) + +https://github.com/JohnSnowLabs/spark-nlp-display/blob/main/tutorials/Spark_NLP_Display.ipynb + +### Requirements +- spark-nlp +- ipython +- svgwrite +- pandas +- numpy + +### Installation +```bash +pip install spark-nlp-display +``` + +### How to use + +### Databricks +#### For all modules, pass in the additional parameter "return_html=True" in the display function and use Databrick's function displayHTML() to render visualization as explained below: +```python +from sparknlp_display import NerVisualizer + +ner_vis = NerVisualizer() + +## To set custom label colors: +ner_vis.set_label_colors({'LOC':'#800080', 'PER':'#77b5fe'}) #set label colors by specifying hex codes + +pipeline_result = ner_light_pipeline.fullAnnotate(text) ##light pipeline +#pipeline_result = ner_full_pipeline.transform(df).collect()##full 
pipeline + +vis_html = ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe + label_col='entities', #specify the entity column + document_col='document', #specify the document column (default: 'document') + labels=['PER'], #only allow these labels to be displayed. (default: [] - all labels will be displayed) + return_html=True) + +displayHTML(vis_html) +``` +![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/ner_viz.png) + +### Jupyter + +To save the visualization as html, provide the export file path: `save_path='./export.html'` for each visualizer. + + +#### Dependency Parser +```python +from sparknlp_display import DependencyParserVisualizer + +dependency_vis = DependencyParserVisualizer() + +pipeline_result = dp_pipeline.fullAnnotate(text) +#pipeline_result = dp_full_pipeline.transform(df).collect()##full pipeline + +dependency_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe. + pos_col = 'pos', #specify the pos column + dependency_col = 'dependency', #specify the dependency column + dependency_type_col = 'dependency_type', #specify the dependency type column + save_path='./export.html' # optional - to save viz as html. (default: None) + ) +``` + +![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/dp_viz.png) + +#### Named Entity Recognition + +```python +from sparknlp_display import NerVisualizer + +ner_vis = NerVisualizer() + +pipeline_result = ner_light_pipeline.fullAnnotate(text) +#pipeline_result = ner_full_pipeline.transform(df).collect()##full pipeline + +ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe + label_col='entities', #specify the entity column + document_col='document', #specify the document column (default: 'document') + labels=['PER'], #only allow these labels to be displayed. 
(default: [] - all labels will be displayed) + save_path='./export.html' # optional - to save viz as html. (default: None) + ) + +## To set custom label colors: +ner_vis.set_label_colors({'LOC':'#800080', 'PER':'#77b5fe'}) #set label colors by specifying hex codes + +``` + +![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/ner_viz.png) + +#### Entity Resolution + +```python +from sparknlp_display import EntityResolverVisualizer + +er_vis = EntityResolverVisualizer() + +pipeline_result = er_light_pipeline.fullAnnotate(text) + +er_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe + label_col='entities', #specify the ner result column + resolution_col = 'resolution', + document_col='document', #specify the document column (default: 'document') + save_path='./export.html' # optional - to save viz as html. (default: None) + ) + +## To set custom label colors: +er_vis.set_label_colors({'TREATMENT':'#800080', 'PROBLEM':'#77b5fe'}) #set label colors by specifying hex codes + +``` + +![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/er_viz.png) + +#### Relation Extraction +```python +from sparknlp_display import RelationExtractionVisualizer + +re_vis = RelationExtractionVisualizer() + +pipeline_result = re_light_pipeline.fullAnnotate(text) + +re_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe + relation_col = 'relations', #specify relations column + document_col = 'document', #specify document column + show_relations=True, #display relation names on arrows (default: True) + save_path='./export.html' # optional - to save viz as html. 
(default: None) + ) + +``` + +![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/re_viz.png) + +#### Assertion Status +```python +from sparknlp_display import AssertionVisualizer + +assertion_vis = AssertionVisualizer() + +pipeline_result = ner_assertion_light_pipeline.fullAnnotate(text) + +assertion_vis.display(pipeline_result[0], + label_col = 'entities', #specify the ner result column + assertion_col = 'assertion', #specify assertion column + document_col = 'document', #specify the document column (default: 'document') + save_path='./export.html' # optional - to save viz as html. (default: None) + ) + +## To set custom label colors: +assertion_vis.set_label_colors({'TREATMENT':'#008080', 'problem':'#800080'}) #set label colors by specifying hex codes + +``` + +![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/assertion_viz.png) + + diff --git a/spark_nlp_display.egg-info/SOURCES.txt b/spark_nlp_display.egg-info/SOURCES.txt index 43e2456..24d61e1 100644 --- a/spark_nlp_display.egg-info/SOURCES.txt +++ b/spark_nlp_display.egg-info/SOURCES.txt @@ -1,3 +1,4 @@ +LICENSE MANIFEST.in README.md setup.cfg diff --git a/sparknlp_display/VERSION b/sparknlp_display/VERSION index 4684374..872765e 100644 --- a/sparknlp_display/VERSION +++ b/sparknlp_display/VERSION @@ -1 +1 @@ -1.8 \ No newline at end of file +1.9 \ No newline at end of file diff --git a/sparknlp_display/assertion.py b/sparknlp_display/assertion.py index 6dabcb5..0f8889f 100644 --- a/sparknlp_display/assertion.py +++ b/sparknlp_display/assertion.py @@ -158,7 +158,7 @@ def __display_ner(self, result, label_col, resolution_col, document_col, origina return html_output - def display(self, result, label_col, assertion_col, document_col='document', raw_text=None, return_html=False): + def display(self, result, label_col, assertion_col, document_col='document', raw_text=None, return_html=False, save_path=None): """Displays Assertion 
visualization. Inputs: @@ -174,9 +174,14 @@ def display(self, result, label_col, assertion_col, document_col='document', raw #self.__verifyInput(result, label_col, document_col, raw_text) html_content = self.__display_ner(result, label_col, assertion_col, document_col, raw_text) + html_content_save = style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content_save) if return_html: - return style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + return html_content_save else: - return display(HTML(style_config.STYLE_CONFIG_ENTITIES+ " "+html_content)) + return display(HTML(html_content_save)) \ No newline at end of file diff --git a/sparknlp_display/dependency_parser.py b/sparknlp_display/dependency_parser.py index 553f3c7..dd4f310 100644 --- a/sparknlp_display/dependency_parser.py +++ b/sparknlp_display/dependency_parser.py @@ -219,7 +219,7 @@ def __generate_graph(self, result_df): return dwg.tostring() - def display(self, res, pos_col, dependency_col, dependency_type_col=None, return_html=False): + def display(self, res, pos_col, dependency_col, dependency_type_col=None, return_html=False, save_path=None): """Displays NER visualization. 
Inputs: @@ -254,6 +254,11 @@ def display(self, res, pos_col, dependency_col, dependency_type_col=None, return df['dependency_type'] = '' html_content = self.__generate_graph(df) + + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content) + if return_html: return html_content else: diff --git a/sparknlp_display/entity_resolution.py b/sparknlp_display/entity_resolution.py index a48b7ee..b9e2cf8 100644 --- a/sparknlp_display/entity_resolution.py +++ b/sparknlp_display/entity_resolution.py @@ -158,7 +158,7 @@ def __display_ner(self, result, label_col, resolution_col, document_col, origina return html_output - def display(self, result, label_col, resolution_col, document_col='document', raw_text=None, return_html=False): + def display(self, result, label_col, resolution_col, document_col='document', raw_text=None, return_html=False, save_path=None): """Displays NER visualization. Inputs: @@ -175,7 +175,13 @@ def display(self, result, label_col, resolution_col, document_col='document', ra html_content = self.__display_ner(result, label_col, resolution_col, document_col, raw_text) + html_content_save = style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content_save) + if return_html: - return style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + return html_content_save else: - return display(HTML(style_config.STYLE_CONFIG_ENTITIES+ " "+html_content)) + return display(HTML(html_content_save)) diff --git a/sparknlp_display/ner.py b/sparknlp_display/ner.py index a0e4238..3b276db 100644 --- a/sparknlp_display/ner.py +++ b/sparknlp_display/ner.py @@ -138,7 +138,7 @@ def __display_ner(self, result, label_col, document_col, original_text, labels_l return html_output - def display(self, result, label_col, document_col='document', raw_text=None, labels=None, return_html=False): + def display(self, result, label_col, document_col='document', raw_text=None, labels=None, return_html=False, 
save_path=None): """Displays NER visualization. Inputs: result -- A Dataframe or dictionary. @@ -153,8 +153,15 @@ def display(self, result, label_col, document_col='document', raw_text=None, lab html_content = self.__display_ner(result, label_col, document_col, raw_text, labels) + html_content_save = style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content_save) + if return_html: - return style_config.STYLE_CONFIG_ENTITIES+ " "+html_content + return html_content_save else: - return display(HTML(style_config.STYLE_CONFIG_ENTITIES+ " "+html_content)) + return display(HTML(html_content_save)) + \ No newline at end of file diff --git a/sparknlp_display/relation_extraction.py b/sparknlp_display/relation_extraction.py index de32379..cfffcd0 100644 --- a/sparknlp_display/relation_extraction.py +++ b/sparknlp_display/relation_extraction.py @@ -375,7 +375,7 @@ def __gen_graph(self, rdf, selected_text, exclude_relations, show_relations): return dwg.tostring() - def display(self, result, relation_col, document_col='document', exclude_relations=['O'], show_relations=True, return_html=False): + def display(self, result, relation_col, document_col='document', exclude_relations=['O'], show_relations=True, return_html=False, save_path=None): """Displays Relation Extraction visualization. Inputs: result -- A Dataframe or dictionary. @@ -392,6 +392,10 @@ def display(self, result, relation_col, document_col='document', exclude_relatio html_content = self.__gen_graph(res, original_text, exclude_relations, show_relations) + if save_path != None: + with open(save_path, 'w') as f_: + f_.write(html_content) + if return_html: return html_content else: