1.8 build - dep parser and re enhancements

JohnSnowLabs · Jun 3, 2021 · 9431475 · 9431475
1 parent 857b1b3
commit 9431475
Show file tree

Hide file tree

Showing 8 changed files with 61 additions and 26 deletions.
diff --git a/build/lib/sparknlp_display/VERSION b/build/lib/sparknlp_display/VERSION
@@ -1 +1 @@
-1.7
+1.8
diff --git a/build/lib/sparknlp_display/dependency_parser.py b/build/lib/sparknlp_display/dependency_parser.py
@@ -219,7 +219,7 @@ def __generate_graph(self, result_df):
         return dwg.tostring()
 
 
-    def display(self, res, pos_col, dependency_col, dependency_type_col, return_html=False):
+    def display(self, res, pos_col, dependency_col, dependency_type_col=None, return_html=False):
         """Displays dependency parser visualization.
 
         Inputs:
@@ -248,10 +248,10 @@ def display(self, res, pos_col, dependency_col, dependency_type_col, return_html
         df['dependency'] = dep_res
         df['dependency_start'] = dep_res_meta
 
-        dept_res = []
-        for i in res[dependency_type_col]:
-            dept_res.append(i.result)
-        df['dependency_type'] = dept_res
+        if dependency_type_col != None:
+            df['dependency_type'] = [ i.result for i in res[dependency_type_col] ]
+        else:
+            df['dependency_type'] = ''
 
         html_content = self.__generate_graph(df)
         if return_html:

diff --git a/build/lib/sparknlp_display/relation_extraction.py b/build/lib/sparknlp_display/relation_extraction.py
@@ -6,6 +6,7 @@
 import numpy as np
 import svgwrite
 import math
+import re
 from IPython.display import display, HTML
 
 here = os.path.abspath(os.path.dirname(__file__))
@@ -236,7 +237,7 @@ def __gen_graph(self, rdf, selected_text, exclude_relations, show_relations):
         all_done = {}
 
         start_y = 75
-        x_limit = 920
+        x_limit = 1000
         y_offset = 100
         #dwg = svgwrite.Drawing("temp.svg",profile='full', size = (x_limit, len(selected_text) * 1.1 + len(rdf)*20))
 
@@ -274,17 +275,23 @@ def __gen_graph(self, rdf, selected_text, exclude_relations, show_relations):
         for ent_start_ind in all_entities_index:
             e_start_now, e_end_now, e_chunk_now, e_entity_now = basic_dict[ent_start_ind]
             prev_text = selected_text[begin_index:int(e_start_now)]
+            prev_text = re.sub(r'\s*(\n)+', r'\1', prev_text.strip(), re.MULTILINE)
             begin_index = int(e_end_now)+1
-            for word_ in prev_text.split(' '):
-                this_size = self.__size(word_)
-                if (start_x + this_size + 10) >= x_limit:
+            for line_num, line in enumerate(prev_text.split('\n')):
+                if line_num != 0:
                     start_y += y_offset
                     start_x = 10
                     this_line = 0
-                dwg_texts.append([word_, (start_x, start_y ), '#546c74', '16', self.main_font, 'font-weight:100'])
-                #dwg.add(dwg.text(word_, insert=(start_x, start_y ), fill='#546c77', font_size='16', 
-                #                 font_family='Monaco', style='font-weight:lighter'))
-                start_x += this_size + 10
+                for word_ in line.split(' '):
+                    this_size = self.__size(word_)
+                    if (start_x + this_size + 10) >= x_limit:
+                        start_y += y_offset
+                        start_x = 10
+                        this_line = 0
+                    dwg_texts.append([word_, (start_x, start_y ), '#546c74', '16', self.main_font, 'font-weight:100'])
+                    #dwg.add(dwg.text(word_, insert=(start_x, start_y ), fill='#546c77', font_size='16', 
+                    #                 font_family='Monaco', style='font-weight:lighter'))
+                    start_x += this_size + 10
 
             this_size = self.__size(e_chunk_now)
             if (start_x + this_size + 10)>= x_limit:# or this_line >= 2:
@@ -314,17 +321,22 @@ def __gen_graph(self, rdf, selected_text, exclude_relations, show_relations):
             this_line += 1 
 
 
-        prev_text = selected_text[begin_index:]        
-        for word_ in prev_text.split(' '):
-            this_size = self.__size(word_)
-            if (start_x + this_size)>= x_limit:
+        prev_text = selected_text[begin_index:]
+        prev_text = re.sub(r'\s*(\n)+', r'\1', prev_text.strip(), re.MULTILINE)
+        for line_num, line in enumerate(prev_text.split('\n')):
+            if line_num != 0:
                 start_y += y_offset
                 start_x = 10
-            dwg_texts.append([word_, (start_x, start_y ), '#546c77', '16', self.main_font, 'font-weight:100'])
-            #dwg.add(dwg.text(word_, insert=(start_x, start_y ), fill='#546c77', font_size='16', 
-            #                 font_family='Monaco', style='font-weight:lighter'))
-            start_x += this_size + 10
-
+            for word_ in line.split(' '):
+                this_size = self.__size(word_)
+                if (start_x + this_size)>= x_limit:
+                    start_y += y_offset
+                    start_x = 10
+                dwg_texts.append([word_, (start_x, start_y ), '#546c77', '16', self.main_font, 'font-weight:100'])
+                #dwg.add(dwg.text(word_, insert=(start_x, start_y ), fill='#546c77', font_size='16', 
+                #                 font_family='Monaco', style='font-weight:lighter'))
+                start_x += this_size + 10
+
 
         dwg = svgwrite.Drawing("temp.svg",profile='full', size = (x_limit, start_y+y_offset))
         dwg.embed_font(self.main_font, self.font_path)

diff --git a/dist/spark-nlp-display-1.7.tar.gz b/dist/spark-nlp-display-1.7.tar.gz
diff --git a/dist/spark-nlp-display-1.8.tar.gz b/dist/spark-nlp-display-1.8.tar.gz
diff --git a/dist/spark_nlp_display-1.7-py3-none-any.whl → dist/spark_nlp_display-1.8-py3-none-any.whl b/dist/spark_nlp_display-1.7-py3-none-any.whl → dist/spark_nlp_display-1.8-py3-none-any.whl
diff --git a/spark_nlp_display.egg-info/PKG-INFO b/spark_nlp_display.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spark-nlp-display
-Version: 1.7
+Version: 1.8
 Summary: Visualization package for Spark NLP
 Home-page: http://nlp.johnsnowlabs.com
 Author: John Snow Labs
@@ -45,15 +45,18 @@ Description: # spark-nlp-display
         ## To set custom label colors:
         ner_vis.set_label_colors({'LOC':'#800080', 'PER':'#77b5fe'}) #set label colors by specifying hex codes
 
+        pipeline_result = ner_light_pipeline.fullAnnotate(text) ##light pipeline
+        #pipeline_result = ner_full_pipeline.transform(df).collect()##full pipeline
+
         vis_html = ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe
                             label_col='entities', #specify the entity column
                             document_col='document', #specify the document column (default: 'document')
                             labels=['PER'], #only allow these labels to be displayed. (default: [] - all labels will be displayed)
                             return_html=True)
 
-
         displayHTML(vis_html)
         ```
+        ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/ner_viz.png)
 
         ### Jupyter
 
@@ -63,20 +66,28 @@ Description: # spark-nlp-display
 
         dependency_vis = DependencyParserVisualizer()
 
+        pipeline_result = dp_pipeline.fullAnnotate(text)
+        #pipeline_result = dp_full_pipeline.transform(df).collect()##full pipeline
+
         dependency_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe.
                                pos_col = 'pos', #specify the pos column
                                dependency_col = 'dependency', #specify the dependency column
                                dependency_type_col = 'dependency_type' #specify the dependency type column
                                )
         ```
 
+        ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/dp_viz.png)
+
         #### Named Entity Recognition
 
         ```python
         from sparknlp_display import NerVisualizer
 
         ner_vis = NerVisualizer()
 
+        pipeline_result = ner_light_pipeline.fullAnnotate(text)
+        #pipeline_result = ner_full_pipeline.transform(df).collect()##full pipeline
+
         ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe
                             label_col='entities', #specify the entity column
                             document_col='document' #specify the document column (default: 'document')
@@ -88,13 +99,17 @@ Description: # spark-nlp-display
 
         ```
 
+        ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/ner_viz.png)
+
         #### Entity Resolution
 
         ```python
         from sparknlp_display import EntityResolverVisualizer
 
         er_vis = EntityResolverVisualizer()
 
+        pipeline_result = er_light_pipeline.fullAnnotate(text)
+
         er_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe
                        label_col='entities', #specify the ner result column
                        resolution_col = 'resolution'
@@ -106,13 +121,16 @@ Description: # spark-nlp-display
 
         ```
 
+        ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/er_viz.png)
 
         #### Relation Extraction
         ```python
         from sparknlp_display import RelationExtractionVisualizer
 
         re_vis = RelationExtractionVisualizer()
 
+        pipeline_result = re_light_pipeline.fullAnnotate(text)
+
         re_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe
                        relation_col = 'relations', #specify relations column
                        document_col = 'document', #specify document column
@@ -121,12 +139,16 @@ Description: # spark-nlp-display
 
         ```
 
+        ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/re_viz.png)
+
         #### Assertion Status
         ```python
         from sparknlp_display import AssertionVisualizer
 
         assertion_vis = AssertionVisualizer()
 
+        pipeline_result = ner_assertion_light_pipeline.fullAnnotate(text)
+
         assertion_vis.display(pipeline_result[0], 
                               label_col = 'entities', #specify the ner result column
                               assertion_col = 'assertion' #specify assertion column
@@ -138,6 +160,7 @@ Description: # spark-nlp-display
 
         ```
 
+        ![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/assertion_viz.png)
 
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3

diff --git a/sparknlp_display/VERSION b/sparknlp_display/VERSION
@@ -1 +1 @@
-1.7
+1.8