Skip to content

Commit

Permalink
1.8 build - dependency parser and relation extraction enhancements
Browse files Browse the repository at this point in the history
  • Loading branch information
HashamUlHaq committed Jun 3, 2021
1 parent 857b1b3 commit 9431475
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 26 deletions.
2 changes: 1 addition & 1 deletion build/lib/sparknlp_display/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.7
1.8
10 changes: 5 additions & 5 deletions build/lib/sparknlp_display/dependency_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def __generate_graph(self, result_df):
return dwg.tostring()


def display(self, res, pos_col, dependency_col, dependency_type_col, return_html=False):
def display(self, res, pos_col, dependency_col, dependency_type_col=None, return_html=False):
"""Displays NER visualization.
Inputs:
Expand Down Expand Up @@ -248,10 +248,10 @@ def display(self, res, pos_col, dependency_col, dependency_type_col, return_html
df['dependency'] = dep_res
df['dependency_start'] = dep_res_meta

dept_res = []
for i in res[dependency_type_col]:
dept_res.append(i.result)
df['dependency_type'] = dept_res
if dependency_type_col != None:
df['dependency_type'] = [ i.result for i in res[dependency_type_col] ]
else:
df['dependency_type'] = ''

html_content = self.__generate_graph(df)
if return_html:
Expand Down
46 changes: 29 additions & 17 deletions build/lib/sparknlp_display/relation_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
import svgwrite
import math
import re
from IPython.display import display, HTML

here = os.path.abspath(os.path.dirname(__file__))
Expand Down Expand Up @@ -236,7 +237,7 @@ def __gen_graph(self, rdf, selected_text, exclude_relations, show_relations):
all_done = {}

start_y = 75
x_limit = 920
x_limit = 1000
y_offset = 100
#dwg = svgwrite.Drawing("temp.svg",profile='full', size = (x_limit, len(selected_text) * 1.1 + len(rdf)*20))

Expand Down Expand Up @@ -274,17 +275,23 @@ def __gen_graph(self, rdf, selected_text, exclude_relations, show_relations):
for ent_start_ind in all_entities_index:
e_start_now, e_end_now, e_chunk_now, e_entity_now = basic_dict[ent_start_ind]
prev_text = selected_text[begin_index:int(e_start_now)]
prev_text = re.sub(r'\s*(\n)+', r'\1', prev_text.strip(), re.MULTILINE)
begin_index = int(e_end_now)+1
for word_ in prev_text.split(' '):
this_size = self.__size(word_)
if (start_x + this_size + 10) >= x_limit:
for line_num, line in enumerate(prev_text.split('\n')):
if line_num != 0:
start_y += y_offset
start_x = 10
this_line = 0
dwg_texts.append([word_, (start_x, start_y ), '#546c74', '16', self.main_font, 'font-weight:100'])
#dwg.add(dwg.text(word_, insert=(start_x, start_y ), fill='#546c77', font_size='16',
# font_family='Monaco', style='font-weight:lighter'))
start_x += this_size + 10
for word_ in line.split(' '):
this_size = self.__size(word_)
if (start_x + this_size + 10) >= x_limit:
start_y += y_offset
start_x = 10
this_line = 0
dwg_texts.append([word_, (start_x, start_y ), '#546c74', '16', self.main_font, 'font-weight:100'])
#dwg.add(dwg.text(word_, insert=(start_x, start_y ), fill='#546c77', font_size='16',
# font_family='Monaco', style='font-weight:lighter'))
start_x += this_size + 10

this_size = self.__size(e_chunk_now)
if (start_x + this_size + 10)>= x_limit:# or this_line >= 2:
Expand Down Expand Up @@ -314,17 +321,22 @@ def __gen_graph(self, rdf, selected_text, exclude_relations, show_relations):
this_line += 1


prev_text = selected_text[begin_index:]
for word_ in prev_text.split(' '):
this_size = self.__size(word_)
if (start_x + this_size)>= x_limit:
prev_text = selected_text[begin_index:]
prev_text = re.sub(r'\s*(\n)+', r'\1', prev_text.strip(), re.MULTILINE)
for line_num, line in enumerate(prev_text.split('\n')):
if line_num != 0:
start_y += y_offset
start_x = 10
dwg_texts.append([word_, (start_x, start_y ), '#546c77', '16', self.main_font, 'font-weight:100'])
#dwg.add(dwg.text(word_, insert=(start_x, start_y ), fill='#546c77', font_size='16',
# font_family='Monaco', style='font-weight:lighter'))
start_x += this_size + 10

for word_ in line.split(' '):
this_size = self.__size(word_)
if (start_x + this_size)>= x_limit:
start_y += y_offset
start_x = 10
dwg_texts.append([word_, (start_x, start_y ), '#546c77', '16', self.main_font, 'font-weight:100'])
#dwg.add(dwg.text(word_, insert=(start_x, start_y ), fill='#546c77', font_size='16',
# font_family='Monaco', style='font-weight:lighter'))
start_x += this_size + 10


dwg = svgwrite.Drawing("temp.svg",profile='full', size = (x_limit, start_y+y_offset))
dwg.embed_font(self.main_font, self.font_path)
Expand Down
Binary file removed dist/spark-nlp-display-1.7.tar.gz
Binary file not shown.
Binary file added dist/spark-nlp-display-1.8.tar.gz
Binary file not shown.
Binary file not shown.
27 changes: 25 additions & 2 deletions spark_nlp_display.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: spark-nlp-display
Version: 1.7
Version: 1.8
Summary: Visualization package for Spark NLP
Home-page: http://nlp.johnsnowlabs.com
Author: John Snow Labs
Expand Down Expand Up @@ -45,15 +45,18 @@ Description: # spark-nlp-display
## To set custom label colors:
ner_vis.set_label_colors({'LOC':'#800080', 'PER':'#77b5fe'}) #set label colors by specifying hex codes

pipeline_result = ner_light_pipeline.fullAnnotate(text) ##light pipeline
#pipeline_result = ner_full_pipeline.transform(df).collect()##full pipeline

vis_html = ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe
label_col='entities', #specify the entity column
document_col='document', #specify the document column (default: 'document')
labels=['PER'], #only allow these labels to be displayed. (default: [] - all labels will be displayed)
return_html=True)


displayHTML(vis_html)
```
![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/ner_viz.png)

### Jupyter

Expand All @@ -63,20 +66,28 @@ Description: # spark-nlp-display

dependency_vis = DependencyParserVisualizer()

pipeline_result = dp_pipeline.fullAnnotate(text)
#pipeline_result = dp_full_pipeline.transform(df).collect()##full pipeline

dependency_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe.
pos_col = 'pos', #specify the pos column
dependency_col = 'dependency', #specify the dependency column
dependency_type_col = 'dependency_type' #specify the dependency type column
)
```

![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/dp_viz.png)

#### Named Entity Recognition

```python
from sparknlp_display import NerVisualizer

ner_vis = NerVisualizer()

pipeline_result = ner_light_pipeline.fullAnnotate(text)
#pipeline_result = ner_full_pipeline.transform(df).collect()##full pipeline

ner_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe
label_col='entities', #specify the entity column
document_col='document' #specify the document column (default: 'document')
Expand All @@ -88,13 +99,17 @@ Description: # spark-nlp-display

```

![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/ner_viz.png)

#### Entity Resolution

```python
from sparknlp_display import EntityResolverVisualizer

er_vis = EntityResolverVisualizer()

pipeline_result = er_light_pipeline.fullAnnotate(text)

er_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe
label_col='entities', #specify the ner result column
resolution_col = 'resolution'
Expand All @@ -106,13 +121,16 @@ Description: # spark-nlp-display

```

![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/er_viz.png)

#### Relation Extraction
```python
from sparknlp_display import RelationExtractionVisualizer

re_vis = RelationExtractionVisualizer()

pipeline_result = re_light_pipeline.fullAnnotate(text)

re_vis.display(pipeline_result[0], #should be the results of a single example, not the complete dataframe
relation_col = 'relations', #specify relations column
document_col = 'document', #specify document column
Expand All @@ -121,12 +139,16 @@ Description: # spark-nlp-display

```

![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/re_viz.png)

#### Assertion Status
```python
from sparknlp_display import AssertionVisualizer

assertion_vis = AssertionVisualizer()

pipeline_result = ner_assertion_light_pipeline.fullAnnotate(text)

assertion_vis.display(pipeline_result[0],
label_col = 'entities', #specify the ner result column
assertion_col = 'assertion' #specify assertion column
Expand All @@ -138,6 +160,7 @@ Description: # spark-nlp-display

```

![title](https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-display/main/assets/assertion_viz.png)

Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Expand Down
2 changes: 1 addition & 1 deletion sparknlp_display/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.7
1.8

0 comments on commit 9431475

Please sign in to comment.