-
Notifications
You must be signed in to change notification settings - Fork 536
/
Copy pathextract_spouse_features.py
executable file
·49 lines (46 loc) · 1.6 KB
/
extract_spouse_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
from deepdive import *
import ddlib
@tsj_extractor
@returns(lambda
        p1_id="text",
        p2_id="text",
        feature="text",
    :[])
def extract(
        p1_id="text",
        p2_id="text",
        p1_begin_index="int",
        p1_end_index="int",
        p2_begin_index="int",
        p2_end_index="int",
        doc_id="text",
        sent_index="int",
        tokens="text[]",
        lemmas="text[]",
        pos_tags="text[]",
        ner_tags="text[]",
        dep_types="text[]",
        dep_parents="int[]",
    ):
    """
    Generate DDLIB generic relation features for one pair of person mentions.

    The per-token arrays (tokens, lemmas, pos_tags, ner_tags, dep_types,
    dep_parents) are read position by position, one entry per token in
    `tokens`.  The two mentions are given as begin/end word indexes, with
    the end index treated as inclusive.  `doc_id` and `sent_index` are
    accepted as part of the input row but are not used in the body.

    Yields one `[p1_id, p2_id, feature]` row for every feature returned by
    `ddlib.get_generic_features_relation`.
    """
    # A DDLIB "sentence" is simply a list of ddlib.Word objects, one per token.
    # Dependency parents are shifted by -1: as stored from CoreNLP 0 is ROOT,
    # whereas DDLIB uses -1 for ROOT.
    words = [
        ddlib.Word(
            begin_char_offset=None,
            end_char_offset=None,
            word=token,
            lemma=lemmas[pos],
            pos=pos_tags[pos],
            ner=ner_tags[pos],
            dep_par=dep_parents[pos] - 1,
            dep_label=dep_types[pos])
        for pos, token in enumerate(tokens)
    ]

    # Each mention becomes a DDLIB Span; end indexes are inclusive, hence +1.
    p1_length = p1_end_index - p1_begin_index + 1
    p2_length = p2_end_index - p2_begin_index + 1
    first_mention = ddlib.Span(begin_word_id=p1_begin_index, length=p1_length)
    second_mention = ddlib.Span(begin_word_id=p2_begin_index, length=p2_length)

    # Emit one output row per generic feature computed by DDLIB.
    generic_features = ddlib.get_generic_features_relation(
        words, first_mention, second_mention)
    for feat in generic_features:
        yield [p1_id, p2_id, feat]