-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathner_main.py
131 lines (117 loc) · 4.1 KB
/
ner_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import feature_functions
from MyViterbi import MyViterbi
from MyMaxEnt import MyMaxEnt
import ner_metrics
import json
class ner_main():
def classify(self, clf, history_list, wmap):
result = []
for index in range(len(history_list)):
mymap = self.wmap[history_list[index][0]["wn"]]
words = mymap['words']
index = history_list[index][0]["i"]
val = clf.classify(history_list[index][0])
history_list[index+1]["ta"] = history_list[index]["tb"]
history_list[index+1]["tb"] = val
return history_list
def get_tags(self,sentence):
supported_tags = ["Org", "OS", "Version", "Family", "Price", "Phone", "Feature" ,"Other"]
func_obj = feature_functions.FeatureFunctions()
wordlist=[]
words=sentence.split(" ")
mywords=words
for word in words:
wordlist.append(word)
wmap=[]
wmap.append({"words":words})
func_obj.set_wmap(wmap)
clf = MyMaxEnt([], func_obj, reg_lambda = 0.001, pic_file = 'all_data.p')
history_list=[]
for i in range(len(words)):
history = {}
history["i"] = i
if i == 0:
history["ta"] = "*" # special tag
history["tb"] = "*" # special tag
elif i == 1:
history["ta"] = "*" # special tag
history["tb"] = ""
else:
history["ta"] = ""
history["tb"] = ""
history["wn"] = 0
history_list.append((history, '', ))
for index in range(len(history_list)):
mymap = wmap[history_list[index][0]["wn"]]
index = history_list[index][0]["i"]
val = clf.classify(history_list[index][0])
if index != len(history_list)-1:
history_list[index+1][0]["ta"] = history_list[index][0]["tb"]
history_list[index+1][0]["tb"] = val
vitObj=MyViterbi()
path=vitObj.Viterbi(words, clf, supported_tags, history_list)
ner_tag_result={"words":words,"tags":path}
#print ner_tag_result
return ner_tag_result
def compute(self):
json_file = r"all_data.json"
pickle_file = r"all_data.p"
# ----------------------------------------------------------------
TRAIN = 1 #int(raw_input("Enter 1 for Train, 0 to use pickeled file: "))
supported_tags = ["Org", "OS", "Version", "Family", "Price", "Phone", "Feature" ,"Other"]
tag_set = {"Org": 0, "Other": 1}
dims = 9
trg_data_x = []
trg_data_y = []
trg_data = {'Org': [], 'Other': []}
data = json.loads(open(json_file).read())['root']
(history_list, wmap, count_list) = feature_functions.build_history(data[1:40], supported_tags)
print "After build_history"
func_obj = feature_functions.FeatureFunctions()#, supported_tags)
func_obj.set_wmap(wmap)
clf = MyMaxEnt(history_list, func_obj, reg_lambda = 0.001, pic_file = pickle_file)
print clf.model
if TRAIN == 1:
clf.train()
#-----------To test MaxEnt Classifier-------------------------------------------------
'''
result = feature_functions.test(clf, history_list[-500:], wmap)
count_correct=0
for r in result:
print r['word'], r['predicted'], r['expected']
if r['predicted'] == r['expected']:
count_correct+=1
print count_correct
'''
vitObj=MyViterbi()
#----------To perform tagging using MEMM----------------------------------------------
tagged_sentences = []
predicted = []
count_iters=0
num=1
#ner_tag_result=[]
#print "Start Viterbi"
while count_iters <= 3:
num+=1
try:
h_list = history_list[count_list[num-1]+1:count_list[num]+1]
tagged_sentence=[]
sentence_words = wmap[num]["words"]
path=vitObj.Viterbi(sentence_words, clf, supported_tags, h_list)
print path
#ner_tag_result.append({"words":sentence_words,"tags":path})
for index in range(len(sentence_words)):
tagged_sentence.append(h_list[index][1])
tagged_sentences.append(tagged_sentence)
predicted.append(path)
count_iters+=1
except:
continue
#print ner_tag_result
ner=ner_metrics.NerMetrics(tagged_sentences,predicted)
metrics=ner.compute()
#ner.print_results()
#print "Metrics", metrics
#return ner_tag_result
#n=ner_main()
#n.compute()