-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrermymaxent.py
172 lines (155 loc) · 7.19 KB
/
rermymaxent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import json
import numpy
import math
import pickle
from scipy.optimize import minimize as mymin
import datetime
# ----------------------------------------------------------------------------------------
# maxent implementation
# ----------------------------------------------------------------------------------------
class rermymaxent(object):
def __init__(self, history_tuples, function_obj, supported_rel, reg_lambda = 0.01, pic_file = None):
# history_tuples is of the form: ((ta, tb, wn, i), tag) where ta = tag t-2, tb = tag t-1, wn = pointer to a sentence, i = current index
# function_list is of the form: [(pointer_to_function_f1, tag_for_f1), (pointer_to_function_f2, tag_for_f2)...]
# reg_lambda = regularization coefficient
# pic_file = Name of file where the classifier is pickled
self.h_tuples = history_tuples
self.func = function_obj
self.reg = reg_lambda
self.dataset = None # this will be set by create_dataset
self.tag_set = supported_rel #None # this will be also be set by create_dataset - this is the set of all tags
self.create_dataset()
if len(history_tuples)==0:
self.dim=0
self.num_examples=0
else:
self.dim = self.dataset.shape[1]
self.num_examples = self.dataset.shape[0]
#print self.num_examples
#print self.dim
#print "all_date : ",self.all_data
#print "dataset : ",self.dataset
self.model = numpy.array([0 for d in range(self.dim)]) # initialize the model to all 0
self.pic_file = pic_file
return
def create_dataset(self):
self.dataset = []
self.all_data = {}
for h in self.h_tuples: # h represents each example x that we will convert to f(x, y)
flag=False
for tag in self.tag_set:
feats = self.all_data.get(tag, [])
val = self.get_feats(h, tag)
feats.append(val)
self.all_data[tag] = feats
#print "tag : " , tag , " *** all data[tag]: " , self.all_data[tag]
if not flag and (tag in h["relations"]):
self.dataset.append(val)
flag=True
for k, v in self.all_data.items():
self.all_data[k] = numpy.array(v)
self.dataset = numpy.array(self.dataset)
return
def get_feats(self, xi, tag): # xi is the history tuple and tag is y belonging to Y (the set of all labels
# xi is of the form: history where history is a 4 tuple by itself
# self.func is the function object
return self.func.evaluate(xi, tag)
def train(self):
dt1 = datetime.datetime.now()
print 'before training: ', dt1
params = mymin(self.cost, self.model, method = 'L-BFGS-B',options = {'maxiter':25}) #, jac = self.gradient) # , options = {'maxiter':100}
self.model = params.x
dt2 = datetime.datetime.now()
print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
if self.pic_file != None:
pickle.dump(self.model, open(self.pic_file, "wb"))
return
def p_y_given_x(self, xi, tag): # given xi determine the probability of y - note: we have all the f(x, y) values for all y in the dataset
normalizer = 0.0
feat = self.get_feats(xi, tag)
dot_vector = numpy.dot(numpy.array(feat), self.model)
for t in self.tag_set:
feat = self.get_feats(xi, t)
dp = numpy.dot(numpy.array(feat), self.model)
if dp == 0:
normalizer += 1.0
else:
normalizer += math.exp(dp)
if dot_vector == 0:
val = 1.0
else:
val = math.exp(dot_vector) #
result = float(val) / normalizer
return result
def classify(self, xi):
if self.pic_file != None:
self.model = pickle.load(open(self.pic_file, "rb"))
maxval = 0.0
result = None
for t in self.tag_set:
val = self.p_y_given_x(xi, t)
if val >= maxval:
maxval = val
result = t
return result
def cost(self, params):
self.model = params
sum_sqr_params = sum([p * p for p in params]) # for regularization
reg_term = 0.5 * self.reg * sum_sqr_params
dot_vector = numpy.dot(self.dataset, self.model)
empirical = numpy.sum(dot_vector) # this is the emperical counts
expected = 0.0
for j in range((self.num_examples)):
mysum = 0.0
for tag in self.tag_set: # get the jth example feature vector for each tag
fx_yprime = self.all_data[tag][j] #self.get_feats(self.h_tuples[j][0], tag)
'''
dot_prod = 0.0
for f in range(len(fx_yprime)):
if fx_yprime[f] != 0:
dot_prod += self.model[f]
'''
dot_prod = numpy.dot(fx_yprime, self.model)
if dot_prod == 0:
mysum += 1.0
else:
mysum += math.exp(dot_prod)
expected += math.log(mysum)
#print "Cost = ", (expected - empirical + reg_term)
return (expected - empirical + reg_term)
def gradient(self, params):
self.model = params
gradient = []
for k in range(self.dim): # vk is a m dimensional vector
reg_term = self.reg * params[k]
empirical = 0.0
expected = 0.0
for dx in self.dataset:
empirical += dx[k]
for i in range(self.num_examples):
mysum = 0.0 # exp value per example
for t in self.tag_set: # for each tag compute the exp value
fx_yprime = self.all_data[t][i] #self.get_feats(self.h_tuples[i][0], t)
# --------------------------------------------------------
# computation of p_y_given_x
normalizer = 0.0
dot_vector = numpy.dot(numpy.array(fx_yprime), self.model)
for t1 in self.tag_set:
feat = self.all_data[t1][i]
dp = numpy.dot(numpy.array(feat), self.model)
if dp == 0:
normalizer += 1.0
else:
normalizer += math.exp(dp)
if dot_vector == 0:
val = 1.0
else:
val = math.exp(dot_vector) #
prob = float(val) / normalizer
# --------------------------------------------------------
mysum += prob * float(fx_yprime[k])
expected += mysum
gradient.append(expected - empirical + reg_term)
return numpy.array(gradient)
if __name__ == "__main__":
    # No standalone entry point: this module is intended to be imported.
    pass