-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScorer.py
37 lines (34 loc) · 1.02 KB
/
Scorer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
'''
Created on Oct 19, 2013
@author: Aakrati
'''
import nltk
def lcs(a, b):
    """Return the longest common subsequence (LCS) of two sequences.

    Elements are compared with ``==``, so ``a`` and ``b`` may be strings
    (compared character by character) or lists of string tokens
    (compared token by token).

    Args:
        a: First indexable sequence whose elements are strings.
        b: Second indexable sequence whose elements are strings.

    Returns:
        A ``(length, result)`` tuple. ``length`` is the number of elements
        in the LCS. ``result`` is those elements concatenated into a single
        string with no separator, so for token lists the matched words are
        run together (e.g. ``"helloworld"``).
    """
    rows, cols = len(a), len(b)

    # lengths[i][j] is the LCS length of a[:i] and b[:j]. Row 0 and
    # column 0 stay 0 because an empty prefix has nothing in common.
    lengths = [[0] * (cols + 1) for _ in range(rows + 1)]
    for i, x in enumerate(a):
        for j, y in enumerate(b):
            if x == y:
                lengths[i + 1][j + 1] = lengths[i][j] + 1
            else:
                lengths[i + 1][j + 1] = max(lengths[i + 1][j],
                                            lengths[i][j + 1])

    # Read the subsequence back out of the matrix, walking from the
    # bottom-right corner. Elements are found last-to-first, so they are
    # collected in a list and reversed once instead of prepending to a
    # string on every step (which copies the whole string each time).
    picked = []
    x, y = rows, cols
    while x != 0 and y != 0:
        if lengths[x][y] == lengths[x - 1][y]:
            x -= 1
        elif lengths[x][y] == lengths[x][y - 1]:
            y -= 1
        else:
            # Neither neighbour explains this cell, so it came from a match.
            assert a[x - 1] == b[y - 1]
            picked.append(a[x - 1])
            x -= 1
            y -= 1
    picked.reverse()
    return (lengths[rows][cols], "".join(picked))
# Demo: tokenize two sentences into words and report their word-level LCS.
sent1 = 'Oh hello my friend go to hell'
sent2 = 'hello go to my friend hello Oh'
sent1 = nltk.word_tokenize(sent1)
sent2 = nltk.word_tokenize(sent2)
# Call print with parentheses so the line parses on Python 3 as well as
# Python 2; with a single argument both versions print the same tuple.
print(lcs(sent1, sent2))