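# summarizer.py -- driver for the multi-document summarization pipeline.
# Clusters DataCorpus3.json, summarizes each cluster with one of three
# back ends, writes the summaries (plus extracted keyphrases) to
# summary.json, and then hands off to absSummary.genSummary().
#
# Usage (Python 2; assumes the sibling modules imported below are on the path):
#   python summarizer.py deeplearning   # RBM-based summaries
#   python summarizer.py svc            # SVC-based summaries (default)
#   python summarizer.py <other>        # lexical-chain summaries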
import json
import sys

import docClus2
import MyLexicalChainer, MyRBM
import svcClustering1
import keyphraseExtraction
import absSummary
out_file = open("summary.json", "w")

# Pick the summarization back end from argv[1]; anything unrecognized
# falls through to lexical chains, and no argument defaults to SVC.
if len(sys.argv) >= 2:
    if sys.argv[1] == "deeplearning":
        option = 1
    elif sys.argv[1] == "svc":
        option = 2
    else:
        option = 3
else:
    option = 2
print "Performing Doc Clustering"
clusterings = docClus2.docClus("DataCorpus3.json")
print "Clustering done"
if option == 1:
    # RBM-based (deep learning) summarization.
    print "Performing Deep Learning"
    summaries = MyRBM.summarize("DataCorpus3.json", clusterings)
    summ = {"root": []}
    for summary in summaries:
        # No keyphrase extraction in this branch; note the "keys" field
        # (plural) versus "key" in the other two branches.
        keys = []
        print "summary : ", summary.encode('utf-8')
        print "-----------\n\n"
        summ["root"].append({"keys": keys, "summary": summary})
    json.dump(summ, out_file, indent=4)
    out_file.close()
    absSummary.genSummary()
elif option == 2:
    # SVC-based summarization with keyphrase extraction.
    print "Performing SVC"
    summaries = svcClustering1.svc("DataCorpus3.json", clusterings)
    summ = {"root": []}
    for summary in summaries:
        keys = keyphraseExtraction.extract_key(summary)
        print keys
        print "summary : ", summary.encode('utf-8')
        print "-----------\n\n"
        summ["root"].append({"key": keys, "summary": summary})
    json.dump(summ, out_file, indent=4)
    out_file.close()
    absSummary.genSummary()
else:
    # Lexical-chain summarization with keyphrase extraction.
    print "Performing Lexical Chains"
    summaries = MyLexicalChainer.summarize("DataCorpus3.json", clusterings)
    summ = {"root": []}
    for summary in summaries:
        keys = keyphraseExtraction.extract_key(summary)
        print keys
        print "summary : ", summary.encode('utf-8')
        print "-----------\n\n"
        summ["root"].append({"key": keys, "summary": summary})
    json.dump(summ, out_file, indent=4)
    out_file.close()
    absSummary.genSummary()
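
# For reference, the summary.json written above has this shape (values shown
# are placeholders; the "deeplearning" branch uses "keys" with an empty list):
# {
#     "root": [
#         {"key": ["keyphrase", "..."], "summary": "cluster summary text"},
#         ...
#     ]
# }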