-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathComplexityFrench.py
181 lines (144 loc) · 6.93 KB
/
ComplexityFrench.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import sys
sys.path.append('/home/garciacumbreras18/dist/freeling/APIs/python')
from ComplexityLanguage import ComplexityLanguage
import freeling
import os
import re
from functools import reduce
import numpy as np
import scipy.stats
import math
class ComplexityFrench(ComplexityLanguage):
    """French text-complexity metrics computed on top of FreeLing analyzers.

    On top of whatever ``ComplexityLanguage`` provides, this class registers
    three metrics ('KANDEL-MODELS', 'DALE CHALL', 'SOL') and two extended
    metrics ('MEAN RARE WORDS', 'STD RARE WORDS').
    """

    # Defaults for this installation; override them through __init__.
    DEFAULT_FREELING_DIR = "/home/garciacumbreras18/dist/freeling"
    DEFAULT_DALE_CHALL_PATH = '/home/garciacumbreras18/DaleChall.txt'

    # Letters counted as one syllable nucleus each by the syllable heuristic.
    _VOWELS = frozenset("aeiouyàâäéèêëîïôöùûüÿœæ")

    def __init__(self, *, freelingDir=None, daleChallPath=None):
        """Create the FreeLing analyzers and load the Dale-Chall word list.

        Args:
            freelingDir: FreeLing installation directory. Defaults to
                ``DEFAULT_FREELING_DIR``.
            daleChallPath: Path of a whitespace-separated word list. Defaults
                to ``DEFAULT_DALE_CHALL_PATH``.

        Raises:
            OSError: If the word-list file cannot be opened.
        """
        lang = 'fr'
        ComplexityLanguage.__init__(self, lang)

        if freelingDir is None:
            freelingDir = self.DEFAULT_FREELING_DIR
        if daleChallPath is None:
            daleChallPath = self.DEFAULT_DALE_CHALL_PATH
        DATA = freelingDir + "/data/"

        self.lang = lang
        freeling.util_init_locale("default")

        # create language analyzer
        self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # create options set for maco analyzer. Default values are Ok,
        # except for data files.
        op = freeling.maco_options(lang)
        op.set_data_files("",
                          DATA + "common/punct.dat",
                          DATA + lang + "/dicc.src",
                          DATA + lang + "/afixos.dat",
                          "",
                          DATA + lang + "/locucions.dat",
                          DATA + lang + "/np.dat",
                          DATA + lang + "/quantities.dat",
                          DATA + lang + "/probabilitats.dat")

        # create analyzers
        self.tk = freeling.tokenizer(DATA + lang + "/tokenizer.dat")
        self.sp = freeling.splitter(DATA + lang + "/splitter.dat")
        self.mf = freeling.maco(op)

        # activate morpho modules to be used in next call
        self.mf.set_active_options(False, True, True, True,  # select which among created
                                   True, True, False, True,  # submodules are to be used.
                                   True, True, True, True)   # default: all created submodules are used

        # create tagger and sense annotator
        self.tg = freeling.hmm_tagger(DATA + lang + "/tagger.dat", True, 2)
        self.sen = freeling.senses(DATA + lang + "/senses.dat")

        # Every whitespace-separated token of the file is one known word.
        with open(daleChallPath) as wordFile:
            self.listDaleChall = [
                word for line in wordFile for word in line.split()
            ]

        # config is a list of booleans that enable or disable the computation
        # of each metric. The entries appended here are, in order:
        #   True|False,  # KANDEL-MODELS
        #   True|False,  # DALE CHALL
        #   True|False,  # SOL
        self.config += [True, True, True]
        self.metricsStr.extend(['KANDEL-MODELS', 'DALE CHALL', 'SOL'])

        self.configExtend += [True, True]
        self.metricsStrExtend.extend(['MEAN RARE WORDS', 'STD RARE WORDS'])

    @classmethod
    def _countVowels(cls, word):
        """Return how many letters of ``word`` are vowels (case-insensitive)."""
        return sum(1 for letter in word.lower() if letter in cls._VOWELS)

    @staticmethod
    def _enabledMetrics(names, flags):
        """Return the set of metric names that are switched on.

        ``flags`` is the boolean list parallel to ``names``; ``None`` means
        that every metric is enabled.
        """
        if flags is None:
            return set(names)
        return {name for name, flag in zip(names, flags) if flag}

    def readability(self):
        """Compute the Kandel-Moles and Dale-Chall scores and rare-word stats.

        Reads ``self.pos_content_sentences``, ``self.N_words`` and
        ``self.N_sentences`` (set outside this class; presumably by
        ``textProcessing`` -- confirm against ``ComplexityLanguage``).

        Stores ``N_difficultwords``, ``mean_rw``, ``std_rw``, ``N_syllables``,
        ``N_syllables3``, ``kandelmodelsreadability`` and
        ``dalechallreadability`` on the instance.

        Returns:
            Tuple ``(kandel-moles score, dale-chall score, mean rare words
            per sentence, std of rare words per sentence)``.

        Raises:
            ZeroDivisionError: If ``N_words`` or ``N_sentences`` is 0.
        """
        # Set lookup keeps the per-word membership test O(1).
        knownWords = set(self.listDaleChall)
        formsPerSentence = [
            [w.get_form() for w in sentence]
            for sentence in self.pos_content_sentences
        ]

        # Number of low frequency words: forms absent from the word list,
        # counted per sentence.
        rarePerSentence = [
            sum(1 for form in forms if form not in knownWords)
            for forms in formsPerSentence
        ]
        self.N_difficultwords = sum(rarePerSentence)
        self.mean_rw = np.mean(rarePerSentence)
        self.std_rw = np.std(rarePerSentence)

        # Number of syllables and number of words with 3 or more syllables.
        # Syllables are approximated by counting vowel letters, and the count
        # is taken per word so that the ">= 3" test applies to one word only.
        nSyllables = 0
        nSyllables3 = 0
        for forms in formsPerSentence:
            for form in forms:
                vowels = self._countVowels(form)
                nSyllables += vowels
                if vowels >= 3:
                    nSyllables3 += 1
        self.N_syllables = nSyllables
        self.N_syllables3 = nSyllables3

        wordsPerSentence = self.N_words / self.N_sentences
        self.kandelmodelsreadability = (
            207 - 1.015 * wordsPerSentence
            - 73.6 * (self.N_syllables / self.N_words)
        )
        self.dalechallreadability = (
            15.79 * (self.N_difficultwords / self.N_words)
            + 0.04906 * wordsPerSentence
        )
        return (self.kandelmodelsreadability, self.dalechallreadability,
                self.mean_rw, self.std_rw)

    def ageReadability(self):
        """Compute the SOL score from ``N_syllables3`` and ``N_sentences``.

        ``self.N_syllables3`` is produced by ``readability()``, which must
        have run on the current text beforehand.

        Returns:
            The SOL score, also stored in ``self.solreadability``.

        Raises:
            ZeroDivisionError: If ``N_sentences`` is 0.
        """
        polysyllablesPer30 = self.N_syllables3 * (30 / self.N_sentences)
        self.solreadability = (
            -1.35 + 0.77 * (3.1291 + 1.0430 * math.sqrt(polysyllablesPer30))
        )
        return self.solreadability

    def calcMetrics(self, text):
        """Compute the complexity metrics enabled in ``self.config``.

        If ``self.config`` is None, every supported metric is computed.

        Args:
            text: Text to analyse; passed to ``textProcessing`` and to the
                parent ``calcMetrics``.

        Returns:
            The mapping returned by the parent class, extended with the
            enabled 'KANDEL-MODELS', 'DALE CHALL' and 'SOL' entries.
        """
        self.textProcessing(text)
        metrics = super().calcMetrics(text)

        # Select by name over the whole list instead of by index window, so
        # the result does not depend on how many entries the parent returned.
        enabled = self._enabledMetrics(self.metricsStr, self.config)
        wantKandel = 'KANDEL-MODELS' in enabled
        wantDaleChall = 'DALE CHALL' in enabled
        wantSol = 'SOL' in enabled

        if wantKandel or wantDaleChall or wantSol:
            # SOL needs it as well: ageReadability() reads the N_syllables3
            # value that readability() computes for the current text.
            readability = self.readability()
            if wantKandel:
                metrics['KANDEL-MODELS'] = readability[0]
            if wantDaleChall:
                metrics['DALE CHALL'] = readability[1]
            if wantSol:
                metrics['SOL'] = self.ageReadability()
        return metrics

    def calcMetricsExtend(self, text):
        """Compute the extended metrics enabled in ``self.configExtend``.

        If ``self.configExtend`` is None, every extended metric is computed.

        Args:
            text: Text to analyse; passed to ``textProcessing`` and to the
                parent ``calcMetricsExtend``.

        Returns:
            The mapping returned by the parent class, extended with the
            enabled 'MEAN RARE WORDS' and 'STD RARE WORDS' entries.
        """
        self.textProcessing(text)
        metricsExtend = super().calcMetricsExtend(text)

        enabled = self._enabledMetrics(self.metricsStrExtend,
                                       self.configExtend)
        wantMean = 'MEAN RARE WORDS' in enabled
        wantStd = 'STD RARE WORDS' in enabled

        if wantMean or wantStd:
            # One pass yields both statistics.
            readability = self.readability()
            if wantMean:
                metricsExtend['MEAN RARE WORDS'] = readability[2]
            if wantStd:
                metricsExtend['STD RARE WORDS'] = readability[3]
        return metricsExtend