-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfunctions.py
23 lines (22 loc) · 867 Bytes
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
def preprocess(data, words, max_len, *, pad_word='PAD', pad_tag='X'):
    """Encode sentences as right-padded rows of vocabulary indices.

    For every token whose ``form`` is not ``None``, the input row receives
    ``words.word_to_int[token.form]`` and the label row receives
    ``words.token_to_int[token.upos]``. Tokens whose ``form`` is ``None``
    are skipped in both rows, so the two rows always have equal length.

    Rows shorter than ``max_len`` are padded at the end with the indices of
    ``pad_word`` and ``pad_tag``. Rows that already hold ``max_len`` or more
    entries are returned as they are; nothing is truncated.

    Args:
        data: Iterable of sentences; each sentence is an iterable of tokens
            exposing ``form`` and ``upos`` attributes.
        words: Object exposing ``word_to_int`` and ``token_to_int``
            subscriptable lookups.
        max_len: Row length to pad up to.
        pad_word: Key looked up in ``words.word_to_int`` for input padding.
        pad_tag: Key looked up in ``words.token_to_int`` for label padding.

    Returns:
        A tuple ``(xtrain, ytrain)``: two lists with one row per sentence.

    Raises:
        KeyError: If a form, tag, or (when padding is required) padding key
            is absent, assuming the lookups are plain dicts.
    """
    xtrain = []
    ytrain = []
    for sentence in data:
        word_ids = []
        tag_ids = []
        for token in sentence:
            # Tokens without a form are dropped from both rows at once,
            # which keeps inputs and labels aligned position by position.
            if token.form is None:
                continue
            word_ids.append(words.word_to_int[token.form])
            tag_ids.append(words.token_to_int[token.upos])

        missing = max_len - len(word_ids)
        if missing > 0:
            # Padding keys are only looked up when padding is needed, so a
            # vocabulary without them still works for full-length rows.
            word_ids.extend([words.word_to_int[pad_word]] * missing)
            tag_ids.extend([words.token_to_int[pad_tag]] * missing)

        xtrain.append(word_ids)
        ytrain.append(tag_ids)
    return xtrain, ytrain
def preprocess_2(sentences, *, default_upos='X'):
    """Fill in missing ``upos`` tags in place and return the same object.

    Every token whose ``upos`` attribute is ``None`` has it set to
    ``default_upos``; tokens with any other value are left untouched.

    Args:
        sentences: Iterable of sentences; each sentence is an iterable of
            tokens with a writable ``upos`` attribute.
        default_upos: Value assigned to tokens whose ``upos`` is ``None``.

    Returns:
        The ``sentences`` argument itself (mutated, not copied).
    """
    for sentence in sentences:
        for token in sentence:
            if token.upos is None:
                token.upos = default_upos
    return sentences