-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathPokedexEntryBuilder.py
145 lines (122 loc) · 4.69 KB
/
PokedexEntryBuilder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""
Generate the Pokémon description
"""
import pandas as pd
import random
import ConceptNet as cn
import spacy
from spellchecker import SpellChecker
import gpt_2_simple as gpt2
import os
# Environment variables set as a side effect of importing this module.
# NOTE(review): presumably these silence TensorFlow's native log output and
# OpenMP (KMP) warnings — confirm. They are assigned after the
# `import gpt_2_simple` line above, so verify they are set early enough to
# take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['KMP_WARNINGS'] = 'off'
def get_template(edgetype):
    """
    Pick a random description template for a ConceptNet edgetype.

    The templates live in the Excel workbook ``APNLP_templates.xlsx`` inside the
    ``Data`` directory (resolved relative to the current working directory). The
    sheet named ``edgetype`` is loaded and one of its data rows is chosen at random.
    :param edgetype: a string that is one of the conceptNet edgetypes; used as the sheet name
    :return: tuple with the first two cells of the chosen row (callers in this file
             use them as the template text and the comma-separated expected POS tags)
    :raises IndexError: if the sheet contains no data rows
    """
    # Build the path with os.path.join instead of a 'Data\APNLP...' literal:
    # '\A' is an invalid string escape and a backslash separator only works on Windows.
    workbook_path = os.path.join('Data', 'APNLP_templates.xlsx')
    # Passing the path lets pandas open and close the workbook itself, so no
    # file handle is left open.
    df = pd.read_excel(workbook_path, sheet_name=edgetype)
    templates = df.values.tolist()
    chosen_t = random.choice(templates)
    return chosen_t[0], chosen_t[1]
def get_random_edgetype():
    """
    Draw one relation at random from the relations returned by cn.get_all_relations().
    :return: the randomly chosen edgetype, converted to a string
    """
    relations = cn.get_all_relations()
    picked = random.choice(relations)
    return str(picked)
def _get_spacy_model():
    """Return the 'en_core_web_sm' spaCy pipeline, loading it only on first use."""
    model = getattr(_get_spacy_model, '_model', None)
    if model is None:
        model = spacy.load('en_core_web_sm')
        # Cache on the function object so repeated calls reuse the loaded pipeline.
        _get_spacy_model._model = model
    return model


def filter_word_pos(word_list, pos_list):
    """
    Function to keep the words/characteristics from conceptnet whose syntactic root
    has one of the expected POS tags.

    Each characteristic is parsed with spaCy; it is kept when a token with
    dependency label "ROOT" carries a POS tag listed in ``pos_list``.
    :param word_list: word list from conceptnet
    :param pos_list: from excel, expected pos for the requested sentence in list form
    :return: list of characteristics out of word list that fit the expected pos tags;
             each characteristic appears at most once per occurrence in ``word_list``
    """
    # Nothing to filter: skip loading the language model altogether.
    if not word_list:
        return []
    # Strip whitespace so tags produced by splitting "NOUN, VERB" on ',' still match.
    wanted_tags = {str(tag).strip() for tag in pos_list}
    nlp = _get_spacy_model()
    word_list_possible = []
    for characteristic in word_list:
        doc = nlp(characteristic)
        # any() appends the characteristic once even if several ROOT tokens
        # (multi-sentence phrases) or repeated tags would match.
        if any(token.dep_ == "ROOT" and token.pos_ in wanted_tags for token in doc):
            word_list_possible.append(characteristic)
    return word_list_possible
def check_spelling(word):
    """
    Function to check whether a word is spelled correctly and return the most likely
    word if it is misspelled, as suggested by SpellChecker.correction.
    :param word: a word (answer by user)
    :return: the word itself when SpellChecker knows it, otherwise the suggested
             correction; the original word is returned when no correction is available
    """
    spell = SpellChecker()
    if not spell.unknown([word]):
        return word
    # correction() may yield None when it has no candidate; fall back to the
    # user's word so callers always receive a string.
    return spell.correction(word) or word
def build_sentence(word):
    """
    Function to write a sentence given a word, it looks up all the answers from
    conceptnet to find a fitting one.

    The edges returned for the (spell-checked) word are tried in random order. For
    each edge a template is drawn and the edge's words are filtered by the POS tags
    that belong to that same template; the first edge with a fitting word wins.
    :param word: a word which was one of the answers given by the user
    :return: a sentence as string ending in ". ", or None when no edge yields a
             fitting word
    """
    cn_answer = cn.conceptnet_request(check_spelling(word))
    # Shuffle a copy so the data returned by the ConceptNet helper is not mutated.
    edges = list(cn_answer[1])
    random.shuffle(edges)
    for edge in edges:
        # Draw the template once: get_template picks a random row on every call,
        # so calling it twice could pair one row's text with another row's POS tags.
        template, expected_pos = get_template(edge)
        template = str(template)
        word_list = cn_answer[0][edge]
        possible_words = filter_word_pos(word_list, str(expected_pos).split(','))
        if not possible_words:
            continue
        chosen_word = random.choice(possible_words)
        start = template.find("<")
        end = template.find(">")
        # Only substitute when a "<...>" placeholder really exists; otherwise the
        # slice would be empty and replace('', word) would insert the word between
        # every character of the template.
        if 0 <= start < end:
            template = template.replace(template[start:end + 1], chosen_word)
        return template + ". "
    return None
def build_description(answers, name, run):
    """
    Function to write the description of the Pokemon.

    One answer is picked at random and turned into a seed sentence with
    build_sentence; the seed is then extended by generation_gtp2. When no seed
    sentence can be built (no answers, or build_sentence returns None) the
    capitalized name is used as the seed instead.
    :param answers: list of answers given by user that are used for the name
    :param name: name of the pokemon
    :param run: run name forwarded unchanged to generation_gtp2
    :return: a description as string
    """
    description = None
    # random.choice raises IndexError on an empty list; treat "no answers" like
    # "no sentence found" and fall back to the name below.
    if answers:
        answer = random.choice(answers)
        description = build_sentence(answer.lower())
    if description is None:
        description = name.capitalize()
    return generation_gtp2(description, name, run)
def filter_pokemon_names(desc, name):
    """
    Filters out the Pokemon names in the description generated by the model.

    Every name listed in the 'name' column of Data/pokemon.csv that occurs in
    ``desc`` (case-sensitive substring match) is replaced by the capitalized
    ``name``. All replacements happen in a single pass, longest name first.
    :param desc: generated description
    :param name: name of the Pokemon
    :return: description without other Pokemon names
    """
    import re  # local import: only this function needs it

    poks = pd.read_csv('Data/pokemon.csv')
    replacement = name.capitalize()
    # Ignore missing (NaN) or empty cells: they are not names and str.replace /
    # regex alternation cannot handle them sensibly.
    known_names = {pok for pok in poks['name'] if isinstance(pok, str) and pok}
    if not known_names:
        return desc
    # Longest first so "Mewtwo" is matched as a whole before "Mew" can match its
    # prefix; the secondary key keeps the order deterministic.
    ordered = sorted(known_names, key=lambda pok: (-len(pok), pok))
    pattern = re.compile('|'.join(re.escape(pok) for pok in ordered))
    # A single substitution pass never re-scans inserted text, so a new name that
    # itself contains a known Pokemon name is not expanded again.
    return pattern.sub(lambda _match: replacement, desc)
def generation_gtp2(input_sent, name, run):
    """
    Generates a longer description based on the input sentence from concept net.
    It uses the gtp2 model stored under the given run name (per the original
    author's note, a model trained on real Pokedex entries).
    :param input_sent: input sentence from ConceptNet, used as the generation prefix
    :param name: name of the Pokemon
    :param run: run name passed to gpt2.load_gpt2 and gpt2.generate
    :return: the description for the Pokemon: up to three generated sentences, one
             per line, with the prefix sentence removed
    """
    input_sent += ' '
    sess = gpt2.start_tf_sess()
    try:
        gpt2.load_gpt2(sess, run_name=run)
    except Exception:
        # Loading failed: download the model and retry once. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt/SystemExit
        # propagate instead of triggering a download.
        gpt2.download_gpt2()
        gpt2.load_gpt2(sess, run_name=run)
    desc = gpt2.generate(sess, run_name=run, length=100, prefix=input_sent, return_as_list=True)[0]
    desc = filter_pokemon_names(desc, name)
    # Normalize sentence boundaries, then keep sentences 2-4: index 0 is the first
    # sentence of the generated text, which starts with the prefix.
    desc = desc.replace('. ', '.')
    desc = '.\n'.join(desc.split('.')[1:4]) + '.'
    # NOTE(review): the first replace below swaps a space for a space and is a
    # no-op as written — possibly meant to collapse double spaces; confirm.
    desc = desc.replace(' ', ' ').replace('\n ', '\n')
    # desc always ends with '.', so it is never empty and desc[0] is safe.
    desc = desc[1:] if desc[0] == ' ' else desc
    return desc