-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
125 lines (112 loc) · 5.49 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/python
import os
import openai
import pandas as pd
import flask
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import sortData
# Flask application object; the route handler defined below registers itself on it.
app = flask.Flask(__name__)
# Read the OpenAI API key from the environment (None when the variable is unset)
# so that the key is never hard-coded in the source.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Here the code interacts with the OpenAI API, using the reformatted user-centric film data from the findSimilarFilms() function, to get text that helps market the recommended films
# according to the user's assumed tastes.
def generateAIResponse(filmData):
    """Request one marketing-style recommendation per similar film from OpenAI.

    Args:
        filmData: Sequence of ``[title, attributes]`` pairs. Entry 0 is the
            film the user selected; every later entry is a film to recommend.
            In the attribute strings, ``_`` is rendered as a space and ``,``
            as ``", "`` before being placed in the prompt.

    Returns:
        A list holding one ``openai.Completion.create`` response per
        recommended film (empty when ``filmData`` contains no recommendations),
        or ``None`` when anything fails; the failure is printed.
    """
    # Adjacent literals are joined by the parser, so the prompt contains exactly
    # one space at each line break (backslash continuations inside a literal
    # either fuse words or leak source indentation into the prompt).
    promptTemplate = (
        "Recommend the film {recTitle} in a long paragraph, emphasising its attributes "
        "described following: {recAttr}. Make sure to relate the recommendation back "
        "to the user-selected film {userTitle} and its attributes: {userAttr}. Do this "
        "all the while using your own knowledge as well, while keeping the response natural "
        "and adressing the user directly."
    )

    def readable(attributes):
        # "sci_fi,space" -> "sci fi, space"
        return attributes.replace('_', ' ').replace(',', ', ')

    try:
        # Only the selected film (or nothing at all) was supplied: there is
        # nothing to recommend, so no request is sent.
        if len(filmData) < 2:
            return []

        userTitle = filmData[0][0]
        userAttr = readable(filmData[0][1])
        return [
            openai.Completion.create(
                model="text-davinci-003",
                prompt=promptTemplate.format(
                    recTitle=recTitle,
                    recAttr=readable(recAttr),
                    userTitle=userTitle,
                    userAttr=userAttr,
                ),
                temperature=1,
                max_tokens=500,
            )
            for recTitle, recAttr in (film[:2] for film in filmData[1:])
        ]
    # The pre-1.0 SDK (the one that provides openai.Completion) keeps its
    # exception classes in ``openai.error``. The exceptions are formatted
    # directly because they do not expose a ``message`` attribute.
    except openai.error.InvalidRequestError as e:
        print("OpenAI API error: Invalid request - {0}".format(e))
        return None
    except openai.error.AuthenticationError as e:
        print("OpenAI API error: Authentication failed - {0}".format(e))
        return None
    except openai.error.APIConnectionError as e:
        print("OpenAI API error: Connection failed - {0}".format(e))
        return None
    except openai.error.OpenAIError as e:
        print("OpenAI API error: {0}".format(e))
        return None
    except Exception as e:
        print("Unexpected error: {0}".format(e))
        return None
# This part of the code takes the data from the .csv file produced by the sortData script and reshapes it into a more user-friendly form, before handing it to a machine-learning algorithm
# built with scikit-learn to find the 5 films most similar to the film the user has entered. This data is then handed to the generateAIResponse() function.
def _loadFilmData():
    """Return the film table, or None (after printing the reason) when it cannot be loaded."""
    try:
        if not os.path.isfile("data/complete.csv"):
            # NOTE(review): assumes sortData.main() returns the DataFrame it builds - confirm.
            return sortData.main()
        return pd.read_csv("data/complete.csv")
    except FileNotFoundError as e:
        print("Error: Could not find the data file.")
        print(str(e))
    except pd.errors.ParserError as e:
        print("Error: Could not parse the data file.")
        print(str(e))
    except Exception as e:
        print("Error: Could not load the data file.")
        print(str(e))
    return None


def _parseRating(rawRating):
    """Convert one "rating" cell to a float weight; unusable values weigh 0.0."""
    if isinstance(rawRating, str):
        # Keep digits and dots only, e.g. "8.5*" -> "8.5".
        rawRating = re.sub(r'[^\d\.]', '', rawRating)
    try:
        rating = float(rawRating)
    except (TypeError, ValueError):
        return 0.0
    return 0.0 if pd.isna(rating) else rating


def findSimilarFilms(title):
    """Find the five films most similar to ``title`` and get AI text recommending them.

    Similarity is the cosine similarity between bag-of-words vectors of the
    "tags" column, multiplied by each candidate's rating.

    Args:
        title: Film title; it must equal a value in the "title" column exactly.

    Returns:
        Whatever ``generateAIResponse`` returns for the selected film plus up
        to five recommendations (a list of responses, or ``None`` if that call
        failed), or ``[]`` when the data cannot be loaded, the title is
        unknown, or the similarity computation fails.
    """
    data = _loadFilmData()
    if data is None:
        return []

    try:
        # Work with row positions throughout so the text list, the similarity
        # vector and the table can never drift out of step with each other.
        titlePositions = (data["title"] == title).to_numpy().nonzero()[0]
        if len(titlePositions) == 0:
            print("Error: Could not find title {} in data file.".format(title))
            return []
        filmPos = int(titlePositions[0])

        # Only the tags feed the vectorizer. Every row is kept (none skipped)
        # so that combinedData[i] always describes row i of the table.
        combinedData = [str(tags) for tags in data["tags"]]
        countMatrix = CountVectorizer().fit_transform(combinedData)

        # Only the selected film's row of the similarity matrix is needed.
        similarities = cosine_similarity(countMatrix[filmPos], countMatrix)[0]
        weightedScores = [
            similarity * _parseRating(rating)
            for similarity, rating in zip(similarities, data["rating"])
        ]

        # Exclude the selected film explicitly: once scores are weighted by
        # rating it is not guaranteed to rank first, so slicing off the top
        # entry could drop the best match and recommend the film to itself.
        candidates = [pos for pos in range(len(weightedScores)) if pos != filmPos]
        candidates.sort(key=weightedScores.__getitem__, reverse=True)

        filmData = [[data["title"].iloc[filmPos], combinedData[filmPos]]]
        filmData.extend(
            [data["title"].iloc[pos], combinedData[pos]] for pos in candidates[:5]
        )
        return generateAIResponse(filmData)
    except Exception as e:
        print("Error: Could not compute similarity scores.")
        print(str(e))
        return []
# Finally, here the code both obtains the form data from index.html while also outputting back the OpenAI text from generateAIResponse() via findSimilarFilms().
@app.route("/", methods=("POST", "GET"))
def index():
    """Serve the search page and, on form submission, the recommendation results.

    GET renders index.html with the optional ``result`` query parameter.
    POST reads the ``film`` form field, looks up similar films and renders
    index.html with ``[film, similarFilms]``; a ValueError raised by the
    lookup is shown through error.html instead.
    """
    request = flask.request

    # Plain page view: nothing to compute.
    if request.method != "POST":
        return flask.render_template("index.html", result=request.args.get("result"))

    film = request.form["film"]
    try:
        similarFilms = findSimilarFilms(film)
    except ValueError as err:
        return flask.render_template("error.html", errorMessage=str(err))
    return flask.render_template("index.html", result=[film, similarFilms])