ai_logic.py
from os.path import basename
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from utils import *
import matplotlib
matplotlib.use('QtAgg')  # select the Qt backend before pyplot is imported
import matplotlib.pyplot as plt


class AI(object):
    def __init__(self, OPENAI_API_KEY, model_type, llm_model):
        # Initialize the chat client and the embedding model
        self.client, self.model = initialize_openai_and_embedding(OPENAI_API_KEY, model_type, llm_model)
        self.llm_model = llm_model

    def content_embedding(self, content):
        # Encode a piece of text into a plain list of floats
        return self.model.encode(content).tolist()
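
    # A quick sketch (not in the original file), assuming an instantiated
    # `ai = AI(...)`. The embedding is a plain list, so it can be fed straight
    # to scikit-learn; the vector length depends on whichever embedding model
    # utils loads.
    #   vec = ai.content_embedding("hello world")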

    def clustering(self, allowed_type, txt_files, pdf_files, docx_files, num_clusters):
        # Read every file of the allowed type(s), embed its content, cluster
        # the embeddings, and show a 2-D scatter plot of the result
        contents = []
        C_path = []
        if allowed_type in ("txt", "all"):
            for file in txt_files:
                with open(file, 'r', encoding='utf-8') as f:
                    contents.append(f.read())
                C_path.append(basename(file))
        if allowed_type in ("pdf", "all"):
            for file in pdf_files:
                try:
                    pdf_text = read_pdf(file)
                    contents.append(pdf_text)
                    C_path.append(basename(file))
                except Exception as e:
                    write_log(f"Warning: Error reading PDF file: {e}")
        if allowed_type in ("docx", "all"):
            for file in docx_files:
                try:
                    docx_text = read_docx(file)
                    contents.append(docx_text)
                    C_path.append(basename(file))
                except Exception as e:
                    write_log(f"Warning: Error reading DOCX file: {e}")
        if len(contents) < num_clusters:
            # Not enough documents for the requested number of clusters
            return "Error"
        X = [self.content_embedding(content) for content in contents]
        # KMeans runs on the full embeddings; TruncatedSVD reduces them to 2-D
        # purely for plotting
        svd = TruncatedSVD(n_components=2)
        X_svd = svd.fit_transform(X)
        kmeans = KMeans(n_clusters=num_clusters, n_init=10)  # explicit n_init avoids the scikit-learn deprecation warning
        kmeans.fit(X)
        y_kmeans = kmeans.predict(X)
        plt.figure(figsize=(8, 6))
        for i in range(num_clusters):
            plt.scatter(X_svd[y_kmeans == i, 0], X_svd[y_kmeans == i, 1], label=f'Cluster {i+1}')
        for i in range(len(contents)):
            plt.annotate(C_path[i], (X_svd[i, 0], X_svd[i, 1]))
        plt.title('Clustering of Text Files Content')
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.legend()
        plt.show()
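
    # Example call (a sketch; file names are hypothetical placeholders):
    #   ai.clustering("txt", ["a.txt", "b.txt", "c.txt"], [], [], num_clusters=2)
    # Returns "Error" when fewer documents than clusters are available;
    # otherwise it opens the scatter-plot window and returns None.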

    def keyword_search(self, allowed_type, txt_files, pdf_files, docx_files, keyword):
        # Return the files of the allowed type(s) whose text contains the
        # keyword (case-insensitive substring match)
        results = []
        if allowed_type in ("txt", "all"):
            for file in txt_files:
                with open(file, 'r', encoding='utf-8') as f:
                    content = f.read()
                if keyword.lower() in content.lower():
                    results.append(file)
        if allowed_type in ("pdf", "all"):
            for file in pdf_files:
                try:
                    pdf_text = read_pdf(file)
                    if keyword.lower() in pdf_text.lower():
                        results.append(file)
                except Exception as e:
                    write_log(f"Warning: Error reading PDF file: {e}")
        if allowed_type in ("docx", "all"):
            for file in docx_files:
                try:
                    docx_text = read_docx(file)
                    if keyword.lower() in docx_text.lower():
                        results.append(file)
                except Exception as e:
                    write_log(f"Warning: Error reading DOCX file: {e}")
        return results
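
    # Example call (a sketch; paths are hypothetical placeholders):
    #   hits = ai.keyword_search("all", txt_files, pdf_files, docx_files, "invoice")
    # `hits` is the list of file paths whose text contains "invoice",
    # matched case-insensitively.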

    def chat_interaction(self, msg, selected_note_files, chat_history):
        # Answer a user message, grounding the reply in the selected note files
        notes_text = ""
        for file in selected_note_files:
            with open(file, 'r', encoding='utf-8') as f:
                notes_text += f.read() + "\n"
        if self.llm_model in ["Tinyllama(Q5)", "Llama2-7B(Q4)"]:
            # Local quantized model: flatten the history into one text prompt
            history_str = '\n'.join([f"{c['role']}: {c['content']}" for c in chat_history])
            prompt = (f"Answer the user question based on the following notes. "
                      f"(Answer in at most one sentence, 15-20 words.)\n\n"
                      f"Notes: \n{notes_text}\n\n{history_str}\nuser: {msg}\nassistant: ")
            chatbot_response = self.client(prompt, temperature=0.7, max_new_tokens=41,
                                           stop=['assistant:', 'user:'], threads=4)
        else:
            # OpenAI chat completion; the notes are passed as an extra user message
            response = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Answer the user question based on the notes. (Answer in at most one sentence, 15-20 words.)"},
                    {"role": "user", "content": f"Notes: \n{notes_text}"}
                ] + chat_history + [{"role": "user", "content": msg}],
                max_tokens=41)
            chatbot_response = response.choices[0].message.content
        return chatbot_response.strip()
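

# A minimal usage sketch, not part of the original module. The API key and file
# paths are placeholders, and model_type="openai" is an assumption; whatever
# values initialize_openai_and_embedding in utils actually accepts are
# authoritative.
if __name__ == "__main__":
    ai = AI(OPENAI_API_KEY="sk-REPLACE_ME", model_type="openai", llm_model="gpt-3.5-turbo")

    # Search two hypothetical note files for a keyword
    matches = ai.keyword_search("txt", ["notes/todo.txt", "notes/ideas.txt"], [], [], "meeting")
    print("Files mentioning 'meeting':", matches)

    # Ask one question grounded in a note file, starting from an empty history
    reply = ai.chat_interaction("What is my next meeting about?", ["notes/todo.txt"], chat_history=[])
    print(reply)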