forked from songluyi/LoveTime
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget2db.py
166 lines (150 loc) · 6.26 KB
/
get2db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# -*- coding: utf-8 -*-
# 2017/6/12 9:53
"""
-------------------------------------------------------------------------------
Function: 用来将QQ 信息标准化 导入sqlite3 数据库
Version: 1.0
Author: SLY
Contact: [email protected]
code is far away from bugs with the god Animal protecting
┏┓ ┏┓
┏┛┻━━━┛┻┓
┃ ☃ ┃
┃ ┳┛ ┗┳ ┃
┃ ┻ ┃
┗━┓ ┏━┛
┃ ┗━━━┓
┃ 神兽保佑 ┣┓
┃ 永无BUG! ┏┛
┗┓┓┏━┳┓┏┛
┃┫┫ ┃┫┫
┗┻┛ ┗┻┛
-------------------------------------------------------------------------------
"""
from colorama import init, Fore, Back, Style
import re
# Switched to sqlite3
import logging
from errors import FileError
logging.basicConfig(level=logging.INFO)
class get2db(object):
    """Normalise exported QQ chat logs and load them into a SQLite database.

    Typical flow: ``get_path`` -> (optionally ``check_format``) ->
    ``get_content`` -> ``insert_db``.
    """

    # Timestamp that opens every message header line, e.g. "2017-06-12 9:53:00".
    _TIME_RE = re.compile(r'\d{4}-\d{2}-\d{2} \d{1,2}:\d{2}:\d{2}')
    # First run of 1-20 CJK characters in a header line, taken as the nickname.
    _USER_RE = re.compile('[\u4e00-\u9fa5]{1,20}')
    # Number of leading lines of an export skipped before messages are parsed.
    HEADER_LINES = 8

    def __init__(self):
        # TODO (original author): determine each user's gender automatically
        # (crawler / external library) instead of hard-coding nicknames.
        self.boy_name = '一只特立独行的猪'
        self.girl_name = '一颗被拱了的白菜'

    def connect_db(self, db_path='store.db'):
        """Open the SQLite database and make sure the ``msg`` table exists.

        Args:
            db_path: Database file to open; defaults to ``store.db`` in the
                current working directory.

        Returns:
            An open ``sqlite3.Connection``. The caller must close it.
        """
        import sqlite3
        conn = sqlite3.connect(db_path)
        conn.execute('''
            CREATE TABLE IF NOT EXISTS "msg" (
            "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
            "qq_msg" TEXT,
            "qq_user" TEXT,
            "qq_time" INTEGER
            );
        ''')
        return conn

    def get_path(self):
        """Collect the ``.txt`` chat exports found in ``<cwd>/msg``.

        Returns:
            A list with the full path of every ``.txt`` file in the directory.

        Raises:
            FileError: If the directory contains no ``.txt`` file.
            OSError: Propagated from ``os.listdir`` when the ``msg`` directory
                cannot be listed (e.g. it does not exist).
        """
        import os
        # os.path.join keeps this portable instead of hard-coding '\\'.
        rootdir = os.path.join(os.getcwd(), 'msg')
        FileList = []
        for name in os.listdir(rootdir):
            full_path = os.path.join(rootdir, name)
            # Match the real extension only ("a.txt.bak" is not a chat log)
            # and skip directories, which could not be opened later anyway.
            if name.endswith('.txt') and os.path.isfile(full_path):
                FileList.append(full_path)
        if not FileList:
            print(rootdir, ': txt file was not found please check.')
            raise FileError
        print(Fore.WHITE + '检测到您目录下有如下txt聊天文件 请确认是不是你要进行检测')
        for item in FileList:
            print(item)
        return FileList

    # Original author's note: single-file import is enough for now.
    def check_format(self, path_dict):
        """Drop files whose header does not look like a QQ chat export.

        A file is accepted when its 4th line contains '消息分组' and its 6th
        line contains '消息对象'. When the 6th line matches, ``self.girl_name``
        is updated from it.

        Args:
            path_dict: List of file paths (a list, despite the name). When it
                is a list it is filtered in place, as the original did.

        Returns:
            The list of accepted paths (the same object as ``path_dict`` when
            that is a list).
        """
        from itertools import islice

        valid_files = []
        for file in path_dict:
            with open(file, 'r', encoding="utf8") as check_file:
                # Only the first six lines are ever inspected.
                head = list(islice(check_file, 6))
            # Files too short to contain the checked lines are rejected.
            group_ok = len(head) >= 4 and '消息分组' in head[3]
            target_ok = len(head) >= 6 and '消息对象' in head[5]
            if group_ok:
                print(head[3])
            if target_ok:
                print(head[5])
                # NOTE(review): offset 9 (and the kept trailing newline) come
                # from the original code; confirm against a real export.
                self.girl_name = head[5][9:]
            if group_ok and target_ok:
                print(Fore.GREEN + '检测完成 下一步生成数据库文件')
                valid_files.append(file)
            else:
                print(Fore.RED + '该文本不符合导入要求,已经从列表中删除')
        # Build the result separately: deleting from the list while iterating
        # it skipped entries and always removed index 0.
        if isinstance(path_dict, list):
            path_dict[:] = valid_files
            return path_dict
        return valid_files

    def check_title(self, string):
        """Return True when ``string`` contains a message timestamp."""
        return self._TIME_RE.search(string) is not None

    @staticmethod
    def _build_row(header, body_lines):
        """Turn a (time, user) header and its body lines into a DB row."""
        msg_time, msg_user = header
        content = ''.join(body_lines).replace('\n', '')
        return (msg_time, content, msg_user)

    def get_content(self, legal_path):
        """Parse chat exports into rows ready for ``insert_db``.

        The first ``HEADER_LINES`` lines of each file are skipped. Every line
        containing a timestamp starts a new message; the lines that follow,
        up to the next such line, form its text with newlines removed.

        Args:
            legal_path: Iterable of paths to UTF-8 chat export files.

        Returns:
            A list of ``(time, content, user)`` tuples in file order.
        """
        data = []
        for file in legal_path:
            with open(file, 'r', encoding='utf8') as qq_msg:
                lines = qq_msg.readlines()[self.HEADER_LINES:]
            header = None  # (time, user) of the message being assembled
            body = []
            for line in lines:
                stamp = self._TIME_RE.search(line)
                if stamp is None:
                    # Text before the first header belongs to no message.
                    if header is not None:
                        body.append(line)
                    continue
                # A new header closes the previous message, if any.
                if header is not None:
                    data.append(self._build_row(header, body))
                # Nicknames are normally Chinese and at most 20 characters.
                name = self._USER_RE.search(line)
                if name is not None:
                    msg_user = name.group(0)
                else:
                    # Non-Chinese nickname: take what follows the timestamp,
                    # independent of hour width and without the newline.
                    msg_user = line[stamp.end():].strip()
                header = (stamp.group(0), msg_user)
                body = []
            # Flush the last message, which the original silently dropped.
            if header is not None:
                data.append(self._build_row(header, body))
        return data

    def insert_db(self, data):
        """Insert parsed rows into the ``msg`` table if it is still empty.

        Args:
            data: Iterable of ``(time, content, user)`` tuples as returned by
                ``get_content``.

        Nothing is written when the table already holds rows. The original
        author intended to refine this check later.
        """
        insert_sql = "INSERT INTO msg(qq_time,qq_msg,qq_user) VALUES (?, ?, ?)"
        # One row is enough to know the table is non-empty.
        exists_sql = "SELECT 1 FROM msg LIMIT 1"
        db = self.connect_db()
        try:
            cursor = db.cursor()
            cursor.execute(exists_sql)
            if cursor.fetchone() is not None:
                print(Fore.YELLOW + '数据库中已经存在了需要检测的聊天记录,本次不会导入!')
            else:
                print(Fore.GREEN + '正在导入你的聊天数据,请稍后.....')
                cursor.executemany(insert_sql, data)
                db.commit()
        finally:
            # Always release the connection, even if the insert fails.
            db.close()
# Original author's note: these steps will be consolidated later; there is
# still a lot left to rework.
if __name__ == "__main__":
    # Locate the chat exports, parse them and load the rows into the database.
    importer = get2db()
    chat_files = importer.get_path()
    parsed_rows = importer.get_content(chat_files)
    importer.insert_db(parsed_rows)