-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate_uniq.py
179 lines (160 loc) · 7.19 KB
/
update_uniq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import time
import traceback
from tabulate import tabulate
from botlib.util import aretry
from botlib.configuration import Config, read_config
from botlib.db import with_cursor
# Run the project's configuration loader once at import time, before the
# Config.get() lookup further down in this script.
# NOTE(review): presumably this populates the shared Config object -- confirm
# in botlib.configuration.
read_config()
@with_cursor
def add_new_chats(cur):
    """Start tracking channels that have enough non-bot traffic.

    Selects every channel_id in chat_counters that has no chat_uniqueness row
    yet and whose message_count, summed over users that are not listed in the
    bots table, exceeds 100. One chat_uniqueness row is inserted per selected
    channel, and the cursor's rowcount is printed when it is positive.
    """
    find_untracked = (
        "SELECT channel_id FROM chat_counters "
        " WHERE channel_id NOT IN (SELECT channel_id FROM chat_uniqueness) AND user_id NOT IN (SELECT id FROM bots) "
        " GROUP BY channel_id "
        " HAVING SUM(message_count) > 100"
    )
    cur.execute(find_untracked)

    # The fetched single-column rows are handed to executemany() unchanged,
    # one parameter set per row.
    untracked = cur.fetchall()
    cur.executemany("INSERT INTO chat_uniqueness(channel_id) VALUES (%s)", untracked)

    inserted = cur.rowcount
    if inserted > 0:
        print("Added %d new chats" % inserted)
@with_cursor
def add_new_bot_chats(cur):
    """Start tracking channels that carry heavy bot traffic.

    Selects every channel_id in chat_counters that has no chat_uniqueness row
    yet and whose message_count, summed over users that are in the bots table
    but not in good_bots, exceeds 5000. One chat_uniqueness row is inserted
    per selected channel, and the cursor's rowcount is printed when it is
    positive.
    """
    find_untracked = (
        "SELECT channel_id FROM chat_counters "
        " WHERE channel_id NOT IN (SELECT channel_id FROM chat_uniqueness) AND user_id IN (SELECT id FROM bots) AND user_id NOT IN (SELECT id FROM good_bots) "
        " GROUP BY channel_id "
        " HAVING SUM(message_count) > 5000"
    )
    cur.execute(find_untracked)

    # The fetched single-column rows are handed to executemany() unchanged,
    # one parameter set per row.
    untracked = cur.fetchall()
    cur.executemany("INSERT INTO chat_uniqueness(channel_id) VALUES (%s)", untracked)

    inserted = cur.rowcount
    if inserted > 0:
        print("Added %d new botty chats" % inserted)
# Candidate-selection query run by get_scores(). It returns at most 10 rows
# from chat_uniqueness (joined with counters and channel/server info),
# ordered by score, highest first.
#
# The column order of the outer SELECT must stay in sync with the tuple
# unpacking and the tabulate() headers in update_step().
#
# score is the sum of these terms:
#   + 100 * (message_count - last_count) / (100 + message_count)
#   + age / (1440 * 7)      age is minutes since last_update; 1440 * 7 is the
#                           number of minutes in one week
#   + 10 * (bot_messages - last_botcount) / (1000 + last_botcount)
#                           bot_messages sums chat_counters rows for users in
#                           bots but not in good_bots; NULL counts as 0
#   - 1                     only when blacklisted > 0
#   - message_count / 100000
#
# A row qualifies when score > 0.1, or when uniqueness is negative -- which
# includes rows whose stored uniqueness is NULL, because it is mapped to -1.
# message_count comes from a LEFT JOIN, so it (and therefore score) can be
# NULL for a channel with no channel_counts_nobots row.
get_chats_q = """
SELECT * FROM (
SELECT channel_id,
message_count,
(message_count - last_count) as new_messages,
bot_messages,
last_botcount,
age,
CAST((100 * (message_count - last_count))/(100+message_count) + age / (1440 * 7) + (10 * (COALESCE(bot_messages,0) - last_botcount) / (1000 + last_botcount)) - (IF(COALESCE(blacklisted,0)>0, 1, 0)) - (message_count / 100000) AS DOUBLE) AS score,
is_bad,
blacklisted,
COALESCE(uniqueness, -1) AS uniqueness,
goodness, badness, botness,
COALESCE(CONCAT(server_name, "/", channel_name), '<dm>') AS chatname
FROM (
SELECT channel_id,
message_count,
last_count,
last_botcount,
TIMESTAMPDIFF(MINUTE, last_update, CURRENT_TIMESTAMP) AS age,
uniqueness,
goodness, badness, botness
FROM chat_uniqueness
LEFT JOIN channel_counts_nobots USING (channel_id)
) a
LEFT JOIN channelinfo_current USING (channel_id)
LEFT JOIN channelinfo USING (channelinfo_id, channel_id)
LEFT JOIN options2 USING (channel_id)
LEFT JOIN serverinfo_current USING (server_id)
LEFT JOIN serverinfo USING (server_id, serverinfo_id)
LEFT JOIN (
SELECT channel_id, SUM(message_count) AS bot_messages
FROM chat_counters
WHERE user_id IN (SELECT id FROM bots) AND user_id NOT IN (SELECT id FROM good_bots) GROUP BY channel_id) bmsg USING (channel_id)
) b WHERE score > 0.1 OR uniqueness < 0 ORDER BY score DESC LIMIT 10;
"""
def get_scores(cur):
    """Execute the candidate-selection query and return every row it yields."""
    cur.execute(get_chats_q)
    candidates = cur.fetchall()
    return candidates
def get_botness(cur, channel_id):
    """Return the fraction of a channel's counted messages sent by bad bots.

    The query divides the message_count of users that are in bots but not in
    good_bots by the channel's total message_count in chat_counters.

    Returns 0 when the database yields NULL for that ratio; otherwise the
    value is returned exactly as the driver delivered it.
    """
    ratio_sql = "SELECT SUM(IF(user_id IN (SELECT id FROM bots WHERE id NOT IN (SELECT id FROM good_bots)), message_count, 0)) / SUM(message_count) FROM chat_counters WHERE channel_id=%s"
    cur.execute(ratio_sql, (channel_id,))
    ratio = cur.fetchone()[0]
    return 0 if ratio is None else ratio
def get_server_for_channel(cur, channel_id):
    """Look up the server_id recorded for a channel in chat_counters.

    Args:
        cur: open database cursor.
        channel_id: channel to look up.

    Returns:
        The server_id of one chat_counters row for the channel (which row is
        left to the database, the query has no ORDER BY), or None when the
        channel has no chat_counters row at all.
    """
    cur.execute("SELECT server_id FROM chat_counters WHERE channel_id=%s LIMIT 1", (channel_id,))
    row = cur.fetchone()
    # fetchone() yields None when the result set is empty; subscripting it
    # would raise TypeError, so report "no server" instead.
    if row is None:
        return None
    return row[0]
def get_score(cur, server_id, channel_id):
    """Compute quality, badness and goodness ratios for one channel.

    Considers rows of the chat table for the given channel whose user is not
    in the bots table and whose server_id matches ``server_id`` (NULL-safe)
    or is NULL. Over those rows:

    * quality  -- share of messages whose hash has count = 1 in
      chat_hashcounts,
    * badness  -- share of messages whose hash appears in bad_messages,
    * goodness -- share of messages whose hash appears in good_messages.

    Args:
        cur: open database cursor.
        server_id: server the channel belongs to; may be None.
        channel_id: channel to score.

    Returns:
        A 3-tuple ``(quality, badness, goodness)``. Any component the
        database reports as NULL is returned as 0.
    """
    cur.execute("SELECT COALESCE(SUM(IF(count=1, 1, 0)) / COUNT(*), 0) AS quality, "
                "       SUM(IF(bad_messages.hash IS NOT NULL, 1, 0)) / COUNT(*) AS badness, "
                "       SUM(IF(good_messages.hash IS NOT NULL, 1, 0)) / COUNT(*) AS goodness "
                "  FROM chat LEFT JOIN chat_hashcounts ON hash=UNHEX(SHA2(message, 256)) "
                "  LEFT JOIN bad_messages USING (hash) "
                "  LEFT JOIN good_messages USING (hash) "
                " WHERE user_id NOT IN (SELECT id FROM bots) "
                "   AND (chat.server_id <=> %s OR chat.server_id IS NULL) AND chat.channel_id=%s", (server_id, channel_id))
    row = cur.fetchone()
    if row is None:
        return (0, 0, 0)
    # Only quality is COALESCEd in SQL. When no rows match, the other two
    # ratios divide by a zero COUNT(*) and come back NULL, which the caller
    # cannot format or compare. Give them the same 0 fallback.
    return tuple(0 if value is None else value for value in row)
def write_score(cur, channel_id, uniq, cnt, goodness, badness, botness, bot_messages):
    """Persist freshly computed statistics on a channel's chat_uniqueness row.

    Stores ``uniq``, ``goodness``, ``badness`` and ``botness`` as given,
    records ``cnt`` as last_count and ``bot_messages`` as last_botcount
    (None is stored as 0), and stamps last_update with the database's
    current timestamp.
    """
    update_sql = (
        "UPDATE chat_uniqueness SET "
        "  uniqueness = %s, "
        "  last_count = %s, "
        "  goodness = %s, "
        "  badness = %s, "
        "  botness = %s, "
        "  last_botcount = %s, "
        "  last_update = CURRENT_TIMESTAMP "
        "WHERE channel_id = %s"
    )
    botcount = 0 if bot_messages is None else bot_messages
    values = (uniq, cnt, goodness, badness, botness, botcount, channel_id)
    cur.execute(update_sql, values)
def set_bad(cur, channel_id):
    """Set is_bad=1 for the channel in options2, inserting the row if needed."""
    upsert_sql = "INSERT INTO options2 (channel_id, is_bad) VALUES (%s, 1) ON DUPLICATE KEY UPDATE is_bad=1"
    params = (channel_id,)
    cur.execute(upsert_sql, params)
def set_blacklisted(cur, channel_id):
    """Set blacklisted=1 for the channel in options2, inserting the row if needed."""
    upsert_sql = "INSERT INTO options2 (channel_id, blacklisted) VALUES (%s, 1) ON DUPLICATE KEY UPDATE blacklisted=1"
    params = (channel_id,)
    cur.execute(upsert_sql, params)
# Comma-separated 'Badchannels' value of the 'UpdateUniq' configuration entry,
# split into a list with surrounding whitespace removed from each item and
# echoed once at start-up.
badchannels = [entry.strip() for entry in Config.get('UpdateUniq', 'Badchannels').split(',')]
print(badchannels)
@with_cursor
def update_step(cur):
    """Re-score the highest-priority chat and return its priority score.

    Fetches the candidate list, prints it as a table, and processes only the
    first (highest-score) row:

    * With more than 100 messages, the uniqueness/badness/goodness ratios are
      recomputed and stored, and the chat may be marked bad and/or
      blacklisted based on those ratios and its botness.
    * Otherwise only botness is evaluated; uniqueness, goodness and badness
      are stored as NULL.

    A chat is only marked bad or blacklisted when the respective flag is
    still NULL, so an existing 0/1 value is never overwritten here.

    Returns:
        The score column of the processed candidate row, or 0 when there was
        no candidate.
    """
    candidates = get_scores(cur)
    if not candidates:
        print("No chats to update")
        return 0

    print(tabulate(candidates, headers=['channel_id', 'msg', 'newmsg', 'botmsg', 'lastbotcnt', 'lastupd', 'score', 'is_bad', 'blacklist', 'uniq', 'Gss', 'Bss', 'Botss', 'chat_name']))

    # Column order mirrors the outer SELECT of get_chats_q. Underscore-
    # prefixed names are fetched for the table above but not used below.
    (channel_id, msg_count, _msg_new, bot_messages, _last_botcount, _age, score,
     is_bad, is_blacklisted, uniq, _goodness, _badness, _botness, chatname) = candidates[0]

    server_id = get_server_for_channel(cur, channel_id)
    print("Updating stats for %s %d %s" % (server_id, channel_id, chatname))
    botness = get_botness(cur, channel_id)

    # message_count comes from a LEFT JOIN and is NULL for a channel with no
    # channel_counts_nobots row; comparing None with an int raises TypeError,
    # so such a chat is handled as "not enough messages".
    has_enough_messages = msg_count is not None and msg_count > 100

    if has_enough_messages:
        (new_uniq, badness, goodness) = get_score(cur, server_id, channel_id)
        print("Changed uniq from %f to %f (%f) good %.3f bad %.3f bot %.3f" % (uniq, new_uniq, float(new_uniq)-float(uniq), goodness, badness, botness))
        write_score(cur, channel_id, new_uniq, msg_count, goodness, badness, botness, bot_messages)
        if (is_bad is None) and (
                (badness > 0.1) or
                (new_uniq < 0.1) or
                (botness > 0.5)):
            print("Marking chat as bad.")
            set_bad(cur, channel_id)
        # is_bad is the value read before this step, so a chat marked bad
        # just above is not blacklisted in the same pass.
        if (is_blacklisted is None) and is_bad and (
                (badness > 0.3 and msg_count > 500) or
                (badness > 0.6) or
                (botness > 0.8)):
            print("Blacklisting chat.")
            set_blacklisted(cur, channel_id)
    else:
        print("Not enough messages, botness = %.3f" % botness)
        if (is_bad is None) and (botness > 0.8):
            print("Marking chat as bad.")
            set_bad(cur, channel_id)
        if (is_blacklisted is None) and (botness > 0.9):
            print("Blacklisting chat.")
            set_blacklisted(cur, channel_id)
        # msg_count is passed through unchanged (possibly None).
        write_score(cur, channel_id, None, msg_count, None, None, botness, bot_messages)
    return score
# Main scheduling loop: register new chats, re-score one chat, then sleep.
#
# varsleep is the adaptive base delay in seconds. It grows by one after a
# low-priority step (score < 0.9) and shrinks by one after a high-priority
# step (score > 1.1), but is not decremented once it has reached 10.
varsleep = 60
while True:
    try:
        starttime = time.time()
        add_new_chats()
        add_new_bot_chats()
        score = update_step()
        # The SQL score is NULL when a chat has no message_count; treat that
        # as the lowest priority instead of failing on None comparisons.
        if score is None:
            score = 0
        endtime = time.time()
        elaps = endtime - starttime
        if score < 0.9:
            varsleep = varsleep + 1
        if score > 1.1 and varsleep > 10:
            varsleep = varsleep - 1
        # Sleep longer after a slow step and shorter when the processed chat
        # was urgent; the divisor is floored at 0.25 so a zero or negative
        # score cannot blow up or invert the delay.
        sleeptime = (elaps * 10 + varsleep) / max(0.25, score)
        print("Took %f, sleep for %f" % (elaps, sleeptime))
        time.sleep(sleeptime)
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit still stop the script. Any other failure is logged
        # and retried after a ten-minute back-off.
        traceback.print_exc()
        time.sleep(600)