-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsim.py
87 lines (66 loc) · 2.69 KB
/
sim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from __future__ import division

import json
import os
import pdb
import pickle as pkl

import numpy as np
from tqdm import tqdm


class Data(object):
    """Index per-user item histories and build Jaccard similarity matrices.

    The input file is a JSON object mapping each user to a list of items.
    Users whose history length lies outside ``[low, high]`` are dropped; the
    remaining users and their items are sorted and renumbered with
    consecutive integer indices starting at 0.

    Attributes:
        user_history: dict mapping user index -> list of item indices.
        item_history: dict mapping item index -> list of user indices.
        user_sim: square float array (one row per user), filled in by
            ``find_user_sim``.
        item_sim: square float array (one row per item), filled in by
            ``find_item_sim``.
        out_dir: directory that receives every file this class writes.
    """

    def __init__(self, filename, low, high, out_dir='../data'):
        """Load, filter and index the histories, then save the index maps.

        Args:
            filename: path of the JSON file holding ``{user: [item, ...]}``.
            low: minimum history length (inclusive) for a user to be kept.
            high: maximum history length (inclusive) for a user to be kept.
            out_dir: existing directory in which ``user_key.pkl``,
                ``item_key.pkl`` and, later, the ``*_sim.npy`` files are
                written.
        """
        with open(filename) as fp:
            all_user_history = json.load(fp)

        self.out_dir = out_dir

        # Keep only the users whose history length is within [low, high].
        user_history = {}
        item_set = set()
        for user in tqdm(all_user_history.keys()):
            history = all_user_history[user]
            if low <= len(history) <= high:
                user_history[user] = history
                item_set.update(history)

        users = sorted(user_history)
        items = sorted(item_set)

        # Sorted position doubles as the row/column index in the matrices.
        user_key = dict((user, idx) for idx, user in enumerate(users))
        item_key = dict((item, idx) for idx, item in enumerate(items))

        # A real list (not map()) is required: on Python 3 map() yields a
        # one-shot iterator that would be empty after its first traversal.
        self.user_history = {}
        for idx, user in enumerate(users):
            self.user_history[idx] = [item_key[item]
                                      for item in user_history[user]]

        # Invert the mapping: for every item, the users that contain it.
        self.item_history = {}
        for user in tqdm(self.user_history.keys()):
            for item in self.user_history[user]:
                self.item_history.setdefault(item, []).append(user)

        self.user_sim = np.zeros((len(users), len(users)))
        self.item_sim = np.zeros((len(items), len(items)))

        # Pickle streams are binary; 'wb' is needed on Python 3 and avoids
        # newline translation on Windows. 'with' closes the handles.
        with open(self._output_path('user_key.pkl'), 'wb') as fp:
            pkl.dump(user_key, fp)
        with open(self._output_path('item_key.pkl'), 'wb') as fp:
            pkl.dump(item_key, fp)

    @staticmethod
    def jaccard(first, second):
        """Return the Jaccard similarity ``|A & B| / |A | B|`` of two sets.

        Args:
            first: a ``set`` or ``frozenset``.
            second: a ``set`` or ``frozenset``.

        Returns:
            A float in ``[0.0, 1.0]``. Two empty sets yield ``0.0`` instead
            of raising ``ZeroDivisionError``.
        """
        shared = len(first & second)
        # Inclusion-exclusion gives |A | B| without building the union set.
        total = len(first) + len(second) - shared
        if not total:
            return 0.0
        # float() keeps true division even without the __future__ import.
        return float(shared) / total

    def _output_path(self, name):
        """Return the path of output file ``name`` inside ``self.out_dir``."""
        return os.path.join(self.out_dir, name)

    def _fill_similarity(self, history, sim):
        """Fill ``sim[a, b]`` with the Jaccard similarity of every key pair."""
        # Build each set once instead of once per pair.
        members = dict((key, set(values)) for key, values in history.items())
        for key1 in tqdm(members.keys()):
            set1 = members[key1]
            for key2, set2 in members.items():
                sim[key1, key2] = self.jaccard(set1, set2)

    def _save_matrix(self, matrix, name):
        """Write ``matrix`` in .npy format to ``name`` inside ``out_dir``."""
        # np.save emits binary data, so the file must be opened with 'wb'.
        with open(self._output_path(name), 'wb') as fp:
            np.save(fp, matrix)

    def find_user_sim(self):
        """Compute all user-user similarities and save ``user_sim.npy``."""
        self._fill_similarity(self.user_history, self.user_sim)
        self._save_matrix(self.user_sim, 'user_sim.npy')

    def find_item_sim(self):
        """Compute all item-item similarities and save ``item_sim.npy``."""
        self._fill_similarity(self.item_history, self.item_sim)
        self._save_matrix(self.item_sim, 'item_sim.npy')
if __name__ == '__main__':
    # Script entry point: build the dataset from the history file with
    # length bounds 10 and 15, then compute both similarity matrices.
    dataset = Data('../data/user_history.json', 10, 15)
    dataset.find_user_sim()
    dataset.find_item_sim()