-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdkn.py
97 lines (71 loc) · 3.78 KB
/
dkn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
"""dkn_MIND.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1V1XRRVl6feu3ZOuR1C8wUrWumNqq-YGY
# DKN : Deep Knowledge-Aware Network for News Recommendation
DKN is a deep learning model which incorporates information from knowledge graph for better news recommendation. Specifically, DKN uses TransX [2] method for knowledge graph representation learning, then applies a CNN framework, named KCNN, to combine entity embedding with word embedding and generate a final embedding vector for a news article. CTR prediction is made via an attention-based neural scorer.
## DKN:
- DKN is a content-based deep model for CTR prediction rather than traditional ID-based collaborative filtering.
- It makes use of knowledge entities and common sense in news content via joint learning from semantic-level and knowledge-level representations of news articles.
- DKN uses an attention module to dynamically calculate a user's aggregated historical representation.
Wang, Hongwei, Fuzheng Zhang, Xing Xie, and Minyi Guo. "DKN: Deep knowledge-aware network for news recommendation." In Proceedings of the 2018 world wide web conference, pp. 1835-1844. 2018.
https://dl.acm.org/doi/abs/10.1145/3178876.3186175
"""
# Install an "ignore" filter for all Python warnings. It is set up before the
# heavier imports below, presumably so that warnings emitted while importing
# them are hidden as well.
import warnings
warnings.filterwarnings("ignore")
import os
import sys
from tempfile import TemporaryDirectory
import tensorflow as tf
# Limit both the TensorFlow logger and the TF1 compatibility logger to
# ERROR-level messages before the recommenders modules are imported.
tf.get_logger().setLevel("ERROR") # only show error messages
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from recommenders.models.deeprec.deeprec_utils import download_deeprec_resources, prepare_hparams
from recommenders.models.deeprec.models.dkn import DKN
from recommenders.models.deeprec.io.dkn_iterator import DKNTextIterator
from recommenders.utils.notebook_utils import store_metadata
# Report the interpreter and TensorFlow versions used for this run.
print(f"System version: {sys.version}")
print(f"Tensorflow version: {tf.__version__}")
"""## Download and load data"""
# Scratch directory that holds the demo dataset. `tmpdir` must stay referenced
# for the whole run: the directory is removed once the TemporaryDirectory
# object is cleaned up.
tmpdir = TemporaryDirectory()
data_path = os.path.join(tmpdir.name, "mind-demo-dkn")

# Model configuration file and the train / validation / test data files.
yaml_file = os.path.join(data_path, "dkn.yaml")
train_file = os.path.join(data_path, "train_mind_demo.txt")
valid_file = os.path.join(data_path, "valid_mind_demo.txt")
test_file = os.path.join(data_path, "test_mind_demo.txt")

# Per-news features and per-user click history.
news_feature_file = os.path.join(data_path, "doc_feature.txt")
user_history_file = os.path.join(data_path, "user_history.txt")

# Word, entity and context embedding files.
wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")

# Download the demo archive into the scratch directory unless the config file
# is already present. The paths above are expected to exist afterwards
# (presumably the helper also extracts the archive - confirm against the
# recommenders documentation).
if not os.path.exists(yaml_file):
    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        tmpdir.name,
        "mind-demo-dkn.zip",
    )
"""## Create hyper-parameters"""
# Run-level settings handed to prepare_hparams together with the YAML file.
EPOCHS = 10
HISTORY_SIZE = 50
BATCH_SIZE = 500

# Gather every keyword argument in one mapping (same names, same order as a
# direct keyword call) and unpack it into prepare_hparams.
hparam_overrides = {
    "news_feature_file": news_feature_file,
    "user_history_file": user_history_file,
    "wordEmb_file": wordEmb_file,
    "entityEmb_file": entityEmb_file,
    "contextEmb_file": contextEmb_file,
    "epochs": EPOCHS,
    "history_size": HISTORY_SIZE,
    "batch_size": BATCH_SIZE,
}
hparams = prepare_hparams(yaml_file, **hparam_overrides)
print(hparams)
"""## Train the DKN model"""
# Build the DKN model from the prepared hyper-parameters and its text iterator.
model = DKN(hparams, DKNTextIterator)

# Evaluate on the validation file before any call to fit(), so the printed
# metrics can serve as a pre-training baseline.
baseline_eval = model.run_eval(valid_file)
print(baseline_eval)

# Train on the training file, passing the validation file to fit() as well.
model.fit(train_file, valid_file)
"""## Evaluate the DKN model
Now we can check the performance on the test set:
"""
# Evaluate the trained model on the test file and show the metrics.
res = model.run_eval(test_file)
print(res)

# Record results for tests - ignore this cell
# Each metric is stored under the same key it has in `res`, in this order.
for metric_name in ("auc", "group_auc", "ndcg@5", "ndcg@10", "mean_mrr"):
    store_metadata(metric_name, res[metric_name])