# dkn.py

# -*- coding: utf-8 -*-
"""dkn_MIND.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1V1XRRVl6feu3ZOuR1C8wUrWumNqq-YGY

# DKN : Deep Knowledge-Aware Network for News Recommendation

DKN is a deep learning model which incorporates information from a knowledge graph for better news recommendation. Specifically, DKN uses the TransX [2] method for knowledge graph representation learning, then applies a CNN framework, named KCNN, to combine entity embedding with word embedding and generate a final embedding vector for a news article. CTR prediction is made via an attention-based neural scorer.

## DKN:
- DKN is a content-based deep model for CTR prediction rather than traditional ID-based collaborative filtering.
- It makes use of knowledge entities and common sense in news content via joint learning from semantic-level and knowledge-level representations of news articles.
- DKN uses an attention module to dynamically calculate a user's aggregated historical representation.


Wang, Hongwei, Fuzheng Zhang, Xing Xie, and Minyi Guo. "DKN: Deep knowledge-aware network for news recommendation." In Proceedings of the 2018 world wide web conference, pp. 1835-1844. 2018.

https://dl.acm.org/doi/abs/10.1145/3178876.3186175
"""

# Install a blanket "ignore" filter for Python warnings.
# NOTE(review): presumably placed ahead of the TensorFlow import so that
# warnings raised while importing it are hidden as well - confirm.
import warnings
warnings.filterwarnings("ignore")

import os
import sys
from tempfile import TemporaryDirectory
import tensorflow as tf
# Restrict TensorFlow output to errors, both on the logger returned by
# tf.get_logger() and through the compat.v1 logging verbosity setting.
tf.get_logger().setLevel("ERROR") # only show error messages
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Pieces of the recommenders package used further down: the resource
# download and hyper-parameter helpers, the DKN model class, the iterator
# class handed to it, and the helper that records result metadata.
from recommenders.models.deeprec.deeprec_utils import download_deeprec_resources, prepare_hparams
from recommenders.models.deeprec.models.dkn import DKN
from recommenders.models.deeprec.io.dkn_iterator import DKNTextIterator
from recommenders.utils.notebook_utils import store_metadata

# Report the interpreter and TensorFlow versions this run is using.
print(f"System version: {sys.version}")
print(f"Tensorflow version: {tf.__version__}")

"""## Download and load data"""

# Temporary directory under which the demo data folder is expected.
tmpdir = TemporaryDirectory()
data_path = os.path.join(tmpdir.name, "mind-demo-dkn")


def _demo_path(filename):
    """Return the path of *filename* inside the demo data folder."""
    return os.path.join(data_path, filename)


# Model configuration and the train / validation / test inputs.
yaml_file = _demo_path("dkn.yaml")
train_file = _demo_path("train_mind_demo.txt")
valid_file = _demo_path("valid_mind_demo.txt")
test_file = _demo_path("test_mind_demo.txt")

# News features and user click histories.
news_feature_file = _demo_path("doc_feature.txt")
user_history_file = _demo_path("user_history.txt")

# Word, entity and context embedding files.
wordEmb_file = _demo_path("word_embeddings_100.npy")
entityEmb_file = _demo_path("TransE_entity2vec_100.npy")
contextEmb_file = _demo_path("TransE_context2vec_100.npy")

# Fetch the demo archive only when the YAML config is not on disk yet.
config_missing = not os.path.exists(yaml_file)
if config_missing:
    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        tmpdir.name,
        "mind-demo-dkn.zip",
    )

"""## Create hyper-parameters"""

EPOCHS = 10
HISTORY_SIZE = 50
BATCH_SIZE = 500

# Keyword settings handed to prepare_hparams together with the YAML file:
# input file locations first, then the training sizes defined above.
hparam_settings = {
    "news_feature_file": news_feature_file,
    "user_history_file": user_history_file,
    "wordEmb_file": wordEmb_file,
    "entityEmb_file": entityEmb_file,
    "contextEmb_file": contextEmb_file,
    "epochs": EPOCHS,
    "history_size": HISTORY_SIZE,
    "batch_size": BATCH_SIZE,
}
hparams = prepare_hparams(yaml_file, **hparam_settings)
print(hparams)

"""## Train the DKN model"""

model = DKN(hparams, DKNTextIterator)

# Evaluate on the validation file before fit() is called and show the result.
initial_valid_metrics = model.run_eval(valid_file)
print(initial_valid_metrics)

# Fit on the training file, passing the validation file alongside it.
model.fit(train_file, valid_file)

"""## Evaluate the DKN model

Now we can check the performance on the test set:
"""

res = model.run_eval(test_file)
print(res)

# Record results for tests - ignore this cell
# Each metric is stored under the same key it has in the evaluation result.
for metric_name in ("auc", "group_auc", "ndcg@5", "ndcg@10", "mean_mrr"):
    store_metadata(metric_name, res[metric_name])