diff --git a/twint/storage/db.py b/twint/storage/db.py index e43510f2..c40a1fa4 100644 --- a/twint/storage/db.py +++ b/twint/storage/db.py @@ -88,13 +88,27 @@ def init(db): CREATE TABLE IF NOT EXISTS retweets( user_id integer not null, + username text not null, tweet_id integer not null, + retweet_id integer not null, CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id), CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id), CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id) ); """ cursor.execute(table_retweets) + + table_reply_to = """ + CREATE TABLE IF NOT EXISTS + replies( + tweet_id integer not null, + user_id integer not null, + username text not null, + CONSTRAINT replies_pk PRIMARY KEY (user_id, tweet_id), + CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id) + ); + """ + cursor.execute(table_reply_to) table_favorites = """ CREATE TABLE IF NOT EXISTS @@ -256,8 +270,13 @@ def tweets(conn, Tweet, config): cursor.execute(query, (config.User_id, Tweet.id)) if Tweet.retweet: - query = 'INSERT INTO retweets VALUES(?,?)' - cursor.execute(query, (config.User_id, Tweet.id)) + query = 'INSERT INTO retweets VALUES(?,?,?,?)' + cursor.execute(query, (int(Tweet.user_rt_id), Tweet.user_rt, Tweet.id, int(Tweet.retweet_id))) + + if Tweet.reply_to: + for reply in Tweet.reply_to: + query = 'INSERT INTO replies VALUES(?,?,?)' + cursor.execute(query, (Tweet.id, int(reply['user_id']), reply['username'])) conn.commit() except sqlite3.IntegrityError: diff --git a/twint/storage/elasticsearch.py b/twint/storage/elasticsearch.py index 95aeb5e0..11e7a46d 100644 --- a/twint/storage/elasticsearch.py +++ b/twint/storage/elasticsearch.py @@ -66,7 +66,6 @@ def createIndex(config, instance, **scope): "tweet": {"type": "text"}, "hashtags": {"type": "keyword"}, "cashtags": {"type": "keyword"}, - "user_id": {"type": "long"}, "user_id_str": {"type": "keyword"}, "username": {"type": "keyword"}, "name": {"type": "text"}, @@ -86,9 +85,12 @@ def createIndex(config, instance, **scope): "geo_near": {"type": "geo_point"}, "geo_tweet": {"type": "geo_point"}, "photos": {"type": "text"}, - "user_rt_id": {"type": "integer"}, + "user_rt_id": {"type": "keyword"}, "mentions": {"type": "keyword"}, - "source": {"type": "keyword"} + "source": {"type": "keyword"}, + "user_rt": {"type": "keyword"}, + "retweet_id": {"type": "keyword"}, + "reply_to": {"type": "nested"} } }, "settings": { @@ -203,7 +205,6 @@ def Tweet(Tweet, config): "tweet": Tweet.tweet, "hashtags": Tweet.hashtags, "cashtags": Tweet.cashtags, - "user_id": Tweet.user_id, "user_id_str": Tweet.user_id_str, "username": Tweet.username, "name": Tweet.name, @@ -223,6 +224,10 @@ def Tweet(Tweet, config): } if Tweet.retweet: j_data["_source"].update({"user_rt_id": Tweet.user_rt_id}) + j_data["_source"].update({"user_rt": Tweet.user_rt}) + j_data["_source"].update({"retweet_id": Tweet.retweet_id}) + if Tweet.reply_to: + j_data["_source"].update({"reply_to": Tweet.reply_to}) if Tweet.photos: _photos = [] for photo in Tweet.photos: diff --git a/twint/storage/panda.py b/twint/storage/panda.py index c3560550..3809ef67 100644 --- a/twint/storage/panda.py +++ b/twint/storage/panda.py @@ -92,7 +92,10 @@ def update(object, config): "near": Tweet.near, "geo": Tweet.geo, "source": Tweet.source, - "user_rt_id": Tweet.user_rt_id + "user_rt_id": Tweet.user_rt_id, + "user_rt": Tweet.user_rt, + "retweet_id": Tweet.retweet_id, + "reply_to": Tweet.reply_to } _object_blocks[_type].append(_data) elif _type == "user": diff --git a/twint/storage/write_meta.py b/twint/storage/write_meta.py index cb5fdc30..3b3a041e 100644 --- a/twint/storage/write_meta.py +++ b/twint/storage/write_meta.py @@ -23,10 +23,13 @@ def tweetData(t): "retweet": t.retweet, "quote_url": t.quote_url, "video": t.video, - "user_rt_id": t.user_rt_id, "near": t.near, "geo": t.geo, - "source": t.source + "source": t.source, + "user_rt_id": t.user_rt_id, + "user_rt": t.user_rt, + "retweet_id": t.retweet_id, + "reply_to": t.reply_to } return data @@ -55,10 +58,13 @@ def tweetFieldnames(): "retweet", "quote_url", "video", - "user_rt_id", "near", "geo", - "source" + "source", + "user_rt_id", + "user_rt", + "retweet_id", + "reply_to" ] return fieldnames