forked from mihirs16/Project-Darwin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetch.py
372 lines (324 loc) · 13.1 KB
/
fetch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
import os
import sys
# sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), '..\\..\\'))
import time
from pandas import DataFrame
import tweepy
import json
from k3y5 import TWITTER_API_KEY,TWITTER_API_SECRET_KEY,TWITTER_ACCESS_TOKEN,TWITTER_ACCESS_TOKEN_SECRET,IBM_API_KEY,IBM_URL
from ibm_watson import PersonalityInsightsV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
# Credentials (Twitter and IBM) are imported from the local k3y5 module above.
# Number of tweets requested per user_timeline call.
MAX_TWEET = 100
# connecting to twitter api
auth = tweepy.OAuthHandler(TWITTER_API_KEY,TWITTER_API_SECRET_KEY)
auth.set_access_token(TWITTER_ACCESS_TOKEN,TWITTER_ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
# Personality Insights client, authenticated through IAM with the IBM API key
# and pointed at the service URL taken from k3y5.
authenticator = IAMAuthenticator(IBM_API_KEY)
PI = PersonalityInsightsV3(
version='2020-06-15',
authenticator=authenticator
)
PI.set_service_url(IBM_URL)
def limit_handled(cursor, list_name):
    """Yield every item of ``cursor``, pausing when the Twitter rate limit is hit.

    Args:
        cursor: Iterator to drain (for example the item iterator of a
            ``tweepy.Cursor``).
        list_name: Collection the caller is filling; only its length is
            printed when the rate limit is hit.

    Yields:
        Each item produced by ``cursor``, in order.
    """
    while True:
        try:
            item = next(cursor)
        except StopIteration:
            # End of data. This has to be handled explicitly: a StopIteration
            # escaping a generator body is converted to RuntimeError (PEP 479).
            return
        except tweepy.RateLimitError:
            # Rate limit window is 15 minutes: wait it out in three 5-minute
            # steps so the remaining time can be reported.
            print(f"\nData points in list = {len(list_name)}")
            print("Hit Twitter API rate limit.")
            for i in range(3, 0, -1):
                print(f"Wait for {i*5} mins.")
                time.sleep(5 * 60)
            continue
        except tweepy.error.TweepError:
            # NOTE(review): best-effort retry; a persistent API error will
            # keep looping here - confirm whether a retry cap is wanted.
            print("\n Caught TweepError exception")
            continue
        yield item
# this function outputs the csv file for all the tweets received
def get_all_tweets_df(username):
    """Download a user's timeline and save it as ``<username>_tweets.csv``.

    The timeline is fetched in pages of ``MAX_TWEET`` tweets, walking
    backwards with ``max_id`` until a request returns no tweets. The CSV is
    written to a relative path and has the columns ``id``, ``created_at``,
    ``text``, ``likes``, ``in reply to`` and ``retweeted``. A user with no
    tweets produces a CSV that contains only the header row.

    Args:
        username: Twitter screen name passed to ``api.user_timeline``.

    Returns:
        None.
    """
    all_tweets = []
    # requesting the most recent tweets (MAX_TWEET per request)
    new_tweets = api.user_timeline(screen_name=username, count=MAX_TWEET)
    all_tweets.extend(new_tweets)
    # to get tweets until there are none left; an empty first page skips the
    # loop entirely instead of indexing into an empty list
    while new_tweets:
        # max_id returns tweets with an id less than or equal to the given id,
        # so subtract one to exclude the oldest tweet already fetched
        oldest = all_tweets[-1].id - 1
        print(f"getting tweets before {oldest}")
        new_tweets = api.user_timeline(
            screen_name=username, count=MAX_TWEET, max_id=oldest
        )
        all_tweets.extend(new_tweets)
        print(f"...{len(all_tweets)} tweets downloaded so far")
    # transforming the tweets to 2D array
    out_tweets = [
        [
            tweet.id_str,
            tweet.created_at,
            tweet.text,
            tweet.favorite_count,
            tweet.in_reply_to_screen_name,
            tweet.retweeted,
        ]
        for tweet in all_tweets
    ]
    # converting list of list to dataframe
    data = DataFrame(
        out_tweets,
        columns=['id', 'created_at', 'text', 'likes', 'in reply to', 'retweeted'],
    )
    data.to_csv(f'{username}_tweets.csv', index=False)
'''
returns the tweets in a dictionary format containing a list of dictionary formatted tweets
dictionary = {
'contentItems':[
{
'content': tweets text,
'contenttype': 'application/json',
'id': tweets id,
'created':tweets creation date,
'language':'en'
}
]
}
'''
def get_all_tweets_dic(username):
    """Return a user's most recent tweets as a JSON ``contentItems`` document.

    Only a single request of up to ``MAX_TWEET`` tweets is made; older tweets
    are not paged in.

    Args:
        username: Twitter screen name passed to ``api.user_timeline``.

    Returns:
        A JSON string of the form ``{"contentItems": [...]}`` where every item
        carries ``content``, ``contenttype``, ``id``, ``created`` and
        ``language``; or the integer ``-1`` when the timeline request fails.
    """
    try:
        tweets = api.user_timeline(screen_name=username, count=MAX_TWEET)
    except Exception:
        # Callers test for the -1 sentinel. Exception (rather than a bare
        # except) lets KeyboardInterrupt / SystemExit propagate.
        return -1
    content_items = [
        {
            'content': tweet.text,
            'contenttype': 'application/json',
            'id': tweet.id_str,
            'created': tweet.created_at,
            'language': 'en',
        }
        for tweet in tweets
    ]
    # default=str stringifies values json cannot encode natively
    # (presumably the created_at timestamps - confirm).
    return json.dumps({'contentItems': content_items}, indent=2, default=str)
# pushes the dictionary created into the personality insights to get the results
def get_insight(dic):
    """Request a Personality Insights profile and return it as a JSON string.

    Args:
        dic: Content passed unchanged as the first argument of ``PI.profile``.

    Returns:
        The profile result serialised with ``json.dumps`` (indent of 2).
    """
    response = PI.profile(
        dic,
        'application/json',
        raw_scores=True,
        consumption_preferences=True,
    )
    result = response.get_result()
    return json.dumps(result, indent=2)
# getting a dictionary of PERSONALITY score for each value
'''
- personality
- big5_openness
- adventurousness
- artistic interests
- emotionality
- imagination
- intellect
- liberalism
- big5_conscientiousness
- achievement striving
- cautiousness
- dutifulness
- orderliness
- self-discipline
- self-efficacy --> producing results?
- big5_extraversion
- activity level
- assertiveness
- cheerfulness
- excitement seeking
- outgoing
- gregariousness --> sociable
- big5_agreeableness
- altruism --> disinterested and selfless concern for the well-being of others
- cooperation
- modesty
- uncompromising
- sympathy
- trust
- big5_neuroticism (emotional range)
- fiery
- prone to worry
- melancholy
- immoderation
- self consciousness
- susceptible to stress
'''
def get_personality(insight):
    """Flatten the Big Five traits and their child facets into one score dict.

    Args:
        insight: Mapping with a ``'personality'`` list. Each entry needs
            ``'trait_id'``, ``'raw_score'`` and a ``'children'`` list whose
            entries carry ``'trait_id'`` and ``'raw_score'``.

    Returns:
        dict mapping ``personality_<name>_score`` to the raw score, where
        ``<name>`` is the trait id without its first 5 characters for a
        top-level trait and without its first 6 characters for a child.
        Entries whose id is not one of the five ``big5_*`` ids are ignored.
    """
    big5_ids = {
        'big5_openness',
        'big5_conscientiousness',
        'big5_extraversion',
        'big5_agreeableness',
        'big5_neuroticism',
    }
    personality = {}
    for trait in insight['personality']:
        if trait['trait_id'] not in big5_ids:
            continue
        # strip the 5-character prefix of the top-level id ("big5_")
        personality[f"personality_{trait['trait_id'][5:]}_score"] = trait['raw_score']
        for facet in trait['children']:
            # child ids carry a 6-character prefix
            personality[f"personality_{facet['trait_id'][6:]}_score"] = facet['raw_score']
    return personality
# getting NEED scores for each value
'''
- needs
- challenge
- closeness
- curiosity
- excitement
- harmony
- liberty
- love
- practicality
- self expression
- stability
- structure
'''
def get_need(insight):
    """Map every entry of ``insight['needs']`` to a ``need_<name>_score`` key.

    ``<name>`` is the entry's ``trait_id`` without its first 5 characters;
    the value is the entry's ``raw_score``.
    """
    return {
        f"need_{entry['trait_id'][5:]}_score": entry['raw_score']
        for entry in insight['needs']
    }
# getting VALUES scores for each value
'''
- values
- conservation
- openness to change
- hedonism --> pursuit of pleasure
- self enhancement
- self transcendence --> experience
'''
def get_value(insight):
    """Map every entry of ``insight['values']`` to a ``value_<name>_score`` key.

    ``<name>`` is the entry's ``trait_id`` without its first 6 characters;
    the value is the entry's ``raw_score``.
    """
    return {
        f"value_{entry['trait_id'][6:]}_score": entry['raw_score']
        for entry in insight['values']
    }
# calculating distance between two profiles
def difference(dic1, dic2):
    """Return the per-key difference ``dic1[key] - dic2[key]``.

    Scores are matched by key, not by position, so the result does not
    depend on the insertion order of either dict.

    Args:
        dic1: Mapping of score name to number (minuend).
        dic2: Mapping of score name to number (subtrahend).

    Returns:
        dict, in ``dic1`` key order, holding the difference for every key
        present in both inputs. Keys missing from ``dic2`` are skipped.
    """
    return {key: score - dic2[key] for key, score in dic1.items() if key in dic2}
# combining all personality, need and value into a score dictionary
def combine(personality, need, value):
    """Bundle the three score groups under ``personality``, ``need`` and ``value``."""
    return dict(personality=personality, need=need, value=value)
# returning a score of a single username
def get_score(insight):
    """Build the combined personality / need / value score dict for one profile."""
    return combine(
        get_personality(insight),
        get_need(insight),
        get_value(insight),
    )
def get_dist(insight1, insight2):
    """Return the score-by-score distance between two profiles.

    Each profile may be given either as an already-parsed mapping or as its
    JSON text; the two arguments do not have to use the same form.

    Args:
        insight1: First profile (mapping or JSON ``str``/``bytes``).
        insight2: Second profile (mapping or JSON ``str``/``bytes``).

    Returns:
        dict with the keys ``personality``, ``need`` and ``value``, each
        holding the differences ``insight1 - insight2`` for that group.
    """
    # Decode explicitly instead of relying on a bare except: a malformed
    # profile now surfaces its real error instead of a misleading JSON one.
    if isinstance(insight1, (str, bytes, bytearray)):
        insight1 = json.loads(insight1)
    if isinstance(insight2, (str, bytes, bytearray)):
        insight2 = json.loads(insight2)

    # difference between all scores of personality, need and value
    p_diff = difference(get_personality(insight1), get_personality(insight2))
    n_diff = difference(get_need(insight1), get_need(insight2))
    v_diff = difference(get_value(insight1), get_value(insight2))
    # profile distance combined into one score
    return combine(p_diff, n_diff, v_diff)
def twtScore(username):
    """Return ``(big5_score, value_score)`` for a Twitter user.

    ``big5_score`` is the mean raw score of the first five entries of the
    profile's ``personality`` list. ``value_score`` is the mean raw score of
    the ``values`` entries at positions 0, 1, 3 and 4; position 2 is left out
    (presumably hedonism - confirm against the service's ordering).

    Both scores are ``0.0`` when the tweets could not be fetched.
    """
    tweets_json = get_all_tweets_dic(username)
    if tweets_json == -1:
        # fetching failed: every component counts as zero
        return 0.0, 0.0

    profile = json.loads(get_insight(tweets_json))
    traits = profile['personality']
    openness = traits[0]['raw_score']
    conscientiousness = traits[1]['raw_score']
    extraversion = traits[2]['raw_score']
    agreeableness = traits[3]['raw_score']
    neuroticism = traits[4]['raw_score']

    values = profile['values']
    conservation = values[0]['raw_score']
    open_to_change = values[1]['raw_score']
    self_enhancement = values[3]['raw_score']
    self_transcendence = values[4]['raw_score']

    big5_score = (openness + conscientiousness + extraversion + agreeableness + neuroticism) / 5
    value_score = (conservation + open_to_change + self_enhancement + self_transcendence) / 4
    return big5_score, value_score
'''
raw
- openness
- extraversion
- agreeableness
- conscientiousness
- neuroticism
'''
# print(twtScore('@cached_cadet'))
# print(twtScore('@wubbalubbadubdub')) # doesn't exist
# # print(get_insight(get_all_tweets_dic('@cached_cadet')))