-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcheckApps.py
123 lines (90 loc) · 3.16 KB
/
checkApps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#! /usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@author: Shreyas <[email protected]>
CheckApps
=========
Given a list of app ids (from a CSV file or a database table), check whether each app exists in the store
"""
from __future__ import division
from optparse import OptionParser
import time
import datetime
import requests
import MySQLdb
import pandas as pd
import numpy as np
import pandas.io.sql as psql
def getUserInput():
    """
    Parse the command line and return the supplied settings.

    Recognised options: -f/--file, -d/--db, -s/--server,
    -u/--username and -p/--password.

    Returns a dict with the keys 'file', 'db', 'server', 'username'
    and 'password'. No validation is performed: an option that was not
    given on the command line comes back as None.
    """
    parser = OptionParser()
    # (short flag, long flag, attribute name on the parsed result)
    option_table = (
        ('-f', '--file', 'file'),
        ('-d', '--db', 'db'),
        ('-s', '--server', 'server'),
        ('-u', '--username', 'un'),
        ('-p', '--password', 'pw'),
    )
    for short_flag, long_flag, attribute in option_table:
        parser.add_option(short_flag, long_flag, dest=attribute)
    parsed, _positional = parser.parse_args()
    return dict(
        file=parsed.file,
        db=parsed.db,
        server=parsed.server,
        username=parsed.un,
        password=parsed.pw,
    )
def checkApps(df, relaxtime=1, timeout=30, session=None):
    """
    Input a dataframe of app ids and check if the apps exist
    in the store

    For every value in the 'package' column one GET request is sent to
    the Google Play details page; the HTTP status code of the answer is
    stored in a new 'status' column. The dataframe is modified in place
    and also returned.

    df        -- dataframe with a 'package' column of app ids
    relaxtime -- seconds to pause before each request (default 1)
    timeout   -- seconds to wait for the store to answer; without it a
                 stalled connection would block the run forever
    session   -- optional object offering a requests-style
                 get(url, timeout=...) method, e.g. a requests.Session
                 for connection reuse; defaults to the requests module

    Errors raised by the HTTP client (including timeouts) are not
    caught here and propagate to the caller.
    """
    stores = {
        # single slash after the host: the former '//store' was a typo
        'google': 'https://play.google.com/store/apps/details?id='
    }
    http = requests if session is None else session

    def getApp(package):
        # be polite to the store: pause between consecutive requests
        if relaxtime > 0:
            time.sleep(relaxtime)
        # %-formatting so a non-string id (e.g. NaN from a CSV) cannot
        # crash the message or the URL construction
        print('I\'m checking: %s' % (package,))
        r = http.get('%s%s' % (stores['google'], package), timeout=timeout)
        print('%s %s' % (package, r.status_code))
        return r.status_code

    print(df.head(10))
    df['status'] = df['package'].apply(getApp)
    return df
def getDataframeFromDatabase(host, db, un, pw):
    """
    Load the app ids to check from the MySQL database.

    host -- MySQL server host
    db   -- database name
    un   -- user name
    pw   -- password

    Returns a dataframe holding the 'package' column of the
    'potential_unfair_apps' table.
    """
    query = "SELECT package from potential_unfair_apps;"
    print(query)
    conn = MySQLdb.connect(host=host, user=un, passwd=pw, db=db)
    try:
        # read_sql is the supported replacement for the deprecated
        # frame_query helper
        unfair_apps_df = psql.read_sql(query, conn)
    finally:
        # release the connection even when the query fails
        conn.close()
    return unfair_apps_df
def pushDataframeToDatabase(df, host, db, un, pw, date):
    """
    Append a dataframe to the result table of the current run.

    df   -- dataframe to store
    host -- MySQL server host
    db   -- database name
    un   -- user name
    pw   -- password
    date -- suffix of the target table; rows are appended to
            'potential_unfair_apps_<date>'
    """
    print('Date:  %s' % (date,))
    table_name = 'potential_unfair_apps_%s' % (date,)
    # the value printed here is the table, not the database
    print('Table name:  %s' % (table_name,))
    conn = MySQLdb.connect(host=host, user=un, passwd=pw, db=db)
    try:
        # NOTE(review): 'flavor' is only accepted by older pandas
        # releases; newer ones expect an SQLAlchemy engine instead,
        # which this file does not import -- confirm the pinned version.
        df.to_sql(con=conn, name=table_name, if_exists='append', flavor='mysql')
        # make the rows durable before the connection is closed,
        # whether or not to_sql already committed
        conn.commit()
    finally:
        # this function runs once per batch; close the connection so
        # they do not pile up on the server
        conn.close()
def _splitIntoBatches(df, batch_size):
    """
    Return copies of consecutive row slices of df, each holding at most
    batch_size rows. An empty dataframe yields an empty list.
    """
    return [df.iloc[start:start + batch_size].copy()
            for start in range(0, len(df), batch_size)]


def main():
    """
    Read the app ids (from --file if given, otherwise from the
    database), check them against the store in batches of 100 and
    append every checked batch to a result table named after the start
    time of the run.
    """
    userInput = getUserInput()
    date_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    # echo the settings, but never the database password
    shown = dict(userInput)
    if shown.get('password') is not None:
        shown['password'] = '***'
    print(shown)

    if userInput['file'] is not None:
        ptl_unfair_df = pd.read_csv(userInput['file'])
    else:
        ptl_unfair_df = getDataframeFromDatabase(userInput['server'], userInput['db'],
                                                 userInput['username'], userInput['password'])

    # split dataframe to a manageable size. Row slicing is used because
    # np.array_split(df, len(df)/100) received a float that truncated to
    # 0 sections (ValueError) whenever fewer than 100 rows were loaded.
    # Each batch is a copy so that checkApps can add its column safely.
    for x, batch in enumerate(_splitIntoBatches(ptl_unfair_df, 100)):
        print('processing batch ' + str(x))
        unfair_df = checkApps(batch)
        pushDataframeToDatabase(unfair_df, userInput['server'], userInput['db'],
                                userInput['username'], userInput['password'], date_str)
# Run the checker only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()