diff --git a/appClassifierBenchmark.py b/appClassifierBenchmark.py index 2a08ebb..ac1d04e 100644 --- a/appClassifierBenchmark.py +++ b/appClassifierBenchmark.py @@ -101,7 +101,11 @@ def trimDf(df): cols.remove('price') # considered only free apps cols.remove('appName') # removing appNames - return df[list(cols)] + # return df[list(cols)] + + + + return df[list(('revSent', 'appLabel'))] @@ -245,7 +249,7 @@ def allClassifier(cDf, models, modelchoice): performClassification(classifier, featVector, labelVector) else: if modelchoice in models and modelchoice != 'svm-nl': - classifier = models[choice] + classifier = models[modelchoice] performClassification(classifier, featVector, labelVector) else: print "Incorrect Choice" @@ -273,6 +277,8 @@ def main(): appDf = loadAppData(userInput['file']) appDf = trimDf(appDf) + print appDf.columns + if userInput['sample'] == 'all': allClassifier(appDf, models, userInput['classifier']) else: diff --git a/obidroidMR.py b/obidroidMR.py index 58c032d..7539055 100644 --- a/obidroidMR.py +++ b/obidroidMR.py @@ -3,11 +3,10 @@ from cPickle import load import re import nltk -# import pattern from textblob import TextBlob from textblob.sentiments import NaiveBayesAnalyzer import sys -# import simplejson +import math class ObidroidReview(MRJob): @@ -51,6 +50,8 @@ def getFeatures(rev): blob = TextBlob(rev, analyzer=NaiveBayesAnalyzer()) blobSent = blob.sentiment + + # print blobSent if blobSent[0] == 'pos': @@ -61,7 +62,7 @@ def getFeatures(rev): revSent = 0 - + revSent = round(revSent, 4) return [ revCharLength, @@ -84,21 +85,22 @@ def getRecord(self, _, record): #Mapper 1 idpattern = re.compile('(\w+\.+\w+[(\.+)(\w+)]+)') reviewid = record[0] - appid = idpattern.split(record[1]) + appidmatches = idpattern.split(record[1]) + appid = appidmatches[1] features = ObidroidReview.getFeatures(record[2]) - features.append(appid[1]) + features.append(reviewid) - sys.stderr.write("MAPPER INPUT: ({0},{1})\n".format(reviewid,features)) + sys.stderr.write("MAPPER OUTPUT: ({0},{1})\n".format(appid,features)) - yield reviewid, features + yield appid, features - def performAction(self,revid,revfeatures): #Reducer 1 - sys.stderr.write("MAPPER INPUT: ({0},{1})\n".format(revid,revfeatures)) - yield revid, list(revfeatures) + def performAction(self,appid,revfeatures): #Reducer 1 + sys.stderr.write("REDUCER INPUT: ({0},{1})\n".format(appid,revfeatures)) + yield appid, list(revfeatures)