Skip to content

Commit

Permalink
before presentation
Browse files Browse the repository at this point in the history
  • Loading branch information
seekshreyas committed Dec 3, 2013
1 parent b33d842 commit 28231bd
Show file tree
Hide file tree
Showing 6 changed files with 5,664,676 additions and 3 deletions.
172 changes: 172 additions & 0 deletions classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#! /usr/bin/env python
# -*- coding: UTF-8 -*-

"""
Classifier
==========
After feature extraction, which returns data in the format
[(filename, linenum, vote, sentence, feat1, feat2, ...)]
"""

from __future__ import division
from optparse import OptionParser
import json
from pprint import pprint
import random
import math
import nltk

def getUserInput():
    """Parse the command-line options and return the selected input file.

    The only recognised option is ``-i`` / ``--input``; it is read from
    ``sys.argv`` by ``optparse``.

    Returns:
        dict: ``{'file': <value passed to -i/--input>}``.

    Raises:
        SystemExit: raised by ``OptionParser.error`` (after printing the
            usage message to stderr) when no input file was supplied.
    """
    optionparser = OptionParser()
    optionparser.add_option('-i', '--input', dest='inputfile')

    (option, args) = optionparser.parse_args()

    if not option.inputfile:
        # The message must name the flag this parser really defines; it used
        # to point users at a non-existent ``--url`` option.
        optionparser.error(
            'input file not provided.\n Usage: --input="path/to/input.json"')

    return {'file': option.inputfile}



def fileExtractor(f):
    """Load and return the JSON document stored in the file at path ``f``.

    Args:
        f: path of the file to read.

    Returns:
        The Python object produced by ``json.load`` for the file content.

    Raises:
        IOError / OSError: if the file cannot be opened.
        ValueError: if the content is not valid JSON.
    """
    # ``with`` closes the handle even when json.load raises on bad content.
    with open(f) as fObj:
        return json.load(fObj)


def featureAggregator(extract):
    """Return the ``featureExtractor`` result of every app in ``extract``.

    The output list keeps the iteration order of ``extract``.
    """
    return [featureExtractor(appRecord) for appRecord in extract]


def featureExtractor(app):
    """Build the feature dictionary for a single app record.

    The result maps each feature name to the value returned by the matching
    getter: the five star-rating getters and ``getPrivacyState``.  The key
    spelling (including the mixed ``rating`` / ``Rating`` casing) is kept
    exactly as is.
    """
    # The price (getAppPrice) and review-count (getNumReviews) features are
    # currently disabled and therefore not part of the feature set.
    return {
        '1starrating': getOneStarRating(app),
        '2starrating': getTwoStarRating(app),
        '3starRating': getThreeStarRating(app),
        '4starRating': getFourStarRating(app),
        '5starRating': getFiveStarRating(app),
        'hasPrivacy': getPrivacyState(app),
    }




def getAppPrice(app):
    """Return the value stored under the ``'price'`` key of ``app``."""
    price = app['price']
    return price

def getNumReviews(app):
    """Return how many entries ``app['reviews']`` contains."""
    reviews = app['reviews']
    return len(reviews)


def _getStarRating(app, starLabel):
    """Return the value paired with ``starLabel`` in ``app['rating']``.

    ``app['rating']`` is iterated as a sequence of entries whose item 0 is
    the star label and whose item 1 is the value to return.  The label is
    compared exactly, surrounding spaces included.

    Returns:
        Item 1 of the first entry whose label equals ``starLabel``, or
        ``None`` when no entry matches.
    """
    for appRatingCount in app['rating']:
        if appRatingCount[0] == starLabel:
            return appRatingCount[1]
    return None


def getOneStarRating(app):
    """Return the value listed for the ``' 1 '`` label, or ``None``."""
    return _getStarRating(app, ' 1 ')


def getTwoStarRating(app):
    """Return the value listed for the ``' 2 '`` label, or ``None``."""
    return _getStarRating(app, ' 2 ')


def getThreeStarRating(app):
    """Return the value listed for the ``' 3 '`` label, or ``None``."""
    return _getStarRating(app, ' 3 ')


def getFourStarRating(app):
    """Return the value listed for the ``' 4 '`` label, or ``None``."""
    return _getStarRating(app, ' 4 ')


def getFiveStarRating(app):
    """Return the value listed for the ``' 5 '`` label, or ``None``."""
    return _getStarRating(app, ' 5 ')



def getPrivacyState(app):
    """Tell whether ``app`` has a developer privacy URL.

    Returns ``False`` when ``app['devprivacyurl']`` equals the placeholder
    ``'N.A'`` and ``True`` for any other value.
    """
    return app['devprivacyurl'] != 'N.A'


def classifier(extract, fold=10):
    """Label every app and cross-validate a classifier on the labelled set.

    One labelled sample is produced per app.  An app is labelled
    ``'unfair'`` when the text of at least one of its reviews contains the
    substring ``'fake'`` (case-insensitively) and ``'fair'`` otherwise.  The
    samples are shuffled, cut into ``fold`` equally sized slices, and each
    slice is used once as the test set while the remaining samples form the
    training set.

    Args:
        extract: iterable of app records.  Each record must provide
            ``'reviews'`` (entries holding the review text at index 1) and
            everything ``featureExtractor`` reads.
        fold: number of cross-validation rounds.

    Returns:
        list: the accuracy reported by ``myclassifier`` for every round.
        The list is empty when ``fold`` is smaller than 1 or when there are
        fewer samples than folds, because no non-empty test slice can be
        formed in those cases.
    """
    data = []
    for app in extract:
        # The label is decided per app; a hit in one app must not leak into
        # the apps that follow it.
        hasFakeReview = any('fake' in rev[1].lower() for rev in app['reviews'])
        labeldata = 'unfair' if hasFakeReview else 'fair'

        # NLTK classifiers are trained on (featureset_dict, label) pairs.
        data.append((featureExtractor(app), labeldata))

    if fold < 1 or len(data) < fold:
        return []

    random.shuffle(data)

    # Slice size follows the requested number of folds; leftover samples
    # beyond fold * size are only ever used for training.
    size = len(data) // fold

    claccuracy = []
    for i in range(fold):
        start = i * size
        stop = start + size
        test_this_round = data[start:stop]
        train_this_round = data[:start] + data[stop:]

        claccuracy.append(myclassifier(train_this_round, test_this_round))

    return claccuracy



def myclassifier(train_data, test_data):
    """Train a Naive Bayes model and return its accuracy on ``test_data``.

    ``train_data`` is handed to ``nltk.NaiveBayesClassifier.train`` and the
    resulting model is scored with ``nltk.classify.accuracy``.
    """
    model = nltk.NaiveBayesClassifier.train(train_data)

    # model.show_most_informative_features() can be called here to inspect
    # the trained model while debugging.
    accuracy = nltk.classify.accuracy(model, test_data)
    return accuracy







def main():
    """Script entry point.

    Reads the input path from the command line, echoes it, loads the file
    with ``fileExtractor`` and passes the loaded data to ``classifier``.
    """
    userinput = getUserInput()
    # Parenthesised single-argument form: prints the same text with the
    # Python 2 print statement and parses as a call on Python 3.
    print(userinput['file'])

    extract = fileExtractor(userinput['file'])
    # featureAggregator(extract) is not needed here: classifier() extracts
    # the features of each app itself.
    classifier(extract)


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions exports/game_brain_all.json
Original file line number Diff line number Diff line change
Expand Up @@ -5617,4 +5617,5 @@
"totalReviewers": 10576,
"version": "1.2"
}

]
5,418 changes: 5,417 additions & 1 deletion exports/kristine_all.json

Large diffs are not rendered by default.

3,258 changes: 3,257 additions & 1 deletion exports/kristine_reviews.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading

0 comments on commit 28231bd

Please sign in to comment.