diff --git a/Amazon Reviews.ipynb b/Amazon Reviews.ipynb
new file mode 100644
index 0000000..53e18b4
--- /dev/null
+++ b/Amazon Reviews.ipynb	
@@ -0,0 +1,775 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Product Name</th>\n",
+       "      <th>Brand Name</th>\n",
+       "      <th>Price</th>\n",
+       "      <th>Rating</th>\n",
+       "      <th>Reviews</th>\n",
+       "      <th>Review Votes</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>5</td>\n",
+       "      <td>I feel so LUCKY to have found this used (phone...</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>4</td>\n",
+       "      <td>nice phone, nice up grade from my pantach revu...</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>5</td>\n",
+       "      <td>Very pleased</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>4</td>\n",
+       "      <td>It works good but it goes slow sometimes but i...</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Great phone to replace my lost phone. The only...</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                        Product Name Brand Name   Price  \\\n",
+       "0  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "1  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "2  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "3  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "4  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "\n",
+       "   Rating                                            Reviews  Review Votes  \n",
+       "0       5  I feel so LUCKY to have found this used (phone...           1.0  \n",
+       "1       4  nice phone, nice up grade from my pantach revu...           0.0  \n",
+       "2       5                                       Very pleased           0.0  \n",
+       "3       4  It works good but it goes slow sometimes but i...           0.0  \n",
+       "4       4  Great phone to replace my lost phone. The only...           0.0  "
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "df = pd.read_csv('Amazon_Unlocked_Mobile.csv')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Product Name</th>\n",
+       "      <th>Brand Name</th>\n",
+       "      <th>Price</th>\n",
+       "      <th>Rating</th>\n",
+       "      <th>Reviews</th>\n",
+       "      <th>Review Votes</th>\n",
+       "      <th>Positively Rated</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>5</td>\n",
+       "      <td>I feel so LUCKY to have found this used (phone...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>4</td>\n",
+       "      <td>nice phone, nice up grade from my pantach revu...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>5</td>\n",
+       "      <td>Very pleased</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>4</td>\n",
+       "      <td>It works good but it goes slow sometimes but i...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Great phone to replace my lost phone. The only...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>1</td>\n",
+       "      <td>I already had a phone with problems... I know ...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>2</td>\n",
+       "      <td>The charging port was loose. I got that solder...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Phone looks good but wouldn't stay charged, ha...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>5</td>\n",
+       "      <td>I originally was using the Samsung S2 Galaxy f...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
+       "      <td>Samsung</td>\n",
+       "      <td>199.99</td>\n",
+       "      <td>3</td>\n",
+       "      <td>It's battery life is great. It's very responsi...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                        Product Name Brand Name   Price  \\\n",
+       "0  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "1  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "2  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "3  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "4  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "5  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "6  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "7  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "8  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "9  \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...    Samsung  199.99   \n",
+       "\n",
+       "   Rating                                            Reviews  Review Votes  \\\n",
+       "0       5  I feel so LUCKY to have found this used (phone...           1.0   \n",
+       "1       4  nice phone, nice up grade from my pantach revu...           0.0   \n",
+       "2       5                                       Very pleased           0.0   \n",
+       "3       4  It works good but it goes slow sometimes but i...           0.0   \n",
+       "4       4  Great phone to replace my lost phone. The only...           0.0   \n",
+       "5       1  I already had a phone with problems... I know ...           1.0   \n",
+       "6       2  The charging port was loose. I got that solder...           0.0   \n",
+       "7       2  Phone looks good but wouldn't stay charged, ha...           0.0   \n",
+       "8       5  I originally was using the Samsung S2 Galaxy f...           0.0   \n",
+       "9       3  It's battery life is great. It's very responsi...           0.0   \n",
+       "\n",
+       "   Positively Rated  \n",
+       "0                 1  \n",
+       "1                 1  \n",
+       "2                 1  \n",
+       "3                 1  \n",
+       "4                 1  \n",
+       "5                 0  \n",
+       "6                 0  \n",
+       "7                 0  \n",
+       "8                 1  \n",
+       "9                 0  "
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.dropna(inplace=True)\n",
+    "df[df['Rating'] != 3]\n",
+    "df['Positively Rated'] = np.where(df['Rating'] > 3, 1, 0)\n",
+    "df.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.6899487041440472"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df['Positively Rated'].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(df['Reviews'], df['Positively Rated'], random_state = 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "X_train first entry: \n",
+      "\n",
+      " I feel so LUCKY to have found this used (phone to us & not used hard at all), phone on line from someone who upgraded and sold this one. My Son liked his old one that finally fell apart after 2.5+ years and didn't want an upgrade!! Thank you Seller, we really appreciate it & your honesty re: said used phone.I recommend this seller very highly & would but from them again!!\n",
+      "\n",
+      "\n",
+      "X_train shape:  (250751,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print('X_train first entry: \\n\\n', X_train[0])\n",
+    "print('\\n\\nX_train shape: ', X_train.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# CountVectorizer\n",
+    "from sklearn.feature_extraction.text import CountVectorizer\n",
+    "\n",
+    "vect = CountVectorizer().fit(X_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['00',\n",
+       " '858',\n",
+       " 'approval',\n",
+       " 'booth',\n",
+       " 'cmon',\n",
+       " 'dealsthanks',\n",
+       " 'eclair',\n",
+       " 'ff',\n",
+       " 'gsmpros',\n",
+       " 'insertion',\n",
+       " 'linkhttps',\n",
+       " 'movment',\n",
+       " 'outmoded',\n",
+       " 'preset',\n",
+       " 'reinstallation',\n",
+       " 'separatingly',\n",
+       " 'stillnumbers',\n",
+       " 'todos',\n",
+       " 'verycool']"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "vect.get_feature_names()[::3000]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "56947"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(vect.get_feature_names())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<250751x56947 sparse matrix of type '<class 'numpy.int64'>'\n",
+       "\twith 6848632 stored elements in Compressed Sparse Row format>"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# transform the documents in the training data to a document-term matrix\n",
+    "X_train_vectorized = vect.transform(X_train)\n",
+    "X_train_vectorized"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+       "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
+       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
+       "          verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LogisticRegression\n",
+    "\n",
+    "model = LogisticRegression()\n",
+    "model.fit(X_train_vectorized, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AUC:  0.896300276575\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import roc_auc_score\n",
+    "\n",
+    "predictions = model.predict(vect.transform(X_test))\n",
+    "\n",
+    "print('AUC: ', roc_auc_score(y_test, predictions))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Smallest Coefs: \n",
+      "['raymond' 'stylist' 'mony' 'false' 'worst' 'unsatisfied' 'disconnects'\n",
+      " 'pos' 'horribly' 'lies']\n",
+      "\n",
+      "Largest Coefs: \n",
+      "['excelent' 'excelente' '4eeeks' 'perfecto' 'exelente' 'loving' 'awsome'\n",
+      " 'excellent' 'becuse' 'sweet']\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# get the feature names as numpy array\n",
+    "feature_names = np.array(vect.get_feature_names())\n",
+    "\n",
+    "# Sort the coefficients from the model\n",
+    "sorted_coef_index = model.coef_[0].argsort()\n",
+    "\n",
+    "# Find the 10 smallest and 10 largest coefficients\n",
+    "# The 10 largest coefficients are being indexed using [:-11:-1] \n",
+    "# so the list returned is in order of largest to smallest\n",
+    "print('Smallest Coefs: \\n{}\\n'.format(feature_names[sorted_coef_index[:10]]))\n",
+    "print('Largest Coefs: \\n{}\\n'.format(feature_names[sorted_coef_index[:-11:-1]]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "18951"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#Tfidf\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "\n",
+    "# Fit the TfidfVectorizer to the training data specifiying a minimum document frequency of 5\n",
+    "vect = TfidfVectorizer(min_df = 5).fit(X_train)\n",
+    "len(vect.get_feature_names())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AUC:  0.900231756945\n"
+     ]
+    }
+   ],
+   "source": [
+    "X_train_vectorized = vect.transform(X_train)\n",
+    "\n",
+    "model = LogisticRegression()\n",
+    "model.fit(X_train_vectorized, y_train)\n",
+    "predictions = model.predict(vect.transform(X_test))\n",
+    "print('AUC: ', roc_auc_score(y_test, predictions))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Smallest Tfidf: \n",
+      "['brawns' 'messiah' 'excites' 'reading___' 'seizing' 'srgb' 'liquidating'\n",
+      " '16nm' '1b' '700nits']\n",
+      "\n",
+      "Largest Tfidf: \n",
+      "['excllent' 'ecxelente' 'purchase' 'eh' 'gud' 'looser' 'gucci' 'soo'\n",
+      " 'unusable' 'lost']\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "feature_names = np.array(vect.get_feature_names())\n",
+    "\n",
+    "sorted_tfidf_index = X_train_vectorized.max(0).toarray()[0].argsort()\n",
+    "\n",
+    "print('Smallest Tfidf: \\n{}\\n'.format(feature_names[sorted_tfidf_index[:10]]))\n",
+    "print('Largest Tfidf: \\n{}\\n'.format(feature_names[sorted_tfidf_index[:-11:-1]]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Smallest coef: \n",
+      "['not' 'worst' 'disappointed' 'waste' 'poor' 'terrible' 'return' 'stopped'\n",
+      " 'slow' 'returning']\n",
+      "\n",
+      "Largest coef: \n",
+      "['love' 'great' 'amazing' 'excellent' 'perfect' 'loves' 'best' 'awesome'\n",
+      " 'perfectly' 'easy']\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "sorted_coef_index = model.coef_[0].argsort()\n",
+    "\n",
+    "print('Smallest coef: \\n{}\\n'.format(feature_names[sorted_coef_index[:10]]))\n",
+    "print('Largest coef: \\n{}\\n'.format(feature_names[sorted_coef_index[:-11:-1]]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0 0]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# These reviews are treated the same by our current model\n",
+    "\n",
+    "print(model.predict(vect.transform(['Not an issue, phone is working', \n",
+    "                                   'an issue, phone is not working'])))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "217383"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# n-grams\n",
+    "# Fit the CountVectorizer to the training data specifiying a minimum \n",
+    "# document frequency of 5 and extracting 1-grams and 2-grams\n",
+    "vect = CountVectorizer(min_df = 5, ngram_range = (1,2)).fit(X_train)\n",
+    "X_train_vectorized = vect.transform(X_train)\n",
+    "len(vect.get_feature_names())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AUC:  0.948236892649\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = LogisticRegression()\n",
+    "model.fit(X_train_vectorized, y_train)\n",
+    "\n",
+    "predictions = model.predict(vect.transform(X_test))\n",
+    "print('AUC: ', roc_auc_score(y_test, predictions))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Smallest Coef: \n",
+      "['junk' 'no good' 'worst' 'good love' 'horrible' 'nope' 'terrible'\n",
+      " 'needed good' 'three stars' 'not happy']\n",
+      "\n",
+      "Largest Coef: \n",
+      "['excelent' 'excelente' 'excellent' 'perfect' 'no issues' 'no problems'\n",
+      " 'perfecto' 'exelente' 'awesome' 'awsome']\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "feature_names = np.array(vect.get_feature_names())\n",
+    "sorted_coef_index = model.coef_[0].argsort()\n",
+    "\n",
+    "print('Smallest Coef: \\n{}\\n'.format(feature_names[sorted_coef_index][:10]))\n",
+    "print('Largest Coef: \\n{}\\n'.format(feature_names[sorted_coef_index][:-11:-1]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0 0]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(model.predict(vect.transform(['not an issue, phone is working',\n",
+    "                                   'an issue, phone is not working'])))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

	Product Name	Brand Name	Price	Rating	Reviews	Review Votes
0	\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...	Samsung	199.99	5	I feel so LUCKY to have found this used (phone...	1.0
1	\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...	Samsung	199.99	4	nice phone, nice up grade from my pantach revu...	0.0
2	\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...	Samsung	199.99	5	Very pleased	0.0
3	\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...	Samsung	199.99	4	It works good but it goes slow sometimes but i...	0.0
4	\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...	Samsung	199.99	4	Great phone to replace my lost phone. The only...	0.0