From b3636a1c7eb461f9a84ba72f996668a506225af1 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Mon, 5 Jun 2023 15:24:10 -0500 Subject: [PATCH] add notebook --- main.ipynb | 1228 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1228 insertions(+) create mode 100644 main.ipynb diff --git a/main.ipynb b/main.ipynb new file mode 100644 index 0000000..7204c32 --- /dev/null +++ b/main.ipynb @@ -0,0 +1,1228 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "242fecb8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to\n", + "[nltk_data] /Users/emansell/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /Users/emansell/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sb\n", + "import matplotlib.pyplot as plt\n", + "import time\n", + "import os\n", + "import nltk as nltk\n", + "from nltk.corpus import stopwords\n", + "from nltk.tokenize import word_tokenize\n", + "nltk.download('stopwords')\n", + "nltk.download('punkt')\n", + "import re\n", + "import string\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.metrics import plot_confusion_matrix,classification_report\n", + "from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier\n", + "from sklearn.naive_bayes import MultinomialNB\n", + "from sklearn.model_selection import GridSearchCV\n", + "import spacy" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "757d534d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ArtistSongGenreLanguageLyrics
012 stonesworld so coldRockenIt starts with pain, followed by hate\\nFueled ...
112 stonesbrokenRockenFreedom!\\nAlone again again alone\\nPatiently w...
212 stones3 leaf loserRockenBiting the hand that feeds you, lying to the v...
312 stonesanthem for the underdogRockenYou say you know just who I am\\nBut you can't ...
412 stonesadrenalineRockenMy heart is beating faster can't control these...
..................
290178bobby womacki wish he didn t trust me so muchR&BenI'm the best friend he's got I'd give him the ...
290179bad boys bluei totally miss youPopenBad Boys Blue \"I Totally Miss You\" I did you w...
290180celine dionsorry for lovePopenForgive me for the things That I never said to...
290181dan berncure for aidsIndieenThe day they found a cure for AIDS The day the...
290182crawdad republiciceberg meadowsPopenFourth of July has come, it's custom that we g...
\n", + "

290143 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Artist Song Genre Language \\\n", + "0 12 stones world so cold Rock en \n", + "1 12 stones broken Rock en \n", + "2 12 stones 3 leaf loser Rock en \n", + "3 12 stones anthem for the underdog Rock en \n", + "4 12 stones adrenaline Rock en \n", + "... ... ... ... ... \n", + "290178 bobby womack i wish he didn t trust me so much R&B en \n", + "290179 bad boys blue i totally miss you Pop en \n", + "290180 celine dion sorry for love Pop en \n", + "290181 dan bern cure for aids Indie en \n", + "290182 crawdad republic iceberg meadows Pop en \n", + "\n", + " Lyrics \n", + "0 It starts with pain, followed by hate\\nFueled ... \n", + "1 Freedom!\\nAlone again again alone\\nPatiently w... \n", + "2 Biting the hand that feeds you, lying to the v... \n", + "3 You say you know just who I am\\nBut you can't ... \n", + "4 My heart is beating faster can't control these... \n", + "... ... \n", + "290178 I'm the best friend he's got I'd give him the ... \n", + "290179 Bad Boys Blue \"I Totally Miss You\" I did you w... \n", + "290180 Forgive me for the things That I never said to... \n", + "290181 The day they found a cure for AIDS The day the... \n", + "290182 Fourth of July has come, it's custom that we g... 
\n", + "\n", + "[290143 rows x 5 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rawdf = pd.read_csv('train.csv')\n", + "df = rawdf.dropna()\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bbe6002c", + "metadata": {}, + "outputs": [], + "source": [ + "#Dropping genres, non-English lyrics\n", + "excessgenres = ['Folk', 'Indie', 'Other']\n", + "\n", + "#One combined mask: genre is not in the excluded list AND the lyrics are tagged English\n", + "df = df[~df.Genre.isin(excessgenres) & (df.Language == 'en')]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "69acf943", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/dg/sxm0105x28g_bgwtjzwr0zc40000gn/T/ipykernel_64209/1614685821.py:7: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " rand = rand.append(sample)\n", + "/var/folders/dg/sxm0105x28g_bgwtjzwr0zc40000gn/T/ipykernel_64209/1614685821.py:7: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " rand = rand.append(sample)\n", + "/var/folders/dg/sxm0105x28g_bgwtjzwr0zc40000gn/T/ipykernel_64209/1614685821.py:7: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " rand = rand.append(sample)\n", + "/var/folders/dg/sxm0105x28g_bgwtjzwr0zc40000gn/T/ipykernel_64209/1614685821.py:7: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " rand = rand.append(sample)\n", + "/var/folders/dg/sxm0105x28g_bgwtjzwr0zc40000gn/T/ipykernel_64209/1614685821.py:7: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. 
Use pandas.concat instead.\n", + " rand = rand.append(sample)\n", + "/var/folders/dg/sxm0105x28g_bgwtjzwr0zc40000gn/T/ipykernel_64209/1614685821.py:7: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " rand = rand.append(sample)\n", + "/var/folders/dg/sxm0105x28g_bgwtjzwr0zc40000gn/T/ipykernel_64209/1614685821.py:7: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " rand = rand.append(sample)\n", + "/var/folders/dg/sxm0105x28g_bgwtjzwr0zc40000gn/T/ipykernel_64209/1614685821.py:7: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " rand = rand.append(sample)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ArtistSongGenreLanguageLyrics
267372berry, chuckevery day i have the bluesRockenevery day\\nevery day i have the blues\\nevery d...
176056april wineall over townRockenSomeone said you was, runnin' 'round \\nLike yo...
49158the rolling stoneslove is strongRockenLove is strong\\nAnd you're so sweet\\nYou make ...
156827madnesstimeRockenWhen I consider how my life is spent\\nThey cry...
240798the monkeesdream worldRockenWalking around with your head in the clouds\\nY...
..................
252175ruben studdardcan i get your attentionR&BenKelly give me the ball girl\\n\\nYou are my girl...
163146amel larrieuxcongoR&BenCongo, get to, get to Congo\\nGet to, get to Co...
257250jacob bankssink or swimR&Benhead-first in the deep\\ni must sink or swim\\nl...
270091az yetevery little bit of my heartR&Ben\\ngirl\\ni always think about it 24/7\\ncan't ge...
141603livewe walk in the dreamR&BenI was wrong \\nWe are not these bodies alone \\n...
\n", + "

7200 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Artist Song Genre Language \\\n", + "267372 berry, chuck every day i have the blues Rock en \n", + "176056 april wine all over town Rock en \n", + "49158 the rolling stones love is strong Rock en \n", + "156827 madness time Rock en \n", + "240798 the monkees dream world Rock en \n", + "... ... ... ... ... \n", + "252175 ruben studdard can i get your attention R&B en \n", + "163146 amel larrieux congo R&B en \n", + "257250 jacob banks sink or swim R&B en \n", + "270091 az yet every little bit of my heart R&B en \n", + "141603 live we walk in the dream R&B en \n", + "\n", + " Lyrics \n", + "267372 every day\\nevery day i have the blues\\nevery d... \n", + "176056 Someone said you was, runnin' 'round \\nLike yo... \n", + "49158 Love is strong\\nAnd you're so sweet\\nYou make ... \n", + "156827 When I consider how my life is spent\\nThey cry... \n", + "240798 Walking around with your head in the clouds\\nY... \n", + "... ... \n", + "252175 Kelly give me the ball girl\\n\\nYou are my girl... \n", + "163146 Congo, get to, get to Congo\\nGet to, get to Co... \n", + "257250 head-first in the deep\\ni must sink or swim\\nl... \n", + "270091 \\ngirl\\ni always think about it 24/7\\ncan't ge... \n", + "141603 I was wrong \\nWe are not these bodies alone \\n... 
\n", + "\n", + "[7200 rows x 5 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Taking random 900 songs from each genre\n", + "genres = ['Rock', 'Pop', 'Hip-Hop', 'Metal', 'Country', 'Jazz', 'Electronic', 'R&B']\n", + "#DataFrame.append is deprecated and removed in pandas 2.0, so concatenate the per-genre samples once\n", + "rand = pd.concat([df[df.Genre == genre].sample(n = 900) for genre in genres])\n", + "rand " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "21e6ef8d", + "metadata": {}, + "outputs": [], + "source": [ + "#Create lemmatized corpus and clean lyrics\n", + "nlp = spacy.load('en_core_web_sm')\n", + "lemmatizedcorpus = []\n", + "#Built once for all songs: a set gives constant-time stop-word lookups instead of scanning a rebuilt list\n", + "useless_words = set(nltk.corpus.stopwords.words(\"english\")) | {'solo', 'im', 'youre'}\n", + "punct_table = str.maketrans('', '', string.punctuation)\n", + "number_word = re.compile(r'\\w*\\d\\w*')\n", + "\n", + "def clean(text):\n", + "    #Returns the cleaned, lemmatized lyrics as one string and appends that same string to lemmatizedcorpus\n", + "    # Make lower and remove punctuation\n", + "    text = text.lower().translate(punct_table)\n", + "    # Remove stop words\n", + "    text_filtered = [word for word in text.split() if word not in useless_words]\n", + "    # Remove numbers (any run of word characters that contains a digit)\n", + "    text_filtered = [number_word.sub('', w) for w in text_filtered]\n", + "    # Lemmatize and add to corpus\n", + "    text_stemmed = [y.lemma_ for y in nlp(' '.join(text_filtered))]\n", + "    cleaned = ' '.join(text_stemmed)\n", + "    #spaCy tokenization is non-destructive, so re-parsing the joined lemmas only to stringify them gave back this text\n", + "    lemmatizedcorpus.append(cleaned)\n", + "\n", + "    return cleaned\n", + "\n", + "rand['Cleaned Lyrics'] = rand['Lyrics'].apply(clean)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7aa935ec", + "metadata": {}, + "outputs": [], + "source": [ + "#Creating TFIDF matrix\n", + "tfidf = TfidfVectorizer(stop_words='english', lowercase=False) \n", + "X = tfidf.fit_transform(lemmatizedcorpus)\n", + "\n", + "tfidf_tokens = tfidf.get_feature_names_out()\n", + "\n", + "\n", + "matrix = pd.DataFrame(\n", + " 
data=X.toarray(), \n", + "    #Row labels are the sampled songs' own index; rand.iterrows() labelled every row with an (index, row) tuple\n", + "    index=rand.index, \n", + "    columns=tfidf_tokens\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "42c80282", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aaaaaaaaaaaaaaahhhhhhhhhhhhhaaaaahaaaaaheeeyaaaaassaaaahaaaarmagedoooonaaaayaaaayeaaah...zuzuzweizweierzyzydecoâºã¼berïsóraleømnis
(267372, [berry, chuck, every day i have the blues, Rock, en, every day\\nevery day i have the blues\\nevery day\\nevery day i have the blues\\nwhen you see me worried\\nbaby\\nbecause it's you i hate to lose\\nnobody loves me\\nnobody seems to care\\nnobody loves me\\nnobody seems to care\\nspeaking of bad luck and trouble will you know i've had my share\\ni'm gonna pack my suit-case\\nmove on down the line\\ni'm gonna pack my suit-case\\nmoving down the line\\nwell\\nthere ain't nobody worryin' and there ain't nobody cryin'\\nseems to me every day\\nevery day i have the blues\\nevery day\\nevery day i have the blues., every day every day blue every day every day blue see worried baby hate lose nobody love nobody seem care nobody love nobody seem care speak bad luck trouble know I ve share go to pack suitcase move line go to pack suitcase move line well be not nobody worryin be not nobody cryin seem every day every day blue every day every day blue])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
(176056, [april wine, all over town, Rock, en, Someone said you was, runnin' 'round \\nLike you owned the world \\nIt's no secret, you're the talk of the town \\nYou know, the best of the girls \\n\\nThat's right, and now you \\nWalked right out of my heart \\nYou know it's all over town \\nI said you walked walked, right right, out of my heart \\nYou know it's all over town \\nNow it's all over \\n\\nYou're so neat, you're so cute, you're so fine \\nYou know that everybody wants some action \\nThey see you comin' and they act like it's cool \\nBecause they know that something's happenin' \\n\\nThat's right, because you \\nWalked right out of my heart \\nAnd now it's all over town \\nI said you walked walked, right right, out of my heart \\nYou know the word's goin' 'round \\nNow it's all over, now it's all over \\n\\nI don't care (he don't care) \\nI don't care \\n\\nI don't care care what they say about us anymore \\nI don't care what they want to do \\nBaby all my heart is for you \\nNo matter what happens \\n\\nThere's no reason why we still can't be friends \\nYou know we all need someone we can talk to \\nNo matter what they say, we both know I'm right \\nAfter all we've been through \\n\\nAnd now you walked right out of my heart \\nYou know it's all over town \\nI said you walked (walked), right (right), out of my heart \\nYou know the word's goin' 'round \\n\\nNow it's all over \\nI said you walked, right out of my heart \\nI said you walked (walked), right (right), out of my heart \\nYou walked right out of my heart \\nAnd now it's all over town, someone say runnin round like own world secret talk town know good girl that s right walk right heart know town say walk walk right right heart know town neat cute fine know everybody want action see comin act like cool know something happenin that s right walk right heart town say walk walk right right heart know word goin round do not care do not care do not care do not care care say we anymore do not care want 
baby heart matter happen there s reason still can not friend know need someone talk matter say know right we ve walk right heart know town say walk walk right right heart know word goin round say walk right heart say walked walk right right heart walk right heart town])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
(49158, [the rolling stones, love is strong, Rock, en, Love is strong\\nAnd you're so sweet\\nYou make me hard\\nYou make me weak\\nLove is strong\\nAnd you're so sweet\\nAnd some day, babe\\nWe got to meet\\nA glimpse of you\\nWas all it took\\nA stranger's glance\\nIt got me hooked\\nAnd I followed you\\nAcross the stars\\nI looked for you\\nIn seedy bars\\nWhat are you scared of, baby\\nIt's more than just a dream\\nI need some time\\nWe make a beautiful team\\nA beautiful team\\nLove is strong\\nAnd you're so sweet\\nAnd some day, babe\\nWe got to meet\\nJust anywhere\\nOut in the park\\nOut on the street\\nAnd in the dark\\nI followed you\\nThrough swirling seas\\nDown darkened woods\\nWith silent trees\\nYour love is strong\\nAnd you're so sweet\\nYou make me hard\\nYou make me weak\\nWhat are you scared of, baby\\nIt's more than just a dream\\nI need some time\\nWe make a beautiful team\\nBeautiful\\nI wait for you\\nUntil the dawn\\nMy mind is ripped\\nMy heart is torned\\nYour love is strong\\nAnd you're so sweet\\nYour love is bitter\\nIt's taken neat\\nLove is strong, yeah, love strong sweet make hard make weak love strong sweet day babe got meet glimpse take stranger glance got hook follow across star look seedy bar scare baby dream need time make beautiful team beautiful team love strong sweet day babe got meet anywhere park street dark follow swirl sea darken wood silent tree love strong sweet make hard make weak scared baby dream need time make beautiful team beautiful wait dawn mind rip heart torne love strong sweet love bitter take neat love strong yeah])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
(156827, [madness, time, Rock, en, When I consider how my life is spent\\nThey cry but I know that everything is heaven sent\\nBut why do people lie when all they want is truth at their tables\\n\\nThe sky is looking very blue today\\nIt's so fine I'd agree with every word you say about it\\nIt must be said it doesn't mean much at all\\n\\nTime, you make changes\\nTime, you're that ages\\nTime is walking with you by your side\\n\\nThere's no reason that we should complain\\nIn the night everybody looks the same\\nIt's so sad but we don't seem to be going forward at all\\n\\nTime, time, time, time\\n\\nTime, you make changes\\nTime, you're that ages\\nTime is walking with you by your side\\nTime is walking with you by your side\\n\\nCome on time\\n\\nTime, you make changes\\nTime, you're that ages\\nTime is walking with you by your side\\n\\nTime, gentlemen please time\\nTime, consider life spend cry know everything heaven send people lie want truth table sky look blue today fine i d agree every word say must say do not mean much time make change time age time walk side there s reason complain night everybody look sad do not seem go forward time time time time time make change time age time walk side time walk side come time time make change time age time walk side time gentleman please time time])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
(240798, [the monkees, dream world, Rock, en, Walking around with your head in the clouds\\nYou don't care\\nWhy must you walk when you know \\nYou're not going anywhere\\n\\n[Chorus]\\nWhy don't you come out of your dream world\\nIt's not real\\nIt's not the way it seems to be\\nWhy don't you come into the real world\\nCome with me\\nWe'll share our thoughts, forget the dreams\\nYou'll see\\n\\nAlways pretending that everything's fine \\nWhen it's not\\nWhy must you lie when you know that \\nYou'll always get caught\\n\\n[Chorus x2]\\n\\nYou'll see\\nYou'll see, walk around head cloud do not care must walk know go anywhere chorus do not come dream world real way seem do not come real world come well share thought forget dream you ll see always pretend everything fine must lie know you ll always get catch chorus you ll see you ll see])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
..................................................................
(252175, [ruben studdard, can i get your attention, R&B, en, Kelly give me the ball girl\\n\\nYou are my girl now, light skinned with the pretty toes now,\\nAnd if you got a csrub now let him go now, life is too short\\nTo settle now, c'mon now, in the club now,\\nHold up the duckin with the po po now,\\nGet that money anyway now, and if you work\\nHard you gotta play now, c'mon now.\\n\\n[Chorus]\\nCan I get your attention for a minute,\\nYou need to know that we gotta live it up girl (live it up)\\n(c'mon now) and all of my thugs (c'mon now (oh))\\nTo everybody who be livin it up in the hood,\\nOn the block, everywhere in the club gurl and alll of my thugs.\\n\\nTo everyone in the east (give it up)\\nIn the west (give it up) up north (give it up)\\nDown south (give it up),\\nTo the ballers shot callers, everyhood its all good.\\n\\nWent accros the coast now, the 205 got my back now,\\nA brand new ride that I got now,\\nOne's on my feet good to go now,\\n(c'mon now) having fun (now)\\nMe and my crew in the club now and the girls\\nShow me love now, Friday, Saturday, Sunday, now c'mon now.\\n\\n[Chorus]\\n\\nFrom the 205 to the 228,\\nFrom the NYC all the way back to L.A,\\nCome out and play we gonna live it up,\\nTo lil momma who hold me down I gotta give it up,\\nTo my thugs out there hustlein I hope you lve it up,\\nIn the club spinnin the bucks like\\nYou prince and stuff, every what what what,\\nHolla at ya boy pretty tony and big rub we both\\nFrom the dirty dirty you know how we do.\\n\\nOh ohhh yeah, so when you hear this on the radio\\nPlay it loud in your stario (your stero)\\nCome and show ya boy some love,\\nCome show a playa what you got,\\nKeep movin it don't you stop all my fellas get on the floor.\\nGive the girls what they lookin for,\\nAnd its all about havin fun and we won't stop until the sun comes up.\\n\\n[Chorus: x2], kelly give ball girl girl light skin pretty toe get csrub let go life short settle cmon club hold duckin po po get money 
anyway work hard get to play cmon chorus get attention minute need know get to live girl live cmon thugs cmon oh everybody livin hood block everywhere club gurl alll thug everyone east give west give north give south give baller shoot caller everyhood good go accros coast get back brand new ride get one foot good go cmon fun crew club girl show love friday saturday sunday cmon chorus nyc way back la come play go to live lil momma hold get to give thug hustlein hope lve club spinnin buck like prince stuff every holla ya boy pretty tony big rub dirty dirty know oh ohhh yeah hear radio play loud stario stero come show ya boy love come show playa got keep movin do not stop fella get floor give girl lookin havin fun will not stop sun come chorus])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
(163146, [amel larrieux, congo, R&B, en, Congo, get to, get to Congo\\nGet to, get to Congo\\nGet to, get to Congo\\nGet to, get to Congo\\n\\nNow could we go to the square they call Congo?\\nI need to go and lay my feet upon the stone\\nWhere the first of us stood before, before, before\\n\\nWhere we sat and played to revive our depleted souls\\nWhere we went to forget our freedom was not our own\\nWhere we went to hold onto the memories of way back home\\n\\nNow could we?\\nNow could we?\\nNow could we go?\\n\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\n\\nNow could we go to the square they call Congo?\\nI need to go and lay my feet upon the stones\\nWhere the first of us stood before, before, before\\n\\nWhere we made music in remembrance of human bodies sold\\nWhere the sound of an old pain became a new music of hope\\nWhere they paved me a road so I could get to Congo\\n\\nNow could we?\\nNow could we?\\nNow could we go?\\n\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\n\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\n\\nAnd I would not be here today if they had not been\\nSo displaced, so displaced, but still\\nBut still they made time to sing and play a song\\nA song in Congo\\n\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\n\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\n\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo\\nGet to Cong, get to Cong Congo\\nGet to Cong, get to Congo..., congo get get congo get get congo get get congo get get congo could go square call congo need go lay foot upon stone first we stand sit play revive deplete soul go forget freedom go hold 
onto memory way back home could could could go get cong get cong congo get cong get congo get cong get cong congo get cong get congo could go square call congo need go lay foot upon stone first we stand make music remembrance human body sell sound old pain become new music hope paved road could get congo could could could go get cong get cong congo get cong get congo get cong get cong congo get cong get congo get cong get cong congo get cong get congo get cong get cong congo get cong get congo would today displace displace still still make time sing play song song congo get cong get cong congo get cong get congo get cong get cong congo get cong get congo get cong get cong congo get cong get congo get cong get cong congo get cong get congo get cong get cong congo get cong get congo get cong get cong congo get cong get congo])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
(257250, [jacob banks, sink or swim, R&B, en, head-first in the deep\\ni must sink or swim\\nlearning how to breathe\\ni'll end as i begin\\noh\\ni don't know if i can do it\\ndo it\\ni'm not sure if i can do it\\ndo it\\ni know i wanna do it\\ndo it\\nfuck it\\ni'm gonna do it\\ndo it\\ncause i've got a feeling\\ntelling me to do it\\noh god\\nwhat a feeling\\nand i know and i know and i know and i know that it's\\nstanding on the edge\\nwill i fall or fly?\\nif this is the end\\nat least i saw outside\\ni don't know if i can do it\\ndo it\\ni'm not sure if i can do it\\ndo it\\ni know i wanna do it\\ndo it\\nfuck it\\ni'm gonna do it\\ndo it\\ncause i've got a feeling\\ntelling me to do it\\noh god\\nwhat a feeling\\nand i know and i know and i know and i know that it's\\ndays are long\\nthe night is young\\neven when we crash and burn\\noh\\nsomeday our story comes to an end\\ni'm still here\\ncould it be faith or could it be fortune?\\ni don't know the answer but i gotta try\\ncause i've got a feeling\\ntelling me to do it\\noh god\\nwhat a feeling\\nand i know and i know and i know and i know that it's\\ndeep inside of me\\ndeep inside of me\\ndeep inside of me\\nand i know and i know and i know and i know that it's deep inside of me\\nhead-first in the deep\\ni must sink or swim, headfirst deep must sink swim learn breathe ill end begin oh do not know sure know wanna fuck go to cause I ve get feel tell oh god feeling know know know know stand edge fall fly end least see outside do not know sure know wanna fuck go to cause I ve get feel tell oh god feeling know know know know day long night young even crash burn oh someday story come end still could faith could fortune do not know answer get to try cause I ve get feel tell oh god feeling know know know know deep inside deep inside deep inside know know know know deep inside headfirst deep must sink swim])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
(270091, [az yet, every little bit of my heart, R&B, en, \\ngirl\\ni always think about it 24/7\\ncan't get it outta my mind\\ni want so bad to be with you\\nbaby\\nwhen i dream about it\\nit feels like heaven\\nwhy you gotta be so fine\\ni want so bad to be in you\\ni'm longing girl to tell you\\n'bout the visions in my head\\nyou never will be lonely\\nlong as roses are red\\ngirl\\nyou should never worry\\nmy intentionms\\nthey are true\\nthe only thing that matters is you\\n'cause i love you\\nand i want you\\nand i can't be without\\ni swear there's no doubt\\nthat i need you\\ni adore you\\nevery inch\\nevery part\\nwith every little bit of my heart\\nbaby\\ni can think of 'bout a million reasons\\nwhy you gotta be with me\\nmy lovin' flows like endlessly\\nbaby we can go about a zillion places\\nwhat's your fantasy\\njust ask and girl\\nyou shall receive\\ni want so much to tell you\\ngirl\\nyou'll never hurt again\\nlong as london bridge keeps falling\\ngirl\\njust know i got your back\\ngirl\\nthere's just no rhyme\\nno reason\\n'bout the way i feel 'bout you\\nthe only thing that matters is you\\n'cause i fell in love the day\\nbaby\\nyou came into my life\\ni've never been in love before\\nchanging everything i think about\\nand i don't want nobody else to have you,, girl always think can not get outta mind want bad baby dream feel like heaven get to fine want bad longing girl tell bout vision head never lonely long rose red girl never worry intentionms true thing matter cause love want can not without swear there s doubt need adore every inch every part every little bit heart baby think bout million reason get to lovin flow like endlessly baby go zillion place what s fantasy ask girl shall receive want much tell girl you ll never hurt long london bridge keep fall girl know get back girl there s rhyme reason bout way feel bout thing matter cause fall love day baby come life I ve never love change everything think do not want nobody 
else])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
(141603, [live, we walk in the dream, R&B, en, I was wrong \\nWe are not these bodies alone \\nIn a dream with a mind of its own \\nI I know you long \\nTo be tethered and tied to the heart of the One \\nThe one that be layin' you down \\nOn that ancient ground \\nNothin' more to be found \\nAh yeah \\n\\n[Chorus:]\\nNow we walk in the dream \\nBut dream no more \\nTo live a life in love the only thing in \\nCommon was \\nWe knew there was somethin' more \\nNow we walk in the dream \\nBut dream no more \\nTo live a life in love \\nTo hear the voice in the wilderness \\nThe other side of the door \\nSomethin' more \\n\\nI feel strong \\nI'm finally at peace \\nThe war is all gone \\nBy no cause of my own \\nLike an eagle cuts through the air \\nNo time for fear \\nFaith in his wings takes him there \\n\\n[Chorus]\\nNow we walk in the dream \\nBut dream no more \\nTo live a life in love the only thing in \\nCommon was \\nWe knew there was somethin' more \\nNow we walk in the dream \\nBut dream no more \\nTo live a life in love \\nTo hear the voice in the wilderness \\nThe other side of the door \\nSomethin' more\\n\\nMore than these \\nWomen in the streets pullin' out their hair \\nSomethin' more \\nSomethin' more than all the pain we feel \\nSomethin' more \\nMore than we could dream of, all light \\nLove-bliss, no death and no fear \\nNo fear\\n\\n[Chorus]\\nNow we walk in the dream \\nBut dream no more \\nTo live a life in love the only thing in common was \\nWe knew there was somethin' more \\nNow we walk in the dream \\nBut dream no more \\nTo live a life in love \\nTo hear the voice in the wilderness \\nThe other side of the door \\nSomethin' more \\n\\nI was wrong \\nOh yeah, I was wrong, wrong body alone dream mind know long tethered tie heart one one layin ancient ground nothin find ah yeah chorus walk dream dream live life love thing common know somethin walk dream dream live life love hear voice wilderness side door somethin feel strong finally peace war 
go cause like eagle cut air time fear faith wing take chorus walk dream dream live life love thing common know somethin walk dream dream live life love hear voice wilderness side door somethin woman street pullin hair somethin somethin pain feel somethin could dream light lovebliss death fear fear chorus walk dream dream live life love thing common know somethin walk dream dream live life love hear voice wilderness side door somethin wrong oh yeah wrong])0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

7200 rows × 31855 columns

\n", + "
" + ], + "text/plain": [ + " aa aaa \\\n", + "(267372, [berry, chuck, every day i have the bl... 0.0 0.0 \n", + "(176056, [april wine, all over town, Rock, en, ... 0.0 0.0 \n", + "(49158, [the rolling stones, love is strong, Ro... 0.0 0.0 \n", + "(156827, [madness, time, Rock, en, When I consi... 0.0 0.0 \n", + "(240798, [the monkees, dream world, Rock, en, W... 0.0 0.0 \n", + "... ... ... \n", + "(252175, [ruben studdard, can i get your attent... 0.0 0.0 \n", + "(163146, [amel larrieux, congo, R&B, en, Congo,... 0.0 0.0 \n", + "(257250, [jacob banks, sink or swim, R&B, en, h... 0.0 0.0 \n", + "(270091, [az yet, every little bit of my heart,... 0.0 0.0 \n", + "(141603, [live, we walk in the dream, R&B, en, ... 0.0 0.0 \n", + "\n", + " aaaaaaaaaahhhhhhhhhhhhh \\\n", + "(267372, [berry, chuck, every day i have the bl... 0.0 \n", + "(176056, [april wine, all over town, Rock, en, ... 0.0 \n", + "(49158, [the rolling stones, love is strong, Ro... 0.0 \n", + "(156827, [madness, time, Rock, en, When I consi... 0.0 \n", + "(240798, [the monkees, dream world, Rock, en, W... 0.0 \n", + "... ... \n", + "(252175, [ruben studdard, can i get your attent... 0.0 \n", + "(163146, [amel larrieux, congo, R&B, en, Congo,... 0.0 \n", + "(257250, [jacob banks, sink or swim, R&B, en, h... 0.0 \n", + "(270091, [az yet, every little bit of my heart,... 0.0 \n", + "(141603, [live, we walk in the dream, R&B, en, ... 0.0 \n", + "\n", + " aaaaahaaaaaheeey aaaaass \\\n", + "(267372, [berry, chuck, every day i have the bl... 0.0 0.0 \n", + "(176056, [april wine, all over town, Rock, en, ... 0.0 0.0 \n", + "(49158, [the rolling stones, love is strong, Ro... 0.0 0.0 \n", + "(156827, [madness, time, Rock, en, When I consi... 0.0 0.0 \n", + "(240798, [the monkees, dream world, Rock, en, W... 0.0 0.0 \n", + "... ... ... \n", + "(252175, [ruben studdard, can i get your attent... 0.0 0.0 \n", + "(163146, [amel larrieux, congo, R&B, en, Congo,... 
0.0 0.0 \n", + "(257250, [jacob banks, sink or swim, R&B, en, h... 0.0 0.0 \n", + "(270091, [az yet, every little bit of my heart,... 0.0 0.0 \n", + "(141603, [live, we walk in the dream, R&B, en, ... 0.0 0.0 \n", + "\n", + " aaaah aaaarmagedoooon \\\n", + "(267372, [berry, chuck, every day i have the bl... 0.0 0.0 \n", + "(176056, [april wine, all over town, Rock, en, ... 0.0 0.0 \n", + "(49158, [the rolling stones, love is strong, Ro... 0.0 0.0 \n", + "(156827, [madness, time, Rock, en, When I consi... 0.0 0.0 \n", + "(240798, [the monkees, dream world, Rock, en, W... 0.0 0.0 \n", + "... ... ... \n", + "(252175, [ruben studdard, can i get your attent... 0.0 0.0 \n", + "(163146, [amel larrieux, congo, R&B, en, Congo,... 0.0 0.0 \n", + "(257250, [jacob banks, sink or swim, R&B, en, h... 0.0 0.0 \n", + "(270091, [az yet, every little bit of my heart,... 0.0 0.0 \n", + "(141603, [live, we walk in the dream, R&B, en, ... 0.0 0.0 \n", + "\n", + " aaaay aaaaye aaah ... \\\n", + "(267372, [berry, chuck, every day i have the bl... 0.0 0.0 0.0 ... \n", + "(176056, [april wine, all over town, Rock, en, ... 0.0 0.0 0.0 ... \n", + "(49158, [the rolling stones, love is strong, Ro... 0.0 0.0 0.0 ... \n", + "(156827, [madness, time, Rock, en, When I consi... 0.0 0.0 0.0 ... \n", + "(240798, [the monkees, dream world, Rock, en, W... 0.0 0.0 0.0 ... \n", + "... ... ... ... ... \n", + "(252175, [ruben studdard, can i get your attent... 0.0 0.0 0.0 ... \n", + "(163146, [amel larrieux, congo, R&B, en, Congo,... 0.0 0.0 0.0 ... \n", + "(257250, [jacob banks, sink or swim, R&B, en, h... 0.0 0.0 0.0 ... \n", + "(270091, [az yet, every little bit of my heart,... 0.0 0.0 0.0 ... \n", + "(141603, [live, we walk in the dream, R&B, en, ... 0.0 0.0 0.0 ... \n", + "\n", + " zuzu zwei zweier zy \\\n", + "(267372, [berry, chuck, every day i have the bl... 0.0 0.0 0.0 0.0 \n", + "(176056, [april wine, all over town, Rock, en, ... 
0.0 0.0 0.0 0.0 \n", + "(49158, [the rolling stones, love is strong, Ro... 0.0 0.0 0.0 0.0 \n", + "(156827, [madness, time, Rock, en, When I consi... 0.0 0.0 0.0 0.0 \n", + "(240798, [the monkees, dream world, Rock, en, W... 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... \n", + "(252175, [ruben studdard, can i get your attent... 0.0 0.0 0.0 0.0 \n", + "(163146, [amel larrieux, congo, R&B, en, Congo,... 0.0 0.0 0.0 0.0 \n", + "(257250, [jacob banks, sink or swim, R&B, en, h... 0.0 0.0 0.0 0.0 \n", + "(270091, [az yet, every little bit of my heart,... 0.0 0.0 0.0 0.0 \n", + "(141603, [live, we walk in the dream, R&B, en, ... 0.0 0.0 0.0 0.0 \n", + "\n", + " zydeco ⺠ã¼ber ïs \\\n", + "(267372, [berry, chuck, every day i have the bl... 0.0 0.0 0.0 0.0 \n", + "(176056, [april wine, all over town, Rock, en, ... 0.0 0.0 0.0 0.0 \n", + "(49158, [the rolling stones, love is strong, Ro... 0.0 0.0 0.0 0.0 \n", + "(156827, [madness, time, Rock, en, When I consi... 0.0 0.0 0.0 0.0 \n", + "(240798, [the monkees, dream world, Rock, en, W... 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... \n", + "(252175, [ruben studdard, can i get your attent... 0.0 0.0 0.0 0.0 \n", + "(163146, [amel larrieux, congo, R&B, en, Congo,... 0.0 0.0 0.0 0.0 \n", + "(257250, [jacob banks, sink or swim, R&B, en, h... 0.0 0.0 0.0 0.0 \n", + "(270091, [az yet, every little bit of my heart,... 0.0 0.0 0.0 0.0 \n", + "(141603, [live, we walk in the dream, R&B, en, ... 0.0 0.0 0.0 0.0 \n", + "\n", + " órale ømnis \n", + "(267372, [berry, chuck, every day i have the bl... 0.0 0.0 \n", + "(176056, [april wine, all over town, Rock, en, ... 0.0 0.0 \n", + "(49158, [the rolling stones, love is strong, Ro... 0.0 0.0 \n", + "(156827, [madness, time, Rock, en, When I consi... 0.0 0.0 \n", + "(240798, [the monkees, dream world, Rock, en, W... 0.0 0.0 \n", + "... ... ... \n", + "(252175, [ruben studdard, can i get your attent... 0.0 0.0 \n", + "(163146, [amel larrieux, congo, R&B, en, Congo,... 
0.0 0.0 \n", + "(257250, [jacob banks, sink or swim, R&B, en, h... 0.0 0.0 \n", + "(270091, [az yet, every little bit of my heart,... 0.0 0.0 \n", + "(141603, [live, we walk in the dream, R&B, en, ... 0.0 0.0 \n", + "\n", + "[7200 rows x 31855 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "53b6e62c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "GradientBoostingClassifier(n_estimators=500, random_state=123)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Processing for models\n", + "matrix['target'] = LabelEncoder().fit_transform(rand[\"Genre\"])\n", + "target = matrix['target']\n", + "\n", + "X_train,X_test,y_train,y_test = train_test_split(X, target, test_size=0.2, stratify = target)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "bc0cdc8e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Gradient Boosting accuracy score 39.72 %\n", + "\n" + ] + } + ], + "source": [ + "# Gradient Boosting Classifier\n", + "\n", + "gbmodel = GradientBoostingClassifier(n_estimators=500, random_state=123)\n", + "gbmodel.fit(X_train, y_train)\n", + "print(\"Gradient Boosting accuracy score {:.2f} %\\n\".format(gbmodel.score(X_test,y_test)*100))" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "fb01316c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random forest accuracy score 41.39 %\n", + "\n" + ] + } + ], + "source": [ + "# Random Forest Classifier\n", + "\n", + "rfmodel = RandomForestClassifier()\n", + "rfmodel.fit(X_train, y_train)\n", + "print(\"Random forest accuracy score {:.2f} %\\n\".format(rfmodel.score(X_test,y_test)*100))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + 
"id": "c4ae16e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Naive Bayes accuracy score 40.83 %\n", + "\n" + ] + } + ], + "source": [ + "# Naive Bayes Classifier\n", + "\n", + "nbmodel = MultinomialNB()\n", + "nbmodel.fit(X_train.toarray(), y_train)\n", + "print(\"Naive Bayes accuracy score {:.2f} %\\n\".format(nbmodel.score(X_test.toarray(),y_test)*100))" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "8ed9a8a3", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'learning_rate': 0.01, 'max_depth': 8, 'max_features': 'sqrt', 'n_estimators': 10}\n" + ] + } + ], + "source": [ + "# Grid search to improve model performance (Gradient Boost)\n", + "\n", + "parameters = {\n", + " \"learning_rate\": [0.01, 0.025, 0.05, 0.1, 0.2],\n", + " \"max_depth\":[3,5,8],\n", + " \"max_features\":[\"log2\",\"sqrt\"],\n", + " \"n_estimators\":[10]\n", + " }\n", + "\n", + "gcv = GridSearchCV(gbmodel,parameters)\n", + "gcv.fit(X_train, y_train.values.ravel())\n", + "print(gcv.best_params_)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "b846f7d0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'max_depth': 20, 'n_estimators': 100}\n" + ] + } + ], + "source": [ + "# Grid search to improve model performance (Random Forest)\n", + "\n", + "parameters = {\n", + " 'n_estimators': [5,50,100],\n", + " 'max_depth': [2,10,20,None]\n", + "}\n", + "\n", + "rcv = GridSearchCV(rfmodel,parameters)\n", + "rcv.fit(X_train, y_train.values.ravel())\n", + "print(rcv.best_params_)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "43738707", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'alpha': 0.1}\n" + ] + } + ], + "source": [ + "# Grid search to improve model performance (Naive Bayes)\n", + "\n", + "parameters 
={'alpha': [0.00001, 0.001, 0.1, 1, 10, 100]}\n", + "\n", + "ncv = GridSearchCV(nbmodel,parameters)\n", + "ncv.fit(X_train, y_train.values.ravel())\n", + "print(ncv.best_params_)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "52e84659", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Gradient Boosting accuracy score 35.62 %\n", + "\n" + ] + } + ], + "source": [ + "# Optimized Gradient Boosting Classifier\n", + "\n", + "ogbmodel = GradientBoostingClassifier(learning_rate = 0.01, max_depth= 8, max_features= 'sqrt', n_estimators = 10)\n", + "ogbmodel.fit(X_train, y_train)\n", + "print(\"Gradient Boosting accuracy score {:.2f} %\\n\".format(ogbmodel.score(X_test,y_test)*100))" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "c08221b3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random forest accuracy score 40.42 %\n", + "\n" + ] + } + ], + "source": [ + "# Optimized Random Forest Classifier\n", + "\n", + "orfmodel = RandomForestClassifier(max_depth= 20, n_estimators= 100)\n", + "orfmodel.fit(X_train, y_train)\n", + "print(\"Random forest accuracy score {:.2f} %\\n\".format(orfmodel.score(X_test,y_test)*100))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "b02027ad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Naive Bayes accuracy score 41.88 %\n", + "\n" + ] + } + ], + "source": [ + "# Optimized Naive Bayes Classifier\n", + "\n", + "onbmodel = MultinomialNB(alpha=0.1)\n", + "onbmodel.fit(X_train.toarray(), y_train)\n", + "print(\"Naive Bayes accuracy score {:.2f} %\\n\".format(onbmodel.score(X_test.toarray(),y_test)*100))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1737e376", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'nbmodel' is not defined", + "output_type": "error", + 
"traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [9]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Graph results\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m 4\u001b[0m graph \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(\n\u001b[1;32m 5\u001b[0m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mName\u001b[39m\u001b[38;5;124m'\u001b[39m: [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMultinomial NB\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGrid Search Multinomial NB\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGradient Booster\u001b[39m\u001b[38;5;124m'\u001b[39m, \n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGrid Search Gradient Booster\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRandom Forest\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGrid Search Random Forest\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAccuracy\u001b[39m\u001b[38;5;124m'\u001b[39m: [\u001b[43mnbmodel\u001b[49m\u001b[38;5;241m.\u001b[39mscore(X_test\u001b[38;5;241m.\u001b[39mtoarray(),y_test), onbmodel\u001b[38;5;241m.\u001b[39mscore(X_test\u001b[38;5;241m.\u001b[39mtoarray(),y_test), gbmodel\u001b[38;5;241m.\u001b[39mscore(X_test,y_test), ogbmodel\u001b[38;5;241m.\u001b[39mscore(X_test,y_test), rfmodel\u001b[38;5;241m.\u001b[39mscore(X_test,y_test), orfmodel\u001b[38;5;241m.\u001b[39mscore(X_test,y_test)]}\n\u001b[1;32m 
8\u001b[0m )\n\u001b[1;32m 10\u001b[0m plt\u001b[38;5;241m.\u001b[39mbar(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mName\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccuracy\u001b[39m\u001b[38;5;124m\"\u001b[39m, data \u001b[38;5;241m=\u001b[39m graph, color\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblue\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgreen\u001b[39m\u001b[38;5;124m\"\u001b[39m,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblue\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgreen\u001b[39m\u001b[38;5;124m\"\u001b[39m,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblue\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgreen\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 11\u001b[0m plt\u001b[38;5;241m.\u001b[39mxlabel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mNameError\u001b[0m: name 'nbmodel' is not defined" + ] + } + ], + "source": [ + "#Graph results\n", + "import matplotlib.pyplot as plt\n", + "\n", + "graph = pd.DataFrame(\n", + " {'Name': ['Multinomial NB', 'Grid Search Multinomial NB', 'Gradient Booster', \n", + " 'Grid Search Gradient Booster', 'Random Forest', 'Grid Search Random Forest'], \n", + " 'Accuracy': [nbmodel.score(X_test.toarray(),y_test), onbmodel.score(X_test.toarray(),y_test), gbmodel.score(X_test,y_test), ogbmodel.score(X_test,y_test), rfmodel.score(X_test,y_test), orfmodel.score(X_test,y_test)]}\n", + ")\n", + "\n", + "plt.bar(\"Name\", \"Accuracy\", data = graph, color=[\"blue\", \"green\",\"blue\", \"green\",\"blue\", \"green\"])\n", + "plt.xlabel(\"Model\")\n", + "plt.ylabel(\"Accuracy\")\n", + "plt.xticks(rotation = 90)\n", + "plt.axhline(y= 0.125, linewidth=1, color=\"k\")\n", + "plt.title(\"Top 3 models final performance\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "1071d4ed", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}