diff --git a/LUCAS/notebooks/neuralnet_comparison.ipynb b/LUCAS/notebooks/neuralnet_comparison.ipynb new file mode 100644 index 0000000..9a6a1ed --- /dev/null +++ b/LUCAS/notebooks/neuralnet_comparison.ipynb @@ -0,0 +1,1915 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Comparison of Neural Models\n", + "This notebook creates a comparison of how our neural models perform. We will try each model with and without word embeddings, and produce visualisations of model performance. \n", + "\n", + "# Feed-Forward Neural Networks\n", + "First we will find this difference for Feed-Forward Neural Networks (FFNN):" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from tensorflow.keras import Sequential\n", + "from tensorflow.keras.layers import Conv2D, Dense, Dropout, Embedding, Flatten, LSTM, MaxPooling2D\n", + "from tensorflow.keras.activations import relu, sigmoid\n", + "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", + "from tensorflow.keras.preprocessing.text import text_to_word_sequence, Tokenizer\n", + "from tensorflow.keras.regularizers import l2\n", + "from tensorflow.keras.callbacks import EarlyStopping\n", + "from tensorflow.keras.utils import to_categorical\n", + "from scripts import training_helpers\n", + "from sklearn.model_selection import train_test_split, StratifiedKFold\n", + "\n", + "from gensim.models import KeyedVectors\n", + "from seaborn import boxplot\n", + "from pandas import DataFrame" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will cross-validate our model, so let's create a function to handle this for us. 
# The cross-validation helper below uses StratifiedKFold splitting.
def run_cross_validate(get_model, X, y, cv=5, categorical=False, add_target_dim=False,
                       epochs=12, batch_size=16, validation_split=0.3, patience=4,
                       random_state=None):
    """Train and score a freshly built model on each fold of a stratified K-fold split.

    Parameters
    ----------
    get_model : callable
        Zero-argument factory returning a new, compiled model. It is called
        once per fold so that no weights carry over between folds.
    X : array-like
        Predictors, indexable along the first axis by sample.
    y : array-like
        One class label per sample; used for stratification and as targets.
    cv : int, default 5
        Number of folds.
    categorical : bool, default False
        If True, one-hot encode the targets with ``to_categorical``.
    add_target_dim : bool, default False
        If True, insert an extra axis at position 1 of the targets.
    epochs, batch_size, validation_split : training settings forwarded to
        ``model.fit`` (defaults match the previously hard-coded values).
    patience : int, default 4
        Patience of the ``EarlyStopping`` callback monitoring ``val_loss``.
    random_state : int or None, default None
        Seed for the shuffled fold assignment. ``None`` keeps the previous
        unseeded behaviour; pass an int for reproducible folds.

    Returns
    -------
    dict
        ``{"accuracies": [...]}`` with one entry per fold.
    """
    # Convert once so folds are selected positionally with NumPy fancy
    # indexing, whatever container type the caller passed in.
    features = np.asarray(X)
    targets = np.asarray(y)

    splitter = StratifiedKFold(n_splits=cv, shuffle=True, random_state=random_state)

    # Fix the one-hot width from the full label set so that every fold is
    # encoded with the same number of columns, even if a fold happens to
    # miss the highest class index.
    num_classes = int(targets.max()) + 1 if categorical else None

    metrics = {
        "accuracies": [],
    }

    for train_indices, test_indices in splitter.split(features, targets):
        training_X, test_X = features[train_indices], features[test_indices]
        training_y, test_y = targets[train_indices], targets[test_indices]

        if categorical:
            training_y = to_categorical(training_y, num_classes=num_classes)
            test_y = to_categorical(test_y, num_classes=num_classes)
        if add_target_dim:
            # (n, ...) -> (n, 1, ...): same result as wrapping each target in a list.
            training_y = np.expand_dims(training_y, axis=1)
            test_y = np.expand_dims(test_y, axis=1)

        model = get_model()
        print("Fitting with: ", training_X.shape, "labels", training_y.shape)
        model.fit(training_X, training_y, epochs=epochs, batch_size=batch_size,
                  validation_split=validation_split,
                  callbacks=[EarlyStopping(monitor='val_loss', patience=patience)])
        # evaluate() returns [loss, *metrics]; index 1 is taken as the accuracy.
        # NOTE(review): assumes the model was compiled with accuracy as its
        # first metric - confirm for every get_model passed in.
        metrics["accuracies"].append(model.evaluate(test_X, test_y)[1])
    return metrics


# --- next notebook cell: Bag of Words (BoW) predictors -----------------------
# First we find results for our Bag of Words (BoW) model.
data_frame = training_helpers.get_data_frame()

predictors_raw = data_frame['review']
num_words = 20000  # vocabulary cap handed to the Keras Tokenizer

tokenizer = Tokenizer(num_words)
tokenizer.fit_on_texts(predictors_raw)
# One TF-IDF weighted row per review.
bow_predictors = tokenizer.texts_to_matrix(predictors_raw, mode='tfidf')
labels = [x for x in data_frame['deceptive']]
+ "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6841 - acc: 0.7299 - val_loss: 0.4919 - val_acc: 0.8753\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 979us/step - loss: 0.3207 - acc: 0.9533 - val_loss: 0.4375 - val_acc: 0.8845\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 954us/step - loss: 0.2247 - acc: 0.9861 - val_loss: 0.4551 - val_acc: 0.8684\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 846us/step - loss: 0.1878 - acc: 0.9921 - val_loss: 0.4220 - val_acc: 0.8799\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 996us/step - loss: 0.1701 - acc: 0.9921 - val_loss: 0.4326 - val_acc: 0.8845\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1619 - acc: 0.9960 - val_loss: 0.4388 - val_acc: 0.8707\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 942us/step - loss: 0.1410 - acc: 0.9970 - val_loss: 0.4325 - val_acc: 0.8776\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 995us/step - loss: 0.1366 - acc: 0.9950 - val_loss: 0.4526 - val_acc: 0.8730\n", + "160/160 [==============================] - 0s 289us/step\n", + "Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7056 - acc: 0.7279 - val_loss: 0.5265 - val_acc: 0.8868\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 829us/step - loss: 0.3381 - acc: 0.9434 - val_loss: 0.4575 - val_acc: 0.8799\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 990us/step - loss: 0.2363 - acc: 0.9831 - val_loss: 0.4420 - 
val_acc: 0.8915\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1894 - acc: 0.9960 - val_loss: 0.4378 - val_acc: 0.8915\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 924us/step - loss: 0.1702 - acc: 0.9940 - val_loss: 0.4364 - val_acc: 0.8845\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 983us/step - loss: 0.1517 - acc: 0.9980 - val_loss: 0.4367 - val_acc: 0.8915\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 807us/step - loss: 0.1410 - acc: 0.9940 - val_loss: 0.4323 - val_acc: 0.8730\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 904us/step - loss: 0.1376 - acc: 0.9960 - val_loss: 0.4444 - val_acc: 0.8776\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 816us/step - loss: 0.1390 - acc: 0.9980 - val_loss: 0.4466 - val_acc: 0.8845\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 1s 804us/step - loss: 0.1469 - acc: 0.9921 - val_loss: 0.4863 - val_acc: 0.8661\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 1s 843us/step - loss: 0.1403 - acc: 0.9960 - val_loss: 0.4829 - val_acc: 0.8730\n", + "160/160 [==============================] - 0s 227us/step\n", + "Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7498 - acc: 0.7071 - val_loss: 0.5638 - val_acc: 0.8799\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 815us/step - loss: 0.4170 - acc: 0.9126 - val_loss: 0.4938 - val_acc: 0.8499\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 840us/step - loss: 0.2676 - acc: 0.9722 - val_loss: 0.4810 - val_acc: 0.8637\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.2140 - acc: 0.9891 - val_loss: 0.4444 - val_acc: 0.8799\n", + "Epoch 
5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1829 - acc: 0.9950 - val_loss: 0.4518 - val_acc: 0.8822\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 975us/step - loss: 0.1756 - acc: 0.9901 - val_loss: 0.4546 - val_acc: 0.8753\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1630 - acc: 0.9960 - val_loss: 0.4691 - val_acc: 0.8845\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1602 - acc: 0.9970 - val_loss: 0.4792 - val_acc: 0.8891\n", + "160/160 [==============================] - 0s 255us/step\n", + "Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 3s 3ms/step - loss: 0.6894 - acc: 0.7358 - val_loss: 0.5138 - val_acc: 0.8545\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.3196 - acc: 0.9523 - val_loss: 0.4592 - val_acc: 0.8799\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.2174 - acc: 0.9891 - val_loss: 0.4649 - val_acc: 0.8637\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1838 - acc: 0.9940 - val_loss: 0.4599 - val_acc: 0.8707\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.1663 - acc: 0.9960 - val_loss: 0.4437 - val_acc: 0.8799\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1553 - acc: 0.9950 - val_loss: 0.4822 - val_acc: 0.8614\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1593 - acc: 0.9950 - val_loss: 0.5086 - val_acc: 0.8637\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1509 - acc: 0.9970 - val_loss: 0.4692 - val_acc: 0.8730\n", + "Epoch 9/12\n", + "1007/1007 
[==============================] - 1s 987us/step - loss: 0.1471 - acc: 0.9940 - val_loss: 0.4755 - val_acc: 0.8753\n", + "160/160 [==============================] - 0s 206us/step\n", + "Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7318 - acc: 0.7080 - val_loss: 0.5465 - val_acc: 0.8753\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 927us/step - loss: 0.3468 - acc: 0.9643 - val_loss: 0.4573 - val_acc: 0.8822\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 916us/step - loss: 0.2312 - acc: 0.9921 - val_loss: 0.4483 - val_acc: 0.8753\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 728us/step - loss: 0.1951 - acc: 0.9960 - val_loss: 0.4571 - val_acc: 0.8753\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 779us/step - loss: 0.1763 - acc: 0.9960 - val_loss: 0.4494 - val_acc: 0.8753\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1657 - acc: 0.9990 - val_loss: 0.4440 - val_acc: 0.8822\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 977us/step - loss: 0.1584 - acc: 0.9980 - val_loss: 0.4273 - val_acc: 0.8799\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1548 - acc: 0.9960 - val_loss: 0.4505 - val_acc: 0.8730\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1544 - acc: 0.9921 - val_loss: 0.4302 - val_acc: 0.8938\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1580 - acc: 0.9940 - val_loss: 0.4619 - val_acc: 0.8707\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1466 - acc: 0.9980 - val_loss: 0.4619 - val_acc: 0.8730\n", + "160/160 [==============================] - 0s 225us/step\n", + 
"Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 3s 3ms/step - loss: 0.7268 - acc: 0.6941 - val_loss: 0.5627 - val_acc: 0.8776\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 837us/step - loss: 0.3848 - acc: 0.9325 - val_loss: 0.4620 - val_acc: 0.8822\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 972us/step - loss: 0.2405 - acc: 0.9881 - val_loss: 0.4566 - val_acc: 0.8868\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 873us/step - loss: 0.1955 - acc: 0.9970 - val_loss: 0.4575 - val_acc: 0.8845\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1771 - acc: 0.9940 - val_loss: 0.4582 - val_acc: 0.8915\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 920us/step - loss: 0.1674 - acc: 0.9940 - val_loss: 0.4646 - val_acc: 0.8776\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 863us/step - loss: 0.1607 - acc: 0.9940 - val_loss: 0.4594 - val_acc: 0.8730\n", + "160/160 [==============================] - 0s 219us/step\n", + "Fitting with: (1440, 20000) labels (1440,)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7634 - acc: 0.6634 - val_loss: 0.5894 - val_acc: 0.8822\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.4324 - acc: 0.9166 - val_loss: 0.4643 - val_acc: 0.8938\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.2883 - acc: 0.9742 - val_loss: 0.4305 - val_acc: 0.8776\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.2227 - acc: 0.9841 - val_loss: 0.4194 - val_acc: 0.8938\n", + "Epoch 5/12\n", + 
"1007/1007 [==============================] - 1s 1ms/step - loss: 0.1930 - acc: 0.9911 - val_loss: 0.4176 - val_acc: 0.8799\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1785 - acc: 0.9911 - val_loss: 0.4234 - val_acc: 0.8707\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1646 - acc: 0.9940 - val_loss: 0.4333 - val_acc: 0.8822\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1583 - acc: 0.9960 - val_loss: 0.4301 - val_acc: 0.8730\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 965us/step - loss: 0.1568 - acc: 0.9940 - val_loss: 0.4599 - val_acc: 0.8707\n", + "160/160 [==============================] - 0s 195us/step\n", + "Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 3s 3ms/step - loss: 0.7641 - acc: 0.6872 - val_loss: 0.5579 - val_acc: 0.8822\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.3955 - acc: 0.9355 - val_loss: 0.5036 - val_acc: 0.8522\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.2586 - acc: 0.9791 - val_loss: 0.4740 - val_acc: 0.8868\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.2081 - acc: 0.9921 - val_loss: 0.4843 - val_acc: 0.8845\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1824 - acc: 0.9921 - val_loss: 0.4691 - val_acc: 0.8730\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1643 - acc: 0.9970 - val_loss: 0.4535 - val_acc: 0.8707\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1504 - acc: 0.9980 - val_loss: 0.4490 - val_acc: 0.8891\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 
1ms/step - loss: 0.1529 - acc: 0.9950 - val_loss: 0.4477 - val_acc: 0.8753\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1434 - acc: 0.9940 - val_loss: 0.4441 - val_acc: 0.8776\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1440 - acc: 0.9950 - val_loss: 0.4579 - val_acc: 0.8707\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1499 - acc: 0.9940 - val_loss: 0.4817 - val_acc: 0.8684\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1531 - acc: 0.9940 - val_loss: 0.5193 - val_acc: 0.8661\n", + "160/160 [==============================] - 0s 230us/step\n", + "Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 3s 3ms/step - loss: 0.7052 - acc: 0.7130 - val_loss: 0.5127 - val_acc: 0.8753\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 736us/step - loss: 0.3357 - acc: 0.9305 - val_loss: 0.4500 - val_acc: 0.8845\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 724us/step - loss: 0.2225 - acc: 0.9871 - val_loss: 0.4319 - val_acc: 0.8868\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 718us/step - loss: 0.1869 - acc: 0.9871 - val_loss: 0.4384 - val_acc: 0.8799\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 709us/step - loss: 0.1713 - acc: 0.9911 - val_loss: 0.4498 - val_acc: 0.8661\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 718us/step - loss: 0.1615 - acc: 0.9940 - val_loss: 0.4233 - val_acc: 0.8961\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 690us/step - loss: 0.1433 - acc: 0.9970 - val_loss: 0.4405 - val_acc: 0.8799\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 704us/step - loss: 0.1467 - acc: 0.9921 
- val_loss: 0.4298 - val_acc: 0.8868\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 708us/step - loss: 0.1292 - acc: 0.9980 - val_loss: 0.4451 - val_acc: 0.8868\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 1s 715us/step - loss: 0.1257 - acc: 0.9950 - val_loss: 0.4153 - val_acc: 0.8799\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 1s 711us/step - loss: 0.1256 - acc: 0.9970 - val_loss: 0.4726 - val_acc: 0.8776\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 1s 687us/step - loss: 0.1233 - acc: 0.9970 - val_loss: 0.4450 - val_acc: 0.8637\n", + "160/160 [==============================] - 0s 161us/step\n", + "Fitting with: (1440, 20000) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7709 - acc: 0.6614 - val_loss: 0.6258 - val_acc: 0.8476\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 798us/step - loss: 0.4380 - acc: 0.9086 - val_loss: 0.4669 - val_acc: 0.8915\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 705us/step - loss: 0.2870 - acc: 0.9682 - val_loss: 0.4391 - val_acc: 0.8891\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 713us/step - loss: 0.2232 - acc: 0.9881 - val_loss: 0.4446 - val_acc: 0.8891\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 727us/step - loss: 0.1941 - acc: 0.9970 - val_loss: 0.4226 - val_acc: 0.8868\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 736us/step - loss: 0.1772 - acc: 0.9960 - val_loss: 0.4077 - val_acc: 0.8891\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 720us/step - loss: 0.1673 - acc: 0.9950 - val_loss: 0.4217 - val_acc: 0.8961\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 693us/step - loss: 0.1556 - acc: 0.9970 - val_loss: 0.4280 - val_acc: 
def get_ff_bow_model():
    """Return a newly built, compiled feed-forward network for the BoW features.

    The input width is taken from the module-level ``num_words``. The network
    is two L2-regularised ReLU layers (16 and 8 units) with dropout between
    them, followed by a single sigmoid unit. It is compiled with the Adam
    optimizer, binary cross-entropy loss and accuracy as the only metric.
    """
    network = Sequential()
    network.add(Dense(16, activation=relu, input_shape=(num_words,),
                      kernel_regularizer=l2(0.01)))
    network.add(Dropout(0.25))
    network.add(Dense(8, activation=relu, kernel_regularizer=l2(0.01)))
    network.add(Dense(1, activation=sigmoid))
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


# 10-fold cross-validation of the BoW feed-forward model.
ff_bow_scores = run_cross_validate(
    get_ff_bow_model,
    bow_predictors,
    labels,
    cv=10,
)
# And for our word vector method. First we must create our word vectors using
# a word vectorizing model generated in another experiment.
word_vectors = KeyedVectors.load("opspam_w2v.kv", mmap="r")

# pad_sequences returns a rectangular 2-D array, so the padded length is
# simply its second dimension; there is no need to scan every row.
predictors_sequences = pad_sequences(tokenizer.texts_to_sequences(predictors_raw))
max_sequence_length = predictors_sequences.shape[1]

# --- next notebook cell: embedding matrix -------------------------------------
embedding_length = word_vectors.vector_size

corpus_words = tokenizer.word_index
# One row per corpus word plus one spare row: the loop below only writes rows
# at the tokenizer's word indices, so any index it never visits stays all-zero.
corpus_vocab_size = len(corpus_words) + 1
# Query the KeyedVectors object directly. The `.wv` alias and the `.vocab`
# dict are gensim-3-only accessors (removed in gensim 4), whereas membership
# tests and item lookup on the object itself work on both major versions.
vectorizer_words = word_vectors
embedding_matrix = np.zeros((corpus_vocab_size, embedding_length))
for word, idx in corpus_words.items():
    # Words without a trained vector keep their all-zero row.
    if word in vectorizer_words:
        embedding_matrix[idx] = np.asarray(vectorizer_words[word], dtype=np.float32)
"Epoch 6/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6990 - acc: 0.6246 - val_loss: 0.7280 - val_acc: 0.6143\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6904 - acc: 0.6157 - val_loss: 0.7217 - val_acc: 0.6212\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6781 - acc: 0.6375 - val_loss: 0.7192 - val_acc: 0.6467\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6694 - acc: 0.6743 - val_loss: 0.7458 - val_acc: 0.6028\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6700 - acc: 0.6713 - val_loss: 0.7373 - val_acc: 0.6189\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6504 - acc: 0.6862 - val_loss: 0.7777 - val_acc: 0.5820\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6576 - acc: 0.6783 - val_loss: 0.7460 - val_acc: 0.6005\n", + "160/160 [==============================] - 0s 294us/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 4s 3ms/step - loss: 0.9513 - acc: 0.5084 - val_loss: 0.8599 - val_acc: 0.5081\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.8459 - acc: 0.5204 - val_loss: 0.8152 - val_acc: 0.4873\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7894 - acc: 0.4965 - val_loss: 0.7690 - val_acc: 0.5219\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7602 - acc: 0.5303 - val_loss: 0.7565 - val_acc: 0.5196\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7510 - acc: 0.5611 - val_loss: 0.7703 - val_acc: 0.5681\n", + "Epoch 6/12\n", + "1007/1007 
[==============================] - 2s 2ms/step - loss: 0.7588 - acc: 0.5879 - val_loss: 0.7839 - val_acc: 0.5312\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7422 - acc: 0.6097 - val_loss: 0.7811 - val_acc: 0.5035\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7296 - acc: 0.6346 - val_loss: 0.7702 - val_acc: 0.5520\n", + "160/160 [==============================] - 0s 306us/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 4s 4ms/step - loss: 0.9359 - acc: 0.5144 - val_loss: 0.8700 - val_acc: 0.5612\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.8265 - acc: 0.6157 - val_loss: 0.8473 - val_acc: 0.5704\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7612 - acc: 0.6683 - val_loss: 0.8640 - val_acc: 0.5381\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7338 - acc: 0.6733 - val_loss: 0.8086 - val_acc: 0.6305\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7032 - acc: 0.6922 - val_loss: 0.8274 - val_acc: 0.5982\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6995 - acc: 0.6842 - val_loss: 0.8140 - val_acc: 0.6097\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6837 - acc: 0.7160 - val_loss: 0.8143 - val_acc: 0.6443\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6660 - acc: 0.7398 - val_loss: 0.8751 - val_acc: 0.5704\n", + "160/160 [==============================] - 0s 263us/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 4s 
4ms/step - loss: 0.8829 - acc: 0.5204 - val_loss: 0.8026 - val_acc: 0.5520\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7805 - acc: 0.5323 - val_loss: 0.7737 - val_acc: 0.5774\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7674 - acc: 0.5482 - val_loss: 0.7544 - val_acc: 0.5912\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7479 - acc: 0.5670 - val_loss: 0.7511 - val_acc: 0.5450\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7369 - acc: 0.5929 - val_loss: 0.7486 - val_acc: 0.5912\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7181 - acc: 0.6077 - val_loss: 0.7357 - val_acc: 0.5912\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6895 - acc: 0.6465 - val_loss: 0.7435 - val_acc: 0.5820\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6723 - acc: 0.6594 - val_loss: 0.7476 - val_acc: 0.6259\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6898 - acc: 0.6415 - val_loss: 0.7639 - val_acc: 0.6259\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6794 - acc: 0.6693 - val_loss: 0.7798 - val_acc: 0.5797\n", + "160/160 [==============================] - 0s 335us/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 4s 4ms/step - loss: 0.8804 - acc: 0.5114 - val_loss: 0.8206 - val_acc: 0.5173\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.8086 - acc: 0.4955 - val_loss: 0.7912 - val_acc: 0.5289\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7808 - acc: 0.5134 - val_loss: 0.7772 
- val_acc: 0.5751\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7533 - acc: 0.5432 - val_loss: 0.7505 - val_acc: 0.5658\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7456 - acc: 0.5482 - val_loss: 0.7338 - val_acc: 0.5958\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6944 - acc: 0.6216 - val_loss: 0.7413 - val_acc: 0.6051\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6902 - acc: 0.6495 - val_loss: 0.7583 - val_acc: 0.5958\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6705 - acc: 0.6604 - val_loss: 0.7632 - val_acc: 0.6443\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6630 - acc: 0.7021 - val_loss: 0.7724 - val_acc: 0.6628\n", + "160/160 [==============================] - 0s 367us/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 5s 5ms/step - loss: 0.8874 - acc: 0.5214 - val_loss: 0.8097 - val_acc: 0.5012\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7886 - acc: 0.5144 - val_loss: 0.7705 - val_acc: 0.5012\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7706 - acc: 0.5015 - val_loss: 0.7667 - val_acc: 0.5035\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7432 - acc: 0.5730 - val_loss: 0.7355 - val_acc: 0.5358\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7331 - acc: 0.5571 - val_loss: 0.7236 - val_acc: 0.5612\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7277 - acc: 0.5809 - val_loss: 0.7189 - val_acc: 0.5912\n", + "Epoch 7/12\n", + "1007/1007 
[==============================] - 2s 2ms/step - loss: 0.7032 - acc: 0.5799 - val_loss: 0.7121 - val_acc: 0.6166\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7005 - acc: 0.5968 - val_loss: 0.7159 - val_acc: 0.5751\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 9/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6993 - acc: 0.6127 - val_loss: 0.7485 - val_acc: 0.5150\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7053 - acc: 0.6117 - val_loss: 0.7538 - val_acc: 0.5866\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7019 - acc: 0.6256 - val_loss: 0.7142 - val_acc: 0.6097\n", + "160/160 [==============================] - 0s 492us/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 5s 5ms/step - loss: 0.8920 - acc: 0.5055 - val_loss: 0.8175 - val_acc: 0.4873\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7954 - acc: 0.5154 - val_loss: 0.7949 - val_acc: 0.5242\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7746 - acc: 0.5174 - val_loss: 0.7670 - val_acc: 0.5035\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7491 - acc: 0.5283 - val_loss: 0.7389 - val_acc: 0.5473\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7377 - acc: 0.5323 - val_loss: 0.7341 - val_acc: 0.5219\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7330 - acc: 0.5561 - val_loss: 0.7375 - val_acc: 0.5704\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7317 - acc: 0.5124 - val_loss: 0.7280 - val_acc: 0.5012\n", + "Epoch 
8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7158 - acc: 0.5313 - val_loss: 0.7052 - val_acc: 0.5774\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7047 - acc: 0.5998 - val_loss: 0.7271 - val_acc: 0.5450\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7022 - acc: 0.6058 - val_loss: 0.7436 - val_acc: 0.5912\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7008 - acc: 0.6296 - val_loss: 0.7374 - val_acc: 0.5658\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6995 - acc: 0.6068 - val_loss: 0.7685 - val_acc: 0.5797\n", + "160/160 [==============================] - 0s 350us/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 5s 5ms/step - loss: 0.8891 - acc: 0.5164 - val_loss: 0.8056 - val_acc: 0.4919\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7825 - acc: 0.5045 - val_loss: 0.7710 - val_acc: 0.5196\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7575 - acc: 0.5641 - val_loss: 0.7588 - val_acc: 0.5566\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7454 - acc: 0.5710 - val_loss: 0.7511 - val_acc: 0.5704\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7276 - acc: 0.6087 - val_loss: 0.7639 - val_acc: 0.5127\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7200 - acc: 0.6197 - val_loss: 0.7439 - val_acc: 0.6212\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7128 - acc: 0.6246 - val_loss: 0.7711 - val_acc: 0.5335\n", + "Epoch 8/12\n", + "1007/1007 [==============================] 
- 2s 2ms/step - loss: 0.7019 - acc: 0.6385 - val_loss: 0.7634 - val_acc: 0.5473\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6985 - acc: 0.6455 - val_loss: 0.7276 - val_acc: 0.6328\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6783 - acc: 0.6683 - val_loss: 0.7609 - val_acc: 0.5797\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6801 - acc: 0.6663 - val_loss: 0.7424 - val_acc: 0.6143\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6818 - acc: 0.6763 - val_loss: 0.7433 - val_acc: 0.6282\n", + "160/160 [==============================] - 0s 299us/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 5s 5ms/step - loss: 0.9225 - acc: 0.5074 - val_loss: 0.8355 - val_acc: 0.5058\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.8099 - acc: 0.5462 - val_loss: 0.7916 - val_acc: 0.5912\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7566 - acc: 0.6197 - val_loss: 0.7677 - val_acc: 0.6074\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7362 - acc: 0.6296 - val_loss: 0.7969 - val_acc: 0.5335\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7294 - acc: 0.6326 - val_loss: 0.7467 - val_acc: 0.6467\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.7109 - acc: 0.6683 - val_loss: 0.7903 - val_acc: 0.5589\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6913 - acc: 0.6753 - val_loss: 0.7964 - val_acc: 0.6236\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.6834 - acc: 0.6902 - val_loss: 
def get_ff_wv_model():
    """Build and compile the feed-forward classifier fed by frozen word vectors.

    The embedding layer is initialised from ``embedding_matrix`` and marked
    non-trainable. Its per-token vectors are flattened and passed through two
    L2-regularised ReLU layers (16 and 8 units, with dropout of 0.25 after the
    first) into a single sigmoid unit.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, binary
        cross-entropy loss and accuracy as the reported metric.
    """
    frozen_embedding = Embedding(
        corpus_vocab_size,
        embedding_length,
        weights=[embedding_matrix],
        trainable=False,
        input_length=max_sequence_length,
    )

    classifier = Sequential()
    classifier.add(frozen_embedding)
    # Collapse (sequence, embedding) into one flat feature vector per review.
    classifier.add(Flatten())
    classifier.add(Dense(16, activation=relu, kernel_regularizer=l2(0.01)))
    classifier.add(Dropout(0.25))
    classifier.add(Dense(8, activation=relu, kernel_regularizer=l2(0.01)))
    classifier.add(Dense(1, activation=sigmoid))

    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAFrBJREFUeJzt3X20XXV95/H3hyAQREAlsurFEDRRpB2rktJBq6LVLrQVap1RqF31oUprhxjb0Y7WDmXoqg/VqZOJjFNkqdUqiFolakaKiE+AkvD8XO5C0QQfIiIPAmLCd/7Y+24OJze5J5idc5P7fq111917n9/Z+5ubc+/n/Pbv7N9OVSFJEsBu4y5AkjR7GAqSpI6hIEnqGAqSpI6hIEnqGAqSpI6hIEnqGAqSpI6hIEnq7D7uArbVAQccUIsWLRp3GZK0U7nkkkt+XFULZmq304XCokWLWLt27bjLkKSdSpKbR2nn6SNJUsdQkCR1DAVJUsdQkCR1DAVJUsdQkCR1DAVJUmenu05hV7By5UomJyfHXQbr168HYGJiYqx1LF68mGXLlo21BkkNQ2EOu+eee8ZdgqRZxlAYg9nyrnj58uUArFixYsyVSJotHFOQJHUMBUlSx1CQJHUMBUlSp9dQSHJ0khuSTCZ5yzSPH5zkvCRXJvlKkoP6rEeStHW9hUKSecCpwAuBw4Djkxw21Ow9wEeq6inAKcA7+qpHkjSzPnsKRwCTVXVTVd0HnAkcO9TmMOC8dvn8aR6XJO1AfYbCBPC9gfV17bZBVwAvbZdfAjwiyaOHd5TkhCRrk6zdsGFDL8VKkvoNhUyzrYbW3wQ8J8llwHOA9cDGzZ5UdVpVLa2qpQsWzHiLUUnSQ9TnFc3rgMcNrB8E3DLYoKpuAf4AIMk+wEur6vYea5IkbUWfPYU1wJIkhyTZAzgOWDXYIMkBSaZqeCvwwR7rkSTNoLdQqKqNwInAOcB1wFlVdU2SU5Ic0zY7Crghyb8DBwJ/31c9kqSZ9TohXlWtBlYPbTtpYPlTwKf6rEGSNLo5N0vqbLmXwWww9XOYmi11rvO+DtIcDIXJyUkuv/o6Nu39qHGXMna73dd8GOySm3445krGb97dPxl3CdKsMOdCAWDT3o/inkNfNO4yNIvMv371zI2kOcAJ8SRJHUNBktQxFCRJHUNBktSZcwPN69evZ97dtzuwqAeZd/etrF+/2bRb0pxjT0GS1JlzPYWJiQl+8PPd/UiqHmT+9auZmDhw3GVIY2dPQZLUMRQkSR1DQZLUmXNjCtDMc+Onj2C3e+8A4P699h1zJePXzH3kmII050Jh8eLF4y5h1picvBOAxY/3jyEc6GtDYg6GglMjP2BqyuwVK1aMuRJJs4VjCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkzpy7TmE2WLlyJZOTk+Muo6th6nqFcVm8eLHXj0izhKEwh82fP3/cJUiaZQyFMfBdsaTZyjEFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdXoNhSRHJ7khyWSSt0zz+MIk5ye5LMmVSV7UZz2SpK3rLRSSzANOBV4IHAYcn+SwoWZ/A5xVVU8DjgP+T1/1SJJm1mdP4Qhgsqpuqqr7gDOBY4faFLBvu7wfcEuP9UiSZtDn/RQmgO8NrK8DfnOozcnAvyVZBjwceH6P9UiSZtBnTyHTbKuh9eOBD1fVQcCLgI8m2aymJCckWZtk7YYNG3ooVZIE/YbCOuBxA+sHsfnpoT8BzgKoqouAvYADhndUVadV1dKqWrpgwYKeypUk9RkKa4AlSQ5JsgfNQPKqoTbfBX4bIMmTaULBroAkjUlvoVBVG4ETgXOA62g+ZXRNklOSHNM2+6/A65JcAZwBvKqqhk8xSZJ2kD4Hmqmq1cDqoW0nDSxfCzyzzxokSaPzimZJUsdQkCR1DAVJUsd
QkCR1DAVJUsdQkCR1DAVJUsdQkCR1DAVJUsdQkCR1DAVJUsdQkCR1DAVJUsdQkCR1DAVJUsdQkCR1DAVJUmekUEjy6SS/m8QQkaRd2Kh/5N8P/CFwY5J3Jjm0x5okSWMyUihU1Zeq6hXA04HvAOcmuTDJq5M8rM8CJUk7zsing5I8GngV8FrgMmAFTUic20tlkqQdbvdRGiX5V+BQ4KPAi6vq++1Dn0iytq/iJEk71kihALyvqr483QNVtXQ71iNJGqNRTx89Ocn+UytJHpnkz3uqSZI0JqOGwuuq6qdTK1V1G/C6fkqSJI3LqKGwW5JMrSSZB+zRT0mSpHEZdUzhHOCsJP8XKODPgC/2VpUkaSxGDYX/Bvwp8HogwL8Bp/dVlCRpPEYKhaq6n+aq5vf3W44kaZxGvU5hCfAO4DBgr6ntVfX4nuqSJI3BqAPNH6LpJWwEngt8hOZCNknSLmTUUJhfVecBqaqbq+pk4Hn9lSVJGodRB5rvbafNvjHJicB64DH9lSVJGodRewpvBPYG3gAcDvwR8Mq+ipIkjceMPYX2QrWXVdWbgbuAV/delSRpLGbsKVTVJuDwwSuaJUm7plHHFC4Dzk7ySeBnUxur6l97qUqSNBajhsKjgFt58CeOCjAUJGkXMuoVzY4jSNIcMOoVzR+i6Rk8SFW9ZobnHU1z2855wOlV9c6hx99LczEcNJ9uekxV7Y8kaSxGPX30+YHlvYCXALds7Qntp5ZOBV4ArAPWJFlVVddOtamqvxhovwx42oj1SJJ6MOrpo08Pric5A/jSDE87Apisqpva55wJHAtcu4X2xwN/O0o9kqR+jHrx2rAlwMIZ2kwA3xtYX9du20ySg4FDgGnvAy1J2jFGHVO4kwePKfyA5h4LW33aNNs2G5doHQd8qr0mYrrjnwCcALBw4UxZJEl6qEY9ffSIh7DvdcDjBtYPYsvjEMcB/2Urxz8NOA1g6dKlWwoWSdIvaaTTR0lekmS/gfX9k/z+DE9bAyxJckiSPWj+8K+aZt9PAh4JXDR62ZKkPow6pvC3VXX71EpV/ZQZBoWraiNwIs39na8Dzqqqa5KckuSYgabHA2dWlT0ASRqzUT+SOl14zPjcqloNrB7adtLQ+skj1iBJ6tmoPYW1Sf4xyROSPL696OySPguTJO14o4bCMuA+4BPAWcA9bGVgWJK0cxr100c/A97Scy2SpDEb9dNH5ybZf2D9kUnO6a8sSdI4jHr66ID2E0cAVNVteI9mSdrljBoK9yfpLiVOsogtX50sSdpJjfqR1LcB30jy1Xb92bTTTkiSdh2jDjR/MclSmiC4HDib5hNIkqRdyKgT4r0WWE4zf9HlwH+kmZbieVt7niRp5zLqmMJy4DeAm6vquTQ3w9nQW1WSpLEYNRTurap7AZLsWVXXA0/qryxJ0jiMOtC8rr1O4bPAuUluY4bbcUqSdj6jDjS/pF08Ocn5wH7AF3urSpI0FqP2FDpV9dWZW0mSdkYP9R7NkqRdkKEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkzjbPkipp17Zy5UomJyfHWsP69esBmJiYGGsdAIsXL2bZsmXjLmOHMRQkzTr33HPPuEuYswwFSQ8yG94VL1++HIAVK1aMuZK5xzEFSVLHUJAkdQwFSVLHUJAkdQwFSVLHTx9Js8RsuD5gtpj6OUx9Cmmu25HXShgK0iwxOTnJjddcxsJ9No27lLHb4xfNSYyf37x2zJWM33fvmrdDj2coSLPIwn028ddPv2PcZWgWeful++7Q4zmmIEnq9BoKSY5OckOSySRv2UKblyW5Nsk1ST7eZz2SpK3r7fRRknnAqcALgHXAmiSrquragTZLgLcCz6yq25I8pq96JEkz67OncAQwWVU3VdV9wJnAsUNtXgecWlW3AVTVj3qsR5I0gz5DYQL43sD6unbboCcCT0xyQZJvJjm6x3okSTP
o89NHmWZbTXP8JcBRwEHA15P8WlX99EE7Sk4ATgBYuHDh9q9UkgT021NYBzxuYP0g4JZp2pxdVb+oqm8DN9CExINU1WlVtbSqli5YsKC3giVpruszFNYAS5IckmQP4Dhg1VCbzwLPBUhyAM3ppJt6rEmStBW9hUJVbQROBM4BrgPOqqprkpyS5Ji22TnArUmuBc4H3lxVt/ZVkyRp63q9ormqVgOrh7adNLBcwF+2X5KkMfOKZklSx1CQJHUMBUlSx1CQJHUMBUlSx1CQJHUMBUlSx1CQJHUMBUlSx1CQJHUMBUlSx1CQJHV6nRBP0ujWr1/Pz+6cx9sv3XfcpWgWufnOeTx8/foddjx7CpKkjj0FaZaYmJjg5xu/z18//Y5xl6JZ5O2X7sueE8O3t++PPQVJUsdQkCR1DAVJUsdQkCR1HGiWZpHv3uVHUgF+eHfzfvXAve8fcyXj99275rFkBx7PUJBmicWLF4+7hFnjvslJAPY82J/JEnbsa8NQkGaJZcuWjbuEWWP58uUArFixYsyVzD2OKUiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOr3eeS3J0cAKYB5welW9c+jxVwHvBta3m95XVaf3WZOkrVu5ciWT7e0wx2Xq+FN3YBunxYsXz6m74vUWCknmAacCLwDWAWuSrKqqa4eafqKqTuyrDkk7n/nz54+7hDmrz57CEcBkVd0EkORM4FhgOBQkzSJz6V2xNtfnmMIE8L2B9XXttmEvTXJlkk8leVyP9UiSZtBnKGSabTW0/jlgUVU9BfgS8M/T7ig5IcnaJGs3bNiwncuUJE3pMxTWAYPv/A8CbhlsUFW3VtXP29UPAIdPt6OqOq2qllbV0gULFvRSrCSp31BYAyxJckiSPYDjgFWDDZL8ysDqMcB1PdYjSZpBbwPNVbUxyYnAOTQfSf1gVV2T5BRgbVWtAt6Q5BhgI/AT4FV91SNJmlmqhk/zz25Lly6ttWvXjrsMSdqpJLmkqpbO1M4rmiVJHUNBktTZ6U4fJdkA3DzuOnYhBwA/HncR0jR8bW5fB1fVjB/f3OlCQdtXkrWjnGeUdjRfm+Ph6SNJUsdQkCR1DAWdNu4CpC3wtTkGjilIkjr2FCRJHUNhjJJsSnJ5kiuSXJrkGT0fb0GSbyW5LMmzBrYfm+SzA+tvTTI5sP7iJKuG97cNxz0qyecfeuXa0ZK8N8kbB9bPSXL6wPr/TPKXv8T+T07ypqFtRyW5aGjb7kl+ODRP2ij73z/Jnz/U+uYyQ2G87qmqp1bVrwNvBd7R8/F+G7i+qp5WVV8f2H4hcOTA+pHAHUke064/A7hg1IO0d93Tzu1Cmv93kuxGc83Arw48PvJrYhteD18DDkqyaGDb84Grq+r7I+5jyv7ANoWCr9uGoTB77AvcBpBknyTntb2Hq5IcO9UoyX9Pcn2Sc5OcMfxuq21zcPv8K9vvC5M8FfgH4EVt76S732FVbQBuT7K43TQBfJr2j0L7/cJ238e3NV2d5F0Dx7wrySlJvgUcmeTots5vAH8w0O457fEvb3ssj9guPz1tbxfwwP//rwJXA3cmeWSSPYEnA5el8e729XBVkpdD967//CQfB65qt70tyQ1JvgQ8afiAVXU/8Eng5QObjwPOaJ//hCRfTHJJkq8nObTdfmCSz7Q97ivaHvc7gSe0r7N3j1pnkocn+UK7n6un2s0pVeXXmL6ATcDlwPXA7cDh7fbdgX3b5QOASZqbFi1t288HHgHcCLxpmv1+Dnhlu/wa4LPt8quA922hlg8Df0zzy3omTa/iH9pabgP2Ah4LfBdY0G7/MvD77fMLeFm7vBfNXfeWtHWfBXx+oLZntsv7ALuP+//Bry2+Pr8DLAT+FPgz4O+AFwHPBL7WtnkpcC7NTMgHtq+PXwGOAn4GHNK2O5wmHPameQM0uYX
X7m8Al7XLewI/Ah7Zrp8HLGmXfxP4crv8CeCN7fI8YD9gEU0Pg22s86XABwaet9+4/x929Jc9hfGaOn10KHA08JEkoflD+vYkV9LckW6C5oX8W8DZVXVPVd1J8wd2OkcCH2+XP9o+byZT7wyfAVwEXEzzi/c04IaqupfmF/YrVbWhqjYCHwOe3T5/E03vAuBQ4NtVdWM1v1n/MnScf0zyBmD/dj+anYZfExcNrF/Ytvkt4Iyq2lRVPwS+SvM6Abi4qr7dLj8L+ExV3V1VdzB0b5UpVbUG2CfJk4AXAt+sqtuS7NMe95NJLgf+ieaPOsDzgPe3z99UVbdPs+tR67wKeH6SdyV51hb2tUszFGaJqrqIplewAHhF+/3wqnoq8EOad9/T3eJ0pN2P0GbqHPIzgIva0NmL5p3U1LnjrR3/3qraNNMxq+qdwGtpejvfnDoFoFlp6jXxH2hOH32T5g3H4HjC1l4TPxtaH/Xz72fSnDbqTh3R/K36afsmaurrySPub+Q6q+rfeaBX844kJ23DMXYJhsIs0f5xnAfcStP9/VFV/SLJc4GD22bfAF6cZK/2ndPvbmF3F9L8QkETMN8YoYRraU4PPQu4rN12Oc1pg6l3hd8CnpPkgHZQ7niad1zDrgcOSfKEdv34gX/nE6rqqqp6F7CWpleh2ekC4PeAn7TvsH9CM4B7JE2vAZrB4ZcnmZdkAU3P8eJp9vU14CVJ5rfjSC/eynHPAP6IpgewCqDtXXw7yX8GaMcIfr1tfx7w+nb7vCT7AnfSnGIdPP6MdSZ5LHB3Vf0L8B7g6Vupc5fU253XNJL5bVcYmncyr6yqTUk+BnwuyVoeGHOgqtak+WjoFTQzxa6lGYsY9gbgg0neDGwAXj1TIVVV7SDxflX1i3bzRcAJtKFQVd9P8lbg/Lbe1VV19jT7ujfJCcAXkvyYJpR+rX34jW3QbaIJov83U20am6toeq8fH9q2T1VNzV76GZqQuIKmJ/BXVfWD4R5gVV2a5BM0r+ebgcFPvzHU9tokdwOXVNVgb+MVwPuT/A3wMJoexRXAcuC0JH9C87p6fVVdlOSCJFfTvMb+apQ6aXpF705yP/AL2rCZS7yieSeTZJ+quivJ3jTvfk6oqkvHXZekXYM9hZ3PaUkOoznf/88GgqTtyZ6CJKnjQLMkqWMoSJI6hoIkqWMoaM5IcuHMrbZ5n4uS/OG2PibNVoaC5oyq6mNq8kXAlv7wb+0xaVYyFDRnJLmr/X5Ukq8k+VQ7k+vH2jmnSPKddt6bi9uvxe32Dyf5T8P7opmN81ntbJx/MXTIBz3Wzuz51IF9XJDkKWnuLfDRJF9OcmOS1w20eXOSNWlmvP0f/fxkpAcYCpqrnga8ETgMeDzNzJ9T7qiqI4D3Af9rhv28Bfh6OxfPe2d47HSamWpJ8kRgz6q6sm37FJppS44ETkry2CS/QzPT7BHAU4HDkzwbqUeGguaqi6tqXTVz+F9Oc6pnyhkD348cfuIv4ZPA7yV5GM2U5h8eeGxq9tsf00wjcgTwO+3XZcClNPNELdmO9Uib8YpmzVU/H1jexIN/F2qa5Y20b6LaU017bOsBq+ruJOcCxwIvo7k/xnTHnFoP8I6q+qdtPZb0UNlTkDb38oHvU7OBfodmSmVo/qg/rF0eno1z0HSPnQ78b2BNO+volGPb2W8fTTNd+RrgHOA17Yy4JJnIA7dIlXphT0Ha3J7tjLG78cC03x8Azk5yMc1UzVOzd14JbExyBfDhoXGFzR6rqkuS3AF8aOiYFwNfoLnT2d9V1S3ALUmeDFzUjoPfRTOl9I+2879X6jj3kTQgyXeApQNTQ2/v/T8W+ApwaDueQZKTgbuq6j19HFPaFp4+knaQJH9Mc6Oit00FgjTb2FOQJHXsKUiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKnz/wHJ4W1qEkm2AgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "boxplot(x='input type', y='accuracy', data=ff_scores_data_frame)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The results are very strange! How could word vectors be less accurate than bag of words? This is known to occur when two conditions are met:\n", + " \n", + "* The dataset is small\n", + "* The dataset is very domain specific\n", + "\n", + "It is possible that these conditions actually are met here. The problem however is that running these models over our full dataset will take much longer, and will require a commited experiment to complete the investigation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convolutional Network\n", + "Now let's try this with a convolutional network. It has been shown that word vectors perform better for text classification than Bag of Words. If BoW is more accurate, it is a clear sign that we should investigate why. 
def get_conv_bow_model(vocabulary_size=20000):
    """Build and compile the convolutional classifier for bag-of-words input.

    Each sample is a single row of ``vocabulary_size`` tf-idf values with a
    trailing channel axis, i.e. shape ``(1, vocabulary_size, 1)`` in
    ``channels_last`` layout. A 1x10 convolution (50 filters) slides along the
    vocabulary axis, followed by 1x10 max pooling, dropout and a two-way
    softmax.

    Args:
        vocabulary_size: width of the bag-of-words vector. The default of
            20000 matches the ``(1600, 1, 20000, 1)`` array built in the
            notebook, so calling with no arguments behaves as before.

    Returns:
        A compiled ``Sequential`` model that expects one-hot, two-column
        targets (the notebook passes ``categorical=True`` to
        ``run_cross_validate`` for this model).
    """
    model = Sequential([
        Conv2D(
            filters=50,
            kernel_size=(1, 10),
            data_format="channels_last",
            input_shape=(1, vocabulary_size, 1),
            activation=relu),
        MaxPooling2D(pool_size=(1, 10)),
        Dropout(0.2),
        Flatten(),
        Dense(2, activation='softmax')
    ])
    # A softmax head with one-hot targets pairs with categorical cross-entropy.
    # For exactly two classes it is numerically the same as the element-wise
    # binary loss, but it stays correct if more classes are ever added.
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])
    return model
0.0611 - acc: 0.9891 - val_loss: 0.9292 - val_acc: 0.6120\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0412 - acc: 0.9911 - val_loss: 1.1034 - val_acc: 0.5889\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0321 - acc: 0.9930 - val_loss: 1.1769 - val_acc: 0.5912\n", + "160/160 [==============================] - 1s 7ms/step\n", + "Fitting with: (1440, 1, 20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 16s 15ms/step - loss: 0.4176 - acc: 0.8064 - val_loss: 0.7610 - val_acc: 0.6189\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.1339 - acc: 0.9494 - val_loss: 0.8414 - val_acc: 0.6328\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0693 - acc: 0.9861 - val_loss: 0.9440 - val_acc: 0.6212\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0391 - acc: 0.9940 - val_loss: 1.1303 - val_acc: 0.6028\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0212 - acc: 0.9970 - val_loss: 1.2155 - val_acc: 0.6143\n", + "160/160 [==============================] - 1s 7ms/step\n", + "Fitting with: (1440, 1, 20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 16s 15ms/step - loss: 0.4728 - acc: 0.7716 - val_loss: 0.6186 - val_acc: 0.6674\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.1369 - acc: 0.9503 - val_loss: 0.6578 - val_acc: 0.6697\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0724 - acc: 0.9821 - val_loss: 0.9531 - val_acc: 0.6351\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 
0.0431 - acc: 0.9940 - val_loss: 0.8991 - val_acc: 0.6328\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0247 - acc: 0.9970 - val_loss: 1.0405 - val_acc: 0.6212\n", + "160/160 [==============================] - 1s 7ms/step\n", + "Fitting with: (1440, 1, 20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 16s 16ms/step - loss: 0.3978 - acc: 0.8113 - val_loss: 0.6811 - val_acc: 0.6328\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.1221 - acc: 0.9623 - val_loss: 0.7995 - val_acc: 0.6305\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0695 - acc: 0.9801 - val_loss: 0.9517 - val_acc: 0.6051\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0386 - acc: 0.9930 - val_loss: 1.0806 - val_acc: 0.6212\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0240 - acc: 0.9950 - val_loss: 1.1951 - val_acc: 0.6120\n", + "160/160 [==============================] - 1s 7ms/step\n", + "Fitting with: (1440, 1, 20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 17s 16ms/step - loss: 0.4336 - acc: 0.7786 - val_loss: 0.6395 - val_acc: 0.6697\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 21s 21ms/step - loss: 0.1405 - acc: 0.9434 - val_loss: 0.7227 - val_acc: 0.6582\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 21s 21ms/step - loss: 0.0678 - acc: 0.9871 - val_loss: 0.8461 - val_acc: 0.6536\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 16s 16ms/step - loss: 0.0361 - acc: 0.9950 - val_loss: 1.0363 - val_acc: 0.6236\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 16s 16ms/step - loss: 
0.0234 - acc: 0.9960 - val_loss: 1.1762 - val_acc: 0.6143\n", + "160/160 [==============================] - 1s 7ms/step\n", + "Fitting with: (1440, 1, 20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 18s 18ms/step - loss: 0.4447 - acc: 0.7805 - val_loss: 0.6711 - val_acc: 0.6282\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 19s 19ms/step - loss: 0.1323 - acc: 0.9573 - val_loss: 0.7492 - val_acc: 0.6351\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 17s 16ms/step - loss: 0.0769 - acc: 0.9782 - val_loss: 0.9064 - val_acc: 0.6189\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 15s 15ms/step - loss: 0.0408 - acc: 0.9930 - val_loss: 1.0671 - val_acc: 0.6028\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 16s 16ms/step - loss: 0.0253 - acc: 1.0000 - val_loss: 1.1682 - val_acc: 0.6120\n", + "160/160 [==============================] - 1s 7ms/step\n", + "Fitting with: (1440, 1, 20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 16s 16ms/step - loss: 0.4321 - acc: 0.7776 - val_loss: 0.6643 - val_acc: 0.6467\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 18s 18ms/step - loss: 0.1234 - acc: 0.9623 - val_loss: 0.8116 - val_acc: 0.6282\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 19s 19ms/step - loss: 0.0605 - acc: 0.9881 - val_loss: 0.9559 - val_acc: 0.6097\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 18s 18ms/step - loss: 0.0384 - acc: 0.9930 - val_loss: 1.0549 - val_acc: 0.6120\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 18s 18ms/step - loss: 0.0208 - acc: 0.9990 - val_loss: 1.1966 - val_acc: 0.6074\n", + "160/160 [==============================] - 1s 8ms/step\n", + "Fitting with: (1440, 1, 
20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 18s 18ms/step - loss: 0.4149 - acc: 0.8054 - val_loss: 0.7225 - val_acc: 0.6490\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 18s 17ms/step - loss: 0.1227 - acc: 0.9682 - val_loss: 0.7774 - val_acc: 0.6490\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 20s 20ms/step - loss: 0.0639 - acc: 0.9841 - val_loss: 0.9796 - val_acc: 0.6166\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 19s 19ms/step - loss: 0.0328 - acc: 0.9950 - val_loss: 1.0511 - val_acc: 0.6305\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 18s 18ms/step - loss: 0.0191 - acc: 0.9980 - val_loss: 1.1928 - val_acc: 0.6236\n", + "160/160 [==============================] - 1s 7ms/step\n", + "Fitting with: (1440, 1, 20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 17s 16ms/step - loss: 0.4449 - acc: 0.7954 - val_loss: 0.6790 - val_acc: 0.6397\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 16s 16ms/step - loss: 0.1311 - acc: 0.9543 - val_loss: 0.7680 - val_acc: 0.6467\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 16s 16ms/step - loss: 0.0736 - acc: 0.9811 - val_loss: 0.9357 - val_acc: 0.6467\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 16s 16ms/step - loss: 0.0440 - acc: 0.9901 - val_loss: 1.1653 - val_acc: 0.6005\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 18s 18ms/step - loss: 0.0265 - acc: 0.9960 - val_loss: 1.1660 - val_acc: 0.6259\n", + "160/160 [==============================] - 2s 11ms/step\n", + "Fitting with: (1440, 1, 20000, 1) labels (1440, 2)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 
def vectorize_review(review_words):
    """Map a tokenised review to a matrix of word vectors.

    Words that are not present in ``word_vectors.wv`` are skipped, so the
    result can have fewer rows than ``review_words`` has entries.

    Args:
        review_words: iterable of word strings.

    Returns:
        A float64 array with one row per in-vocabulary word, in the order the
        words appear. If no word is in the vocabulary the result is an empty
        1-D array.
    """
    vocabulary = word_vectors.wv
    # Each row must be the vector of the word itself; appending one fixed
    # vector for every word would leave reviews differing only in length.
    embedded = [vocabulary[word] for word in review_words if word in vocabulary]
    return np.array(embedded, np.float64)


def pad_vectorized_review(vectorized_review, length, embedding_dim=100):
    """Zero-pad a review's word-vector matrix to ``length`` rows.

    Args:
        vectorized_review: array-like of shape ``(n_words, embedding_dim)``;
            an empty input (a review with no in-vocabulary words) is accepted
            and yields an all-zero matrix.
        length: number of rows in the result; must be >= ``n_words``.
        embedding_dim: width of each word vector. Defaults to 100, the value
            previously hard-coded here.

    Returns:
        A float64 array of shape ``(length, embedding_dim)`` whose leading rows
        are the input and whose remaining rows are zeros.

    Raises:
        ValueError: if ``length`` is smaller than the number of input rows, or
            if the input width does not equal ``embedding_dim``.
    """
    vectors = np.asarray(vectorized_review, dtype=np.float64)
    if vectors.size == 0:
        # An empty review arrives as a 1-D array; give it the 2-D shape that
        # concatenate needs.
        vectors = vectors.reshape(0, embedding_dim)
    padding = np.zeros((length - len(vectors), embedding_dim))
    return np.concatenate((vectors, padding))


def get_conv_wv_model(sequence_length=381, embedding_dim=100):
    """Build and compile the convolutional classifier for word-vector input.

    Each sample has shape ``(1, sequence_length, embedding_dim)`` in
    ``channels_first`` layout. The convolution kernel spans 10 consecutive
    words across the full embedding width, followed by a (2, 1) max pool with
    stride 1, dropout and a two-way softmax.

    Args:
        sequence_length: number of (padded) word positions per review.
        embedding_dim: width of each word vector; also the kernel width.
            The defaults match the ``(1600, 1, 381, 100)`` array printed in the
            notebook, so calling with no arguments behaves as before.

    Returns:
        A compiled ``Sequential`` model that expects one-hot, two-column
        targets.
    """
    model = Sequential([
        Conv2D(
            filters=50,
            kernel_size=(10, embedding_dim),
            data_format="channels_first",
            input_shape=(1, sequence_length, embedding_dim),
            activation=relu),
        MaxPooling2D(strides=(1, 1), pool_size=(2, 1), data_format="channels_first"),
        Dropout(0.2),
        Flatten(),
        Dense(2, activation='softmax')
    ])
    # A softmax head with one-hot targets pairs with categorical cross-entropy.
    # For exactly two classes it is numerically the same as the element-wise
    # binary loss, but it stays correct if more classes are ever added.
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])
    return model
932 samples, validate on 400 samples\n", + "Epoch 1/12\n", + "932/932 [==============================] - 6s 7ms/step - loss: 0.6989 - acc: 0.4775 - val_loss: 0.6931 - val_acc: 0.4975\n", + "Epoch 2/12\n", + "932/932 [==============================] - 5s 5ms/step - loss: 0.6932 - acc: 0.4710 - val_loss: 0.6932 - val_acc: 0.4975\n", + "Epoch 3/12\n", + "932/932 [==============================] - 5s 5ms/step - loss: 0.6932 - acc: 0.5011 - val_loss: 0.6932 - val_acc: 0.4975\n", + "Epoch 4/12\n", + "932/932 [==============================] - 5s 5ms/step - loss: 0.6934 - acc: 0.4775 - val_loss: 0.6932 - val_acc: 0.4975\n", + "Epoch 5/12\n", + "932/932 [==============================] - 5s 5ms/step - loss: 0.6932 - acc: 0.5011 - val_loss: 0.6932 - val_acc: 0.4975\n", + "268/268 [==============================] - 1s 2ms/step\n", + "Fitting with: (1334, 1, 381, 100) labels (1334, 2)\n", + "Train on 933 samples, validate on 401 samples\n", + "Epoch 1/12\n", + "933/933 [==============================] - 6s 7ms/step - loss: 0.6957 - acc: 0.4759 - val_loss: 0.6931 - val_acc: 0.4938\n", + "Epoch 2/12\n", + "933/933 [==============================] - 5s 6ms/step - loss: 0.6919 - acc: 0.5252 - val_loss: 0.6923 - val_acc: 0.5037\n", + "Epoch 3/12\n", + "933/933 [==============================] - 5s 5ms/step - loss: 0.6842 - acc: 0.5659 - val_loss: 0.6956 - val_acc: 0.5012\n", + "Epoch 4/12\n", + "933/933 [==============================] - 5s 6ms/step - loss: 0.6716 - acc: 0.5884 - val_loss: 0.6993 - val_acc: 0.5112\n", + "Epoch 5/12\n", + "933/933 [==============================] - 5s 5ms/step - loss: 0.6600 - acc: 0.5949 - val_loss: 0.7082 - val_acc: 0.5187\n", + "Epoch 6/12\n", + "933/933 [==============================] - 5s 5ms/step - loss: 0.6565 - acc: 0.5949 - val_loss: 0.7069 - val_acc: 0.5187\n", + "266/266 [==============================] - 1s 3ms/step\n", + "Fitting with: (1334, 1, 381, 100) labels (1334, 2)\n", + "Train on 933 samples, validate on 401 samples\n", + 
"Epoch 1/12\n", + "933/933 [==============================] - 6s 6ms/step - loss: 0.6965 - acc: 0.4952 - val_loss: 0.6932 - val_acc: 0.5137\n", + "Epoch 2/12\n", + "933/933 [==============================] - 4s 5ms/step - loss: 0.6920 - acc: 0.5359 - val_loss: 0.6932 - val_acc: 0.5212\n", + "Epoch 3/12\n", + "933/933 [==============================] - 4s 5ms/step - loss: 0.6865 - acc: 0.5627 - val_loss: 0.6960 - val_acc: 0.5237\n", + "Epoch 4/12\n", + "933/933 [==============================] - 4s 5ms/step - loss: 0.6786 - acc: 0.5756 - val_loss: 0.6971 - val_acc: 0.5287\n", + "Epoch 5/12\n", + "933/933 [==============================] - 4s 5ms/step - loss: 0.6761 - acc: 0.5713 - val_loss: 0.7037 - val_acc: 0.5087\n", + "266/266 [==============================] - 1s 2ms/step\n", + "Fitting with: (1334, 1, 381, 100) labels (1334, 2)\n", + "Train on 933 samples, validate on 401 samples\n", + "Epoch 1/12\n", + "933/933 [==============================] - 6s 7ms/step - loss: 0.6979 - acc: 0.5091 - val_loss: 0.6924 - val_acc: 0.5112\n", + "Epoch 2/12\n", + "933/933 [==============================] - 5s 5ms/step - loss: 0.6928 - acc: 0.5177 - val_loss: 0.6922 - val_acc: 0.5287\n", + "Epoch 3/12\n", + "933/933 [==============================] - 4s 5ms/step - loss: 0.6823 - acc: 0.5852 - val_loss: 0.6925 - val_acc: 0.5287\n", + "Epoch 4/12\n", + "933/933 [==============================] - 4s 5ms/step - loss: 0.6680 - acc: 0.5927 - val_loss: 0.6970 - val_acc: 0.5237\n", + "Epoch 5/12\n", + "933/933 [==============================] - 4s 5ms/step - loss: 0.6621 - acc: 0.5916 - val_loss: 0.6982 - val_acc: 0.5387\n", + "Epoch 6/12\n", + "933/933 [==============================] - 4s 5ms/step - loss: 0.6552 - acc: 0.5981 - val_loss: 0.7037 - val_acc: 0.5411\n", + "266/266 [==============================] - 1s 3ms/step\n", + "Fitting with: (1334, 1, 381, 100) labels (1334, 2)\n", + "Train on 933 samples, validate on 401 samples\n", + "Epoch 1/12\n", + "933/933 
# Cross-validate the convolutional word-vector model; categorical=True makes
# run_cross_validate one-hot encode the targets.
conv_wv_scores = run_cross_validate(
    get_conv_wv_model,
    vectorized_reviews,
    labels,
    cv=6,
    categorical=True,
)

# Report the per-fold test accuracies for both input representations.
print ("Bag of words: ", conv_bow_scores['accuracies'])
print ("Word vectors: ", conv_wv_scores['accuracies'])

# Long-format table: one (input type, accuracy) row per fold, Bag of Words
# rows first, ready to be passed to the boxplot.
conv_scores_entries = [
    (input_type, accuracy)
    for input_type, scores in (
        ('Bag of Words', conv_bow_scores),
        ('Word Vectors', conv_wv_scores),
    )
    for accuracy in scores['accuracies']
]
conv_scores_data_frame = DataFrame(conv_scores_entries, columns=['input type', 'accuracy'])
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAHKNJREFUeJzt3X+YHVWd5/H3Jx0hQQRC0vJIhyaBhF+6DsiduIgyqAR7nZHouotBXYI/yDgzhMDOOAuri2x4RlFnxkHkYQw+CDhKQBkhKksmCChCkO6QQEgk0oZfnSDEBAVM+NGd7/5Rp0Pl5nbXDXT1vZ3+vJ7nPl116pyqbye37/eeOlWnFBGYmZkNZkyjAzAzs+bnZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMys0ttEBDJVJkybFlClTGh2GmdmIsnz58t9FRGtRvd0mWUyZMoWurq5Gh2FmNqJIeqyeej4NZWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWaHd5j6L3cWll15Kd3d3Q2NYv349AG1tbQ2NA2DatGnMmzev0WGYjXpOFraTrVu3NjoEM2syThZNphm+Rc+fPx+ASy65pMGRmFmz8JiFmZkVcrIwM7NCThZmZlbIycLMzAqVmiwkdUhaK6lb0nk1trdLul3SCkkPSHp/Kp8iaauklen1r2XGaWZmgyvtaihJLcBlwEygB+iUtDgi1uSqfR64PiIul3QUcDMwJW37TUQcXVZ8ZmZWvzJ7FjOA7ohYFxEvAYuAWVV1AtgnLe8LbCgxHjMze5XKvM+iDXgit94DvL2qzoXAf0iaB7weOCm3baqkFcCzwOcj4s4SY22KO6ebRf+/Q//9FqOd7yI3KzdZqEZZVK2fBlwVEf8k6TjgO5LeAjwJtEfEJknHAjdKenNEPLvDAaS5wFyA9vb21xRsd3c3Kx/8FX177f+a9rM7GPNS9t+0fN1TDY6k8Vq2bG50CGZNocxk0QMclFufzM6nmT4FdABExDJJ44BJEfE08GIqXy7pN8BhwA4P2Y6IhcBCgEqlUp2IdlnfXvuz9Yj3v9bd2G5k/EM3NzoEs6ZQ5phFJzBd0lRJewCzgcVVdR4H3gsg6UhgHLBRUmsaIEfSIcB0YF2JsZqZ2SBK61lERK+ks4AlQAtwZUSslrQA6IqIxcDfAldIOpfsFNUZERGSTgAWSOoF+oDPRITPB5iZNUipEwlGxM1kl8Pmyy7ILa8Bjq/R7gbghjJjMzOz+nnW2WT9+vW0bPmDz1HbDlq2bGL9+t5Gh2HWcJ7uw8zMCrlnkbS1tfHbF8f6aijbwfiHbqat7YBGh2HWcO5ZmJlZIScLMzMr5GRhZmaFnCzMzKyQk4WZmRVysjAzs0JOFmZmVsjJwszMCjlZmJlZIScLMzMr5GRhZmaFnCzMzKyQk4WZmRUqddZZSR3AJWRPyvtWRFxctb0duBrYL9U5Lz0wCUnnkz2juw84OyKWlBkrQMuWzX6eBTDmhWcB2DZunwZH0ngtWzYDnnXWrLRkkZ6hfRkwE+gBOiUtTk/H6/d54PqIuFzSUWRP1ZuSlmcDbwYOBG6VdFhE9JUV77Rp08ra9YjT3f0cANMO8YckHOD3hhnl9ixmAN0RsQ5A0iJgFpBPFgH0f33dF9iQlmcBiyLiReARSd1pf8vKCnbevHll7XrEmT9/PgCXXHJJgyMxs2ZR5phFG/BEbr0nleVdCHxcUg9Zr6L/E7uetmZmNkzKTBaqURZV66cBV0XEZOD9wHckjamzLZLmSuqS1LVx48bXHLCZmdVWZrLoAQ7KrU/mldNM/T4FXA8QEcuAccCkOtsSEQsjohIRldbW1iEM3czM8spMFp3AdElTJe1BNmC9uKrO48B7ASQdSZYsNqZ6syXtKWkqMB24t8RYzcxsEKUNcEdEr6SzgCVkl8VeGRGrJS0AuiJ
iMfC3wBWSziU7zXRGRASwWtL1ZIPhvcDflHkllJmZDa7U+yzSPRM3V5VdkFteAxw/QNt/AP6hzPjMzKw+voPbzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoVKnUjQdt2ll15Kd3d3Q2PoP37/41Ubadq0aX7krVkTcLKwnYwfP77RIZhZk3GyaDL+Fm1mzchjFmZmVqjUZCGpQ9JaSd2Szqux/WuSVqbXryX9PretL7et+nGsZmY2jEo7DSWpBbgMmAn0AJ2SFqen4wEQEefm6s8DjsntYmtEHF1WfGZmVr8yexYzgO6IWBcRLwGLgFmD1D8NuLbEeMzM7FUqM1m0AU/k1ntS2U4kHQxMBW7LFY+T1CXpHkkfLC9MMzMrUubVUKpRFgPUnQ38ICL6cmXtEbFB0iHAbZJWRcRvdjiANBeYC9De3j4UMZuZWQ1l9ix6gINy65OBDQPUnU3VKaiI2JB+rgPuYMfxjP46CyOiEhGV1tbWoYjZzMxqKDNZdALTJU2VtAdZQtjpqiZJhwMTgGW5sgmS9kzLk4DjgTXVbc3MbHiUdhoqInolnQUsAVqAKyNitaQFQFdE9CeO04BFEZE/RXUk8E1J28gS2sX5q6jMzGx4acfP6JGrUqlEV1dXo8MwMxtRJC2PiEpRPd/BbWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzArVlSwk3SDpzyU5uZiZjUL1fvhfDnwUeFjSxZKOKDEmMzNrMnUli4i4NSI+BrwNeBRYKuluSZ+Q9LqB2knqkLRWUrek82ps/5qklen1a0m/z22bI+nh9Jqz67+amZkNlbofqyppIvBx4H8AK4DvAu8E5gAn1qjfAlwGzAR6gE5Ji/OPR42Ic3P15wHHpOX9gS8AFSCA5antM7v4+5mZ2RCod8zi34E7gb2AD0TEKRFxXUTMA/YeoNkMoDsi1kXES8AiYNYghzkNuDYtvw9YGhGbU4JYCnTUE6uZmQ29ensW34iI22ptGOTZrW3AE7n1HuDttSpKOhiYCvQfo1bbtjpjNTOzIVbvAPeRkvbrX5E0QdJfF7RRjbIYoO5s4AcR0bcrbSXNldQlqWvjxo0F4ZiZ2atVb7I4MyK2Dz6nU0NnFrTpAQ7KrU8GNgxQdzavnIKqu21ELIyISkRUWltbC8IxM7NXq95kMUbS9m/7afB6j4I2ncB0SVMl7UGWEBZXV5J0ODABWJYrXgKcnHowE4CTU5mZmTVAvWMWS4DrJf0r2emgzwC3DNYgInolnZXatgBXRsRqSQuArojoTxynAYsiInJtN0u6iCzhACyIiM11/1ZmZjaklPuMHrhSduf2XwLvJRtP+A/gW7kxhoarVCrR1dXV6DDMzEYUScsHuVBpu7p6FhGxjewu7stfa2BmZjby1JUsJE0HvgQcBYzrL4+IQ0qKy8zMmki9A9zfJutV9ALvBq4BvlNWUGZm1lzqTRbjI+KnZGMcj0XEhcB7ygvLzMyaSb1XQ72QBrkfTlc4rQfeWF5YZmbWTOrtWZxDNi/U2cCxZBMKeiZYM7NRorBnkW7AOzUiPgs8D3yi9KjMzKypFPYs0r0Ux+bv4DYzs9Gl3jGLFcBNkr4P/LG/MCL+vZSozMysqdSbLPYHNrHjFVABOFmYmY0C9d7B7XEKM7NRrN47uL9NjedJRMQnhzwiMzNrOvWehvpxbnkc8CEGfjaFmZntZuo9DXVDfl3StcCtpURkZmZNp96b8qpNB9qHMhAzM2te9Y5ZPMeOYxa/Bf5XKRGZmVnTqatnERFviIh9cq/Dqk9N1SKpQ9JaSd2SzhugzqmS1khaLel7ufI+SSvTa6fHsZqZ2fCpt2fxIeC2iPhDWt8PODEibhykTQtwGTAT6AE6JS2
OiDW5OtOB84HjI+IZSfnJCbdGxNG7/BuZmdmQq3fM4gv9iQIgIn4PfKGgzQygOyLWRcRLwCJgVlWdM4HLIuKZtN+n64zHzMyGUb3Jola9ol5JG/BEbr0nleUdBhwm6S5J90jqyG0bJ6krlX+wzjjNzKwE9d5n0SXpn8lOKwUwD1he0KbWxIPVN/aNJbuy6kRgMnCnpLeknkt7RGyQdAhwm6RVEfGbHQ4gzQXmArS3++IsM7Oy1NuzmAe8BFwHXA9sBf6moE0PcFBufTI738jXA9wUES9HxCPAWrLkQURsSD/XAXcAx1QfICIWRkQlIiqtra11/ipmZrar6r0p749AzauZBtEJTJc0lezJerOBj1bVuRE4DbhK0iSy01LrJE0AtkTEi6n8eOAru3h8MzMbInX1LCQtTVdA9a9PkLRksDYR0QucBSwBfgVcHxGrJS2QdEqqtgTYJGkNcDvw2YjYBBxJdurr/lR+cf4qKjMzG16K2Gl+wJ0rSSsi4piiskaqVCrR1dXV6DDMzEYUScsjolJUr94xi22Sto8gS5pCjVlozcxs91Tv1VCfA34h6Wdp/QTSVUhmZrb7q3eA+xZJFbIEsRK4ieyKKDMzGwXqne7j08B8sstfVwL/GVjGjo9ZNTOz3VS9YxbzgT8FHouId5Pd87CxtKjMzKyp1JssXoiIFwAk7RkRDwGHlxeWmZk1k3oHuHvSfRY3AkslPYMfq2pmNmrUO8D9obR4oaTbgX2BW0qLyszMmkq9PYvtIuJnxbXMzGx38mqfwW1mZqOIk4WZmRVysjAzs0JOFmZmVsjJwszMCjlZmJlZIScLMzMrVGqykNQhaa2kbkk1H8sq6VRJayStlvS9XPkcSQ+n15wy4zQzs8Ht8k159ZLUAlwGzAR6gE5Ji/OPR5U0HTgfOD4inpH0xlS+P/AFoEL2kKXlqe0zZcVrZmYDK7NnMQPojoh1EfESsAiYVVXnTOCy/iQQEU+n8vcBSyNic9q2FOgoMVYzMxtEmcmiDXgit96TyvIOAw6TdJekeyR17EJbMzMbJqWdhgJUo6z6ud1jgenAiWQPVrpT0lvqbIukuaTHu7a3t+/UwMzMhkaZPYse4KDc+mR2nta8B7gpIl6OiEeAtWTJo562RMTCiKhERKW1tXVIgzczs1eUmSw6gemSpkraA5gNLK6qcyPwbgBJk8hOS60DlgAnS5ogaQJwciozM7MGKO00VET0SjqL7EO+BbgyIlZLWgB0RcRiXkkKa4A+4LMRsQlA0kVkCQdgQURsLitWMzMbnCJ2GgoYkSqVSnR1dTU6DDOzEUXS8oioFNXzHdxmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCpSYLSR2S1krqlnReje1nSNooaWV6fTq3rS9XXv04VjMzG0alPVZVUgtwGTAT6AE6JS2OiDVVVa+LiLNq7GJrRBxdVnxmZla/MnsWM4DuiFgXES8Bi4BZJR7PzMxKUmayaAOeyK33pLJqH5b0gKQfSDooVz5OUpekeyR9sMQ4zcysQJnJQjXKomr9R8CUiHgrcCtwdW5be3qI+EeBf5F06E4HkOamhNK1cePGoYrbzMyqlJkseoB8T2EysCFfISI2RcSLafUK4Njctg3p5zrgDuCY6gNExMKIqEREpbW1dWijNzOz7cpMFp3AdElTJe0BzAZ2uKpJ0ptyq6cAv0rlEyTtmZYnAccD1QPjZmY2TEq7GioieiWdBSwBWoArI2K1pAVAV0QsBs6WdArQC2wGzkjNjwS+KWkbWUK7uMZVVGZmNkwUUT2MMDJVKpXo6upqdBhmZiOKpOVpfHhQvoPbzMwKOVmY2YiyadMmzj77bDZt2tToUEYVJwszG1GuvvpqVq1axTXXXNPoUEYVJwszGzE2bdrELbfcQkRwyy23uHc
xjJwszGzEuPrqq9m2bRsAfX197l0MIycLMxsxbr31Vnp7ewHo7e1l6dKlDY5o9HCyMLMR46STTmLs2Oz2sLFjxzJz5swGRzR6OFmY2YgxZ84cxozJPrZaWlo4/fTTGxzR6OFkYWYjxsSJE+no6EASHR0dTJw4sdEhjRqlTfdhZlaGOXPm8Oijj7pXMcycLMxsRJk4cSJf//rXGx3GqOPTUGZmVsjJwszMCjlZmJlZIScLMzMr5GRhZmaFSk0WkjokrZXULem8GtvPkLRR0sr0+nRu2xxJD6fXnDLjNDOzwZV26aykFuAyYCbQA3RKWlzj8ajXRcRZVW33B74AVIAAlqe2z5QVr5mZDazMnsUMoDsi1kXES8AiYFadbd8HLI2IzSlBLAU6SorTzMwKlHlTXhvwRG69B3h7jXoflnQC8Gvg3Ih4YoC2bWUFatbsLr30Urq7uxsaw/r169m6dWtDY2gm48ePp62t8R9L06ZNY968eaUfp8xkoRplUbX+I+DaiHhR0meAq4H31NkWSXOBuQDt7e2vLVqzJtbd3c3Dq1fQvndfw2Lo2zKGbX21/jRHp76Xn+XF3icbGsPjz7cM27HKTBY9wEG59cnAhnyFiMg/5uoK4Mu5tidWtb2j+gARsRBYCFCpVHZKJma7k/a9+/jfb3u20WFYE/niffsM27HKHLPoBKZLmippD2A2sDhfQdKbcqunAL9Ky0uAkyVNkDQBODmVmZlZA5TWs4iIXklnkX3ItwBXRsRqSQuArohYDJwt6RSgF9gMnJHabpZ0EVnCAVgQEZvLitWs2a1fv54/PtcyrN8krfk99lwLr1+/fliOVeqssxFxM3BzVdkFueXzgfMHaHslcGWZ8ZmZWX08RbnZCNDW1saLvU96zMJ28MX79mHPYboiy9N9mJlZIfcszEaIx5/3mAXAU1uy77gH7LWtwZE03uPPtzB9mI7lZGE2AkybNq3RITSNl9LNiXse7H+T6Qzfe8PJwmwEGI47dEeK+fPnA3DJJZc0OJLRxWMWZmZWyMnCzMwK+TSUmdWlGSYzBLbH0H86qlGGawK/ZuFkYWYjyvjx4xsdwqjkZGFmdRlN36JtZx6zMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFVJENDqGISFpI/BYo+PYjUwCftfoIMwG4Pfn0Dk4IlqLKu02ycKGlqSuiKg0Og6zWvz+HH4+DWVmZoWcLMzMrJCThQ1kYaMDMBuE35/DzGMWZmZWyD0LMzMr5GTRpCT1SVop6X5J90l6R8nHa5X0S0krJL0rVz5L0o259fMldefWPyBp8Ws47omSfvzqI7fhJOlrks7JrS+R9K3c+j9J+p+vYf8XSvq7qrITJS2rKhsr6SlJb9rF/e8n6a9fbXyjmZNF89oaEUdHxJ8A5wNfKvl47wUeiohjIuLOXPndwHG59eOAZyW9Ma2/A7ir3oNIannNkVoj3U32f46kMWT3O7w5t73u98MuvBd+DkyWNCVXdhLwYEQ8Wec++u0H7FKy8Hs242QxMuwDPAMgaW9JP029jVWSZvVXkvR/JD0kaamka6u/oaU6B6f2D6Sf7ZKOBr4CvD/1ZrY/tzIiNgJ/kDQtFbUBN5A+MNLPu9O+T0sxPSjpy7ljPi9pgaRfAsdJ6khx/gL4r7l6f5aOvzL1cN4wJP96NpTu4pX/+zcDDwLPSZogaU/gSGCFMl9N74VVkj4C23sJt0v6HrAqlX1O0lpJtwKHVx8wIrYB3wc+kiueDVyb2h8q6RZJyyXdKemIVH6ApB+m3vn9qXd+MXBoeo99td44Jb1e0k/Sfh7srzeqRIRfTfgC+oCVwEPAH4BjU/lYYJ+0PAnoBgRUUv3xwBuAh4G/q7HfHwFz0vIngRvT8hnANwaI5SrgdLI/5EVkvZCvpFieAcYBBwKPA62p/Dbgg6l9AKem5XHAE8D0FPf1wI9zsR2flvcGxjb6/8Gvmu+HR4F
24C+BzwAXAe8Hjgd+nup8GFgKtAAHpPfGm4ATgT8CU1O9Y8mSxl5kX4q6B3jf/imwIi3vCTwNTEjrPwWmp+W3A7el5euAc9JyC7AvMIWsR8Iuxvlh4Ipcu30b/f8w3C/3LJpX/2moI4AO4BpJIvuA/aKkB4Bbyb7pHwC8E7gpIrZGxHNkH7y1HAd8Ly1/J7Ur0v9t8h3AMuBesj/KY4C1EfEC2R/zHRGxMSJ6ge8CJ6T2fWS9EYAjgEci4uHI/ur+reo4/yzpbGC/tB9rPtXvh2W59btTnXcC10ZEX0Q8BfyM7D0CcG9EPJKW3wX8MCK2RMSzQM3xr4joBPaWdDjwX4B7IuIZSXun435f0krgm2Qf9gDvAS5P7fsi4g81dl1vnKuAkyR9WdK7BtjXbs3JYgSIiGVkvYhW4GPp57ERcTTwFNm3db3a3ddRp/889TuAZSkZjSP79tV/fnqw478QEX1Fx4yIi4FPk/WO7uk/nWBNp//98J/ITkPdQ/YlJD9eMdj74Y9V6/Vev7+I7PTT9lNQZJ9hv09frPpfR9a5v7rjjIhf80ov6EuSLtiFY+wWnCxGgPSh2QJsIutKPx0RL0t6N3BwqvYL4AOSxqVvW38+wO7uJvtjgyzx/KKOENaQnWZ6F7Aila0kOwXR/03yl8CfSZqUBgRPI/uWVu0hYKqkQ9P6abnf89CIWBURXwa6yHoh1nzuAv4C2Jy+kW8mGzg+jqyXAdmg9EcktUhqJetl3ltjXz8HPiRpfBqj+sAgx70W+DhZj2ExQOqNPCLpvwOkMYg/SfV/CvxVKm+RtA/wHNlp2vzxC+OUdCCwJSL+DfhH4G2DxLlbGtvoAGxA41O3GrJvP3Miok/Sd4EfSerilTENIqJT2SWs95PNvttFNtZR7WzgSkmfBTYCnygKJCIiDU7vGxEvp+JlwFxSsoiIJyWdD9ye4r05Im6qsa8XJM0FfiLpd2TJ6i1p8zkpAfaRJaj/VxSbNcQqsp7u96rK9o6I/plgf0iWPO4n6zn8fUT8trq3GBH3SbqO7L38GJC/Eo+qumskbQGWR0S+d/Ix4HJJnwdeR9YDuR+YDyyU9Cmy99RfRcQySXdJepDs/fX39cRJ1ov6qqRtwMukJDSa+A7u3YikvSPieUl7kX1jmhsR9zU6LjMb+dyz2L0slHQU2XjC1U4UZjZU3LMwM7NCHuA2M7NCThZmZlbIycLMzAo5WdioJ+nu4lq7vM8pkj66q9vMmpWThY16EVHG9O9TgIESwmDbzJqSk4WNepKeTz9PlHSHpB+kWXG/m+bjQtKjaV6ge9NrWiq/StJ/q94X2eym70qzm55bdcgdtqWZUo/O7eMuSW9V9myH70i6TdLDks7M1fmspE5lswf/33L+Zcxe4WRhtqNjgHOAo4BDyGZS7fdsRMwAvgH8S8F+zgPuTHMVfa1g27fIZv1F0mHAnhHxQKr7VrKpW44DLpB0oKSTyWbtnQEcDRwr6QTMSuRkYbajeyOiJ7JnKKwkO2XU79rcz+OqG74G3wf+QtLryKaNvyq3rX8m4d+RTaUyAzg5vVYA95HNoTV9COMx24nv4Dbb0Yu55T52/BuJGsu9pC9d6ZTVHrt6wIjYImkpMAs4lezZJLWO2b8u4EsR8c1dPZbZq+WehVn9PpL72T+76qNkU1dD9mH/urRcPbtpXq1t3wK+DnSmWVz7zUozCU8kmxK+E1gCfDLNLoykNr3ymFuzUrhnYVa/PdPsu2N4ZWr1K4CbJN1LNiV2/2yoDwC9ku4Hrqoat9hpW0Qsl/Qs8O2qY94L/ITsyXQXRcQGYIOkI4Flafz9ebKpu58e4t/XbDvPDWVWB0mPApXcFNxDvf8DgTuAI9J4CZIuBJ6PiH8s45hmu8KnocwaTNLpZA+P+lx/ojBrNu5ZmJlZIfcszMyskJOFmZkVcrIwM7NCThZmZlbIycLMzAo5WZiZWaH/DxpHMDjNvWNTAAAAAElFTkSuQmCC\n", + 
"text/plain": [ + "
boxplot(x='input type', y='accuracy', data=conv_scores_data_frame)
plt.show()

# Just as before, our results are alarming. We need to investigate why this
# occurs; it may be that we have made a mistake in generating our word vectors.

# Recurrent Neural Networks
# -------------------------
# Let's also try this same experiment for Recurrent Neural Networks. We will
# use LSTM since this is known to be a good option for text classification.
# First with Bag of Words:

# Take the sample count from the data instead of hard-coding 1600: with a
# different number of rows, np.split(bow_predictors, 1600) would either raise
# or silently group several reviews into one sequence.
batches = len(bow_predictors)

# Give every review its own length-1 sequence axis: (samples, 1, features).
# For the recorded run this is (1600, 1, 20000), the same array the previous
# np.array(np.split(...)) construction produced.
rnn_bow_data = np.expand_dims(bow_predictors, axis=1)
rnn_bow_data.shape

# One target per sample, as a column: (samples, 1).
rnn_bow_targets = np.asarray(labels).reshape(-1, 1)
assert len(rnn_bow_targets) == len(rnn_bow_data), "features and targets are misaligned"
rnn_bow_targets.shape
"1007/1007 [==============================] - 1s 1ms/step - loss: 0.0618 - acc: 0.9990 - val_loss: 0.3146 - val_acc: 0.8891\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0396 - acc: 1.0000 - val_loss: 0.3132 - val_acc: 0.8915\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0288 - acc: 1.0000 - val_loss: 0.3127 - val_acc: 0.8868\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0224 - acc: 1.0000 - val_loss: 0.3150 - val_acc: 0.8868\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0180 - acc: 1.0000 - val_loss: 0.3180 - val_acc: 0.8891\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0150 - acc: 1.0000 - val_loss: 0.3211 - val_acc: 0.8938\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0127 - acc: 1.0000 - val_loss: 0.3256 - val_acc: 0.8938\n", + "160/160 [==============================] - 0s 237us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 3s 3ms/step - loss: 0.5319 - acc: 0.7706 - val_loss: 0.3679 - val_acc: 0.8730\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1252 - acc: 0.9861 - val_loss: 0.3113 - val_acc: 0.8799\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0609 - acc: 0.9970 - val_loss: 0.3081 - val_acc: 0.8868\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0391 - acc: 1.0000 - val_loss: 0.3008 - val_acc: 0.8891\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.0281 - acc: 1.0000 - val_loss: 0.2969 - val_acc: 0.8915\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 2s 
2ms/step - loss: 0.0216 - acc: 1.0000 - val_loss: 0.2971 - val_acc: 0.8891\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.0174 - acc: 1.0000 - val_loss: 0.2988 - val_acc: 0.8961\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.0144 - acc: 1.0000 - val_loss: 0.3019 - val_acc: 0.8984\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 2s 1ms/step - loss: 0.0121 - acc: 1.0000 - val_loss: 0.3039 - val_acc: 0.8984\n", + "160/160 [==============================] - 0s 229us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 3s 3ms/step - loss: 0.5196 - acc: 0.7607 - val_loss: 0.3464 - val_acc: 0.8915\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1028 - acc: 0.9791 - val_loss: 0.3072 - val_acc: 0.8938\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0453 - acc: 0.9990 - val_loss: 0.3003 - val_acc: 0.8915\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0286 - acc: 1.0000 - val_loss: 0.3003 - val_acc: 0.8891\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0203 - acc: 1.0000 - val_loss: 0.3056 - val_acc: 0.8868\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0156 - acc: 1.0000 - val_loss: 0.3105 - val_acc: 0.8845\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0125 - acc: 1.0000 - val_loss: 0.3130 - val_acc: 0.8845\n", + "160/160 [==============================] - 0s 224us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 3s 3ms/step - loss: 0.5540 - 
acc: 0.7686 - val_loss: 0.4088 - val_acc: 0.8753\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1803 - acc: 0.9772 - val_loss: 0.3416 - val_acc: 0.8776\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0933 - acc: 0.9980 - val_loss: 0.3257 - val_acc: 0.8684\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0619 - acc: 0.9990 - val_loss: 0.3187 - val_acc: 0.8637\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0456 - acc: 1.0000 - val_loss: 0.3161 - val_acc: 0.8661\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0356 - acc: 1.0000 - val_loss: 0.3161 - val_acc: 0.8614\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0287 - acc: 1.0000 - val_loss: 0.3183 - val_acc: 0.8637\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0238 - acc: 1.0000 - val_loss: 0.3198 - val_acc: 0.8637\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0201 - acc: 1.0000 - val_loss: 0.3231 - val_acc: 0.8637\n", + "160/160 [==============================] - 0s 183us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 4s 4ms/step - loss: 0.5290 - acc: 0.7696 - val_loss: 0.3542 - val_acc: 0.8822\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1171 - acc: 0.9861 - val_loss: 0.3179 - val_acc: 0.8707\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0541 - acc: 0.9990 - val_loss: 0.3179 - val_acc: 0.8753\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0348 - acc: 1.0000 - val_loss: 0.3170 - val_acc: 
0.8684\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0251 - acc: 1.0000 - val_loss: 0.3195 - val_acc: 0.8684\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0194 - acc: 1.0000 - val_loss: 0.3243 - val_acc: 0.8684\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0156 - acc: 1.0000 - val_loss: 0.3283 - val_acc: 0.8661\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0130 - acc: 1.0000 - val_loss: 0.3334 - val_acc: 0.8684\n", + "160/160 [==============================] - 0s 214us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 4s 4ms/step - loss: 0.5361 - acc: 0.7537 - val_loss: 0.3424 - val_acc: 0.8753\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1170 - acc: 0.9811 - val_loss: 0.3131 - val_acc: 0.8776\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0500 - acc: 0.9990 - val_loss: 0.3046 - val_acc: 0.8915\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0318 - acc: 1.0000 - val_loss: 0.3040 - val_acc: 0.8891\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0230 - acc: 1.0000 - val_loss: 0.3056 - val_acc: 0.8915\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0176 - acc: 1.0000 - val_loss: 0.3096 - val_acc: 0.8915\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0142 - acc: 1.0000 - val_loss: 0.3137 - val_acc: 0.8891\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0117 - acc: 1.0000 - val_loss: 0.3174 - val_acc: 0.8891\n", + "160/160 
[==============================] - 0s 239us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 4s 4ms/step - loss: 0.5679 - acc: 0.7428 - val_loss: 0.4064 - val_acc: 0.8684\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 2s 2ms/step - loss: 0.1829 - acc: 0.9762 - val_loss: 0.3367 - val_acc: 0.8799\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0931 - acc: 0.9990 - val_loss: 0.3038 - val_acc: 0.8915\n", + "Epoch 4/12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0615 - acc: 0.9990 - val_loss: 0.2955 - val_acc: 0.8915\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0450 - acc: 1.0000 - val_loss: 0.2853 - val_acc: 0.8961\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0350 - acc: 1.0000 - val_loss: 0.2822 - val_acc: 0.8984\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0282 - acc: 1.0000 - val_loss: 0.2824 - val_acc: 0.8961\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0234 - acc: 1.0000 - val_loss: 0.2832 - val_acc: 0.8961\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0198 - acc: 1.0000 - val_loss: 0.2830 - val_acc: 0.8984\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0170 - acc: 1.0000 - val_loss: 0.2855 - val_acc: 0.8938\n", + "160/160 [==============================] - 0s 230us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 5s 4ms/step - loss: 0.5634 - acc: 0.7458 - 
val_loss: 0.4024 - val_acc: 0.8684\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1787 - acc: 0.9821 - val_loss: 0.3468 - val_acc: 0.8915\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0907 - acc: 0.9980 - val_loss: 0.3273 - val_acc: 0.8845\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0602 - acc: 1.0000 - val_loss: 0.3214 - val_acc: 0.8776\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0443 - acc: 1.0000 - val_loss: 0.3196 - val_acc: 0.8776\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0344 - acc: 1.0000 - val_loss: 0.3198 - val_acc: 0.8730\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0278 - acc: 1.0000 - val_loss: 0.3222 - val_acc: 0.8776\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0230 - acc: 1.0000 - val_loss: 0.3260 - val_acc: 0.8776\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0195 - acc: 1.0000 - val_loss: 0.3297 - val_acc: 0.8753\n", + "160/160 [==============================] - 0s 198us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 5s 5ms/step - loss: 0.5229 - acc: 0.7746 - val_loss: 0.3663 - val_acc: 0.8661\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1295 - acc: 0.9821 - val_loss: 0.3030 - val_acc: 0.9007\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0640 - acc: 0.9970 - val_loss: 0.3018 - val_acc: 0.8938\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0416 - acc: 1.0000 - val_loss: 0.2985 - val_acc: 0.8891\n", + "Epoch 
5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0301 - acc: 1.0000 - val_loss: 0.2996 - val_acc: 0.8915\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0234 - acc: 1.0000 - val_loss: 0.3038 - val_acc: 0.8868\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0188 - acc: 1.0000 - val_loss: 0.3071 - val_acc: 0.8915\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0156 - acc: 1.0000 - val_loss: 0.3115 - val_acc: 0.8915\n", + "160/160 [==============================] - 0s 209us/step\n", + "Fitting with: (1440, 1, 20000) labels (1440, 1)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 4s 4ms/step - loss: 0.5431 - acc: 0.7468 - val_loss: 0.3233 - val_acc: 0.8961\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.1069 - acc: 0.9821 - val_loss: 0.3107 - val_acc: 0.8684\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0441 - acc: 0.9990 - val_loss: 0.2851 - val_acc: 0.8845\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0272 - acc: 1.0000 - val_loss: 0.2836 - val_acc: 0.8961\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0198 - acc: 1.0000 - val_loss: 0.2866 - val_acc: 0.8868\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0154 - acc: 1.0000 - val_loss: 0.2890 - val_acc: 0.8915\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0124 - acc: 1.0000 - val_loss: 0.2920 - val_acc: 0.8915\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 1s 1ms/step - loss: 0.0104 - acc: 1.0000 - val_loss: 0.2961 - val_acc: 0.8915\n", + "160/160 [==============================] - 0s 
def get_rnn_bow_model(vocab_size=20000, units=8):
    """Build and compile the LSTM classifier used on Bag-of-Words input.

    Both arguments default to the values that were previously hard-coded, so a
    zero-argument call (which is how ``run_cross_validate`` invokes its
    ``get_model`` callback) builds the same architecture as before.

    Args:
        vocab_size: Length of each Bag-of-Words feature vector. It must equal
            the size of the last axis of the training data (20000 in the
            recorded run, the ``num_words`` given to the tokenizer).
        units: Number of LSTM units.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output, trained
        with binary cross-entropy and reporting accuracy.
    """
    model = Sequential([
        # Each sample is a sequence of length 1 holding one feature vector.
        LSTM(units, input_shape=(1, vocab_size)),
        Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])
    return model


rnn_bow_scores = run_cross_validate(get_rnn_bow_model, rnn_bow_data, rnn_bow_targets, cv=10)

# And with Word Vectors:
"Epoch 9/12\n", + "1007/1007 [==============================] - 64s 63ms/step - loss: 0.6282 - acc: 0.6445 - val_loss: 0.6301 - val_acc: 0.6582\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 65s 65ms/step - loss: 0.6379 - acc: 0.6256 - val_loss: 0.6290 - val_acc: 0.6513\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 58s 58ms/step - loss: 0.6169 - acc: 0.6465 - val_loss: 0.6272 - val_acc: 0.6467\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 58s 58ms/step - loss: 0.6149 - acc: 0.6534 - val_loss: 0.6222 - val_acc: 0.6605\n", + "160/160 [==============================] - 0s 3ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 67s 67ms/step - loss: 0.6884 - acc: 0.5353 - val_loss: 0.6811 - val_acc: 0.6005\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 60s 59ms/step - loss: 0.6717 - acc: 0.5968 - val_loss: 0.6534 - val_acc: 0.6189\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 64s 64ms/step - loss: 0.6515 - acc: 0.6157 - val_loss: 0.6452 - val_acc: 0.6189\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 64s 63ms/step - loss: 0.6432 - acc: 0.6226 - val_loss: 0.6378 - val_acc: 0.6305\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 64s 64ms/step - loss: 0.6430 - acc: 0.6157 - val_loss: 0.6500 - val_acc: 0.5889\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 91s 90ms/step - loss: 0.6342 - acc: 0.6326 - val_loss: 0.6359 - val_acc: 0.6097\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 65s 64ms/step - loss: 0.6276 - acc: 0.6395 - val_loss: 0.6327 - val_acc: 0.6536\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 67s 66ms/step - loss: 0.6332 - acc: 0.6346 - val_loss: 0.6309 - val_acc: 0.6420\n", + "Epoch 9/12\n", + "1007/1007 
[==============================] - 71s 71ms/step - loss: 0.6214 - acc: 0.6316 - val_loss: 0.6358 - val_acc: 0.6166\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 72s 72ms/step - loss: 0.6120 - acc: 0.6524 - val_loss: 0.6467 - val_acc: 0.6420\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 70s 70ms/step - loss: 0.6185 - acc: 0.6524 - val_loss: 0.6225 - val_acc: 0.6559\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 71s 70ms/step - loss: 0.6102 - acc: 0.6475 - val_loss: 0.6225 - val_acc: 0.6467\n", + "160/160 [==============================] - 1s 3ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 71s 70ms/step - loss: 0.6903 - acc: 0.5223 - val_loss: 0.6819 - val_acc: 0.5196\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 75s 74ms/step - loss: 0.6711 - acc: 0.5869 - val_loss: 0.6466 - val_acc: 0.6236\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 69s 69ms/step - loss: 0.6464 - acc: 0.6177 - val_loss: 0.6375 - val_acc: 0.6328\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 70s 69ms/step - loss: 0.6456 - acc: 0.6316 - val_loss: 0.6343 - val_acc: 0.6305\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 66s 66ms/step - loss: 0.6339 - acc: 0.6236 - val_loss: 0.6691 - val_acc: 0.5982\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 65s 65ms/step - loss: 0.6251 - acc: 0.6504 - val_loss: 0.6209 - val_acc: 0.6721\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 70s 69ms/step - loss: 0.6196 - acc: 0.6405 - val_loss: 0.6202 - val_acc: 0.6674\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 63s 63ms/step - loss: 0.6187 - acc: 0.6624 - val_loss: 0.6122 - val_acc: 0.6582\n", + "Epoch 9/12\n", + "1007/1007 [==============================] 
- 83s 82ms/step - loss: 0.6152 - acc: 0.6673 - val_loss: 0.6368 - val_acc: 0.6420\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 75s 74ms/step - loss: 0.6272 - acc: 0.6524 - val_loss: 0.6318 - val_acc: 0.6259\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 75s 74ms/step - loss: 0.6005 - acc: 0.6713 - val_loss: 0.6140 - val_acc: 0.6628\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 85s 84ms/step - loss: 0.5985 - acc: 0.6842 - val_loss: 0.6047 - val_acc: 0.6836\n", + "160/160 [==============================] - 0s 3ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 88s 87ms/step - loss: 0.6866 - acc: 0.5362 - val_loss: 0.6726 - val_acc: 0.5635\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 83s 83ms/step - loss: 0.6628 - acc: 0.5988 - val_loss: 0.6554 - val_acc: 0.6051\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 71s 70ms/step - loss: 0.6488 - acc: 0.6087 - val_loss: 0.6522 - val_acc: 0.5935\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 75s 75ms/step - loss: 0.6505 - acc: 0.6077 - val_loss: 0.6587 - val_acc: 0.5958\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 71s 70ms/step - loss: 0.6428 - acc: 0.6266 - val_loss: 0.6429 - val_acc: 0.6051\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 67s 67ms/step - loss: 0.6415 - acc: 0.6276 - val_loss: 0.6421 - val_acc: 0.6074\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 68s 68ms/step - loss: 0.6374 - acc: 0.6286 - val_loss: 0.6412 - val_acc: 0.6236\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 71s 70ms/step - loss: 0.6351 - acc: 0.6266 - val_loss: 0.6380 - val_acc: 0.6305\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 64s 63ms/step - loss: 0.6293 - 
acc: 0.6346 - val_loss: 0.6298 - val_acc: 0.6328\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 60s 60ms/step - loss: 0.6218 - acc: 0.6425 - val_loss: 0.6497 - val_acc: 0.6143\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 66s 66ms/step - loss: 0.6171 - acc: 0.6524 - val_loss: 0.6061 - val_acc: 0.6767\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 72s 72ms/step - loss: 0.6143 - acc: 0.6524 - val_loss: 0.6134 - val_acc: 0.6628\n", + "160/160 [==============================] - 0s 3ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 82s 81ms/step - loss: 0.6987 - acc: 0.4826 - val_loss: 0.6887 - val_acc: 0.5727\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 67s 67ms/step - loss: 0.6840 - acc: 0.5770 - val_loss: 0.6715 - val_acc: 0.6028\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 67s 66ms/step - loss: 0.6641 - acc: 0.6018 - val_loss: 0.6541 - val_acc: 0.6005\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 66s 66ms/step - loss: 0.6446 - acc: 0.6266 - val_loss: 0.6315 - val_acc: 0.6490\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 67s 67ms/step - loss: 0.6442 - acc: 0.6137 - val_loss: 0.6301 - val_acc: 0.6420\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 64s 64ms/step - loss: 0.6468 - acc: 0.6216 - val_loss: 0.6516 - val_acc: 0.6328\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 64s 64ms/step - loss: 0.6385 - acc: 0.6216 - val_loss: 0.6304 - val_acc: 0.6328\n", + "Epoch 8/12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1007/1007 [==============================] - 65s 64ms/step - loss: 0.6331 - acc: 0.6485 - val_loss: 0.6256 - val_acc: 0.6582\n", + "Epoch 9/12\n", + "1007/1007 
[==============================] - 65s 64ms/step - loss: 0.6318 - acc: 0.6435 - val_loss: 0.6302 - val_acc: 0.6282\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 66s 65ms/step - loss: 0.6230 - acc: 0.6455 - val_loss: 0.6141 - val_acc: 0.6697\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 64s 64ms/step - loss: 0.6309 - acc: 0.6475 - val_loss: 0.6288 - val_acc: 0.6374\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 65s 64ms/step - loss: 0.6283 - acc: 0.6395 - val_loss: 0.6318 - val_acc: 0.6236\n", + "160/160 [==============================] - 0s 3ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 69s 68ms/step - loss: 0.6893 - acc: 0.5392 - val_loss: 0.6756 - val_acc: 0.6005\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 65s 65ms/step - loss: 0.6760 - acc: 0.5859 - val_loss: 0.6496 - val_acc: 0.6212\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 64s 64ms/step - loss: 0.6484 - acc: 0.6127 - val_loss: 0.6219 - val_acc: 0.6490\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 63s 62ms/step - loss: 0.6364 - acc: 0.6445 - val_loss: 0.6187 - val_acc: 0.6536\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 61s 61ms/step - loss: 0.6387 - acc: 0.6286 - val_loss: 0.6261 - val_acc: 0.6212\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 61s 61ms/step - loss: 0.6376 - acc: 0.6375 - val_loss: 0.6137 - val_acc: 0.6490\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 62s 61ms/step - loss: 0.6285 - acc: 0.6385 - val_loss: 0.6234 - val_acc: 0.6513\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 63s 63ms/step - loss: 0.6304 - acc: 0.6455 - val_loss: 0.6402 - val_acc: 0.6120\n", + "Epoch 9/12\n", + "1007/1007 [==============================] 
- 63s 63ms/step - loss: 0.6238 - acc: 0.6326 - val_loss: 0.6097 - val_acc: 0.6721\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 67s 67ms/step - loss: 0.6117 - acc: 0.6554 - val_loss: 0.6064 - val_acc: 0.6697\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 69s 68ms/step - loss: 0.6045 - acc: 0.6634 - val_loss: 0.6313 - val_acc: 0.6328\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 64s 64ms/step - loss: 0.6021 - acc: 0.6643 - val_loss: 0.5917 - val_acc: 0.6790\n", + "160/160 [==============================] - 0s 2ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 72s 71ms/step - loss: 0.6910 - acc: 0.5194 - val_loss: 0.6821 - val_acc: 0.5635\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 68s 67ms/step - loss: 0.6755 - acc: 0.5899 - val_loss: 0.6486 - val_acc: 0.6189\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 59s 59ms/step - loss: 0.6484 - acc: 0.6356 - val_loss: 0.6686 - val_acc: 0.5982\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 61s 61ms/step - loss: 0.6367 - acc: 0.6375 - val_loss: 0.6529 - val_acc: 0.5912\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 66s 65ms/step - loss: 0.6339 - acc: 0.6524 - val_loss: 0.6297 - val_acc: 0.6513\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 63s 63ms/step - loss: 0.6282 - acc: 0.6415 - val_loss: 0.6270 - val_acc: 0.6420\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 64s 63ms/step - loss: 0.6245 - acc: 0.6465 - val_loss: 0.6343 - val_acc: 0.6490\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 59s 58ms/step - loss: 0.6247 - acc: 0.6564 - val_loss: 0.6331 - val_acc: 0.6536\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 57s 56ms/step - loss: 0.6143 - 
acc: 0.6673 - val_loss: 0.6305 - val_acc: 0.6605\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 72s 72ms/step - loss: 0.6135 - acc: 0.6653 - val_loss: 0.6333 - val_acc: 0.6374\n", + "160/160 [==============================] - 1s 5ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 96s 95ms/step - loss: 0.6945 - acc: 0.5134 - val_loss: 0.6855 - val_acc: 0.5820\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 80s 80ms/step - loss: 0.6778 - acc: 0.5770 - val_loss: 0.6553 - val_acc: 0.6212\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 59s 59ms/step - loss: 0.6453 - acc: 0.6147 - val_loss: 0.6244 - val_acc: 0.6536\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 65s 64ms/step - loss: 0.6669 - acc: 0.6068 - val_loss: 0.6432 - val_acc: 0.6212\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 82s 82ms/step - loss: 0.6385 - acc: 0.6276 - val_loss: 0.6249 - val_acc: 0.6443\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 83s 82ms/step - loss: 0.6359 - acc: 0.6266 - val_loss: 0.6222 - val_acc: 0.6628\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 85s 84ms/step - loss: 0.6291 - acc: 0.6346 - val_loss: 0.6182 - val_acc: 0.6744\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 88s 88ms/step - loss: 0.6293 - acc: 0.6425 - val_loss: 0.6231 - val_acc: 0.6582\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 85s 84ms/step - loss: 0.6234 - acc: 0.6266 - val_loss: 0.6308 - val_acc: 0.6236\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 81s 80ms/step - loss: 0.6269 - acc: 0.6336 - val_loss: 0.6256 - val_acc: 0.6282\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 83s 82ms/step - loss: 0.6180 - acc: 0.6435 - val_loss: 0.6045 - 
val_acc: 0.6767\n", + "Epoch 12/12\n", + "1007/1007 [==============================] - 78s 78ms/step - loss: 0.6172 - acc: 0.6455 - val_loss: 0.5987 - val_acc: 0.6651\n", + "160/160 [==============================] - 0s 2ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 75s 75ms/step - loss: 0.6898 - acc: 0.5392 - val_loss: 0.6768 - val_acc: 0.6120\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 67s 66ms/step - loss: 0.6667 - acc: 0.5958 - val_loss: 0.6455 - val_acc: 0.6143\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 66s 65ms/step - loss: 0.6447 - acc: 0.6028 - val_loss: 0.6323 - val_acc: 0.6282\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 72s 71ms/step - loss: 0.6346 - acc: 0.6286 - val_loss: 0.6324 - val_acc: 0.6328\n", + "Epoch 5/12\n", + "1007/1007 [==============================] - 79s 79ms/step - loss: 0.6277 - acc: 0.6296 - val_loss: 0.6252 - val_acc: 0.6374\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 76s 76ms/step - loss: 0.6265 - acc: 0.6365 - val_loss: 0.6373 - val_acc: 0.6328\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 68s 67ms/step - loss: 0.6225 - acc: 0.6336 - val_loss: 0.6263 - val_acc: 0.6513\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 83s 83ms/step - loss: 0.6069 - acc: 0.6435 - val_loss: 0.6160 - val_acc: 0.6674\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 74s 73ms/step - loss: 0.6150 - acc: 0.6495 - val_loss: 0.6174 - val_acc: 0.6236\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 71s 71ms/step - loss: 0.6167 - acc: 0.6395 - val_loss: 0.6224 - val_acc: 0.6490\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 74s 73ms/step - loss: 0.6047 - acc: 0.6733 - val_loss: 0.6486 - val_acc: 0.6120\n", + "Epoch 
12/12\n", + "1007/1007 [==============================] - 69s 68ms/step - loss: 0.6026 - acc: 0.6624 - val_loss: 0.6058 - val_acc: 0.6559\n", + "160/160 [==============================] - 0s 3ms/step\n", + "Fitting with: (1440, 784) labels (1440,)\n", + "Train on 1007 samples, validate on 433 samples\n", + "Epoch 1/12\n", + "1007/1007 [==============================] - 81s 80ms/step - loss: 0.6927 - acc: 0.5074 - val_loss: 0.6830 - val_acc: 0.5820\n", + "Epoch 2/12\n", + "1007/1007 [==============================] - 79s 78ms/step - loss: 0.6829 - acc: 0.5591 - val_loss: 0.6628 - val_acc: 0.6212\n", + "Epoch 3/12\n", + "1007/1007 [==============================] - 74s 74ms/step - loss: 0.6681 - acc: 0.5958 - val_loss: 0.6378 - val_acc: 0.6536\n", + "Epoch 4/12\n", + "1007/1007 [==============================] - 73s 72ms/step - loss: 0.6509 - acc: 0.6236 - val_loss: 0.6282 - val_acc: 0.6443\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 5/12\n", + "1007/1007 [==============================] - 66s 65ms/step - loss: 0.6414 - acc: 0.6207 - val_loss: 0.6306 - val_acc: 0.6374\n", + "Epoch 6/12\n", + "1007/1007 [==============================] - 77s 77ms/step - loss: 0.6386 - acc: 0.6286 - val_loss: 0.6189 - val_acc: 0.6420\n", + "Epoch 7/12\n", + "1007/1007 [==============================] - 80s 79ms/step - loss: 0.6361 - acc: 0.6395 - val_loss: 0.6182 - val_acc: 0.6513\n", + "Epoch 8/12\n", + "1007/1007 [==============================] - 81s 80ms/step - loss: 0.6230 - acc: 0.6614 - val_loss: 0.6216 - val_acc: 0.6282\n", + "Epoch 9/12\n", + "1007/1007 [==============================] - 78s 78ms/step - loss: 0.6183 - acc: 0.6475 - val_loss: 0.6271 - val_acc: 0.6536\n", + "Epoch 10/12\n", + "1007/1007 [==============================] - 78s 77ms/step - loss: 0.6245 - acc: 0.6475 - val_loss: 0.6002 - val_acc: 0.6697\n", + "Epoch 11/12\n", + "1007/1007 [==============================] - 82s 81ms/step - loss: 0.6083 - acc: 0.6683 - val_loss: 
def get_rnn_wv_model(lstm_units=8):
    """Build and compile the LSTM classifier that runs on word-vector input.

    The first layer is an ``Embedding`` initialised from ``embedding_matrix``
    and frozen (``trainable=False``), so only the LSTM and the output layer
    are trained. The default reproduces the architecture that was previously
    hard-coded, so the factory can still be called with no arguments (which
    is how ``run_cross_validate`` invokes it).

    Args:
        lstm_units: Number of units in the LSTM layer.

    Returns:
        A compiled ``Sequential`` model that outputs a single sigmoid
        probability per sample.
    """
    # corpus_vocab_size, embedding_length, embedding_matrix and
    # max_sequence_length are notebook globals defined in earlier cells.
    model = Sequential([
        Embedding(corpus_vocab_size, embedding_length, weights=[embedding_matrix],
                  input_length=max_sequence_length, trainable=False),
        LSTM(lstm_units),
        Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])
    return model


rnn_wv_scores = run_cross_validate(get_rnn_wv_model, predictors_sequences, labels, cv=10)

# --- next notebook cell: report per-fold accuracies and tabulate them ---
print("Bag of words: ", rnn_bow_scores['accuracies'])
print("Word vectors: ", rnn_wv_scores['accuracies'])

# One (input type, accuracy) row per cross-validation fold, for plotting.
rnn_scores_entries = (
    [('Bag of Words', x) for x in rnn_bow_scores['accuracies']]
    + [('Word Vectors', x) for x in rnn_wv_scores['accuracies']]
)
rnn_scores_data_frame = DataFrame(rnn_scores_entries, columns=['input type', 'accuracy'])
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAGxRJREFUeJzt3XuUVeWd5vHvYxkV26AYKq62EEELb7m0xmoyapto0hra7gxJZ8ZAktXmJp10S9B07NFORm2cUXOZdhjiskNcxI4dJSTpKJ0wYfCWGMVIIXiBFj3BW4FRInhB8EL5mz/2e2RzqKq9QTbnUPV81jqrzt7n3Xv/Ck6d57z78m5FBGZmZgPZo9kFmJlZ63NYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkV2rPZBewsI0eOjDFjxjS7DDOz3cqSJUt+HxHtRe0GTViMGTOG7u7uZpdhZrZbkfR4mXbeDWVmZoUcFmZmVshhYWZmhRwWZmZWyGFhZmaFHBZmZlbIYWFmZoUGzXUWg8XMmTOp1WpNrWH16tUAdHR0NLUOgM7OTqZOndrsMsyGPIeFbWPTpk3NLsHMWozDosW0wrfoadOmATBjxowmV2JmrcLHLMzMrJDDwszMClUaFpImSFopqSbpgj5eP1TSLZLul3S7pFG5186S9Eh6nFVlnWZmNrDKwkJSG3AV8GfAMcBkScc0NPsW8P2IeDcwHbg8LXsgcDHwXmA8cLGkEVXVamZmA6uyZzEeqEXEqoh4FZgDTGxocwxwS3p+W+71DwELI2JdRKwHFgITKqzVzMwGUGVYdABP5qZ70ry8+4CPpecfBd4q6W0llzUzs12kyrBQH/OiYforwPslLQXeD6wGNpdcFklTJHVL6l67du2brdfMzPpRZVj0AIfkpkcBa/INImJNRPxlRBwHfDXNe77MsqntrIjoioiu9vbCuwKamdkOqjIsFgPjJI2VtBcwCZiXbyBppKR6DRcCs9PzBcDpkkakA9unp3lmZtYElYVFRGwGziH7kP8PYG5ELJc0XdJ/Ts1OAVZKehg4CPifadl1wKVkgbMYmJ7mmZlZEyhim0MBu6Wurq7o7u7e4eVbYQC/VlH/d+js7GxyJa3BgxnaYCZpSUR0FbXz2FBJrVZj2YP/Qe++Bza7lKbb49XsC8SSVU83uZLma9voDq0ZOCy20rvvgWw66oxml2EtZNhD85tdgllL8NhQZmZWyGFhZmaFHBZmZlbIYWFmZoUcFmZmVshhYWZmhXzqbLJ69WraNj7vUyVtK20bn2X16s3NLsOs6dyzMDOzQu5ZJB0dHfzulT19UZ5tZdhD8+noOKjZZZg1nXsWZmZWyGFhZmaFvBsqp23jOh/gBvZ4+QUAXt9neJMrab5sIEHvhjJzWCQejnuLWu1FADoP84ckHOT3hhkOizf4fgVbTJs2DYAZM2Y0uRIzaxU+ZmFmZoUcFmZmVshhYWZmhRwWZmZWyGFhZmaFfDZUi5k5cya1Wq2pNdS3Xz8rqpk6Ozt9pppZC6i0ZyFpgqSVkmqSLujj9dGSbpO0VNL9ks5I88dI2iRpWXr8c5V12taGDRvGsGHDml2GmbWQynoWktqAq4DTgB5gsaR5EbEi1+xrwNyIuFrSMcB8YEx67bcRcWxV9bUqf4s2s1ZUZc9iPFCLiFUR8SowB5jY0CaA+pgS+wNrKqzHzMx2UJVh0QE8mZvuSfPyLgE+JamHrFeR/1o9Nu2e+qWkkyus08zMClQZFupjXjRMTwaujYhRwBnAdZL2AJ4CRkfEccCXgeslbTOqnaQpkrolda9du3Ynl29mZnVVhkUPcEhuehTb7mb6HDAXICIWAfsAIyPilYh4Ns1fAvwWOKJxAxExKyK6IqKrvb29gl/BzMyg2rBYDIyTNFbSXsAkYF5DmyeADwJIOposLNZKak8HyJF0GDAOWFVhrWZmNoDKzoa
KiM2SzgEWAG3A7IhYLmk60B0R84C/A74r6TyyXVSfjoiQ9D5guqTNQC/whYhYV1WtZmY2MEU0HkbYPXV1dUV3d3ezyzAz261IWhIRXUXtPNyHmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWaFKw0LSBEkrJdUkXdDH66Ml3SZpqaT7JZ2Re+3CtNxKSR+qsk4zMxvYnlWtWFIbcBVwGtADLJY0LyJW5Jp9DZgbEVdLOgaYD4xJzycB7wAOBm6WdERE9FZVr5mZ9a/KnsV4oBYRqyLiVWAOMLGhTQDD0/P9gTXp+URgTkS8EhGPArW0PjMza4Iqw6IDeDI33ZPm5V0CfEpSD1mvYup2LGtmZrtIlWGhPuZFw/Rk4NqIGAWcAVwnaY+SyyJpiqRuSd1r16590wWbmVnfqgyLHuCQ3PQotuxmqvscMBcgIhYB+wAjSy5LRMyKiK6I6Gpvb9+JpZuZWV6VYbEYGCdprKS9yA5Yz2to8wTwQQBJR5OFxdrUbpKkvSWNBcYB91RYq5mZDaCys6EiYrOkc4AFQBswOyKWS5oOdEfEPODvgO9KOo9sN9OnIyKA5ZLmAiuAzcDf+kwoM7PmUfbZvPvr6uqK7u7uZpdhZrZbkbQkIrqK2vkKbjMzK+SwMDOzQg4LMzMr5LAwM7NCpcJC0k8k/Xm6YM7MzIaYsh/+VwOfAB6RdIWkoyqsyczMWkypsIiImyPik8B7gMeAhZLukvQZSW+pskAzM2u+0ruVJL0N+DTweWApMIMsPBZWUpmZmbWMUldwS/o34CjgOuDDEfFUeumHknwlnJnZIFd2uI9vR8Stfb1Q5so/MzPbvZXdDXW0pAPqE5JGSPqbimoyM7MWUzYszo6I5+oTEbEeOLuakszMrNWUDYs9JL1xQ6J0f+29qinJzMxaTdljFguAuZL+mWwo8S8Av6isKjMzayllw+K/AX8NfJHslqf/D7imqqLMzKy1lAqLiHid7Cruq6stx8zMWlHZ6yzGAZcDx5Dd+hSAiDisorrMzKyFlD3A/T2yXsVm4FTg+2QX6JmZ2RBQNiyGRcQtZLdhfTwiLgE+UF1ZZmbWSsoe4H45DU/+iKRzgNXA26sry8zMWknZnsW5wL7Al4DjgU8BZ1VVlJmZtZbCnkW6AO/MiDgf2AB8pvKqzMyspRT2LCKiFzg+fwV3WZImSFopqSbpgj5ev1LSsvR4WNJzudd6c6/N295tm5nZzlP2mMVS4CZJPwJeqs+MiH/rb4HUI7kKOA3oARZLmhcRK3LLn5drPxU4LreKTRFxbMn6zMysQmXD4kDgWbY+AyqAfsMCGA/UImIVgKQ5wERgRT/tJwMXl6zHzMx2obJXcO/IcYoO4MncdA/w3r4aSjoUGAvk75mxT7qx0mbgioi4cQdqMDOznaDsFdzfI+tJbCUiPjvQYn3M22YdySTgx+n4SN3oiFgj6TDgVkkPRMRvG+qaAkwBGD169EC/gpmZvQllT539GfDz9LgFGE52ZtRAeoBDctOjgDX9tJ0E3JCfERFr0s9VwO1sfTyj3mZWRHRFRFd7e3vxb2FmZjuk7G6on+SnJd0A3Fyw2GJgnKSxZBfxTQI+0dhI0pHACGBRbt4IYGNEvCJpJHAS8I0ytZqZ2c5X9gB3o3HAgPt9ImJzutp7AdAGzI6I5ZKmA90RUT8ddjIwJyLyu6iOBr4j6XWy3s8V+bOozGzXmzlzJrVardllsHr1agA6OjqaWkdnZydTp05tag27UtljFi+y9fGG35Hd42JAETEfmN8w76KG6Uv6WO4u4F1lajOzoWXTpk3NLmFIKrsb6q1VF2Jmra1VvkVPmzYNgBkzZjS5kqGl1AFuSR+VtH9u+gBJH6muLDMzayV
lz4a6OCKer09ExHP4AjozsyGjbFj01W5HD46bmdlupmxYdEv6J0mHSzpM0pXAkioLMzOz1lE2LKYCrwI/BOYCm4C/raooMzNrLWXPhnoJ2GaIcTMzGxrKng21UNIBuekRkhZUV5aZmbWSsruhRqYzoACIiPX4HtxmZkNG2bB4XdIbw3tIGkP/I8iamdkgU/b0168Cv5b0yzT9PtLQ4GZmNviVPcD9C0ldZAGxDLiJ7IwoMzMbAsoOJPh5YBrZPSmWAf+JbEjxDwy0nJmZDQ5lj1lMA/4YeDwiTiW7EdHayqoyM7OWUjYsXo6IlwEk7R0RDwFHVleWmZm1krIHuHvSdRY3Agslraf/W6SamdkgU/YA90fT00sk3QbsD/yisqrMzKylbPfIsRHxy+JWZmY2mJQ9ZmFmZkOYw8LMzAo5LMzMrJDDwszMClUaFpImSFopqSZpm/thSLpS0rL0eFjSc7nXzpL0SHqcVWWdZmY2sMruoy2pDbgKOA3oARZLmhcRK+ptIuK8XPupZFeGI+lA4GKgi2x02yVp2fVV1WtmZv2rsmcxHqhFxKqIeBWYA0wcoP1k4Ib0/EPAwohYlwJiITChwlrNzGwAVYZFB/BkbronzduGpEOBscCt27usmZlVr8qwUB/z+rth0iTgxxHRuz3LSpoiqVtS99q1HtfQzKwqVYZFD3BIbnoU/Y8nNYktu6BKLxsRsyKiKyK62tvb32S5ZmbWnyrDYjEwTtJYSXuRBcK8xkaSjgRGkN0fo24BcLqkEZJGAKeneWZm1gSVnQ0VEZslnUP2Id8GzI6I5ZKmA90RUQ+OycCciIjcsuskXUoWOADTI2JdVbWamdnAKgsLgIiYD8xvmHdRw/Ql/Sw7G5hdWXFmZlaar+A2M7NCDgszMyvksDAzs0IOCzMzK+SwMDOzQg4LMzMrVOmps2a2c8ycOZNardbsMlpC/d9h2rRpTa6kNXR2djJ16tTKt+OwMNsN1Go1Hlm+lNH79RY3HuT2ei3bIfLK491NrqT5ntjQtsu25bAw202M3q+Xf3jPC80uw1rIZfcO32Xb8jELMzMr5LAwM7NCDgszMyvksDAzs0IOCzMzK+SwMDOzQg4LMzMr5LAwM7NCDgszMyvksDAzs0IOCzMzK+SwMDOzQg4LMzMrVGlYSJogaaWkmqQL+mlzpqQVkpZLuj43v1fSsvSYV2WdZmY2sMqGKJfUBlwFnAb0AIslzYuIFbk244ALgZMiYr2kt+dWsSkijq2qPjMzK6/KnsV4oBYRqyLiVWAOMLGhzdnAVRGxHiAinqmwHjMz20FVhkUH8GRuuifNyzsCOELSnZLuljQh99o+krrT/I9UWKeZmRWo8k556mNe9LH9ccApwCjgDknvjIjngNERsUbSYcCtkh6IiN9utQFpCjAFYPTo0Tu7fjMzS6rsWfQAh+SmRwFr+mhzU0S8FhGPAivJwoOIWJN+rgJuB45r3EBEzIqIrojoam9v3/m/gZmZAdWGxWJgnKSxkvYCJgGNZzXdCJwKIGkk2W6pVZJGSNo7N/8kYAVmZtYUle2GiojNks4BFgBtwOyIWC5pOtAdEfPSa6dLWgH0AudHxLOSTgS+I+l1skC7In8WldlQs3r1al56sY3L7h3e7FKshTz+Yht/sHr1LtlWlccsiIj5wPyGeRflngfw5fTIt7kLeFeVtZmZWXmVhoWZ7RwdHR28svkp/uE9LzS7FGshl907nL07Gk8yrYaH+zAzs0IOCzMzK+SwMDOzQg4LMzMr5LAwM7NCDgszMyvksDAzs0IOCzMzK+SwMDOzQg4LMzMr5LAwM7NCDgszMyvksDAzs0IOCzMzK+SwMDOzQg4LMzMr5LAwM7NCDgszMyvksDAzs0IOCzMzK7Rnswsws3Ke2NDGZfcOb3YZTff0xuw77kH7vt7kSprviQ1tjNtF26o0LCRNAGYAbcA1EXFFH23OBC4BArgvIj6R5p8FfC01+x8R8S9V1mrWyjo
7O5tdQst4tVYDYO9D/W8yjl333lBEVLNiqQ14GDgN6AEWA5MjYkWuzThgLvCBiFgv6e0R8YykA4FuoIssRJYAx0fE+v6219XVFd3d3ZX8LmbWOqZNmwbAjBkzmlzJ4CBpSUR0FbWr8pjFeKAWEasi4lVgDjCxoc3ZwFX1EIiIZ9L8DwELI2Jdem0hMKHCWs3MbABVhkUH8GRuuifNyzsCOELSnZLuTrutyi5rZma7SJXHLNTHvMZ9XnuS7XY7BRgF3CHpnSWXRdIUYArA6NGj30ytZmY2gCp7Fj3AIbnpUcCaPtrcFBGvRcSjwEqy8CizLBExKyK6IqKrvb19pxZvZmZbVBkWi4FxksZK2guYBMxraHMjcCqApJFku6VWAQuA0yWNkDQCOD3NMzOzJqhsN1REbJZ0DtmHfBswOyKWS5oOdEfEPLaEwgqgFzg/Ip4FkHQpWeAATI+IdVXVambFZs6cSS2dttpM9RrqZ0U1S2dnJ1OnTm1qDbtSpddZRMR8YH7DvItyzwP4cno0LjsbmF1lfWa2+xk2bFizSxiSfAW3mZUylL5F27Y8NpSZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWaHKbn60q0laCzze7DoGkZHA75tdhFk//P7ceQ6NiMKRWAdNWNjOJam7zN2zzJrB789dz7uhzMyskMPCzMwKOSysP7OaXYDZAPz+3MV8zMLMzAq5Z2FmZoUcFi1KUq+kZZLuk3SvpBMr3l67pN9IWirp5Nz8iZJuzE1fKKmWm/6wpMbb5W7Pdk+R9LMdr9x2JUlXSjo3N71A0jW56f8laZubmW3H+i+R9JWGeadIWtQwb09JT0v6w+1c/wGS/mZH6xvKHBata1NEHBsRfwRcCFxe8fY+CDwUEcdFxB25+XcBJ+SmTwBekPT2NH0icGfZjUhqe9OVWjPdRfZ/jqQ9yK53eEfu9dLvh+14L/wKGCVpTG7enwIPRsRTJddRdwCwXWHh92zGYbF7GA6sB5C0n6RbUm/jAUkT640k/XdJD0laKOmGxm9oqc2hafn708/Rko4FvgGckXozb9y3MiLWAs9L6kyzOoCfkD4w0s+70ronp5oelPT13DY3SJou6TfACZImpDp/Dfxlrt370/aXpR7OW3fKv57tTHey5f/+HcCDwIuSRkjaGzgaWKrMN9N74QFJH4c3egm3SboeeCDN+6qklZJuBo5s3GBEvA78CPh4bvYk4Ia0/OGSfiFpiaQ7JB2V5h8k6aepd35f6p1fARye3mPfLFunpD+Q9PO0ngfr7YaUiPCjBR9AL7AMeAh4Hjg+zd8TGJ6ejwRqgICu1H4Y8FbgEeArfaz334Gz0vPPAjem558Gvt1PLdcCf0X2hzyHrBfyjVTLemAf4GDgCaA9zb8V+EhaPoAz0/N9gCeBcanuucDPcrWdlJ7vB+zZ7P8HP/p8PzwGjAb+GvgCcClwBnAS8KvU5mPAQqANOCi9N/4QOAV4CRib2h1PFhr7kn0pqvXzvv1jYGl6vjfwDDAiTd8CjEvP3wvcmp7/EDg3PW8D9gfGkPVI2M46PwZ8N7fc/s3+f9jVD/csWld9N9RRwATg+5JE9gF7maT7gZvJvukfBPwJcFNEbIqIF8k+ePtyAnB9en5dWq5I/dvkicAi4B6yP8rjgJUR8TLZH/PtEbE2IjYDPwDel5bvJeuNABwFPBoRj0T2V/evDdv5J0lfAg5I67HW0/h+WJSbviu1+RPghojojYingV+SvUcA7omIR9Pzk4GfRsTGiHgB6PP4V0QsBvaTdCTwZ8DdEbFe0n5puz+StAz4DtmHPcAHgKvT8r0R8Xwfqy5b5wPAn0r6uqST+1nXoOaw2A1ExCKyXkQ78Mn08/iIOBZ4muzbunZ09SXa1PdTnwgsSmG0D9m3r/r+6YG2/3JE9BZtMyKuAD5P1ju6u747wVpO/f3wLrLdUHeTfQnJH68Y6P3wUsN02fP355D
tfnpjFxTZZ9hz6YtV/XF0yfWVrjMiHmZLL+hySRdtxzYGBYfFbiB9aLYBz5J1pZ+JiNcknQocmpr9GviwpH3St60/72d1d5H9sUEWPL8uUcIKst1MJwNL07xlZLsg6t8kfwO8X9LIdEBwMtm3tEYPAWMlHZ6mJ+d+z8Mj4oGI+DrQTdYLsdZzJ/AXwLr0jXwd2YHjE8h6GZAdlP64pDZJ7WS9zHv6WNevgI9KGpaOUX14gO3eAHyKrMcwDyD1Rh6V9F8B0jGIP0rtbwG+mOa3SRoOvEi2mza//cI6JR0MbIyIfwW+BbxngDoHpT2bXYD1a1jqVkP27eesiOiV9APg3yV1s+WYBhGxWNkprPeRjb7bTXaso9GXgNmSzgfWAp8pKiQiIh2c3j8iXkuzFwFTSGEREU9JuhC4LdU7PyJu6mNdL0uaAvxc0u/Jwuqd6eVzUwD2kgXU/y2qzZriAbKe7vUN8/aLiPpIsD8lC4/7yHoOfx8Rv2vsLUbEvZJ+SPZefhzIn4lHQ9sVkjYCSyIi3zv5JHC1pK8BbyHrgdwHTANmSfoc2XvqixGxSNKdkh4ke3/9fZk6yXpR35T0OvAaKYSGEl/BPYhI2i8iNkjal+wb05SIuLfZdZnZ7s89i8FllqRjyI4n/IuDwsx2FvcszMyskA9wm5lZIYeFmZkVcliYmVkhh4UNeZLuKm613escI+kT2/uaWatyWNiQFxFVDP8+BugvEAZ6zawlOSxsyJO0If08RdLtkn6cRsX9QRqPC0mPpXGB7kmPzjT/Wkn/pXFdZKObnpxGNz2vYZNbvZZGSj02t447Jb1b2b0drpN0q6RHJJ2da3O+pMXKRg/+x2r+Zcy2cFiYbe044FzgGOAwspFU616IiPHAt4H/XbCeC4A70lhFVxa8dg3ZqL9IOgLYOyLuT23fTTZ0ywnARZIOlnQ62ai944FjgeMlvQ+zCjkszLZ2T0T0RHYPhWVku4zqbsj9PKFxwTfhR8BfSHoL2bDx1+Zeq48k/HuyoVTGA6enx1LgXrIxtMbtxHrMtuEruM229krueS9b/41EH883k750pV1We23vBiNio6SFwETgTLJ7k/S1zfq0gMsj4jvbuy2zHeWehVl5H8/9rI+u+hjZ0NWQfdi/JT1vHN00r6/XrgH+D7A4jeJaNzGNJPw2siHhFwMLgM+m0YWR1KEtt7k1q4R7Fmbl7Z1G392DLUOrfxe4SdI9ZENi10dDvR/YLOk+4NqG4xbbvBYRSyS9AHyvYZv3AD8nuzPdpRGxBlgj6WhgUTr+voFs6O5ndvLva/YGjw1lVoKkx4Cu3BDcO3v9BwO3A0el4yVIugTYEBHfqmKbZtvDu6HMmkzSX5HdPOqr9aAwazXuWZiZWSH3LMzMrJDDwszMCjkszMyskMPCzMwKOSzMzKyQw8LMzAr9f1nDe3sDnStiAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "boxplot(x='input type', y='accuracy', data=rnn_scores_data_frame)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Comparison of all models:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAHvNJREFUeJzt3X2cVnWd//HXm1EE72+Y7hgQarCk1s1tolorLdPQ3ypr9ito3aK1sN2UzYfsrv5ylVh99Nu28tdsaqGrlpVEdEdGIXm32WIyiICgyBUpXtLNmDeJaAh8fn+c78jxYmbOBczhmhnez8fjesw53/M953xmrrmuzznne873q4jAzMysN0MaHYCZmfV/ThZmZlbIycLMzAo5WZiZWSEnCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NC+zQ6gL4yYsSIGDNmTKPDMDMbUJYuXfp4RDQX1Rs0yWLMmDF0dHQ0OgwzswFF0iP11PNlKDMzK+RkYWZmhZwszMysUKnJQtJESWskVSRd2M3yIyXdKmmFpDsktaTyN0paLGlVWvbBMuM0M7PelZYsJDUBVwKnAOOBKZLG11T7PPD1iDgGmAV8NpVvAj4cEa8HJgL/T9KhZcVqZma9K/PMYgJQiYh1EbEZmANMqqkzHrg1Td/etTwiHoqItWl6A/B7oPDWLjMzK0eZyWIk8GhuvprK8pYDZ6bpM4CDJB2RryBpAjAU+FVJcZqZWYEyn7NQN2W1Y7jOAL4saSrw38BjwJYXNyC9ErgR+EhEbNthB9I0YBrA6NGj+ybq3dDe3k6lUunz7VarVQBaWlr6fNsAra2tTJ8+vZRtm9ngUGayqAKjcvMtwIZ8hXSJ6X0Akg4EzoyIp9P8wcCPgYsj4u7udhARs4HZAG1tbYN2MPHnnnuu0SGY2V6uzGSxBBgnaSzZGcNk4EP5CpJGAE+ks4aLgOtS+VDg+2SN398pMcY+VdbRedd229vbS9m+mVmR0tosImILcC6wEHgAmBsRqyTNknR6qnYCsEbSQ8DLgctT+QeAdwJTJd2XXm8sK1YzM+tdqX1DRcQCYEFN2SW56XnAvG7W+wbwjTJjMzOz+vkJbjMzK+RkYWZmhZwszMys0KAZz8LM9l5+xql8ThZmZj3wM07bOVmY2YDnZ5zK5zYLMzMr5GRhZmaFnCzMzKyQk4WZmRVysjAzs0JOFmZmVsjJwszMCjlZmJlZIScLMzMr5GRhZmaFSk0WkiZKWiOpIunCbpYfKelWSSsk3SGpJbfsI5LWptdHyozTzMx6V1qykNQEXAmcAowHpkgaX1Pt82TjbB8DzAI+m9Y9HLgUeAswAbhU0mFlxWpmZr0r88xiAlCJiHURsRmYA0yqqTMeuDVN355b/l5gUUQ8ERFPAouAiSXGamZmvSgzWYwEHs3NV1NZ3nLgzDR9BnCQpCPqXNfMzPaQMpOFuimLmvkZwPGSlgHHA48BW+pcF0nTJHVI6ujs7NzdeM3MrAdlJosqMCo33wJsyFeIiA0R8b6IOBb4dCp7up51U93ZE
dEWEW3Nzc19Hb+ZmSVlJoslwDhJYyUNBSYD8/MVJI2Q1BXDRcB1aXohcLKkw1LD9smpzMzMGqC0ZBERW4Bzyb7kHwDmRsQqSbMknZ6qnQCskfQQ8HLg8rTuE8C/kSWcJcCsVGZmZg1Q6rCqEbEAWFBTdklueh4wr4d1r2P7mYaZmTWQn+A2M7NCThZmZlbIycLMzAo5WZiZWaFSG7jNzPLa29upVCqNDqNua9euBWD69OkNjmTntLa29nnMThZmtsdUKhWWrVoGhzY6kjpty34se2xZY+PYGU+Vs1knCzPbsw6FbSdsa3QUg9aQO8ppXXCbhZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVmhvfJuKN/rvWeUca+3mTXGXpksKpUKy1auZtv+hzc6lLpoczZI4NJf/bbBkdRvyCb3KG82mOyVyQJg2/6H8/z4v2p0GIPWsNU3NzoEM+tDbrMwM7NCThZmZlao1GQhaaKkNZIqki7sZvloSbdLWiZphaRTU/m+kr4maaWkByRdVGacZmbWu9KShaQm4ErgFGA8MEXS+JpqF5ONzX0sMBm4KpX/b2C/iPgz4E3AOZLGlBWrmZn1rswziwlAJSLWRcRmYA4wqaZOAAen6UOADbnyAyTtAwwHNgN/LDFWMzPrRZnJYiTwaG6+msryZgJnSaoCC4DzUvk84FngN8B64PMRscO9mJKmSeqQ1NHZ2dnH4ZuZWZcyk4W6KYua+SnADRHRApwK3ChpCNlZyVbgVcBY4AJJr95hYxGzI6ItItqam5v7NnozM3tRmc9ZVIFRufkWtl9m6nI2MBEgIhZLGgaMAD4E/DQiXgB+L+kXQBuwrsR4zaxk1WoVni5vzAUDnoJqVPt8s2W+Y0uAcZLGShpK1oA9v6bOeuBEAElHA8OAzlT+bmUOAN4KPFhirGZm1ovSziwiYoukc4GFQBNwXUSskjQL6IiI+cAFwDWSzie7RDU1IkLSlcD1wP1kl7Ouj4gVZcVqZntGS0sLner0SHklGnLHEFpGtvT5dkvt7iMiFpA1XOfLLslNrwaO62a9jWS3z5qZWT/gC4dmZlbIycLMzAo5WZiZWSEnCzMzK7RXjmdRrVYZsulpj7lQoiGb/kC1uqXRYZhZH/GZhZmZFdorzyxaWlr43Z/28Uh5JRq2+mZaWl7R6DDMrI/slcnCrDvt7e1UKpU+3261mnW90NLS9w9Ktba2Mn369D7fbqmeGkDdfWxMPw9saBQ75yl27LK1DzhZmJXsueeea3QI/UZra2ujQ9gpa9euBWDcyHENjmQnjCzn7+xkYZaUdYTetd329vZStj+QDLSzIL932w2Qc0EzM2skJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQqUmC0kTJa2RVJF0YTfLR0u6XdIySSsknZpbdoykxZJWSVqZxuc2M7MGKO05C0lNwJXASUAVWCJpfhodr8vFwNyIuFrSeLJR9cZI2gf4BvC3EbFc0hHAC2XFamZmvSvzzGICUImIdRGxGZgDTKqpE8DBafoQYEOaPhlYERHLASLiDxGxtcRYzcysF2U+wT0SeDQ3XwXeUlNnJnCLpPOAA4D3pPKjgJC0EGgG5kTE5/oyuCGbnhgwXZTr+T8CEMMOLqjZfwzZ9ATgjgTNBosyk4W6KYua+SnADRHxBUlvA26U9IYU19uBNwObgFslLY2IW1+yA2kaMA1g9OjRdQc28PqneQaAca8ZSF++rxhwf2cz61mZyaIKjMrNt7D9MlOXs4GJABGxODVij0jr3hkRjwNIWgD8BfCSZBERs4HZAG1tbbWJqEfun8bMbOeU2WaxBBgnaaykocBkYH5NnfXAiQCSjgaGAZ3AQuAYSfunxu7jgdWYmVlDlHZmERFbJJ1L9sXfBFwXEaskzQI6ImI+cAFwjaTzyS5RTY2IAJ6U9EWyhBPAgoj4cVmxmtnAVtZYJF1dlJd1NWIgjUdSV7KQ9F3gOuAnEbGt3o1HxAKy2
2HzZZfkplcDx/Ww7jfIbp81M2uI4cOHNzqEfqPeM4urgY8C7ZK+Q9Yo/WB5YZmZ1W+gHJ0PZHW1WUTEzyLib8gamR8GFkn6H0kflbRvmQGamVnj1d3AnZ6ingp8DFgGfIkseSwqJTIzM+s36m2z+B7wOuBG4LSI+E1a9G1JHWUFZ2Zm/UO9bRZfjojbulsQEW19GI+ZmfVD9V6GOlrSoV0zkg6T9A8lxWRmZv1Mvcni4xHxVNdMRDwJfLyckMzMrL+pN1kMkfRiX0+p+/Gh5YRkZmb9Tb1tFguBuZK+QvZE9SeAn5YWlVkPynpSt0xlPwVchoH0ZLHtGfUmi38BzgH+nqw32VuAa8sKyqwnlUqFh+6/l9EHDpzhTYa+kJ3AP//wkgZHUp/1G5saHYL1Q3Uli9TFx9XpZdZQow/cysVtGxsdxqB1WceBjQ7B+qF6n7MYB3wWGE/WMywAEfHqkuIyM7N+pN4G7uvJziq2AO8Cvk72gJ6Zme0F6k0Ww9ModYqIRyJiJvDu8sIyM7P+pN4G7uclDQHWpjEqHgNeVl5YZmbWn9R7ZvEpYH9gOvAm4CzgI2UFZWZm/UthskgP4H0gIjZGRDUiPhoRZ0bE3XWsO1HSGkkVSRd2s3y0pNslLZO0QtKp3SzfKGnGTv1WZmbWpwqTRURsBd6Uf4K7HinJXAmcQnYX1RRJ42uqXQzMjYhjycbovqpm+RXAT3Zmv2Zm1vfqbbNYBvwwjZL3bFdhRHyvl3UmAJWIWAcgaQ4wCVidqxPAwWn6EGBD1wJJfw2sy+/PzMwao95kcTjwB156B1QAvSWLkcCjufkq8JaaOjOBWySdBxwAvAdA0gFkT42fBPgSlJlZg9X7BPdHd2Hb3V22ipr5KWTjeX9B0tuAGyW9AfgMcEVEbOzt6pekacA0gNGjR+9CiGZmVo96n+C+nh2/6ImIv+tltSowKjffQu4yU3I2MDFta7GkYcAIsjOQ90v6HHAosE3S8xHx5Zr9zwZmA7S1te0Qn5mZ9Y16L0PdnJseBpzBjl/8tZYA4ySNJXsuYzLwoZo664ETgRskHZ223RkR7+iqIGkmsLE2UfRHZfWIWnavpQOph9FqtcqzzzS5/6ISPfJMEwdUq40Ow/qZei9DfTc/L+km4GcF62xJD/AtBJqA6yJilaRZQEdEzAcuAK6RdD7ZmcvUiPAZQo3hw4c3OgQz28vVe2ZRaxxQ2EgQEQuABTVll+SmVwPHFWxj5q6FuOcNlKPzgaylpYXnt/zGvc6W6LKOAxnW0tLoMKyfqbfN4hle2mbxW7K7lczMbC9Q72Wog8oOxMzM+q+6+oaSdIakQ3Lzh6aH5szMbC9Qb0eCl0bE010zEfEUcGk5IZmZWX9Tb7Lort6uNo6bmdkAU2+y6JD0RUmvkfRqSVcAS8sMzMzM+o96k8V5wGbg28Bc4Dngk2UFZWZm/Uu9d0M9C+wwHoWZme0d6r0bapGkQ3Pzh0laWF5YZmbWn9R7GWpEugMKgIh4Eo/BbWa216g3WWyT9GL3HpLG0E0vtGZmNjjVe/vrp4G7JN2Z5t9JGkfCzMwGv3obuH8qqY0sQdwH/JDsjigzM9sL1NuR4MeAfyQbwOg+4K3AYl46zKqZmQ1S9bZZ/CPwZuCRiHgXcCzQWVpUZmbWr9SbLJ6PiOcBJO0XEQ8Cry0vLDMz60/qbeCupucsfgAskvQkxcOqmpnZIFHXmUVEnBERT6VR6/4V+C+gsItySRMlrZFUkbTDE+CSRku6XdIySSsknZrKT5K0VNLK9NNtI2ZmDbTTPcdGxJ3FtUBSE3AlcBJQBZZImp+GUu1yMTA3Iq6WNJ5sCNYxwOPAaRGxQdIbyMbxHrmzsZqZWd+ot81iV0wAKhGxLiI2A3OASTV1Ajg4TR9CurQVEcsiousy1ypgmKT9SozVzMx6UWayGAk8mpuvsuPZwUzgLElVsrOK87rZzpnAsoj4U+0CSdMkdUjq6Oz0zVlmZmUpM1mom
7LaLkKmADdERAtwKnCjpBdjkvR64N+Bc7rbQUTMjoi2iGhrbm7uo7DNzKxWmcmiCozKzbew4x1UZ5ONj0FELAaGASMAJLUA3wc+HBG/KjFOMzMrUGayWAKMkzRW0lBgMjC/ps564EQASUeTJYvOdJvuj4GLIuIXJcZoZmZ1KC1ZRMQW4FyyO5keILvraZWkWZJOT9UuAD4uaTlwEzA1IiKt1wr8q6T70stdopuZNchO3zq7MyJiAVnDdb7sktz0auC4bta7DLiszNjMzKx+ZV6GMjOzQcLJwszMCjlZmJlZIScLMzMr5GRhZmaFnCzMzKyQk4WZmRVysjAzs0JOFmZmVsjJwszMCjlZmJlZoVL7hjIrw/qNTVzWcWCjw6jb7zZlx2Qv339bgyOpz/qNTRzV6CCs33GysAGltbW10SHstM1r1wIwbMy4BkdSn6MYmH9nK5eThQ0o06dPb3QIO60r5vb29gZHYrbr3GZhZmaFnCzMzKxQqclC0kRJayRVJF3YzfLRkm6XtEzSCkmn5pZdlNZbI+m9ZcZpZma9K63NQlITcCVwElAFlkian0bH63Ix2XCrV0saTzaq3pg0PRl4PfAq4GeSjoqIrWXFa2ZmPSvzzGICUImIdRGxGZgDTKqpE8DBafoQYEOangTMiYg/RcSvgUranpmZNUCZyWIk8GhuvprK8mYCZ0mqkp1VnLcT6yJpmqQOSR2dnZ19FbeZmdUoM1mom7KomZ8C3BARLcCpwI2ShtS5LhExOyLaIqKtubl5twM2M7PulfmcRRUYlZtvYftlpi5nAxMBImKxpGHAiDrXNTOzPaTMM4slwDhJYyUNJWuwnl9TZz1wIoCko4FhQGeqN1nSfpLGAuOAe0qM1czMelHamUVEbJF0LrAQaAKui4hVkmYBHRExH7gAuEbS+WSXmaZGRACrJM0FVgNbgE/6Tigzs8YptbuPiFhA1nCdL7skN70aOK6HdS8HLi8zPjMzq4+f4DYzs0JOFmZmVsjJwszMCjlZmJlZIScLMzMr5GRhZmaFnCzMzKyQk4WZmRVysjAzs0JOFmZmVsjJwszMCjlZmJlZIScLMzMr5GRhZmaFnCzMzKyQk4WZmRUqNVlImihpjaSKpAu7WX6FpPvS6yFJT+WWfU7SKkkPSGqXpDJjNTOznpU2Up6kJuBK4CSgCiyRND+NjgdARJyfq38ecGya/kuyEfSOSYvvAo4H7igrXjMz61mZZxYTgEpErIuIzcAcYFIv9acAN6XpAIYBQ4H9gH2B35UYq5mZ9aLMZDESeDQ3X01lO5B0JDAWuA0gIhYDtwO/Sa+FEfFAibGamVkvykwW3bUxRA91JwPzImIrgKRW4GighSzBvFvSO3fYgTRNUoekjs7Ozj4K28zMapWZLKrAqNx8C7Chh7qT2X4JCuAM4O6I2BgRG4GfAG+tXSkiZkdEW0S0NTc391HYZmZWq8xksQQYJ2mspKFkCWF+bSVJrwUOAxbnitcDx0vaR9K+ZI3bvgxlZtYgpSWLiNgCnAssJPuinxsRqyTNknR6ruoUYE5E5C9RzQN+BawElgPLI+JHZcVqZma9K+3WWYCIWAAsqCm7pGZ+ZjfrbQXOKTM2MzOrn5/gNjOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwKOVmYmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFSp18CNJE4EvAU3AtRHxf2uWXwG8K83uD7wsIg5Ny0YD15KN4x3AqRHxcJnx2t6tvb2dSqXS59tdu3YtANOnT+/zbbe2tpayXbNapSULSU3AlcBJQBVYIml+RKzuqhMR5+fqnwccm9vE14HLI2KRpAOBbWXFalam4cOHNzoEs91W5pnFBKASEesAJM0BJgGre6g/Bbg01R0P7BMRiwAiYmOJcZoB5Rz5mw0WZbZZjAQezc1XU9kOJB0JjAVuS0VHAU9J+
p6kZZL+I52pmJlZA5SZLNRNWfRQdzIwLyK2pvl9gHcAM4A3A68Gpu6wA2mapA5JHZ2dnbsfsZmZdavMZFEla5zu0gJs6KHuZOCmmnWXRcS6iNgC/AD4i9qVImJ2RLRFRFtzc3MfhW1mZrXKTBZLgHGSxkoaSpYQ5tdWkvRa4DBgcc26h0nqygDvpue2DjMzK1lpySKdEZwLLAQeAOZGxCpJsySdnqs6BZgTEZFbdyvZJahbJa0ku6R1TVmxmplZ75T7jh7Q2traoqOjo9FhmJkNKJKWRkRbUT0/wW1mZoWcLMzMrNCguQwlqRN4pNFxlGgE8Hijg7Bd5vdv4Brs792REVF4O+mgSRaDnaSOeq4rWv/k92/g8nuX8WUoMzMr5GRhZmaFnCwGjtmNDsB2i9+/gcvvHW6zMDOzOvjMwszMCjlZ1EHSVkn35V5j+mCbMyXN2FP7212STpB0c6Pj2BWSXiFpjqRfSVotaYGko/bAfh+WNKKgzlRJr8rNX5vGc+nLOKZK+nJfbrO/yX1m7pf0I0mHNjqmIukz9ZeNjqNepQ6rOog8FxFv7O/7k7RP6pNrt0lqynUZP2BJEvB94GsRMTmVvRF4OfBQI2NLpgL3k3pkjoiPNTSagevFz4ykrwGfBC4vY0e1n430P6aI2NnRPE8ANgL/04fhlcZnFrtIUlMalGmJpBWSzskt+6dc+Wdy5Z+WtEbSz4DX7uT+hkm6XtLKNCDUu1L5VEnfkfQj4BZJV3V11Cjp+5KuS9NnS7osTf9A0lJJqyRNy+1jY+ro8ZfA2yRNlPSgpLuA9+36X6uh3gW8EBFf6SqIiPuAu9L7d3/6m34QXjzau0PSvPS7f1OZUyTN7dpGqvejND0lbeN+Sf9eG4CkMZLuz83PSGeW7wfagG+mo+Lhad9tvW03vU+XS1ou6W5JL0/lp0n6Zfr/+FlX+V5oMbmB1nr5PH44lS2XdGMquyG9L111NqafJ0i6XdK3gJXpPX1A0lXAvcAoSSdLWizp3vSZPDCt+7Ckz6TylZJep+xqwSeA89N7/47y/yy7KSL8KngBW4H70uv7qWwacHGa3g/oIBvt72SyuydEloxvBt4JvAlYCewPHAxUgBk7sb8LgOvT9OuA9cAwsiPTKnB4WjYZ+I80fQ9wd5q+Hnhvmu6qO5zsqPaINB/AB9L0MLKRDsel32UucHOj34tdeO+mA1d0U34msAhoIjvLWA+8kuxo72my8VeGkH3xvJ3sLHw9cEBa/2rgLOBVqbw51bkN+OtU52Gyp3/HAPfn9j0DmJmm7wDacsvuIEsgvW03gNPS9Ody/4eHsf2mlY8BX0jTU4EvN/q9KPl93ph+NgHfASam+Z4+j68H1gAjaj4TNwDv72a7JwDPAmPT/BhgG/DWND8C+O/c/8e/AJfk/g/OS9P/AFybpmfSw3dAf3z5MlR9urssdDJwTO4o5BCyL9aT02tZKj8wlR9E9sW/CUDSDmN7FOzv7cB/AkTEg5IeIRt+FmBRRDyRpn8OfErZde/VZOOCvBJ4G9kXJ8B0SWek6VEpvj+QJanvpvLXAb+OiLUp3m+QJcjB4u3ATZFdTvidpDvJRmX8I3BPRFQBJN0HjImIuyT9FDhN0jzgfwH/TDbWyh0R0Znqf5Psy+gHuxnfm3vZ7mayLz2ApcBJaboF+HZ6v4cCv97NGAaS4V3vFdnfZFEq7+nz+Odko3M+DpD7/PTmnojI/00fiYi70/RbgfHAL7KrUgzlpWP0fC/9XMoAPUv3ZahdJ7KjhTem19iIuCWVfzZX3hoR/5XW2eE+ZUmjtL0h+xMF++vJs10TEfEY2RHmRLIjnZ8DHyA7QnpG0gnAe4C3RcSfk32IhqXVn4+XtlMMhvuqV5Gd1dXq7e/5p9z0Vra37X2b7G/5bmBJRDxTsJ0uW3jpZ21YTxXrjO+FSIemNfH9J9kZxJ8B59S5n8Gi6wDrSLIv6k+m8p4+j6L7/+8X3ytl3/pDc8uerambn
xfZQVvXfsZHxNm55V3/U/n3a0Bxsth1C4G/l7QvgKSjJB2Qyv8ud71ypKSXkX1xn5GuSx8EnAYQEY/m/sG+0v2uIK3/N137AkaTnUZ3ZzHwKbYnixnpJ2RnQE9GxCZJryM7IurOg8BYSa9J81N6+2P0Y7cB+0n6eFeBpDcDTwIfVNb21Ex21H5PwbbuIBve9+NkiQPgl8DxkkZIaiL7O91Zs97vgJdJOkLSfsBf5ZY9Q3bWWaue7dY6BHgsTX+koO6gFBFPk51Bz0ifzZ4+j7cCH5B0RCo/PG3iYbYfXEwC9q1z13cDx0lqTdvbX8V33PX03vdLTha77lqyyzz3psbLrwL7pLOLbwGLlY3yNw84KCLuJfuCuY/sUs/Pu99sj64CmtI2vw1MjYg/9VD35ymWClnj2+G5/f0U2EfSCuDfyP7JdxARz5NddvqxsgbuAdmjbzoCPwM4Sdmts6vIrhV/C1gBLCdLKP8cEb8t2NZWsss/p6SfRMRvgIuA29O27o2IH9as9wIwiywB3EyWiLvcAHylq4E7t07hdrsxE/iOpJ8zuHtJ7VVELCP7m03u5fO4iuxuqTslLQe+mFa/hixJ3wO8hR3PJnraZydZ29BN6bN1N9ml3N78iOwAckA0cPsJbjMzK+QzCzMzK+RkYWZmhZwszMyskJOFmZkVcrIwM7NCThZmDaD6eqQtrGO2pzhZmJlZIScLszqlnkYfVDbmxP3KeqR9j6RfSForaYKkw5X16rsi9Qh7TFr3CEm3pB5hv0quOw9JZ0m6Jz2c9dX0xLZZv+JkYbZzWoEvAceQPaH7IbJOCWcA/wf4DLAsIo5J819P610K3BURxwLzybprQdLRwAeB41LfRltJ3bqY9ScDskMrswb6dUSsBEhdh9waEZG6khhD1pHdmQARcVs6oziErO+p96XyH0t6Mm3vRLK+iJak3kqHA7/fg7+PWV2cLMx2Tr4/rm25+W1kn6fuRiqMmp95IhvF76I+i9CsBL4MZda38r0DnwA8HhF/rCk/hawbech6P31/6gmV1OZx5J4O2qyIzyzM+tZM4PrU8+gmtncV/hmyHknvJetqfD1ARKyWdDHZkLhDgBfIxmIYkL382uDlXmfNzKyQL0OZmVkhJwszMyvkZGFmZoWcLMzMrJCThZmZFXKyMDOzQk4WZmZWyMnCzMwK/X9lUWBTiWo8LgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "bow_scores = []\n", + "for score in ff_bow_scores['accuracies']:\n", + " bow_scores.append((\"Feed-Forward\", score))\n", + "for score in conv_bow_scores['accuracies']:\n", + " bow_scores.append((\"Convolutional\", score))\n", + "for score in rnn_bow_scores['accuracies']:\n", + " bow_scores.append((\"Recurrent\", score))\n", + " \n", + "boxplot(x='model', y='accuracy', data=DataFrame(bow_scores, columns=[\"model\", \"accuracy\"]))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAGz9JREFUeJzt3X2UVfV97/H3h/EBjFZBMEkZEdLBGG+TajsxSc3NRW8huBofk2Uwzb3QNhLb6jTpNa3eepWgXTXNus3KGJNovBjam6iJMYoWH0gMXo0aGQRFiMqIT0dMAoJGBBSY7/1j/0Y2x5nZh/Fszpzh81rrrNn7d/b+ne/MmXM++3krIjAzMxvIiEYXYGZmQ5/DwszMCjkszMyskMPCzMwKOSzMzKyQw8LMzAo5LMzMrJDDwszMCjkszMys0D6NLqBexo4dGxMnTmx0GWZmTWXp0qXrI2Jc0XTDJiwmTpxIV1dXo8swM2sqkp6tZbpSN0NJmi7pCUndki7o4/mvSVqeHk9Kejn33I7ccwvKrNPMzAZW2pqFpBbgSmAqUAGWSFoQEat6p4mIL+amPw84NtfFlog4pqz6zMysdmWuWRwHdEfEmoh4A7geOHWA6c8CriuxHjMzG6Qyw2I88HxuvJLa3kLSEcAk4O5c80hJXZIelHRaeWWamVmRMndwq4+2/m6eMQO4MSJ25NomRMRaSe8B7pa0IiKe2uUFpNnAbIAJEybUo2YzM+tDmWsWFeDw3HgrsLafaWdQtQkqItamn2uAxey6P6N3mqsjoj0i2seNKzzyy8zMBqnMsFgCTJY0SdJ+ZIHwlqOaJL0XGA08kGsbLWn/NDwWOB5YVT2vmZntGaVthoqI7ZLOBe4EWoB5EbFS0lygKyJ6g+Ms4PrY9f6u7wOuktRDFmiX54+iMjPL6+zspLu7u+79VioVAFpbW+veN0BbWxsdHR2l9F1vpZ6UFxELgYVVbRdXjc/pY777gfeXWZuZWZEtW7Y0uoQhY9icwW1me6+yls57++3s7Cyl/2biCwmamVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWaFSw0LSdElPSOqWdEEfz39N0vL0eFLSy7nnZkpanR4zy6zTzMwG
tk9ZHUtqAa4EpgIVYImkBRGxqneaiPhibvrzgGPT8BjgEqAdCGBpmndjWfWamVn/ylyzOA7ojog1EfEGcD1w6gDTnwVcl4Y/DiyKiA0pIBYB00us1czMBlBmWIwHns+NV1LbW0g6ApgE3L0780qaLalLUte6devqUrSZmb1VmWGhPtqin2lnADdGxI7dmTciro6I9ohoHzdu3CDLNDOzImWGRQU4PDfeCqztZ9oZ7NwEtbvzmplZycoMiyXAZEmTJO1HFggLqieS9F5gNPBArvlOYJqk0ZJGA9NSm5mZNUBpR0NFxHZJ55J9ybcA8yJipaS5QFdE9AbHWcD1ERG5eTdIupQscADmRsSGsmo1M7OBlRYWABGxEFhY1XZx1ficfuadB8wrrTgzM6uZz+A2M7NCDgszMyvksDAzs0IOCzMzK+SwMDOzQg4LMzMr5LAwM7NCpZ5nYWaW19nZSXd3d6PLqNnq1asB6OjoaHAlu6etra3uNTsszGyP6e7uZtnKZXBIoyupUU/2Y9kLyxpbx+54uXiSwXBYmNmedQj0TOlpdBXD1ojF5exd8D4LMzMr5LAwM7NCDgszMyvksDAzs0IOCzMzK+SwMDOzQg4LMzMr5LAwM7NCDgszMyvksDAzs0IOCzMzK+SwMDOzQg4LMzMr5LAwM7NCDgszMyvksDAzs0IOCzMzK1RqWEiaLukJSd2SLuhnmjMlrZK0UtL3c+07JC1PjwVl1mlmZgMr7baqklqAK4GpQAVYImlBRKzKTTMZuBA4PiI2Sjos18WWiDimrPrMzKx2Za5ZHAd0R8SaiHgDuB44tWqas4ErI2IjQET8psR6zMxskMoMi/HA87nxSmrLOxI4UtLPJT0oaXruuZGSulL7aSXWaWZmBUrbDAWoj7bo4/UnA1OAVuBeSb8fES8DEyJiraT3AHdLWhERT+3yAtJsYDbAhAkT6l2/mZklZa5ZVIDDc+OtwNo+prklIrZFxNPAE2ThQUSsTT/XAIuBY6tfICKujoj2iGgfN25c/X8DMzMDyg2LJcBkSZMk7QfMAKqParoZOAFA0liyzVJrJI2WtH+u/XhgFWZm1hClbYaKiO2SzgXuBFqAeRGxUtJcoCsiFqTnpklaBewAvhQRL0n6Y+AqST1kgXZ5/igqM2tOlUoFXoERi32KV2lehkpU6t5tmfssiIiFwMKqtotzwwH8XXrkp7kfeH+ZtZmZWe1KDQszs7zW1lbWaR09U3oaXcqwNWLxCFrHt9a/37r3aGZmw47DwszMCjkszMyskMPCzMwK1RQWkn4k6U8lOVzMzPZCtX75fwv4DLBa0uWSjiqxJjMzG2JqCouI+ElE/Bnwh8AzwCJJ90v6c0n7llmgmZk1Xs3nWUg6FPgs8N+AZcD3gI8CM8kuBLjX6+zspLu7u+79VirZ2ZitrfU/dhqgra2Njo6OUvo2s+GhprCQdBNwFPDvwMkR8WJ66gZJXWUVZ5ktW7Y0ugQz28vVumbxjYi4u68nIqK9jvU0tbKWznv77ezsLKV/M7Mite7gfp+kQ3pH0lVh/7qkmszMbIipNSzOTjckAiDdBvXsckoyM7OhptawGCHpzTvfSWoB9iunJDMzG2pq3WdxJ/ADSd8muzXqOcAdpVVlZmZDSq1h8Q/A54G/Iru39l3ANWUVZWZmQ0tNYRERPWRncX+r3HLMzGwoqvU8i8nAPwNHAyN72yPiPSXVZWZmQ0itO7ivJVur2A6cAPwb2Ql6Zma2F6h1n8WoiPipJEXEs8AcSfcCl5RYW2nKuixHWVavXg2Ud9JfWXwZEbPho9aw2JouT75a0rnAC8Bh5ZVVru7ubpatWEXPAWMaXUpN9EYAsPSpXzW4ktqN2Lyh0SWYWR3VGhZfAA4AOoBLyTZFzSyrqD2h54AxbD36E40uY9gaueq2RpdgZnVUGBbpBLwzI+JLwCbgz0uvyszMhpTCHdwRsQP4o/wZ3GZmtnepdTPUMuAWST8EXuttjIib
SqnKzMyGlFrDYgzwEnBiri0Ah4WZ2V6g1jO4vZ/CzOrjZRixuNZTvBpsU/p5YEOr2D0vA+Pr322tZ3BfS7YmsYuI+Iu6V2Rmw1ZbW1ujS9gtvec4TR4/ucGV7Ibx5fyda90MlT8OciRwOrC2aCZJ04GvAy3ANRFxeR/TnAnMIQujRyLiM6l9JnBRmuyyiJhfY61mNkQ120mavkvlTrVuhvpRflzSdcBPBponHXJ7JTAVqABLJC2IiFW5aSYDFwLHR8RGSYel9jFkZ4e3k4XI0jTvxpp/MzMzq5vBbjicDEwomOY4oDsi1kTEG8D1wKlV05wNXNkbAhHxm9T+cWBRRGxIzy0Cpg+yVjMze5tq3WfxKrvus/gV2T0uBjIeeD43XgE+VDXNkan/n5NtqpoTEXf0M28Ju2zMzKwWtW6GOmgQffd1El/1TvJ9yNZSpgCtwL2Sfr/GeZE0G5gNMGFC0YqOmZkNVk2boSSdLung3Pghkk4rmK0CHJ4bb+WtO8UrwC0RsS0ingaeIAuPWuYlIq6OiPaIaB83blwtv4qZmQ1CrfssLomIV3pHIuJlii9PvgSYLGmSpP2AGcCCqmluJrsoIZLGkm2WWkN2z+9pkkZLGg1MS21mZtYAtR4621eoDDhvRGxPlzO/k2x/xLyIWClpLtAVEQvYGQqrgB3AlyLiJQBJl5IFDsDciPA1r82sT2Xdo6bse8k00z1fag2LLkn/SnYobADnAUuLZoqIhcDCqraLc8MB/F16VM87D5hXY31mZnU3atSoRpcwZNQaFucB/wu4IY3fxc4T5szMGqpZls6bWa1HQ70GXFByLWZmNkTVejTUIkmH5MZHS/IOZzOzvUStR0ONTUdAAZDOqm7ae3CbmdnuqTUseiS9edabpIn0cZKcmZkNT7Xu4P5H4D5J96Txj5HOnDYzs+Gv1h3cd0hqJwuI5cAtwJYyCzMzs6Gj1gsJfg74W7LLbiwHPgw8wK63WTUzs2Gq1n0Wfwt8EHg2Ik4AjgXWlVaVmZkNKbWGxdaI2Aogaf+IeBx4b3llmZnZUFLrDu5KOs/iZmCRpI3UcFtVMzMbHmrdwX16Gpwj6WfAwcAdpVVlZmZDSq1rFm+KiHuKpzIzs+FksPfgNjOzvYjDwszMCjkszMyskMPCzMwKOSzMzKyQw8LMzAo5LMzMrJDDwszMCu32SXnDQaVSYcTmVxi56rZGlzJsjdj8EpXK9kaXYWZ14jULMzMrtFeuWbS2tvLr1/dh69GfaHQpw9bIVbfR2vquRpdhZnXiNQszMyvksDAzs0IOCzMzK+SwMDOzQqWGhaTpkp6Q1C3pgj6enyVpnaTl6fG53HM7cu0LyqzTzMwGVtrRUJJagCuBqUAFWCJpQUSsqpr0hog4t48utkTEMWXVZ2ZmtStzzeI4oDsi1kTEG8D1wKklvp6ZmZWkzLAYDzyfG6+ktmqflPSopBslHZ5rHympS9KDkk7r6wUkzU7TdK1bt66OpZuZWV6ZJ+Wpj7aoGr8VuC4iXpd0DjAfODE9NyEi1kp6D3C3pBUR8dQunUVcDVwN0N7eXt33gEZs3tA0l/vQ1t8CECN/p8GV1G7E5g2AT8ozGy7KDIsKkF9TaAXW5ieIiJdyo98BvpJ7bm36uUbSYuBYYJewGKy2trZ6dLPHrF79KgCTf6+Zvnzf1XR/ZzPrX5lhsQSYLGkS8AIwA/hMfgJJ746IF9PoKcAvU/toYHNa4xgLHA/8S70K6+joqFdXe0RvvZ2dnQ2uxMz2VqWFRURsl3QucCfQAsyLiJWS5gJdEbEA6JB0CrAd2ADMSrO/D7hKUg/ZfpXL+ziKyszM9pBSLyQYEQuBhVVtF+eGLwQu7GO++4H3l1mbmZnVzmdwm5lZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhVnJ1q9fz3nnncdLL71UPLHZEOWwMCvZ/PnzefTRR5k/f36jSzEbNIeFWYnWr1/P7bffTkRw++23e+3CmpbD
wqxE8+fPJyK7xmVPT4/XLqxpOSzMSrRo0SK2bdsGwLZt27jrrrsaXJHZ4DgszEo0depU9t13XwD23Xdfpk2b1uCKzAbHYWFWopkzZyJlt3YZMWIEM2fObHBFZoPjsDAr0dixYznppJOQxEknncShhx7a6JLMBqXUq86aWbZ28cwzz3itwpqaw8KsZGPHjuWKK65odBlmb4s3Q5mZWSGHhZmZFXJYmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkV8uU+6qizs5Pu7u6697t69WoAOjo66t43QFtbW2l9m9nw4LBoAqNGjWp0CWa2lys1LCRNB74OtADXRMTlVc/PAr4KvJCavhER16TnZgIXpfbLImLI34/SS+dmNlyVFhaSWoArgalABVgiaUFErKqa9IaIOLdq3jHAJUA7EMDSNO/Gsuo1M7P+lbmD+zigOyLWRMQbwPXAqTXO+3FgUURsSAGxCJheUp1mZlagzM1Q44Hnc+MV4EN9TPdJSR8DngS+GBHP9zPv+LIKteZR1kEEAJVKhS1btpTSd1lGjRpFa2tr3fv1QQ9WrcywUB9tUTV+K3BdRLwu6RxgPnBijfMiaTYwG2DChAlvr1prCt3d3Tz52MNMOHBH3fvesXkEPTv6+tcbunZs+y1bn3mxrn0+t6mlrv3Z8FBmWFSAw3PjrcDa/AQR8VJu9DvAV3LzTqmad3H1C0TE1cDVAO3t7W8JExueJhy4g4vaNzW6jGHrsq4DG12CDUFl7rNYAkyWNEnSfsAMYEF+Aknvzo2eAvwyDd8JTJM0WtJoYFpqMzOzBihtzSIitks6l+xLvgWYFxErJc0FuiJiAdAh6RRgO7ABmJXm3SDpUrLAAZgbERvKqtXMzAamiOGx9aa9vT26uroaXYaV7IwzzuC1jb/hiIPqv8/CMs++2sI7Rh/GTTfd1OhSbA+QtDQi2oum87WhzMyskC/3YU2ltbWVrdtf9A7uEl3WdSAjSzgc15qb1yzMzKyQw8LMzAo5LMzMrJDDwszMCnkHtzWd5za1NNVZxr/enC2TvfOAngZXUpvnNrVwZKOLsCHHYWFNpa2trdEl7LY30p0OR06c3OBKanMkzfl3tnI5LKypNOOVUHtr7uzsbHAlZoPnfRZmZlbIYWFmZoUcFmZmVshhYWZmhbyD2ywp65atq9PRUGXsnPftT21PcViYlWzUqFGNLsHsbXNYmCVeQjfrn/dZmJlZIYeFmZkVcliYmVkhh4WZmRVyWJiZWSGHhZmZFXJYmJlZIYeFmZkVUkQ0uoa6kLQOeLbRdZRoLLC+0UXYoPn9a17D/b07IiLGFU00bMJiuJPUFRHtja7DBsfvX/Pye5fxZigzMyvksDAzs0IOi+ZxdaMLsLfF71/z8nuH91mYmVkNvGZhZmaFHBY1kLRD0vLcY2Id+pwj6fw99Xpvl6Qpkm5rdB2DIeldkq6X9JSkVZIWSjpyD7zuM5LGFkwzS9Lv5savkXR0neuYJekb9exzqMl9Zh6TdKukQxpdU5H0mfrjRtdRK9/8qDZbIuKYof56kvaJiO31KEBSS0TsqEdfjSRJwI+B+RExI7UdA7wTeLKRtSWzgMeAtQAR8bmGVtO83vzMSJoP/A3wT2W8UPVnI/2PKSJ6drOrKcAm4P46llcar1kMkqQWSV+VtETSo5I+n3vuS7n2L+fa/1HSE5J+Arx3N19vpKRrJa2QtEzSCal9lqQfSroVuEvSNyWdkp77saR5afgvJV2Whm+WtFTSSkmzc6+xSdJcSb8APiJpuqTHJd0HnDH4v1ZDnQBsi4hv9zZExHLgvvT+PZb+pp+GN5f2Fku6Mf3u31PmJEk/6O0jTXdrGj4r9fGYpK9UFyBpoqTHcuPnpzXLTwHtwPfSUvGo9NrtA/Wb3qd/kvSIpAclvTO1nyzpF+n/4ye97XuhB4DxvSMDfB7/e2p7RNK/p7bvpveld5pN6ecUST+T9H1gRXpP
fynpm8DDwOGSpkl6QNLD6TN5YJr3GUlfTu0rJB2lbGvBOcAX03v/n8v/s7xNEeFHwQPYASxPjx+nttnARWl4f6ALmARMIzt6QmRhfBvwMeCPgBXAAcDvAN3A+bvxev8DuDYNHwU8B4wkWzKtAGPSczOAr6bhh4AH0/C1wMfTcO+0o8iWag9N4wGcmYZHAs8Dk9Pv8gPgtka/F4N47zqAr/XR/klgEdBCtpbxHPBusqW9V4DW9P49AHyUbC38OeAdaf5vAZ8Ffje1j0vT3A2clqZ5huzs34nAY7nXPh+Yk4YXA+255xaTBchA/QZwchr+l9z/4Wh2HrTyOeB/p+FZwDca/V6U/D5vSj9bgB8C09N4f5/H/wQ8AYyt+kx8F/hUH/1OAV4DJqXxiUAP8OE0Phb4f7n/j38ALs79H5yXhv8auCYNz6Gf74Ch+PBmqNr0tVloGvCB3FLIwWRfrNPSY1lqPzC1H0T2xb8ZQNKC3Xy9jwJXAETE45KeBXq3uy+KiA1p+F7gC8q2e68CRkt6N/ARsi9OgA5Jp6fhw1N9L5GF1I9S+1HA0xGxOtX7f8kCcrj4KHBdZJsTfi3pHuCDwG+BhyKiAiBpOTAxIu6TdAdwsqQbgT8F/h44EVgcEevS9N8j+zK6+W3W98EB+n2D7EsPYCkwNQ23Ajek93s/4Om3WUMzGdX7XpH9TRal9v4+j38A3BgR6wFyn5+BPBQR+b/psxHxYBr+MHA08PNsqxT7kS1o9Lop/VxKk66lezPU4IlsaeGY9JgUEXel9n/OtbdFxP9J87zlOGVJh2vnjuxzCl6vP6/1DkTEC2RLmNPJlnTuBc4kW0J6VdIU4E+Aj0TEH5B9iEam2bfGrvsphsNx1SvJ1uqqDfT3fD03vIOd+/ZuIPtbnggsiYhXC/rptZ1dP2sj+5uwxvq2RVo0rarvCrI1iPcDn6/xdYaL3gWsI8i+qP8mtff3eRR9/3+/+V4p+9bfL/fca1XT5sdFttDW+zpHR8Rf5p7v/Z/Kv19NxWExeHcCfyVpXwBJR0p6R2r/i9z2yvGSDiP74j49bZc+CDgZICKez/2Dfbvvl4I0/5/1vhYwgWw1ui8PAF9gZ1icn35Ctga0MSI2SzqKbImoL48DkyT9Xho/a6A/xhB2N7C/pLN7GyR9ENgIfFrZvqdxZEvtDxX0tRj4Q+BssuAA+AXwXySNldRC9ne6p2q+XwOHSTpU0v7AJ3LPvUq21lmtln6rHQy8kIZnFkw7LEXEK2Rr0Oenz2Z/n8efAmdKOjS1j0ldPMPOhYtTgX1rfOkHgeMltaX+DlDxEXf9vfdDksNi8K4h28zzcNp5eRWwT1q7+D7wgKQVwI3AQRHxMNkXzHKyTT339t1tv74JtKQ+bwBmRcTr/Ux7b6qlm2zn25jc690B7CPpUeBSsn/yt4iIrWSbnf5D2Q7upryib1oCPx2YquzQ2ZVk24q/DzwKPEIWKH8fEb8q6GsH2eafk9JPIuJF4ELgZ6mvhyPilqr5tgFzyQLgNrIg7vVd4Nu9O7hz8xT224c5wA8l3cvwvkrqgCJiGdnfbMYAn8eVZEdL3SPpEeBf0+zfIQvph4AP8da1if5ecx3ZvqHr0mfrQbJNuQO5lWwBsil2cPsMbjMzK+Q1CzMzK+SwMDOzQg4LMzMr5LAwM7NCDgszMyvksDBrANV2RdrCacz2FIeFmZkVcliY1ShdafRxZfeceEzZFWn/RNLPJa2WdJykMcqu6vtouiLsB9K8h0q6K10R9ipyl/OQ9FlJD6WTs65KZ2ybDSkOC7Pd0wZ8HfgA2Rm6nyG7KOH5wP8Evgwsi4gPpPF/S/NdAtwXEccCC8gu14Kk9wGfBo5P1zbaQbqsi9lQ0pQXtDJroKcjYgVAunTITyMi0qUkJpJdyO6TABFxd1qjOJjs2lNnpPb/kLQx9fdfya5FtCRdrXQU8Js9+PuY1cRhYbZ78tfj6smN95B9nvq6U2FU/cwT2V38Lqxb
hWYl8GYos/rKXx14CrA+In5b1X4S2WXkIbv66afSlVBJ+zyO2NNFmxXxmoVZfc0Brk1XHt3MzkuFf5nsiqQPk11q/DmAiFgl6SKyW+KOALaR3YuhKa/ya8OXrzprZmaFvBnKzMwKOSzMzKyQw8LMzAo5LMzMrJDDwszMCjkszMyskMPCzMwKOSzMzKzQ/wf+ZgmY3MjZxwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "wv_scores = []\n", + "for score in ff_wv_scores['accuracies']:\n", + " wv_scores.append((\"Feed-Forward\", score))\n", + "for score in conv_wv_scores['accuracies']:\n", + " wv_scores.append((\"Convolutional\", score))\n", + "for score in rnn_wv_scores['accuracies']:\n", + " wv_scores.append((\"Recurrent\", score))\n", + "\n", + "boxplot(x='model', y='accuracy', data=DataFrame(wv_scores, columns=[\"model\", \"accuracy\"]))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From these results we can see that more work is needed to investigate why our embeddings do not perform better than bag of words. There are a number of possibilities, already suggested." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}