diff --git a/Quora_bow_tfidf_randomforest.ipynb b/Quora_bow_tfidf_randomforest.ipynb new file mode 100644 index 0000000..2c0a05b --- /dev/null +++ b/Quora_bow_tfidf_randomforest.ipynb @@ -0,0 +1,993 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | id | \n", + "qid1 | \n", + "qid2 | \n", + "question1 | \n", + "question2 | \n", + "is_duplicate | \n", + "
---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "1 | \n", + "2 | \n", + "What is the step by step guide to invest in sh... | \n", + "What is the step by step guide to invest in sh... | \n", + "0 | \n", + "
1 | \n", + "1 | \n", + "3 | \n", + "4 | \n", + "What is the story of Kohinoor (Koh-i-Noor) Dia... | \n", + "What would happen if the Indian government sto... | \n", + "0 | \n", + "
2 | \n", + "2 | \n", + "5 | \n", + "6 | \n", + "How can I increase the speed of my internet co... | \n", + "How can Internet speed be increased by hacking... | \n", + "0 | \n", + "
3 | \n", + "3 | \n", + "7 | \n", + "8 | \n", + "Why am I mentally very lonely? How can I solve... | \n", + "Find the remainder when [math]23^{24}[/math] i... | \n", + "0 | \n", + "
4 | \n", + "4 | \n", + "9 | \n", + "10 | \n", + "Which one dissolve in water quikly sugar, salt... | \n", + "Which fish would survive in salt water? | \n", + "0 | \n", + "
5 | \n", + "5 | \n", + "11 | \n", + "12 | \n", + "Astrology: I am a Capricorn Sun Cap moon and c... | \n", + "I'm a triple Capricorn (Sun, Moon and ascendan... | \n", + "1 | \n", + "
6 | \n", + "6 | \n", + "13 | \n", + "14 | \n", + "Should I buy tiago? | \n", + "What keeps childern active and far from phone ... | \n", + "0 | \n", + "
7 | \n", + "7 | \n", + "15 | \n", + "16 | \n", + "How can I be a good geologist? | \n", + "What should I do to be a great geologist? | \n", + "1 | \n", + "
8 | \n", + "8 | \n", + "17 | \n", + "18 | \n", + "When do you use シ instead of し? | \n", + "When do you use \"&\" instead of \"and\"? | \n", + "0 | \n", + "
9 | \n", + "9 | \n", + "19 | \n", + "20 | \n", + "Motorola (company): Can I hack my Charter Moto... | \n", + "How do I hack Motorola DCX3400 for free internet? | \n", + "0 | \n", + "