diff --git a/Assignments/Assignment 1/exploring_word_vectors.ipynb b/Assignments/Assignment 1/exploring_word_vectors.ipynb index 7a4a0e7..01a58cf 100644 --- a/Assignments/Assignment 1/exploring_word_vectors.ipynb +++ b/Assignments/Assignment 1/exploring_word_vectors.ipynb @@ -14,9 +14,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package reuters to /home/t/nltk_data...\n" + ] + } + ], "source": [ "# All Import Statements Defined Here\n", "# Note: Do not add to this list.\n", @@ -124,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -148,11 +156,148 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['', 'japan', 'to', 'revise', 'long', '-', 'term', 'energy', 'demand', 'downwards', 'the',\n", + " 'ministry', 'of', 'international', 'trade', 'and', 'industry', '(', 'miti', ')', 'will', 'revise',\n", + " 'its', 'long', '-', 'term', 'energy', 'supply', '/', 'demand', 'outlook', 'by', 'august', 'to',\n", + " 'meet', 'a', 'forecast', 'downtrend', 'in', 'japanese', 'energy', 'demand', ',', 'ministry',\n", + " 'officials', 'said', '.', 'miti', 'is', 'expected', 'to', 'lower', 'the', 'projection', 'for',\n", + " 'primary', 'energy', 'supplies', 'in', 'the', 'year', '2000', 'to', '550', 'mln', 'kilolitres',\n", + " '(', 'kl', ')', 'from', '600', 'mln', ',', 'they', 'said', '.', 'the', 'decision', 'follows',\n", + " 'the', 'emergence', 'of', 'structural', 'changes', 'in', 'japanese', 'industry', 'following',\n", + " 'the', 'rise', 'in', 'the', 'value', 'of', 'the', 'yen', 'and', 'a', 'decline', 'in', 'domestic',\n", + " 'electric', 'power', 'demand', '.', 'miti', 'is', 'planning', 'to', 'work', 'out', 'a', 'revised',\n", + " 'energy', 'supply', '/', 'demand', 'outlook', 'through', 'deliberations', 'of', 'committee',\n", + " 'meetings', 'of', 'the', 'agency', 'of', 'natural', 'resources', 'and', 'energy', ',', 'the',\n", + " 'officials', 'said', '.', 'they', 'said', 'miti', 'will', 'also', 'review', 'the', 'breakdown',\n", + " 'of', 'energy', 'supply', 'sources', ',', 'including', 'oil', ',', 'nuclear', ',', 'coal', 'and',\n", + " 'natural', 'gas', '.', 'nuclear', 'energy', 'provided', 'the', 'bulk', 'of', 'japan', \"'\", 's',\n", + " 'electric', 'power', 'in', 'the', 'fiscal', 'year', 'ended', 'march', '31', ',', 'supplying',\n", + " 'an', 'estimated', '27', 'pct', 'on', 'a', 'kilowatt', '/', 'hour', 'basis', ',', 'followed',\n", + " 'by', 'oil', '(', '23', 'pct', ')', 'and', 'liquefied', 'natural', 'gas', '(', '21', 'pct', '),',\n", + " 'they', 'noted', '.', ''],\n", + " ['', 'energy', '/', 'u', '.', 's', '.', 'petrochemical', 'industry', 'cheap', 'oil',\n", + " 'feedstocks', ',', 'the', 'weakened', 'u', '.', 's', '.', 'dollar', 'and', 'a', 'plant',\n", + " 'utilization', 'rate', 'approaching', '90', 'pct', 'will', 'propel', 'the', 'streamlined', 'u',\n", + " '.', 's', '.', 'petrochemical', 'industry', 'to', 'record', 'profits', 'this', 'year', ',',\n", + " 'with', 'growth', 'expected', 'through', 'at', 'least', '1990', ',', 'major', 'company',\n", + " 'executives', 'predicted', '.', 'this', 'bullish', 'outlook', 'for', 'chemical', 'manufacturing',\n", + " 'and', 'an', 'industrywide', 'move', 'to', 'shed', 'unrelated', 'businesses', 'has', 'prompted',\n", + " 'gaf', 'corp', '&', 'lt', ';', 'gaf', '>,', 'privately', '-', 'held', 'cain', 'chemical', 'inc',\n", + " ',', 'and', 'other', 'firms', 'to', 'aggressively', 'seek', 'acquisitions', 'of', 'petrochemical',\n", + " 'plants', '.', 'oil', 'companies', 'such', 'as', 'ashland', 'oil', 'inc', '&', 'lt', ';', 'ash',\n", + " '>,', 'the', 'kentucky', '-', 'based', 'oil', 'refiner', 'and', 'marketer', ',', 'are', 'also',\n", + " 'shopping', 'for', 'money', '-', 'making', 'petrochemical', 'businesses', 'to', 'buy', '.', '\"',\n", + " 'i', 'see', 'us', 'poised', 'at', 'the', 'threshold', 'of', 'a', 'golden', 'period', ',\"', 'said',\n", + " 'paul', 'oreffice', ',', 'chairman', 'of', 'giant', 'dow', 'chemical', 'co', '&', 'lt', ';',\n", + " 'dow', '>,', 'adding', ',', '\"', 'there', \"'\", 's', 'no', 'major', 'plant', 'capacity', 'being',\n", + " 'added', 'around', 'the', 'world', 'now', '.', 'the', 'whole', 'game', 'is', 'bringing', 'out',\n", + " 'new', 'products', 'and', 'improving', 'the', 'old', 'ones', '.\"', 'analysts', 'say', 'the',\n", + " 'chemical', 'industry', \"'\", 's', 'biggest', 'customers', ',', 'automobile', 'manufacturers',\n", + " 'and', 'home', 'builders', 'that', 'use', 'a', 'lot', 'of', 'paints', 'and', 'plastics', ',',\n", + " 'are', 'expected', 'to', 'buy', 'quantities', 'this', 'year', '.', 'u', '.', 's', '.',\n", + " 'petrochemical', 'plants', 'are', 'currently', 'operating', 'at', 'about', '90', 'pct',\n", + " 'capacity', ',', 'reflecting', 'tighter', 'supply', 'that', 'could', 'hike', 'product', 'prices',\n", + " 'by', '30', 'to', '40', 'pct', 'this', 'year', ',', 'said', 'john', 'dosher', ',', 'managing',\n", + " 'director', 'of', 'pace', 'consultants', 'inc', 'of', 'houston', '.', 'demand', 'for', 'some',\n", + " 'products', 'such', 'as', 'styrene', 'could', 'push', 'profit', 'margins', 'up', 'by', 'as',\n", + " 'much', 'as', '300', 'pct', ',', 'he', 'said', '.', 'oreffice', ',', 'speaking', 'at', 'a',\n", + " 'meeting', 'of', 'chemical', 'engineers', 'in', 'houston', ',', 'said', 'dow', 'would', 'easily',\n", + " 'top', 'the', '741', 'mln', 'dlrs', 'it', 'earned', 'last', 'year', 'and', 'predicted', 'it',\n", + " 'would', 'have', 'the', 'best', 'year', 'in', 'its', 'history', '.', 'in', '1985', ',', 'when',\n", + " 'oil', 'prices', 'were', 'still', 'above', '25', 'dlrs', 'a', 'barrel', 'and', 'chemical',\n", + " 'exports', 'were', 'adversely', 'affected', 'by', 'the', 'strong', 'u', '.', 's', '.', 'dollar',\n", + " ',', 'dow', 'had', 'profits', 'of', '58', 'mln', 'dlrs', '.', '\"', 'i', 'believe', 'the',\n", + " 'entire', 'chemical', 'industry', 'is', 'headed', 'for', 'a', 'record', 'year', 'or', 'close',\n", + " 'to', 'it', ',\"', 'oreffice', 'said', '.', 'gaf', 'chairman', 'samuel', 'heyman', 'estimated',\n", + " 'that', 'the', 'u', '.', 's', '.', 'chemical', 'industry', 'would', 'report', 'a', '20', 'pct',\n", + " 'gain', 'in', 'profits', 'during', '1987', '.', 'last', 'year', ',', 'the', 'domestic',\n", + " 'industry', 'earned', 'a', 'total', 'of', '13', 'billion', 'dlrs', ',', 'a', '54', 'pct', 'leap',\n", + " 'from', '1985', '.', 'the', 'turn', 'in', 'the', 'fortunes', 'of', 'the', 'once', '-', 'sickly',\n", + " 'chemical', 'industry', 'has', 'been', 'brought', 'about', 'by', 'a', 'combination', 'of', 'luck',\n", + " 'and', 'planning', ',', 'said', 'pace', \"'\", 's', 'john', 'dosher', '.', 'dosher', 'said', 'last',\n", + " 'year', \"'\", 's', 'fall', 'in', 'oil', 'prices', 'made', 'feedstocks', 'dramatically', 'cheaper',\n", + " 'and', 'at', 'the', 'same', 'time', 'the', 'american', 'dollar', 'was', 'weakening', 'against',\n", + " 'foreign', 'currencies', '.', 'that', 'helped', 'boost', 'u', '.', 's', '.', 'chemical',\n", + " 'exports', '.', 'also', 'helping', 'to', 'bring', 'supply', 'and', 'demand', 'into', 'balance',\n", + " 'has', 'been', 'the', 'gradual', 'market', 'absorption', 'of', 'the', 'extra', 'chemical',\n", + " 'manufacturing', 'capacity', 'created', 'by', 'middle', 'eastern', 'oil', 'producers', 'in',\n", + " 'the', 'early', '1980s', '.', 'finally', ',', 'virtually', 'all', 'major', 'u', '.', 's', '.',\n", + " 'chemical', 'manufacturers', 'have', 'embarked', 'on', 'an', 'extensive', 'corporate',\n", + " 'restructuring', 'program', 'to', 'mothball', 'inefficient', 'plants', ',', 'trim', 'the',\n", + " 'payroll', 'and', 'eliminate', 'unrelated', 'businesses', '.', 'the', 'restructuring', 'touched',\n", + " 'off', 'a', 'flurry', 'of', 'friendly', 'and', 'hostile', 'takeover', 'attempts', '.', 'gaf', ',',\n", + " 'which', 'made', 'an', 'unsuccessful', 'attempt', 'in', '1985', 'to', 'acquire', 'union',\n", + " 'carbide', 'corp', '&', 'lt', ';', 'uk', '>,', 'recently', 'offered', 'three', 'billion', 'dlrs',\n", + " 'for', 'borg', 'warner', 'corp', '&', 'lt', ';', 'bor', '>,', 'a', 'chicago', 'manufacturer',\n", + " 'of', 'plastics', 'and', 'chemicals', '.', 'another', 'industry', 'powerhouse', ',', 'w', '.',\n", + " 'r', '.', 'grace', '&', 'lt', ';', 'gra', '>', 'has', 'divested', 'its', 'retailing', ',',\n", + " 'restaurant', 'and', 'fertilizer', 'businesses', 'to', 'raise', 'cash', 'for', 'chemical',\n", + " 'acquisitions', '.', 'but', 'some', 'experts', 'worry', 'that', 'the', 'chemical', 'industry',\n", + " 'may', 'be', 'headed', 'for', 'trouble', 'if', 'companies', 'continue', 'turning', 'their',\n", + " 'back', 'on', 'the', 'manufacturing', 'of', 'staple', 'petrochemical', 'commodities', ',', 'such',\n", + " 'as', 'ethylene', ',', 'in', 'favor', 'of', 'more', 'profitable', 'specialty', 'chemicals',\n", + " 'that', 'are', 'custom', '-', 'designed', 'for', 'a', 'small', 'group', 'of', 'buyers', '.', '\"',\n", + " 'companies', 'like', 'dupont', '&', 'lt', ';', 'dd', '>', 'and', 'monsanto', 'co', '&', 'lt', ';',\n", + " 'mtc', '>', 'spent', 'the', 'past', 'two', 'or', 'three', 'years', 'trying', 'to', 'get', 'out',\n", + " 'of', 'the', 'commodity', 'chemical', 'business', 'in', 'reaction', 'to', 'how', 'badly', 'the',\n", + " 'market', 'had', 'deteriorated', ',\"', 'dosher', 'said', '.', '\"', 'but', 'i', 'think', 'they',\n", + " 'will', 'eventually', 'kill', 'the', 'margins', 'on', 'the', 'profitable', 'chemicals', 'in',\n", + " 'the', 'niche', 'market', '.\"', 'some', 'top', 'chemical', 'executives', 'share', 'the',\n", + " 'concern', '.', '\"', 'the', 'challenge', 'for', 'our', 'industry', 'is', 'to', 'keep', 'from',\n", + " 'getting', 'carried', 'away', 'and', 'repeating', 'past', 'mistakes', ',\"', 'gaf', \"'\", 's',\n", + " 'heyman', 'cautioned', '.', '\"', 'the', 'shift', 'from', 'commodity', 'chemicals', 'may', 'be',\n", + " 'ill', '-', 'advised', '.', 'specialty', 'businesses', 'do', 'not', 'stay', 'special', 'long',\n", + " '.\"', 'houston', '-', 'based', 'cain', 'chemical', ',', 'created', 'this', 'month', 'by', 'the',\n", + " 'sterling', 'investment', 'banking', 'group', ',', 'believes', 'it', 'can', 'generate', '700',\n", + " 'mln', 'dlrs', 'in', 'annual', 'sales', 'by', 'bucking', 'the', 'industry', 'trend', '.',\n", + " 'chairman', 'gordon', 'cain', ',', 'who', 'previously', 'led', 'a', 'leveraged', 'buyout', 'of',\n", + " 'dupont', \"'\", 's', 'conoco', 'inc', \"'\", 's', 'chemical', 'business', ',', 'has', 'spent', '1',\n", + " '.', '1', 'billion', 'dlrs', 'since', 'january', 'to', 'buy', 'seven', 'petrochemical', 'plants',\n", + " 'along', 'the', 'texas', 'gulf', 'coast', '.', 'the', 'plants', 'produce', 'only', 'basic',\n", + " 'commodity', 'petrochemicals', 'that', 'are', 'the', 'building', 'blocks', 'of', 'specialty',\n", + " 'products', '.', '\"', 'this', 'kind', 'of', 'commodity', 'chemical', 'business', 'will', 'never',\n", + " 'be', 'a', 'glamorous', ',', 'high', '-', 'margin', 'business', ',\"', 'cain', 'said', ',',\n", + " 'adding', 'that', 'demand', 'is', 'expected', 'to', 'grow', 'by', 'about', 'three', 'pct',\n", + " 'annually', '.', 'garo', 'armen', ',', 'an', 'analyst', 'with', 'dean', 'witter', 'reynolds', ',',\n", + " 'said', 'chemical', 'makers', 'have', 'also', 'benefitted', 'by', 'increasing', 'demand', 'for',\n", + " 'plastics', 'as', 'prices', 'become', 'more', 'competitive', 'with', 'aluminum', ',', 'wood',\n", + " 'and', 'steel', 'products', '.', 'armen', 'estimated', 'the', 'upturn', 'in', 'the', 'chemical',\n", + " 'business', 'could', 'last', 'as', 'long', 'as', 'four', 'or', 'five', 'years', ',', 'provided',\n", + " 'the', 'u', '.', 's', '.', 'economy', 'continues', 'its', 'modest', 'rate', 'of', 'growth', '.',\n", + " ''],\n", + " ['', 'turkey', 'calls', 'for', 'dialogue', 'to', 'solve', 'dispute', 'turkey', 'said',\n", + " 'today', 'its', 'disputes', 'with', 'greece', ',', 'including', 'rights', 'on', 'the',\n", + " 'continental', 'shelf', 'in', 'the', 'aegean', 'sea', ',', 'should', 'be', 'solved', 'through',\n", + " 'negotiations', '.', 'a', 'foreign', 'ministry', 'statement', 'said', 'the', 'latest', 'crisis',\n", + " 'between', 'the', 'two', 'nato', 'members', 'stemmed', 'from', 'the', 'continental', 'shelf',\n", + " 'dispute', 'and', 'an', 'agreement', 'on', 'this', 'issue', 'would', 'effect', 'the', 'security',\n", + " ',', 'economy', 'and', 'other', 'rights', 'of', 'both', 'countries', '.', '\"', 'as', 'the',\n", + " 'issue', 'is', 'basicly', 'political', ',', 'a', 'solution', 'can', 'only', 'be', 'found', 'by',\n", + " 'bilateral', 'negotiations', ',\"', 'the', 'statement', 'said', '.', 'greece', 'has', 'repeatedly',\n", + " 'said', 'the', 'issue', 'was', 'legal', 'and', 'could', 'be', 'solved', 'at', 'the',\n", + " 'international', 'court', 'of', 'justice', '.', 'the', 'two', 'countries', 'approached', 'armed',\n", + " 'confrontation', 'last', 'month', 'after', 'greece', 'announced', 'it', 'planned', 'oil',\n", + " 'exploration', 'work', 'in', 'the', 'aegean', 'and', 'turkey', 'said', 'it', 'would', 'also',\n", + " 'search', 'for', 'oil', '.', 'a', 'face', '-', 'off', 'was', 'averted', 'when', 'turkey',\n", + " 'confined', 'its', 'research', 'to', 'territorrial', 'waters', '.', '\"', 'the', 'latest',\n", + " 'crises', 'created', 'an', 'historic', 'opportunity', 'to', 'solve', 'the', 'disputes', 'between',\n", + " 'the', 'two', 'countries', ',\"', 'the', 'foreign', 'ministry', 'statement', 'said', '.', 'turkey',\n", + " \"'\", 's', 'ambassador', 'in', 'athens', ',', 'nazmi', 'akiman', ',', 'was', 'due', 'to', 'meet',\n", + " 'prime', 'minister', 'andreas', 'papandreou', 'today', 'for', 'the', 'greek', 'reply', 'to', 'a',\n", + " 'message', 'sent', 'last', 'week', 'by', 'turkish', 'prime', 'minister', 'turgut', 'ozal', '.',\n", + " 'the', 'contents', 'of', 'the', 'message', 'were', 'not', 'disclosed', '.', '']]\n" + ] + } + ], "source": [ "reuters_corpus = read_corpus()\n", "pprint.pprint(reuters_corpus[:3], compact=True, width=100)" @@ -171,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -189,7 +334,9 @@ " # ------------------\n", " # Write your implementation here.\n", "\n", - "\n", + " corpus_words = {work for sen in corpus for work in sen}\n", + " corpus_words = sorted(list(corpus_words))\n", + " num_corpus_words = len(corpus_words)\n", " # ------------------\n", "\n", " return corpus_words, num_corpus_words" @@ -197,9 +344,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------------------------------------\n", + "Passed All Tests!\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], "source": [ "# ---------------------\n", "# Run this sanity check\n", @@ -237,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -265,8 +422,14 @@ " \n", " # ------------------\n", " # Write your implementation here.\n", + " word2Ind = dict(zip(words, range(num_words)))\n", + " \n", + " M = np.zeros((num_words, num_words))\n", "\n", - "\n", + " for sen in corpus:\n", + " for i in range(len(sen) - 1):\n", + " M[word2Ind[sen[i]], word2Ind[sen[i + 1]]] += 1\n", + " M[word2Ind[sen[i + 1]], word2Ind[sen[i]]] += 1\n", " # ------------------\n", "\n", " return M, word2Ind" @@ -274,9 +437,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------------------------------------\n", + "Passed All Tests!\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], "source": [ "# ---------------------\n", "# Run this sanity check\n", @@ -342,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -362,11 +535,11 @@ " M_reduced = None\n", " print(\"Running Truncated SVD over %i words...\" % (M.shape[0]))\n", " \n", - " # ------------------\n", - " # Write your implementation here.\n", - " \n", - " \n", - " # ------------------\n", + " # ------------------\n", + " # Write your implementation here.\n", + " svd = TruncatedSVD(n_components=k, n_iter=n_iters)\n", + " M_reduced = svd.fit_transform(M)\n", + " # ------------------\n", "\n", " print(\"Done.\")\n", " return M_reduced" @@ -374,9 +547,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running Truncated SVD over 10 words...\n", + "Done.\n", + "--------------------------------------------------------------------------------\n", + "Passed All Tests!\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], "source": [ "# ---------------------\n", "# Run this sanity check\n", @@ -412,7 +597,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -429,16 +614,47 @@ "\n", " # ------------------\n", " # Write your implementation here.\n", - "\n", - "\n", + " x = []\n", + " y = []\n", + " w = []\n", + " for word in words:\n", + " w.append(word)\n", + " x.append(M_reduced[word2Ind[word]][0])\n", + " y.append(M_reduced[word2Ind[word]][1])\n", + "\n", + " plt.scatter(x, y)\n", + " for i, txt in enumerate(w):\n", + " plt.annotate(txt, (x[i], y[i]))\n", " # ------------------" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------------------------------------\n", + "Outputted Plot:\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD8CAYAAACGsIhGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAH1ZJREFUeJzt3X901fWd5/HnywgUFgVsQgdQBGcoWmU34B1bt3NmxBEi9RyQ1lLk2NJubZxunZ7ZDpTkVJzRGU/peFY5c6ZqacvUqltrqbUZwSWisdI5Wg1CBS2UoLImYQkOQuWHKPjeP+4X95Lc5Jvk3psEeT3OuSff7+f7+Xzv+35z+b7y/X7v5auIwMzMrCun9XcBZmY28DkszMwslcPCzMxSOSzMzCyVw8LMzFI5LMzMLJXDwszMUjkszMwslcPCzMxSnd7fBfRGeXl5TJgwob/LMDM7qWzYsOGNiKjozdiTMiwmTJhAY2Njf5dhZnZSkbSzt2N9GsrMzFI5LMzMLNUpGxb79u3jrrvu6tXY5cuXc+jQoQ7ts2fP5qKLLiq0NDOzDoq5z5J0m6TXJR3o7jocFr2QLywefvhhhg8fXozSzMw6KPI+69+AS3qyjpPyAncx1NTUsGPHDiorK5kxYwajR4/moYce4siRI8ydO5dbbrmFgwcPMm/ePJqbmzl27BhLly5l9+7dtLa2Mn36dMrLy2loaODAgQPccccdrFixgnnz5vX3SzOzD6Bi7LOAjwJExLMAkrr9/KdsWCxbtowtW7awadMm6uvrWbVqFc899xwRwezZs3n66afZs2cPY8eOZfXq1QDs37+fESNGcMcdd9DQ0EB5eTkAS5cu5W//9m8ZNmxYf74kM/sAK8Y+q6Ki4ve9ff6inIaStFJSm6QtnSyXpH+W1CTpRUnTcpYtlLQ9eSwsRj09VV9fT319PVOnTmXatGls3bqV7du3M2XKFNatW8eSJUtYv349I0aM6DB206ZNNDU1MXfu3H6o3MxORYXss3qrWEcWPwL+BfhxJ8tnAZOSx8eBu4GPSzoL+DsgAwSwQVJdRLxZpLpO8MjGFm5fu43WfYc5K/bzh7ePAhAR1NbWcsMNN3QYs2HDBtasWUNtbS0zZ87k5ptvPmH5M888w4YNG5gwYQJHjx6lra2Nyy67jKeeeqoUL8HMTjHH91s7d77G3jcO8sjGloL2Wb1VlCOLiHga2NtFlznAjyPrWWCkpDFAFfB4ROxNAuJx4Mpi1NTeIxtbqH14My37DhNA29ti1543eWRjC1VVVaxcuZIDB7IfDGhpaaGtrY3W1laGDRvGddddx6JFi3jhhRcAOOOMM3jrrbcA+OpXv0prayuvvfYav/71r/noRz/qoDCzosjdb2nwUN45fJDahzdzxh9f3Ot9Vm/11TWLccDrOfPNSVtn7UV3+9ptHH732PvzZUPPZPC4C1gw68/42uevYcGCBVx66aUADB8+nPvvv5+mpiYWL17MaaedxqBBg7j77rsBqK6uZtasWYwZM4aGhoZSlGtmdsJ+q2zomQwZ9zF23HMD37/gE3yzF/sskgvckv4JWAAMk9QM/CAi/r6rWhQRRXlRkiYAj0ZEhy8aSFoNfDsifp3MPwF8E7gcGBIR/5i0LwUORcT/zLOOaqAaYPz48Rfv3Nmzb61PrFlNvlcq4NVlV/VoXWZmfaHY+y1JGyIi05ta+up7Fs3AOTnzZwOtXbR3EBErIiITEZmKip7/P1hjRw7tUbuZWX8bSPutvgqLOuALyaeiPgHsj4hdwFpgpqRRkkYBM5O2oltcNZmhg8pOaBs6qIzFVZNL8XRmZgUbSPutolyzkPQT4DKgPDn/9XfAIICIuAdYA3wKaAIOAV9Klu2V9A/A88mqbo2Iri6U99rVU7OXQo5/GmrsyKEsrpr8fruZ2UAzkPZbRbtm0ZcymUz4vyg3M+uZk+GahZmZncQcFmZmlsphYWZmqRwWZmaWymFhZmapHBZmZpbKYWFmZqkcFmZmlsphYWZmqRwWZmaWymFhZmapHBZmZpbKYWFmZqkcFmZmlsphYWZmqYoSFpKulLRNUpOkmjzL75S0KXn8XtK+nGXHcpbVFaMeMzMrroLvlCepDPguMIPsPbWfl1QXES8f7xMR/yOn/18DU3NWcTgiKgutw8zMSqcYRxaXAE0R8UpEvAM8CMzpov+1wE+K8LxmZtZHihEW44DXc+abk7YOJJ0LTASezGn+kKRGSc9KuroI9ZiZWZEVfBoKUJ62zm7sPR9YFRHHctrGR0SrpPOAJyVtjogdHZ5EqgaqAcaPH19ozWZm1gPFOLJoBs7JmT8baO2k73zanYKKiNbk5yvAU5x4PSO334qIyEREpqKiotCazcysB4oRFs8DkyRNlDSYbCB0+FSTpMnAKOCZnLZRkoYk0+XAJ4GX2481M7P+VfBpqIg4KulGYC1QBqyMiJck3Qo0RsTx4LgWeDAick9RXQB8T9J7ZINrWe6nqMzMbGDQifvuk0Mmk4nGxsb+LsPM7KQiaUNEZHoz1t/gNjOzVA4LMzNL5bAwM7NUDgszM0vlsDAzs1QOCzMzS+WwMDOzVA4LMzNL5bAwM7NUDgszM0vlsDAzs1QOCzMzS+WwMDOzVA4LMzNL5bAwM7NUDgszM0tVlLCQdKWkbZKaJNXkWf5FSXskbUoe1+csWyhpe/JYWIx6zMysuAq+raqkMuC7wAygGXheUl2e26P+NCJubDf2LODvgAwQwIZk7JuF1mVmZsVTjCOLS4CmiHglIt4BHgTmdHNsFfB4ROxNAuJx4Moi1GRmZkVUjLAYB7yeM9+ctLX3GUkvSlol6ZwejkVStaRGSY179uwpQtlmZtZdxQgL5WmLdvP/BkyIiP8MrAPu7cHYbGPEiojIRESmoqKi18WamVnPFSMsmoFzcubPBlpzO0TEf0TEkWT2+8DF3R1rZmb9rxhh8TwwSdJESYOB+UBdbgdJY3JmZwO/S6bXAjMljZI0CpiZtJmZ2QBS8KehIuKopBvJ7uTLgJUR8ZKkW4HGiKgDvi5pNnAU2At8MRm7V9I/kA0cgFsjYm+hNZmZWXEpIu8lggEtk8lEY2Njf5dhZnZSkbQhIjK9GetvcJuZWSqHhZmZpXJYmJlZKoeFmZmlcliYmVkqh4WZmaVyWJiZWSqHhZmZpXJYmJlZKoeFmZmlcliYmVkqh4WZmaVyWJiZWSqHhZmZpXJYmJlZqqKEhaQrJW2T1CSpJs/yb0h6WdKLkp6QdG7OsmOSNiWPuvZjzcys/xV8pzxJZcB3gRlk76n9vKS6iHg5p9tGIBMRhyR9Ffgn4HPJssMRUVloHWZmVjrFOLK4BGiKiFci4h3gQWBOboeIaIiIQ8nss8DZRXheMzPrI8UIi3HA6znzzUlbZ74MPJYz/yFJjZKelXR1EeoxM7MiK/g0FKA8bXlv7C3pOiAD/EVO8/iIaJV0HvCkpM0RsSPP2GqgGmD8+PGFV21mZt1WjCOLZuCcnPmzgdb2nSRdAXwLmB0RR463R0Rr8vMV4Clgar4niYgVEZGJiExFRUURyjYzs+4qRlg8D0ySNFHSYGA+cMKnmiRNBb5HNijactpHSRqSTJcDnwRyL4ybmdkAUPBpqIg4KulGYC1QBqyMiJck3Qo0RkQdcDswHPiZJID/ExGzgQuA70l6j2xwLWv3KSozMxsAFJH38sKAlslkorGxsb/LMDM7qUjaEBGZ3oz1N7jNzCyVw8LMzFI5LMzMLJXDwszMUjkszMwslcPCzMxSOSzMzCyVw8LMzFI5LMzMLJXDwszMUjkszMwslcPCrIj27dvHXXfd1auxy5cv59ChQ+/PX3bZZUyePJnKykoqKytpa2vrYrRZaTkszIqomGEB8MADD7Bp0yY2bdrE6NGji1GiWa8U4055Zpaoqalhx44dVFZWMmPGDEaPHs1DDz3EkSNHmDt3LrfccgsHDx5k3rx5NDc3c+zYMZYuXcru3btpbW1l+vTplJeX09DQ0N8vxewEDguzIlq2bBlbtmxh06ZN1NfXs2rVKp577jkigtmzZ/P000+zZ88exo4dy+rVqwHYv38/I0aM4I477qChoYHy8vL31/elL32JsrIyPvOZz3DTTTeR3A/GrM8V5TSUpCslbZPUJKkmz/Ihkn6aLP+NpAk5y2qT9m2SqopRj9lAUF9fT319PVOnTmXatGls3bqV7du3M2XKFNatW8eSJUtYv349I0aMyDv+gQceYPPmzaxfv57169dz33339fErMPv/Cj6ykFQGfBeYQfZ+3M9Lqmt3x7svA29GxJ9Img98B/icpI+RvQ3rhcBYYJ2kj0bEsULrMutLj2xs4fa129i58zX2vnGQRza2EBHU1tZyww03dOi/YcMG1qxZQ21tLTNnzuTmm2/u0GfcuHEAnHHGGSxYsIDnnnuOL3zhCyV/LWb5FOPI4hKgKSJeiYh3gAeBOe36zAHuTaZXAX+p7PH0HODBiDgSEa8CTcn6zE4aj2xsofbhzbTsO4wGD+WdwwepfXgzZ/zxxaxcuZIDBw4A0NLSQltbG62trQwbNozrrruORYsW8cILLwDZUHjrrbcAOHr0KG+88QYA7777Lo8++igXXXRR/7xAM4pzzWIc8HrOfDPw8c76JPfs3g98OGl/tt3YcUWoyazP3L52G4ffzR4Mlw09kyHjPsaOe27g+xd8gm8uWMCll14KwPDhw7n//vtpampi8eLFnHbaaQwaNIi7774bgOrqambNmsWYMWN49NFHqaqq4t133+XYsWNcccUVfOUrX+m312hW8D24JX0WqIqI65P5zwOXRMRf5/R5KenTnMzvIHsEcSvwTETcn7T/EFgTET/P8zzVQDXA+PHjL965c2dBdZsVy8Sa1eT7VyTg1WVX9XU5Zp3q73twNwPn5MyfDbR21kfS6cAIYG83xwIQESsiIhMRmYqKiiKUbVYcY0cO7VG72cmoGGHxPDBJ0kRJg8lesK5r16cOWJhMXwM8GdlDmjpgfvJpqYnAJOC5ItRk1mcWV01m6KCyE9qGDipjcdXkfqrIrPgKvmaRXIO4EVgLlAErI+IlSbcCjRFRB/wQuE9SE9kjivnJ2JckPQS8DBwFvuZPQtnJ5uqp2ctst6/dRuu+w4wdOZTFVZPfbzf7ICj4mkV/yGQy0djY2N9lmJmdVPr7moWZmX3AOSzMzCyVw8LMzFI5LMzMLJXDwszMUjkszMwslcPCzMxSOSzMzCyVw8LMzFI5LMzMLJXDwszMUjkszMwslcPCzMxSOSzMzCyVw8LMzFI5LMzMLFVBYSHpLEmPS9qe/ByVp0+lpGckvSTpRUmfy1n2I0mvStqUPCoLqcfMzEqj0COLGuCJiJgEPJHMt3cI+EJEXAhcCSyXNDJn+eKIqEwemwqsx8zMSqDQsJgD3JtM3wtc3b5DRPw+IrYn061AG1BR4POamVkfKjQsPhIRuwCSn6O76izpEmAwsCOn+bbk9NSdkoZ0MbZaUqOkxj179hRYtpmZ9URqWEhaJ2lLnsecnjyRpDHAfcCXIuK9pLkWOB/4U+AsYEln4yNiRURkIiJTUeEDEzOzvnR6WoeIuKKzZZJ2SxoTEbuSMGjrpN+ZwGrgpoh4Nmfdu5LJI5L+FVjUo+rNzKxPFHoaqg5YmEwvBH7ZvoOkwcAvgB9HxM/aLRuT/BTZ6x1bCqzHzMxKoNCwWAbMkLQdmJHMIykj6QdJn3nAnwNfzPMR2QckbQY2A+XAPxZYj5mZlYAior9r6LFMJhONjY39XYaZ2UlF0oaIyPRmrL/BbWZmqRwWZmaWymFhZmapHBZmZpbKYWFmZqkcFmZmlsphYWZmqRwWZmaWymFhZmapHBZmZpbKYWFmZqkcFmZmlsphYWZmqRwWZmaWymFhZmapCgoLSWdJelzS9uTnqE76Hcu58VFdTvtESb9Jxv80uauemZkNMIUeWdQAT0TEJOCJZD6fwxFRmTxm57R/B7gzGf8m8OUC6zEzsxIoNCzmAPcm0/eSvY92tyT33b4cWNWb8WZm1ncKDYuPRMQugOTn6E76fUhSo6RnJR0PhA8D+yLiaDLfDIwrsB4zMyuB09M6SFoH/FGeRd/qwfOMj4hWSecBT0raDPwhT79ObwguqRqoBhg/fnwPntrMzAqVGhYRcUVnyyTtljQmInZJGgO0dbKO1uTnK5KeAqYCPwdGSjo9Obo4G2jtoo4VwAqATCbTaaiYmVnxFXoaqg5YmEwvBH7ZvoOkUZKGJNPlwCeBlyMigAbgmq7Gm5lZ/ys0LJYBMyRtB2Yk80jKSPpB0ucCoFHSb8mGw7KIeDlZtgT4hqQmstcwflhgPWZmVgLK/oF/cslkMtHY2NjfZZiZnVQkbYiITG/G+hvcZmaWymFhZmapHBZmZpbKYWFmZqkcFmZmlsphYWZmqRwWZmaWymFhZmapHBZmZpbKYWFmZqkcFmZmlsphYWZmqRwWZmaWymFhZmapHBZmZpbKYWFmZqkKCgtJZ0l6XNL25OeoPH2mS9qU83hb0tXJsh9JejVnWWUh9ZiZWWkUemRRAzwREZOAJ5L5E0REQ0RURkQlcDlwCKjP6bL4+PKI2FRgPWZmVgKFhsUc4N5k+l7g6pT+1wCPRcShAp/XzMz6UKFh8ZGI2AWQ/Byd0n8+8JN2bbdJelHSnZKGdDZQUrWkRkmNe/bsKaxqMzPrkdSwkLRO0pY8jzk9eSJJY4ApwNqc5lrgfOBPgbOAJZ2Nj4gVEZGJiExFRUVPntrMzAp0elqHiLiis2WSdksaExG7kjBo62JV84BfRMS7OevelUwekfSvwKJu1m1mZn2o0NNQdcDCZHoh8Msu+l5Lu1NQScAgSWSvd2wpsB4zMyuBQsNiGTBD0nZgRjKPpIykHxzvJGkCcA7wq3bjH5C0GdgMlAP/WGA9ZmZWAqmnoboSEf8B/GWe9kbg+pz514BxefpdXsjzm5lZ3/A3uM3MLJXDwszMUjkszMwslcPCzMxSOSzMzCyVw8LMzFI5LMzMLJXDwszMUjkszMwslcPCzMxSOSzMzCzVKRsW+/bt46677urV2OXLl3PoUPZmf4cOHeKqq67i/PPP58ILL6SmpsOdZc3MClasfRaApP8t6beSXpJ0j6SytHU4LHqh/YZftGgRW7duZePGjfz7v/87jz32WLHKNDMDirvPAuZFxH8BLgIqgM+mraOg/3X2ZFZTU8OOHTuorKxkxowZjB49moceeogjR44wd+5cbrnlFg4ePMi8efNobm7m2LFjLF26lN27d9Pa2sr06dMpLy+noaGB6dOnAzB48GCmTZtGc3NzP786M/ugKcY+C/goQET8IVnt6cBgIFILiIiT7nHxxRdHoV599dW48MILIyJi7dq18ZWvfCXee++9OHbsWFx11VXxq1/9KlatWhXXX3/9+2P27dsXERHnnntu7Nmzp8M633zzzZg4cWLs2LGj4PrMzHIVY58FNEayHyV7i+s3gf8FlEXKfreg01CSPpuc83pPUqaLfldK2iapSVJNTvtESb+RtF3STyUNLqSe3qqvr6e+vp6pU6cybdo0tm7dyvbt25kyZQrr1q1jyZIlrF+/nhEjRnS6jqNHj3Lttdfy9a9/nfPOO68PqzezU00x9lkRUQWMAYYAqfcWKvQ01Bbg08D3OuuQXDj5Ltk76TUDz0uqi4iXge8Ad0bEg5LuAb4M3F1gTZ16ZGMLt6/dRuu+w5wV+/nD20eB7NFVbW0tN9xwQ4cxGzZsYM2aNdTW1jJz5kxuvvnmvOuurq5m0qRJ/M3f/E2pyjezU9Dx/dbOna+x942DPLKxpSj7LICIeFtSHTAHeLyrOgo6soiI30XEtpRulwBNEfFKRLwDPAjMSe67fTmwKul3L9n7cJfEIxtbqH14My37DhNA29ti1543eWRjC1VVVaxcuZIDBw4A0NLSQltbG62trQwbNozrrruORYsW8cILLwBwxhln8NZbb72/7ptuuon9+/ezfPnyUpVvZqeg3P2WBg/lncMHqX14M2f88cW93mdJGi5pTDJ9OvApYGtaLX1xgXsc8HrOfDPwceDDwL6IOJrT3uHWq8Vy+9ptHH732PvzZUPPZPC4C1gw68/42uevYcGCBVx66aUADB8+nPvvv5+mpiYWL17MaaedxqBBg7j77uxBT3V1NbNmzWLMmDHcd9993HbbbZx//vlMmzYNgBtvvJHrr7++YxFmZj2Qu98qG3omQ8Z9jB333MD3L/gE3+zFPovsBe7/BNRJGgKUAU8C96TVouRCR+cdpHXAH+VZ9K2I+GXS5ylgUWTvvd1+/GeBqoi4Ppn/PNmjjVuBZyLiT5L2c4A1ETGlkzqqgWqA8ePHX7xz586013aCiTWr817uF/Dqsqt6tC4zs75Q7P2WpA0R0en15a6kHllExBW9WXGOZuCcnPmzgVbgDWCkpNOTo4vj7Z3VsQJYAZDJZNI/5tXO2JFDadl3OG+7mdlANJD2W33xpbzngUnJJ58GA/OBusge0jQA1yT9FgK/LFURi6smM3TQiV9SHDqojMVVk0v1lGZmBRlI+61CPzo7V1IzcCmwWtLapH2spDUAyVHDjWQ/0/s74KGIeClZxRLgG5KayF7D+GEh9XTl6qnj+PanpzBu5FAEjBs5lG9/egpXTy3ZZRIzs4IMpP1W6jWLgSiTyURjY4fLI2Zm1oVCrlmcsv83lJmZdZ/DwszMUjkszMwslcPCzMxSOSzMzCyVw8LMzFKdlB+dlbQH6Nn/93GicrLfIB9oXFfPDMS6BmJN4Lp6YiDWBMWp69yIqOjNwJMyLAolqbG3nzUuJdfVMwOxroFYE7iunhiINUH/1+XTUGZmlsphYWZmqU7VsFjR3wV0wnX1zECsayDWBK6rJwZiTdDPdZ2S1yzMzKxnTtUjCzMz64EPbFhI+qyklyS9J6nTTxBIulLSNklNkmpy2idK+o2k7ZJ+mtyLoxh1nSXp8WS9j0salafPdEmbch5vS7o6WfYjSa/mLKvsq7qSfsdynrsup73o26ub26pS0jPJ7/pFSZ/LWVbUbdXZeyVn+ZDktTcl22JCzrLapH2bpKpC6uhhTd+Q9HKybZ6QdG7Osry/yz6q64uS9uQ8//U5yxYmv/Ptkhb2cV135tT0e0n7cpaVZHtJWimpTdKWTpZL0j8nNb8oaVrOspJtqw4i4gP5AC4AJgNPAZlO+pQBO4DzgMHAb4GPJcseAuYn0/cAXy1SXf8E1CTTNcB3UvqfBewFhiXzPwKuKcH26lZdwIFO2ou+vbpTE9l7Ck9KpscCu4CRxd5WXb1Xcvr8d+CeZHo+8NNk+mNJ/yHAxGQ9ZX1U0/Sc985Xj9fU1e+yj+r6IvAvnbzfX0l+jkqmR/VVXe36/zWwsg+2158D04AtnSz/FPAY2bupfgL4Tam3Vb7HB/bIIiJ+FxHbUrpdAjRFxCsR8Q7wIDBHkoDLgVVJv3uBq4tU2pxkfd1d7zXAYxFxqEjP35me1vW+Em6v1Joi4vcRsT2ZbgXagF596ShF3vdKF/WuAv4y2TZzgAcj4khEvAo0JesreU0R0ZDz3nmW7O2LS60726ozVcDjEbE3It4EHgeu7Ke6rgV+UqTn7lREPE32D8LOzAF+HFnPkr0d9RhKu606+MCGRTeNA17PmW9O2j4M7IvsXf5y24vhIxGxCyD5OTql/3w6vmFvSw5H75Q0pI/r+pCkRknPHj81Rum2V4+2laRLyP7FuCOnuVjbqrP3St4+ybbYT3bbdGdsqWrK9WWyf6Eel+93WQzdreszye9mlaRzeji2lHWRnK6bCDyZ01yq7ZWms7pLua06OL1UK+4LktYBf5Rn0bciojv381aetuiiveC6uruOZD1jgClkb0l7XC3wf8nuFFeQvTXtrX1Y1/iIaJV0HvCkpM3AH/L069b2KvK2ug9YGBHvJc293lb5niJPW/vXWJL3Uxe6vV5J1wEZ4C9ymjv8LiNiR77xJajr34CfRMQRSX9F9ojs8m6OLWVdx80HVkXEsZy2Um2vNH39vsrrpA6LiLiiwFU0A+fkzJ8NtJL9/1dGSjo9+QvxeHvBdUnaLWlMROxKdnBtXaxqHvCLiHg3Z927kskjkv4VWNSXdSWneoiIVyQ9BUwFfk4vt1cxapJ0JrAauCk5TD++7l5vqzw6e6/k69Ms6XRgBNnTC90ZW6qakHQF2fD9i4g4cry9k99lMXZ+qXVFxH/kzH4f+E7O2MvajX2qCDV1q64c84Gv5TaUcHul6azuUm6rDk7101DPA5OU/STPYLJvkLrIXj1qIHu9AGAh0J0jle6oS9bXnfV2OGea7DSPXye4Gsj7CYpS1CVp1PFTOZLKgU8CL5dwe3WnpsHAL8ie0/1Zu2XF3FZ53ytd1HsN8GSybeqA+cp+WmoiMAl4roBaul2TpKnA94DZEdGW0573d1mEmrpb15ic2dnA75LptcDMpL5RwExOPLIuaV1JbZPJXjB+JqetlNsrTR3wheRTUZ8A9id/CJVyW3VUqivn/f0A5pJN3iPAbmBt0j4WWJPT71PA78n+hfCtnPbzyP6DbgJ+BgwpUl0fBp4Atic/z0raM8APcvpNAFqA09qNfxLYTHbHdz8wvK/qAv5r8ty/TX5+uZTbq5s1XQe8C2zKeVSWYlvle6+QPa01O5n+UPLam5JtcV7O2G8l47YBs4r4Pk+raV3y/j++berSfpd9VNe3gZeS528Azs8Z+9+SbdgEfKkv60rm/x5Y1m5cybYX2T8IdyXv42ay15b+CvirZLmA7yY1bybn052l3FbtH/4Gt5mZpTrVT0OZmVk3OCzMzCyVw8LMzFI5LMzMLJXDwszMUjkszMwslcPCzMxSOSzMzCzV/wNqBetQmq10LwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "# ---------------------\n", "# Run this sanity check\n", @@ -480,9 +696,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running Truncated SVD over 8185 words...\n", + "Done.\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAD8CAYAAAC/1zkdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XlclXXe//HXJ8DELdwF09ByyQVRwbXMYhIbnbS0tE10StIetnnfjjZOkzXNjP3Uxpzbaqy0TCuVSq27e0xcyhwrQXHL1FLKwMw0UBMT5Pv7g+MJCXM5F5u+n48Hj3Mt3/NdDsa763stx5xziIiIeOGisu6AiIicPxQqIiLiGYWKiIh4RqEiIiKeUaiIiIhnFCoiIuIZhYqIiHhGoSIiIp5RqIiIiGeCy7oDp1KnTh0XGRlZ1t0QEalQUlNTv3fO1S2r9sttqERGRpKSklLW3RARqVDM7KuybF/TXyIi4hmFioiIeEahIiIinlGoiIiIZxQqIiLl0Msvv0xmZua5vDXSzAZ63Z8zpVARESmHfi1Ujh8/Xsq9OXMKFRGRUpCenk7Lli1JSEggKiqKgQMHcuTIEVJTU7nmmmvo2LEj8fHx7Nmzh6SkJFJSUrjjjjuIjo4mJyeHyMhInnjiCa666ioWLFhAWloaXbp0ISoqiptuuokffvjhF22aWUcz+8DMUs1siZmF+7avNLMY33IdM0v3LQ81s4Vm9o6Z7TKzUWY22szWm9nHZlbrdONUqIiIlJJt27aRmJjIxo0bqVGjBtOnT+f+++8nKSmJ1NRUfv/73zN+/HgGDhxITEwMc+fOJS0tjdDQUAAqV67MRx99xODBgxkyZAhPPfUUGzdupG3btjz++OMntWVmIcA/gYHOuY7ATOCvZ9DNNsDtQCdf+SPOufbAGmDI6d5cbm9+FBE53zRq1Iju3bsDcOedd/K3v/2NzZs3c/311wMF01rh4eGnfP+gQYMAyM7OJisri2uuuQaAhIQEbrnllqLFW1AQEEvNDCAI2HMG3VzhnDsEHDKzbOAd3/ZNQNTp3qxQEREpQQvXZzBpyTa++iqdfYd+YuH6DPq3bwhA9erVad26NWvWrDmjuqpWrXo2TRuwxTnXtZh9efw8U1W5yL6fCi3nF1rP5wwyQ9NfIiIlZOH6DB55axMZWTkAHMv6joemzWfh+gxef/11unTpwr59+/yhkpuby5YtW4CCwDl06FCx9V5yySXUrFmTVatWAfDqq6/6j1oK2QbUNbOuUDAdZmatffvSgY6+ZU+vFNORiohICZm0ZBs5uT9fqRVSuxH7097njt/+k97d2nP//fcTHx/PAw88QHZ2Nnl5eTz00EO0bt2aoUOHMmLECEJDQ4s9knnllVcYMWIER44coWnTpsyaNeuk/c65Y75Li6eZ2SUU/L2fCmwBJgPzzewuYLmXYzbnXOCVmPUGnqFgzu5F59zEIvt7UDCYKGCwcy7pdHXGxMQ4PVBSRCqyJuP+lxN/YfOy9/Jd0uNE3P0sBuya2KdE2jSzVOdcTIlUfgYCnv4ysyBgOnAD0Aq4zcxaFSn2NTAUeC3Q9kREKoqIsNCz2n4+8OKcSifgC+fcTufcMeANoF/hAs65dOfcRgpO9IiIXBDGxLcgNCQIgOBL6hNx97OEhgQxJr5FGfes5HhxTqUhsLvQ+jdAZw/qFRGp0E5c5TVpyTYys3KICAtlTHwL//bzkRehYsVsO6cTNWaWCCQCNG7cOJA+iYiUC/3bNzyvQ6QoL6a/vgEaFVq/FDinp6A552Y452KcczF165bZt2GKiMg58iJU1gLNzKyJmVUCBgOLPahXREQqmIBDxTmXB4wClgBbgfnOuS1m9oSZ3QhgZrFm9g1wC/AvM9sSaLsiIlL+eHLzo3PuPeC9Itv+XGh5LQXTYiIich7TY1pERMQzChUREfGMQkVERDyjUBEREc8oVERExDMKFRER8YxCRUREPKNQERERzyhURETEMwoVERHxjEJFREQ8o1ARERHPKFRERMQzChUREfGMQkVERDyjUBEREc94Eipm1tvMtpnZF2Y2rpj9F5vZPN/+T8ws0ot2RUSkfAk4VMwsCJgO3AC0Am4zs1ZFit0N/OCcuwL4B/BUoO2KiEj548WRSifgC+fcTufcMeANoF+RMv2AV3zLSUCcmZkHbYuISDniRag0BHYXWv/Gt63YMs65PCAbqO1B2yIiUo54ESrFHXG4cyiDmSWaWYqZpezbt8+DromISGnyIlS+ARoVWr8UyDxVGTMLBi4BDhStyDk3wzkX45yLqVu3rgddExGR0uRFqKwFmplZEzOrBAwGFhcpsxhI8C0PBJY7535xpCIiIhVbcKAVOOfyzGwUsAQIAmY657aY2RNAinNuMfAS8KqZfUHBEcrgQNsVEZHyJ+BQAXDOvQe8V2TbnwstHwVu8aItEREpv3RHvYiIeEahIiIinlGoiIiIZxQqIiLiGYWKiIh4RqEiIiKeUaiIiIhnFCoiIuIZhYqIiHhGoSIiIp5RqIiIiGcUKiIi4hmFioiIeEahIiIinlGoiIiIZxQqIiLiGYWKiIh4JqBQMbNaZrbUzHb4Xmueoty/zSzLzN4NpD0RESnfAj1SGQcsc841A5b51oszCbgrwLZERKScCzRU+gGv+JZfAfoXV8g5tww4FGBbIiJSzgUaKvWdc3sAfK/1Au+SiIhUVMGnK2BmyUCDYnaN97ozZpYIJAI0btzY6+pFRKSEnTZUnHO/OdU+M9trZuHOuT1mFg58F0hnnHMzgBkAMTExLpC6RESk9AU6/bUYSPAtJwCLAqxPREQqsEBDZSJwvZntAK73rWNmMWb24olCZrYKWADEmdk3ZhYfYLsiIlIOnXb669c45/YDccVsTwHuKbR+dSDtiIhIxaA76kVExDMKFRER8YxCRUREPKNQERERzyhURETEMwoVERHxjEJFREQ8o1ARERHPKFRERMQzChUREfGMQkVERDyjUBEREc8oVERExDMKFRER8YxCRUREPKNQERERzwQUKmZWy8yWmtkO32vNYspEm9kaM9tiZhvNbFAgbYqISPkV6JHKOGCZc64ZsMy3XtQRYIhzrjXQG5hqZmEBtisiIuVQoKHSD3jFt/wK0L9oAefcdufcDt9yJvAdUDfAdkVEKoS8vLyy7kKpCjRU6jvn9gD4Xuv9WmEz6wRUAr4MsF0RkRIxZ84cOnXqRHR0NPfeey/Hjx+nWrVqjB8/nnbt2tGlSxf27t0LwL59+xgwYACxsbHExsayevVqACZMmEBiYiK9evViyJAhHDlyhFtvvZWoqCgGDRpE586dSUlJ4aWXXuLhhx/2t/3CCy8wevToMhm3V04bKmaWbGabi/npdzYNmVk48CowzDmXf4oyiWaWYmYp+/btO5vqRUTOWbdu3QDYunUr8+bNY/Xq1aSlpREUFMTcuXP58ccf6dKlCxs2bKBHjx788Y9/pG/fvjz44IM8/PDDrF27ljfffJN77rnHX2dqaiqLFi3itdde49lnn6VmzZps3LiRyMhIUlJSABg8eDCLFy8mNzcXgFmzZjFs2LDS/wA8FHy6As6535xqn5ntNbNw59weX2h8d4pyNYD/Bf7knPv4V9qaAcwAiImJcafrm4iIF/7zn/8AsGzZMlJTU4mNjQUgJyeHevXqUalSJfr27QtAx44dmT17NmZGcnIyn332mb+egwcPcujQIQBuvPFGQkNDAfjoo4948MEHAZg3bx6tW7cGoGrVqlx33XW8++67XHnllRw7doy2bduWzqBLSKDTX4uBBN9yArCoaAEzqwS8Dcx2zi0IsD0REc+FVqlK94nL+eOs99l/5DhV6jTk6NGjxMTE8NhjjxESEsKSJUto2bIljz32GDt37gQgPz+fPn36cOedd5KWlkZGRgZdu3YlKyuLkJAQ+vTpQ7t27Vi+fDkrVqxg2rRpZGZmsmPHDkaMGAHA3LlzGTduHNdddx3169fnpptu8vdr6dKl3HzzzWXymZyrQENlInC9me0ArvetY2YxZvair8ytQA9gqJml+X6iA2xXRMQTC9dn8FNePhlZOYQ0uJxj2fvY0+S3/G3uUrZv386bb76Jc47hw4fzzjvv8Je//IWcnBwAevXqxaeffuqvKy0tzb/8+eefExERwYYNG3j00Uf55ptveOCBB6hbty65ubk8//zzQMHR0LFjxwgKCuLVV19l69atnJj+r4jTYQGFinNuv3MuzjnXzPd6wLc9xTl3j295jnMuxDkXXegn7ddrFhEpHZOWbMPl/QTARUEhEBRMxuIp3NX3GtLT09m8eTP5+fk0adKEZs2aYWY0bdoUwH/kMWXKFFq1auUPCoDw8HCSk5MZO3Ys7dq14+DBg0RFRZGdnU3r1q255JJLAAgKCmL48OF0796dWrVqcddddzFnzhyysrJYs2YNN9xwQ+l/KAE47TkVEZHz0cL1GUxaso2MrBws+GL/drsomIhh/8SAPof/j8jISNasWeM/JzJw4EAqVarEjBkzqFOnDrfddhuVKlXiD3/4AwBXXHEFDz30EJGRkYwdO5b33nuPxx9/nLi4OJKSkrj00ks5cOAAl112GQCVK1fmP//5j/8qsGHDhvG73/2OypUrc8sttxAcXLH+TFes3oqIBOjpp59m6rMz2HvwKFXb9qJGbD9c7tFflIsIC4XDBcstW7Zk165dfPnll1x++eW8/vrr/nKRkZG8++67AKxbt45du3YBkJmZSa1atbjzzjsJCgpi5MiRLFq0iO+//57nnnuOSpUqkZWVxZEjRwgNDSUuLq6g3YgIIiIiePLJJ1m6dGkJfxre07O/ROSCkZqayqxZs4gYMoX6d07m8IYlHNv7y9vmQkOCGBPfwr9euXJlZsyYQZ8+fbjqqqv8RxkAAwYM4MCBA0RHR/Pcc8/RvHlzADZt2uS/3+Xpp58mOTmZDRs2MGnSJJ566imuvfZawsLCqFKlCgsWnHwN0x133EGjRo1o1apVCX0SJcecK59X7sbExLgT13KLiHjhmWeeYf/+/bx6rDMOyPrwVS6qcglZH86m8egk8rL3cuDtv/Da/31E//YNy6yfo0aNon379tx9991n/V4zS3XOxZRAt86Ipr9E5Lx34vzJ1qVbqEoOtXtcRVZO7i/K1a9RmbA6Vcs0UDp27EjVqlWZMmVKmfUhEJr+EpHz2sL1GTzy1iYysnK4uFFr9m76iEM/HiYo7ycOb1pKSO1LgYIpr3t7ND2rutPT03nttdc87W9qaioffvghF1988ekLl0MKFRE5r01aso2c3OMAXNzgCqq1iWP3rIfJfHU0wZUqE1S5Ombw95vb0qt1g7OquyRCpaJTqIjIeeXpp5+mTZs2tGnThqlTp/LVV+lkvnSff79zjiotulO9620E/5RN9Y+fo+2VLYhvWYuePXvSp08fOnXqRKdOnfjiiy8AGDp0KElJSf46qlWrBsC4ceNYtWoV0dHR/OMf/yjdgZZTOqciIueNE1d3ffLJJzjn6Ny5M7XjHij2oYTNulxPSNYaJk+eTEzMz+e1a9Sowaeffsrs2bN56KGH/JcLF2fixIlMnjz5V8tcaHSkIiIV3sL1GXSfuJxeY/9FVr1olm7Polq1atx8883EVt6LmZ1UPviii066ZLiw2267zf+6Zs2aEu/7+UZHKiJSoZ04EZ+Texzn4NDRPB55a5N/f+NqUK9aJRqGhZKZlUP1EEe35vXo374hU4upr3AAnVgODg4mP7/gGzuccxw7dqxEx1SR6UhFRCq0k07EN2rNkR0f8+ORH5n4Thpvv/02N9xwAzkHD7B4eDs+f/w3RBz8jJbhNQCoXr26/1H1J8ybN8//2rVrV6DgrvnU1FQAFi1a5P/+k+Lef6HTkYqIVGiZWTn+5RNXd307ezTfAv9v/MPExsby5z//mc6dO9OkSRNatmzpLz906FBGjBhBaGiof6rrp59+onPnzuTn5/sfxzJ8+HD69etHp06diIuLo2rVqgBERUURHBxMu3btGDp06Enf4nih0h31IlKhdZ+4nIxCwXJCw7BQVo+77qzqOvGtjHXq1PGqe6WurO+o1/SXiFRoY+JbEBoSdNK2os/uktKj6S8RqdBOPFJl0pJtZGblEBEWypj4Fuf0qJX09HSPe3fhUaiISIXXv33DMn1el/wsoOkvM6tlZkvNbIfvtWYxZS4zs1Tf1whvMbMRgbQpIiLlV6DnVMYBy5xzzYBlvvWi9gDdnHPRQGdgnJlFBNiuiIiUQ4GGSj/gFd/yK0D/ogWcc8eccz/5Vi/2oE0RESmnAv0DX985twfA91qvuEJm1sjMNgK7gaecc5kBtisiIuXQaU/Um1kyUNzzoMefaSPOud1AlG/aa6GZJTnn9hbTViKQCNC4ceMzrV5ERMqJ04aKc+43p9pnZnvNLNw5t8fMwqHYh4EWrivTzLYAVwNJxeyfAcyAgpsfT9c3EREpXwKd/loMJPiWE4BFRQuY2aVmFupbrgl0B7YF2K6IiJRDgYbKROB6M9sBXO9bx8xizOxFX5krgU/MbAPwATDZObep2NpERKRCC+jmR+fcfiCumO0pwD2+5aVAVCDtiIhIxaDLe0VExDMKFRER8YxCRUREPKNQERERzyhURETEMwoVERHxjEJFREQ8o1ARERHPKFRERMQzChUREfGMQkVERDyjUBEREc8oVERExDMKFRER8YxCRUREPKNQERERzwQUKmZWy8yWmtkO32vNXylbw8wyzOx/AmlTRETKr0CPVMYBy5xzzYBlvvVT+QsFXycsIiLnqUBDpR/wim/5FaB/cYXMrCNQH3g/wPZERKQcCzRU6jvn9gD4XusVLWBmFwFTgDEBtiUiIuVc8OkKmFky0KCYXePPsI37gPecc7vN7HRtJQKJAI0bNz7D6kVEpLw4bag4535zqn1mttfMwp1ze8wsHPiumGJdgavN7D6gGlDJzA47535x/sU5NwOYARATE+POdBAiIlI+nDZUTmMxkABM9L0uKlrAOXfHiWUzGwrEFBcoIiJS8QV6TmUicL2Z7QCu961jZjFm9mKgnRM5n6Wnp9OmTZsSb2flypX07du3xNsRgQCPVJxz+4G4YranAPcUs/1l4OVA2hQRyMvLIzg4+JTrImVF/wpFylBeXh4JCQmsX7+e5s2bM3v2bCZPnsw777xDTk4O3bp141//+hdmRs+ePenWrRurV6/mxhtvZNOmTdSqVYv169fToUMHnnjiCe6//342bdpEXl4eEyZMoF+/fie198EHH/Dggw8CYGZ8+OGHVK9evSyGLucpPaZFpAxt27aNxMRENm7cSI0aNXj22WcZNWoUa9euZfPmzeTk5PDuu+/6y2dlZfHBBx/wX//1XwBs376d5ORkpkyZwl//+leuu+461q5dy4oVKxgzZgw//vjjSe1NnjyZ6dOnk5aWxqpVqwgNDS3V8cr5T6EiUoYaNWpE9+7dAbjzzjv56KOPWLFiBZ07d6Zt27YsX76cLVu2+MsPGjTopPffcsstBAUFAfD+++8zceJEoqOj6dmzJ0ePHuXrr78+qXz37t0ZPXo006ZNIysrS1Nm4jn9ixIpRQvXZzBpyTYys3Ko5bI5mpt/0n4z47777iMlJYVGjRoxYcIEjh496t9ftWrVk8oXXnfO8eabb9KiRYuTyuzdu9e/PG7cOPr06cN7771Hly5dSE5OpmXLll4OUS5wOlIRKSUL12fwyFubyMjKwQF7Dx5l37cZTHx5MQCvv/46V111FQB16tTh8OHDJCUlnXH98fHx/POf/8S5glu81q9f/4syX375JW3btmXs2LHExMTw+eefBz4wkUIUKiKlZNKSbeTkHj9pW0jtRkx97gWioqI4cOAAI0eOZPjw4bRt25b+/fsTGxt7xvU/+uij5ObmEhUVRZs2bXj00Ud/UWbq1Km0adOGdu3aERoayg033BDwuEQKsxP/V1PexMTEuJSUlLLuhohnmoz7X4r7r82AXRP7lHZ35DxlZqnOuZiyal9HKiKlJCKs+CutTrVdpCJSqIgUoyTudh8T34LQkKCTth1ePZdeYQWPzJs6dSpHjhzxtE2R0qZQESkl/ds35O83t6VhWCgGNAwL5cVpk3hsxGBAoSLnB4WKyGns3LmT9u3bM2nSJEaNGuXf3rdvX1auXMn8+fMZPXo0AM888wxNmzYFCq60OnE11xNPPEFsbCx/uiue1jvfYOfff8vqcdex8JnxJCUlMW3aNDIzM7n22mu59tprS3+QIh5RqIj8im3btjFgwABmzZpF3bp1iy3To0cPVq1aBcCqVauoXbs2GRkZfPTRR1x99dUAv3qXPMADDzxAREQEK1asYMWKFSU7KJESpFAROYV9+/bRr18/5syZQ3R09CnLNWjQgMOHD3Po0CF2797N7bffzocffsiqVav8ofJrd8kX1qFDB77//vsSGY9IadAd9SI+Re92D6pclUaNGrF69Wpat25NcHAw+fk/3wFf+E73rl27MmvWLFq0aMHVV1/NzJkzWbNmDVOmTOHo0aO/epe8yPlEoSIXhLFjx3LZZZdx3333ATBhwgSqV69Ofn4+8+fPZ1/WYQ6Hd6Bat9vJy97Lxjf+RP7RQ1x0URBjx46lUqVKXHHFFTz99NPEx8eTmZnJ1q1bGTJkCADJycnMmTOH+vXrs2DBAurVq8exY8d44YUXuOeegm+B6N27N/PnzycpKYnmzZvTqVMnvvjiCzIyMrjpppuoXr06P/zwg7/P/fv3Z/fu3Rw9epQHH3yQxMTE0v/gRM6Spr/kgjB48GDmzZvnX58/fz5169Zlx44dfPrpp0QM+ydHMndwdPdmAPKy93JR5Wpcdu+/6NmzJ0888QT79+8nMzOTL7/8klatWhEVFcXUqVOBgumt3Nxcxo8fT0xMDM2aNaNx48YAhIWFMXz4cHbs2EFiYiLNmjVjy5Yt/kfYX3TRRcydO5fExET27t3rf1z9zJkzSU1NJSUlhWnTprF///5S/tREzp6OVOSC0L59e7777jsyMzPZt28fNWvWZOPGjbz//vu0b9+ez/YcxB07Su6BTIJr1CU4rD4NE18gMyuHG7t0oWPHjsTFxZGdnU2DBg3Ytm0bAD/99BMAl19+Odu3b+d3v/sdy5cvJyIiggkTJvjbf/LJJ1m4cCFz587l3Xff5ZNPPvE/giUnJ4edO3cyYcIEpkyZwqJFBd/KPW3aNN5++20Adu/ezY4dO6hdu3YpfmoiZy+gUDGzWsA8IBJIB251zv1QTLnjwCbf6tfOuRsDaVfkTBQ+RxIRFkrbq+JJSkri22+/ZfDgwaSnp/PII49w77330n3icjKycoCCoxQLCgEK7nYPCgoiJyeH/Px8wsLCSEtL+0VbP/74I7feeisvvPACERERAKc8B+OcIyEhgb///e+n7PvKlStJTk5mzZo1VKlSxf8oe5HyLtDpr3HAMudcM2CZb704Oc65aN+PAkVKXNEnAmdk5ZASdCXPvjSbpKQkBg4cSHx8PDNnzuTw4cOMiW9BcM4PHP8xy19HaEgQY+J/fox8jRo1aNKkCQsWLAAKwmHDhg0ADBs2jGHDhvmv9gKIjIxk3bp1AKxbt45du3YBEBcXR1JSEt99V3An/YEDB/jqq69O6n92djY1a9akSpUqfP7553z88cfef0giJSDQ6a9+QE/f8ivASmBsgHWKBKy4JwLnh13K7r0H6HTlZYSHhxMeHs7WrVvp2rUrAEFWiXrxD7P3MAQHXcTfb25L//YNmbzs5zrmzp3LyJEjefLJJ8nNzWXw4MGEhYWRlJTE9u3bmTlzJgAvvvgiAwYMYPbs2URHRxMbG0vz5s0BaNWqFU8++SS9evUiPz+fkJAQpk+fzmWXXeZvp3fv3jz//PNERUXRokULunTpUsKfmIg3AnpKsZllOefCCq3/4JyrWUy5PCANyAMmOucWnqK+RCARoHHjxh2L/t+bnJ/S09Pp27cvmzdvPqf3V6tWjcOHD5+0TU8ElgtVWT+l+LRHKmaWDDQoZtf4s2insXMu08yaAsvNbJNz7suihZxzM4AZUPDo+7OoX+QkEWGh/nMkRbeLSMk5bag4535zqn1mttfMwp1ze8wsHPjuFHVk+l53mtlKoD3wi1CRC1deXh4JCQmsX7+e5s2bM3v2bFq1asWgQYP8jy157bXXuOKKK9i1axe33347eXl59O7du9j6xsS34JG3Np00BVb0HImIeC/QE/WLgQTfcgKwqGgBM6tpZhf7lusA3YHPAmxXzjPbtm0jMTGRjRs3UqNGDZ599lmg4OT4p59+yqhRo3jooYcAePDBBxk5ciRr166lQYPiDqKLfyLwiXMkIlJyAj2nUhuYDzQGvgZucc4dMLMYYIRz7h4z6wb8C8inIMSmOudeOl3d+ubHC0d6ejo9evTg66+/BmD58uVMmzaNtLQ0li9fTtOmTcnNzaVBgwbs37+f2rVr8+233xISEsLBgweJiIj4xTkVkQtVuT+n8mucc/uBuGK2pwD3+Jb/A7QNpB05/xR9ztbR3PyT9pvZSa+/tiwi5Yce0yKlrug9JHsPHmXftxlMfHkxAK+//rr/e0hOPFpl3rx5/kt/u3fvzhtvvAEUXOIrIuWHQkVKXXH3kITUbsTU514gKiqKAwcOMHLkSKDgMSidO3fmmWee4R//+AdQ8EVY06dPJzY2luzs7FLvv4icWkDnVEqSzqmcv870HpLIyEhSUlKoU6dOqfVNpKIr63MqOlKRUneqe0V0D4lIxadQkVI3Jr4FoSFBJ20r7h6S9PR0HaWIVDB69L2UuhP3ihR+gvCY+Ba6h0TkPKBQkTLRv31DhYjIeUjTXyIi4hmFioiIeEahIiIinlGoiIiIZxQqIiLiGYWKiIh4RqEiIiKeUaiIiIhnFCoiIuKZgELFzGqZ2VIz2+F7rXmKco3N7H0z22pmn5lZZCDtiohI+RTokco4YJlzrhmwzLdenNnAJOfclUAn4LsA2xURkXIo0FDpB7ziW34F6F+0gJm1AoKdc0sBnHOHnXNHAmxXRETKoUBDpb5zbg+A77VeMWWaA1lm9paZrTezSWYWVEw5ERGp4E77lGIzSwYaFLNr/FnHKis0AAAH/0lEQVS0cTXQHvgamAcMBV4qpq1EIBGgcePGZ1i9iIiUF6c9UnHO/cY516aYn0XAXjMLB/C9Fneu5BtgvXNup3MuD1gIdDhFWzOcczHOuZi6deue+6jKuZdffplRo0aVdTdERDwX6PTXYiDBt5wALCqmzFqgppmdSInrgM8CbPeCkpeXV9ZdEBE5I4GGykTgejPbAVzvW8fMYszsRQDn3HHgv4FlZrYJMOCFANstEXPmzKFTp05ER0dz7733cvz4cf7973/ToUMH2rVrR1xcHAATJkxg8uTJ/ve1adOG9PR0APr370/Hjh1p3bo1M2bM8JeZNWsWzZs355prrmH16tX+7V999RVxcXFERUURFxfH119/DcDQoUMZPXo01157LWPHji2F0YuIBC6gb350zu0H4orZngLcU2h9KRAVSFslbevWrcybN4/Vq1cTEhLCfffdx5w5c/jTn/7Ehx9+SJMmTThw4MBp65k5cya1atUiJyeH2NhYBgwYwLFjx3jsscdITU3lkksu4dprr6V9+/YAjBo1iiFDhpCQkMDMmTN54IEHWLhwIQDbt28nOTmZoCBd1yAiFcMF/XXCC9dn+L8n3T5bQtbHa4mNjQUgJyeHTz75hB49etCkSRMAatWqddo6p02bxttvvw3A7t272bFjB99++y09e/bkxHmiQYMGsX37dgDWrFnDW2+9BcBdd93FH/7wB39dt9xyiwJFRCqUCzZUFq7P4JG3NpGTexyA7JxjWPNrmPA/T/u/O33x4sXMnz//F+8NDg4mPz/fv3706FEAVq5cSXJyMmvWrKFKlSr07NnTv8/MzqhfhctVrVr13AYnIlJGLthnf01ass0fKACVL2vHwa2r+OubHwNw4MAB2rVrxwcffMCuXbv82wAiIyNZt24dAOvWrfPvz87OpmbNmlSpUoXPP/+cjz8uqKtz586sXLmS/fv3k5uby4IFC/ztduvWjTfeeAOAuXPnctVVV5XwyEVESs4Fe6SSmZVz0nqlOo0Ju/ouNrwwhqjFjxMSEsL06dOZMWMGN998M/n5+dSrV4+lS5cyYMAAZs+eTXR0NLGxsTRv3hyA3r178/zzzxMVFUWLFi3o0qULAOHh4UyYMIGuXbsSHh5Ohw4dOH68INCmTZvG73//eyZNmkTdunWZNWtW6X4QIiIeMudcWfehWDExMS4lJaXE6u8+cTkZRYIFoGFYKKvHXVdi7YqIlCQzS3XOxZRV+xfs9NeY+BaEhpx8Ejw0JIgx8S3KqEciIhXfBTv9deJk/ImrvyLCQhkT38K/XUREzt4FGypQECwKERER71yw018iIuI9hYqIiHhGoSIiIp5RqIiIiGcUKiIi4hmFioiIeEahIiIinlGoiIiIZxQqIiLimXL7QEkz2wd8Vdb9KGV1gO/LuhNlSOPX+DX+wF3mnKvrQT3npNyGyoXIzFLK8umiZU3j1/g1/oo/fk1/iYiIZxQqIiLiGYVK+TKjrDtQxjT+C5vGfx7QORUREfGMjlRERMQzCpVSYGa9zWybmX1hZuOK2d/YzFaY2Xoz22hmvy207xHf+7aZWXzp9twb5zp+M4s0sxwzS/P9PF/6vQ/cGYz/MjNb5hv7SjO7tNC+BDPb4ftJKN2eeyPA8R8v9PtfXLo994aZzTSz78xs8yn2m5lN830+G82sQ6F9Fe/375zTTwn+AEHAl0BToBKwAWhVpMwMYKRvuRWQXmh5A3Ax0MRXT1BZj6kUxx8JbC7rMZTC+BcACb7l64BXfcu1gJ2+15q+5ZplPabSGr9v/XBZj8GDz6AH0OFU/5aB3wL/BxjQBfikIv/+daRS8joBXzjndjrnjgFvAP2KlHFADd/yJUCmb7kf8IZz7ifn3C7gC199FUkg4z8fnMn4WwHLfMsrCu2PB5Y65w44534AlgK9S6HPXgpk/OcF59yHwIFfKdIPmO0KfAyEmVk4FfT3r1ApeQ2B3YXWv/FtK2wCcKeZfQO8B9x/Fu8t7wIZP0AT37TYB2Z2dYn2tGScyfg3AAN8yzcB1c2s9hm+t7wLZPwAlc0sxcw+NrP+JdvVMnOqz6hC/v4VKiXPitlW9JK724CXnXOXUnAo/KqZXXSG7y3vAhn/HqCxc649MBp4zcxqULGcyfj/G7jGzNYD1wAZQN4Zvre8C2T8UPD7jwFuB6aa2eUl1tOyc6rPqEL+/hUqJe8boFGh9Uv55fTO3cB8AOfcGqAyBc8BOpP3lnfnPH7ftN9+3/ZUCubmm5d4j7112vE75zKdczf7wnO8b1v2mby3Aghk/DjnMn2vO4GVQPtS6HNpO9VnVCF//wqVkrcWaGZmTcysEjAYKHoVy9dAHICZXUnBH9V9vnKDzexiM2sCNAM+LbWee+Ocx29mdc0syLe9KQXj31lqPffGacdvZnV8R2YAjwAzfctLgF5mVtPMagK9fNsqknMev2/cF58oA3QHPiu1npeexcAQ31VgXYBs59weKurvv6yvFLgQfiiY0tlOwf9pj/dtewK40bfcClhNwdxyGtCr0HvH+963DbihrMdSmuOnYJ59i2/7OuB3ZT2WEhr/QGCHr8yLwMWF3vt7Ci7Q+AIYVtZjKc3xA92ATb7f/ybg7rIeyzmO/3UKpnJzKTj6uBsYAYzw7Tdguu/z2QTEVOTfv+6oFxERz2j6S0REPKNQERERzyhURETEMwoVERHxjEJFREQ8o1ARERHPKFRERMQzChUREfHM/wewZJSnLr2FVAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "# -----------------------------\n", "# Run This Cell to Produce Your Plot\n", @@ -504,7 +741,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Write your answer here.\n" + "#### Write your answer here.\n", + "\n", + "1. There are 2 clusters what I can see:\n", + " - bpd, barrels, venezuela, kuwait\n", + " - output, oil, industry, energy\n", + " \n", + "2. I think these clusters bellow should be together:\n", + " - \"bpd\", \"barrels\", \"petroleum\" and \"oil\"\n", + " - \"kuwait\", \"venezuela\" and \"ecuador\"" ] }, { @@ -520,7 +765,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -537,9 +782,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[==================================================] 100.0% 252.1/252.1MB downloaded\n", + "Loaded vocab size 400000\n" + ] + } + ], "source": [ "# -----------------------------------\n", "# Run Cell to Load Word Vectors\n", @@ -569,7 +823,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -614,9 +868,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shuffling words ...\n", + "Putting 10000 words into word2Ind and matrix M...\n", + "Done.\n", + "Running Truncated SVD over 10010 words...\n", + "Done.\n" + ] + } + ], "source": [ "# -----------------------------------------------------------------\n", "# Run Cell to Reduce 200-Dimensional Word Embeddings to k Dimensions\n", @@ -650,11 +916,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZsAAAD8CAYAAAChHgmuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl0VeW9//H3lzCFGQFlEBq4AgIBAoR5cEAu2moZLl6s2Au3LRFdWKwtF/xplXqXlgpVsBd7iwNKFcRGQbS2IBA1ICjJTWRQKShBCBQoEAYJSOD7++Mc0hACCZzsjJ/XWllnD8/Zz/dZQD7svZ+zj7k7IiIiQapS2gWIiEjFp7AREZHAKWxERCRwChsREQmcwkZERAKnsBERkcApbEREJHAKGxERCZzCRkREAle1tAu4kMaNG3tMTExplyEiUq6kpqb+w92blHYd+ZXZsImJiSElJaW0yxARKVfMbEdp11AQXUYTEZHAKWxERCRwChsREQmcwkZERAKnsBERKSP69et3Se3ff/99br311ou2MbMCJ4KZ2f1mVuuSOoyAwkZEpIz46KOPeOWVV+jVqxdxcXHcfffdnD59mjp16vDQQw/RtWtX+vTpw969ewHIysoiNTWVnj170rNnT9asWQOAmU0zs7lmthyYb2a1zOx1M9tgZovM7GPgv4DxZvb02f7NbLyZPWVmUcU9NoWNiEgZUatWLRYtWsQTTzxBgwYNWLFiBS1btuSbb76hd+/efPrpp7Rs2ZLY2FgGDBjAlClTaN26NevXr2fAgAGMGDEi7+F+CNwHjAdSgMGEfuevB+KBJuF9E8ysmpkdAx4BbgIeNrPFZw9kZkPM7M1IxqawEREpI06fPk1qaioTJkwgOTkZd2f8+PGYGQ0bNuTEiROsWrWK66+/nuTkZHbs2MGmTZuIi4tj4cKFnDx5Ev75e/0IcBK4GagFjHL3WOA54FNgP3A98ApwK1AbOOnuXYDHgA5mdvbDof8JzItkbAobEZFStCQtk/7TV9F66p85ddrpe/NInn/+eW688Ua++uorfvWrXxEVFcWOHTv44osvuOqqq6hduzZmRvXq1enfvz/p6elMmDCBX/7ylwBnwoc++7qR0FlMgpkNdPfD+Up4HhgHOPA0gLs78EfgLjNrAPQF/hLJOMvsEwRERCq6JWmZTP7Tp5w646ENVoW3lyymbac4atSowcGDBzl69CgAOTk5570/Li6OjIwMAKpWrcquXbvy7q4G4O5/M7PfAIOAX5tZGtCZ0JkN7v6xmbUMv2dBnvfPA94GTgB/cvfzC7gExRI2ZnYzMBuIAp539+n59tcA5gM9gAPAaHfPKI6+RUTKq2lLN/8zaADMqD/wLp56/BFq5hxjyJAhzJkzJ3f3tddey759+zhy5AgAjRs3Ztu2bXTp0oUDBw4QHR19tmkz4IrQIa058D9AF0L3aloBG4A6QF3gH8DrQGd3P3T2AO6+28x2Aw8DQyIda8SX0cKzFuYAtwAdgR+YWcd8zX4MHHL3awidpv0m0n5FRMq7rOxT522r3WEQV9x8H4MGDSI1NZU+ffpw9913A1CzZk1eeeUVPvvsMwYMGEC7du3o3r07GzZsYNu2bbRp0wZCv4erAFvCh+wMvA+0JXQ2cx/QCPg98BczSwIGAAWdubwK7HT3zyIdq4UuzUVwALO+wDR3HxpefxDA3X+dp82ycJu14Tnffwea+EU6j4+Pdz2IU0Qqspipf77gvozp37usY5pZqrvH59tWF0gidGnNgCnu/pfw/ZhPgE/d/fYCjvU/QJq7v3BZxeRRHJfRWgA786zvAnpfqI2755jZYULJ+o+8jcwsAUgAaNWqVTGUJiJSdjWsVY1Dx88/u2lYq1qx9uPuRwldQsu/PQtoV9B7zCwV+Ab4eXHUUByz0ayAbfnPWIrSBnef6+7x7h7fpEmZ+zoGEZFi9ehtnagWde6vx2pRxqO3dSqliv7J3Xu4+yB3P1kcxyuOM5tdQMs861cDuy/QZlf4Mlp94GAx9C0iUm4N79YCgBnLtrA7K5vmDaKZPLR97vaKpDjCZj3Q1sxaA5nAHcCd+dosBcYCa4FRwKqL3a8REakshndrUSHDJb+IwyZ8D2YisIzQ1OcX3X2zmT0GpLj7UuAF4I9mto3QGc0dkfYrIiLlR7F8zsbd3wXezbftkTzLJ4DzZjqIiEjloMfViIhI4BQ2IiISOIWNiIgETmEjIiKBU9iIiEjgFDYiIhI4hY2IiAROYSMiIoFT2IiISOAUNiIiEjiFjYhIJWJm48JfilaiFDYiInJB4a+FwcwyzKzx5R5HYSMiUga88sor9OrVi7i4OO6++25Onz7NX//6V7p3707Xrl0ZPHgwANOmTWPmzJm574uNjSUjIwOA4cOHA3Qws83hbz4GwMz+08z+ZmYfAP3zbP+Oma00sw3h11bh7S+Z2VNmlgT8pjjGVyxPfRYRkcszZcoUatSoQVpaGmvWrOHxxx9n+fLl3HnnnSxevJg2bdowevRoJk2aREZGBnPmzKFNmzbMmzePFi1acObMGQC+/PJLjh49evawWcAvzOwN4ANCX/28BegFXAFkm9kvgOuA+cBk4K/AM2aWCHwPqA4sAv4rf81mtoTQF2LWBGa7+9zCxqkzGxGRUrAkLZP+01fxx30tmDHnBdasW0/Pnj2ZPn0627dvZ+XKlYwaNYrPPvuM1NRUNm3aBMCBAwfo168fmzdvpkGDBhw5cgSAhIQEOnbsCGDAlUAboC3wMPAa8P+AFELfL3ZWX2BBeHkxofAZTegrY34K5ABjCij/R+7eA4gHfmpmjQobr8JGRKSELUnL5ME3N5KZlU31q/6Fb08cx2N6MfbBGfTo0YMxY8YQFRWVexntiy++YOvWrQA0bNiQZs2aAdCjRw+ys7P55ptvWL16NfPmzTvbxTfASUJnHgB1gBmEguTMRUqrCvQAbgOmAYMJhVZ+PzWzT4F1hM5w2hY2ZoWNiEgJm7FsC9mnTueu12rXlyObP+DJZ37PHXfcQXZ2NpMmTaJ27dosXryYbdu2MWLECACio6P5v//7PwD27NnDwYMHOXPmDLVr1+aGG24A+IzQtyFHhQ+/AbgV+Dmwn9AXWZ4h9Pv/o3DbmsBw4CvgZWApMNnd27v7tLy1m9n1wE1AX3fvCqTxz1C7IN2zEREpYZlZ2ees1+1+Kyd2fMrfP36bZw9+Tk5ODtWrV2f27NmMHDmSkydP0qRJE15++WXcnd27dxMXF0eNGjVo1KgRdevWpX379uzatQugI/DfwMbw4acDfyJ0uWwXsALoAHQndKnsdeBfgBHAA8D/AqkAZnYFUNfdd+Qptz5wyN2Pm9m1QJ+ijFlnNiIiJSzK7Jz16k2+g0VVo2aLDnz++eds3bqVhIQEHn30UXJycqhfvz4vvvgiAFlZWTz11FOkp6dz++23M3HiRGJiYliwYAFXXnnl2UN2At4GtgOjwuungVrAH4FhhCYKLAE+Bb4Axrj7SkL3eK4FHgHeA5rlK/+vQFUz20Ao1NYVZcw6sxERKWGn3XOXcw7vZe/rj1KjeTu+3fsVo0aNYv78+QwYMIA333yTY8eOUa9ePWrVqsWaNWswM8aMGUN0dDRr166lQ4cOVKlSheXLlzNx4kSWL1++HThKKGS6AY3c/ZCZvQS84+4pZtYDqEEogFoBN7r7HjN7H/iFu8eFP1OT4u7rzGwckA7MA2KB3xKarfZDQveGNhQ2Zp3ZiIiUsBYNos9Zzzm4izpdb6bnAy9Qr1495syZw3333UdiYiKpqan86Ec/4qGHHmLUqFHEx8fz6quvkp6eTnR06Dg1a9Zk9erV3HHHHQCtgSnu3oXQpbRH8/ZlZtWA3wGjwjPKXgQeL0LZscCdhKZPPw4cd/duwFrgPwp7s85sRERK2OSh7XnwzY25kwSi6jahYevOTB7annq97uKJJ55g06ZNDBkyBIDTp0/nzkAryOjRowE4fPgwQJS7fxDe9TKh+zV5tScUHO9Z6HJeFLCnCGUnuftR4KiZHSZ0mQ5CgdalsDcrbERESkGNqlVyw6ZKFePXIzszvFsLVq3aQt26denUqRNr164t0rFq1659KV0bsNnd+xawL4d/XvHKP8PsZJ7lM3nWz1CELNFlNBGREnT2MzZZ2adyt506vI8vPk0FYOHChfTp04f9+/fnhs2pU6fYvHkzAHXr1s37pIBz1K9fH+C0mQ0Mb/ohoScI5LUFaGJmfSF0Wc3MOoX3ZRD6nA2E7vkUG4WNiEgJyv8ZG4BqjVoy6/fP0aVLFw4ePJh7v2bKlCl07dqVuLg4PvroIwDGjRvHhAkTiIuLIzs7u6AutgMzwrPF4oDH8u50928JBclvwh/MTAf6hXfPBO4xs4+Ay37oZkHM88yKKEvi4+M9JSWltMsQESlWraf+mby/dXMO72Vf4q9o8eNn2T79exEf38xS3T0+4gMVM53ZiIiUoOb5ZqIVtr2iUNiIiJSgyUPbE10tKne9av2r+JcJf2Dy0PalWFXwNBtNRKQEDe/WAgjdu9mdlU3zBtFMHto+d3tFFVHYhJ+bswiIITSL4d/d/VC+NnHA74F6hD6t+ri7L4qkXxGR8mx4txYVPlzyi/Qy2lRgpbu3BVaG1/M7DvyHu3cCbgZmmVmDCPsVEakw+vULTQbLyMggNja2lKsJRqRhM4zQJ1QJvw7P38Dd/+buW8PLu4F9QJMI+xURqTDOTmuuyCINm6vcfQ9A+PXKizU2s16EHt725QX2J5hZipml7N+/P8LSRETKnqeeeorY2FhiY2OZNWsWAHXq1CnlqoJX6D0bM1sBNC1g10OX0pGZNSP0aOux7l7gN8WFv8d6LoQ+Z3MpxxcRKetSU1OZN28eH3/8Me5O7969ue6660q7rBJRaNi4+00X2mdme82sWfjR1M0IXSIrqF094M/Aw+5epO8+EBGpaFavXs2IESNyn2U2cuRIkpOTS7mqkhHp1OelwFhC3wQ3FngrfwMzqw4sBua7e/6nj4qIVHhL0jKZsWwLn7+3mdpk0z0tU7PRLtF0YIiZbQWGhNcxs3gzez7c5t+BQcA4M0sP/8RF2K+ISLlw9sGbmVnZ1GjZib0bVzNl0Xpe+2grixcvZuDAgYUfpAKI6MzG3Q8AgwvYngL8JLz8CvBKJP2IiJRXeR+8WaPpNdSJHcz2Fybxo5eq8MTUSXTr1q2UKywZehCniEiA8j948yyDYnnw5nnH1YM4RUQqn8r64M38FDYiIgHK/+BNgOhqURX+wZv56UGcIiIBqqwP3sxPYSMiErDK+ODN/HQZTUREAqewERGRwClsREQkcAobEREJnMJGREQCp7AREZHAKWxERCRwChsREQmcwkZERAKnsBERkcApbEREikFGRgaxsbGB9/PII4+wYsUKAGbNmsXx48cD77M4KGxERMqRxx57jJtuuglQ2IiIVGpfffUV3bp1Y8aMGUycODF3+6233sr777/P66+/zgMPPADA7NmzadOmDQBffvklAwYMAEKh0rNnT2JjY0lISODsF12OGzeOxMREnnnmGXbv3s0NN9zADTfcUMIjvHQKGxGRYrRlyxb+7d/+jXnz5tGkSZMC2wwaNIjk5GQAkpOTadSoEZmZmaxevZqBAwcCMHHiRNavX8+mTZvIzs7mnXfeOecYP/3pT2nevDlJSUkkJSUFO6hioK8YEBG5TEvSMnO/p+YKP8yuPXsZNmwYb7zxBp06dSI9Pb3A9zVt2pRjx45x9OhRdu7cyZ133smHH35IcnIyI0eOBCApKYknn3yS48ePc/DgQTp16sRtt91WksMrVjqzERG5DEvSMnnwzY1kZmXjwN4jJzhODWo2uJI1a9YAULVqVc6cOZP7nhMnTuQu9+3bl3nz5tG+fXsGDhxIcnIya9eupX///pw4cYJ7772XxMRENm7cyPjx4895b3mksBERuQwzlm0h+9TpczdWiaLmd6cwf/58FixYQExMDOnp6Zw5c4adO3fyySef5DYdNGgQM2fOZNCgQXTr1o2kpCRq1KhB/fr1c4OlcePGHDt2jMTExAJrqFu3LkePHg1sjMVJl9FERC7D7qzsArfvPQ5p77zDkCFDePjhh2ndujWdO3cmNjaW7t2757YbOHAgO3fuZNCgQURFRdGyZUuuvfZaABo0aMD48ePp3LkzMTEx9OzZs8C+EhISuOWWW2jWrFmZv29jZ2c4lDXx8fGekpJS2mWIiBSo//RVZBYQOC0aRLNm6o2lUFGImaW6e3ypFXABuowmInIZJg9tT3S1qHO2RVeLYvLQ9qVUUdmmy2giIpdheLcWALmz0Zo3iGby0Pa52+VcChsRkcs0vFsLhUsR6TKaiIgETmEjIiKBiyhszOwKM3vPzLaGXxtepG09M8s0s/+JpE8RESl/Ij2zmQqsdPe2wMrw+oX8N/BBhP2JiEg5FGnYDANeDi+/DAwvqJGZ9QCuApZH2J+IiJRDkYbNVe6+ByD8emX+BmZWBfgtMLmwg5lZgpmlmFnK/v37IyxNRETKikKnPpvZCqBpAbseKmIf9wLvuvtOM7toQ3efC8yF0BMEinh8EREp4woNG3e/6UL7zGyvmTVz9z1m1gzYV0CzvsBAM7sXqANUN7Nj7n6x+zsiIlKBRPqhzqXAWGB6+PWt/A3cfczZZTMbB8QraEREKpdI79lMB4aY2VZgSHgdM4s3s+cjLU5ERCoGPfVZRKQC0VOfRUSk0lLYiIhI4BQ2IiISOIWNiIgETmEjIiKBU9iIiEjgFDYiIhI4hY2IiAROYSMiIoFT2IiISOAUNiIiEjiFjYiIBE5hIyIigVPYiIhI4BQ2IiISOIWNiIgETmEjIiKBU9iIiEjgFDYiIhI4hY2IiAROYSMilcZLL73E7t27L/v9GRkZLFiwoBgrqjwUNiJSaShsSo/CRkTKtaeeeorY2FhiY2OZNWsWGRkZxMbG5u6fOXMm06ZNIzExkZSUFMaMGUNcXBzZ2dnExMQwZcoUevXqRa9evdi2bRsA48aNIzExMfcYderUAWDq1KkkJycTFxfH008/XbIDLecUNiJSbqWmpjJv3jw+/vhj1q1bx3PPPcehQ4cKbDtq1Cji4+N59dVXSU9PJzo6GoB69erxySefMHHiRO6///6L9jd9+nQGDhxIeno6P/vZz4p9PBWZwkZEyq3Vq1czYsQIateuTZ06dRg5ciTJycmXdIwf/OAHua9r164NokwBqpZ2ASIil2pJWiYzlm3h8/c2U5tsuqdlMrxbCwCysrI4c+ZMbtsTJ05c9Fhmdt5y1apVc4/h7nz77bfFPYRKR2c2IlKuLEnL5ME3N5KZlU2Nlp3Yu3E1Uxat57WPtrJ48WJuueUW9u3bx4EDBzh58iTvvPNO7nvr1q3L0aNHzzneokWLcl/79u0LQExMDKmpqQC89dZbnDp16oLvl6LRmY2IlCszlm0h+9RpAGo0vYY6sYPZ/sIkfvRSFZ6YOomePXvyyCOP0Lt3b1q3bs21116b+95x48YxYcIEoqOjcy+ZnTx5kt69e3PmzBkWLlwIwPjx4xk2bBi9evVi8ODB1K5dG4AuXbpQtWpVunbtyrhx43Tf5hKYu5d2DQWKj4/3lJSU0i5DRMqY1lP/TEG/tQzYPv17l3SsmJgYUlJSaNy4cbHUVhaYWaq7x5d2HflFdBnNzK4ws/fMbGv4teEF2rUys+Vm9rmZfWZmMZH0KyKVV/MG0Ze0XcqGSO/ZTAVWuntbYGV4vSDzgRnu3gHoBeyLsF8RqaQmD21PdLWoc7ZFV4ti8tD2l3ysjIyMCnVWU5ZFGjbDgJfDyy8Dw/M3MLOOQFV3fw/A3Y+5+/EI+xWRSmp4txb8emRnWjSIxoAWDaL59cjOubPRpGyK6J6NmWW5e4M864fcvWG+NsOBnwDfAq2BFcBUdz9dwPESgASAVq1a9dixY8dl1yYiUhmV1Xs2hc5GM7MVQNMCdj10CX0MBLoBXwOLgHHAC/kbuvtcYC6EJggU8fgiIlLGFRo27n7ThfaZ2V4za+bue8ysGQXfi9kFpLn7V+H3LAH6UEDYiIhIxRTpPZulwNjw8ljgrQLarAcamlmT8PqNwGcR9isiIuVIpGEzHRhiZluBIeF1zCzezJ4HCN+b+QWw0sw2EpoO/1yE/YqISDkS0RME3P0AMLiA7SmEJgWcXX8P6BJJXyIiUn7p2WgiIhI4hY2IiAROYSMiIoFT2IiISOAUNiIiEjiFjYiIBE5hIyIigVPYiIhI4BQ2IiISOIWNiIgETmEjIiKBU9iIiEjgFDYiIhI4hY2IiAROYSMiIoFT2IiISOAUNiKSKyMjg9jY2MD7ef/997n11lsD70fKDoWNiBSLnJyci65L5RbR10KLSMWTk5PD2LFjSUtLo127dsyfP5+ZM2fy9ttvk52dTb9+/fjDH/6AmXH99dfTr18/1qxZw/e//302btzIFVdcQVpaGt27d+exxx7jvvvuY+PGjeTk5DBt2jSGDRt2Tn8ffPABkyZNAsDM+PDDD6lbt25pDF0CpDMbETnHli1bSEhIYMOGDdSrV49nn32WiRMnsn79ejZt2kR2djbvvPNObvusrCw++OADfv7znwPwt7/9jRUrVvDb3/6Wxx9/nBtvvJH169eTlJTE5MmT+eabb87pb+bMmcyZM4f09HSSk5OJjo4u0fFKydCZjUgltyQtkxnLtrA7K5sr/DCNmzanf//+ANx1110888wztG7dmieffJLjx49z8OBBOnXqxG233QbA6NGjzzne7bffTlRUFADLly9n6dKlzJw5E4ATJ07w9ddfn9O+f//+PPDAA4wZM4aRI0dy9dVXBz1kKQU6sxGpxJakZfLgmxvJzMrGgb1HTpB1PIclaZm5bcyMe++9l8TERDZu3Mj48eM5ceJE7v7atWufc8y86+7OG2+8QXp6Ounp6Xz99dd06NDhnPZTp07l+eefJzs7mz59+vDFF18EM1gpVQobkUpsxrItZJ86fc62nCP7eGTumwAsXLiQAQMGANC4cWOOHTtGYmJikY8/dOhQfve73+HuAKSlpZ3X5ssvv6Rz585MmTKF+Ph4hU0FpctoIpXY7qzs87ZVa9SS7evepUuX52jbti333HMPhw4donPnzsTExNCzZ88iH/+Xv/wl999/P126dMHdiYmJOed+D8CsWbNISkoiKiqKjh07csstt0Q8Lil77Oz/OMqa+Ph4T0lJKe0yRCq0/tNXkVlA4LRoEM2aqTeWQkUSKTNLdff40q4jP11GE6nEJg9tT3S1qHO2RVeLYvLQ9qVUkVRUuowmUokN79YCIHc2WvMG0Uwe2j53u0hxUdiIVHLDu7VQuEjgdBlNREQCF1HYmNkVZvaemW0Nvza8QLsnzWyzmX1uZs+YmUXSr4iIlC+RntlMBVa6e1tgZXj9HGbWD+gPdAFigZ7AdRH2KyIi5UikYTMMeDm8/DIwvIA2DtQEqgM1gGrA3gj7FRGRciTSsLnK3fcAhF+vzN/A3dcCScCe8M8yd/88wn5FRKQcKXQ2mpmtAJoWsOuhonRgZtcAHYCzT9d7z8wGufuHBbRNABIAWrVqVZTDi4hIOVBo2Lj7TRfaZ2Z7zayZu+8xs2bAvgKajQDWufux8Hv+AvQBzgsbd58LzIXQEwSKNgQRESnrIr2MthQYG14eC7xVQJuvgevMrKqZVSM0OUCX0UREKpFIw2Y6MMTMtgJDwuuYWbyZPR9ukwh8CWwEPgU+dfe3I+xXRETKkYieIODuB4DBBWxPAX4SXj4N3B1JPyIiUr7pCQIiIhI4hY2IiAROYSMiIoFT2IiISOAUNiIiEjiFjYiIBE5hIyIigVPYiIhI4BQ2IiISOIWNiIgETmEjIiKBU9iIiEjgFDYiIhI4hY2IiAROYSMiIoFT2IiISOAUNiIiEjiFjYiIBE5hIyIigVPYiIhI4BQ2IiISOIWNiIgETmEjIiKBU9iIiEjgFDYiIhK4ShE2GRkZxMbGXvb769SpU4zViIhUPpUibEREpHRVLe0CSkpOTg5jx44lLS2Ndu3aMX/+fDp27Mjo0aNJSkoCYMGCBVxzzTVs376dO++8k5ycHG6++eZSrlxEpPyrNGc2W7ZsISEhgQ0bNlCvXj2effZZAOrVq8cnn3zCxIkTuf/++wGYNGkS99xzD+vXr6dp06alWbaISIVg7n75bza7HZgGdAB6uXvKBdrdDMwGooDn3X16YceOj4/3lJQCD1ckS9IymbFsC7uzsrnCD/P1/Mns35MJwKpVq3jmmWdIT09n1apVtGnThlOnTtG0aVMOHDhAo0aN+Pvf/061atU4cuQIzZs359ixY5ddi4hISTGzVHePL+068ov0zGYTMBL48EINzCwKmAPcAnQEfmBmHSPs96KWpGXy4JsbyczKxoG9R06QdTyHJWmZees65/ViyyIiEpmIwsbdP3f3LYU06wVsc/ev3P1b4DVgWCT9FmbGsi1knzp9zracI/t4ZO6bACxcuJABAwYAsGjRotzXvn37AtC/f39ee+01AF599dUgSxURqRRK4p5NC2BnnvVd4W2B2Z2Vfd62ao1asn3du3Tp0oWDBw9yzz33AHDy5El69+7N7NmzefrppwGYPXs2c+bMoWfPnhw+fDjIUkVEKoVC79mY2QqgoLvkD7n7W+E27wO/KOieTfi+zlB3/0l4/YeE7u/cV0DbBCABoFWrVj127NhxaaMJ6z99FZkFBE6LBtGsmXpj7npMTAwpKSk0btz4svoRESlryu09G3e/yd1jC/h5q4h97AJa5lm/Gth9gb7munu8u8c3adKkiIc/3+Sh7YmuFnXOtuhqUUwe2v6yjykiIpevJD5nsx5oa2atgUzgDuDOIDsc3i10le7sbLTmDaKZPLR97vazMjIygixDRETCIgobMxsB/A5oAvzZzNLdfaiZNSc0xfm77p5jZhOBZYSmPr/o7psjrrwQw7u1OC9cRESkdEQUNu6+GFhcwPbdwHfzrL8LvBtJXyIiUn5VmicIiIhI6VHYiIhI4BQ2IiISOIWNiIgETmEjIiKBU9iIiEjgFDYiIhK4iL7PJkhmth+4vIejFa/GwD9Ku4iAVfRiQrGHAAAEB0lEQVQxVvTxgcZYURTHGL/j7pf/vK+AlNmwKSvMLKUsPtSuOFX0MVb08YHGWFFU5DHqMpqIiAROYSMiIoFT2BRubmkXUAIq+hgr+vhAY6woKuwYdc9GREQCpzMbEREJnMImzMxuNrMtZrbNzKYWsL+GmS0K7//YzGJKvsrLV4TxPWBmn5nZBjNbaWbfKY06I1HYGPO0G2VmbmblbtZPUcZoZv8e/rPcbGYLSrrGSBXh72orM0sys7Tw39fvFnScssrMXjSzfWa26QL7zcyeCY9/g5l1L+kaA+Hulf6H0Je6fQm0AaoDnwId87W5F/jf8PIdwKLSrruYx3cDUCu8fE95Gl9RxxhuVxf4EFgHxJd23QH8ObYF0oCG4fUrS7vuAMY4F7gnvNwRyCjtui9xjIOA7sCmC+z/LvAXwIA+wMelXXNx/OjMJqQXsM3dv3L3b4HXgGH52gwDXg4vJwKDzcxKsMZIFDo+d09y9+Ph1XXA1SVcY6SK8mcI8N/Ak8CJkiyumBRljOOBOe5+CMDd95VwjZEqyhgdqBderg/sLsH6IubuHwIHL9JkGDDfQ9YBDcysWclUFxyFTUgLYGee9V3hbQW2cfcc4DDQqESqi1xRxpfXjwn9z6o8KXSMZtYNaOnu75RkYcWoKH+O7YB2ZrbGzNaZ2c0lVl3xKMoYpwF3mdkuQt8AfF/JlFZiLvXfa7kQ0ddCVyAFnaHkn6ZXlDZlVZFrN7O7gHjgukArKn4XHaOZVQGeBsaVVEEBKMqfY1VCl9KuJ3R2mmxmse6eFXBtxaUoY/wB8JK7/9bM+gJ/DI/xTPDllYjy/LvmgnRmE7ILaJln/WrOPzXPbWNmVQmdvl/sVLgsKcr4MLObgIeA77v7yRKqrbgUNsa6QCzwvpllELoWvrScTRIo6t/Tt9z9lLtvB7YQCp/yoihj/DHwOoC7rwVqEnqmWEVRpH+v5Y3CJmQ90NbMWptZdUITAJbma7MUGBteHgWs8vDdvHKg0PGFLzH9gVDQlLfr/FDIGN39sLs3dvcYd48hdF/q++6eUjrlXpai/D1dQmiyB2bWmNBlta9KtMrIFGWMXwODAcysA6Gw2V+iVQZrKfAf4VlpfYDD7r6ntIuKlC6jEboHY2YTgWWEZsO86O6bzewxIMXdlwIvEDpd30bojOaO0qv40hRxfDOAOsCfwvMevnb375da0ZeoiGMs14o4xmXAv5rZZ8BpYLK7Hyi9qi9NEcf4c+A5M/sZoctL48rRf/wws4WELnM2Dt93ehSoBuDu/0voPtR3gW3AceA/S6fS4qUnCIiISOB0GU1ERAKnsBERkcApbEREJHAKGxERCZzCRkREAqewERGRwClsREQkcAobEREJ3P8H2enkC/lljAEAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "words = ['barrels', 'bpd', 'ecuador', 'energy', 'industry', 'kuwait', 'oil', 'output', 'petroleum', 'venezuela']\n", "plot_embeddings(M_reduced_normalized, word2Ind, words)" @@ -664,7 +943,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Write your answer here." + "#### Write your answer here.\n", + "\n", + "1. There are 2 clusters what I can see:\n", + " - oil, petroleum, ecuador, venezuela\n", + " - industry, energy" ] }, { @@ -695,15 +978,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('ends', 0.6128067970275879),\n", + " ('leaf', 0.6027014255523682),\n", + " ('stems', 0.5998532772064209),\n", + " ('takes', 0.5902855396270752),\n", + " ('leaving', 0.5761634111404419),\n", + " ('grows', 0.5663397312164307),\n", + " ('flowers', 0.5600922107696533),\n", + " ('turns', 0.5536050796508789),\n", + " ('leave', 0.5496848225593567),\n", + " ('goes', 0.5434924960136414)]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - " # ------------------\n", - " # Write your implementation here.\n", + "# ------------------\n", + "# Write your implementation here.\n", "\n", - "\n", - " # ------------------" + "wv_from_bin.most_similar(\"leaves\")\n", + "# ------------------" ] }, { @@ -730,15 +1033,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.19722837209701538\n", + "0.7167813181877136\n" + ] + } + ], "source": [ - " # ------------------\n", - " # Write your implementation here.\n", - "\n", - "\n", - " # ------------------" + "# ------------------\n", + "# Write your implementation here.\n", + "print(wv_from_bin.distance('men', 'women'))\n", + "print(wv_from_bin.distance('men', 'king'))\n", + "# ------------------" ] }, { @@ -764,9 +1076,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('queen', 0.6978678703308105),\n", + " ('princess', 0.6081745028495789),\n", + " ('monarch', 0.5889754891395569),\n", + " ('throne', 0.5775108933448792),\n", + " ('prince', 0.5750998258590698),\n", + " ('elizabeth', 0.5463595986366272),\n", + " ('daughter', 0.5399125814437866),\n", + " ('kingdom', 0.5318052172660828),\n", + " ('mother', 0.5168544054031372),\n", + " ('crown', 0.5164473056793213)]\n" + ] + } + ], "source": [ "# Run this cell to answer the analogy -- man : king :: woman : x\n", "pprint.pprint(wv_from_bin.most_similar(positive=['woman', 'king'], negative=['man']))" @@ -784,13 +1113,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('cold', 0.6264682412147522),\n", + " ('cool', 0.5971968770027161),\n", + " ('warm', 0.5749486088752747),\n", + " ('heat', 0.5509929060935974),\n", + " ('wet', 0.5442032814025879),\n", + " ('temperatures', 0.5357182025909424),\n", + " ('dry', 0.5311398506164551),\n", + " ('weather', 0.5237288475036621),\n", + " ('cooler', 0.5152636170387268),\n", + " ('hotter', 0.501091718673706)]\n" + ] + } + ], "source": [ " # ------------------\n", " # Write your implementation here.\n", - "\n", + "pprint.pprint(wv_from_bin.most_similar(positive=['winter', 'hot'], negative=['summer']))\n", "\n", " # ------------------" ] @@ -843,9 +1189,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('employee', 0.6375863552093506),\n", + " ('workers', 0.6068919897079468),\n", + " ('nurse', 0.5837947130203247),\n", + " ('pregnant', 0.5363885760307312),\n", + " ('mother', 0.5321309566497803),\n", + " ('employer', 0.5127025842666626),\n", + " ('teacher', 0.5099577307701111),\n", + " ('child', 0.5096741914749146),\n", + " ('homemaker', 0.5019455552101135),\n", + " ('nurses', 0.4970571994781494)]\n", + "\n", + "[('workers', 0.611325740814209),\n", + " ('employee', 0.5983108878135681),\n", + " ('working', 0.5615329742431641),\n", + " ('laborer', 0.5442320108413696),\n", + " ('unemployed', 0.5368517637252808),\n", + " ('job', 0.5278826951980591),\n", + " ('work', 0.5223963260650635),\n", + " ('mechanic', 0.5088937282562256),\n", + " ('worked', 0.5054520964622498),\n", + " ('factory', 0.4940453767776489)]\n" + ] + } + ], "source": [ "# Run this cell\n", "# Here `positive` indicates the list of words to be similar to and `negative` indicates the list of words to be\n", @@ -873,14 +1247,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('waved', 0.7440091371536255),\n", + " ('flags', 0.6568725109100342),\n", + " ('cheering', 0.6446677446365356),\n", + " ('banners', 0.6004939079284668),\n", + " ('chanting', 0.5811529159545898),\n", + " ('placards', 0.5701406002044678),\n", + " ('shouting', 0.5681235790252686),\n", + " ('slogans', 0.5568947196006775),\n", + " ('chanted', 0.5365853905677795),\n", + " ('shouted', 0.5337547659873962)]\n", + "[('waved', 0.7243529558181763),\n", + " ('cheering', 0.6555410623550415),\n", + " ('banners', 0.6392576098442078),\n", + " ('flags', 0.6310420632362366),\n", + " ('chanting', 0.610493004322052),\n", + " ('placards', 0.5842955708503723),\n", + " ('brandishing', 0.5838682651519775),\n", + " ('marched', 0.5790764689445496),\n", + " ('shouting', 0.5765238404273987),\n", + " ('chanted', 0.5760204792022705)]\n" + ] + } + ], "source": [ " # ------------------\n", " # Write your implementation here.\n", - "\n", - "\n", + "pprint.pprint(wv_from_bin.most_similar(positive=['white', 'waving'], negative=['black']))\n", + "pprint.pprint(wv_from_bin.most_similar(positive=['black', 'waving'], negative=['white']))\n", " # ------------------" ] }, @@ -932,9 +1333,9 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "cs224n", + "display_name": "Python 3", "language": "python", - "name": "cs224n" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -946,7 +1347,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.5" + "version": "3.6.8" } }, "nbformat": 4, diff --git a/Lectures/Lecture 01/Gensim word vector visualization.ipynb b/Lectures/Lecture 01/Gensim word vector visualization.ipynb index 8996749..38fab5c 100644 --- a/Lectures/Lecture 01/Gensim word vector visualization.ipynb +++ b/Lectures/Lecture 01/Gensim word vector visualization.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -50,18 +50,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/hanh/anaconda3/lib/python3.7/site-packages/smart_open/smart_open_lib.py:398: UserWarning: This function is deprecated, use smart_open.open instead. See the migration notes for details: https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst#migrating-to-the-new-open-function\n", + " 'See the migration notes for details: %s' % _MIGRATION_NOTES_URL\n" + ] + }, + { + "data": { + "text/plain": [ + "(400000, 100)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "glove_file = datapath('/Users/manning/Corpora/GloVe/glove.6B.100d.txt')\n", + "glove_file = datapath('/home/hanh/Desktop/cs224n_2020/Lectures/Lecture 01/glove.6B.100d.txt')\n", "word2vec_glove_file = get_tmpfile(\"glove.6B.100d.word2vec.txt\")\n", "glove2word2vec(glove_file, word2vec_glove_file)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -70,36 +89,105 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('barack', 0.937216579914093),\n", + " ('bush', 0.927285373210907),\n", + " ('clinton', 0.8960003852844238),\n", + " ('mccain', 0.8875633478164673),\n", + " ('gore', 0.8000321388244629),\n", + " ('hillary', 0.7933663129806519),\n", + " ('dole', 0.7851964235305786),\n", + " ('rodham', 0.751889705657959),\n", + " ('romney', 0.7488929629325867),\n", + " ('kerry', 0.7472623586654663)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.most_similar('obama')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('coconut', 0.7097253799438477),\n", + " ('mango', 0.7054824233055115),\n", + " ('bananas', 0.6887733936309814),\n", + " ('potato', 0.6629636287689209),\n", + " ('pineapple', 0.6534532904624939),\n", + " ('fruit', 0.6519855260848999),\n", + " ('peanut', 0.6420576572418213),\n", + " ('pecan', 0.6349173188209534),\n", + " ('cashew', 0.6294420957565308),\n", + " ('papaya', 0.6246591210365295)]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.most_similar('banana')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('keyrates', 0.7173938751220703),\n", + " ('sungrebe', 0.7119239568710327),\n", + " ('þórður', 0.7067720890045166),\n", + " ('zety', 0.7056615352630615),\n", + " ('23aou94', 0.6959497928619385),\n", + " ('___________________________________________________________',\n", + " 0.694915235042572),\n", + " ('elymians', 0.6945434212684631),\n", + " ('camarina', 0.6927202939987183),\n", + " ('ryryryryryry', 0.6905653476715088),\n", + " ('maurilio', 0.6865653395652771)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.most_similar(negative='banana')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "queen: 0.7699\n" + ] + } + ], "source": [ "result = model.most_similar(positive=['woman', 'king'], negative=['man'])\n", "print(\"{}: {:.4f}\".format(*result[0]))" @@ -107,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -125,68 +213,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'australian'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "analogy('japan', 'japanese', 'australia')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'champagne'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "analogy('australia', 'beer', 'france')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'nixon'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "analogy('obama', 'clinton', 'reagan')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'longest'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "analogy('tall', 'tallest', 'long')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'terrible'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "analogy('good', 'fantastic', 'bad')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cereal\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/hanh/anaconda3/lib/python3.7/site-packages/gensim/models/keyedvectors.py:876: FutureWarning: arrays to stack must be passed as a \"sequence\" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.\n", + " vectors = vstack(self.word_vec(word, use_norm=True) for word in used_words).astype(REAL)\n" + ] + } + ], "source": [ "print(model.doesnt_match(\"breakfast cereal dinner lunch\".split()))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -209,16 +361,802 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "/* Put everything inside the global mpl namespace */\n", + "window.mpl = {};\n", + "\n", + "\n", + "mpl.get_websocket_type = function() {\n", + " if (typeof(WebSocket) !== 'undefined') {\n", + " return WebSocket;\n", + " } else if (typeof(MozWebSocket) !== 'undefined') {\n", + " return MozWebSocket;\n", + " } else {\n", + " alert('Your browser does not have WebSocket support.' +\n", + " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", + " 'Firefox 4 and 5 are also supported but you ' +\n", + " 'have to enable WebSockets in about:config.');\n", + " };\n", + "}\n", + "\n", + "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", + " this.id = figure_id;\n", + "\n", + " this.ws = websocket;\n", + "\n", + " this.supports_binary = (this.ws.binaryType != undefined);\n", + "\n", + " if (!this.supports_binary) {\n", + " var warnings = document.getElementById(\"mpl-warnings\");\n", + " if (warnings) {\n", + " warnings.style.display = 'block';\n", + " warnings.textContent = (\n", + " \"This browser does not support binary websocket messages. \" +\n", + " \"Performance may be slow.\");\n", + " }\n", + " }\n", + "\n", + " this.imageObj = new Image();\n", + "\n", + " this.context = undefined;\n", + " this.message = undefined;\n", + " this.canvas = undefined;\n", + " this.rubberband_canvas = undefined;\n", + " this.rubberband_context = undefined;\n", + " this.format_dropdown = undefined;\n", + "\n", + " this.image_mode = 'full';\n", + "\n", + " this.root = $('
');\n", + " this._root_extra_style(this.root)\n", + " this.root.attr('style', 'display: inline-block');\n", + "\n", + " $(parent_element).append(this.root);\n", + "\n", + " this._init_header(this);\n", + " this._init_canvas(this);\n", + " this._init_toolbar(this);\n", + "\n", + " var fig = this;\n", + "\n", + " this.waiting = false;\n", + "\n", + " this.ws.onopen = function () {\n", + " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", + " fig.send_message(\"send_image_mode\", {});\n", + " if (mpl.ratio != 1) {\n", + " fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n", + " }\n", + " fig.send_message(\"refresh\", {});\n", + " }\n", + "\n", + " this.imageObj.onload = function() {\n", + " if (fig.image_mode == 'full') {\n", + " // Full images could contain transparency (where diff images\n", + " // almost always do), so we need to clear the canvas so that\n", + " // there is no ghosting.\n", + " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", + " }\n", + " fig.context.drawImage(fig.imageObj, 0, 0);\n", + " };\n", + "\n", + " this.imageObj.onunload = function() {\n", + " fig.ws.close();\n", + " }\n", + "\n", + " this.ws.onmessage = this._make_on_message_function(this);\n", + "\n", + " this.ondownload = ondownload;\n", + "}\n", + "\n", + "mpl.figure.prototype._init_header = function() {\n", + " var titlebar = $(\n", + " '
');\n", + " var titletext = $(\n", + " '
');\n", + " titlebar.append(titletext)\n", + " this.root.append(titlebar);\n", + " this.header = titletext[0];\n", + "}\n", + "\n", + "\n", + "\n", + "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", + "\n", + "}\n", + "\n", + "\n", + "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", + "\n", + "}\n", + "\n", + "mpl.figure.prototype._init_canvas = function() {\n", + " var fig = this;\n", + "\n", + " var canvas_div = $('
');\n", + "\n", + " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", + "\n", + " function canvas_keyboard_event(event) {\n", + " return fig.key_event(event, event['data']);\n", + " }\n", + "\n", + " canvas_div.keydown('key_press', canvas_keyboard_event);\n", + " canvas_div.keyup('key_release', canvas_keyboard_event);\n", + " this.canvas_div = canvas_div\n", + " this._canvas_extra_style(canvas_div)\n", + " this.root.append(canvas_div);\n", + "\n", + " var canvas = $('');\n", + " canvas.addClass('mpl-canvas');\n", + " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", + "\n", + " this.canvas = canvas[0];\n", + " this.context = canvas[0].getContext(\"2d\");\n", + "\n", + " var backingStore = this.context.backingStorePixelRatio ||\n", + "\tthis.context.webkitBackingStorePixelRatio ||\n", + "\tthis.context.mozBackingStorePixelRatio ||\n", + "\tthis.context.msBackingStorePixelRatio ||\n", + "\tthis.context.oBackingStorePixelRatio ||\n", + "\tthis.context.backingStorePixelRatio || 1;\n", + "\n", + " mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n", + "\n", + " var rubberband = $('');\n", + " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", + "\n", + " var pass_mouse_events = true;\n", + "\n", + " canvas_div.resizable({\n", + " start: function(event, ui) {\n", + " pass_mouse_events = false;\n", + " },\n", + " resize: function(event, ui) {\n", + " fig.request_resize(ui.size.width, ui.size.height);\n", + " },\n", + " stop: function(event, ui) {\n", + " pass_mouse_events = true;\n", + " fig.request_resize(ui.size.width, ui.size.height);\n", + " },\n", + " });\n", + "\n", + " function mouse_event_fn(event) {\n", + " if (pass_mouse_events)\n", + " return fig.mouse_event(event, event['data']);\n", + " }\n", + "\n", + " rubberband.mousedown('button_press', mouse_event_fn);\n", + " rubberband.mouseup('button_release', mouse_event_fn);\n", + " // Throttle sequential mouse events to 1 every 20ms.\n", + " rubberband.mousemove('motion_notify', mouse_event_fn);\n", + "\n", + " rubberband.mouseenter('figure_enter', mouse_event_fn);\n", + " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", + "\n", + " canvas_div.on(\"wheel\", function (event) {\n", + " event = event.originalEvent;\n", + " event['data'] = 'scroll'\n", + " if (event.deltaY < 0) {\n", + " event.step = 1;\n", + " } else {\n", + " event.step = -1;\n", + " }\n", + " mouse_event_fn(event);\n", + " });\n", + "\n", + " canvas_div.append(canvas);\n", + " canvas_div.append(rubberband);\n", + "\n", + " this.rubberband = rubberband;\n", + " this.rubberband_canvas = rubberband[0];\n", + " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", + " this.rubberband_context.strokeStyle = \"#000000\";\n", + "\n", + " this._resize_canvas = function(width, height) {\n", + " // Keep the size of the canvas, canvas container, and rubber band\n", + " // canvas in synch.\n", + " canvas_div.css('width', width)\n", + " canvas_div.css('height', height)\n", + "\n", + " canvas.attr('width', width * mpl.ratio);\n", + " canvas.attr('height', height * mpl.ratio);\n", + " canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n", + "\n", + " rubberband.attr('width', width);\n", + " rubberband.attr('height', height);\n", + " }\n", + "\n", + " // Set the figure to an initial 600x600px, this will subsequently be updated\n", + " // upon first draw.\n", + " this._resize_canvas(600, 600);\n", + "\n", + " // Disable right mouse context menu.\n", + " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", + " return false;\n", + " });\n", + "\n", + " function set_focus () {\n", + " canvas.focus();\n", + " canvas_div.focus();\n", + " }\n", + "\n", + " window.setTimeout(set_focus, 100);\n", + "}\n", + "\n", + "mpl.figure.prototype._init_toolbar = function() {\n", + " var fig = this;\n", + "\n", + " var nav_element = $('
')\n", + " nav_element.attr('style', 'width: 100%');\n", + " this.root.append(nav_element);\n", + "\n", + " // Define a callback function for later on.\n", + " function toolbar_event(event) {\n", + " return fig.toolbar_button_onclick(event['data']);\n", + " }\n", + " function toolbar_mouse_event(event) {\n", + " return fig.toolbar_button_onmouseover(event['data']);\n", + " }\n", + "\n", + " for(var toolbar_ind in mpl.toolbar_items) {\n", + " var name = mpl.toolbar_items[toolbar_ind][0];\n", + " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", + " var image = mpl.toolbar_items[toolbar_ind][2];\n", + " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", + "\n", + " if (!name) {\n", + " // put a spacer in here.\n", + " continue;\n", + " }\n", + " var button = $('');\n", + " button.click(method_name, toolbar_event);\n", + " button.mouseover(tooltip, toolbar_mouse_event);\n", + " nav_element.append(button);\n", + " }\n", + "\n", + " // Add the status bar.\n", + " var status_bar = $('');\n", + " nav_element.append(status_bar);\n", + " this.message = status_bar[0];\n", + "\n", + " // Add the close button to the window.\n", + " var buttongrp = $('
');\n", + " var button = $('');\n", + " button.click(function (evt) { fig.handle_close(fig, {}); } );\n", + " button.mouseover('Stop Interaction', toolbar_mouse_event);\n", + " buttongrp.append(button);\n", + " var titlebar = this.root.find($('.ui-dialog-titlebar'));\n", + " titlebar.prepend(buttongrp);\n", + "}\n", + "\n", + "mpl.figure.prototype._root_extra_style = function(el){\n", + " var fig = this\n", + " el.on(\"remove\", function(){\n", + "\tfig.close_ws(fig, {});\n", + " });\n", + "}\n", + "\n", + "mpl.figure.prototype._canvas_extra_style = function(el){\n", + " // this is important to make the div 'focusable\n", + " el.attr('tabindex', 0)\n", + " // reach out to IPython and tell the keyboard manager to turn it's self\n", + " // off when our div gets focus\n", + "\n", + " // location in version 3\n", + " if (IPython.notebook.keyboard_manager) {\n", + " IPython.notebook.keyboard_manager.register_events(el);\n", + " }\n", + " else {\n", + " // location in version 2\n", + " IPython.keyboard_manager.register_events(el);\n", + " }\n", + "\n", + "}\n", + "\n", + "mpl.figure.prototype._key_event_extra = function(event, name) {\n", + " var manager = IPython.notebook.keyboard_manager;\n", + " if (!manager)\n", + " manager = IPython.keyboard_manager;\n", + "\n", + " // Check for shift+enter\n", + " if (event.shiftKey && event.which == 13) {\n", + " this.canvas_div.blur();\n", + " event.shiftKey = false;\n", + " // Send a \"J\" for go to next cell\n", + " event.which = 74;\n", + " event.keyCode = 74;\n", + " manager.command_mode();\n", + " manager.handle_keydown(event);\n", + " }\n", + "}\n", + "\n", + "mpl.figure.prototype.handle_save = function(fig, msg) {\n", + " fig.ondownload(fig, null);\n", + "}\n", + "\n", + "\n", + "mpl.find_output_cell = function(html_output) {\n", + " // Return the cell and output element which can be found *uniquely* in the notebook.\n", + " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", + " // IPython event is triggered only after the cells have been serialised, which for\n", + " // our purposes (turning an active figure into a static one), is too late.\n", + " var cells = IPython.notebook.get_cells();\n", + " var ncells = cells.length;\n", + " for (var i=0; i= 3 moved mimebundle to data attribute of output\n", + " data = data.data;\n", + " }\n", + " if (data['text/html'] == html_output) {\n", + " return [cell, data, j];\n", + " }\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "// Register the function which deals with the matplotlib target/channel.\n", + "// The kernel may be null if the page has been refreshed.\n", + "if (IPython.notebook.kernel != null) {\n", + " IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n", + "}\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "display_pca_scatterplot(model, sample=300)" ] @@ -263,7 +1994,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.7.6" } }, "nbformat": 4,