diff --git a/Week 01/Python review session/python-review-demo.py b/Week 01/Python review session/python-review-demo.py new file mode 100644 index 0000000..5c51552 --- /dev/null +++ b/Week 01/Python review session/python-review-demo.py @@ -0,0 +1,28 @@ +import numpy as np + +def power_iteration(A, tolerance=1e-7): + b_old = np.random.rand(A.shape[1]) + b = np.random.rand(A.shape[1]) + num_iterations = 0 + while num_iterations == 0 or np.linalg.norm(b_old - b) > tolerance: + b_old = np.copy(b) + b = np.dot(A, b) + b_norm = np.linalg.norm(b) + b /= b_norm + num_iterations += 1 + return np.dot(A, b), b, num_iterations + +def main(): + A = np.array([[.5, .4], [.2, .8]]) + ab, b, number_iterations = power_iteration(A) + + eig1 = ab[0] / b[0] + eig2 = ab[1] / b[1] + assert(np.abs((eig1 - eig2) / eig2) < 1e-5) + + b /= b[1] + + print(eig1, b, number_iterations) + +if __name__ == '__main__': + main() diff --git a/Week 03/Assignment 3/README.md b/Week 03/Assignment 3/README.md new file mode 100644 index 0000000..335eb7e --- /dev/null +++ b/Week 03/Assignment 3/README.md @@ -0,0 +1,3 @@ +# Assignment 3 + +- [Handout](http://web.stanford.edu/class/cs224n/assignments/a3.pdf) diff --git a/Week 03/Assignment 3/collect_submission.sh b/Week 03/Assignment 3/collect_submission.sh new file mode 100755 index 0000000..02acf7c --- /dev/null +++ b/Week 03/Assignment 3/collect_submission.sh @@ -0,0 +1,2 @@ +rm -f assignment3.zip +zip -r assignment3.zip *.py ./data ./utils diff --git a/Week 03/Assignment 3/local_env.yml b/Week 03/Assignment 3/local_env.yml new file mode 100755 index 0000000..6325364 --- /dev/null +++ b/Week 03/Assignment 3/local_env.yml @@ -0,0 +1,11 @@ +name: a3 +channels: + - pytorch + - defaults +dependencies: + - python=3.7 + - numpy + - tqdm + - docopt + - pytorch + - torchvision diff --git a/Week 03/Assignment 3/parser_model.py b/Week 03/Assignment 3/parser_model.py new file mode 100755 index 0000000..851e131 --- /dev/null +++ b/Week 03/Assignment 3/parser_model.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +CS224N 2019-20: Homework 3 +parser_model.py: Feed-Forward Neural Network for Dependency Parsing +Sahil Chopra +Haoshen Hong +""" +import argparse +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +class ParserModel(nn.Module): + """ Feedforward neural network with an embedding layer and two hidden layers. + The ParserModel will predict which transition should be applied to a + given partial parse configuration. + + PyTorch Notes: + - Note that "ParserModel" is a subclass of the "nn.Module" class. In PyTorch all neural networks + are a subclass of this "nn.Module". + - The "__init__" method is where you define all the layers and parameters + (embedding layers, linear layers, dropout layers, etc.). + - "__init__" gets automatically called when you create a new instance of your class, e.g. + when you write "m = ParserModel()". + - Other methods of ParserModel can access variables that have "self." prefix. Thus, + you should add the "self." prefix layers, values, etc. that you want to utilize + in other ParserModel methods. + - For further documentation on "nn.Module" please see https://pytorch.org/docs/stable/nn.html. + """ + def __init__(self, embeddings, n_features=36, + hidden_size=200, n_classes=3, dropout_prob=0.5): + """ Initialize the parser model. 
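As a quick cross-check of the power-iteration demo above, the dominant eigenpair it converges to can be compared against NumPy's dense eigensolver. This snippet is illustrative only and not part of the assignment files; it reuses the same matrix as the demo's main().

import numpy as np

A = np.array([[.5, .4], [.2, .8]])
eigvals, eigvecs = np.linalg.eig(A)
k = np.argmax(np.abs(eigvals))
print(eigvals[k])                     # dominant eigenvalue, ~0.970
print(eigvecs[:, k] / eigvecs[1, k])  # eigenvector scaled so its second entry is 1, as in the demo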
+ + @param embeddings (ndarray): word embeddings (num_words, embedding_size) + @param n_features (int): number of input features + @param hidden_size (int): number of hidden units + @param n_classes (int): number of output classes + @param dropout_prob (float): dropout probability + """ + super(ParserModel, self).__init__() + self.n_features = n_features + self.n_classes = n_classes + self.dropout_prob = dropout_prob + self.embed_size = embeddings.shape[1] + self.hidden_size = hidden_size + self.embeddings = nn.Parameter(torch.tensor(embeddings)) + + ### YOUR CODE HERE (~10 Lines) + ### TODO: + ### 1) Declare `self.embed_to_hidden_weight` and `self.embed_to_hidden_bias` as `nn.Parameter`. + ### Initialize weight with the `nn.init.xavier_uniform_` function and bias with `nn.init.uniform_` + ### with default parameters. + ### 2) Construct `self.dropout` layer. + ### 3) Declare `self.hidden_to_logits_weight` and `self.hidden_to_logits_bias` as `nn.Parameter`. + ### Initialize weight with the `nn.init.xavier_uniform_` function and bias with `nn.init.uniform_` + ### with default parameters. + ### + ### Note: Trainable variables are declared as `nn.Parameter` which is a commonly used API + ### to include a tensor into a computational graph to support updating w.r.t its gradient. + ### Here, we use Xavier Uniform Initialization for our Weight initialization. + ### It has been shown empirically, that this provides better initial weights + ### for training networks than random uniform initialization. + ### For more details checkout this great blogpost: + ### http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization + ### + ### Please see the following docs for support: + ### nn.Parameter: https://pytorch.org/docs/stable/nn.html#parameters + ### Initialization: https://pytorch.org/docs/stable/nn.init.html + ### Dropout: https://pytorch.org/docs/stable/nn.html#dropout-layers + + + + + ### END YOUR CODE + + def embedding_lookup(self, w): + """ Utilize `w` to select embeddings from embedding matrix `self.embeddings` + @param w (Tensor): input tensor of word indices (batch_size, n_features) + + @return x (Tensor): tensor of embeddings for words represented in w + (batch_size, n_features * embed_size) + """ + + ### YOUR CODE HERE (~1-3 Lines) + ### TODO: + ### 1) For each index `i` in `w`, select `i`th vector from self.embeddings + ### 2) Reshape the tensor using `view` function if necessary + ### + ### Note: All embedding vectors are stacked and stored as a matrix. The model receives + ### a list of indices representing a sequence of words, then it calls this lookup + ### function to map indices to sequence of embeddings. + ### + ### This problem aims to test your understanding of embedding lookup, + ### so DO NOT use any high level API like nn.Embedding + ### (we are asking you to implement that!). Pay attention to tensor shapes + ### and reshape if necessary. Make sure you know each tensor's shape before you run the code! + ### + ### Pytorch has some useful APIs for you, and you can use either one + ### in this problem (except nn.Embedding). These docs might be helpful: + ### Index select: https://pytorch.org/docs/stable/torch.html#torch.index_select + ### Gather: https://pytorch.org/docs/stable/torch.html#torch.gather + ### View: https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view + + + + ### END YOUR CODE + return x + + + def forward(self, w): + """ Run the model forward. 
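The three YOUR CODE HERE blocks in this file (the parameters in `__init__`, `embedding_lookup`, and `forward` below) can be completed along the lines of the following sketch. The class name `TinyParserModel` is used only to keep the example self-contained; this is one implementation consistent with the hints, not the official solution.

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyParserModel(nn.Module):
    """Self-contained sketch of the blanks in ParserModel (illustrative only)."""
    def __init__(self, embeddings, n_features=36, hidden_size=200,
                 n_classes=3, dropout_prob=0.5):
        super().__init__()
        self.n_features = n_features
        self.embed_size = embeddings.shape[1]
        self.embeddings = nn.Parameter(torch.tensor(embeddings))
        # Trainable affine maps: Xavier-uniform weights, uniform biases (defaults).
        self.embed_to_hidden_weight = nn.Parameter(
            torch.empty(n_features * self.embed_size, hidden_size))
        nn.init.xavier_uniform_(self.embed_to_hidden_weight)
        self.embed_to_hidden_bias = nn.Parameter(
            nn.init.uniform_(torch.empty(hidden_size)))
        self.dropout = nn.Dropout(dropout_prob)
        self.hidden_to_logits_weight = nn.Parameter(
            torch.empty(hidden_size, n_classes))
        nn.init.xavier_uniform_(self.hidden_to_logits_weight)
        self.hidden_to_logits_bias = nn.Parameter(
            nn.init.uniform_(torch.empty(n_classes)))

    def embedding_lookup(self, w):
        # Index rows of the embedding matrix with the word ids, then flatten
        # the n_features embedding vectors into one long vector per example.
        x = self.embeddings[w]          # (batch_size, n_features, embed_size)
        return x.view(w.shape[0], -1)   # (batch_size, n_features * embed_size)

    def forward(self, w):
        x = self.embedding_lookup(w)
        h = F.relu(x @ self.embed_to_hidden_weight + self.embed_to_hidden_bias)
        # Dropout after the ReLU; softmax is left to the loss function.
        return self.dropout(h) @ self.hidden_to_logits_weight + self.hidden_to_logits_bias

# Shape check mirroring the sanity test at the bottom of this file:
#   TinyParserModel(np.zeros((100, 30), dtype=np.float32))(torch.randint(0, 100, (4, 36)))
# returns a (4, 3) tensor of logits.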
+ + Note that we will not apply the softmax function here because it is included in the loss function nn.CrossEntropyLoss + + PyTorch Notes: + - Every nn.Module object (PyTorch model) has a `forward` function. + - When you apply your nn.Module to an input tensor `w` this function is applied to the tensor. + For example, if you created an instance of your ParserModel and applied it to some `w` as follows, + the `forward` function would called on `w` and the result would be stored in the `output` variable: + model = ParserModel() + output = model(w) # this calls the forward function + - For more details checkout: https://pytorch.org/docs/stable/nn.html#torch.nn.Module.forward + + @param w (Tensor): input tensor of tokens (batch_size, n_features) + + @return logits (Tensor): tensor of predictions (output after applying the layers of the network) + without applying softmax (batch_size, n_classes) + """ + ### YOUR CODE HERE (~3-5 lines) + ### TODO: + ### Complete the forward computation as described in write-up. In addition, include a dropout layer + ### as decleared in `__init__` after ReLU function. + ### + ### Note: We do not apply the softmax to the logits here, because + ### the loss function (torch.nn.CrossEntropyLoss) applies it more efficiently. + ### + ### Please see the following docs for support: + ### Matrix product: https://pytorch.org/docs/stable/torch.html#torch.matmul + ### ReLU: https://pytorch.org/docs/stable/nn.html?highlight=relu#torch.nn.functional.relu + + + ### END YOUR CODE + return logits + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description='Simple sanity check for parser_model.py') + parser.add_argument('-e', '--embedding', action='store_true', help='sanity check for embeding_lookup function') + parser.add_argument('-f', '--forward', action='store_true', help='sanity check for forward function') + args = parser.parse_args() + + embeddings = np.zeros((100, 30), dtype=np.float32) + model = ParserModel(embeddings) + + def check_embedding(): + inds = torch.randint(0, 100, (4, 36), dtype=torch.long) + selected = model.embedding_lookup(inds) + assert np.all(selected.data.numpy() == 0), "The result of embedding lookup: " \ + + repr(selected) + " contains non-zero elements." + + def check_forward(): + inputs =torch.randint(0, 100, (4, 36), dtype=torch.long) + out = model(inputs) + expected_out_shape = (4, 3) + assert out.shape == expected_out_shape, "The result shape of forward is: " + repr(out.shape) + \ + " which doesn't match expected " + repr(expected_out_shape) + + if args.embedding: + check_embedding() + print("Embedding_lookup sanity check passes!") + + if args.forward: + check_forward() + print("Forward sanity check passes!") \ No newline at end of file diff --git a/Week 03/Assignment 3/parser_transitions.py b/Week 03/Assignment 3/parser_transitions.py new file mode 100755 index 0000000..2f99ed0 --- /dev/null +++ b/Week 03/Assignment 3/parser_transitions.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +CS224N 2019-20: Homework 3 +parser_transitions.py: Algorithms for completing partial parsess. +Sahil Chopra +Haoshen Hong +""" + +import sys + +class PartialParse(object): + def __init__(self, sentence): + """Initializes this partial parse. + + @param sentence (list of str): The sentence to be parsed as a list of words. + Your code should not modify the sentence. + """ + # The sentence being parsed is kept for bookkeeping purposes. Do not alter it in your code. 
+ self.sentence = sentence + + ### YOUR CODE HERE (3 Lines) + ### Your code should initialize the following fields: + ### self.stack: The current stack represented as a list with the top of the stack as the + ### last element of the list. + ### self.buffer: The current buffer represented as a list with the first item on the + ### buffer as the first item of the list + ### self.dependencies: The list of dependencies produced so far. Represented as a list of + ### tuples where each tuple is of the form (head, dependent). + ### Order for this list doesn't matter. + ### + ### Note: The root token should be represented with the string "ROOT" + ### + + + ### END YOUR CODE + + + def parse_step(self, transition): + """Performs a single parse step by applying the given transition to this partial parse + + @param transition (str): A string that equals "S", "LA", or "RA" representing the shift, + left-arc, and right-arc transitions. You can assume the provided + transition is a legal transition. + """ + ### YOUR CODE HERE (~7-10 Lines) + ### TODO: + ### Implement a single parsing step, i.e. the logic for the following as + ### described in the pdf handout: + ### 1. Shift + ### 2. Left Arc + ### 3. Right Arc + + + ### END YOUR CODE + + def parse(self, transitions): + """Applies the provided transitions to this PartialParse + + @param transitions (list of str): The list of transitions in the order they should be applied + + @return dsependencies (list of string tuples): The list of dependencies produced when + parsing the sentence. Represented as a list of + tuples where each tuple is of the form (head, dependent). + """ + for transition in transitions: + self.parse_step(transition) + return self.dependencies + + +def minibatch_parse(sentences, model, batch_size): + """Parses a list of sentences in minibatches using a model. + + @param sentences (list of list of str): A list of sentences to be parsed + (each sentence is a list of words and each word is of type string) + @param model (ParserModel): The model that makes parsing decisions. It is assumed to have a function + model.predict(partial_parses) that takes in a list of PartialParses as input and + returns a list of transitions predicted for each parse. That is, after calling + transitions = model.predict(partial_parses) + transitions[i] will be the next transition to apply to partial_parses[i]. + @param batch_size (int): The number of PartialParses to include in each minibatch + + + @return dependencies (list of dependency lists): A list where each element is the dependencies + list for a parsed sentence. Ordering should be the + same as in sentences (i.e., dependencies[i] should + contain the parse for sentences[i]). + """ + dependencies = [] + + ### YOUR CODE HERE (~8-10 Lines) + ### TODO: + ### Implement the minibatch parse algorithm as described in the pdf handout + ### + ### Note: A shallow copy (as denoted in the PDF) can be made with the "=" sign in python, e.g. + ### unfinished_parses = partial_parses[:]. + ### Here `unfinished_parses` is a shallow copy of `partial_parses`. + ### In Python, a shallow copied list like `unfinished_parses` does not contain new instances + ### of the object stored in `partial_parses`. Rather both lists refer to the same objects. + ### In our case, `partial_parses` contains a list of partial parses. `unfinished_parses` + ### contains references to the same objects. Thus, you should NOT use the `del` operator + ### to remove objects from the `unfinished_parses` list. 
This will free the underlying memory that + ### is being accessed by `partial_parses` and may cause your code to crash. + + + ### END YOUR CODE + + return dependencies + + +def test_step(name, transition, stack, buf, deps, + ex_stack, ex_buf, ex_deps): + """Tests that a single parse step returns the expected output""" + pp = PartialParse([]) + pp.stack, pp.buffer, pp.dependencies = stack, buf, deps + + pp.parse_step(transition) + stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies))) + assert stack == ex_stack, \ + "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack) + assert buf == ex_buf, \ + "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf) + assert deps == ex_deps, \ + "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) + print("{:} test passed!".format(name)) + + +def test_parse_step(): + """Simple tests for the PartialParse.parse_step function + Warning: these are not exhaustive + """ + test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [], + ("ROOT", "the", "cat"), ("sat",), ()) + test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [], + ("ROOT", "cat",), ("sat",), (("cat", "the"),)) + test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [], + ("ROOT", "run",), (), (("run", "fast"),)) + + +def test_parse(): + """Simple tests for the PartialParse.parse function + Warning: these are not exhaustive + """ + sentence = ["parse", "this", "sentence"] + dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"]) + dependencies = tuple(sorted(dependencies)) + expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this')) + assert dependencies == expected, \ + "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected) + assert tuple(sentence) == ("parse", "this", "sentence"), \ + "parse test failed: the input sentence should not be modified" + print("parse test passed!") + + +class DummyModel(object): + """Dummy model for testing the minibatch_parse function + """ + def __init__(self, mode = "unidirectional"): + self.mode = mode + + def predict(self, partial_parses): + if self.mode == "unidirectional": + return self.unidirectional_predict(partial_parses) + elif self.mode == "interleave": + return self.interleave_predict(partial_parses) + else: + raise NotImplementedError() + + def unidirectional_predict(self, partial_parses): + """First shifts everything onto the stack and then does exclusively right arcs if the first word of + the sentence is "right", "left" if otherwise. + """ + return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S" + for pp in partial_parses] + + def interleave_predict(self, partial_parses): + """First shifts everything onto the stack and then interleaves "right" and "left". 
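For reference, the blanks in this file (the fields set up in PartialParse.__init__, parse_step, and minibatch_parse) admit an implementation along the lines of the standalone sketch below. The names carry a Sketch suffix to signal that this is illustrative rather than the graded solution; the transition semantics follow the comments and tests in this file.

class PartialParseSketch:
    def __init__(self, sentence):
        self.sentence = sentence
        self.stack = ["ROOT"]           # top of stack = last list element
        self.buffer = list(sentence)    # first buffer word = first list element
        self.dependencies = []          # (head, dependent) tuples

    def parse_step(self, transition):
        if transition == "S":                                  # shift
            self.stack.append(self.buffer.pop(0))
        elif transition == "LA":                               # left-arc
            dependent = self.stack.pop(-2)
            self.dependencies.append((self.stack[-1], dependent))
        elif transition == "RA":                               # right-arc
            dependent = self.stack.pop()
            self.dependencies.append((self.stack[-1], dependent))


def minibatch_parse_sketch(sentences, model, batch_size):
    partial_parses = [PartialParseSketch(s) for s in sentences]
    unfinished = partial_parses[:]                             # shallow copy, no `del`
    while unfinished:
        batch = unfinished[:batch_size]
        for pp, transition in zip(batch, model.predict(batch)):
            pp.parse_step(transition)
        # A parse is finished once its buffer is empty and only ROOT remains on the stack.
        unfinished = [pp for pp in unfinished
                      if len(pp.buffer) > 0 or len(pp.stack) > 1]
    return [pp.dependencies for pp in partial_parses]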
+ """ + return [("RA" if len(pp.stack) % 2 == 0 else "LA") if len(pp.buffer) == 0 else "S" + for pp in partial_parses] + +def test_dependencies(name, deps, ex_deps): + """Tests the provided dependencies match the expected dependencies""" + deps = tuple(sorted(deps)) + assert deps == ex_deps, \ + "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) + + +def test_minibatch_parse(): + """Simple tests for the minibatch_parse function + Warning: these are not exhaustive + """ + + # Unidirectional arcs test + sentences = [["right", "arcs", "only"], + ["right", "arcs", "only", "again"], + ["left", "arcs", "only"], + ["left", "arcs", "only", "again"]] + deps = minibatch_parse(sentences, DummyModel(), 2) + test_dependencies("minibatch_parse", deps[0], + (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs'))) + test_dependencies("minibatch_parse", deps[1], + (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs'))) + test_dependencies("minibatch_parse", deps[2], + (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left'))) + test_dependencies("minibatch_parse", deps[3], + (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only'))) + + # Out-of-bound test + sentences = [["right"]] + deps = minibatch_parse(sentences, DummyModel(), 2) + test_dependencies("minibatch_parse", deps[0], (('ROOT', 'right'),)) + + # Mixed arcs test + sentences = [["this", "is", "interleaving", "dependency", "test"]] + deps = minibatch_parse(sentences, DummyModel(mode="interleave"), 1) + test_dependencies("minibatch_parse", deps[0], + (('ROOT', 'is'), ('dependency', 'interleaving'), + ('dependency', 'test'), ('is', 'dependency'), ('is', 'this'))) + print("minibatch_parse test passed!") + + +if __name__ == '__main__': + args = sys.argv + if len(args) != 2: + raise Exception("You did not provide a valid keyword. Either provide 'part_c' or 'part_d', when executing this script") + elif args[1] == "part_c": + test_parse_step() + test_parse() + elif args[1] == "part_d": + test_minibatch_parse() + else: + raise Exception("You did not provide a valid keyword. Either provide 'part_c' or 'part_d', when executing this script") diff --git a/Week 03/Assignment 3/run.py b/Week 03/Assignment 3/run.py new file mode 100755 index 0000000..be7e5f6 --- /dev/null +++ b/Week 03/Assignment 3/run.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +CS224N 2019-20: Homework 3 +run.py: Run the dependency parser. +Sahil Chopra +Haoshen Hong +""" +from datetime import datetime +import os +import pickle +import math +import time +import argparse + +from torch import nn, optim +import torch +from tqdm import tqdm + +from parser_model import ParserModel +from utils.parser_utils import minibatches, load_and_preprocess_data, AverageMeter + +parser = argparse.ArgumentParser(description='Train neural dependency parser in pytorch') +parser.add_argument('-d', '--debug', action='store_true', help='whether to enter debug mode') +args = parser.parse_args() + +# ----------------- +# Primary Functions +# ----------------- +def train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005): + """ Train the neural dependency parser. + + @param parser (Parser): Neural Dependency Parser + @param train_data (): + @param dev_data (): + @param output_path (str): Path to which model weights and results are written. 
+ @param batch_size (int): Number of examples in a single batch + @param n_epochs (int): Number of training epochs + @param lr (float): Learning rate + """ + best_dev_UAS = 0 + + + ### YOUR CODE HERE (~2-7 lines) + ### TODO: + ### 1) Construct Adam Optimizer in variable `optimizer` + ### 2) Construct the Cross Entropy Loss Function in variable `loss_func` with `mean` + ### reduction (default) + ### + ### Hint: Use `parser.model.parameters()` to pass optimizer + ### necessary parameters to tune. + ### Please see the following docs for support: + ### Adam Optimizer: https://pytorch.org/docs/stable/optim.html + ### Cross Entropy Loss: https://pytorch.org/docs/stable/nn.html#crossentropyloss + + + + ### END YOUR CODE + + for epoch in range(n_epochs): + print("Epoch {:} out of {:}".format(epoch + 1, n_epochs)) + dev_UAS = train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size) + if dev_UAS > best_dev_UAS: + best_dev_UAS = dev_UAS + print("New best dev UAS! Saving model.") + torch.save(parser.model.state_dict(), output_path) + print("") + + +def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size): + """ Train the neural dependency parser for single epoch. + + Note: In PyTorch we can signify train versus test and automatically have + the Dropout Layer applied and removed, accordingly, by specifying + whether we are training, `model.train()`, or evaluating, `model.eval()` + + @param parser (Parser): Neural Dependency Parser + @param train_data (): + @param dev_data (): + @param optimizer (nn.Optimizer): Adam Optimizer + @param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function + @param batch_size (int): batch size + + @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data + """ + parser.model.train() # Places model in "train" mode, i.e. apply dropout layer + n_minibatches = math.ceil(len(train_data) / batch_size) + loss_meter = AverageMeter() + + with tqdm(total=(n_minibatches)) as prog: + for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)): + optimizer.zero_grad() # remove any baggage in the optimizer + loss = 0. # store loss for this batch here + train_x = torch.from_numpy(train_x).long() + train_y = torch.from_numpy(train_y.nonzero()[1]).long() + + ### YOUR CODE HERE (~5-10 lines) + ### TODO: + ### 1) Run train_x forward through model to produce `logits` + ### 2) Use the `loss_func` parameter to apply the PyTorch CrossEntropyLoss function. + ### This will take `logits` and `train_y` as inputs. It will output the CrossEntropyLoss + ### between softmax(`logits`) and `train_y`. Remember that softmax(`logits`) + ### are the predictions (y^ from the PDF). + ### 3) Backprop losses + ### 4) Take step with the optimizer + ### Please see the following docs for support: + ### Optimizer Step: https://pytorch.org/docs/stable/optim.html#optimizer-step + + + + + ### END YOUR CODE + prog.update(1) + loss_meter.update(loss.item()) + + print ("Average Train Loss: {}".format(loss_meter.avg)) + + print("Evaluating on dev set",) + parser.model.eval() # Places model in "eval" mode, i.e. 
don't apply dropout layer + dev_UAS, _ = parser.parse(dev_data) + print("- dev UAS: {:.2f}".format(dev_UAS * 100.0)) + return dev_UAS + + +if __name__ == "__main__": + debug = args.debug + + assert (torch.__version__.split(".") >= ["1", "0", "0"]), "Please install torch version >= 1.0.0" + + print(80 * "=") + print("INITIALIZING") + print(80 * "=") + parser, embeddings, train_data, dev_data, test_data = load_and_preprocess_data(debug) + + start = time.time() + model = ParserModel(embeddings) + parser.model = model + print("took {:.2f} seconds\n".format(time.time() - start)) + + print(80 * "=") + print("TRAINING") + print(80 * "=") + output_dir = "results/{:%Y%m%d_%H%M%S}/".format(datetime.now()) + output_path = output_dir + "model.weights" + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005) + + if not debug: + print(80 * "=") + print("TESTING") + print(80 * "=") + print("Restoring the best model weights found on the dev set") + parser.model.load_state_dict(torch.load(output_path)) + print("Final evaluation on test set",) + parser.model.eval() + UAS, dependencies = parser.parse(test_data) + print("- test UAS: {:.2f}".format(UAS * 100.0)) + print("Done!") diff --git a/Week 03/Assignment 3/utils/__init__.py b/Week 03/Assignment 3/utils/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/Week 03/Assignment 3/utils/general_utils.py b/Week 03/Assignment 3/utils/general_utils.py new file mode 100755 index 0000000..5940ce7 --- /dev/null +++ b/Week 03/Assignment 3/utils/general_utils.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +CS224N 2018-19: Homework 3 +general_utils.py: General purpose utilities. +Sahil Chopra +""" + +import sys +import time +import numpy as np + + +def get_minibatches(data, minibatch_size, shuffle=True): + """ + Iterates through the provided data one minibatch at at time. You can use this function to + iterate through data in minibatches as follows: + + for inputs_minibatch in get_minibatches(inputs, minibatch_size): + ... + + Or with multiple data sources: + + for inputs_minibatch, labels_minibatch in get_minibatches([inputs, labels], minibatch_size): + ... + + Args: + data: there are two possible values: + - a list or numpy array + - a list where each element is either a list or numpy array + minibatch_size: the maximum number of items in a minibatch + shuffle: whether to randomize the order of returned data + Returns: + minibatches: the return value depends on data: + - If data is a list/array it yields the next minibatch of data. + - If data a list of lists/arrays it returns the next minibatch of each element in the + list. This can be used to iterate through multiple data sources + (e.g., features and labels) at the same time. 
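Returning to run.py, the two YOUR CODE HERE blocks in train() and train_for_epoch() boil down to standard PyTorch training boilerplate. One reasonable completion is sketched below; variable names follow the skeleton, and this is illustrative rather than the official solution.

import torch
from torch import nn, optim

def train_step_sketch(model, optimizer, loss_func, train_x, train_y):
    """One minibatch update, shaped like the loop body in train_for_epoch()."""
    optimizer.zero_grad()
    logits = model(train_x)             # (batch_size, n_classes), raw scores
    loss = loss_func(logits, train_y)   # CrossEntropyLoss applies log-softmax itself
    loss.backward()
    optimizer.step()
    return loss

# Setup as described in train():
#   optimizer = optim.Adam(parser.model.parameters(), lr=lr)
#   loss_func = nn.CrossEntropyLoss()   # 'mean' reduction by default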
+ + """ + list_data = type(data) is list and (type(data[0]) is list or type(data[0]) is np.ndarray) + data_size = len(data[0]) if list_data else len(data) + indices = np.arange(data_size) + if shuffle: + np.random.shuffle(indices) + for minibatch_start in np.arange(0, data_size, minibatch_size): + minibatch_indices = indices[minibatch_start:minibatch_start + minibatch_size] + yield [_minibatch(d, minibatch_indices) for d in data] if list_data \ + else _minibatch(data, minibatch_indices) + + +def _minibatch(data, minibatch_idx): + return data[minibatch_idx] if type(data) is np.ndarray else [data[i] for i in minibatch_idx] + + +def test_all_close(name, actual, expected): + if actual.shape != expected.shape: + raise ValueError("{:} failed, expected output to have shape {:} but has shape {:}" + .format(name, expected.shape, actual.shape)) + if np.amax(np.fabs(actual - expected)) > 1e-6: + raise ValueError("{:} failed, expected {:} but value is {:}".format(name, expected, actual)) + else: + print(name, "passed!") diff --git a/Week 03/Assignment 3/utils/parser_utils.py b/Week 03/Assignment 3/utils/parser_utils.py new file mode 100755 index 0000000..c559a58 --- /dev/null +++ b/Week 03/Assignment 3/utils/parser_utils.py @@ -0,0 +1,422 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +CS224N 2018-19: Homework 3 +parser_utils.py: Utilities for training the dependency parser. +Sahil Chopra +""" + +import time +import os +import logging +from collections import Counter +from . general_utils import get_minibatches +from parser_transitions import minibatch_parse + +from tqdm import tqdm +import torch +import numpy as np + +P_PREFIX = '
<p>
:' +L_PREFIX = ':' +UNK = '' +NULL = '' +ROOT = '' + + +class Config(object): + language = 'english' + with_punct = True + unlabeled = True + lowercase = True + use_pos = True + use_dep = True + use_dep = use_dep and (not unlabeled) + data_path = './data' + train_file = 'train.conll' + dev_file = 'dev.conll' + test_file = 'test.conll' + embedding_file = './data/en-cw.txt' + + +class Parser(object): + """Contains everything needed for transition-based dependency parsing except for the model""" + + def __init__(self, dataset): + root_labels = list([l for ex in dataset + for (h, l) in zip(ex['head'], ex['label']) if h == 0]) + counter = Counter(root_labels) + if len(counter) > 1: + logging.info('Warning: more than one root label') + logging.info(counter) + self.root_label = counter.most_common()[0][0] + deprel = [self.root_label] + list(set([w for ex in dataset + for w in ex['label'] + if w != self.root_label])) + tok2id = {L_PREFIX + l: i for (i, l) in enumerate(deprel)} + tok2id[L_PREFIX + NULL] = self.L_NULL = len(tok2id) + + config = Config() + self.unlabeled = config.unlabeled + self.with_punct = config.with_punct + self.use_pos = config.use_pos + self.use_dep = config.use_dep + self.language = config.language + + if self.unlabeled: + trans = ['L', 'R', 'S'] + self.n_deprel = 1 + else: + trans = ['L-' + l for l in deprel] + ['R-' + l for l in deprel] + ['S'] + self.n_deprel = len(deprel) + + self.n_trans = len(trans) + self.tran2id = {t: i for (i, t) in enumerate(trans)} + self.id2tran = {i: t for (i, t) in enumerate(trans)} + + # logging.info('Build dictionary for part-of-speech tags.') + tok2id.update(build_dict([P_PREFIX + w for ex in dataset for w in ex['pos']], + offset=len(tok2id))) + tok2id[P_PREFIX + UNK] = self.P_UNK = len(tok2id) + tok2id[P_PREFIX + NULL] = self.P_NULL = len(tok2id) + tok2id[P_PREFIX + ROOT] = self.P_ROOT = len(tok2id) + + # logging.info('Build dictionary for words.') + tok2id.update(build_dict([w for ex in dataset for w in ex['word']], + offset=len(tok2id))) + tok2id[UNK] = self.UNK = len(tok2id) + tok2id[NULL] = self.NULL = len(tok2id) + tok2id[ROOT] = self.ROOT = len(tok2id) + + self.tok2id = tok2id + self.id2tok = {v: k for (k, v) in tok2id.items()} + + self.n_features = 18 + (18 if config.use_pos else 0) + (12 if config.use_dep else 0) + self.n_tokens = len(tok2id) + + def vectorize(self, examples): + vec_examples = [] + for ex in examples: + word = [self.ROOT] + [self.tok2id[w] if w in self.tok2id + else self.UNK for w in ex['word']] + pos = [self.P_ROOT] + [self.tok2id[P_PREFIX + w] if P_PREFIX + w in self.tok2id + else self.P_UNK for w in ex['pos']] + head = [-1] + ex['head'] + label = [-1] + [self.tok2id[L_PREFIX + w] if L_PREFIX + w in self.tok2id + else -1 for w in ex['label']] + vec_examples.append({'word': word, 'pos': pos, + 'head': head, 'label': label}) + return vec_examples + + def extract_features(self, stack, buf, arcs, ex): + if stack[0] == "ROOT": + stack[0] = 0 + + def get_lc(k): + return sorted([arc[1] for arc in arcs if arc[0] == k and arc[1] < k]) + + def get_rc(k): + return sorted([arc[1] for arc in arcs if arc[0] == k and arc[1] > k], + reverse=True) + + p_features = [] + l_features = [] + features = [self.NULL] * (3 - len(stack)) + [ex['word'][x] for x in stack[-3:]] + features += [ex['word'][x] for x in buf[:3]] + [self.NULL] * (3 - len(buf)) + if self.use_pos: + p_features = [self.P_NULL] * (3 - len(stack)) + [ex['pos'][x] for x in stack[-3:]] + p_features += [ex['pos'][x] for x in buf[:3]] + [self.P_NULL] * (3 - len(buf)) + + for 
i in range(2): + if i < len(stack): + k = stack[-i-1] + lc = get_lc(k) + rc = get_rc(k) + llc = get_lc(lc[0]) if len(lc) > 0 else [] + rrc = get_rc(rc[0]) if len(rc) > 0 else [] + + features.append(ex['word'][lc[0]] if len(lc) > 0 else self.NULL) + features.append(ex['word'][rc[0]] if len(rc) > 0 else self.NULL) + features.append(ex['word'][lc[1]] if len(lc) > 1 else self.NULL) + features.append(ex['word'][rc[1]] if len(rc) > 1 else self.NULL) + features.append(ex['word'][llc[0]] if len(llc) > 0 else self.NULL) + features.append(ex['word'][rrc[0]] if len(rrc) > 0 else self.NULL) + + if self.use_pos: + p_features.append(ex['pos'][lc[0]] if len(lc) > 0 else self.P_NULL) + p_features.append(ex['pos'][rc[0]] if len(rc) > 0 else self.P_NULL) + p_features.append(ex['pos'][lc[1]] if len(lc) > 1 else self.P_NULL) + p_features.append(ex['pos'][rc[1]] if len(rc) > 1 else self.P_NULL) + p_features.append(ex['pos'][llc[0]] if len(llc) > 0 else self.P_NULL) + p_features.append(ex['pos'][rrc[0]] if len(rrc) > 0 else self.P_NULL) + + if self.use_dep: + l_features.append(ex['label'][lc[0]] if len(lc) > 0 else self.L_NULL) + l_features.append(ex['label'][rc[0]] if len(rc) > 0 else self.L_NULL) + l_features.append(ex['label'][lc[1]] if len(lc) > 1 else self.L_NULL) + l_features.append(ex['label'][rc[1]] if len(rc) > 1 else self.L_NULL) + l_features.append(ex['label'][llc[0]] if len(llc) > 0 else self.L_NULL) + l_features.append(ex['label'][rrc[0]] if len(rrc) > 0 else self.L_NULL) + else: + features += [self.NULL] * 6 + if self.use_pos: + p_features += [self.P_NULL] * 6 + if self.use_dep: + l_features += [self.L_NULL] * 6 + + features += p_features + l_features + assert len(features) == self.n_features + return features + + def get_oracle(self, stack, buf, ex): + if len(stack) < 2: + return self.n_trans - 1 + + i0 = stack[-1] + i1 = stack[-2] + h0 = ex['head'][i0] + h1 = ex['head'][i1] + l0 = ex['label'][i0] + l1 = ex['label'][i1] + + if self.unlabeled: + if (i1 > 0) and (h1 == i0): + return 0 + elif (i1 >= 0) and (h0 == i1) and \ + (not any([x for x in buf if ex['head'][x] == i0])): + return 1 + else: + return None if len(buf) == 0 else 2 + else: + if (i1 > 0) and (h1 == i0): + return l1 if (l1 >= 0) and (l1 < self.n_deprel) else None + elif (i1 >= 0) and (h0 == i1) and \ + (not any([x for x in buf if ex['head'][x] == i0])): + return l0 + self.n_deprel if (l0 >= 0) and (l0 < self.n_deprel) else None + else: + return None if len(buf) == 0 else self.n_trans - 1 + + def create_instances(self, examples): + all_instances = [] + succ = 0 + for id, ex in enumerate(examples): + n_words = len(ex['word']) - 1 + + # arcs = {(h, t, label)} + stack = [0] + buf = [i + 1 for i in range(n_words)] + arcs = [] + instances = [] + for i in range(n_words * 2): + gold_t = self.get_oracle(stack, buf, ex) + if gold_t is None: + break + legal_labels = self.legal_labels(stack, buf) + assert legal_labels[gold_t] == 1 + instances.append((self.extract_features(stack, buf, arcs, ex), + legal_labels, gold_t)) + if gold_t == self.n_trans - 1: + stack.append(buf[0]) + buf = buf[1:] + elif gold_t < self.n_deprel: + arcs.append((stack[-1], stack[-2], gold_t)) + stack = stack[:-2] + [stack[-1]] + else: + arcs.append((stack[-2], stack[-1], gold_t - self.n_deprel)) + stack = stack[:-1] + else: + succ += 1 + all_instances += instances + + return all_instances + + def legal_labels(self, stack, buf): + labels = ([1] if len(stack) > 2 else [0]) * self.n_deprel + labels += ([1] if len(stack) >= 2 else [0]) * self.n_deprel + labels += [1] if 
len(buf) > 0 else [0] + return labels + + def parse(self, dataset, eval_batch_size=5000): + sentences = [] + sentence_id_to_idx = {} + for i, example in enumerate(dataset): + n_words = len(example['word']) - 1 + sentence = [j + 1 for j in range(n_words)] + sentences.append(sentence) + sentence_id_to_idx[id(sentence)] = i + + model = ModelWrapper(self, dataset, sentence_id_to_idx) + dependencies = minibatch_parse(sentences, model, eval_batch_size) + + UAS = all_tokens = 0.0 + with tqdm(total=len(dataset)) as prog: + for i, ex in enumerate(dataset): + head = [-1] * len(ex['word']) + for h, t, in dependencies[i]: + head[t] = h + for pred_h, gold_h, gold_l, pos in \ + zip(head[1:], ex['head'][1:], ex['label'][1:], ex['pos'][1:]): + assert self.id2tok[pos].startswith(P_PREFIX) + pos_str = self.id2tok[pos][len(P_PREFIX):] + if (self.with_punct) or (not punct(self.language, pos_str)): + UAS += 1 if pred_h == gold_h else 0 + all_tokens += 1 + prog.update(i + 1) + UAS /= all_tokens + return UAS, dependencies + + +class ModelWrapper(object): + def __init__(self, parser, dataset, sentence_id_to_idx): + self.parser = parser + self.dataset = dataset + self.sentence_id_to_idx = sentence_id_to_idx + + def predict(self, partial_parses): + mb_x = [self.parser.extract_features(p.stack, p.buffer, p.dependencies, + self.dataset[self.sentence_id_to_idx[id(p.sentence)]]) + for p in partial_parses] + mb_x = np.array(mb_x).astype('int32') + mb_x = torch.from_numpy(mb_x).long() + mb_l = [self.parser.legal_labels(p.stack, p.buffer) for p in partial_parses] + + pred = self.parser.model(mb_x) + pred = pred.detach().numpy() + pred = np.argmax(pred + 10000 * np.array(mb_l).astype('float32'), 1) + pred = ["S" if p == 2 else ("LA" if p == 0 else "RA") for p in pred] + return pred + + +def read_conll(in_file, lowercase=False, max_example=None): + examples = [] + with open(in_file) as f: + word, pos, head, label = [], [], [], [] + for line in f.readlines(): + sp = line.strip().split('\t') + if len(sp) == 10: + if '-' not in sp[0]: + word.append(sp[1].lower() if lowercase else sp[1]) + pos.append(sp[4]) + head.append(int(sp[6])) + label.append(sp[7]) + elif len(word) > 0: + examples.append({'word': word, 'pos': pos, 'head': head, 'label': label}) + word, pos, head, label = [], [], [], [] + if (max_example is not None) and (len(examples) == max_example): + break + if len(word) > 0: + examples.append({'word': word, 'pos': pos, 'head': head, 'label': label}) + return examples + + +def build_dict(keys, n_max=None, offset=0): + count = Counter() + for key in keys: + count[key] += 1 + ls = count.most_common() if n_max is None \ + else count.most_common(n_max) + + return {w[0]: index + offset for (index, w) in enumerate(ls)} + + +def punct(language, pos): + if language == 'english': + return pos in ["''", ",", ".", ":", "``", "-LRB-", "-RRB-"] + elif language == 'chinese': + return pos == 'PU' + elif language == 'french': + return pos == 'PUNC' + elif language == 'german': + return pos in ["$.", "$,", "$["] + elif language == 'spanish': + # http://nlp.stanford.edu/software/spanish-faq.shtml + return pos in ["f0", "faa", "fat", "fc", "fd", "fe", "fg", "fh", + "fia", "fit", "fp", "fpa", "fpt", "fs", "ft", + "fx", "fz"] + elif language == 'universal': + return pos == 'PUNCT' + else: + raise ValueError('language: %s is not supported.' 
% language) + + +def minibatches(data, batch_size): + x = np.array([d[0] for d in data]) + y = np.array([d[2] for d in data]) + one_hot = np.zeros((y.size, 3)) + one_hot[np.arange(y.size), y] = 1 + return get_minibatches([x, one_hot], batch_size) + + +def load_and_preprocess_data(reduced=True): + config = Config() + + print("Loading data...",) + start = time.time() + train_set = read_conll(os.path.join(config.data_path, config.train_file), + lowercase=config.lowercase) + dev_set = read_conll(os.path.join(config.data_path, config.dev_file), + lowercase=config.lowercase) + test_set = read_conll(os.path.join(config.data_path, config.test_file), + lowercase=config.lowercase) + if reduced: + train_set = train_set[:1000] + dev_set = dev_set[:500] + test_set = test_set[:500] + print("took {:.2f} seconds".format(time.time() - start)) + + print("Building parser...",) + start = time.time() + parser = Parser(train_set) + print("took {:.2f} seconds".format(time.time() - start)) + + print("Loading pretrained embeddings...",) + start = time.time() + word_vectors = {} + for line in open(config.embedding_file).readlines(): + sp = line.strip().split() + word_vectors[sp[0]] = [float(x) for x in sp[1:]] + embeddings_matrix = np.asarray(np.random.normal(0, 0.9, (parser.n_tokens, 50)), dtype='float32') + + for token in parser.tok2id: + i = parser.tok2id[token] + if token in word_vectors: + embeddings_matrix[i] = word_vectors[token] + elif token.lower() in word_vectors: + embeddings_matrix[i] = word_vectors[token.lower()] + print("took {:.2f} seconds".format(time.time() - start)) + + print("Vectorizing data...",) + start = time.time() + train_set = parser.vectorize(train_set) + dev_set = parser.vectorize(dev_set) + test_set = parser.vectorize(test_set) + print("took {:.2f} seconds".format(time.time() - start)) + + print("Preprocessing training data...",) + start = time.time() + train_examples = parser.create_instances(train_set) + print("took {:.2f} seconds".format(time.time() - start)) + + return parser, embeddings_matrix, train_examples, dev_set, test_set, + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +if __name__ == '__main__': + pass diff --git a/Week 03/Linguistic Structure: Dependency Parsing/README.md b/Week 03/Linguistic Structure: Dependency Parsing/README.md new file mode 100644 index 0000000..e57f1fc --- /dev/null +++ b/Week 03/Linguistic Structure: Dependency Parsing/README.md @@ -0,0 +1,11 @@ +# Linguistic Structure: Dependency Parsing + +- [Slide](http://web.stanford.edu/class/cs224n/slides/cs224n-2020-lecture05-dep-parsing.pdf) +- [Note](http://web.stanford.edu/class/cs224n/readings/cs224n-2019-notes04-dependencyparsing.pdf) +- Suggested Readings: + 1. [Incrementality in Deterministic Dependency Parsing](https://www.aclweb.org/anthology/W/W04/W04-0308.pdf) + 2. [A Fast and Accurate Dependency Parser using Neural Networks](https://www.emnlp2014.org/papers/pdf/EMNLP2014082.pdf) + 3. [Dependency Parsing](http://www.morganclaypool.com/doi/abs/10.2200/S00169ED1V01Y200901HLT002) + 4. [Globally Normalized Transition-Based Neural Networks](https://arxiv.org/pdf/1603.06042.pdf) + 5. [Universal Stanford Dependencies: A cross-linguistic typology](http://nlp.stanford.edu/~manning/papers/USD_LREC14_UD_revision.pdf) + 6. 
[Universal Dependencies website](http://universaldependencies.org/) diff --git a/Week 03/Recurrent Neural Networks and Language Models/README.md b/Week 03/Recurrent Neural Networks and Language Models/README.md new file mode 100644 index 0000000..93898c3 --- /dev/null +++ b/Week 03/Recurrent Neural Networks and Language Models/README.md @@ -0,0 +1,9 @@ +# Recurrent Neural Networks and Language Models + +- [Slide](http://web.stanford.edu/class/cs224n/slides/cs224n-2020-lecture06-rnnlm.pdf) +- [Note](http://web.stanford.edu/class/cs224n/readings/cs224n-2019-notes05-LM_RNN.pdf) +- Suggested Readings: + 1. [N-gram Language Models](https://web.stanford.edu/~jurafsky/slp3/3.pdf) + 2. [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) + 3. [Sequence Modeling: Recurrent and Recursive Neural Nets](http://www.deeplearningbook.org/contents/rnn.html) (Sections 10.1 and 10.2) + 4. [On Chomsky and the Two Cultures of Statistical Learning](http://norvig.com/chomsky.html)