Commit
Merge pull request #244 from davidbau/tutorial
Add a tutorial notebook.
Showing 11 changed files with 359 additions and 5 deletions.
@@ -0,0 +1 @@
*.ipynb filter=clean_ipynb
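This gitattributes rule routes every .ipynb file through a git filter named clean_ipynb. The rule only takes effect once the filter is pointed at an implementation; per the usage instructions in the ipynb_drop_output script added later in this commit, the hookup is:

git config --global filter.clean_ipynb.clean ipynb_drop_output
git config --global filter.clean_ipynb.smudge cat

With this in place, committed notebooks have their outputs and prompt numbers stripped while working copies are left untouched.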
@@ -5,4 +5,9 @@ vis/
log/
pretrained/

.idea/
.ipynb_checkpoints

ADE_val*.jpg
ADE_val*.png

.idea/
@@ -0,0 +1,225 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Semantic Segmentation Demo\n",
"\n",
"This is a notebook for running the benchmark semantic segmentation network from the [ADE20K MIT Scene Parsing Benchmark](http://sceneparsing.csail.mit.edu/).\n",
"\n",
"The code for this notebook is available here:\n",
"https://github.com/davidbau/semantic-segmentation-pytorch/tree/tutorial/notebooks\n",
"\n",
"It can be run on Colab at this URL: https://colab.research.google.com/github/davidbau/semantic-segmentation-pytorch/blob/tutorial/notebooks/DemoSegmenter.ipynb"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Environment Setup\n",
"\n",
"First, download the code and pretrained models if we are on Colab."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"# Colab-specific setup\n",
"!(stat -t /usr/local/lib/*/dist-packages/google/colab > /dev/null 2>&1) && exit \n",
"pip install yacs 2>&1 >> install.log\n",
"git init 2>&1 >> install.log\n",
"git remote add origin https://github.com/davidbau/semantic-segmentation-pytorch.git 2>> install.log\n",
"git pull origin tutorial 2>&1 >> install.log\n",
"DOWNLOAD_ONLY=1 ./demo_test.sh 2>> install.log"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports and utility functions\n",
"\n",
"We need pytorch, numpy, and the code for the segmentation model, plus some utilities for visualizing the data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# System libs\n",
"import os, csv, torch, numpy, scipy.io, PIL.Image, torchvision.transforms\n",
"# Our libs\n",
"from mit_semseg.models import ModelBuilder, SegmentationModule\n",
"from mit_semseg.utils import colorEncode\n",
"\n",
"colors = scipy.io.loadmat('data/color150.mat')['colors']\n",
"names = {}\n",
"with open('data/object150_info.csv') as f:\n",
"    reader = csv.reader(f)\n",
"    next(reader)\n",
"    for row in reader:\n",
"        names[int(row[0])] = row[5].split(\";\")[0]\n",
"\n",
"def visualize_result(img, pred, index=None):\n",
"    # filter prediction class if requested\n",
"    if index is not None:\n",
"        pred = pred.copy()\n",
"        pred[pred != index] = -1\n",
"        print(f'{names[index+1]}:')\n",
"\n",
"    # colorize prediction\n",
"    pred_color = colorEncode(pred, colors).astype(numpy.uint8)\n",
"\n",
"    # aggregate images and display\n",
"    im_vis = numpy.concatenate((img, pred_color), axis=1)\n",
"    display(PIL.Image.fromarray(im_vis))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading the segmentation model\n",
"\n",
"Here we load a pretrained segmentation model. Like any pytorch model, we can call it like a function, or examine the parameters in all the layers.\n",
"\n",
"After loading, we put it on the GPU. Since we are doing inference, not training, we put the model in eval mode."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Network Builders\n",
"net_encoder = ModelBuilder.build_encoder(\n",
"    arch='resnet50dilated',\n",
"    fc_dim=2048,\n",
"    weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth')\n",
"net_decoder = ModelBuilder.build_decoder(\n",
"    arch='ppm_deepsup',\n",
"    fc_dim=2048,\n",
"    num_class=150,\n",
"    weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth',\n",
"    use_softmax=True)\n",
"\n",
"crit = torch.nn.NLLLoss(ignore_index=-1)\n",
"segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)\n",
"segmentation_module.eval()\n",
"segmentation_module.cuda()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load test data\n",
"\n",
"Now we load and normalize a single test image. Here we use the common convention of normalizing the image to a scale for which the RGB values of a large photo dataset would have zero mean and unit standard deviation. (These numbers come from the ImageNet dataset.) With this normalization, the limiting range of RGB values is about -2.2 to +2.7."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load and normalize one image as a singleton tensor batch\n",
"pil_to_tensor = torchvision.transforms.Compose([\n",
"    torchvision.transforms.ToTensor(),\n",
"    torchvision.transforms.Normalize(\n",
"        mean=[0.485, 0.456, 0.406], # These are RGB mean+std values\n",
"        std=[0.229, 0.224, 0.225])  # across a large photo dataset.\n",
"])\n",
"pil_image = PIL.Image.open('ADE_val_00001519.jpg').convert('RGB')\n",
"img_original = numpy.array(pil_image)\n",
"img_data = pil_to_tensor(pil_image)\n",
"singleton_batch = {'img_data': img_data[None].cuda()}\n",
"output_size = img_data.shape[1:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run the Model\n",
"\n",
"Finally we just pass the test image to the segmentation model.\n",
"\n",
"The segmentation model is coded as a function that takes a dictionary as input, because it needs both the input batch of image data and the desired output segmentation resolution. We ask for full resolution output.\n",
"\n",
"Then we use the previously-defined visualize_result function to render the segmentation map."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Run the segmentation at the highest resolution.\n",
"with torch.no_grad():\n",
"    scores = segmentation_module(singleton_batch, segSize=output_size)\n",
"\n",
"# Get the predicted class for each pixel\n",
"_, pred = torch.max(scores, dim=1)\n",
"pred = pred.cpu()[0].numpy()\n",
"visualize_result(img_original, pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Showing classes individually\n",
"\n",
"To see which colors are which, here we visualize individual classes, one at a time."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Top classes in answer\n",
"predicted_classes = numpy.bincount(pred.flatten()).argsort()[::-1]\n",
"for c in predicted_classes[:15]:\n",
"    visualize_result(img_original, pred, c)"
]
}
],
"metadata": {
"accelerator": "GPU",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
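For readers who would rather skim plain Python than escaped notebook JSON, the notebook's core inference path condenses to the sketch below. Everything in it is taken from the cells above; it assumes the checkpoints and the test image have already been fetched (e.g. by demo_test.sh) and that a CUDA GPU is available. The output filename pred.png is only for illustration.

# Condensed sketch of the DemoSegmenter notebook's inference steps.
import numpy, scipy.io, torch, PIL.Image, torchvision.transforms
from mit_semseg.models import ModelBuilder, SegmentationModule
from mit_semseg.utils import colorEncode

# Build the pretrained encoder/decoder pair and wrap them for inference.
net_encoder = ModelBuilder.build_encoder(
    arch='resnet50dilated', fc_dim=2048,
    weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth')
net_decoder = ModelBuilder.build_decoder(
    arch='ppm_deepsup', fc_dim=2048, num_class=150,
    weights='ckpt/ade20k-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth',
    use_softmax=True)
segmentation_module = SegmentationModule(
    net_encoder, net_decoder, torch.nn.NLLLoss(ignore_index=-1))
segmentation_module.eval()
segmentation_module.cuda()

# Normalize with the ImageNet mean/std; values end up roughly in the
# -2.2 to +2.7 range mentioned in the notebook text.
pil_to_tensor = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])])
pil_image = PIL.Image.open('ADE_val_00001519.jpg').convert('RGB')
img_data = pil_to_tensor(pil_image)

# Full-resolution scores for the 150 classes, then per-pixel argmax.
with torch.no_grad():
    scores = segmentation_module({'img_data': img_data[None].cuda()},
                                 segSize=img_data.shape[1:])
pred = torch.max(scores, dim=1)[1].cpu()[0].numpy()

# Colorize with the ADE20K palette and save side by side with the input.
colors = scipy.io.loadmat('data/color150.mat')['colors']
pred_color = colorEncode(pred, colors).astype(numpy.uint8)
im_vis = numpy.concatenate((numpy.array(pil_image), pred_color), axis=1)
PIL.Image.fromarray(im_vis).save('pred.png')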
@@ -0,0 +1 @@
../ckpt
@@ -0,0 +1 @@
../config
@@ -0,0 +1 @@
../data
@@ -0,0 +1,97 @@
#!/usr/bin/env python

"""
Suppress output and prompt numbers in git version control.
This script will tell git to ignore prompt numbers and cell output
when looking at ipynb files UNLESS their metadata contains:
    "git" : { "keep_outputs" : true }
The notebooks themselves are not changed.
See also this blog post: http://pascalbugnion.net/blog/ipython-notebooks-and-git.html.
Usage instructions
==================
1. Put this script in a directory that is on the system's path.
   For future reference, I will assume you saved it in
   `~/scripts/ipynb_drop_output`.
2. Make sure it is executable by typing the command
   `chmod +x ~/scripts/ipynb_drop_output`.
3. Register a filter for ipython notebooks by
   putting the following line in `~/.config/git/attributes`:
   `*.ipynb filter=clean_ipynb`
4. Connect this script to the filter by running the following
   git commands:
   git config --global filter.clean_ipynb.clean ipynb_drop_output
   git config --global filter.clean_ipynb.smudge cat
To tell git NOT to ignore the output and prompts for a notebook,
open the notebook's metadata (Edit > Edit Notebook Metadata). A
panel should open containing the lines:
    {
        "name" : "",
        "signature" : "some very long hash"
    }
Add an extra line so that the metadata now looks like:
    {
        "name" : "",
        "signature" : "don't change the hash, but add a comma at the end of the line",
        "git" : { "keep_outputs" : true }
    }
You may need to "touch" the notebooks for git to actually register a change, if
your notebooks are already under version control.
Notes
=====
Changed by David Bau to make stripping output the default.
This script is inspired by http://stackoverflow.com/a/20844506/827862, but
lets the user specify whether the output of a notebook should be kept
in the notebook's metadata, and works for IPython v3.0.
"""

import sys
import json

nb = sys.stdin.read()

json_in = json.loads(nb)
nb_metadata = json_in["metadata"]
keep_output = False
if "git" in nb_metadata:
    if "keep_outputs" in nb_metadata["git"] and nb_metadata["git"]["keep_outputs"]:
        keep_output = True
if keep_output:
    sys.stdout.write(nb)
    exit()


ipy_version = int(json_in["nbformat"])-1  # nbformat is 1 more than actual version.

def strip_output_from_cell(cell):
    if "outputs" in cell:
        cell["outputs"] = []
    if "prompt_number" in cell:
        del cell["prompt_number"]
    if "execution_count" in cell:
        cell["execution_count"] = None


if ipy_version == 2:
    for sheet in json_in["worksheets"]:
        for cell in sheet["cells"]:
            strip_output_from_cell(cell)
else:
    for cell in json_in["cells"]:
        strip_output_from_cell(cell)

json.dump(json_in, sys.stdout, sort_keys=True, indent=1, separators=(",",": "))
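Since the filter reads a notebook on stdin and writes the cleaned JSON to stdout, it can also be run by hand to preview what git will store. The file names below are only illustrative, and the invocation assumes the `chmod +x` step from the usage instructions above:

./ipynb_drop_output < DemoSegmenter.ipynb > stripped.ipynb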
@@ -0,0 +1 @@
../mit_semseg |
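The one-line entries added above (ckpt, config, data, and mit_semseg, each containing a ../ path) appear to be symlinks from the notebooks/ directory back to the repository root, so the notebook can resolve checkpoints, configs, data files, and the mit_semseg package relative to its own location. Recreating them by hand would look something like the following (an assumption about the layout, not part of the commit itself):

# Assumed layout: symlinks inside notebooks/ pointing back to the repo root.
cd notebooks
ln -s ../ckpt ckpt
ln -s ../config config
ln -s ../data data
ln -s ../mit_semseg mit_semseg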