Merge pull request BVLC#816 from shelhamer/pycaffe-labels-grayscale-attrs-examples

Improve and polish pycaffe
shelhamer committed Aug 6, 2014
2 parents d1d499d + 0db9478 commit 52d7a48
Showing 11 changed files with 257 additions and 196 deletions.
2 changes: 1 addition & 1 deletion examples/detection.ipynb
@@ -36,7 +36,7 @@
"input": [
"!mkdir -p _temp\n",
"!echo `pwd`/images/fish-bike.jpg > _temp/det_input.txt\n",
"!../python/detect.py --crop_mode=selective_search --pretrained_model=imagenet/caffe_rcnn_imagenet_model --model_def=imagenet/rcnn_imagenet_deploy.prototxt --gpu _temp/det_input.txt _temp/det_output.h5"
"!../python/detect.py --crop_mode=selective_search --pretrained_model=imagenet/caffe_rcnn_imagenet_model --model_def=imagenet/rcnn_imagenet_deploy.prototxt --gpu --raw_scale=255 _temp/det_input.txt _temp/det_output.h5"
],
"language": "python",
"metadata": {},
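
For context on the notebook change above: caffe.io.load_image returns images as float32 arrays in [0, 1], while the reference ImageNet models expect inputs on a [0, 255] scale, which is what the new --raw_scale=255 flag restores. A minimal illustrative sketch of that scaling, reusing the notebook's image path (not part of the diff):

import caffe

# caffe.io.load_image returns a float32 array in [0, 1] (H x W x 3, RGB),
# per the io.py docstring updated later in this diff.
im = caffe.io.load_image('images/fish-bike.jpg')
print(im.dtype, im.min(), im.max())    # float32, ~0.0, ~1.0

# --raw_scale=255 tells detect.py to multiply the raw input by 255 so it
# matches the [0, 255] range the reference ImageNet models were trained on.
print((im * 255).max())                # ~255
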
92 changes: 38 additions & 54 deletions examples/filter_visualization.ipynb

Large diffs are not rendered by default.

79 changes: 36 additions & 43 deletions examples/imagenet_classification.ipynb

Large diffs are not rendered by default.

29 changes: 17 additions & 12 deletions examples/net_surgery.ipynb

Large diffs are not rendered by default.

36 changes: 23 additions & 13 deletions python/caffe/_caffe.cpp
@@ -25,6 +25,7 @@


using namespace caffe; // NOLINT(build/namespaces)
using boost::python::dict;
using boost::python::extract;
using boost::python::len;
using boost::python::list;
@@ -274,6 +275,11 @@ struct CaffeNet {

// The pointer to the internal caffe::Net instance.
shared_ptr<Net<float> > net_;
// Input preprocessing configuration attributes.
dict mean_;
dict input_scale_;
dict raw_scale_;
dict channel_swap_;
// if taking input from an ndarray, we need to hold references
object input_data_;
object input_labels_;
@@ -311,19 +317,23 @@ BOOST_PYTHON_MODULE(_caffe) {
boost::python::class_<CaffeNet, shared_ptr<CaffeNet> >(
"Net", boost::python::init<string, string>())
.def(boost::python::init<string>())
.def("_forward", &CaffeNet::Forward)
.def("_backward", &CaffeNet::Backward)
.def("set_mode_cpu", &CaffeNet::set_mode_cpu)
.def("set_mode_gpu", &CaffeNet::set_mode_gpu)
.def("set_phase_train", &CaffeNet::set_phase_train)
.def("set_phase_test", &CaffeNet::set_phase_test)
.def("set_device", &CaffeNet::set_device)
.add_property("_blobs", &CaffeNet::blobs)
.add_property("layers", &CaffeNet::layers)
.add_property("inputs", &CaffeNet::inputs)
.add_property("outputs", &CaffeNet::outputs)
.def("_set_input_arrays", &CaffeNet::set_input_arrays)
.def("save", &CaffeNet::save);
.def("_forward", &CaffeNet::Forward)
.def("_backward", &CaffeNet::Backward)
.def("set_mode_cpu", &CaffeNet::set_mode_cpu)
.def("set_mode_gpu", &CaffeNet::set_mode_gpu)
.def("set_phase_train", &CaffeNet::set_phase_train)
.def("set_phase_test", &CaffeNet::set_phase_test)
.def("set_device", &CaffeNet::set_device)
.add_property("_blobs", &CaffeNet::blobs)
.add_property("layers", &CaffeNet::layers)
.add_property("inputs", &CaffeNet::inputs)
.add_property("outputs", &CaffeNet::outputs)
.add_property("mean", &CaffeNet::mean_)
.add_property("input_scale", &CaffeNet::input_scale_)
.add_property("raw_scale", &CaffeNet::raw_scale_)
.add_property("channel_swap", &CaffeNet::channel_swap_)
.def("_set_input_arrays", &CaffeNet::set_input_arrays)
.def("save", &CaffeNet::save);

boost::python::class_<CaffeBlob, CaffeBlobWrap>(
"Blob", boost::python::no_init)
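
The _caffe.cpp change above exposes the preprocessing configuration (mean, input_scale, raw_scale, channel_swap) as boost::python dicts keyed by input blob name. A hedged sketch of how these look from Python, assuming the set_* helpers from python/caffe/pycaffe.py (which fill these dicts) and placeholder model paths:

import caffe

net = caffe.Net('deploy.prototxt', 'weights.caffemodel')  # placeholder paths

in_ = net.inputs[0]                   # name of the first input blob
net.set_raw_scale(in_, 255)           # pycaffe.py helpers populate the dicts
net.set_channel_swap(in_, (2, 1, 0))

# Each new property is a dict keyed by input blob name.
print(net.raw_scale[in_])             # 255
print(net.channel_swap.get(in_))      # the channel order set above
print(net.mean.get(in_))              # None until set_mean() is called
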
36 changes: 22 additions & 14 deletions python/caffe/classifier.py
@@ -14,13 +14,14 @@ class Classifier(caffe.Net):
by scaling, center cropping, or oversampling.
"""
def __init__(self, model_file, pretrained_file, image_dims=None,
gpu=False, mean_file=None, input_scale=None, channel_swap=None):
gpu=False, mean=None, input_scale=None, raw_scale=None,
channel_swap=None):
"""
Take
image_dims: dimensions to scale input for cropping/sampling.
Default is to scale to net input size for whole-image crop.
gpu, mean_file, input_scale, channel_swap: convenience params for
setting mode, mean, input scale, and channel order.
Default is to scale to net input size for whole-image crop.
gpu, mean, input_scale, raw_scale, channel_swap: params for
preprocessing options.
"""
caffe.Net.__init__(self, model_file, pretrained_file)
self.set_phase_test()
@@ -30,11 +31,13 @@ def __init__(self, model_file, pretrained_file, image_dims=None,
else:
self.set_mode_cpu()

if mean_file:
self.set_mean(self.inputs[0], mean_file)
if input_scale:
if mean is not None:
self.set_mean(self.inputs[0], mean)
if input_scale is not None:
self.set_input_scale(self.inputs[0], input_scale)
if channel_swap:
if raw_scale is not None:
self.set_raw_scale(self.inputs[0], raw_scale)
if channel_swap is not None:
self.set_channel_swap(self.inputs[0], channel_swap)

self.crop_dims = np.array(self.blobs[self.inputs[0]].data.shape[2:])
@@ -57,24 +60,29 @@ def predict(self, inputs, oversample=True):
for N images and C classes.
"""
# Scale to standardize input dimensions.
inputs = np.asarray([caffe.io.resize_image(im, self.image_dims)
for im in inputs])
input_ = np.zeros((len(inputs),
self.image_dims[0], self.image_dims[1], inputs[0].shape[2]),
dtype=np.float32)
for ix, in_ in enumerate(inputs):
input_[ix] = caffe.io.resize_image(in_, self.image_dims)

if oversample:
# Generate center, corner, and mirrored crops.
inputs = caffe.io.oversample(inputs, self.crop_dims)
input_ = caffe.io.oversample(input_, self.crop_dims)
else:
# Take center crop.
center = np.array(self.image_dims) / 2.0
crop = np.tile(center, (1, 2))[0] + np.concatenate([
-self.crop_dims / 2.0,
self.crop_dims / 2.0
])
inputs = inputs[:, crop[0]:crop[2], crop[1]:crop[3], :]
input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]

# Classify
caffe_in = np.asarray([self.preprocess(self.inputs[0], in_)
for in_ in inputs])
caffe_in = np.zeros(np.array(input_.shape)[[0,3,1,2]],
dtype=np.float32)
for ix, in_ in enumerate(input_):
caffe_in[ix] = self.preprocess(self.inputs[0], in_)
out = self.forward_all(**{self.inputs[0]: caffe_in})
predictions = out[self.outputs[0]].squeeze(axis=(2,3))

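
Taken together, the classifier.py changes replace the old mean_file argument with an explicit mean array and split scaling into raw_scale (for the raw [0, 1] input) and input_scale (for the preprocessed input). A hedged usage sketch with placeholder paths, not part of the diff:

import caffe

# Placeholder paths; the mean can now be passed as an ndarray via mean=...
net = caffe.Classifier('deploy.prototxt', 'weights.caffemodel',
                       image_dims=(256, 256),
                       raw_scale=255,           # inputs arrive in [0, 1] from caffe.io
                       channel_swap=(2, 1, 0),  # RGB -> BGR for the reference models
                       gpu=False)

im = caffe.io.load_image('cat.jpg')              # placeholder image
# predict() resizes to image_dims, takes crops (10 when oversample=True),
# preprocesses each crop into K x H x W order, and averages the crop scores.
scores = net.predict([im], oversample=True)
print(scores.shape)                              # (1, number of classes)
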
47 changes: 30 additions & 17 deletions python/caffe/detector.py
@@ -24,12 +24,13 @@ class Detector(caffe.Net):
Detector extends Net for windowed detection by a list of crops or
selective search proposals.
"""
def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None,
input_scale=None, channel_swap=None, context_pad=None):
def __init__(self, model_file, pretrained_file, gpu=False, mean=None,
input_scale=None, raw_scale=None, channel_swap=None,
context_pad=None):
"""
Take
gpu, mean_file, input_scale, channel_swap: convenience params for
setting mode, mean, input scale, and channel order.
gpu, mean, input_scale, raw_scale, channel_swap: params for
preprocessing options.
context_pad: amount of surrounding context to take s.t. a `context_pad`
sized border of pixels in the network input image is context, as in
R-CNN feature extraction.
@@ -42,11 +43,13 @@ def __init__(self, model_file, pretrained_file, gpu=False, mean_file=None,
else:
self.set_mode_cpu()

if mean_file:
self.set_mean(self.inputs[0], mean_file)
if input_scale:
if mean is not None:
self.set_mean(self.inputs[0], mean)
if input_scale is not None:
self.set_input_scale(self.inputs[0], input_scale)
if channel_swap:
if raw_scale is not None:
self.set_raw_scale(self.inputs[0], raw_scale)
if channel_swap is not None:
self.set_channel_swap(self.inputs[0], channel_swap)

self.configure_crop(context_pad)
@@ -73,8 +76,11 @@ def detect_windows(self, images_windows):
window_inputs.append(self.crop(image, window))

# Run through the net (warping windows to input dimensions).
caffe_in = np.asarray([self.preprocess(self.inputs[0], window_in)
for window_in in window_inputs])
caffe_in = np.zeros((len(window_inputs), window_inputs[0].shape[2])
+ self.blobs[self.inputs[0]].data.shape[2:],
dtype=np.float32)
for ix, window_in in enumerate(window_inputs):
caffe_in[ix] = self.preprocess(self.inputs[0], window_in)
out = self.forward_all(**{self.inputs[0]: caffe_in})
predictions = out[self.outputs[0]].squeeze(axis=(2,3))

@@ -180,12 +186,19 @@ def configure_crop(self, context_pad):
"""
self.context_pad = context_pad
if self.context_pad:
input_scale = self.input_scale.get(self.inputs[0])
raw_scale = self.raw_scale.get(self.inputs[0])
channel_order = self.channel_swap.get(self.inputs[0])
# Padding context crops needs the mean in unprocessed input space.
self.crop_mean = self.mean[self.inputs[0]].copy()
self.crop_mean = self.crop_mean.transpose((1,2,0))
channel_order_inverse = [channel_order.index(i)
for i in range(self.crop_mean.shape[2])]
self.crop_mean = self.crop_mean[:,:, channel_order_inverse]
self.crop_mean /= input_scale
mean = self.mean.get(self.inputs[0])
if mean is not None:
crop_mean = mean.copy().transpose((1,2,0))
if channel_order is not None:
channel_order_inverse = [channel_order.index(i)
for i in range(crop_mean.shape[2])]
crop_mean = crop_mean[:,:, channel_order_inverse]
if raw_scale is not None:
crop_mean /= raw_scale
self.crop_mean = crop_mean
else:
self.crop_mean = np.zeros(self.blobs[self.inputs[0]].data.shape,
dtype=np.float32)
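
The detector.py constructor gets the same parameters, and configure_crop now tolerates missing mean / raw_scale / channel_swap settings. A hedged sketch of windowed detection with placeholder paths; the (filename, windows) input pairing and the keys of the returned entries are assumptions based on the R-CNN detection example of this era, not shown in the diff:

import numpy as np
import caffe

detector = caffe.Detector('rcnn_deploy.prototxt', 'rcnn.caffemodel',  # placeholders
                          raw_scale=255,
                          channel_swap=(2, 1, 0),
                          context_pad=16)   # R-CNN style context padding

# One whole-image window per image as (ymin, xmin, ymax, xmax),
# assuming a 640x480 input image.
windows = np.array([[0, 0, 480, 640]])
detections = detector.detect_windows([('fish-bike.jpg', windows)])
# Each entry is expected to pair a window with its prediction vector,
# e.g. {'window': ..., 'prediction': ..., 'filename': ...}.
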
18 changes: 15 additions & 3 deletions python/caffe/io.py
@@ -1,6 +1,7 @@
import numpy as np
import skimage.io
import skimage.transform
from scipy.ndimage import zoom
from skimage.transform import resize

from caffe.proto import caffe_pb2

@@ -15,7 +16,8 @@ def load_image(filename, color=True):
loads as intensity (if image is already grayscale).
Give
image: an image with type np.float32 of size (H x W x 3) in RGB or
image: an image with type np.float32 in range [0, 1]
of size (H x W x 3) in RGB or
of size (H x W x 1) in grayscale.
"""
img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32)
@@ -40,7 +42,17 @@ def resize_image(im, new_dims, interp_order=1):
Give
im: resized ndarray with shape (new_dims[0], new_dims[1], K)
"""
return skimage.transform.resize(im, new_dims, order=interp_order)
if im.shape[-1] == 1 or im.shape[-1] == 3:
# skimage is fast but only understands {1,3} channel images in [0, 1].
im_min, im_max = im.min(), im.max()
im_std = (im - im_min) / (im_max - im_min)
resized_std = resize(im_std, new_dims, order=interp_order)
resized_im = resized_std * (im_max - im_min) + im_min
else:
# ndimage interpolates anything but more slowly.
scale = tuple(np.array(new_dims) / np.array(im.shape[:2]))
resized_im = zoom(im, scale + (1,), order=interp_order)
return resized_im.astype(np.float32)


def oversample(images, crop_dims):
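
The resize_image rewrite keeps the fast skimage path for 1- and 3-channel images (rescaling to [0, 1] and back so out-of-range inputs survive the round trip) and falls back to scipy.ndimage.zoom for any other channel count, per the comment in the diff. A small illustrative sketch (not part of the diff):

import numpy as np
import caffe

# 3-channel float image: handled by the skimage path.
rgb = np.random.rand(480, 640, 3).astype(np.float32)
print(caffe.io.resize_image(rgb, (256, 256)).shape)     # (256, 256, 3)

# 4 channels: outside the {1, 3} cases skimage handles here, so it goes
# through scipy.ndimage.zoom (2x upscaling keeps the zoom factors integral).
multi = np.random.rand(128, 128, 4).astype(np.float32)
print(caffe.io.resize_image(multi, (256, 256)).shape)   # (256, 256, 4)
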