forked from fanweng/Udacity-Sensor-Fusion-Nanodegree
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetect_objects_2.cpp
141 lines (120 loc) · 4.97 KB
/
detect_objects_2.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#include <iostream>
#include <numeric>
#include <fstream>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>
#include "dataStructures.h"
using namespace std;
void detectObjects2()
{
// load image from file
cv::Mat img = cv::imread("../images/0000000000.png");
// load class names from file
string yoloBasePath = "../dat/yolo/";
string yoloClassesFile = yoloBasePath + "coco.names";
string yoloModelConfiguration = yoloBasePath + "yolov3.cfg";
string yoloModelWeights = yoloBasePath + "yolov3.weights";
vector<string> classes;
ifstream ifs(yoloClassesFile.c_str());
string line;
while (getline(ifs, line)) classes.push_back(line);
// load neural network
cv::dnn::Net net = cv::dnn::readNetFromDarknet(yoloModelConfiguration, yoloModelWeights);
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
// generate 4D blob from input image
cv::Mat blob;
double scalefactor = 1/255.0;
cv::Size size = cv::Size(608, 608);
cv::Scalar mean = cv::Scalar(0,0,0);
bool swapRB = false;
bool crop = false;
cv::dnn::blobFromImage(img, blob, scalefactor, size, mean, swapRB, crop);
// Get names of output layers
vector<cv::String> names;
vector<int> outLayers = net.getUnconnectedOutLayers(); // get indices of output layers, i.e. layers with unconnected outputs
vector<cv::String> layersNames = net.getLayerNames(); // get names of all layers in the network
names.resize(outLayers.size());
for (size_t i = 0; i < outLayers.size(); ++i) // Get the names of the output layers in names
{
names[i] = layersNames[outLayers[i] - 1];
}
// invoke forward propagation through network
vector<cv::Mat> netOutput;
net.setInput(blob);
net.forward(netOutput, names);
// Scan through all bounding boxes and keep only the ones with high confidence
float confThreshold = 0.30;
vector<int> classIds;
vector<float> confidences;
vector<cv::Rect> boxes;
for (size_t i = 0; i < netOutput.size(); ++i)
{
float* data = (float*)netOutput[i].data;
for (int j = 0; j < netOutput[i].rows; ++j, data += netOutput[i].cols)
{
cv::Mat scores = netOutput[i].row(j).colRange(5, netOutput[i].cols);
cv::Point classId;
double confidence;
// Get the value and location of the maximum score
cv::minMaxLoc(scores, 0, &confidence, 0, &classId);
if (confidence > confThreshold)
{
cv::Rect box; int cx, cy;
cx = (int)(data[0] * img.cols);
cy = (int)(data[1] * img.rows);
box.width = (int)(data[2] * img.cols);
box.height = (int)(data[3] * img.rows);
box.x = cx - box.width/2; // left
box.y = cy - box.height/2; // top
boxes.push_back(box);
classIds.push_back(classId.x);
confidences.push_back((float)confidence);
}
}
}
// perform non-maxima suppression
float nmsThreshold = 0.3; // Non-maximum suppression threshold
vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
std::vector<BoundingBox> bBoxes;
for (auto it = indices.begin(); it != indices.end(); ++it)
{
BoundingBox bBox;
bBox.roi = boxes[*it];
bBox.classID = classIds[*it];
bBox.confidence = confidences[*it];
bBox.boxID = (int)bBoxes.size(); // zero-based unique identifier for this bounding box
bBoxes.push_back(bBox);
}
// show results
cv::Mat visImg = img.clone();
for (auto it = bBoxes.begin(); it != bBoxes.end(); ++it)
{
// Draw rectangle displaying the bounding box
int top, left, width, height;
top = (*it).roi.y;
left = (*it).roi.x;
width = (*it).roi.width;
height = (*it).roi.height;
cv::rectangle(visImg, cv::Point(left, top), cv::Point(left + width, top + height), cv::Scalar(0, 255, 0), 2);
string label = cv::format("%.2f", (*it).confidence);
label = classes[((*it).classID)] + ":" + label;
// Display label at the top of the bounding box
int baseLine;
cv::Size labelSize = getTextSize(label, cv::FONT_ITALIC, 0.3, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(visImg, cv::Point(left, top - round(1.5 * labelSize.height)), cv::Point(left + round(1.5 * labelSize.width), top + baseLine), cv::Scalar(255, 255, 255), cv::FILLED);
cv::putText(visImg, label, cv::Point(left, top), cv::FONT_ITALIC, 0.3, cv::Scalar(0, 0, 0), 1);
}
string windowName = "Object classification";
cv::namedWindow( windowName, 1 );
cv::imshow( windowName, visImg );
cv::waitKey(0); // wait for key to be pressed
}
int main()
{
detectObjects2();
}