In this tutorial you will learn how to use the opencv_dnn module for image classification, using the trained GoogLeNet network from the Caffe model zoo.
We will demonstrate the results of this example on the following picture.
#include <fstream>
#include <sstream>
// Option table for the sample's command line. Every entry is written as
// "{ name [short alias] | default value | help text }"; the adjacent string
// literals below are concatenated by the compiler into a single string.
// NOTE(review): presumably handed to the `parser` object used in main() — its
// construction is not visible in this excerpt.
const char* keys =
    // General options.
    "{ help h | | Print help message. }"
    "{ input i | | Path to input image or video file. "
    "Skip this argument to capture frames from a camera.}"
    // Model files and originating framework.
    "{ model m | | Path to a binary file of model contains trained weights. "
    "It could be a file with extensions .caffemodel (Caffe), .pb (TensorFlow), "
    ".t7 or .net (Torch), .weights (Darknet) }"
    "{ config c | | Path to a text file of model contains network configuration. "
    "It could be a file with extensions .prototxt (Caffe), "
    ".pbtxt (TensorFlow), .cfg (Darknet) }"
    "{ framework f | | Optional name of an origin framework of the model. "
    "Detect it automatically if it does not set. }"
    "{ classes | | Optional path to a text file with names of classes. }"
    // Input preprocessing.
    "{ mean | | Preprocess input image by subtracting mean values. "
    "Mean values should be in BGR order and delimited by spaces. }"
    "{ scale | 1 | Preprocess input image by multiplying on a scale factor. }"
    "{ width | | Preprocess input image by resizing to a specific width. }"
    "{ height | | Preprocess input image by resizing to a specific height. }"
    "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
    // Computation backend and target device.
    "{ backend | 0 | Choose one of computation backends: 0: automatically (by default), "
    "1: Halide language (http://halide-lang.org/), 2: Intel's Deep Learning Inference Engine "
    "(https://software.intel.com/openvino-toolkit), 3: OpenCV implementation }"
    "{ target | 0 | Choose one of target computation devices: 0: CPU target (by default), "
    "1: OpenCL, 2: OpenCL fp16 (half-float precision), 3: VPU }";
// NOTE(review): `dnn` is presumably cv::dnn; no enclosing `using namespace cv;`
// is visible in this excerpt — confirm it exists in the full file.
using namespace dnn;
// Class names, one entry per line of the file given by the "classes" option.
// Filled in main() and indexed by the predicted class id when the label is
// formatted; stays empty when the option is not supplied.
std::vector<std::string> classes;
// Entry point of the classification sample.
// Reads the preprocessing and backend options, optionally loads class names,
// builds the network with readNet(), runs a forward pass on a captured frame
// and formats a text label for the predicted class.
// NOTE(review): this listing looks like an abridged excerpt. Several names used
// below (parser, model, config, framework, cap, frame, blob, classIdPoint,
// freq, line) have no visible declaration, and some statements are incomplete,
// so the function does not compile as shown.
int main(int argc, char** argv)
{
// Attach the description text to the parser.
parser.about("Use this script to run classification deep learning networks using OpenCV.");
// With no arguments at all, or when help is requested, print the usage
// message and exit successfully.
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
// Preprocessing options: scale factor and whether the model expects RGB input.
float scale = parser.get<float>("scale");
bool swapRB = parser.get<bool>("rgb");
// Width and height have no default in the option table, so both are checked
// for presence before being read.
// NOTE(review): two comma-separated expressions are passed to CV_Assert —
// confirm the OpenCV version in use accepts more than one argument.
CV_Assert(parser.has(
"width"), parser.has(
"height"));
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
// Numeric ids of the computation backend and target device (see option table).
int backendId = parser.get<int>("backend");
int targetId = parser.get<int>("target");
// Optional: load class names, one per line, into the global `classes` vector.
if (parser.has("classes"))
{
std::string file = parser.get<
String>(
"classes");
std::ifstream ifs(file.c_str());
// NOTE(review): the statement guarded by this `if` appears to be missing from
// the excerpt. As written, the `while` loop below becomes its body, so lines
// would only be read when the file FAILED to open. `line` is also undeclared.
if (!ifs.is_open())
while (std::getline(ifs, line))
{
classes.push_back(line);
}
}
// Build the network from the model/config/framework values and apply the
// requested backend and target.
Net net =
readNet(model, config, framework);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
// NOTE(review): presumably a window title; no use of it is visible here.
static const std::string kWinName = "Deep learning image classification in OpenCV";
// NOTE(review): the body of this `if` is missing, which leaves `if (...) else`
// — not valid C++ as shown. Presumably it selects between an input file and a
// camera; confirm against the full source.
if (parser.has("input"))
else
{
// Grab the next frame and stop when none is available.
// NOTE(review): `break` needs an enclosing loop that is not visible here.
cap >> frame;
if (frame.empty())
{
break;
}
// Run the forward pass on the prepared input.
// NOTE(review): the construction of `blob` from `frame` is not shown.
net.setInput(blob);
Mat prob = net.forward();
// NOTE(review): `confidence` is never assigned in the visible code and
// `classIdPoint` is undeclared; the step that fills them from `prob` appears
// to have been elided.
double confidence;
int classId = classIdPoint.
x;
// Timing: overall value from getPerfProfile() divided by `freq`; the label
// below formats it as milliseconds.
std::vector<double> layersTimes;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
// Second label: the class name when names were loaded, otherwise "Class #<id>",
// followed by the confidence.
// NOTE(review): `label` is overwritten without its first value being used in
// the visible code (display calls are presumably elided), and
// `classes[classId]` is indexed without a bounds check.
label = format("%s: %.4f", (classes.empty() ? format("Class #%d", classId).c_str() :
classes[classId].c_str()),
confidence);
}
return 0;
}