In this tutorial you will learn how to use the opencv_dnn module for image classification, using a trained GoogLeNet network from the Caffe model zoo.
We will demonstrate the results of this example on the following picture.
#include <fstream>
#include <sstream>
// Command-line option table. Adjacent string literals are concatenated into
// one string; each entry has the form "{ name [alias] | default | help text }".
// The option names declared here are the ones looked up through `parser` in
// main (for example "scale", "rgb", "backend" and "target").
const char* keys =
    "{ help h | | Print help message. }"
    "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
    "{ model m | | Path to a binary file of model contains trained weights. "
        "It could be a file with extensions .caffemodel (Caffe), "
        ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
    "{ config c | | Path to a text file of model contains network configuration. "
        "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
    "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
    "{ classes | | Optional path to a text file with names of classes. }"
    "{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
    "{ scale | 1 | Preprocess input image by multiplying on a scale factor. }"
    "{ width | | Preprocess input image by resizing to a specific width. }"
    "{ height | | Preprocess input image by resizing to a specific height. }"
    "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
    "{ backend | 0 | Choose one of computation backends: "
        "0: automatically (by default), "
        // The two entries below previously ended in a dangling "(" with no
        // content, which showed up as broken text in the printed help.
        "1: Halide language, "
        "2: Intel's Deep Learning Inference Engine, "
        "3: OpenCV implementation }"
    "{ target | 0 | Choose one of target computation devices: "
        "0: CPU target (by default), "
        "1: OpenCL, "
        "2: OpenCL fp16 (half-float precision), "
        "3: VPU }";
// Makes names from namespace `dnn` usable without qualification.
// NOTE(review): presumably this is what lets main write `Net` and `readNet`
// unqualified — confirm against the full file.
using namespace dnn;
// Global list of class names. main tests classes.empty() to decide whether a
// class name or a generic "Class #<id>" label is shown.
// NOTE(review): presumably filled line by line from the file given by the
// "classes" option; the statement that appends to it is not visible here.
std::vector<std::string> classes;
// Entry point of the classification sample.
// NOTE(review): this listing is an excerpt — the braces and a number of
// statements of the full program (including the declarations of `parser`,
// `model`, `config`, `framework`, `cap`, `frame`, `line`, `freq` and
// `classIdPoint`) are not shown here.
int main(int argc, char** argv)
// Set the description text on the command-line parser.
parser.about("Use this script to run classification deep learning networks using OpenCV.");
// Exit with status 0 when the program was started without arguments or the
// "help" option is present.
if (argc == 1 || parser.has("help"))
return 0;
// Read the preprocessing options.
float scale = parser.get<float>("scale");
bool swapRB = parser.get<bool>("rgb");
// NOTE(review): the next line is a fragment of a longer statement whose
// beginning and end are missing from this excerpt.
"width"), parser.has(
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
// Numeric backend / target selectors; the meaning of each value is listed in
// the "backend" and "target" help texts of `keys`.
int backendId = parser.get<int>("backend");
int targetId = parser.get<int>("target");
// When the "classes" option is present, open the file it names and read it
// line by line.
if (parser.has("classes"))
// NOTE(review): this statement is cut off in the excerpt.
std::string file = parser.get<
std::ifstream ifs(file.c_str());
// NOTE(review): the handling for a file that failed to open is not shown.
if (!ifs.is_open())
while (std::getline(ifs, line))
// Load the network from the model file, configuration file and framework name.
Net net =
readNet(model, config, framework);
// Title string for the output window.
static const std::string kWinName = "Deep learning image classification in OpenCV";
// NOTE(review): the body of the "input" branch is not shown; presumably it
// opens the given file on `cap` — confirm.
if (parser.has("input"))
// Pull the next frame from `cap`.
cap >> frame;
// NOTE(review): what happens on an empty frame is not shown in this excerpt.
if (frame.empty())
// Run the network and take its output.
Mat prob = net.forward();
double confidence;
// NOTE(review): this statement is cut off; the class id is derived from a
// member of `classIdPoint`.
int classId = classIdPoint.
// Ask the network for its performance profile, scale the result by `freq`
// and format it as an "Inference time" label in milliseconds.
std::vector<double> layersTimes;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
// Second label: when `classes` is empty, fall back to "Class #<id>".
// NOTE(review): the rest of this expression is cut off in the excerpt.
label = format("%s: %.4f", (classes.empty() ? format("Class #%d", classId).c_str() :
return 0;