OpenCV  4.10.0-dev
Open Source Computer Vision
No Matches
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "common.hpp"
// Command-line key specification in cv::CommandLineParser syntax:
// "{ name [alias] | default | help text }". It is intentionally non-const:
// main() appends model-specific preprocessing keys to it before re-parsing.
std::string keys =
    "{ help h | | Print help message. }"
    "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }"
    "{ zoo | models.yml | An optional path to file with preprocessing parameters }"
    "{ device | 0 | camera device number. }"
    "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }"
    "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
    "{ classes | | Optional path to a text file with names of classes. }"
    "{ colors | | Optional path to a text file with colors for an every class. "
    "An every color is represented with three values from 0 to 255 in BGR channels order. }"
    "{ backend | 0 | Choose one of computation backends: "
    "0: automatically (by default), "
    // The two project URLs below had been stripped, leaving a dangling "(, ".
    "1: Halide language (http://halide-lang.org/), "
    "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
    "3: OpenCV implementation, "
    "4: VKCOM, "
    "5: CUDA }"
    "{ target | 0 | Choose one of target computation devices: "
    "0: CPU target (by default), "
    "1: OpenCL, "
    "2: OpenCL fp16 (half-float precision), "
    "3: VPU, "
    "4: Vulkan, "
    "6: CUDA, "
    "7: CUDA fp16 (half-float preprocess) }";
// Pull the OpenCV core and DNN namespaces into file scope for brevity.
using namespace cv;
using namespace dnn;
// Class labels; showLegend() draws classes[i] as the text of legend row i.
std::vector<std::string> classes;
// Per-class colors; colorizeSegmentation() paints every pixel with the color
// stored at the index of that pixel's winning class.
std::vector<Vec3b> colors;
// Builds and displays the "Legend" window from `classes` and `colors`.
void showLegend();
// Converts a per-class score blob into a 3-channel 8-bit class-color image.
void colorizeSegmentation(const Mat &score, Mat &segm);
int main(int argc, char** argv)
CommandLineParser parser(argc, argv, keys);
const std::string modelName = parser.get<String>("@alias");
const std::string zooFile = parser.get<String>("zoo");
keys += genPreprocArguments(modelName, zooFile);
parser = CommandLineParser(argc, argv, keys);
parser.about("Use this script to run semantic segmentation deep learning networks using OpenCV.");
if (argc == 1 || parser.has("help"))
return 0;
float scale = parser.get<float>("scale");
Scalar mean = parser.get<Scalar>("mean");
bool swapRB = parser.get<bool>("rgb");
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
String model = findFile(parser.get<String>("model"));
String config = findFile(parser.get<String>("config"));
String framework = parser.get<String>("framework");
int backendId = parser.get<int>("backend");
int targetId = parser.get<int>("target");
// Open file with classes names.
if (parser.has("classes"))
std::string file = parser.get<String>("classes");
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
// Open file with colors.
if (parser.has("colors"))
std::string file = parser.get<String>("colors");
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
std::istringstream colorStr(line.c_str());
Vec3b color;
for (int i = 0; i < 3 && !colorStr.eof(); ++i)
colorStr >> color[i];
if (!parser.check())
return 1;
Net net = readNet(model, config, framework);
// Create a window
static const std::string kWinName = "Deep learning semantic segmentation in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
if (parser.has("input"))<String>("input"));
// Process frames.
Mat frame, blob;
while (waitKey(1) < 0)
cap >> frame;
if (frame.empty())
blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false);
Mat score = net.forward();
Mat segm;
colorizeSegmentation(score, segm);
resize(segm, segm, frame.size(), 0, 0, INTER_NEAREST);
addWeighted(frame, 0.1, segm, 0.9, 0.0, frame);
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
if (!classes.empty())
return 0;
void colorizeSegmentation(const Mat &score, Mat &segm)
const int rows = score.size[2];
const int cols = score.size[3];
const int chns = score.size[1];
if (colors.empty())
// Generate colors.
for (int i = 1; i < chns; ++i)
Vec3b color;
for (int j = 0; j < 3; ++j)
color[j] = (colors[i - 1][j] + rand() % 256) / 2;
else if (chns != (int)colors.size())
CV_Error(Error::StsError, format("Number of output classes does not match "
"number of colors (%d != %zu)", chns, colors.size()));
Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
Mat maxVal(rows, cols, CV_32FC1,;
for (int ch = 1; ch < chns; ch++)
for (int row = 0; row < rows; row++)
const float *ptrScore = score.ptr<float>(0, ch, row);
uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
float *ptrMaxVal = maxVal.ptr<float>(row);
for (int col = 0; col < cols; col++)
if (ptrScore[col] > ptrMaxVal[col])
ptrMaxVal[col] = ptrScore[col];
ptrMaxCl[col] = (uchar)ch;
segm.create(rows, cols, CV_8UC3);
for (int row = 0; row < rows; row++)
const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
Vec3b *ptrSegm = segm.ptr<Vec3b>(row);
for (int col = 0; col < cols; col++)
ptrSegm[col] = colors[ptrMaxCl[col]];
void showLegend()
static const int kBlockHeight = 30;
static Mat legend;
if (legend.empty())
const int numClasses = (int)classes.size();
if ((int)colors.size() != numClasses)
CV_Error(Error::StsError, format("Number of output classes does not match "
"number of labels (%zu != %zu)", colors.size(), classes.size()));
legend.create(kBlockHeight * numClasses, 200, CV_8UC3);
for (int i = 0; i < numClasses; i++)
Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight);
putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255));
namedWindow("Legend", WINDOW_NORMAL);
imshow("Legend", legend);
cv::CommandLineParser: Designed for command line parsing.
Definition utility.hpp:832
cv::Mat: n-dimensional dense array class
Definition mat.hpp:812
Mat & setTo(InputArray value, InputArray mask=noArray())
Sets all or some of the array elements to the specified value.
MatSize size
Definition mat.hpp:2160
uchar * data
pointer to the data
Definition mat.hpp:2140
void create(int rows, int cols, int type)
Allocates new array data if needed.
uchar * ptr(int i0=0)
Returns a pointer to the specified matrix row.
Mat rowRange(int startrow, int endrow) const
Creates a matrix header for the specified row span.
bool empty() const
Returns true if the array has no elements.
cv::Size_: Template class for specifying the size of an image or rectangle.
Definition types.hpp:335
cv::Vec: Template class for short numerical vectors, a partial case of Matx.
Definition matx.hpp:369
cv::VideoCapture: Class for video capturing from video files, image sequences or cameras.
Definition videoio.hpp:731
virtual bool open(const String &filename, int apiPreference=CAP_ANY)
Opens a video file or a capturing device or an IP video stream for video capturing.
void addWeighted(InputArray src1, double alpha, InputArray src2, double beta, double gamma, OutputArray dst, int dtype=-1)
Calculates the weighted sum of two arrays.
std::string String
Definition cvstd.hpp:151
#define CV_32FC1
Definition interface.h:118
unsigned char uchar
Definition interface.h:51
#define CV_8UC1
Definition interface.h:88
#define CV_8UC3
Definition interface.h:90
String format(const char *fmt,...)
Returns a text string formatted using the printf-like expression.
#define CV_Error(code, msg)
Call the error handler.
Definition base.hpp:335
double getTickFrequency()
Returns the number of ticks per second.
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition base.hpp:359
Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size &size=Size(), const Scalar &mean=Scalar(), bool swapRB=false, bool crop=false, int ddepth=CV_32F)
Creates 4-dimensional blob from image. Optionally resizes and crops image from center,...
Net readNet(CV_WRAP_FILE_PATH const String &model, CV_WRAP_FILE_PATH const String &config="", const String &framework="")
Read deep learning network represented in one of the supported formats.
void imshow(const String &winname, InputArray mat)
Displays an image in the specified window.
int waitKey(int delay=0)
Waits for a pressed key.
void namedWindow(const String &winname, int flags=WINDOW_AUTOSIZE)
Creates a window.
void putText(InputOutputArray img, const String &text, Point org, int fontFace, double fontScale, Scalar color, int thickness=1, int lineType=LINE_8, bool bottomLeftOrigin=false)
Draws a text string.
void line(InputOutputArray img, Point pt1, Point pt2, const Scalar &color, int thickness=1, int lineType=LINE_8, int shift=0)
Draws a line segment connecting two points.
void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation=INTER_LINEAR)
Resizes an image.
int main(int argc, char *argv[])
Definition highgui_qt.cpp:3
"black box" representation of the file storage associated with a file on disk.
Definition core.hpp:102