#include <iostream>
#include <fstream>
#include "common.hpp"
// Usage text for the sample. main() prints it when help is requested or the
// model alias is missing, and also registers it via parser.about().
const string about =
    "Use this script for Text Detection and Recognition using OpenCV. \n\n"
    "Firstly, download required models using `download_models.py` (if not already done). "
    "Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to point to the directory where models are downloaded. "
    "Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n"
    "To run:\n"
    "\t Example: ./example_dnn_text_detection modelName(i.e. DB or East) --ocr_model=<path to VGG_CTC.onnx>\n\n"
    "Detection model path can also be specified using --model argument. \n\n"
    "Download ocr model using: python download_models.py OCR \n\n";
// Command-line option table in "{ name alias | default | description }" form.
// Not const: main() appends the output of genPreprocArguments() to it.
string keys =
    // General options.
    "{ help h | | Print help message. }"
    "{ input i | right.jpg | Path to an input image. }"
    "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }"
    "{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }"
    // Model paths.
    "{ ocr_model | | Path to a binary .onnx model for recognition. }"
    "{ model | | Path to detection model file. }"
    // EAST detector options.
    "{ thr | 0.5 | Confidence threshold for EAST detector. }"
    "{ nms | 0.4 | Non-maximum suppression threshold for EAST detector. }"
    // DB detector options.
    "{ binaryThreshold bt | 0.3 | Confidence threshold for the binary map in DB detector. }"
    "{ polygonThreshold pt | 0.5 | Confidence threshold for polygons in DB detector. }"
    "{ maxCandidate max | 200 | Max candidates for polygons in DB detector. }"
    "{ unclipRatio ratio | 2.0 | Unclip ratio for DB detector. }"
    // Recognition vocabulary.
    "{ vocabularyPath vp | alphabet_36.txt | Path to vocabulary file. }";
// Forward declaration; the definition follows main(). processFrame() calls it
// with a source image, a pointer to the first of the quadrangle's corner
// points, and the Mat that receives the output.
static void fourPointsTransform(const Mat& frame, const Point2f vertices[], Mat& result);
// Forward declaration of the recognition/drawing step; defined after main().
// NOTE(review): main() calls processFrame with nine arguments (including
// frame, board and fontFace) while only six parameters are listed here --
// this listing appears to have lost some parameter lines; confirm against the
// complete source.
static void processFrame(const vector<vector<Point>>& detResults,
                         const std::string& ocr_model,
                         bool imreadRGB,
                         int fontSize,
                         int fontWeight,
                         const vector<std::string>& vocabulary);
int main(
int argc,
char** argv) {
if (!parser.has("@alias") || parser.has("help"))
{
cout << about << endl;
parser.printMessage();
return -1;
}
const string modelName = parser.get<
String>(
"@alias");
const string zooFile = findFile(parser.get<
String>(
"zoo"));
keys += genPreprocArguments(modelName, zooFile, "");
keys += genPreprocArguments(modelName, zooFile, "ocr_");
parser.about(about);
if (sha1.empty()){
sha1 = parser.get<
String>(
"sha1");
}
String detModelPath = findModel(parser.get<
String>(
"model"), sha1);
String ocr = findModel(parser.get<
String>(
"ocr_model"), ocr_sha1);
int height = parser.get<int>("height");
int width = parser.get<int>("width");
bool imreadRGB = parser.get<bool>("rgb");
float binThresh = parser.get<float>("binaryThreshold");
float polyThresh = parser.get<float>("polygonThreshold");
double unclipRatio = parser.get<double>("unclipRatio");
uint maxCandidates = parser.get<
uint>(
"maxCandidate");
float confThreshold = parser.get<float>("thr");
float nmsThreshold = parser.get<float>("nms");
if (!parser.check()) {
parser.printErrors();
return 1;
}
vector<vector<Point>> detResults;
int stdSize = 20;
int stdWeight = 400;
int stdImgSize = 512;
int size = (stdSize*imgWidth)/stdImgSize;
int weight = (stdWeight*imgWidth)/stdImgSize;
if (modelName == "East") {
detector.setConfidenceThreshold(confThreshold)
.setNMSThreshold(nmsThreshold);
detector.setInputParams(1.0,
Size(width, height), mean,
true);
detector.detect(frame, detResults);
}
else if (modelName == "DB") {
detector.setBinaryThreshold(binThresh)
.setPolygonThreshold(polyThresh)
.setUnclipRatio(unclipRatio)
.setMaxCandidates(maxCandidates);
detector.setInputParams(1.0 / 255.0,
Size(width, height), mean);
detector.detect(frame, detResults);
}
else {
cout << "[ERROR]: Unsupported file config for the detector model. Valid values: east/db" << endl;
return 1;
}
ifstream vocFile;
vocFile.open(samples::findFile(vocPath));
std::string vocLine;
vector<std::string> vocabulary;
while (getline(vocFile, vocLine)) {
vocabulary.push_back(vocLine);
}
processFrame(frame, detResults, ocr, imreadRGB, board, fontFace, size, weight, vocabulary);
return 0;
}
// Definition of fourPointsTransform. processFrame() calls it with a source
// image, a pointer to the first of four corner points, and the Mat that
// receives the output.
// NOTE(review): the body is missing from this listing. Only a stray "};" and
// an extra closing brace remain, so the block is not valid C++ as shown;
// restore the body from the complete source before building.
static void fourPointsTransform(
const Mat& frame,
const Point2f vertices[],
Mat& result) {
};
}
// Recognises and reports the text inside each detected quadrangle, then shows
// the result window.
//
// detResults  - detected regions; the first four points of every entry are read.
// ocr_model   - recognition model path; when empty, recognition is skipped and
//               a warning is printed for each detection.
// imreadRGB   - when true, `frame` is used directly as the recognition input.
// fontSize    - forwarded to putText.
// fontWeight  - forwarded to putText.
// vocabulary  - passed to recognizer.setVocabulary().
//
// NOTE(review): frame, recInput, cropped, recognizer, recInputSize, recMean,
// board, fontFace and stacked are used below but not declared in this listing
// (neither as parameters nor as locals); their declarations appear to have
// been lost. Confirm against the complete source.
void processFrame(
const vector<vector<Point>>& detResults,
const std::string& ocr_model,
bool imreadRGB,
int fontSize,
int fontWeight,
const vector<std::string>& vocabulary
) {
if (detResults.size() > 0) {
// NOTE(review): the !imreadRGB branch is empty here, so recInput is only
// assigned when imreadRGB is true -- presumably a conversion step is missing.
if (!imreadRGB) {
} else {
recInput = frame;
}
// Copies of the detected quadrangles; not used further in the visible code.
vector<vector<Point>> contours;
for (
uint i = 0; i < detResults.size(); i++) {
const auto& quadrangle = detResults[i];
contours.emplace_back(quadrangle);
// Convert the first four corner points to Point2f for the transform.
// Indices 0..3 are read without a size check, so every entry must hold at
// least four points.
vector<Point2f> quadrangle_2f;
for (int j = 0; j < 4; j++)
quadrangle_2f.emplace_back(detResults[i][j]);
fourPointsTransform(recInput, &quadrangle_2f[0], cropped);
if(!ocr_model.empty()){
// The recognizer is reconfigured on every iteration before recognising the
// cropped region.
recognizer.setVocabulary(vocabulary);
recognizer.setDecodeType("CTC-greedy");
double recScale = 1.0 / 127.5;
recognizer.setInputParams(recScale, recInputSize, recMean);
string recognitionResult = recognizer.recognize(cropped);
cout << i << ": '" << recognitionResult << "'" << endl;
// The text is anchored at the x of corner 1 and the y of corner 0.
putText(board, recognitionResult,
Point(detResults[i][1].x, detResults[i][0].y),
Scalar(0, 0, 0), fontFace, fontSize, fontWeight);
}
else{
cout << "[WARN] Please pass the path to the ocr model using --ocr_model to get the recognised text." << endl;
}
}
} else {
cout << "No Text Detected." << endl;
}
// Shown whether or not any text was detected.
imshow(
"Text Detection and Recognition", stacked);
}
#define CV_CheckEQ(v1, v2, msg)
Supported values of these types: int, float, double.
Definition check.hpp:120
Designed for command line parsing.
Definition utility.hpp:890
Wrapper on top of a truetype/opentype/etc font, i.e. Freetype's FT_Face.
Definition imgproc.hpp:4991
n-dimensional dense array class
Definition mat.hpp:950
MatSize size
Definition mat.hpp:2447
int cols
Definition mat.hpp:2424
int rows
the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
Definition mat.hpp:2424
int type() const
Returns the type of a matrix element.
Template class for specifying the size of an image or rectangle.
Definition types.hpp:338
_Tp height
the height
Definition types.hpp:366
_Tp width
the width
Definition types.hpp:365
This class represents high-level API for text detection DL networks compatible with DB model.
Definition dnn.hpp:2029
This class represents high-level API for text detection DL networks compatible with EAST model.
Definition dnn.hpp:1968
This class represents high-level API for text recognition networks.
Definition dnn.hpp:1812
Scalar mean(InputArray src, InputArray mask=noArray())
Calculates an average (mean) of array elements.
void min(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element minimum of two arrays or an array and a scalar.
void hconcat(const Mat *src, size_t nsrc, OutputArray dst)
Applies horizontal concatenation to given matrices.
std::string String
Definition cvstd.hpp:151
uint32_t uint
Definition interface.h:42
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition exception.hpp:198
void imshow(const String &winname, InputArray mat)
Displays an image in the specified window.
int waitKey(int delay=0)
Waits for a pressed key.
CV_EXPORTS_W Mat imread(const String &filename, int flags=IMREAD_COLOR_BGR)
Loads an image from a file.
void cvtColor(InputArray src, OutputArray dst, int code, int dstCn=0, AlgorithmHint hint=cv::ALGO_HINT_DEFAULT)
Converts an image from one color space to another.
COLOR_BGR2GRAY
convert between RGB/BGR and grayscale, color conversions
Definition imgproc.hpp:559
void putText(InputOutputArray img, const String &text, Point org, int fontFace, double fontScale, Scalar color, int thickness=1, int lineType=LINE_8, bool bottomLeftOrigin=false)
Draws a text string.
void polylines(InputOutputArray img, InputArrayOfArrays pts, bool isClosed, const Scalar &color, int thickness=1, int lineType=LINE_8, int shift=0)
Draws several polygonal curves.
int main(int argc, char *argv[])
Definition highgui_qt.cpp:3
Definition all_layers.hpp:47
GOpaque< Size > size(const GMat &src)
Gets dimensions from Mat.