// Command-line option table. Each entry is written as
// "{ name [alias] | default value | description }".
// The "help", "input", "thr", "nms", "width" and "height" entries are queried
// through `parser` in main().
// NOTE(review): presumably handed to a cv::CommandLineParser — the construction
// of `parser` is not visible here; confirm.
const char* keys =
"{ help h | | Print help message. }"
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
"{ model m | | Path to a binary .pb file contains trained detector network.}"
"{ ocr | | Path to a binary .pb or .onnx file contains trained recognition network.}"
"{ width | 320 | Preprocess input image by resizing to a specific width. It should be multiple by 32. }"
"{ height | 320 | Preprocess input image by resizing to a specific height. It should be multiple by 32. }"
"{ thr | 0.5 | Confidence threshold. }"
"{ nms | 0.4 | Non-maximum suppression threshold. }";
// Forward declaration (defined later in this file).
// Scans `scores`/`geometry` cell by cell and appends one rectangle to
// `detections` and one score to `confidences` for every cell whose score is
// not below `scoreThresh`.
void decodeBoundingBoxes(
const Mat& scores,
const Mat& geometry,
float scoreThresh,
std::vector<RotatedRect>& detections, std::vector<float>& confidences);
// Forward declaration (defined later in this file).
// main() calls this with the current frame, the four box vertices and an
// output Mat before running text recognition.
// NOTE(review): presumably crops/rectifies the quadrilateral given by
// `vertices` out of `frame` into `result` — the body is not visible; confirm.
void fourPointsTransform(
const Mat& frame,
Point2f vertices[4],
Mat& result);
// Forward declaration (defined later in this file).
// Builds a character sequence over the alphabet "0-9a-z" (with '-' as a
// placeholder), collapses it, and appends the outcome to `text`.
void decodeText(
const Mat& scores, std::string& text);
// Program entry point: reads the command-line options, then grabs frames,
// runs the detector on them, filters the decoded boxes with NMS and draws the
// surviving quadrilaterals onto the frame.
//
// Returns 0 after printing help or when processing ends, 1 when the option
// parser reports errors.
//
// NOTE(review): the following identifiers are used below but are not declared
// in the visible code: `parser`, `modelRecognition`, `recognizer`, `cap`,
// `frame`, `detector`, `blob`, `scores`, `geometry`, `vertices`, `cropped`,
// `result`, `tickMeter`. Confirm their declarations against the full file.
int main(int argc, char** argv)
{
parser.about("Use this script to run TensorFlow implementation (https://github.com/argman/EAST) of "
"EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)");
// Print usage and exit when invoked without arguments or with the help flag.
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
// Thresholds and network input size taken from the command line.
float confThreshold = parser.get<float>("thr");
float nmsThreshold = parser.get<float>("nms");
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
// Abort with exit code 1 if the parser reports any errors.
if (!parser.check())
{
parser.printErrors();
return 1;
}
// The recognition network is only loaded when a model path was supplied.
if (!modelRecognition.empty())
recognizer =
readNet(modelRecognition);
// Open the source named by the "input" option, or device 0 when it is absent.
// NOTE(review): `openSuccess` is not checked anywhere in the visible code.
bool openSuccess = parser.has(
"input") ? cap.
open(parser.get<
String>(
"input")) : cap.
open(0);
static const std::string kWinName = "EAST: An Efficient and Accurate Scene Text Detector";
std::vector<Mat> outs;
// Names of the two detector outputs requested from forward() below.
std::vector<String> outNames(2);
outNames[0] = "feature_fusion/Conv_7/Sigmoid";
outNames[1] = "feature_fusion/concat_3";
// NOTE(review): this block contains a `break`, but no enclosing loop header is
// visible — presumably a per-frame loop; confirm against the full file.
{
cap >> frame;
// Stop once no more frames are delivered.
if (frame.empty())
{
break;
}
detector.setInput(blob);
detector.forward(outs, outNames);
std::vector<RotatedRect> boxes;
std::vector<float> confidences;
// NOTE(review): `scores` and `geometry` are presumably taken from `outs`;
// the assignment is not visible here.
decodeBoundingBoxes(scores, geometry, confThreshold, boxes, confidences);
// `indices` receives the positions of the boxes kept by non-maximum suppression.
std::vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
// Per-axis factor between the frame size and the network input size,
// applied to the vertices below.
Point2f ratio((
float)frame.cols / inpWidth, (
float)frame.rows / inpHeight);
for (size_t i = 0; i < indices.size(); ++i)
{
// NOTE(review): `vertices` is presumably filled from boxes[indices[i]];
// that step is not visible here.
for (int j = 0; j < 4; ++j)
{
vertices[j].
x *= ratio.x;
vertices[j].
y *= ratio.y;
}
// Text recognition runs only when a recognition model path was supplied.
if (!modelRecognition.empty())
{
fourPointsTransform(frame, vertices, cropped);
std::string wordRecognized = "";
// NOTE(review): `result` is passed here while fourPointsTransform wrote to
// `cropped`; the step producing `result` is not visible — confirm.
decodeText(result, wordRecognized);
}
// Draw the four edges, connecting each vertex to the next and the last back to the first.
for (int j = 0; j < 4; ++j)
line(frame, vertices[j], vertices[(j + 1) % 4],
Scalar(0, 255, 0), 1);
}
// NOTE(review): `label` is built but not used in the visible code.
std::string label = format(
"Inference time: %.2f ms", tickMeter.
getTimeMilli());
}
return 0;
}
// Decodes raw detector outputs into rectangles and their scores.
//
// Every (y, x) cell of `scores` whose value is not below `scoreThresh`
// contributes one entry to `detections` and one to `confidences`.
//
// scores       Score blob; size[2] and size[3] are taken as height and width,
//              values are read as float from plane (0, 0).
// geometry     Geometry blob; planes (0, 0) .. (0, 3) provide the four
//              x0..x3 rows and plane (0, 4) the angle row, all read as float.
// scoreThresh  Cells scoring below this value are skipped.
// detections   Output; cleared, then filled with one rectangle per kept cell.
// confidences  Output; cleared, then filled with the matching score, so both
//              vectors always have the same length on return.
//
// NOTE(review): `cosA`, `sinA` and `r` are used below without a visible
// declaration (presumably cos/sin of `angle` and the RotatedRect built from
// `offset`, `w`, `h`) — confirm against the full file.
void decodeBoundingBoxes(
    const Mat& scores,
    const Mat& geometry,
    float scoreThresh,
    std::vector<RotatedRect>& detections, std::vector<float>& confidences)
{
    detections.clear();
    // Clear the scores as well: the two outputs are consumed as parallel
    // arrays, so stale entries from a previous call would desynchronise them.
    confidences.clear();

    const int height = scores.size[2];
    const int width = scores.size[3];
    for (int y = 0; y < height; ++y)
    {
        const float* scoresData = scores.ptr<float>(0, 0, y);
        const float* x0_data = geometry.ptr<float>(0, 0, y);
        const float* x1_data = geometry.ptr<float>(0, 1, y);
        const float* x2_data = geometry.ptr<float>(0, 2, y);
        const float* x3_data = geometry.ptr<float>(0, 3, y);
        const float* anglesData = geometry.ptr<float>(0, 4, y);
        for (int x = 0; x < width; ++x)
        {
            float score = scoresData[x];
            if (score < scoreThresh)
                continue;

            // Cell coordinates are scaled by 4 to obtain the offsets
            // (presumably the stride between the score map and the network input).
            float offsetX = x * 4.0f, offsetY = y * 4.0f;
            float angle = anglesData[x];
            // Box extent: sums of opposite geometry channels.
            float h = x0_data[x] + x2_data[x];
            float w = x1_data[x] + x3_data[x];
            Point2f offset(offsetX + cosA * x1_data[x] + sinA * x2_data[x],
                           offsetY - sinA * x1_data[x] + cosA * x2_data[x]);
            detections.push_back(r);
            confidences.push_back(score);
        }
    }
}
// Called from main() with the current frame, the four box vertices and an
// output Mat, before text recognition.
// NOTE(review): presumably warps the quadrilateral `vertices` of `frame` into
// `result` — the implementation is not visible here; confirm.
void fourPointsTransform(
const Mat& frame,
Point2f vertices[4],
Mat& result)
{
// NOTE(review): the `};` below looks like the tail of an elided initializer;
// as shown, the braces of this function do not balance — confirm against the
// full file.
};
}
// Converts per-row picks into characters and appends the decoded string to `text`.
//
// Each row yields one symbol: the alphabet character at index p.x - 1 when
// p.x lies in [1, alphabet.size()], otherwise the placeholder '-'.
// The symbol sequence is then collapsed: placeholders are dropped and a symbol
// equal to its immediate predecessor is emitted only once.
//
// `text` is appended to; it is never cleared here.
//
// NOTE(review): `scoresMat` and `p` are not declared in the visible part of
// this function — confirm where they are set up.
void decodeText(
    const Mat& scores, std::string& text)
{
    static const std::string alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
    const char placeholder = '-';

    std::vector<char> symbols;
    symbols.reserve(scores.size[0]);

    for (int row = 0; row < scoresMat.rows; ++row)
    {
        const bool inAlphabet = p.x > 0 && static_cast<size_t>(p.x) <= alphabet.size();
        symbols.push_back(inAlphabet ? alphabet[p.x - 1] : placeholder);
    }

    // Starting with the placeholder as "previous" makes the first symbol
    // subject only to the placeholder test, like every later symbol is
    // subject to both the placeholder and the repeat test.
    char previous = placeholder;
    for (size_t pos = 0; pos < symbols.size(); ++pos)
    {
        const char current = symbols[pos];
        if (current != placeholder && current != previous)
            text += current;
        previous = current;
    }
}