OpenCV  3.0.0-rc1
Open Source Computer Vision
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
AKAZE and ORB planar tracking

Introduction

In this tutorial we will compare AKAZE and ORB local features using them to find matches between video frames and track object movements.

The algorithm is as follows:

frame.png

Data

To do the tracking we need a video and object position on the first frame.

You can download our example video and data from here.

To run the code you have to specify input and output video path and object bounding box.

./planar_tracking blais.mp4 result.avi blais_bb.xml.gz

Source Code

1 #include <opencv2/features2d.hpp>
2 #include <opencv2/videoio.hpp>
3 #include <opencv2/opencv.hpp>
4 #include <vector>
5 #include <iostream>
6 #include <iomanip>
7 
8 #include "stats.h" // Stats structure definition
9 #include "utils.h" // Drawing and printing functions
10 
11 using namespace std;
12 using namespace cv;
13 
14 const double akaze_thresh = 3e-4; // AKAZE detection threshold set to locate about 1000 keypoints
15 const double ransac_thresh = 2.5f; // RANSAC inlier threshold
16 const double nn_match_ratio = 0.8f; // Nearest-neighbour matching ratio
17 const int bb_min_inliers = 100; // Minimal number of inliers to draw bounding box
18 const int stats_update_period = 10; // On-screen statistics are updated every 10 frames
19 
20 class Tracker
21 {
22 public:
23  Tracker(Ptr<Feature2D> _detector, Ptr<DescriptorMatcher> _matcher) :
24  detector(_detector),
25  matcher(_matcher)
26  {}
27 
28  void setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats);
29  Mat process(const Mat frame, Stats& stats);
30  Ptr<Feature2D> getDetector() {
31  return detector;
32  }
33 protected:
34  Ptr<Feature2D> detector;
35  Ptr<DescriptorMatcher> matcher;
36  Mat first_frame, first_desc;
37  vector<KeyPoint> first_kp;
38  vector<Point2f> object_bb;
39 };
40 
41 void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
42 {
43  first_frame = frame.clone();
44  detector->detectAndCompute(first_frame, noArray(), first_kp, first_desc);
45  stats.keypoints = (int)first_kp.size();
46  drawBoundingBox(first_frame, bb);
47  putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
48  object_bb = bb;
49 }
50 
51 Mat Tracker::process(const Mat frame, Stats& stats)
52 {
53  vector<KeyPoint> kp;
54  Mat desc;
55  detector->detectAndCompute(frame, noArray(), kp, desc);
56  stats.keypoints = (int)kp.size();
57 
58  vector< vector<DMatch> > matches;
59  vector<KeyPoint> matched1, matched2;
60  matcher->knnMatch(first_desc, desc, matches, 2);
61  for(unsigned i = 0; i < matches.size(); i++) {
62  if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
63  matched1.push_back(first_kp[matches[i][0].queryIdx]);
64  matched2.push_back( kp[matches[i][0].trainIdx]);
65  }
66  }
67  stats.matches = (int)matched1.size();
68 
69  Mat inlier_mask, homography;
70  vector<KeyPoint> inliers1, inliers2;
71  vector<DMatch> inlier_matches;
72  if(matched1.size() >= 4) {
73  homography = findHomography(Points(matched1), Points(matched2),
74  RANSAC, ransac_thresh, inlier_mask);
75  }
76 
77  if(matched1.size() < 4 || homography.empty()) {
78  Mat res;
79  hconcat(first_frame, frame, res);
80  stats.inliers = 0;
81  stats.ratio = 0;
82  return res;
83  }
84  for(unsigned i = 0; i < matched1.size(); i++) {
85  if(inlier_mask.at<uchar>(i)) {
86  int new_i = static_cast<int>(inliers1.size());
87  inliers1.push_back(matched1[i]);
88  inliers2.push_back(matched2[i]);
89  inlier_matches.push_back(DMatch(new_i, new_i, 0));
90  }
91  }
92  stats.inliers = (int)inliers1.size();
93  stats.ratio = stats.inliers * 1.0 / stats.matches;
94 
95  vector<Point2f> new_bb;
96  perspectiveTransform(object_bb, new_bb, homography);
97  Mat frame_with_bb = frame.clone();
98  if(stats.inliers >= bb_min_inliers) {
99  drawBoundingBox(frame_with_bb, new_bb);
100  }
101  Mat res;
102  drawMatches(first_frame, inliers1, frame_with_bb, inliers2,
103  inlier_matches, res,
104  Scalar(255, 0, 0), Scalar(255, 0, 0));
105  return res;
106 }
107 
108 int main(int argc, char **argv)
109 {
110  if(argc < 4) {
111  cerr << "Usage: " << endl <<
112  "akaze_track input_path output_path bounding_box" << endl;
113  return 1;
114  }
115  VideoCapture video_in(argv[1]);
116  VideoWriter video_out(argv[2],
117  (int)video_in.get(CAP_PROP_FOURCC),
118  (int)video_in.get(CAP_PROP_FPS),
119  Size(2 * (int)video_in.get(CAP_PROP_FRAME_WIDTH),
120  2 * (int)video_in.get(CAP_PROP_FRAME_HEIGHT)));
121 
122  if(!video_in.isOpened()) {
123  cerr << "Couldn't open " << argv[1] << endl;
124  return 1;
125  }
126  if(!video_out.isOpened()) {
127  cerr << "Couldn't open " << argv[2] << endl;
128  return 1;
129  }
130 
131  vector<Point2f> bb;
132  FileStorage fs(argv[3], FileStorage::READ);
133  if(fs["bounding_box"].empty()) {
134  cerr << "Couldn't read bounding_box from " << argv[3] << endl;
135  return 1;
136  }
137  fs["bounding_box"] >> bb;
138 
139  Stats stats, akaze_stats, orb_stats;
140  Ptr<AKAZE> akaze = AKAZE::create();
141  akaze->setThreshold(akaze_thresh);
142  Ptr<ORB> orb = ORB::create();
143  orb->setMaxFeatures(stats.keypoints);
144  Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
145  Tracker akaze_tracker(akaze, matcher);
146  Tracker orb_tracker(orb, matcher);
147 
148  Mat frame;
149  video_in >> frame;
150  akaze_tracker.setFirstFrame(frame, bb, "AKAZE", stats);
151  orb_tracker.setFirstFrame(frame, bb, "ORB", stats);
152 
153  Stats akaze_draw_stats, orb_draw_stats;
154  int frame_count = (int)video_in.get(CAP_PROP_FRAME_COUNT);
155  Mat akaze_res, orb_res, res_frame;
156  for(int i = 1; i < frame_count; i++) {
157  bool update_stats = (i % stats_update_period == 0);
158  video_in >> frame;
159 
160  akaze_res = akaze_tracker.process(frame, stats);
161  akaze_stats += stats;
162  if(update_stats) {
163  akaze_draw_stats = stats;
164  }
165 
166  orb->setMaxFeatures(stats.keypoints);
167  orb_res = orb_tracker.process(frame, stats);
168  orb_stats += stats;
169  if(update_stats) {
170  orb_draw_stats = stats;
171  }
172 
173  drawStatistics(akaze_res, akaze_draw_stats);
174  drawStatistics(orb_res, orb_draw_stats);
175  vconcat(akaze_res, orb_res, res_frame);
176  video_out << res_frame;
177  cout << i << "/" << frame_count - 1 << endl;
178  }
179  akaze_stats /= frame_count - 1;
180  orb_stats /= frame_count - 1;
181  printStatistics("AKAZE", akaze_stats);
182  printStatistics("ORB", orb_stats);
183  return 0;
184 }
bool empty() const
Returns true if the array has no elements.
Scalar_< double > Scalar
Definition: types.hpp:597
Definition: videoio.hpp:100
void drawMatches(InputArray img1, const std::vector< KeyPoint > &keypoints1, InputArray img2, const std::vector< KeyPoint > &keypoints2, const std::vector< DMatch > &matches1to2, InputOutputArray outImg, const Scalar &matchColor=Scalar::all(-1), const Scalar &singlePointColor=Scalar::all(-1), const std::vector< char > &matchesMask=std::vector< char >(), int flags=DrawMatchesFlags::DEFAULT)
Draws the found matches of keypoints from two images.
Video writer class.
Definition: videoio.hpp:598
Definition: videoio.hpp:99
Base abstract class for the long-term tracker:
Definition: tracker.hpp:529
Class for video capturing from video files, image sequences or cameras. The class provides C++ API fo...
Definition: videoio.hpp:429
virtual void setMaxFeatures(int maxFeatures)=0
void vconcat(const Mat *src, size_t nsrc, OutputArray dst)
Applies vertical concatenation to given matrices.
Definition: videoio.hpp:97
InputOutputArray noArray()
Mat clone() const
Creates a full copy of the array and the underlying data.
RANSAC algorithm.
Definition: calib3d.hpp:187
XML/YAML file storage class that encapsulates all the information necessary for writing or reading da...
Definition: persistence.hpp:298
unsigned char uchar
Definition: defs.h:284
Template class for smart pointers with shared ownership.
Definition: cvstd.hpp:283
Size2i Size
Definition: types.hpp:308
Definition: videoio.hpp:101
virtual void detectAndCompute(InputArray image, InputArray mask, std::vector< KeyPoint > &keypoints, OutputArray descriptors, bool useProvidedKeypoints=false)
virtual void setThreshold(double threshold)=0
Mat findHomography(InputArray srcPoints, InputArray dstPoints, int method=0, double ransacReprojThreshold=3, OutputArray mask=noArray(), const int maxIters=2000, const double confidence=0.995)
Finds a perspective transformation between two planes.
Definition: videoio.hpp:98
void hconcat(const Mat *src, size_t nsrc, OutputArray dst)
Applies horizontal concatenation to given matrices.
void putText(InputOutputArray img, const String &text, Point org, int fontFace, double fontScale, Scalar color, int thickness=1, int lineType=LINE_8, bool bottomLeftOrigin=false)
Draws a text string.
small size sans-serif font
Definition: core.hpp:214
int main(int argc, const char *argv[])
Definition: facerec_demo.cpp:67
n-dimensional dense array class
Definition: mat.hpp:726
_Tp & at(int i0=0)
Returns a reference to the specified array element.
Class for matching keypoint descriptors.
Definition: types.hpp:728
Point2i Point
Definition: types.hpp:181
void perspectiveTransform(InputArray src, OutputArray dst, InputArray m)
Performs the perspective matrix transformation of vectors.

Explanation

Tracker class

This class implements algorithm described abobve using given feature detector and descriptor matcher.

Results

You can watch the resulting video on youtube.

AKAZE statistics:

Matches 626
Inliers 410
Inlier ratio 0.58
Keypoints 1117

ORB statistics:

Matches 504
Inliers 319
Inlier ratio 0.56
Keypoints 1112