In this tutorial you will learn how to use the reconstruction api for camera motion estimation:
#include <iostream>
#include <fstream>
static void help() {
cout
<< "\n------------------------------------------------------------------\n"
<< " This program shows the camera trajectory reconstruction capabilities\n"
<< " in the OpenCV Structure From Motion (SFM) module.\n"
<< " \n"
<< " Usage:\n"
<< " example_sfm_trajectory_reconstruction <path_to_tracks_file> <f> <cx> <cy>\n"
<< " where: is the tracks file absolute path into your system. \n"
<< " \n"
<< " The file must have the following format: \n"
<< " row1 : x1 y1 x2 y2 ... x36 y36 for track 1\n"
<< " row2 : x1 y1 x2 y2 ... x36 y36 for track 2\n"
<< " etc\n"
<< " \n"
<< " i.e. a row gives the 2D measured position of a point as it is tracked\n"
<< " through frames 1 to 36. If there is no match found in a view then x\n"
<< " and y are -1.\n"
<< " \n"
<< " Each row corresponds to a different point.\n"
<< " \n"
<< " f is the focal length in pixels. \n"
<< " cx is the image principal point x coordinates in pixels. \n"
<< " cy is the image principal point y coordinates in pixels. \n"
<< "------------------------------------------------------------------\n\n"
<< endl;
}
static void parser_2D_tracks(
const String &_filename, std::vector<Mat> &points2d )
{
ifstream myfile(_filename.c_str());
if (!myfile.is_open())
{
cout << "Unable to read file: " << _filename << endl;
exit(0);
} else {
double x, y;
string line_str;
int n_frames = 0, n_tracks = 0;
vector<vector<Vec2d> > tracks;
for ( ; getline(myfile,line_str); ++n_tracks)
{
istringstream
line(line_str);
vector<Vec2d> track;
for ( n_frames = 0; line >> x >> y; ++n_frames)
{
if ( x > 0 && y > 0)
track.push_back(
Vec2d(x,y));
else
track.push_back(
Vec2d(-1));
}
tracks.push_back(track);
}
for (int i = 0; i < n_frames; ++i)
{
for (int j = 0; j < n_tracks; ++j)
{
frame(0,j) = tracks[j][i][0];
frame(1,j) = tracks[j][i][1];
}
points2d.push_back(
Mat(frame));
}
myfile.close();
}
}
bool camera_pov = false;
{
camera_pov = !camera_pov;
}
int main(int argc, char** argv)
{
if ( argc != 5 )
{
help();
exit(0);
}
std::vector<Mat> points2d;
parser_2D_tracks( argv[1], points2d );
const double f = atof(argv[2]),
cx = atof(argv[3]), cy = atof(argv[4]);
0, f, cy,
0, 0, 1);
bool is_projective = true;
vector<Mat> Rs_est, ts_est, points3d_estimated;
reconstruct(points2d, Rs_est, ts_est, K, points3d_estimated, is_projective);
cout << "\n----------------------------\n" << endl;
cout << "Reconstruction: " << endl;
cout << "============================" << endl;
cout << "Estimated 3D points: " << points3d_estimated.size() << endl;
cout << "Estimated cameras: " << Rs_est.size() << endl;
cout << "Refined intrinsics: " << endl << K << endl << endl;
cout << "3D Visualization: " << endl;
cout << "============================" << endl;
viz::Viz3d window_est(
"Estimation Coordinate Frame");
window_est.setBackgroundColor();
window_est.registerKeyboardCallback(&keyboard_callback);
cout << "Recovering points ... ";
vector<Vec3f> point_cloud_est;
for (int i = 0; i < points3d_estimated.size(); ++i)
point_cloud_est.push_back(
Vec3f(points3d_estimated[i]));
cout << "[DONE]" << endl;
cout << "Recovering cameras ... ";
vector<Affine3d> path_est;
for (size_t i = 0; i < Rs_est.size(); ++i)
path_est.push_back(
Affine3d(Rs_est[i],ts_est[i]));
cout << "[DONE]" << endl;
cout << "Rendering Trajectory ... ";
cout << endl << "Press: " << endl;
cout << " 's' to switch the camera pov" << endl;
cout << " 'q' to close the windows " << endl;
if ( path_est.size() > 0 )
{
int idx = 0, forw = -1, n = static_cast<int>(path_est.size());
while(!window_est.wasStopped())
{
for (size_t i = 0; i < point_cloud_est.size(); ++i)
{
Vec3d point = point_cloud_est[i];
char buffer[50];
sprintf (buffer, "%d", static_cast<int>(i));
window_est.showWidget(
"Cube"+
String(buffer), cube_widget, point_pose);
}
if ( camera_pov )
window_est.setViewerPose(cam_pose);
else
{
window_est.showWidget(
"cameras_frames_and_lines_est",
viz::WTrajectory(path_est, viz::WTrajectory::PATH, 1.0, viz::Color::green()));
window_est.showWidget("CPW", cpw, cam_pose);
window_est.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose);
}
forw *= (idx==n || idx==0) ? -1: 1; idx += forw;
window_est.spinOnce(1, true);
window_est.removeAllWidgets();
}
}
return 0;
}
Firstly, we need to load the file containing the 2d points tracked over all the frames and construct the container to feed the reconstruction api. In this case the tracked 2d points will have the following structure, a vector of 2d points array, where each inner array represents a different frame. Every frame is composed by a list of 2d points which e.g. the first point in frame 1 is the same point in frame 2. If there is no point in a frame the assigned value will be (-1,-1):
Secondly, the built container will be used to feed the reconstruction api. It is important outline that the estimated results must be stored in a vector<Mat>:
Finally, the obtained results will be shown in Viz, in this case reproducing the camera with an oscillation effect.
In order to run this sample we need to specify the path to the tracked points file, the focal lenght of the camera in addition to the center projection coordinates (in pixels). You can find a sample file in samples/data/desktop_trakcks.txt
The following picture shows the obtained camera motion obtained from the tracked 2d points: