Open
Description
Describe the problem
I wrote a C++ program that reads 854x480 video frames from an input video and runs a SIFT job on each frame. The code is below:
// main.cpp
#include <stdio.h>
#include <chrono>
#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <popsift/popsift.h>
#include <popsift/features.h>
using namespace cv;
using namespace std;
using std::string;
int main()
{
cudaDeviceReset();
std::clock_t start;
popsift::Config config;
PopSift PopSift(
config,
popsift::Config::ExtractingMode,
false ? PopSift::FloatImages : PopSift::ByteImages
);
string filename = "input.mp4";
VideoCapture cap(filename);
Mat frame;
if (!cap.isOpened())
{
std::cerr << "Couldn't open capture." << std::endl;
return -1;
}
for (;;)
{
cap >> frame;
if (frame.empty()) break;
unsigned char* dataMat = frame.data;
SiftJob* job = PopSift.enqueue(frame.cols, frame.rows, dataMat);
start = std::clock();
popsift::Features* feature_list = job->get();
std::cout << "Time: " << (std::clock() - start) / (double)(CLOCKS_PER_SEC / 1000) << " ms" << std::endl;
char c = cv::waitKey(10);
if (c == 27) break;
}
cap.release();
return 0;
}
Here's my printed device information
Device information:
Name: NVIDIA GeForce GTX 1660
Compute Capability: 7.5
Total device mem: 6441992192 B 6291008 kB 6143 MB
Per-block shared mem: 49152
Warp size: 32
Max threads per block: 1024
Max threads per SM(X): 1024
Max block sizes: {1024,1024,64}
Max grid sizes: {2147483647,65535,65535}
Number of SM(x)s: 22
Concurrent kernels: yes
Mapping host memory: yes
Unified addressing: yes
On my GTX 1660, job->get() takes ~200 ms per 854x480 px frame.
If I add:
config.setDownsampling(0);
config.setFilterMaxExtrema(false);
I can bring down the time to ~65ms
This is still slower than the Python version of this code, which uses
sift = cv2.SIFT_create()
sift.detect(frame,None)
Which only takes ~50ms
According to the paper, PopSift should run much faster than this. Is there something I'm missing?