diff --git a/examples/protonect/CMakeLists.txt b/examples/protonect/CMakeLists.txt index 1c29e413d..983465b13 100644 --- a/examples/protonect/CMakeLists.txt +++ b/examples/protonect/CMakeLists.txt @@ -40,10 +40,7 @@ SET(LIBRARY_OUTPUT_PATH ${MY_DIR}/lib) FIND_PACKAGE(OpenCL) # dependencies -FIND_PACKAGE(OpenCV REQUIRED) - -# OpenCV -INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIR}) +FIND_PACKAGE(OpenCV) # LibUSB INCLUDE_DIRECTORIES("${MY_DIR}/../../depends/libusb/include/libusb-1.0/") @@ -94,7 +91,6 @@ SET(SOURCES SET(LIBRARIES usb-1.0 - ${OpenCV_LIBS} turbojpeg ${LIBFREENECT2_THREADING_LIBRARIES} ) diff --git a/examples/protonect/Protonect.cpp b/examples/protonect/Protonect.cpp index baac87cd5..b0199721a 100644 --- a/examples/protonect/Protonect.cpp +++ b/examples/protonect/Protonect.cpp @@ -28,11 +28,12 @@ #include #include -#include +//#include #include #include #include +#include bool protonect_shutdown = false; @@ -55,7 +56,7 @@ int main(int argc, char *argv[]) libfreenect2::Freenect2 freenect2; - libfreenect2::Freenect2Device *dev = freenect2.openDefaultDevice(); + libfreenect2::Freenect2Device *dev = freenect2.openDefaultDevice(new libfreenect2::CpuPacketPipeline()); if(dev == 0) { @@ -83,11 +84,11 @@ int main(int argc, char *argv[]) libfreenect2::Frame *ir = frames[libfreenect2::Frame::Ir]; libfreenect2::Frame *depth = frames[libfreenect2::Frame::Depth]; - cv::imshow("rgb", cv::Mat(rgb->height, rgb->width, CV_8UC3, rgb->data)); - cv::imshow("ir", cv::Mat(ir->height, ir->width, CV_32FC1, ir->data) / 20000.0f); - cv::imshow("depth", cv::Mat(depth->height, depth->width, CV_32FC1, depth->data) / 4500.0f); + //cv::imshow("rgb", cv::Mat(rgb->height, rgb->width, CV_8UC3, rgb->data)); + //cv::imshow("ir", cv::Mat(ir->height, ir->width, CV_32FC1, ir->data) / 20000.0f); + //cv::imshow("depth", cv::Mat(depth->height, depth->width, CV_32FC1, depth->data) / 4500.0f); - int key = cv::waitKey(1); + int key = 1;//cv::waitKey(1); protonect_shutdown = protonect_shutdown || (key > 0 && ((key & 0xFF) == 27)); // shutdown on escape listener.release(frames); diff --git a/examples/protonect/src/cpu_depth_packet_processor.cpp b/examples/protonect/src/cpu_depth_packet_processor.cpp index 269d39811..e9def2565 100644 --- a/examples/protonect/src/cpu_depth_packet_processor.cpp +++ b/examples/protonect/src/cpu_depth_packet_processor.cpp @@ -28,7 +28,6 @@ #include #include -#include #include #include @@ -37,6 +36,150 @@ #include #endif +#include +#include + +template +struct Vec +{ + ScalarT val[Size]; +}; + +template +struct Mat +{ +private: + bool owns_buffer; + unsigned char *buffer_, *buffer_end_; + int width_, height_, x_step, y_step; + + void allocate(int width, int height, unsigned char *external_buffer = 0) + { + this->width_ = width; + this->height_ = height; + x_step = sizeof(ScalarT); + y_step = width * x_step; + + owns_buffer = external_buffer == 0; + + if(owns_buffer) + { + buffer_ = new unsigned char[y_step * height]; + } + else + { + buffer_ = external_buffer; + } + buffer_end_ = buffer_ + (y_step * height); + } + + void deallocate() + { + if(owns_buffer && buffer_ != 0) + { + delete[] buffer_; + owns_buffer = false; + buffer_ = 0; + buffer_end_ = 0; + } + } + +public: + Mat() + { + } + + Mat(int height, int width) : owns_buffer(false), buffer_(0) + { + create(height, width); + } + + template + Mat(int height, int width, DataT *external_buffer) + { + allocate(width, height, reinterpret_cast(external_buffer)); + } + + ~Mat() + { + deallocate(); + } + + int width() const + { + return width_; + } + + int height() const + { + return height_; + } + + void create(int height, int width) + { + deallocate(); + allocate(width, height); + } + + void copyTo(Mat &other) const + { + other.create(height(), width()); + std::copy(buffer_, buffer_end_, other.buffer_); + } + + const ScalarT &at(int y, int x) const + { + return *ptr(y, x); + } + + ScalarT &at(int y, int x) + { + return *ptr(y, x); + } + + const ScalarT *ptr(int y, int x) const + { + return reinterpret_cast(buffer_ + y_step * y + x_step * x); + } + + ScalarT *ptr(int y, int x) + { + return reinterpret_cast(buffer_ + y_step * y + x_step * x); + } + + unsigned char* buffer() + { + return buffer_; + } + + int sizeInBytes() const + { + return buffer_end_ - buffer_; + } +}; + +template +void flipHorizontal(const Mat &in, Mat& out) +{ + in.copyTo(out); + + typedef unsigned char type; + + int linestep = out.sizeInBytes() / out.height() / sizeof(type); + + type *first_line = reinterpret_cast(out.buffer()), *last_line = reinterpret_cast(out.buffer()) + (out.height() - 1) * linestep; + + + for(int y = 0; y < out.height() / 2; ++y) + { + for(int x = 0; x < linestep; ++x, ++first_line, ++last_line) + { + std::swap(*first_line, *last_line); + } + last_line -= 2 * linestep; + } +} + namespace libfreenect2 { @@ -62,7 +205,8 @@ inline int bfi(int width, int offset, int src2, int src3) class CpuDepthPacketProcessorImpl { public: - cv::Mat p0_table0, p0_table1, p0_table2, x_table, z_table; + Mat p0_table0, p0_table1, p0_table2; + Mat x_table, z_table; int16_t lut11to16[2048]; @@ -99,12 +243,12 @@ class CpuDepthPacketProcessorImpl void startTiming() { - timing_current_start = cv::getTickCount(); + //timing_current_start = cv::getTickCount(); } void stopTiming() { - timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency(); + //timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency(); timing_acc_n += 1.0; if(timing_acc_n >= 100.0) @@ -190,24 +334,27 @@ class CpuDepthPacketProcessorImpl return lut11to16[((i1 | i2) & 2047)]; } - void fill_trig_tables(cv::Mat& p0table, float trig_table[512*424][6]) + void fillTrigTable(Mat &p0table, float trig_table[512*424][6]) { - for (int i = 0; i < 512*424; i++) - { - float p0 = -((float)p0table.at(i)) * 0.000031 * M_PI; + int i = 0; + + for(int y = 0; y < 424; ++y) + for(int x = 0; x < 512; ++x, ++i) + { + float p0 = -((float)p0table.at(y, x)) * 0.000031 * M_PI; - float tmp0 = p0 + params.phase_in_rad[0]; - float tmp1 = p0 + params.phase_in_rad[1]; - float tmp2 = p0 + params.phase_in_rad[2]; + float tmp0 = p0 + params.phase_in_rad[0]; + float tmp1 = p0 + params.phase_in_rad[1]; + float tmp2 = p0 + params.phase_in_rad[2]; - trig_table[i][0] = std::cos(tmp0); - trig_table[i][1] = std::cos(tmp1); - trig_table[i][2] = std::cos(tmp2); + trig_table[i][0] = std::cos(tmp0); + trig_table[i][1] = std::cos(tmp1); + trig_table[i][2] = std::cos(tmp2); - trig_table[i][3] = std::sin(-tmp0); - trig_table[i][4] = std::sin(-tmp1); - trig_table[i][5] = std::sin(-tmp2); - } + trig_table[i][3] = std::sin(-tmp0); + trig_table[i][4] = std::sin(-tmp1); + trig_table[i][5] = std::sin(-tmp2); + } } void processMeasurementTriple(float trig_table[512*424][6], float abMultiplierPerFrq, int x, int y, const int32_t* m, float* m_out) @@ -221,7 +368,7 @@ class CpuDepthPacketProcessorImpl float sin_negtmp1 = trig_table[offset][4]; float sin_negtmp2 = trig_table[offset][5]; - float zmultiplier = z_table.at(y, x); + float zmultiplier = z_table.at(y, x); bool cond0 = 0 < zmultiplier; bool cond1 = (m[0] == 32767 || m[1] == 32767 || m[2] == 32767) && cond0; @@ -283,9 +430,9 @@ class CpuDepthPacketProcessorImpl processMeasurementTriple(trig_table2, params.ab_multiplier_per_frq[2], x, y, m2_raw, m2_out); } - void filterPixelStage1(int x, int y, const cv::Mat& m, float* m_out, bool& bilateral_max_edge_test) + void filterPixelStage1(int x, int y, const Mat >& m, float* m_out, bool& bilateral_max_edge_test) { - const float *m_ptr = m.ptr(y, x); + const float *m_ptr = (m.ptr(y, x)->val); bilateral_max_edge_test = true; if(x < 1 || y < 1 || x > 510 || y > 422) @@ -338,7 +485,7 @@ class CpuDepthPacketProcessorImpl continue; } - const float *other_m_ptr = m.ptr(y + yi, x + xi) + offset; + const float *other_m_ptr = (m.ptr(y + yi, x + xi)->val) + offset; float other_norm2 = other_m_ptr[0] * other_m_ptr[0] + other_m_ptr[1] * other_m_ptr[1]; // TODO: maybe fix numeric problems when norm = 0 - original code uses reciprocal square root, which returns +inf for +0 float other_inv_norm = 1.0f / std::sqrt(other_norm2); @@ -487,8 +634,8 @@ class CpuDepthPacketProcessorImpl } // this seems to be the phase to depth mapping :) - float zmultiplier = z_table.at(y, x); - float xmultiplier = x_table.at(y, x); + float zmultiplier = z_table.at(y, x); + float xmultiplier = x_table.at(y, x); phase = 0 < phase ? phase + params.phase_offset : phase; @@ -520,9 +667,9 @@ class CpuDepthPacketProcessorImpl //ir_out[2] = std::min(m2[2] * ab_output_multiplier, 65535.0f); } - void filterPixelStage2(int x, int y, cv::Mat &m, bool max_edge_test_ok, float *depth_out) + void filterPixelStage2(int x, int y, Mat > &m, bool max_edge_test_ok, float *depth_out) { - cv::Vec3f &depth_and_ir_sum = m.at(y, x); + Vec &depth_and_ir_sum = m.at(y, x); float &raw_depth = depth_and_ir_sum.val[0], &ir_sum = depth_and_ir_sum.val[2]; if(raw_depth >= params.min_depth && raw_depth <= params.max_depth) @@ -541,7 +688,7 @@ class CpuDepthPacketProcessorImpl { if(yi == 0 && xi == 0) continue; - cv::Vec3f &other = m.at(y + yi, x + xi); + Vec &other = m.at(y + yi, x + xi); ir_sum_acc += other.val[2]; squared_ir_sum_acc += other.val[2] * other.val[2]; @@ -634,68 +781,69 @@ void CpuDepthPacketProcessor::loadP0TablesFromCommandResponse(unsigned char* buf if(impl_->flip_ptables) { - cv::flip(cv::Mat(424, 512, CV_16UC1, p0table->p0table0), impl_->p0_table0, 0); - cv::flip(cv::Mat(424, 512, CV_16UC1, p0table->p0table1), impl_->p0_table1, 0); - cv::flip(cv::Mat(424, 512, CV_16UC1, p0table->p0table2), impl_->p0_table2, 0); - - impl_->fill_trig_tables(impl_->p0_table0, impl_->trig_table0); - impl_->fill_trig_tables(impl_->p0_table1, impl_->trig_table1); - impl_->fill_trig_tables(impl_->p0_table2, impl_->trig_table2); + flipHorizontal(Mat(424, 512, p0table->p0table0), impl_->p0_table0); + flipHorizontal(Mat(424, 512, p0table->p0table1), impl_->p0_table1); + flipHorizontal(Mat(424, 512, p0table->p0table2), impl_->p0_table2); } else { - cv::Mat(424, 512, CV_16UC1, p0table->p0table0).copyTo(impl_->p0_table0); - cv::Mat(424, 512, CV_16UC1, p0table->p0table1).copyTo(impl_->p0_table1); - cv::Mat(424, 512, CV_16UC1, p0table->p0table2).copyTo(impl_->p0_table2); + Mat p00(424, 512, p0table->p0table0); + p00.copyTo(impl_->p0_table0); + Mat(424, 512, p0table->p0table1).copyTo(impl_->p0_table1); + Mat(424, 512, p0table->p0table2).copyTo(impl_->p0_table2); } + + impl_->fillTrigTable(impl_->p0_table0, impl_->trig_table0); + impl_->fillTrigTable(impl_->p0_table1, impl_->trig_table1); + impl_->fillTrigTable(impl_->p0_table2, impl_->trig_table2); } void CpuDepthPacketProcessor::loadP0TablesFromFiles(const char* p0_filename, const char* p1_filename, const char* p2_filename) { - cv::Mat p0_table0(424, 512, CV_16UC1); - if(!loadBufferFromFile2(p0_filename, p0_table0.data, p0_table0.total() * p0_table0.elemSize())) + Mat p0_table0(424, 512); + if(!loadBufferFromFile2(p0_filename, p0_table0.buffer(), p0_table0.sizeInBytes())) { std::cerr << "[CpuDepthPacketProcessor::loadP0TablesFromFiles] Loading p0table 0 from '" << p0_filename << "' failed!" << std::endl; } - cv::Mat p0_table1(424, 512, CV_16UC1); - if(!loadBufferFromFile2(p1_filename, p0_table1.data, p0_table1.total() * p0_table1.elemSize())) + Mat p0_table1(424, 512); + if(!loadBufferFromFile2(p1_filename, p0_table1.buffer(), p0_table1.sizeInBytes())) { std::cerr << "[CpuDepthPacketProcessor::loadP0TablesFromFiles] Loading p0table 1 from '" << p1_filename << "' failed!" << std::endl; } - cv::Mat p0_table2(424, 512, CV_16UC1); - if(!loadBufferFromFile2(p2_filename, p0_table2.data, p0_table2.total() * p0_table2.elemSize())) + Mat p0_table2(424, 512); + if(!loadBufferFromFile2(p2_filename, p0_table2.buffer(), p0_table2.sizeInBytes())) { std::cerr << "[CpuDepthPacketProcessor::loadP0TablesFromFiles] Loading p0table 2 from '" << p2_filename << "' failed!" << std::endl; } if(impl_->flip_ptables) { - cv::flip(p0_table0, impl_->p0_table0, 0); - cv::flip(p0_table1, impl_->p0_table1, 0); - cv::flip(p0_table2, impl_->p0_table2, 0); + flipHorizontal(p0_table0, impl_->p0_table0); + flipHorizontal(p0_table1, impl_->p0_table1); + flipHorizontal(p0_table2, impl_->p0_table2); - impl_->fill_trig_tables(impl_->p0_table0, impl_->trig_table0); - impl_->fill_trig_tables(impl_->p0_table1, impl_->trig_table1); - impl_->fill_trig_tables(impl_->p0_table2, impl_->trig_table2); + impl_->fillTrigTable(impl_->p0_table0, impl_->trig_table0); + impl_->fillTrigTable(impl_->p0_table1, impl_->trig_table1); + impl_->fillTrigTable(impl_->p0_table2, impl_->trig_table2); } else { - impl_->fill_trig_tables(p0_table0, impl_->trig_table0); - impl_->fill_trig_tables(p0_table1, impl_->trig_table1); - impl_->fill_trig_tables(p0_table2, impl_->trig_table2); + impl_->fillTrigTable(p0_table0, impl_->trig_table0); + impl_->fillTrigTable(p0_table1, impl_->trig_table1); + impl_->fillTrigTable(p0_table2, impl_->trig_table2); } } void CpuDepthPacketProcessor::loadXTableFromFile(const char* filename) { - impl_->x_table.create(424, 512, CV_32FC1); + impl_->x_table.create(424, 512); const unsigned char *data; size_t length; if(loadResource("xTable.bin", &data, &length)) { - std::copy(data, data + length, impl_->x_table.data); + std::copy(data, data + length, impl_->x_table.buffer()); } else { @@ -705,14 +853,14 @@ void CpuDepthPacketProcessor::loadXTableFromFile(const char* filename) void CpuDepthPacketProcessor::loadZTableFromFile(const char* filename) { - impl_->z_table.create(424, 512, CV_32FC1); + impl_->z_table.create(424, 512); const unsigned char *data; size_t length; if(loadResource("zTable.bin", &data, &length)) { - std::copy(data, data + length, impl_->z_table.data); + std::copy(data, data + length, impl_->z_table.buffer()); } else { @@ -739,11 +887,15 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet) { if(listener_ == 0) return; - impl_->startTiming(); + //impl_->startTiming(); - cv::Mat m = cv::Mat::zeros(424, 512, CV_32FC(9)), m_filtered = cv::Mat::zeros(424, 512, CV_32FC(9)), m_max_edge_test = cv::Mat::ones(424, 512, CV_8UC1); + Mat > + m(424, 512), + m_filtered(424, 512) + ; + Mat m_max_edge_test(424, 512); - float *m_ptr = m.ptr(); + float *m_ptr = (m.ptr(0, 0)->val); for(int y = 0; y < 424; ++y) for(int x = 0; x < 512; ++x, m_ptr += 9) @@ -754,8 +906,8 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet) // bilateral filtering if(impl_->enable_bilateral_filter) { - float *m_filtered_ptr = m_filtered.ptr(); - unsigned char *m_max_edge_test_ptr = m_max_edge_test.ptr(); + float *m_filtered_ptr = (m_filtered.ptr(0, 0)->val); + unsigned char *m_max_edge_test_ptr = m_max_edge_test.ptr(0, 0); for(int y = 0; y < 424; ++y) for(int x = 0; x < 512; ++x, m_filtered_ptr += 9, ++m_max_edge_test_ptr) @@ -765,39 +917,39 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet) *m_max_edge_test_ptr = max_edge_test_val ? 1 : 0; } - m_ptr = m_filtered.ptr(); + m_ptr = (m_filtered.ptr(0, 0)->val); } else { - m_ptr = m.ptr(); + m_ptr = (m.ptr(0, 0)->val); } - cv::Mat out_ir(424, 512, CV_32FC1, impl_->ir_frame->data), out_depth(424, 512, CV_32FC1, impl_->depth_frame->data); + Mat out_ir(424, 512, impl_->ir_frame->data), out_depth(424, 512, impl_->depth_frame->data); if(impl_->enable_edge_filter) { - cv::Mat depth_ir_sum(424, 512, CV_32FC3); - cv::Vec3f *depth_ir_sum_ptr = depth_ir_sum.ptr(); - unsigned char *m_max_edge_test_ptr = m_max_edge_test.ptr(); + Mat > depth_ir_sum(424, 512); + Vec *depth_ir_sum_ptr = depth_ir_sum.ptr(0, 0); + unsigned char *m_max_edge_test_ptr = m_max_edge_test.ptr(0, 0); for(int y = 0; y < 424; ++y) for(int x = 0; x < 512; ++x, m_ptr += 9, ++m_max_edge_test_ptr, ++depth_ir_sum_ptr) { float raw_depth, ir_sum; - impl_->processPixelStage2(x, y, m_ptr + 0, m_ptr + 3, m_ptr + 6, out_ir.ptr(423 - y, x), &raw_depth, &ir_sum); + impl_->processPixelStage2(x, y, m_ptr + 0, m_ptr + 3, m_ptr + 6, out_ir.ptr(423 - y, x), &raw_depth, &ir_sum); depth_ir_sum_ptr->val[0] = raw_depth; depth_ir_sum_ptr->val[1] = *m_max_edge_test_ptr == 1 ? raw_depth : 0; depth_ir_sum_ptr->val[2] = ir_sum; } - m_max_edge_test_ptr = m_max_edge_test.ptr(); + m_max_edge_test_ptr = m_max_edge_test.ptr(0, 0); for(int y = 0; y < 424; ++y) for(int x = 0; x < 512; ++x, ++m_max_edge_test_ptr) { - impl_->filterPixelStage2(x, y, depth_ir_sum, *m_max_edge_test_ptr == 1, out_depth.ptr(423 - y, x)); + impl_->filterPixelStage2(x, y, depth_ir_sum, *m_max_edge_test_ptr == 1, out_depth.ptr(423 - y, x)); } } else @@ -805,7 +957,7 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet) for(int y = 0; y < 424; ++y) for(int x = 0; x < 512; ++x, m_ptr += 9) { - impl_->processPixelStage2(x, y, m_ptr + 0, m_ptr + 3, m_ptr + 6, out_ir.ptr(423 - y, x), out_depth.ptr(423 - y, x), 0); + impl_->processPixelStage2(x, y, m_ptr + 0, m_ptr + 3, m_ptr + 6, out_ir.ptr(423 - y, x), out_depth.ptr(423 - y, x), 0); } } @@ -819,7 +971,7 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet) impl_->newDepthFrame(); } - impl_->stopTiming(); + //impl_->stopTiming(); } } /* namespace libfreenect2 */ diff --git a/examples/protonect/src/opencl_depth_packet_processor.cpp b/examples/protonect/src/opencl_depth_packet_processor.cpp index edda413af..7c1970804 100644 --- a/examples/protonect/src/opencl_depth_packet_processor.cpp +++ b/examples/protonect/src/opencl_depth_packet_processor.cpp @@ -28,7 +28,7 @@ #include #include -#include +//#include #include #include #include @@ -538,12 +538,12 @@ class OpenCLDepthPacketProcessorImpl void startTiming() { - timing_current_start = cv::getTickCount(); + //timing_current_start = cv::getTickCount(); } void stopTiming() { - timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency(); + //timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency(); timing_acc_n += 1.0; if(timing_acc_n >= 100.0) diff --git a/examples/protonect/src/turbo_jpeg_rgb_packet_processor.cpp b/examples/protonect/src/turbo_jpeg_rgb_packet_processor.cpp index 88f650c2b..d08256119 100644 --- a/examples/protonect/src/turbo_jpeg_rgb_packet_processor.cpp +++ b/examples/protonect/src/turbo_jpeg_rgb_packet_processor.cpp @@ -26,8 +26,9 @@ #include -#include +//#include #include +#include namespace libfreenect2 { @@ -78,12 +79,12 @@ class TurboJpegRgbPacketProcessorImpl void startTiming() { - timing_current_start = cv::getTickCount(); + //timing_current_start = cv::getTickCount(); } void stopTiming() { - timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency(); + //timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency(); timing_acc_n += 1.0; if(timing_acc_n >= 100.0) @@ -110,7 +111,7 @@ void TurboJpegRgbPacketProcessor::process(const RgbPacket &packet) { if(impl_->decompressor != 0 && listener_ != 0) { - impl_->startTiming(); + //impl_->startTiming(); int r = tjDecompress2(impl_->decompressor, packet.jpeg_buffer, packet.jpeg_buffer_length, impl_->frame->data, 1920, 1920 * tjPixelSize[TJPF_BGR], 1080, TJPF_BGR, 0); @@ -126,7 +127,7 @@ void TurboJpegRgbPacketProcessor::process(const RgbPacket &packet) std::cerr << "[TurboJpegRgbPacketProcessor::doProcess] Failed to decompress rgb image! TurboJPEG error: '" << tjGetErrorStr() << "'" << std::endl; } - impl_->stopTiming(); + //impl_->stopTiming(); } }