diff --git a/examples/protonect/CMakeLists.txt b/examples/protonect/CMakeLists.txt
index 1c29e413d..983465b13 100644
--- a/examples/protonect/CMakeLists.txt
+++ b/examples/protonect/CMakeLists.txt
@@ -40,10 +40,7 @@ SET(LIBRARY_OUTPUT_PATH ${MY_DIR}/lib)
 FIND_PACKAGE(OpenCL)
 
 # dependencies
-FIND_PACKAGE(OpenCV REQUIRED)
-
-# OpenCV
-INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIR})
+FIND_PACKAGE(OpenCV)
 
 # LibUSB
 INCLUDE_DIRECTORIES("${MY_DIR}/../../depends/libusb/include/libusb-1.0/")
@@ -94,7 +91,6 @@ SET(SOURCES
 
 SET(LIBRARIES
   usb-1.0
-  ${OpenCV_LIBS}
   turbojpeg
   ${LIBFREENECT2_THREADING_LIBRARIES}
 )
diff --git a/examples/protonect/Protonect.cpp b/examples/protonect/Protonect.cpp
index baac87cd5..b0199721a 100644
--- a/examples/protonect/Protonect.cpp
+++ b/examples/protonect/Protonect.cpp
@@ -28,11 +28,12 @@
 #include <iostream>
 #include <signal.h>
 
-#include <opencv2/opencv.hpp>
+//#include <opencv2/opencv.hpp>
 
 #include <libfreenect2/libfreenect2.hpp>
 #include <libfreenect2/frame_listener_impl.h>
 #include <libfreenect2/threading.h>
+#include <libfreenect2/packet_pipeline.h>
 
 bool protonect_shutdown = false;
 
@@ -55,7 +56,7 @@ int main(int argc, char *argv[])
 
 
   libfreenect2::Freenect2 freenect2;
-  libfreenect2::Freenect2Device *dev = freenect2.openDefaultDevice();
+  libfreenect2::Freenect2Device *dev = freenect2.openDefaultDevice(new libfreenect2::CpuPacketPipeline());
 
   if(dev == 0)
   {
@@ -83,11 +84,11 @@ int main(int argc, char *argv[])
     libfreenect2::Frame *ir = frames[libfreenect2::Frame::Ir];
     libfreenect2::Frame *depth = frames[libfreenect2::Frame::Depth];
 
-    cv::imshow("rgb", cv::Mat(rgb->height, rgb->width, CV_8UC3, rgb->data));
-    cv::imshow("ir", cv::Mat(ir->height, ir->width, CV_32FC1, ir->data) / 20000.0f);
-    cv::imshow("depth", cv::Mat(depth->height, depth->width, CV_32FC1, depth->data) / 4500.0f);
+    //cv::imshow("rgb", cv::Mat(rgb->height, rgb->width, CV_8UC3, rgb->data));
+    //cv::imshow("ir", cv::Mat(ir->height, ir->width, CV_32FC1, ir->data) / 20000.0f);
+    //cv::imshow("depth", cv::Mat(depth->height, depth->width, CV_32FC1, depth->data) / 4500.0f);
 
-    int key = cv::waitKey(1);
+    int key = 1;//cv::waitKey(1);
     protonect_shutdown = protonect_shutdown || (key > 0 && ((key & 0xFF) == 27)); // shutdown on escape
 
     listener.release(frames);
diff --git a/examples/protonect/src/cpu_depth_packet_processor.cpp b/examples/protonect/src/cpu_depth_packet_processor.cpp
index 269d39811..e9def2565 100644
--- a/examples/protonect/src/cpu_depth_packet_processor.cpp
+++ b/examples/protonect/src/cpu_depth_packet_processor.cpp
@@ -28,7 +28,6 @@
 #include <libfreenect2/resource.h>
 #include <libfreenect2/protocol/response.h>
 
-#include <opencv2/opencv.hpp>
 #include <iostream>
 #include <fstream>
 
@@ -37,6 +36,150 @@
 #include <math.h>
 #endif
 
+#include <cmath>
+#include <limits>
+
+template<typename ScalarT, int Size>
+struct Vec // Fixed-size group of Size values of type ScalarT; used in this file as a multi-channel Mat element.
+{
+  ScalarT val[Size]; // the components, stored contiguously and accessed directly via .val
+};
+
+template<typename ScalarT>
+struct Mat // Minimal row-major 2-D array of ScalarT that either owns its storage or wraps a caller-supplied buffer.
+{
+private: // NOTE: no copy constructor/assignment is defined; copying an owning Mat would delete[] the same buffer twice.
+  bool owns_buffer;                     // true when buffer_ was allocated by allocate() and must be delete[]d
+  unsigned char *buffer_, *buffer_end_; // first byte / one-past-last byte of the element storage
+  int width_, height_, x_step, y_step;  // size in elements; x_step / y_step are the byte strides per element / per row
+
+  void allocate(int width, int height, unsigned char *external_buffer = 0) // external_buffer == 0: allocate and own the storage
+  {
+    this->width_ = width;
+    this->height_ = height;
+    x_step = sizeof(ScalarT);
+    y_step = width * x_step;
+
+    owns_buffer = external_buffer == 0;
+
+    if(owns_buffer)
+    {
+      buffer_ = new unsigned char[y_step * height];
+    }
+    else
+    {
+      buffer_ = external_buffer;
+    }
+    buffer_end_ = buffer_ + (y_step * height);
+  }
+
+  void deallocate() // releases owned storage; a wrapped external buffer is left untouched
+  {
+    if(owns_buffer && buffer_ != 0)
+    {
+      delete[] buffer_;
+      owns_buffer = false;
+      buffer_ = 0;
+      buffer_end_ = 0;
+    }
+  }
+
+public:
+  Mat() : owns_buffer(false), buffer_(0), buffer_end_(0), width_(0), height_(0), x_step(0), y_step(0)
+  { // Empty matrix. Every member is initialised because ~Mat() and create() both call deallocate(), which reads them.
+  }
+
+  Mat(int height, int width) : owns_buffer(false), buffer_(0) // owning matrix; element values are left uninitialised
+  {
+    create(height, width);
+  }
+
+  template<typename DataT>
+  Mat(int height, int width, DataT *external_buffer) // wraps external_buffer without copying; a null pointer makes the Mat allocate its own storage
+  {
+    allocate(width, height, reinterpret_cast<unsigned char *>(external_buffer));
+  }
+
+  ~Mat()
+  {
+    deallocate();
+  }
+
+  int width() const
+  {
+    return width_;
+  }
+
+  int height() const
+  {
+    return height_;
+  }
+
+  void create(int height, int width) // drops owned storage (if any) and allocates a fresh, uninitialised height x width buffer
+  {
+    deallocate();
+    allocate(width, height);
+  }
+
+  void copyTo(Mat<ScalarT> &other) const // re-creates other with this size and copies all bytes; other must not be *this
+  {
+    other.create(height(), width());
+    std::copy(buffer_, buffer_end_, other.buffer_);
+  }
+
+  const ScalarT &at(int y, int x) const // element at row y, column x; no bounds checking
+  {
+    return *ptr(y, x);
+  }
+
+  ScalarT &at(int y, int x)
+  {
+    return *ptr(y, x);
+  }
+
+  const ScalarT *ptr(int y, int x) const // address of the element at row y, column x; no bounds checking
+  {
+    return reinterpret_cast<const ScalarT *>(buffer_ + y_step * y + x_step * x);
+  }
+
+  ScalarT *ptr(int y, int x)
+  {
+    return reinterpret_cast<ScalarT *>(buffer_ + y_step * y + x_step * x);
+  }
+
+  unsigned char* buffer() // raw byte pointer to the start of the storage (0 for an empty matrix)
+  {
+    return buffer_;
+  }
+
+  int sizeInBytes() const // total storage size: height * width * sizeof(ScalarT)
+  {
+    return buffer_end_ - buffer_;
+  }
+};
+
+template<typename ScalarT>
+void flipHorizontal(const Mat<ScalarT> &in, Mat<ScalarT>& out) // writes a copy of 'in' with the row order reversed into 'out'
+{
+  in.copyTo(out);
+  // Despite the name, rows are mirrored top <-> bottom and columns keep their order; this replaces the cv::flip(..., 0) calls.
+  typedef unsigned char type;
+
+  int linestep = out.sizeInBytes() / out.height() / sizeof(type); // bytes per row; NOTE: divides by height(), a zero-height input is not handled
+
+  type *first_line = reinterpret_cast<type *>(out.buffer()), *last_line = reinterpret_cast<type *>(out.buffer()) + (out.height() - 1) * linestep;
+
+
+  for(int y = 0; y < out.height() / 2; ++y) // swap row y with row (height - 1 - y); the middle row of an odd height stays in place
+  {
+    for(int x = 0; x < linestep; ++x, ++first_line, ++last_line)
+    {
+      std::swap(*first_line, *last_line);
+    }
+    last_line -= 2 * linestep; // last_line has advanced one full row; step back to the start of the row above it
+  }
+}
+
 namespace libfreenect2
 {
 
@@ -62,7 +205,8 @@ inline int bfi(int width, int offset, int src2, int src3)
 class CpuDepthPacketProcessorImpl
 {
 public:
-  cv::Mat p0_table0, p0_table1, p0_table2, x_table, z_table;
+  Mat<uint16_t> p0_table0, p0_table1, p0_table2;
+  Mat<float> x_table, z_table;
 
   int16_t lut11to16[2048];
 
@@ -99,12 +243,12 @@ class CpuDepthPacketProcessorImpl
 
   void startTiming()
   {
+    //timing_current_start = cv::getTickCount(); -- disabled together with the OpenCV include; this function is currently a no-op
   }
 
   void stopTiming()
   {
-    timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency();
+    //timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency();
     timing_acc_n += 1.0;
 
     if(timing_acc_n >= 100.0)
@@ -190,24 +334,27 @@ class CpuDepthPacketProcessorImpl
     return lut11to16[((i1 | i2) & 2047)];
   }
 
-  void fill_trig_tables(cv::Mat& p0table, float trig_table[512*424][6])
+  void fillTrigTable(Mat<uint16_t> &p0table, float trig_table[512*424][6]) // fills trig_table with cos/sin values derived from each p0table entry
   {
-    for (int i = 0; i < 512*424; i++)
-    {
-      float p0 = -((float)p0table.at<uint16_t>(i)) * 0.000031 * M_PI;
+    int i = 0; // linear row-major index into trig_table, advanced together with x

+    for(int y = 0; y < 424; ++y)
+      for(int x = 0; x < 512; ++x, ++i)
+      {
+        float p0 = -((float)p0table.at(y, x)) * 0.000031 * M_PI; // table entry scaled by 0.000031 * pi and negated
 
-      float tmp0 = p0 + params.phase_in_rad[0];
-      float tmp1 = p0 + params.phase_in_rad[1];
-      float tmp2 = p0 + params.phase_in_rad[2];
+        float tmp0 = p0 + params.phase_in_rad[0]; // one angle per entry of params.phase_in_rad
+        float tmp1 = p0 + params.phase_in_rad[1];
+        float tmp2 = p0 + params.phase_in_rad[2];
 
-      trig_table[i][0] = std::cos(tmp0);
-      trig_table[i][1] = std::cos(tmp1);
-      trig_table[i][2] = std::cos(tmp2);
+        trig_table[i][0] = std::cos(tmp0); // columns 0..2: cosines of the three angles
+        trig_table[i][1] = std::cos(tmp1);
+        trig_table[i][2] = std::cos(tmp2);
 
-      trig_table[i][3] = std::sin(-tmp0);
-      trig_table[i][4] = std::sin(-tmp1);
-      trig_table[i][5] = std::sin(-tmp2);
-    }
+        trig_table[i][3] = std::sin(-tmp0); // columns 3..5: sines of the negated angles
+        trig_table[i][4] = std::sin(-tmp1);
+        trig_table[i][5] = std::sin(-tmp2);
+      }
   }
 
   void processMeasurementTriple(float trig_table[512*424][6], float abMultiplierPerFrq, int x, int y, const int32_t* m, float* m_out)
@@ -221,7 +368,7 @@ class CpuDepthPacketProcessorImpl
     float sin_negtmp1 = trig_table[offset][4];
     float sin_negtmp2 = trig_table[offset][5];
 
-    float zmultiplier = z_table.at<float>(y, x);
+    float zmultiplier = z_table.at(y, x);
     bool cond0 = 0 < zmultiplier;
     bool cond1 = (m[0] == 32767 || m[1] == 32767 || m[2] == 32767) && cond0;
 
@@ -283,9 +430,9 @@ class CpuDepthPacketProcessorImpl
     processMeasurementTriple(trig_table2, params.ab_multiplier_per_frq[2], x, y, m2_raw, m2_out);
   }
 
-  void filterPixelStage1(int x, int y, const cv::Mat& m, float* m_out, bool& bilateral_max_edge_test)
+  void filterPixelStage1(int x, int y, const Mat<Vec<float, 9> >& m, float* m_out, bool& bilateral_max_edge_test)
   {
-    const float *m_ptr = m.ptr<float>(y, x);
+    const float *m_ptr = (m.ptr(y, x)->val);
     bilateral_max_edge_test = true;
 
     if(x < 1 || y < 1 || x > 510 || y > 422)
@@ -338,7 +485,7 @@ class CpuDepthPacketProcessorImpl
               continue;
             }
 
-            const float *other_m_ptr = m.ptr<float>(y + yi, x + xi) + offset;
+            const float *other_m_ptr = (m.ptr(y + yi, x + xi)->val) + offset;
             float other_norm2 = other_m_ptr[0] * other_m_ptr[0] + other_m_ptr[1] * other_m_ptr[1];
             // TODO: maybe fix numeric problems when norm = 0 - original code uses reciprocal square root, which returns +inf for +0
             float other_inv_norm = 1.0f / std::sqrt(other_norm2);
@@ -487,8 +634,8 @@ class CpuDepthPacketProcessorImpl
     }
 
     // this seems to be the phase to depth mapping :)
-    float zmultiplier = z_table.at<float>(y, x);
-    float xmultiplier = x_table.at<float>(y, x);
+    float zmultiplier = z_table.at(y, x);
+    float xmultiplier = x_table.at(y, x);
 
     phase = 0 < phase ? phase + params.phase_offset : phase;
 
@@ -520,9 +667,9 @@ class CpuDepthPacketProcessorImpl
     //ir_out[2] = std::min(m2[2] * ab_output_multiplier, 65535.0f);
   }
 
-  void filterPixelStage2(int x, int y, cv::Mat &m, bool max_edge_test_ok, float *depth_out)
+  void filterPixelStage2(int x, int y, Mat<Vec<float, 3> > &m, bool max_edge_test_ok, float *depth_out)
   {
-    cv::Vec3f &depth_and_ir_sum = m.at<cv::Vec3f>(y, x);
+    Vec<float, 3> &depth_and_ir_sum = m.at(y, x);
     float &raw_depth = depth_and_ir_sum.val[0], &ir_sum = depth_and_ir_sum.val[2];
 
     if(raw_depth >= params.min_depth && raw_depth <= params.max_depth)
@@ -541,7 +688,7 @@ class CpuDepthPacketProcessorImpl
           {
             if(yi == 0 && xi == 0) continue;
 
-            cv::Vec3f &other = m.at<cv::Vec3f>(y + yi, x + xi);
+            Vec<float, 3> &other = m.at(y + yi, x + xi);
 
             ir_sum_acc += other.val[2];
             squared_ir_sum_acc += other.val[2] * other.val[2];
@@ -634,68 +781,69 @@ void CpuDepthPacketProcessor::loadP0TablesFromCommandResponse(unsigned char* buf
 
   if(impl_->flip_ptables)
   {
-    cv::flip(cv::Mat(424, 512, CV_16UC1, p0table->p0table0), impl_->p0_table0, 0);
-    cv::flip(cv::Mat(424, 512, CV_16UC1, p0table->p0table1), impl_->p0_table1, 0);
-    cv::flip(cv::Mat(424, 512, CV_16UC1, p0table->p0table2), impl_->p0_table2, 0);
-
-    impl_->fill_trig_tables(impl_->p0_table0, impl_->trig_table0);
-    impl_->fill_trig_tables(impl_->p0_table1, impl_->trig_table1);
-    impl_->fill_trig_tables(impl_->p0_table2, impl_->trig_table2);
+    flipHorizontal(Mat<uint16_t>(424, 512, p0table->p0table0), impl_->p0_table0);
+    flipHorizontal(Mat<uint16_t>(424, 512, p0table->p0table1), impl_->p0_table1);
+    flipHorizontal(Mat<uint16_t>(424, 512, p0table->p0table2), impl_->p0_table2);
   }
   else
   {
-    cv::Mat(424, 512, CV_16UC1, p0table->p0table0).copyTo(impl_->p0_table0);
-    cv::Mat(424, 512, CV_16UC1, p0table->p0table1).copyTo(impl_->p0_table1);
-    cv::Mat(424, 512, CV_16UC1, p0table->p0table2).copyTo(impl_->p0_table2);
+    Mat<uint16_t> p00(424, 512, p0table->p0table0);
+    p00.copyTo(impl_->p0_table0);
+    Mat<uint16_t>(424, 512, p0table->p0table1).copyTo(impl_->p0_table1);
+    Mat<uint16_t>(424, 512, p0table->p0table2).copyTo(impl_->p0_table2);
   }
+
+  impl_->fillTrigTable(impl_->p0_table0, impl_->trig_table0);
+  impl_->fillTrigTable(impl_->p0_table1, impl_->trig_table1);
+  impl_->fillTrigTable(impl_->p0_table2, impl_->trig_table2);
 }
 void CpuDepthPacketProcessor::loadP0TablesFromFiles(const char* p0_filename, const char* p1_filename, const char* p2_filename)
 {
-  cv::Mat p0_table0(424, 512, CV_16UC1);
-  if(!loadBufferFromFile2(p0_filename, p0_table0.data, p0_table0.total() * p0_table0.elemSize()))
+  Mat<uint16_t> p0_table0(424, 512); // 424 rows x 512 columns of uint16_t; contents come from p0_filename
+  if(!loadBufferFromFile2(p0_filename, p0_table0.buffer(), p0_table0.sizeInBytes())) // a failed load is only logged; processing continues with the buffer as it is
   {
     std::cerr << "[CpuDepthPacketProcessor::loadP0TablesFromFiles] Loading p0table 0 from '" << p0_filename << "' failed!" << std::endl;
   }
 
-  cv::Mat p0_table1(424, 512, CV_16UC1);
-  if(!loadBufferFromFile2(p1_filename, p0_table1.data, p0_table1.total() * p0_table1.elemSize()))
+  Mat<uint16_t> p0_table1(424, 512); // same layout, loaded from p1_filename
+  if(!loadBufferFromFile2(p1_filename, p0_table1.buffer(), p0_table1.sizeInBytes()))
   {
     std::cerr << "[CpuDepthPacketProcessor::loadP0TablesFromFiles] Loading p0table 1 from '" << p1_filename << "' failed!" << std::endl;
   }
 
-  cv::Mat p0_table2(424, 512, CV_16UC1);
-  if(!loadBufferFromFile2(p2_filename, p0_table2.data, p0_table2.total() * p0_table2.elemSize()))
+  Mat<uint16_t> p0_table2(424, 512); // same layout, loaded from p2_filename
+  if(!loadBufferFromFile2(p2_filename, p0_table2.buffer(), p0_table2.sizeInBytes()))
   {
     std::cerr << "[CpuDepthPacketProcessor::loadP0TablesFromFiles] Loading p0table 2 from '" << p2_filename << "' failed!" << std::endl;
   }
 
   if(impl_->flip_ptables)
   {
-    cv::flip(p0_table0, impl_->p0_table0, 0);
-    cv::flip(p0_table1, impl_->p0_table1, 0);
-    cv::flip(p0_table2, impl_->p0_table2, 0);
+    flipHorizontal(p0_table0, impl_->p0_table0); // row-reversed copies are stored in impl_ ...
+    flipHorizontal(p0_table1, impl_->p0_table1);
+    flipHorizontal(p0_table2, impl_->p0_table2);
 
-    impl_->fill_trig_tables(impl_->p0_table0, impl_->trig_table0);
-    impl_->fill_trig_tables(impl_->p0_table1, impl_->trig_table1);
-    impl_->fill_trig_tables(impl_->p0_table2, impl_->trig_table2);
+    impl_->fillTrigTable(impl_->p0_table0, impl_->trig_table0); // ... and the trig tables are built from those flipped copies
+    impl_->fillTrigTable(impl_->p0_table1, impl_->trig_table1);
+    impl_->fillTrigTable(impl_->p0_table2, impl_->trig_table2);
   }
   else
   {
-    impl_->fill_trig_tables(p0_table0, impl_->trig_table0);
-    impl_->fill_trig_tables(p0_table1, impl_->trig_table1);
-    impl_->fill_trig_tables(p0_table2, impl_->trig_table2);
+    impl_->fillTrigTable(p0_table0, impl_->trig_table0); // unflipped: built directly from the local tables; impl_->p0_table* are not assigned in this branch
+    impl_->fillTrigTable(p0_table1, impl_->trig_table1);
+    impl_->fillTrigTable(p0_table2, impl_->trig_table2);
   }
 }
 
 void CpuDepthPacketProcessor::loadXTableFromFile(const char* filename)
 {
-  impl_->x_table.create(424, 512, CV_32FC1);
+  impl_->x_table.create(424, 512);
   const unsigned char *data;
   size_t length;
 
   if(loadResource("xTable.bin", &data, &length))
   {
-    std::copy(data, data + length, impl_->x_table.data);
+    std::copy(data, data + length, impl_->x_table.buffer());
   }
   else
   {
@@ -705,14 +853,14 @@ void CpuDepthPacketProcessor::loadXTableFromFile(const char* filename)
 
 void CpuDepthPacketProcessor::loadZTableFromFile(const char* filename)
 {
-  impl_->z_table.create(424, 512, CV_32FC1);
+  impl_->z_table.create(424, 512);
 
   const unsigned char *data;
   size_t length;
 
   if(loadResource("zTable.bin", &data, &length))
   {
-    std::copy(data, data + length, impl_->z_table.data);
+    std::copy(data, data + length, impl_->z_table.buffer());
   }
   else
   {
@@ -739,11 +887,15 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet)
 {
   if(listener_ == 0) return;
 
-  impl_->startTiming();
+  //impl_->startTiming();
 
-  cv::Mat m = cv::Mat::zeros(424, 512, CV_32FC(9)), m_filtered = cv::Mat::zeros(424, 512, CV_32FC(9)), m_max_edge_test = cv::Mat::ones(424, 512, CV_8UC1);
+  Mat<Vec<float, 9> >
+      m(424, 512),
+      m_filtered(424, 512)
+  ;
+  Mat<unsigned char> m_max_edge_test(424, 512);
 
-  float *m_ptr = m.ptr<float>();
+  float *m_ptr = (m.ptr(0, 0)->val);
 
   for(int y = 0; y < 424; ++y)
     for(int x = 0; x < 512; ++x, m_ptr += 9)
@@ -754,8 +906,8 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet)
   // bilateral filtering
   if(impl_->enable_bilateral_filter)
   {
-    float *m_filtered_ptr = m_filtered.ptr<float>();
-    unsigned char *m_max_edge_test_ptr = m_max_edge_test.ptr<unsigned char>();
+    float *m_filtered_ptr = (m_filtered.ptr(0, 0)->val);
+    unsigned char *m_max_edge_test_ptr = m_max_edge_test.ptr(0, 0);
 
     for(int y = 0; y < 424; ++y)
       for(int x = 0; x < 512; ++x, m_filtered_ptr += 9, ++m_max_edge_test_ptr)
@@ -765,39 +917,39 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet)
         *m_max_edge_test_ptr = max_edge_test_val ? 1 : 0;
       }
 
-    m_ptr = m_filtered.ptr<float>();
+    m_ptr = (m_filtered.ptr(0, 0)->val);
   }
   else
   {
-    m_ptr = m.ptr<float>();
+    m_ptr = (m.ptr(0, 0)->val);
   }
 
-  cv::Mat out_ir(424, 512, CV_32FC1, impl_->ir_frame->data), out_depth(424, 512, CV_32FC1, impl_->depth_frame->data);
+  Mat<float> out_ir(424, 512, impl_->ir_frame->data), out_depth(424, 512, impl_->depth_frame->data);
 
   if(impl_->enable_edge_filter)
   {
-    cv::Mat depth_ir_sum(424, 512, CV_32FC3);
-    cv::Vec3f *depth_ir_sum_ptr = depth_ir_sum.ptr<cv::Vec3f>();
-    unsigned char *m_max_edge_test_ptr = m_max_edge_test.ptr<unsigned char>();
+    Mat<Vec<float, 3> > depth_ir_sum(424, 512);
+    Vec<float, 3> *depth_ir_sum_ptr = depth_ir_sum.ptr(0, 0);
+    unsigned char *m_max_edge_test_ptr = m_max_edge_test.ptr(0, 0);
 
     for(int y = 0; y < 424; ++y)
       for(int x = 0; x < 512; ++x, m_ptr += 9, ++m_max_edge_test_ptr, ++depth_ir_sum_ptr)
       {
         float raw_depth, ir_sum;
 
-        impl_->processPixelStage2(x, y, m_ptr + 0, m_ptr + 3, m_ptr + 6, out_ir.ptr<float>(423 - y, x), &raw_depth, &ir_sum);
+        impl_->processPixelStage2(x, y, m_ptr + 0, m_ptr + 3, m_ptr + 6, out_ir.ptr(423 - y, x), &raw_depth, &ir_sum);
 
         depth_ir_sum_ptr->val[0] = raw_depth;
         depth_ir_sum_ptr->val[1] = *m_max_edge_test_ptr == 1 ? raw_depth : 0;
         depth_ir_sum_ptr->val[2] = ir_sum;
       }
 
-    m_max_edge_test_ptr = m_max_edge_test.ptr<unsigned char>();
+    m_max_edge_test_ptr = m_max_edge_test.ptr(0, 0);
 
     for(int y = 0; y < 424; ++y)
       for(int x = 0; x < 512; ++x, ++m_max_edge_test_ptr)
       {
-        impl_->filterPixelStage2(x, y, depth_ir_sum, *m_max_edge_test_ptr == 1, out_depth.ptr<float>(423 - y, x));
+        impl_->filterPixelStage2(x, y, depth_ir_sum, *m_max_edge_test_ptr == 1, out_depth.ptr(423 - y, x));
       }
   }
   else
@@ -805,7 +957,7 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet)
     for(int y = 0; y < 424; ++y)
       for(int x = 0; x < 512; ++x, m_ptr += 9)
       {
-        impl_->processPixelStage2(x, y, m_ptr + 0, m_ptr + 3, m_ptr + 6, out_ir.ptr<float>(423 - y, x), out_depth.ptr<float>(423 - y, x), 0);
+        impl_->processPixelStage2(x, y, m_ptr + 0, m_ptr + 3, m_ptr + 6, out_ir.ptr(423 - y, x), out_depth.ptr(423 - y, x), 0);
       }
   }
 
@@ -819,7 +971,7 @@ void CpuDepthPacketProcessor::process(const DepthPacket &packet)
     impl_->newDepthFrame();
   }
 
-  impl_->stopTiming();
+  //impl_->stopTiming();
 }
 
 } /* namespace libfreenect2 */
diff --git a/examples/protonect/src/opencl_depth_packet_processor.cpp b/examples/protonect/src/opencl_depth_packet_processor.cpp
index edda413af..7c1970804 100644
--- a/examples/protonect/src/opencl_depth_packet_processor.cpp
+++ b/examples/protonect/src/opencl_depth_packet_processor.cpp
@@ -28,7 +28,7 @@
 #include <libfreenect2/resource.h>
 #include <libfreenect2/protocol/response.h>
 
-#include <opencv2/opencv.hpp>
+//#include <opencv2/opencv.hpp>
 #include <iostream>
 #include <fstream>
 #include <sstream>
@@ -538,12 +538,12 @@ class OpenCLDepthPacketProcessorImpl
 
   void startTiming()
   {
+    //timing_current_start = cv::getTickCount(); -- disabled together with the OpenCV include; this function is currently a no-op
   }
 
   void stopTiming()
   {
-    timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency();
+    //timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency();
     timing_acc_n += 1.0;
 
     if(timing_acc_n >= 100.0)
diff --git a/examples/protonect/src/turbo_jpeg_rgb_packet_processor.cpp b/examples/protonect/src/turbo_jpeg_rgb_packet_processor.cpp
index 88f650c2b..d08256119 100644
--- a/examples/protonect/src/turbo_jpeg_rgb_packet_processor.cpp
+++ b/examples/protonect/src/turbo_jpeg_rgb_packet_processor.cpp
@@ -26,8 +26,9 @@
 
 #include <libfreenect2/rgb_packet_processor.h>
 
-#include <opencv2/opencv.hpp>
+//#include <opencv2/opencv.hpp>
 #include <turbojpeg.h>
+#include <iostream>
 
 namespace libfreenect2
 {
@@ -78,12 +79,12 @@ class TurboJpegRgbPacketProcessorImpl
 
   void startTiming()
   {
+    //timing_current_start = cv::getTickCount(); -- disabled together with the OpenCV include; this function is currently a no-op
   }
 
   void stopTiming()
   {
-    timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency();
+    //timing_acc += (cv::getTickCount() - timing_current_start) / cv::getTickFrequency();
     timing_acc_n += 1.0;
 
     if(timing_acc_n >= 100.0)
@@ -110,7 +111,7 @@ void TurboJpegRgbPacketProcessor::process(const RgbPacket &packet)
 {
   if(impl_->decompressor != 0 && listener_ != 0)
   {
-    impl_->startTiming();
+    //impl_->startTiming();
 
     int r = tjDecompress2(impl_->decompressor, packet.jpeg_buffer, packet.jpeg_buffer_length, impl_->frame->data, 1920, 1920 * tjPixelSize[TJPF_BGR], 1080, TJPF_BGR, 0);
 
@@ -126,7 +127,7 @@ void TurboJpegRgbPacketProcessor::process(const RgbPacket &packet)
       std::cerr << "[TurboJpegRgbPacketProcessor::doProcess] Failed to decompress rgb image! TurboJPEG error: '" << tjGetErrorStr() << "'" << std::endl;
     }
 
-    impl_->stopTiming();
+    //impl_->stopTiming();
   }
 }