From 443712107c95edf38082355628161eab235b137d Mon Sep 17 00:00:00 2001 From: Albert Hofkamp Date: Tue, 29 Sep 2015 11:57:43 +0200 Subject: [PATCH 1/4] Move 'data' access function to the point where it is needed. --- src/cpu_depth_packet_processor.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cpu_depth_packet_processor.cpp b/src/cpu_depth_packet_processor.cpp index 53d0319f6..2ac439e90 100644 --- a/src/cpu_depth_packet_processor.cpp +++ b/src/cpu_depth_packet_processor.cpp @@ -332,11 +332,6 @@ class CpuDepthPacketProcessorImpl: public WithPerfLogging int32_t decodePixelMeasurement(unsigned char* data, int sub, int x, int y) { - // 298496 = 512 * 424 * 11 / 8 = number of bytes per sub image - uint16_t *ptr = reinterpret_cast(data + 298496 * sub); - int i = y < 212 ? y + 212 : 423 - y; - ptr += 352*i; - /** r1.yz = r2.xxyx < l(0, 1, 0, 0) // ilt r1.y = r1.z | r1.y // or @@ -385,6 +380,11 @@ class CpuDepthPacketProcessorImpl: public WithPerfLogging return lut11to16[0]; } + // 298496 = 512 * 424 * 11 / 8 = number of bytes per sub image + uint16_t *ptr = reinterpret_cast(data + 298496 * sub); + int i = y < 212 ? y + 212 : 423 - y; + ptr += 352*i; + int i1 = ptr[r1yi]; int i2 = ptr[r1yi + 1]; i1 = i1 >> r1zi; From 4afd1a9049ae5a2cc0fb26d7c7c5a275165278fe Mon Sep 17 00:00:00 2001 From: Albert Hofkamp Date: Tue, 29 Sep 2015 12:06:35 +0200 Subject: [PATCH 2/4] Merge booleans, eliminate bfi and r4wi. 
--- src/cpu_depth_packet_processor.cpp | 32 ++++-------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/src/cpu_depth_packet_processor.cpp b/src/cpu_depth_packet_processor.cpp index 2ac439e90..33391c001 100644 --- a/src/cpu_depth_packet_processor.cpp +++ b/src/cpu_depth_packet_processor.cpp @@ -332,34 +332,11 @@ class CpuDepthPacketProcessorImpl: public WithPerfLogging int32_t decodePixelMeasurement(unsigned char* data, int sub, int x, int y) { - /** - r1.yz = r2.xxyx < l(0, 1, 0, 0) // ilt - r1.y = r1.z | r1.y // or - */ - bool r1y = x < 1 || y < 0; - /* - r1.zw = l(0, 0, 510, 423) < r2.xxxy // ilt - r1.z = r1.w | r1.z // or - */ - bool r1z = 510 < x || 423 < y; - /* - r1.y = r1.z | r1.y // or - */ - r1y = r1y || r1z; - /* - r1.y = r1.y & l(0x1fffffff) // and - */ + bool r1y = x < 1 || y < 0 || 510 < x || 423 < y; int r1yi = r1y ? 0xffffffff : 0x0; r1yi &= 0x1fffffff; - /* - bfi r1.z, l(2), l(7), r2.x, l(0) - ushr r1.w, r2.x, l(2) - r1.z = r1.w + r1.z // iadd - */ - int r1zi = bfi(2, 7, x, 0); - int r1wi = x >> 2; - r1zi = r1wi + r1zi; + int r1zi = (x >> 2) + ((x & 0x3) << 7); /* imul null, r1.z, r1.z, l(11) @@ -370,10 +347,9 @@ class CpuDepthPacketProcessorImpl: public WithPerfLogging r4.w = -r1.z + l(16) // iadd */ r1zi = (r1zi * 11L) & 0xffffffff; - r1wi = r1zi >> 4; + int r1wi = r1zi >> 4; r1yi = r1yi + r1wi; r1zi = r1zi & 15; - int r4wi = -r1zi + 16; if(r1yi > 352) { @@ -388,7 +364,7 @@ class CpuDepthPacketProcessorImpl: public WithPerfLogging int i1 = ptr[r1yi]; int i2 = ptr[r1yi + 1]; i1 = i1 >> r1zi; - i2 = i2 << r4wi; + i2 = i2 << (16 - r1zi); return lut11to16[((i1 | i2) & 2047)]; } From 767ba4a108ea54cf8361939f23fecce29b9593b5 Mon Sep 17 00:00:00 2001 From: Albert Hofkamp Date: Tue, 29 Sep 2015 13:12:48 +0200 Subject: [PATCH 3/4] Split case of r1yi bigger than 352. Due to known range of the x coordinate, "r1zi >> 4" cannot go beyond 352. The only way to get there is due to having an out-of-bound pixel (x, y) coordinate. 
Therefore, "return lut11to16[0]" happens only for a true boolean condition. --- src/cpu_depth_packet_processor.cpp | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/src/cpu_depth_packet_processor.cpp b/src/cpu_depth_packet_processor.cpp index 33391c001..9349be8c8 100644 --- a/src/cpu_depth_packet_processor.cpp +++ b/src/cpu_depth_packet_processor.cpp @@ -332,35 +332,22 @@ class CpuDepthPacketProcessorImpl: public WithPerfLogging int32_t decodePixelMeasurement(unsigned char* data, int sub, int x, int y) { - bool r1y = x < 1 || y < 0 || 510 < x || 423 < y; - int r1yi = r1y ? 0xffffffff : 0x0; - r1yi &= 0x1fffffff; - - int r1zi = (x >> 2) + ((x & 0x3) << 7); - - /* - imul null, r1.z, r1.z, l(11) - ushr r1.w, r1.z, l(4) - r1.y = r1.w + r1.y // iadd - r1.w = r1.y + l(1) // iadd - r1.z = r1.z & l(15) // and - r4.w = -r1.z + l(16) // iadd - */ - r1zi = (r1zi * 11L) & 0xffffffff; - int r1wi = r1zi >> 4; - r1yi = r1yi + r1wi; - r1zi = r1zi & 15; - - if(r1yi > 352) + if (x < 1 || y < 0 || 510 < x || 423 < y) { return lut11to16[0]; } + int r1zi = (x >> 2) + ((x & 0x3) << 7); // Range 1..510 + r1zi = r1zi * 11L; // Range 11..5610 + // 298496 = 512 * 424 * 11 / 8 = number of bytes per sub image uint16_t *ptr = reinterpret_cast(data + 298496 * sub); int i = y < 212 ? y + 212 : 423 - y; ptr += 352*i; + int r1yi = r1zi >> 4; // Range 0..350 + r1zi = r1zi & 15; + int i1 = ptr[r1yi]; int i2 = ptr[r1yi + 1]; i1 = i1 >> r1zi; From 7c34bc96020c8ed84a435186fa7781fca1f78165 Mon Sep 17 00:00:00 2001 From: Albert Hofkamp Date: Mon, 28 Sep 2015 09:56:19 +0200 Subject: [PATCH 4/4] Copy pixels at x==0 || x== 511 as well. 
--- src/cpu_depth_packet_processor.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/cpu_depth_packet_processor.cpp b/src/cpu_depth_packet_processor.cpp index 9349be8c8..93f0384ca 100644 --- a/src/cpu_depth_packet_processor.cpp +++ b/src/cpu_depth_packet_processor.cpp @@ -332,28 +332,33 @@ class CpuDepthPacketProcessorImpl: public WithPerfLogging int32_t decodePixelMeasurement(unsigned char* data, int sub, int x, int y) { - if (x < 1 || y < 0 || 510 < x || 423 < y) + if (x < 0 || y < 0 || 511 < x || 423 < y) { return lut11to16[0]; } - int r1zi = (x >> 2) + ((x & 0x3) << 7); // Range 1..510 - r1zi = r1zi * 11L; // Range 11..5610 + int r1zi = (x >> 2) + ((x & 0x3) << 7); // Range 0..511 + r1zi = r1zi * 11L; // Range 0..5621 // 298496 = 512 * 424 * 11 / 8 = number of bytes per sub image uint16_t *ptr = reinterpret_cast(data + 298496 * sub); int i = y < 212 ? y + 212 : 423 - y; ptr += 352*i; - int r1yi = r1zi >> 4; // Range 0..350 + int r1yi = r1zi >> 4; // Range 0..351 r1zi = r1zi & 15; - int i1 = ptr[r1yi]; - int i2 = ptr[r1yi + 1]; + uint16_t i1 = ptr[r1yi]; i1 = i1 >> r1zi; - i2 = i2 << (16 - r1zi); - return lut11to16[((i1 | i2) & 2047)]; + if (r1zi > 5) // For x == 511, r1yi == 351 but r1zi == 5. + { + uint16_t i2 = ptr[r1yi + 1]; + i2 = i2 << (16 - r1zi); + i1 |= i2; + } + + return lut11to16[i1 & 2047]; } /**