diff --git a/.gitignore b/.gitignore index 134ceb9..d6610cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ __pycache__/ weights/*.tar -weights/*.pth +# weights/*.pth YOLOX_outputs/ *.pth *.onnx diff --git a/README.md b/README.md index 38434df..7b91a20 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,16 @@ # YOLOX-ROS +## TR setup +### before you build +`sudo apt install ros-humble-generate-parameter-library` + +### building +colcon build --cmake-args -DYOLOX_USE_TENSORRT=ON + +### running +`ros2 launch yolox_ros_cpp yolox_tensorrt.launch.py` + + ![](https://img.shields.io/github/stars/Ar-Ray-code/YOLOX-ROS) [![iron](https://github.com/Ar-Ray-code/YOLOX-ROS/actions/workflows/ci_iron.yml/badge.svg?branch=iron)](https://github.com/Ar-Ray-code/YOLOX-ROS/actions/workflows/ci_iron.yml) diff --git a/weights/onnx/armor_s.onnx b/weights/onnx/armor_s.onnx new file mode 100644 index 0000000..0d6064b Binary files /dev/null and b/weights/onnx/armor_s.onnx differ diff --git a/weights/onnx/armor_sim_s.onnx b/weights/onnx/armor_sim_s.onnx new file mode 100644 index 0000000..e109778 Binary files /dev/null and b/weights/onnx/armor_sim_s.onnx differ diff --git a/weights/onnx/armor_tiny.onnx b/weights/onnx/armor_tiny.onnx new file mode 100644 index 0000000..0cdf5b1 Binary files /dev/null and b/weights/onnx/armor_tiny.onnx differ diff --git a/weights/tensorrt/armor_s.trt b/weights/tensorrt/armor_s.trt new file mode 100644 index 0000000..f033e89 Binary files /dev/null and b/weights/tensorrt/armor_s.trt differ diff --git a/weights/tensorrt/armor_sim_s.trt b/weights/tensorrt/armor_sim_s.trt new file mode 100644 index 0000000..c1c41d3 Binary files /dev/null and b/weights/tensorrt/armor_sim_s.trt differ diff --git a/yolox_ros_cpp/yolox_cpp/CMakeLists.txt b/yolox_ros_cpp/yolox_cpp/CMakeLists.txt index db849f4..d9834ce 100644 --- a/yolox_ros_cpp/yolox_cpp/CMakeLists.txt +++ b/yolox_ros_cpp/yolox_cpp/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.8) +cmake_minimum_required(VERSION 3.14) project(yolox_cpp) if(NOT CMAKE_CXX_STANDARD) @@ -11,6 +11,11 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic) endif() +set(CMAKE_BUILD_TYPE Release) +set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --use_fast_math -DNDEBUG") +set(CMAKE_CUDA_ARCHITECTURES 87) +project(yolox_cpp CXX CUDA) + find_package(ament_cmake_auto REQUIRED) ament_auto_find_build_dependencies() @@ -46,15 +51,15 @@ endif() if(YOLOX_USE_TENSORRT) find_package(CUDA REQUIRED) + enable_language(CUDA) if (NOT CUDA_TOOLKIT_ROOT_DIR) set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda) endif() find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES include) - set(ENABLE_TENSORRT ON) - set(TARGET_SRC ${TARGET_SRC} src/yolox_tensorrt.cpp) + set(TARGET_SRC ${TARGET_SRC} src/yolox_tensorrt.cpp src/cuda_utils.cu) set(TARGET_LIBS ${TARGET_LIBS} nvinfer nvinfer_plugin) set(TARGET_DPENDENCIES ${TARGET_DPENDENCIES} CUDA) endif() @@ -85,6 +90,11 @@ configure_file( ) ament_auto_add_library(yolox_cpp SHARED ${TARGET_SRC}) +target_include_directories(yolox_cpp PUBLIC + $ + $ +) + ament_target_dependencies(yolox_cpp ${TARGET_DPENDENCIES}) ament_export_dependencies(${TARGET_DPENDENCIES}) target_link_libraries(yolox_cpp ${TARGET_LIBS}) diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp index 5360908..217afd1 100644 --- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp +++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp @@ -39,7 +39,8 @@ namespace yolox_cpp num_classes_(num_classes), p6_(p6), model_version_(model_version) { } - virtual std::vector inference(const cv::Mat &frame) = 0; + virtual std::vector inference(const cv::Mat &frame, uchar3* d_image, float* d_output, + cudaStream_t copy_stream_, cudaStream_t resize_stream_) = 0; protected: int input_w_; @@ -76,35 +77,20 @@ namespace yolox_cpp // for NCHW void blobFromImage(const cv::Mat &img, float *blob_data) { - const size_t channels = 3; - const size_t img_h = img.rows; - const size_t img_w = img.cols; - const size_t img_hw = img_h * img_w; - float *blob_data_ch0 = blob_data; - float *blob_data_ch1 = blob_data + img_hw; - float *blob_data_ch2 = blob_data + img_hw * 2; - // HWC -> CHW - if (this->model_version_ == "0.1.0") - { - for (size_t i = 0; i < img_hw; ++i) - { - // blob = (img / 255.0 - mean) / std - const size_t src_idx = i * channels; - blob_data_ch0[i] = static_cast(img.data[src_idx + 0]) * this->std255_inv_[0] + this->mean_std_[0]; - blob_data_ch1[i] = static_cast(img.data[src_idx + 1]) * this->std255_inv_[1] + this->mean_std_[1]; - blob_data_ch2[i] = static_cast(img.data[src_idx + 2]) * this->std255_inv_[2] + this->mean_std_[2]; - } - } - else - { - for (size_t i = 0; i < img_hw; ++i) - { - const size_t src_idx = i * channels; - blob_data_ch0[i] = static_cast(img.data[src_idx + 0]); - blob_data_ch1[i] = static_cast(img.data[src_idx + 1]); - blob_data_ch2[i] = static_cast(img.data[src_idx + 2]); - } - } + cv::Scalar mean = cv::Scalar(0, 0, 0); + cv::Scalar std_inv = cv::Scalar(1.0, 1.0, 1.0); // No normalization + + cv::Mat blob = cv::dnn::blobFromImage( + img, + 1.0, // dummy scalefactor (will apply manually below) + cv::Size(), // use original size + mean, // mean subtraction + true, // swapRB = true + false // crop = false + ); + + // Copy to output buffer + std::memcpy(blob_data, blob.ptr(), sizeof(float) * 3 * img.rows * img.cols); } // for NHWC diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/cuda_utils.cuh b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/cuda_utils.cuh new file mode 100644 index 0000000..87b2b0e --- /dev/null +++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/cuda_utils.cuh @@ -0,0 +1,9 @@ +#pragma once + +#include +#include + +__global__ void gpuResizeAndBlobFromImage(uchar3* image_data, float* output, int r_width, int r_height, + int input_width, int input_height, float w_ratio, float h_ratio); +extern "C" void launchGPUResizeAndBlobFromImage(uchar3* image_data, float* output, int r_width, int r_height, + int input_width, int input_height, float w_ratio, float h_ratio, cudaStream_t stream); \ No newline at end of file diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/tensorrt_logging.h b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/tensorrt_logging.h index abea1ec..0a75ded 100644 --- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/tensorrt_logging.h +++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/tensorrt_logging.h @@ -26,503 +26,507 @@ #include #include -namespace yolox_cpp -{ - - using Severity = nvinfer1::ILogger::Severity; - - class LogStreamConsumerBuffer : public std::stringbuf - { - public: - LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog) - : mOutput(stream) - , mPrefix(prefix) - , mShouldLog(shouldLog) - { - } - - LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept - : mOutput(other.mOutput) - , mPrefix(other.mPrefix) - , mShouldLog(other.mShouldLog) - { - } - LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete; - LogStreamConsumerBuffer() = delete; - LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete; - LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete; - - ~LogStreamConsumerBuffer() override - { - // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence - // std::streambuf::pptr() gives a pointer to the current position of the output sequence - // if the pointer to the beginning is not equal to the pointer to the current position, - // call putOutput() to log the output to the stream - if (pbase() != pptr()) - { - putOutput(); - } - } - - //! - //! synchronizes the stream buffer and returns 0 on success - //! synchronizing the stream buffer consists of inserting the buffer contents into the stream, - //! resetting the buffer and flushing the stream - //! - int32_t sync() override - { +namespace yolox_cpp { + +using Severity = nvinfer1::ILogger::Severity; + +class LogStreamConsumerBuffer : public std::stringbuf { + public: + LogStreamConsumerBuffer(std::ostream &stream, const std::string &prefix, + bool shouldLog) + : mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {} + + LogStreamConsumerBuffer(LogStreamConsumerBuffer &&other) noexcept + : mOutput(other.mOutput), mPrefix(other.mPrefix), + mShouldLog(other.mShouldLog) {} + LogStreamConsumerBuffer(const LogStreamConsumerBuffer &other) = delete; + LogStreamConsumerBuffer() = delete; + LogStreamConsumerBuffer & + operator=(const LogStreamConsumerBuffer &) = delete; + LogStreamConsumerBuffer &operator=(LogStreamConsumerBuffer &&) = delete; + + ~LogStreamConsumerBuffer() override { + // std::streambuf::pbase() gives a pointer to the beginning of the + // buffered part of the output sequence std::streambuf::pptr() gives a + // pointer to the current position of the output sequence if the pointer + // to the beginning is not equal to the pointer to the current position, + // call putOutput() to log the output to the stream + if (pbase() != pptr()) { putOutput(); - return 0; } - - void putOutput() - { - if (mShouldLog) - { - // prepend timestamp - std::time_t timestamp = std::time(nullptr); - tm* tm_local = std::localtime(×tamp); - mOutput << "["; - mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; - mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; - // std::stringbuf::str() gets the string contents of the buffer - // insert the buffer contents pre-appended by the appropriate prefix into the stream - mOutput << mPrefix << str(); - } - // set the buffer to empty - str(""); - // flush the stream - mOutput.flush(); - } - - void setShouldLog(bool shouldLog) - { - mShouldLog = shouldLog; - } - - private: - std::ostream& mOutput; - std::string mPrefix; - bool mShouldLog{}; - }; // class LogStreamConsumerBuffer + } //! - //! \class LogStreamConsumerBase - //! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer + //! synchronizes the stream buffer and returns 0 on success + //! synchronizing the stream buffer consists of inserting the buffer + //! contents into the stream, resetting the buffer and flushing the stream //! - class LogStreamConsumerBase - { - public: - LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog) - : mBuffer(stream, prefix, shouldLog) - { - } + int32_t sync() override { + putOutput(); + return 0; + } - protected: - LogStreamConsumerBuffer mBuffer; - }; // class LogStreamConsumerBase - - //! - //! \class LogStreamConsumer - //! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages. - //! Order of base classes is LogStreamConsumerBase and then std::ostream. - //! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field - //! in LogStreamConsumer and then the address of the buffer is passed to std::ostream. - //! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. - //! Please do not change the order of the parent classes. - //! - class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream - { - public: - //! - //! \brief Creates a LogStreamConsumer which logs messages with level severity. - //! Reportable severity determines if the messages are severe enough to be logged. - //! - LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity, nvinfer1::ILogger::Severity severity) - : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) - , std::ostream(&mBuffer) // links the stream buffer with the stream - , mShouldLog(severity <= reportableSeverity) - , mSeverity(severity) - { + void putOutput() { + if (mShouldLog) { + // prepend timestamp + std::time_t timestamp = std::time(nullptr); + tm *tm_local = std::localtime(×tamp); + mOutput << "["; + mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon + << "/"; + mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday + << "/"; + mOutput << std::setw(4) << std::setfill('0') + << 1900 + tm_local->tm_year << "-"; + mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour + << ":"; + mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min + << ":"; + mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec + << "] "; + // std::stringbuf::str() gets the string contents of the buffer + // insert the buffer contents pre-appended by the appropriate prefix + // into the stream + mOutput << mPrefix << str(); } + // set the buffer to empty + str(""); + // flush the stream + mOutput.flush(); + } - LogStreamConsumer(LogStreamConsumer&& other) noexcept - : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) - , std::ostream(&mBuffer) // links the stream buffer with the stream - , mShouldLog(other.mShouldLog) - , mSeverity(other.mSeverity) - { - } - LogStreamConsumer(const LogStreamConsumer& other) = delete; - LogStreamConsumer() = delete; - ~LogStreamConsumer() = default; - LogStreamConsumer& operator=(const LogStreamConsumer&) = delete; - LogStreamConsumer& operator=(LogStreamConsumer&&) = delete; - - void setReportableSeverity(Severity reportableSeverity) - { - mShouldLog = mSeverity <= reportableSeverity; - mBuffer.setShouldLog(mShouldLog); - } + void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; } + + private: + std::ostream &mOutput; + std::string mPrefix; + bool mShouldLog{}; +}; // class LogStreamConsumerBuffer + +//! +//! \class LogStreamConsumerBase +//! \brief Convenience object used to initialize LogStreamConsumerBuffer before +//! std::ostream in LogStreamConsumer +//! +class LogStreamConsumerBase { + public: + LogStreamConsumerBase(std::ostream &stream, const std::string &prefix, + bool shouldLog) + : mBuffer(stream, prefix, shouldLog) {} + + protected: + LogStreamConsumerBuffer mBuffer; +}; // class LogStreamConsumerBase + +//! +//! \class LogStreamConsumer +//! \brief Convenience object used to facilitate use of C++ stream syntax when +//! logging messages. +//! Order of base classes is LogStreamConsumerBase and then std::ostream. +//! This is because the LogStreamConsumerBase class is used to initialize the +//! LogStreamConsumerBuffer member field in LogStreamConsumer and then the +//! address of the buffer is passed to std::ostream. This is necessary to +//! prevent the address of an uninitialized buffer from being passed to +//! std::ostream. Please do not change the order of the parent classes. +//! +class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream { + public: + //! + //! \brief Creates a LogStreamConsumer which logs messages with level + //! severity. + //! Reportable severity determines if the messages are severe enough to be + //! logged. + //! + LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity, + nvinfer1::ILogger::Severity severity) + : LogStreamConsumerBase(severityOstream(severity), + severityPrefix(severity), + severity <= reportableSeverity), + std::ostream(&mBuffer) // links the stream buffer with the stream + , + mShouldLog(severity <= reportableSeverity), mSeverity(severity) {} + + LogStreamConsumer(LogStreamConsumer &&other) noexcept + : LogStreamConsumerBase(severityOstream(other.mSeverity), + severityPrefix(other.mSeverity), + other.mShouldLog), + std::ostream(&mBuffer) // links the stream buffer with the stream + , + mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {} + LogStreamConsumer(const LogStreamConsumer &other) = delete; + LogStreamConsumer() = delete; + ~LogStreamConsumer() = default; + LogStreamConsumer &operator=(const LogStreamConsumer &) = delete; + LogStreamConsumer &operator=(LogStreamConsumer &&) = delete; + + void setReportableSeverity(Severity reportableSeverity) { + mShouldLog = mSeverity <= reportableSeverity; + mBuffer.setShouldLog(mShouldLog); + } - private: - static std::ostream& severityOstream(Severity severity) - { - return severity >= Severity::kINFO ? std::cout : std::cerr; - } + private: + static std::ostream &severityOstream(Severity severity) { + return severity >= Severity::kINFO ? std::cout : std::cerr; + } - static std::string severityPrefix(Severity severity) - { - switch (severity) - { - case Severity::kINTERNAL_ERROR: return "[F] "; - case Severity::kERROR: return "[E] "; - case Severity::kWARNING: return "[W] "; - case Severity::kINFO: return "[I] "; - case Severity::kVERBOSE: return "[V] "; - default: assert(0); return ""; - } + static std::string severityPrefix(Severity severity) { + switch (severity) { + case Severity::kINTERNAL_ERROR: + return "[F] "; + case Severity::kERROR: + return "[E] "; + case Severity::kWARNING: + return "[W] "; + case Severity::kINFO: + return "[I] "; + case Severity::kVERBOSE: + return "[V] "; + default: + assert(0); + return ""; } + } - bool mShouldLog; - Severity mSeverity; - }; // class LogStreamConsumer + bool mShouldLog; + Severity mSeverity; +}; // class LogStreamConsumer + +//! +//! \class Logger +//! +//! \brief Class which manages logging of TensorRT tools and samples +//! +//! \details This class provides a common interface for TensorRT tools and +//! samples to log information to the console, and supports logging two types of +//! messages: +//! +//! - Debugging messages with an associated severity (info, warning, error, or +//! internal error/fatal) +//! - Test pass/fail messages +//! +//! The advantage of having all samples use this class for logging as opposed to +//! emitting directly to stdout/stderr is that the logic for controlling the +//! verbosity and formatting of sample output is centralized in one location. +//! +//! In the future, this class could be extended to support dumping test results +//! to a file in some standard format (for example, JUnit XML), and providing +//! additional metadata (e.g. timing the duration of a test run). +//! +//! TODO: For backwards compatibility with existing samples, this class inherits +//! directly from the nvinfer1::ILogger interface, which is problematic since +//! there isn't a clean separation between messages coming from the TensorRT +//! library and messages coming from the sample. +//! +//! In the future (once all samples are updated to use Logger::getTRTLogger() to +//! access the ILogger) we can refactor the class to eliminate the inheritance +//! and instead make the nvinfer1::ILogger implementation a member of the Logger +//! object. +//! +class Logger : public nvinfer1::ILogger { + public: + explicit Logger(Severity severity = Severity::kWARNING) + : mReportableSeverity(severity) {} + + //! + //! \enum TestResult + //! \brief Represents the state of a given test + //! + enum class TestResult { + kRUNNING, //!< The test is running + kPASSED, //!< The test passed + kFAILED, //!< The test failed + kWAIVED //!< The test was waived + }; + + //! + //! \brief Forward-compatible method for retrieving the nvinfer::ILogger + //! associated with this Logger \return The nvinfer1::ILogger associated + //! with this Logger + //! + //! TODO Once all samples are updated to use this method to register the + //! logger with TensorRT, we can eliminate the inheritance of Logger from + //! ILogger + //! + nvinfer1::ILogger &getTRTLogger() noexcept { return *this; } + + //! + //! \brief Implementation of the nvinfer1::ILogger::log() virtual method + //! + //! Note samples should not be calling this function directly; it will + //! eventually go away once we eliminate the inheritance from + //! nvinfer1::ILogger + //! + void log(Severity severity, const char *msg) noexcept override { + LogStreamConsumer(mReportableSeverity, severity) + << "[TRT] " << std::string(msg) << std::endl; + } //! - //! \class Logger + //! \brief Method for controlling the verbosity of logging output //! - //! \brief Class which manages logging of TensorRT tools and samples + //! \param severity The logger will only emit messages that have severity of + //! this level or higher. //! - //! \details This class provides a common interface for TensorRT tools and samples to log information to the console, - //! and supports logging two types of messages: - //! - //! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal) - //! - Test pass/fail messages - //! - //! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is - //! that the logic for controlling the verbosity and formatting of sample output is centralized in one location. - //! - //! In the future, this class could be extended to support dumping test results to a file in some standard format - //! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run). + void setReportableSeverity(Severity severity) noexcept { + mReportableSeverity = severity; + } + //! - //! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger - //! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT - //! library and messages coming from the sample. + //! \brief Opaque handle that holds logging information for a particular + //! test //! - //! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the - //! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger - //! object. + //! This object is an opaque handle to information used by the Logger to + //! print test results. The sample must call Logger::defineTest() in order + //! to obtain a TestAtom that can be used with + //! Logger::reportTest{Start,End}(). //! - class Logger : public nvinfer1::ILogger - { - public: - explicit Logger(Severity severity = Severity::kWARNING) - : mReportableSeverity(severity) - { - } - - //! - //! \enum TestResult - //! \brief Represents the state of a given test - //! - enum class TestResult - { - kRUNNING, //!< The test is running - kPASSED, //!< The test passed - kFAILED, //!< The test failed - kWAIVED //!< The test was waived - }; - - //! - //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger - //! \return The nvinfer1::ILogger associated with this Logger - //! - //! TODO Once all samples are updated to use this method to register the logger with TensorRT, - //! we can eliminate the inheritance of Logger from ILogger - //! - nvinfer1::ILogger& getTRTLogger() noexcept - { - return *this; - } - - //! - //! \brief Implementation of the nvinfer1::ILogger::log() virtual method - //! - //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the - //! inheritance from nvinfer1::ILogger - //! - void log(Severity severity, const char* msg) noexcept override - { - LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; - } - - //! - //! \brief Method for controlling the verbosity of logging output - //! - //! \param severity The logger will only emit messages that have severity of this level or higher. - //! - void setReportableSeverity(Severity severity) noexcept - { - mReportableSeverity = severity; - } + class TestAtom { + public: + TestAtom(TestAtom &&) = default; - //! - //! \brief Opaque handle that holds logging information for a particular test - //! - //! This object is an opaque handle to information used by the Logger to print test results. - //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used - //! with Logger::reportTest{Start,End}(). - //! - class TestAtom - { - public: - TestAtom(TestAtom&&) = default; - - private: - friend class Logger; - - TestAtom(bool started, const std::string& name, const std::string& cmdline) - : mStarted(started) - , mName(name) - , mCmdline(cmdline) - { - } - - bool mStarted; - std::string mName; - std::string mCmdline; - }; - - //! - //! \brief Define a test for logging - //! - //! \param[in] name The name of the test. This should be a string starting with - //! "TensorRT" and containing dot-separated strings containing - //! the characters [A-Za-z0-9_]. - //! For example, "TensorRT.sample_googlenet" - //! \param[in] cmdline The command line used to reproduce the test - // - //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). - //! - static TestAtom defineTest(const std::string& name, const std::string& cmdline) - { - return TestAtom(false, name, cmdline); - } - - //! - //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments - //! as input - //! - //! \param[in] name The name of the test - //! \param[in] argc The number of command-line arguments - //! \param[in] argv The array of command-line arguments (given as C strings) - //! - //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). - //! - static TestAtom defineTest(const std::string& name, int32_t argc, char const* const* argv) - { - // Append TensorRT version as info - const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]"; - auto cmdline = genCmdlineString(argc, argv); - return defineTest(vname, cmdline); - } - - //! - //! \brief Report that a test has started. - //! - //! \pre reportTestStart() has not been called yet for the given testAtom - //! - //! \param[in] testAtom The handle to the test that has started - //! - static void reportTestStart(TestAtom& testAtom) - { - reportTestResult(testAtom, TestResult::kRUNNING); - assert(!testAtom.mStarted); - testAtom.mStarted = true; - } - - //! - //! \brief Report that a test has ended. - //! - //! \pre reportTestStart() has been called for the given testAtom - //! - //! \param[in] testAtom The handle to the test that has ended - //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, - //! TestResult::kFAILED, TestResult::kWAIVED - //! - static void reportTestEnd(TestAtom const& testAtom, TestResult result) - { - assert(result != TestResult::kRUNNING); - assert(testAtom.mStarted); - reportTestResult(testAtom, result); - } + private: + friend class Logger; - static int32_t reportPass(TestAtom const& testAtom) - { - reportTestEnd(testAtom, TestResult::kPASSED); - return EXIT_SUCCESS; - } + TestAtom(bool started, const std::string &name, + const std::string &cmdline) + : mStarted(started), mName(name), mCmdline(cmdline) {} - static int32_t reportFail(TestAtom const& testAtom) - { - reportTestEnd(testAtom, TestResult::kFAILED); - return EXIT_FAILURE; - } + bool mStarted; + std::string mName; + std::string mCmdline; + }; - static int32_t reportWaive(TestAtom const& testAtom) - { - reportTestEnd(testAtom, TestResult::kWAIVED); - return EXIT_SUCCESS; - } - - static int32_t reportTest(TestAtom const& testAtom, bool pass) - { - return pass ? reportPass(testAtom) : reportFail(testAtom); - } - - Severity getReportableSeverity() const - { - return mReportableSeverity; - } - - private: - //! - //! \brief returns an appropriate string for prefixing a log message with the given severity - //! - static const char* severityPrefix(Severity severity) - { - switch (severity) - { - case Severity::kINTERNAL_ERROR: return "[F] "; - case Severity::kERROR: return "[E] "; - case Severity::kWARNING: return "[W] "; - case Severity::kINFO: return "[I] "; - case Severity::kVERBOSE: return "[V] "; - default: assert(0); return ""; - } - } - - //! - //! \brief returns an appropriate string for prefixing a test result message with the given result - //! - static const char* testResultString(TestResult result) - { - switch (result) - { - case TestResult::kRUNNING: return "RUNNING"; - case TestResult::kPASSED: return "PASSED"; - case TestResult::kFAILED: return "FAILED"; - case TestResult::kWAIVED: return "WAIVED"; - default: assert(0); return ""; - } - } - - //! - //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity - //! - static std::ostream& severityOstream(Severity severity) - { - return severity >= Severity::kINFO ? std::cout : std::cerr; - } - - //! - //! \brief method that implements logging test results - //! - static void reportTestResult(TestAtom const& testAtom, TestResult result) - { - severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " - << testAtom.mCmdline << std::endl; - } - - //! - //! \brief generate a command line string from the given (argc, argv) values - //! - static std::string genCmdlineString(int32_t argc, char const* const* argv) - { - std::stringstream ss; - for (int32_t i = 0; i < argc; i++) - { - if (i > 0) - { - ss << " "; - } - ss << argv[i]; - } - return ss.str(); - } - - Severity mReportableSeverity; - }; // class Logger - -} // namespace yolox_cpp + //! + //! \brief Define a test for logging + //! + //! \param[in] name The name of the test. This should be a string starting + //! with + //! "TensorRT" and containing dot-separated strings + //! containing the characters [A-Za-z0-9_]. For example, + //! "TensorRT.sample_googlenet" + //! \param[in] cmdline The command line used to reproduce the test + // + //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). + //! + static TestAtom defineTest(const std::string &name, + const std::string &cmdline) { + return TestAtom(false, name, cmdline); + } -namespace -{ - using namespace yolox_cpp; //! - //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE + //! \brief A convenience overloaded version of defineTest() that accepts an + //! array of command-line arguments + //! as input //! - //! Example usage: + //! \param[in] name The name of the test + //! \param[in] argc The number of command-line arguments + //! \param[in] argv The array of command-line arguments (given as C strings) //! - //! LOG_VERBOSE(logger) << "hello world" << std::endl; + //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). //! - inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) - { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); + static TestAtom defineTest(const std::string &name, int32_t argc, + char const *const *argv) { + // Append TensorRT version as info + const std::string vname = + name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]"; + auto cmdline = genCmdlineString(argc, argv); + return defineTest(vname, cmdline); } //! - //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO + //! \brief Report that a test has started. //! - //! Example usage: + //! \pre reportTestStart() has not been called yet for the given testAtom //! - //! LOG_INFO(logger) << "hello world" << std::endl; + //! \param[in] testAtom The handle to the test that has started //! - inline LogStreamConsumer LOG_INFO(const Logger& logger) - { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); + static void reportTestStart(TestAtom &testAtom) { + reportTestResult(testAtom, TestResult::kRUNNING); + assert(!testAtom.mStarted); + testAtom.mStarted = true; } //! - //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING + //! \brief Report that a test has ended. //! - //! Example usage: + //! \pre reportTestStart() has been called for the given testAtom //! - //! LOG_WARN(logger) << "hello world" << std::endl; + //! \param[in] testAtom The handle to the test that has ended + //! \param[in] result The result of the test. Should be one of + //! TestResult::kPASSED, + //! TestResult::kFAILED, TestResult::kWAIVED //! - inline LogStreamConsumer LOG_WARN(const Logger& logger) - { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); + static void reportTestEnd(TestAtom const &testAtom, TestResult result) { + assert(result != TestResult::kRUNNING); + assert(testAtom.mStarted); + reportTestResult(testAtom, result); + } + + static int32_t reportPass(TestAtom const &testAtom) { + reportTestEnd(testAtom, TestResult::kPASSED); + return EXIT_SUCCESS; + } + + static int32_t reportFail(TestAtom const &testAtom) { + reportTestEnd(testAtom, TestResult::kFAILED); + return EXIT_FAILURE; + } + + static int32_t reportWaive(TestAtom const &testAtom) { + reportTestEnd(testAtom, TestResult::kWAIVED); + return EXIT_SUCCESS; + } + + static int32_t reportTest(TestAtom const &testAtom, bool pass) { + return pass ? reportPass(testAtom) : reportFail(testAtom); + } + + Severity getReportableSeverity() const { return mReportableSeverity; } + + private: + //! + //! \brief returns an appropriate string for prefixing a log message with + //! the given severity + //! + static const char *severityPrefix(Severity severity) { + switch (severity) { + case Severity::kINTERNAL_ERROR: + return "[F] "; + case Severity::kERROR: + return "[E] "; + case Severity::kWARNING: + return "[W] "; + case Severity::kINFO: + return "[I] "; + case Severity::kVERBOSE: + return "[V] "; + default: + assert(0); + return ""; + } } //! - //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR - //! - //! Example usage: + //! \brief returns an appropriate string for prefixing a test result message + //! with the given result + //! + static const char *testResultString(TestResult result) { + switch (result) { + case TestResult::kRUNNING: + return "RUNNING"; + case TestResult::kPASSED: + return "PASSED"; + case TestResult::kFAILED: + return "FAILED"; + case TestResult::kWAIVED: + return "WAIVED"; + default: + assert(0); + return ""; + } + } + //! - //! LOG_ERROR(logger) << "hello world" << std::endl; + //! \brief returns an appropriate output stream (cout or cerr) to use with + //! the given severity //! - inline LogStreamConsumer LOG_ERROR(const Logger& logger) - { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); + static std::ostream &severityOstream(Severity severity) { + return severity >= Severity::kINFO ? std::cout : std::cerr; } //! - //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR - //! ("fatal" severity) + //! \brief method that implements logging test results //! - //! Example usage: + static void reportTestResult(TestAtom const &testAtom, TestResult result) { + severityOstream(Severity::kINFO) + << "&&&& " << testResultString(result) << " " << testAtom.mName + << " # " << testAtom.mCmdline << std::endl; + } + //! - //! LOG_FATAL(logger) << "hello world" << std::endl; + //! \brief generate a command line string from the given (argc, argv) values //! - inline LogStreamConsumer LOG_FATAL(const Logger& logger) - { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); + static std::string genCmdlineString(int32_t argc, char const *const *argv) { + std::stringstream ss; + for (int32_t i = 0; i < argc; i++) { + if (i > 0) { + ss << " "; + } + ss << argv[i]; + } + return ss.str(); } + + Severity mReportableSeverity; +}; // class Logger + +} // namespace yolox_cpp + +namespace { +using namespace yolox_cpp; +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kVERBOSE +//! +//! Example usage: +//! +//! LOG_VERBOSE(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_VERBOSE(const Logger &logger) { + return LogStreamConsumer(logger.getReportableSeverity(), + Severity::kVERBOSE); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kINFO +//! +//! Example usage: +//! +//! LOG_INFO(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_INFO(const Logger &logger) { + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kWARNING +//! +//! Example usage: +//! +//! LOG_WARN(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_WARN(const Logger &logger) { + return LogStreamConsumer(logger.getReportableSeverity(), + Severity::kWARNING); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kERROR +//! +//! Example usage: +//! +//! LOG_ERROR(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_ERROR(const Logger &logger) { + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kINTERNAL_ERROR +//! ("fatal" severity) +//! +//! Example usage: +//! +//! LOG_FATAL(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_FATAL(const Logger &logger) { + return LogStreamConsumer(logger.getReportableSeverity(), + Severity::kINTERNAL_ERROR); +} } // anonymous namespace #endif // TENSORRT_LOGGING_H diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp index 15ee452..912c7e6 100644 --- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp +++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -37,10 +38,10 @@ namespace yolox_cpp{ float nms_th=0.45, float conf_th=0.3, const std::string &model_version="0.1.1rc0", int num_classes=80, bool p6=false); ~YoloXTensorRT(); - std::vector inference(const cv::Mat& frame) override; - + std::vector inference(const cv::Mat &frame, uchar3* d_image, float* d_output, + cudaStream_t copy_stream_, cudaStream_t resize_stream_); private: - void doInference(const float* input, float* output); + void doInference(const float* input, float* output, cudaStream_t copy_stream_); int DEVICE_ = 0; Logger gLogger_; diff --git a/yolox_ros_cpp/yolox_cpp/src/cuda_utils.cu b/yolox_ros_cpp/yolox_cpp/src/cuda_utils.cu new file mode 100644 index 0000000..5bec46d --- /dev/null +++ b/yolox_ros_cpp/yolox_cpp/src/cuda_utils.cu @@ -0,0 +1,58 @@ +#include +#include + +__global__ void gpuResizeAndBlobFromImage(uchar3* image_data, float* output, int r_width, int r_height, + int input_width, int input_height, float w_ratio, float h_ratio) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= r_width || y >= r_height) return; + + // Look into the input image, grab all the pixels + // Do math to determine the color of the output pixel; for bilinear scaling + float sy = (y + 0.5) * h_ratio - 0.5; + float sx = (x + 0.5) * w_ratio - 0.5; + + int y0 = (int) sy; + int x0 = (int) sx; + int y1 = y0 + 1; + int x1 = x0 + 1; + + y0 = max(y0, 0); + x0 = max(x0, 0); + y1 = min(y1, input_height - 1); + x1 = min(x1, input_width - 1); + + int src_idx = y0 * input_width + x0; + if (src_idx >= input_width * input_height) return; + + // Weights for each input pixel to contribute + float w00 = (1 - (sy - y0)) * (1 - (sx - x0)); // For y = 0 x = 0 + float w01 = (1 - (sy - y0)) * (sx - x0); // For y = 0 x = 1 + float w10 = (sy - y0) * (1 - (sx - x0)); // For y = 1 x = 0 dy(1-dx) + float w11 = (sy - y0) * (sx - x0); // For y = 1 x = 1 + + int r_idx = 0 * r_height * r_width + y * r_width + x; + int g_idx = 1 * r_height * r_width + y * r_width + x; + int b_idx = 2 * r_height * r_width + y * r_width + x; + + uchar3 p00 = image_data[y0 * input_width + x0]; + uchar3 p01 = image_data[y0 * input_width + x1]; + uchar3 p10 = image_data[y1 * input_width + x0]; + uchar3 p11 = image_data[y1 * input_width + x1]; + + // Red channel (uchar3.z) + output[r_idx] = w00 * p00.z + w01 * p01.z + w10 * p10.z + w11 * p11.z; + // Green channel (uchar3.y) + output[g_idx] = w00 * p00.y + w01 * p01.y + w10 * p10.y + w11 * p11.y; + // Blue channel (uchar3.x) + output[b_idx] = w00 * p00.x + w01 * p01.x + w10 * p10.x + w11 * p11.x; +} + +extern "C" void launchGPUResizeAndBlobFromImage(uchar3* image_data, float* output, int r_width, int r_height, + int input_width, int input_height, float w_ratio, float h_ratio, cudaStream_t stream) { + dim3 block(16, 16); + dim3 grid((r_width + block.x - 1) / block.x, + (r_height + block.y - 1) / block.y); + gpuResizeAndBlobFromImage<<>>(image_data, output, r_width, r_height, input_width, input_height, w_ratio, h_ratio); +} \ No newline at end of file diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_onnxruntime.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_onnxruntime.cpp index d3f97a0..430e837 100644 --- a/yolox_ros_cpp/yolox_cpp/src/yolox_onnxruntime.cpp +++ b/yolox_ros_cpp/yolox_cpp/src/yolox_onnxruntime.cpp @@ -1,150 +1,147 @@ #include "yolox_cpp/yolox_onnxruntime.hpp" -namespace yolox_cpp{ - - YoloXONNXRuntime::YoloXONNXRuntime(const file_name_t &path_to_model, - int intra_op_num_threads, int inter_op_num_threads, - bool use_cuda, int device_id, bool use_parallel, - float nms_th, float conf_th, const std::string &model_version, - int num_classes, bool p6) - :AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), - intra_op_num_threads_(intra_op_num_threads), inter_op_num_threads_(inter_op_num_threads), - use_cuda_(use_cuda), device_id_(device_id), use_parallel_(use_parallel) - { - try - { - Ort::SessionOptions session_options; - - session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); - if(this->use_parallel_) - { - session_options.SetExecutionMode(ExecutionMode::ORT_PARALLEL); - session_options.SetInterOpNumThreads(this->inter_op_num_threads_); - } - else - { - session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); - } - session_options.SetIntraOpNumThreads(this->intra_op_num_threads_); - - if(this->use_cuda_) - { - OrtCUDAProviderOptions cuda_option; - cuda_option.device_id = this->device_id_; - session_options.AppendExecutionProvider_CUDA(cuda_option); - } - - this->session_ = Ort::Session(this->env_, - path_to_model.c_str(), - session_options); - } - catch (std::exception &e) - { - std::cerr << e.what() << std::endl; - throw e; +namespace yolox_cpp { + +YoloXONNXRuntime::YoloXONNXRuntime(const file_name_t &path_to_model, + int intra_op_num_threads, + int inter_op_num_threads, bool use_cuda, + int device_id, bool use_parallel, + float nms_th, float conf_th, + const std::string &model_version, + int num_classes, bool p6) + : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), + intra_op_num_threads_(intra_op_num_threads), + inter_op_num_threads_(inter_op_num_threads), use_cuda_(use_cuda), + device_id_(device_id), use_parallel_(use_parallel) { + try { + Ort::SessionOptions session_options; + + session_options.SetGraphOptimizationLevel( + GraphOptimizationLevel::ORT_ENABLE_ALL); + if (this->use_parallel_) { + session_options.SetExecutionMode(ExecutionMode::ORT_PARALLEL); + session_options.SetInterOpNumThreads(this->inter_op_num_threads_); + } else { + session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); } + session_options.SetIntraOpNumThreads(this->intra_op_num_threads_); - Ort::AllocatorWithDefaultOptions ort_alloc; - - // Allocate input memory buffer - std::cout << "input:" << std::endl; - this->input_name_ = std::string(this->session_.GetInputNameAllocated(0, ort_alloc).get()); - // this->input_name_ = this->session_.GetInputName(0, ort_alloc); - std::cout << " name: " << this->input_name_ << std::endl; - auto input_info = this->session_.GetInputTypeInfo(0); - auto input_shape_info = input_info.GetTensorTypeAndShapeInfo(); - std::vector input_shape = input_shape_info.GetShape(); - ONNXTensorElementDataType input_tensor_type = input_shape_info.GetElementType(); - this->input_h_ = input_shape[2]; - this->input_w_ = input_shape[3]; - - std::cout << " shape:" << std::endl; - for (size_t i = 0; i < input_shape.size(); i++) - { - std::cout << " - " << input_shape[i] << std::endl; - } - std::cout << " tensor_type: " << input_tensor_type << std::endl; - - size_t input_byte_count = sizeof(float) * input_shape_info.GetElementCount(); - std::unique_ptr input_buffer = std::make_unique(input_byte_count); - // auto input_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); - auto input_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - - this->input_tensor_ = Ort::Value::CreateTensor(input_memory_info, - input_buffer.get(), input_byte_count, - input_shape.data(), input_shape.size(), - input_tensor_type); - this->input_buffer_.emplace_back(std::move(input_buffer)); - - // Allocate output memory buffer - std::cout << "outputs" << std::endl; - this->output_name_ = std::string(this->session_.GetOutputNameAllocated(0, ort_alloc).get()); - // this->output_name_ = this->session_.GetOutputName(0, ort_alloc); - std::cout << " name: " << this->output_name_ << std::endl; - - auto output_info = this->session_.GetOutputTypeInfo(0); - auto output_shape_info = output_info.GetTensorTypeAndShapeInfo(); - auto output_shape = output_shape_info.GetShape(); - auto output_tensor_type = output_shape_info.GetElementType(); - - std::cout << " shape:" << std::endl; - for (size_t i = 0; i < output_shape.size(); i++) - { - std::cout << " - " << output_shape[i] << std::endl; - } - std::cout << " tensor_type: " << output_tensor_type << std::endl; - - size_t output_byte_count = sizeof(float) * output_shape_info.GetElementCount(); - std::unique_ptr output_buffer = std::make_unique(output_byte_count); - // auto output_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); - auto output_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); - - this->output_tensor_ = Ort::Value::CreateTensor(output_memory_info, - output_buffer.get(), output_byte_count, - output_shape.data(), output_shape.size(), - output_tensor_type); - this->output_buffer_.emplace_back(std::move(output_buffer)); - - // Prepare GridAndStrides - if(this->p6_) - { - generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_p6_, this->grid_strides_); - } - else - { - generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_, this->grid_strides_); + if (this->use_cuda_) { + OrtCUDAProviderOptions cuda_option; + cuda_option.device_id = this->device_id_; + session_options.AppendExecutionProvider_CUDA(cuda_option); } + + this->session_ = + Ort::Session(this->env_, path_to_model.c_str(), session_options); + } catch (std::exception &e) { + std::cerr << e.what() << std::endl; + throw e; } - std::vector YoloXONNXRuntime::inference(const cv::Mat& frame) - { - // preprocess - cv::Mat pr_img = static_resize(frame); - - float *blob_data = (float *)(this->input_buffer_[0].get()); - blobFromImage(pr_img, blob_data); - - const char* input_names_[] = {this->input_name_.c_str()}; - const char* output_names_[] = {this->output_name_.c_str()}; - - // Inference - Ort::RunOptions run_options; - this->session_.Run(run_options, - input_names_, - &this->input_tensor_, 1, - output_names_, - &this->output_tensor_, 1); - - float* net_pred = (float *)this->output_buffer_[0].get(); - - // postprocess - const float scale = std::min( - static_cast(this->input_w_) / static_cast(frame.cols), - static_cast(this->input_h_) / static_cast(frame.rows) - ); - std::vector objects; - decode_outputs(net_pred, this->grid_strides_, objects, this->bbox_conf_thresh_, scale, frame.cols, frame.rows); - return objects; + Ort::AllocatorWithDefaultOptions ort_alloc; + + // Allocate input memory buffer + std::cout << "input:" << std::endl; + this->input_name_ = + std::string(this->session_.GetInputNameAllocated(0, ort_alloc).get()); + // this->input_name_ = this->session_.GetInputName(0, ort_alloc); + std::cout << " name: " << this->input_name_ << std::endl; + auto input_info = this->session_.GetInputTypeInfo(0); + auto input_shape_info = input_info.GetTensorTypeAndShapeInfo(); + std::vector input_shape = input_shape_info.GetShape(); + ONNXTensorElementDataType input_tensor_type = + input_shape_info.GetElementType(); + this->input_h_ = input_shape[2]; + this->input_w_ = input_shape[3]; + + std::cout << " shape:" << std::endl; + for (size_t i = 0; i < input_shape.size(); i++) { + std::cout << " - " << input_shape[i] << std::endl; + } + std::cout << " tensor_type: " << input_tensor_type << std::endl; + + size_t input_byte_count = + sizeof(float) * input_shape_info.GetElementCount(); + std::unique_ptr input_buffer = + std::make_unique(input_byte_count); + // auto input_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, + // OrtMemTypeDefault); + auto input_memory_info = + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); + + this->input_tensor_ = Ort::Value::CreateTensor( + input_memory_info, input_buffer.get(), input_byte_count, + input_shape.data(), input_shape.size(), input_tensor_type); + this->input_buffer_.emplace_back(std::move(input_buffer)); + + // Allocate output memory buffer + std::cout << "outputs" << std::endl; + this->output_name_ = + std::string(this->session_.GetOutputNameAllocated(0, ort_alloc).get()); + // this->output_name_ = this->session_.GetOutputName(0, ort_alloc); + std::cout << " name: " << this->output_name_ << std::endl; + + auto output_info = this->session_.GetOutputTypeInfo(0); + auto output_shape_info = output_info.GetTensorTypeAndShapeInfo(); + auto output_shape = output_shape_info.GetShape(); + auto output_tensor_type = output_shape_info.GetElementType(); + + std::cout << " shape:" << std::endl; + for (size_t i = 0; i < output_shape.size(); i++) { + std::cout << " - " << output_shape[i] << std::endl; + } + std::cout << " tensor_type: " << output_tensor_type << std::endl; + + size_t output_byte_count = + sizeof(float) * output_shape_info.GetElementCount(); + std::unique_ptr output_buffer = + std::make_unique(output_byte_count); + // auto output_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, + // OrtMemTypeDefault); + auto output_memory_info = + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); + + this->output_tensor_ = Ort::Value::CreateTensor( + output_memory_info, output_buffer.get(), output_byte_count, + output_shape.data(), output_shape.size(), output_tensor_type); + this->output_buffer_.emplace_back(std::move(output_buffer)); + + // Prepare GridAndStrides + if (this->p6_) { + generate_grids_and_stride(this->input_w_, this->input_h_, + this->strides_p6_, this->grid_strides_); + } else { + generate_grids_and_stride(this->input_w_, this->input_h_, + this->strides_, this->grid_strides_); } +} + +std::vector YoloXONNXRuntime::inference(const cv::Mat &frame) { + // preprocess + cv::Mat pr_img = static_resize(frame); + + float *blob_data = (float *)(this->input_buffer_[0].get()); + blobFromImage(pr_img, blob_data); + + const char *input_names_[] = {this->input_name_.c_str()}; + const char *output_names_[] = {this->output_name_.c_str()}; + // Inference + Ort::RunOptions run_options; + this->session_.Run(run_options, input_names_, &this->input_tensor_, 1, + output_names_, &this->output_tensor_, 1); + + float *net_pred = (float *)this->output_buffer_[0].get(); + + // postprocess + const float scale = std::min( + static_cast(this->input_w_) / static_cast(frame.cols), + static_cast(this->input_h_) / static_cast(frame.rows)); + std::vector objects; + decode_outputs(net_pred, this->grid_strides_, objects, + this->bbox_conf_thresh_, scale, frame.cols, frame.rows); + return objects; } + +} // namespace yolox_cpp diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_openvino.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_openvino.cpp index a6c9df1..883d182 100644 --- a/yolox_ros_cpp/yolox_cpp/src/yolox_openvino.cpp +++ b/yolox_ros_cpp/yolox_cpp/src/yolox_openvino.cpp @@ -1,84 +1,84 @@ #include "yolox_cpp/yolox_openvino.hpp" -namespace yolox_cpp{ - YoloXOpenVINO::YoloXOpenVINO(const file_name_t &path_to_model, std::string device_name, - float nms_th, float conf_th, const std::string &model_version, - int num_classes, bool p6) - :AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), - device_name_(device_name) - { - // Step 1. Initialize inference engine core - std::cout << "Initialize Inference engine core" << std::endl; - ov::Core ie; +namespace yolox_cpp { +YoloXOpenVINO::YoloXOpenVINO(const file_name_t &path_to_model, + std::string device_name, float nms_th, + float conf_th, const std::string &model_version, + int num_classes, bool p6) + : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), + device_name_(device_name) { + // Step 1. Initialize inference engine core + std::cout << "Initialize Inference engine core" << std::endl; + ov::Core ie; - // Step 2. Read a model in OpenVINO Intermediate Representation (.xml and - // .bin files) or ONNX (.onnx file) format - std::cout << "Read a model in OpenVINO Intermediate Representation: " << path_to_model << std::endl; - const auto network = ie.read_model(path_to_model); + // Step 2. Read a model in OpenVINO Intermediate Representation (.xml and + // .bin files) or ONNX (.onnx file) format + std::cout << "Read a model in OpenVINO Intermediate Representation: " + << path_to_model << std::endl; + const auto network = ie.read_model(path_to_model); - // Step 3. Loading a model to the device - std::vector available_devices = ie.get_available_devices(); - std::cout << "======= AVAILABLE DEVICES FOR OPENVINO =======" << std::endl; - for (auto device : available_devices) { - std::cout << "- " << device << std::endl; - } - std::cout << "==============================================" << std::endl; - std::cout << "Loading a model to the device: " << device_name_ << std::endl; - auto compiled_model = ie.compile_model(network, device_name); + // Step 3. Loading a model to the device + std::vector available_devices = ie.get_available_devices(); + std::cout << "======= AVAILABLE DEVICES FOR OPENVINO =======" << std::endl; + for (auto device : available_devices) { + std::cout << "- " << device << std::endl; + } + std::cout << "==============================================" << std::endl; + std::cout << "Loading a model to the device: " << device_name_ << std::endl; + auto compiled_model = ie.compile_model(network, device_name); - // Step 4. Create an infer request - std::cout << "Create an infer request" << std::endl; - this->infer_request_ = compiled_model.create_infer_request(); + // Step 4. Create an infer request + std::cout << "Create an infer request" << std::endl; + this->infer_request_ = compiled_model.create_infer_request(); - // Step 5. Configure input & output - std::cout << "Configuring input and output blobs" << std::endl; - this->input_shape_ = compiled_model.input(0).get_shape(); - /* Mark input as resizable by setting of a resize algorithm. - * In this case we will be able to set an input blob of any shape to an - * infer request. Resize and layout conversions are executed automatically - * during inference */ - this->blob_.resize( - this->input_shape_.at(0) * this->input_shape_.at(1) * - this->input_shape_.at(2) * this->input_shape_.at(3)); - this->input_h_ = this->input_shape_.at(2); - this->input_w_ = this->input_shape_.at(3); - std::cout << "INPUT_HEIGHT: " << this->input_h_ << std::endl; - std::cout << "INPUT_WIDTH: " << this->input_w_ << std::endl; + // Step 5. Configure input & output + std::cout << "Configuring input and output blobs" << std::endl; + this->input_shape_ = compiled_model.input(0).get_shape(); + /* Mark input as resizable by setting of a resize algorithm. + * In this case we will be able to set an input blob of any shape to an + * infer request. Resize and layout conversions are executed automatically + * during inference */ + this->blob_.resize(this->input_shape_.at(0) * this->input_shape_.at(1) * + this->input_shape_.at(2) * this->input_shape_.at(3)); + this->input_h_ = this->input_shape_.at(2); + this->input_w_ = this->input_shape_.at(3); + std::cout << "INPUT_HEIGHT: " << this->input_h_ << std::endl; + std::cout << "INPUT_WIDTH: " << this->input_w_ << std::endl; - // Prepare GridAndStrides - if(this->p6_) - { - generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_p6_, this->grid_strides_); - } - else - { - generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_, this->grid_strides_); - } + // Prepare GridAndStrides + if (this->p6_) { + generate_grids_and_stride(this->input_w_, this->input_h_, + this->strides_p6_, this->grid_strides_); + } else { + generate_grids_and_stride(this->input_w_, this->input_h_, + this->strides_, this->grid_strides_); } +} - std::vector YoloXOpenVINO::inference(const cv::Mat& frame) - { - // preprocess - cv::Mat pr_img = static_resize(frame); - // locked memory holder should be alive all time while access to its buffer happens - blobFromImage(pr_img, this->blob_.data()); +std::vector YoloXOpenVINO::inference(const cv::Mat &frame) { + // preprocess + cv::Mat pr_img = static_resize(frame); + // locked memory holder should be alive all time while access to its buffer + // happens + blobFromImage(pr_img, this->blob_.data()); - // do inference - /* Running the request synchronously */ - this->infer_request_.set_input_tensor( - ov::Tensor{ov::element::f32, this->input_shape_, reinterpret_cast(this->blob_.data())}); - infer_request_.infer(); + // do inference + /* Running the request synchronously */ + this->infer_request_.set_input_tensor( + ov::Tensor{ov::element::f32, this->input_shape_, + reinterpret_cast(this->blob_.data())}); + infer_request_.infer(); - const auto &output_tensor = this->infer_request_.get_output_tensor(); - const float* net_pred = reinterpret_cast(output_tensor.data()); + const auto &output_tensor = this->infer_request_.get_output_tensor(); + const float *net_pred = reinterpret_cast(output_tensor.data()); - const float scale = std::min( - static_cast(this->input_w_) / static_cast(frame.cols), - static_cast(this->input_h_) / static_cast(frame.rows) - ); + const float scale = std::min( + static_cast(this->input_w_) / static_cast(frame.cols), + static_cast(this->input_h_) / static_cast(frame.rows)); - std::vector objects; - decode_outputs(net_pred, this->grid_strides_, objects, this->bbox_conf_thresh_, scale, frame.cols, frame.rows); - return objects; - } + std::vector objects; + decode_outputs(net_pred, this->grid_strides_, objects, + this->bbox_conf_thresh_, scale, frame.cols, frame.rows); + return objects; } +} // namespace yolox_cpp diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp index f5c73ad..8fa5dc5 100644 --- a/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp +++ b/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp @@ -1,149 +1,174 @@ #include "yolox_cpp/yolox_tensorrt.hpp" - -namespace yolox_cpp -{ - - YoloXTensorRT::YoloXTensorRT(const file_name_t &path_to_engine, int device, - float nms_th, float conf_th, const std::string &model_version, - int num_classes, bool p6) - : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), - DEVICE_(device) - { - cudaSetDevice(this->DEVICE_); - // create a model using the API directly and serialize it to a stream - std::vector trtModelStream; - size_t size{0}; - - std::ifstream file(path_to_engine, std::ios::binary); - if (file.good()) - { - file.seekg(0, file.end); - size = file.tellg(); - file.seekg(0, file.beg); - trtModelStream.resize(size); - file.read(trtModelStream.data(), size); - file.close(); - } - else - { - std::string msg = "invalid arguments path_to_engine: "; - msg += path_to_engine; - throw std::runtime_error(msg.c_str()); - } - - this->runtime_ = std::unique_ptr(createInferRuntime(this->gLogger_)); - assert(this->runtime_ != nullptr); - this->engine_ = std::unique_ptr(this->runtime_->deserializeCudaEngine(trtModelStream.data(), size)); - assert(this->engine_ != nullptr); - this->context_ = std::unique_ptr(this->engine_->createExecutionContext()); - assert(this->context_ != nullptr); - - const auto input_name = this->engine_->getIOTensorName(this->inputIndex_); - const auto input_dims = this->engine_->getTensorShape(input_name); - this->input_h_ = input_dims.d[2]; - this->input_w_ = input_dims.d[3]; - std::cout << "INPUT_HEIGHT: " << this->input_h_ << std::endl; - std::cout << "INPUT_WIDTH: " << this->input_w_ << std::endl; - - const auto output_name = this->engine_->getIOTensorName(this->outputIndex_); - auto output_dims = this->engine_->getTensorShape(output_name); - this->output_size_ = 1; - for (int j = 0; j < output_dims.nbDims; ++j) - { - this->output_size_ *= output_dims.d[j]; - } - - // allocate buffer - this->input_blob_.resize(this->input_h_ * this->input_w_ * 3); - this->output_blob_.resize(this->output_size_); - - // Pointers to input and output device buffers to pass to engine. - // Engine requires exactly IEngine::getNbBindings() number of buffers. - assert(this->engine_->getNbIOTensors() == 2); - // In order to bind the buffers, we need to know the names of the input and output tensors. - // Note that indices are guaranteed to be less than IEngine::getNbBindings() - assert(this->engine_->getTensorDataType(input_name) == nvinfer1::DataType::kFLOAT); - assert(this->engine_->getTensorDataType(output_name) == nvinfer1::DataType::kFLOAT); - - // Create GPU buffers on device - CHECK(cudaMalloc(&this->inference_buffers_[this->inputIndex_], 3 * this->input_h_ * this->input_w_ * sizeof(float))); - CHECK(cudaMalloc(&this->inference_buffers_[this->outputIndex_], this->output_size_ * sizeof(float))); - - assert(this->context_->setInputShape(input_name, input_dims)); - assert(this->context_->allInputDimensionsSpecified()); - - assert(this->context_->setInputTensorAddress(input_name, this->inference_buffers_[this->inputIndex_])); - assert(this->context_->setOutputTensorAddress(output_name, this->inference_buffers_[this->outputIndex_])); - - // Prepare GridAndStrides - if (this->p6_) - { - generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_p6_, this->grid_strides_); - } - else - { - generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_, this->grid_strides_); - } - } - - YoloXTensorRT::~YoloXTensorRT() - { - CHECK(cudaFree(inference_buffers_[this->inputIndex_])); - CHECK(cudaFree(inference_buffers_[this->outputIndex_])); +#include + +extern "C" void launchGPUResizeAndBlobFromImage(uchar3 *image_data, + float *output, int r_width, + int r_height, int input_width, + int input_height, float w_ratio, + float h_ratio, + cudaStream_t stream); + +namespace yolox_cpp { + +YoloXTensorRT::YoloXTensorRT(const file_name_t &path_to_engine, int device, + float nms_th, float conf_th, + const std::string &model_version, int num_classes, + bool p6) + : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), + DEVICE_(device) { + cudaSetDevice(this->DEVICE_); + // create a model using the API directly and serialize it to a stream + std::vector trtModelStream; + size_t size{0}; + + std::ifstream file(path_to_engine, std::ios::binary); + if (file.good()) { + file.seekg(0, file.end); + size = file.tellg(); + file.seekg(0, file.beg); + trtModelStream.resize(size); + file.read(trtModelStream.data(), size); + file.close(); + } else { + std::string msg = "invalid arguments path_to_engine: "; + msg += path_to_engine; + throw std::runtime_error(msg.c_str()); } - std::vector YoloXTensorRT::inference(const cv::Mat &frame) - { - // preprocess - auto pr_img = static_resize(frame); - blobFromImage(pr_img, input_blob_.data()); - - // inference - this->doInference(input_blob_.data(), output_blob_.data()); - - // postprocess - const float scale = std::min( - static_cast(this->input_w_) / static_cast(frame.cols), - static_cast(this->input_h_) / static_cast(frame.rows) - ); - - std::vector objects; - decode_outputs( - output_blob_.data(), this->grid_strides_, objects, - this->bbox_conf_thresh_, scale, frame.cols, frame.rows); - - return objects; + this->runtime_ = + std::unique_ptr(createInferRuntime(this->gLogger_)); + assert(this->runtime_ != nullptr); + this->engine_ = std::unique_ptr( + this->runtime_->deserializeCudaEngine(trtModelStream.data(), size)); + assert(this->engine_ != nullptr); + this->context_ = std::unique_ptr( + this->engine_->createExecutionContext()); + assert(this->context_ != nullptr); + + const auto input_name = this->engine_->getIOTensorName(this->inputIndex_); + const auto input_dims = this->engine_->getTensorShape(input_name); + this->input_h_ = input_dims.d[2]; + this->input_w_ = input_dims.d[3]; + std::cout << "INPUT_HEIGHT: " << this->input_h_ << std::endl; + std::cout << "INPUT_WIDTH: " << this->input_w_ << std::endl; + + const auto output_name = this->engine_->getIOTensorName(this->outputIndex_); + auto output_dims = this->engine_->getTensorShape(output_name); + this->output_size_ = 1; + for (int j = 0; j < output_dims.nbDims; ++j) { + this->output_size_ *= output_dims.d[j]; } - void YoloXTensorRT::doInference(const float *input, float *output) - { - // Create stream - cudaStream_t stream; - CHECK(cudaStreamCreate(&stream)); - - // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host - CHECK( - cudaMemcpyAsync( - this->inference_buffers_[this->inputIndex_], - input, - 3 * this->input_h_ * this->input_w_ * sizeof(float), - cudaMemcpyHostToDevice, stream)); - - bool success = context_->executeV2(this->inference_buffers_); - if (!success) - throw std::runtime_error("failed inference"); - - CHECK( - cudaMemcpyAsync( - output, - this->inference_buffers_[this->outputIndex_], - this->output_size_ * sizeof(float), - cudaMemcpyDeviceToHost, stream)); - - CHECK(cudaStreamSynchronize(stream)); - - // Release stream - CHECK(cudaStreamDestroy(stream)); + // allocate buffer + this->input_blob_.resize(this->input_h_ * this->input_w_ * 3); + this->output_blob_.resize(this->output_size_); + + // Pointers to input and output device buffers to pass to engine. + // Engine requires exactly IEngine::getNbBindings() number of buffers. + assert(this->engine_->getNbIOTensors() == 2); + // In order to bind the buffers, we need to know the names of the input and + // output tensors. Note that indices are guaranteed to be less than + // IEngine::getNbBindings() + assert(this->engine_->getTensorDataType(input_name) == + nvinfer1::DataType::kFLOAT); + assert(this->engine_->getTensorDataType(output_name) == + nvinfer1::DataType::kFLOAT); + + // Create GPU buffers on device + CHECK(cudaMalloc(&this->inference_buffers_[this->inputIndex_], + 3 * this->input_h_ * this->input_w_ * sizeof(float))); + CHECK(cudaMalloc(&this->inference_buffers_[this->outputIndex_], + this->output_size_ * sizeof(float))); + + assert(this->context_->setInputShape(input_name, input_dims)); + assert(this->context_->allInputDimensionsSpecified()); + + assert(this->context_->setInputTensorAddress( + input_name, this->inference_buffers_[this->inputIndex_])); + assert(this->context_->setOutputTensorAddress( + output_name, this->inference_buffers_[this->outputIndex_])); + + // Prepare GridAndStrides + if (this->p6_) { + generate_grids_and_stride(this->input_w_, this->input_h_, + this->strides_p6_, this->grid_strides_); + } else { + generate_grids_and_stride(this->input_w_, this->input_h_, + this->strides_, this->grid_strides_); } +} + +YoloXTensorRT::~YoloXTensorRT() { + CHECK(cudaFree(inference_buffers_[this->inputIndex_])); + CHECK(cudaFree(inference_buffers_[this->outputIndex_])); +} + +std::vector +YoloXTensorRT::inference(const cv::Mat &frame, uchar3 *d_image, float *d_output, + cudaStream_t copy_stream_, + cudaStream_t resize_stream_) { + auto t0 = std::chrono::high_resolution_clock::now(); + + cudaMemcpy2DAsync(d_image, frame.cols * sizeof(uchar3), frame.data, + frame.step, frame.cols * sizeof(uchar3), frame.rows, + cudaMemcpyHostToDevice, copy_stream_); + + auto t1 = std::chrono::high_resolution_clock::now(); + auto elapsed_us = + std::chrono::duration_cast(t1 - t0).count(); + printf("Copy Time: %5ld us\n", elapsed_us); + + float w_r = static_cast(frame.cols) / this->input_w_; + float h_r = static_cast(frame.rows) / this->input_h_; + + t0 = std::chrono::high_resolution_clock::now(); + launchGPUResizeAndBlobFromImage(d_image, d_output, this->input_w_, + this->input_h_, frame.cols, frame.rows, w_r, + h_r, resize_stream_); + cudaStreamSynchronize(resize_stream_); + t1 = std::chrono::high_resolution_clock::now(); + elapsed_us = + std::chrono::duration_cast(t1 - t0).count(); + printf("resize time on GPU: %5ld us\n", elapsed_us); + + t0 = std::chrono::high_resolution_clock::now(); + this->doInference(d_output, output_blob_.data(), copy_stream_); + t1 = std::chrono::high_resolution_clock::now(); + elapsed_us = + std::chrono::duration_cast(t1 - t0).count(); + printf("Inference time on GPU: %5ld us\n", elapsed_us); + + const float scale = + std::min(static_cast(this->input_w_) / frame.cols, + static_cast(this->input_h_) / frame.rows); + + std::vector objects; + t0 = std::chrono::high_resolution_clock::now(); + decode_outputs(output_blob_.data(), this->grid_strides_, objects, + this->bbox_conf_thresh_, scale, frame.cols, frame.rows); + t1 = std::chrono::high_resolution_clock::now(); + elapsed_us = + std::chrono::duration_cast(t1 - t0).count(); + printf("Post time: %5ld us\n", elapsed_us); + + return objects; +} + +void YoloXTensorRT::doInference(const float *input, float *output, + cudaStream_t copy_stream_) { + // DMA input batch data to device, infer on the batch asynchronously, and + // DMA output back to host Change to Device to Device copy + this->inference_buffers_[this->inputIndex_] = const_cast(input); + + bool success = context_->executeV2(this->inference_buffers_); + if (!success) + throw std::runtime_error("failed inference"); + + CHECK(cudaMemcpyAsync(output, this->inference_buffers_[this->outputIndex_], + this->output_size_ * sizeof(float), + cudaMemcpyDeviceToHost, copy_stream_)); + + CHECK(cudaStreamSynchronize(copy_stream_)); +} } // namespace yolox_cpp diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_tflite.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_tflite.cpp index 5182150..c03f4ed 100644 --- a/yolox_ros_cpp/yolox_cpp/src/yolox_tflite.cpp +++ b/yolox_ros_cpp/yolox_cpp/src/yolox_tflite.cpp @@ -1,182 +1,162 @@ #include "yolox_cpp/yolox_tflite.hpp" -namespace yolox_cpp -{ - - YoloXTflite::YoloXTflite(const file_name_t &path_to_model, int num_threads, - float nms_th, float conf_th, const std::string &model_version, - int num_classes, bool p6, bool is_nchw) - : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), is_nchw_(is_nchw) - { - TfLiteStatus status; - this->model_ = tflite::FlatBufferModel::BuildFromFile(path_to_model.c_str()); - TFLITE_MINIMAL_CHECK(model_); - - this->resolver_ = std::make_unique(); - this->interpreter_ = std::make_unique(); +namespace yolox_cpp { + +YoloXTflite::YoloXTflite(const file_name_t &path_to_model, int num_threads, + float nms_th, float conf_th, + const std::string &model_version, int num_classes, + bool p6, bool is_nchw) + : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), + is_nchw_(is_nchw) { + TfLiteStatus status; + this->model_ = + tflite::FlatBufferModel::BuildFromFile(path_to_model.c_str()); + TFLITE_MINIMAL_CHECK(model_); + + this->resolver_ = + std::make_unique(); + this->interpreter_ = std::make_unique(); + + tflite::InterpreterBuilder builder(*model_, *this->resolver_); + builder(&this->interpreter_); + TFLITE_MINIMAL_CHECK(this->interpreter_ != nullptr); + + TFLITE_MINIMAL_CHECK(this->interpreter_->AllocateTensors() == kTfLiteOk); + // tflite::PrintInterpreterState(this->interpreter_.get()); + + status = this->interpreter_->SetNumThreads(num_threads); + if (status != TfLiteStatus::kTfLiteOk) { + std::string msg = "Failed to SetNumThreads."; + throw std::runtime_error(msg.c_str()); + } - tflite::InterpreterBuilder builder(*model_, *this->resolver_); - builder(&this->interpreter_); - TFLITE_MINIMAL_CHECK(this->interpreter_ != nullptr); + // XNNPACK Delegate + auto xnnpack_options = TfLiteXNNPackDelegateOptionsDefault(); + xnnpack_options.num_threads = num_threads; + this->delegate_ = TfLiteXNNPackDelegateCreate(&xnnpack_options); + status = this->interpreter_->ModifyGraphWithDelegate(this->delegate_); + if (status != TfLiteStatus::kTfLiteOk) { + std::string msg = "Failed to ModifyGraphWithDelegate."; + throw std::runtime_error(msg.c_str()); + } - TFLITE_MINIMAL_CHECK(this->interpreter_->AllocateTensors() == kTfLiteOk); - // tflite::PrintInterpreterState(this->interpreter_.get()); + // // GPU Delegate + // auto gpu_options = TfLiteGpuDelegateOptionsV2Default(); + // gpu_options.inference_preference = + // TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED; + // gpu_options.inference_priority1 = + // TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; this->delegate_ = + // TfLiteGpuDelegateV2Create(&gpu_options); status = + // this->interpreter_->ModifyGraphWithDelegate(this->delegate_); if (status + // != TfLiteStatus::kTfLiteOk) + // { + // std::cerr << "Failed to ModifyGraphWithDelegate." << std::endl; + // exit(1); + // } + + // // NNAPI Delegate + // tflite::StatefulNnApiDelegate::Options nnapi_options; + // nnapi_options.execution_preference = + // tflite::StatefulNnApiDelegate::Options::kSustainedSpeed; + // nnapi_options.allow_fp16 = true; + // nnapi_options.disallow_nnapi_cpu = true; + // this->delegate_ = new tflite::StatefulNnApiDelegate(nnapi_options); + // status = this->interpreter_->ModifyGraphWithDelegate(this->delegate_); + // if (status != TfLiteStatus::kTfLiteOk) + // { + // std::cerr << "Failed to ModifyGraphWithDelegate." << std::endl; + // exit(1); + // } + + if (this->interpreter_->AllocateTensors() != TfLiteStatus::kTfLiteOk) { + std::string msg = "Failed to allocate tensors."; + throw std::runtime_error(msg.c_str()); + } - status = this->interpreter_->SetNumThreads(num_threads); - if (status != TfLiteStatus::kTfLiteOk) - { - std::string msg = "Failed to SetNumThreads."; - throw std::runtime_error(msg.c_str()); + { + TfLiteTensor *tensor = this->interpreter_->input_tensor(0); + std::cout << "input:" << std::endl; + std::cout << " name: " << tensor->name << std::endl; + if (this->is_nchw_ == true) { + // NCHW + this->input_h_ = tensor->dims->data[2]; + this->input_w_ = tensor->dims->data[3]; + } else { + // NHWC + this->input_h_ = tensor->dims->data[1]; + this->input_w_ = tensor->dims->data[2]; } - // XNNPACK Delegate - auto xnnpack_options = TfLiteXNNPackDelegateOptionsDefault(); - xnnpack_options.num_threads = num_threads; - this->delegate_ = TfLiteXNNPackDelegateCreate(&xnnpack_options); - status = this->interpreter_->ModifyGraphWithDelegate(this->delegate_); - if (status != TfLiteStatus::kTfLiteOk) - { - std::string msg = "Failed to ModifyGraphWithDelegate."; - throw std::runtime_error(msg.c_str()); + std::cout << " shape:" << std::endl; + if (tensor->type == kTfLiteUInt8) { + this->input_size_ = sizeof(uint8_t); + } else { + this->input_size_ = sizeof(float); } - - // // GPU Delegate - // auto gpu_options = TfLiteGpuDelegateOptionsV2Default(); - // gpu_options.inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED; - // gpu_options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; - // this->delegate_ = TfLiteGpuDelegateV2Create(&gpu_options); - // status = this->interpreter_->ModifyGraphWithDelegate(this->delegate_); - // if (status != TfLiteStatus::kTfLiteOk) - // { - // std::cerr << "Failed to ModifyGraphWithDelegate." << std::endl; - // exit(1); - // } - - // // NNAPI Delegate - // tflite::StatefulNnApiDelegate::Options nnapi_options; - // nnapi_options.execution_preference = tflite::StatefulNnApiDelegate::Options::kSustainedSpeed; - // nnapi_options.allow_fp16 = true; - // nnapi_options.disallow_nnapi_cpu = true; - // this->delegate_ = new tflite::StatefulNnApiDelegate(nnapi_options); - // status = this->interpreter_->ModifyGraphWithDelegate(this->delegate_); - // if (status != TfLiteStatus::kTfLiteOk) - // { - // std::cerr << "Failed to ModifyGraphWithDelegate." << std::endl; - // exit(1); - // } - - if (this->interpreter_->AllocateTensors() != TfLiteStatus::kTfLiteOk) - { - std::string msg = "Failed to allocate tensors."; - throw std::runtime_error(msg.c_str()); + for (int i = 0; i < tensor->dims->size; i++) { + this->input_size_ *= tensor->dims->data[i]; + std::cout << " - " << tensor->dims->data[i] << std::endl; } + std::cout << " input_h: " << this->input_h_ << std::endl; + std::cout << " input_w: " << this->input_w_ << std::endl; + std::cout << " tensor_type: " << tensor->type << std::endl; + } - { - TfLiteTensor *tensor = this->interpreter_->input_tensor(0); - std::cout << "input:" << std::endl; - std::cout << " name: " << tensor->name << std::endl; - if (this->is_nchw_ == true) - { - // NCHW - this->input_h_ = tensor->dims->data[2]; - this->input_w_ = tensor->dims->data[3]; - } - else - { - // NHWC - this->input_h_ = tensor->dims->data[1]; - this->input_w_ = tensor->dims->data[2]; - } - - std::cout << " shape:" << std::endl; - if (tensor->type == kTfLiteUInt8) - { - this->input_size_ = sizeof(uint8_t); - } - else - { - this->input_size_ = sizeof(float); - } - for (int i = 0; i < tensor->dims->size; i++) - { - this->input_size_ *= tensor->dims->data[i]; - std::cout << " - " << tensor->dims->data[i] << std::endl; - } - std::cout << " input_h: " << this->input_h_ << std::endl; - std::cout << " input_w: " << this->input_w_ << std::endl; - std::cout << " tensor_type: " << tensor->type << std::endl; + { + TfLiteTensor *tensor = this->interpreter_->output_tensor(0); + std::cout << "output:" << std::endl; + std::cout << " name: " << tensor->name << std::endl; + std::cout << " shape:" << std::endl; + if (tensor->type == kTfLiteUInt8) { + this->output_size_ = sizeof(uint8_t); + } else { + this->output_size_ = sizeof(float); } - - { - TfLiteTensor *tensor = this->interpreter_->output_tensor(0); - std::cout << "output:" << std::endl; - std::cout << " name: " << tensor->name << std::endl; - std::cout << " shape:" << std::endl; - if (tensor->type == kTfLiteUInt8) - { - this->output_size_ = sizeof(uint8_t); - } - else - { - this->output_size_ = sizeof(float); - } - for (int i = 0; i < tensor->dims->size; i++) - { - this->output_size_ *= tensor->dims->data[i]; - std::cout << " - " << tensor->dims->data[i] << std::endl; - } - std::cout << " tensor_type: " << tensor->type << std::endl; + for (int i = 0; i < tensor->dims->size; i++) { + this->output_size_ *= tensor->dims->data[i]; + std::cout << " - " << tensor->dims->data[i] << std::endl; } + std::cout << " tensor_type: " << tensor->type << std::endl; + } - // Prepare GridAndStrides - if(this->p6_) - { - generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_p6_, this->grid_strides_); - } - else - { - generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_, this->grid_strides_); - } + // Prepare GridAndStrides + if (this->p6_) { + generate_grids_and_stride(this->input_w_, this->input_h_, + this->strides_p6_, this->grid_strides_); + } else { + generate_grids_and_stride(this->input_w_, this->input_h_, + this->strides_, this->grid_strides_); } - YoloXTflite::~YoloXTflite() - { - TfLiteXNNPackDelegateDelete(this->delegate_); +} +YoloXTflite::~YoloXTflite() { TfLiteXNNPackDelegateDelete(this->delegate_); } +std::vector YoloXTflite::inference(const cv::Mat &frame) { + // preprocess + cv::Mat pr_img = static_resize(frame); + + float *input_blob = this->interpreter_->typed_input_tensor(0); + if (this->is_nchw_ == true) { + blobFromImage(pr_img, input_blob); + } else { + blobFromImage_nhwc(pr_img, input_blob); } - std::vector YoloXTflite::inference(const cv::Mat &frame) - { - // preprocess - cv::Mat pr_img = static_resize(frame); - float *input_blob = this->interpreter_->typed_input_tensor(0); - if (this->is_nchw_ == true) - { - blobFromImage(pr_img, input_blob); - } - else - { - blobFromImage_nhwc(pr_img, input_blob); - } - - // inference - TfLiteStatus ret = this->interpreter_->Invoke(); - if (ret != TfLiteStatus::kTfLiteOk) - { - std::cerr << "Failed to invoke." << std::endl; - return std::vector(); - } - - // postprocess - const float scale = std::min( - static_cast(this->input_w_) / static_cast(frame.cols), - static_cast(this->input_h_) / static_cast(frame.rows) - ); - std::vector objects; - decode_outputs( - this->interpreter_->typed_output_tensor(0), - this->grid_strides_, objects, - this->bbox_conf_thresh_, scale, frame.cols, frame.rows); - - return objects; + // inference + TfLiteStatus ret = this->interpreter_->Invoke(); + if (ret != TfLiteStatus::kTfLiteOk) { + std::cerr << "Failed to invoke." << std::endl; + return std::vector(); } + // postprocess + const float scale = std::min( + static_cast(this->input_w_) / static_cast(frame.cols), + static_cast(this->input_h_) / static_cast(frame.rows)); + std::vector objects; + decode_outputs(this->interpreter_->typed_output_tensor(0), + this->grid_strides_, objects, this->bbox_conf_thresh_, scale, + frame.cols, frame.rows); + + return objects; +} + } // namespace yolox_cpp diff --git a/yolox_ros_cpp/yolox_param/CMakeLists.txt b/yolox_ros_cpp/yolox_param/CMakeLists.txt index 59aea79..3689f2a 100644 --- a/yolox_ros_cpp/yolox_param/CMakeLists.txt +++ b/yolox_ros_cpp/yolox_param/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.8) +cmake_minimum_required(VERSION 3.14) project(yolox_param) if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") diff --git a/yolox_ros_cpp/yolox_ros_cpp/CMakeLists.txt b/yolox_ros_cpp/yolox_ros_cpp/CMakeLists.txt index d087feb..c51d79c 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/CMakeLists.txt +++ b/yolox_ros_cpp/yolox_ros_cpp/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.8) +cmake_minimum_required(VERSION 3.14) project(yolox_ros_cpp) if(NOT CMAKE_CXX_STANDARD) @@ -7,11 +7,19 @@ if(NOT CMAKE_CXX_STANDARD) set(CMAKE_CXX_EXTENSIONS OFF) endif() +set(CMAKE_BUILD_TYPE Release) +set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --use_fast_math -DNDEBUG") +set(CMAKE_CUDA_ARCHITECTURES 87) +project(yolox_ros_cpp CXX CUDA) + if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic) endif() find_package(ament_cmake_auto REQUIRED) +find_package(tr_messages REQUIRED) + + ament_auto_find_build_dependencies() if(NOT yolox_cpp_FOUND) @@ -37,3 +45,5 @@ ament_auto_package( INSTALL_TO_SHARE launch ) + +ament_target_dependencies(yolox_ros_cpp tr_messages) diff --git a/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp b/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp index 4253237..80329a9 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp +++ b/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp @@ -22,6 +22,8 @@ #include "yolox_cpp/utils.hpp" #include "yolox_param/yolox_param.hpp" +#include "tr_messages/msg/det_with_img.hpp" + namespace yolox_ros_cpp{ class YoloXNode : public rclcpp::Node { @@ -30,22 +32,31 @@ namespace yolox_ros_cpp{ private: void onInit(); void colorImageCallback(const sensor_msgs::msg::Image::ConstSharedPtr &); - static bboxes_ex_msgs::msg::BoundingBoxes objects_to_bboxes(const cv::Mat &, const std::vector &, const std_msgs::msg::Header &); static vision_msgs::msg::Detection2DArray objects_to_detection2d(const std::vector &, const std_msgs::msg::Header &); - protected: std::shared_ptr param_listener_; yolox_parameters::Params params_; private: + bool init; + + cudaStream_t copy_stream_; + cudaStream_t resize_stream_; + + uchar3* d_image_; + float* d_output_; + std::unique_ptr yolox_; std::vector class_names_; + rclcpp::CallbackGroup::SharedPtr callback_group_reentrant_; + std::shared_ptr sub_options_; + rclcpp::TimerBase::SharedPtr init_timer_; image_transport::Subscriber sub_image_; rclcpp::Publisher::SharedPtr pub_bboxes_; - rclcpp::Publisher::SharedPtr pub_detection2d_; + rclcpp::Publisher::SharedPtr pub_detection2d_; image_transport::Publisher pub_image_; }; } diff --git a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_onnxruntime.launch.py b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_onnxruntime.launch.py index 61d0802..6013229 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_onnxruntime.launch.py +++ b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_onnxruntime.launch.py @@ -19,150 +19,152 @@ from launch_ros.actions import ComposableNodeContainer from launch_ros.descriptions import ComposableNode + def generate_launch_description(): launch_args = [ DeclareLaunchArgument( - 'video_device', - default_value='/dev/video0', - description='input video source' - ), - DeclareLaunchArgument( - 'model_path', - default_value='./src/YOLOX-ROS/weights/onnx/yolox_tiny.onnx', - description='yolox model path.' - ), - DeclareLaunchArgument( - 'p6', - default_value='false', - description='with p6.' - ), - DeclareLaunchArgument( - 'class_labels_path', - default_value='', - description='if use custom model, set class name labels. ' + "video_device", + default_value="/dev/video0", + description="input video source", ), DeclareLaunchArgument( - 'num_classes', - default_value='80', - description='num classes.' + "model_path", + default_value="./src/YOLOX-ROS/weights/onnx/yolox_tiny.onnx", + description="yolox model path.", ), + DeclareLaunchArgument("p6", default_value="false", description="with p6."), DeclareLaunchArgument( - 'model_version', - default_value='0.1.1rc0', - description='yolox model version.' + "class_labels_path", + default_value="", + description="if use custom model, set class name labels. ", ), DeclareLaunchArgument( - 'onnxruntime_use_cuda', - default_value='true', - description='onnxruntime use cuda.' + "num_classes", default_value="80", description="num classes." ), DeclareLaunchArgument( - 'onnxruntime_device_id', - default_value='0', - description='onnxruntime gpu device id.' + "model_version", + default_value="0.1.1rc0", + description="yolox model version.", ), DeclareLaunchArgument( - 'onnxruntime_use_parallel', - default_value='false', - description='if use_parallel is true, you can set inter_op_num_threads.' + "onnxruntime_use_cuda", + default_value="true", + description="onnxruntime use cuda.", ), DeclareLaunchArgument( - 'onnxruntime_inter_op_num_threads', - default_value='1' + "onnxruntime_device_id", + default_value="0", + description="onnxruntime gpu device id.", ), DeclareLaunchArgument( - 'onnxruntime_intra_op_num_threads', - default_value='1', - description='ontrols the number of threads to use to run the model.' + "onnxruntime_use_parallel", + default_value="false", + description="if use_parallel is true, you can set inter_op_num_threads.", ), + DeclareLaunchArgument("onnxruntime_inter_op_num_threads", default_value="1"), DeclareLaunchArgument( - 'conf', - default_value='0.30', - description='yolox confidence threshold.' + "onnxruntime_intra_op_num_threads", + default_value="1", + description="ontrols the number of threads to use to run the model.", ), DeclareLaunchArgument( - 'nms', - default_value='0.45', - description='yolox nms threshold' + "conf", default_value="0.30", description="yolox confidence threshold." ), DeclareLaunchArgument( - 'imshow_isshow', - default_value='true', - description='' + "nms", default_value="0.45", description="yolox nms threshold" ), + DeclareLaunchArgument("imshow_isshow", default_value="true", description=""), DeclareLaunchArgument( - 'src_image_topic_name', - default_value='/image_raw', - description='topic name for source image' + "src_image_topic_name", + default_value="/image_raw", + description="topic name for source image", ), DeclareLaunchArgument( - 'publish_image_topic_name', - default_value='/yolox/image_raw', - description='topic name for publishing image with bounding box drawn' + "publish_image_topic_name", + default_value="/yolox/image_raw", + description="topic name for publishing image with bounding box drawn", ), DeclareLaunchArgument( - 'publish_boundingbox_topic_name', - default_value='/yolox/bounding_boxes', - description='topic name for publishing bounding box message.' + "publish_boundingbox_topic_name", + default_value="/yolox/bounding_boxes", + description="topic name for publishing bounding box message.", ), DeclareLaunchArgument( - 'use_bbox_ex_msgs', - default_value='false', - description='use BoundingBoxArray message type.' + "use_bbox_ex_msgs", + default_value="false", + description="use BoundingBoxArray message type.", ), DeclareLaunchArgument( - 'publish_resized_image', - default_value='false', - description='use BoundingBoxArray message type.' + "publish_resized_image", + default_value="false", + description="use BoundingBoxArray message type.", ), ] container = ComposableNodeContainer( - name='yolox_container', - namespace='', - package='rclcpp_components', - executable='component_container', + name="yolox_container", + namespace="", + package="rclcpp_components", + executable="component_container", composable_node_descriptions=[ ComposableNode( - package='usb_cam', - plugin='usb_cam::UsbCamNode', - name='usb_cam_node', - parameters=[{ - 'video_device': LaunchConfiguration('video_device'), - 'brightness': 100 - }]), + package="usb_cam", + plugin="usb_cam::UsbCamNode", + name="usb_cam_node", + parameters=[ + { + "video_device": LaunchConfiguration("video_device"), + "brightness": 100, + } + ], + ), ComposableNode( - package='yolox_ros_cpp', - plugin='yolox_ros_cpp::YoloXNode', - name='yolox_ros_cpp', - parameters=[{ - 'model_path': LaunchConfiguration('model_path'), - 'p6': LaunchConfiguration('p6'), - 'class_labels_path': LaunchConfiguration('class_labels_path'), - 'num_classes': LaunchConfiguration('num_classes'), - 'model_type': 'onnxruntime', - 'model_version': LaunchConfiguration('model_version'), - 'onnxruntime_use_cuda': LaunchConfiguration('onnxruntime_use_cuda'), - 'onnxruntime_device_id': LaunchConfiguration('onnxruntime_device_id'), - 'onnxruntime_use_parallel': LaunchConfiguration('onnxruntime_use_parallel'), - 'onnxruntime_inter_op_num_threads': LaunchConfiguration('onnxruntime_inter_op_num_threads'), - 'onnxruntime_intra_op_num_threads': LaunchConfiguration('onnxruntime_intra_op_num_threads'), - 'conf': LaunchConfiguration('conf'), - 'nms': LaunchConfiguration('nms'), - 'imshow_isshow': LaunchConfiguration('imshow_isshow'), - 'src_image_topic_name': LaunchConfiguration('src_image_topic_name'), - 'publish_image_topic_name': LaunchConfiguration('publish_image_topic_name'), - 'publish_boundingbox_topic_name': LaunchConfiguration('publish_boundingbox_topic_name'), - 'publish_resized_image': LaunchConfiguration('publish_resized_image'), - 'use_bbox_ex_msgs': LaunchConfiguration('use_bbox_ex_msgs'), - }], - ), + package="yolox_ros_cpp", + plugin="yolox_ros_cpp::YoloXNode", + name="yolox_ros_cpp", + parameters=[ + { + "model_path": LaunchConfiguration("model_path"), + "p6": LaunchConfiguration("p6"), + "class_labels_path": LaunchConfiguration("class_labels_path"), + "num_classes": LaunchConfiguration("num_classes"), + "model_type": "onnxruntime", + "model_version": LaunchConfiguration("model_version"), + "onnxruntime_use_cuda": LaunchConfiguration( + "onnxruntime_use_cuda" + ), + "onnxruntime_device_id": LaunchConfiguration( + "onnxruntime_device_id" + ), + "onnxruntime_use_parallel": LaunchConfiguration( + "onnxruntime_use_parallel" + ), + "onnxruntime_inter_op_num_threads": LaunchConfiguration( + "onnxruntime_inter_op_num_threads" + ), + "onnxruntime_intra_op_num_threads": LaunchConfiguration( + "onnxruntime_intra_op_num_threads" + ), + "conf": LaunchConfiguration("conf"), + "nms": LaunchConfiguration("nms"), + "imshow_isshow": LaunchConfiguration("imshow_isshow"), + "src_image_topic_name": LaunchConfiguration( + "src_image_topic_name" + ), + "publish_image_topic_name": LaunchConfiguration( + "publish_image_topic_name" + ), + "publish_boundingbox_topic_name": LaunchConfiguration( + "publish_boundingbox_topic_name" + ), + "publish_resized_image": LaunchConfiguration( + "publish_resized_image" + ), + "use_bbox_ex_msgs": LaunchConfiguration("use_bbox_ex_msgs"), + } + ], + ), ], - output='screen', + output="screen", ) - return launch.LaunchDescription( - launch_args + - [ - container - ] - ) + return launch.LaunchDescription(launch_args + [container]) diff --git a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_openvino.launch.py b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_openvino.launch.py index 8b18767..09d44c0 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_openvino.launch.py +++ b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_openvino.launch.py @@ -4,127 +4,122 @@ from launch_ros.actions import ComposableNodeContainer from launch_ros.descriptions import ComposableNode + def generate_launch_description(): launch_args = [ DeclareLaunchArgument( - 'video_device', - default_value='/dev/video0', - description='input video source' - ), - DeclareLaunchArgument( - 'model_path', - default_value='./src/YOLOX-ROS/weights/onnx/yolox_tiny.onnx', - description='yolox model path.' + "video_device", + default_value="/dev/video0", + description="input video source", ), DeclareLaunchArgument( - 'p6', - default_value='false', - description='with p6.' + "model_path", + default_value="./src/YOLOX-ROS/weights/onnx/yolox_tiny.onnx", + description="yolox model path.", ), + DeclareLaunchArgument("p6", default_value="false", description="with p6."), DeclareLaunchArgument( - 'class_labels_path', - default_value='', - description='if use custom model, set class name labels. ' + "class_labels_path", + default_value="", + description="if use custom model, set class name labels. ", ), DeclareLaunchArgument( - 'num_classes', - default_value='80', - description='num classes.' + "num_classes", default_value="80", description="num classes." ), DeclareLaunchArgument( - 'model_version', - default_value='0.1.1rc0', - description='yolox model version.' + "model_version", + default_value="0.1.1rc0", + description="yolox model version.", ), DeclareLaunchArgument( - 'openvino_device', - default_value='CPU', - description='model device. CPU, GPU, MYRIAD, etc...' + "openvino_device", + default_value="CPU", + description="model device. CPU, GPU, MYRIAD, etc...", ), DeclareLaunchArgument( - 'conf', - default_value='0.30', - description='yolox confidence threshold.' + "conf", default_value="0.30", description="yolox confidence threshold." ), DeclareLaunchArgument( - 'nms', - default_value='0.45', - description='yolox nms threshold' + "nms", default_value="0.45", description="yolox nms threshold" ), + DeclareLaunchArgument("imshow_isshow", default_value="true", description=""), DeclareLaunchArgument( - 'imshow_isshow', - default_value='true', - description='' + "src_image_topic_name", + default_value="/image_raw", + description="topic name for source image", ), DeclareLaunchArgument( - 'src_image_topic_name', - default_value='/image_raw', - description='topic name for source image' + "publish_image_topic_name", + default_value="/yolox/image_raw", + description="topic name for publishing image with bounding box drawn", ), DeclareLaunchArgument( - 'publish_image_topic_name', - default_value='/yolox/image_raw', - description='topic name for publishing image with bounding box drawn' + "publish_boundingbox_topic_name", + default_value="/yolox/bounding_boxes", + description="topic name for publishing bounding box message.", ), DeclareLaunchArgument( - 'publish_boundingbox_topic_name', - default_value='/yolox/bounding_boxes', - description='topic name for publishing bounding box message.' + "use_bbox_ex_msgs", + default_value="false", + description="use BoundingBoxArray message type.", ), DeclareLaunchArgument( - 'use_bbox_ex_msgs', - default_value='false', - description='use BoundingBoxArray message type.' - ), - DeclareLaunchArgument( - 'publish_resized_image', - default_value='false', - description='use BoundingBoxArray message type.' + "publish_resized_image", + default_value="false", + description="use BoundingBoxArray message type.", ), ] container = ComposableNodeContainer( - name='yolox_container', - namespace='', - package='rclcpp_components', - executable='component_container', + name="yolox_container", + namespace="", + package="rclcpp_components", + executable="component_container", composable_node_descriptions=[ ComposableNode( - package='usb_cam', - plugin='usb_cam::UsbCamNode', - name='usb_cam_node', - parameters=[{ - 'video_device': LaunchConfiguration('video_device'), - 'brightness': 100 - }]), + package="usb_cam", + plugin="usb_cam::UsbCamNode", + name="usb_cam_node", + parameters=[ + { + "video_device": LaunchConfiguration("video_device"), + "brightness": 100, + } + ], + ), ComposableNode( - package='yolox_ros_cpp', - plugin='yolox_ros_cpp::YoloXNode', - name='yolox_ros_cpp', - parameters=[{ - 'model_path': LaunchConfiguration('model_path'), - 'p6': LaunchConfiguration('p6'), - 'class_labels_path': LaunchConfiguration('class_labels_path'), - 'num_classes': LaunchConfiguration('num_classes'), - 'model_type': 'openvino', - 'model_version': LaunchConfiguration('model_version'), - 'openvino_device': LaunchConfiguration('openvino_device'), - 'conf': LaunchConfiguration('conf'), - 'nms': LaunchConfiguration('nms'), - 'imshow_isshow': LaunchConfiguration('imshow_isshow'), - 'src_image_topic_name': LaunchConfiguration('src_image_topic_name'), - 'publish_image_topic_name': LaunchConfiguration('publish_image_topic_name'), - 'publish_boundingbox_topic_name': LaunchConfiguration('publish_boundingbox_topic_name'), - 'publish_resized_image': LaunchConfiguration('publish_resized_image'), - 'use_bbox_ex_msgs': LaunchConfiguration('use_bbox_ex_msgs'), - }], - ), + package="yolox_ros_cpp", + plugin="yolox_ros_cpp::YoloXNode", + name="yolox_ros_cpp", + parameters=[ + { + "model_path": LaunchConfiguration("model_path"), + "p6": LaunchConfiguration("p6"), + "class_labels_path": LaunchConfiguration("class_labels_path"), + "num_classes": LaunchConfiguration("num_classes"), + "model_type": "openvino", + "model_version": LaunchConfiguration("model_version"), + "openvino_device": LaunchConfiguration("openvino_device"), + "conf": LaunchConfiguration("conf"), + "nms": LaunchConfiguration("nms"), + "imshow_isshow": LaunchConfiguration("imshow_isshow"), + "src_image_topic_name": LaunchConfiguration( + "src_image_topic_name" + ), + "publish_image_topic_name": LaunchConfiguration( + "publish_image_topic_name" + ), + "publish_boundingbox_topic_name": LaunchConfiguration( + "publish_boundingbox_topic_name" + ), + "publish_resized_image": LaunchConfiguration( + "publish_resized_image" + ), + "use_bbox_ex_msgs": LaunchConfiguration("use_bbox_ex_msgs"), + } + ], + ), ], - output='screen', + output="screen", ) - return launch.LaunchDescription( - launch_args + - [ - container - ] - ) + return launch.LaunchDescription(launch_args + [container]) diff --git a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tensorrt.launch.py b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tensorrt.launch.py index fd6eea8..7c63210 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tensorrt.launch.py +++ b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tensorrt.launch.py @@ -2,127 +2,115 @@ from launch.actions import DeclareLaunchArgument from launch.substitutions import LaunchConfiguration from launch_ros.actions import ComposableNodeContainer +from launch.actions import SetEnvironmentVariable from launch_ros.descriptions import ComposableNode -def generate_launch_description(): +def generate_launch_description(): launch_args = [ DeclareLaunchArgument( - 'video_device', - default_value='/dev/video0', - description='input video source' - ), - DeclareLaunchArgument( - 'model_path', - default_value='./src/YOLOX-ROS/weights/tensorrt/yolox_tiny.trt', - description='yolox model path.' + "video_device", + default_value="/dev/video0", + description="input video source", ), DeclareLaunchArgument( - 'p6', - default_value='false', - description='with p6.' + "model_path", + default_value="/home/triton/Documents/cv-dev/TR-Autonomy-2024-2025/src/TR-YOLOX-ROS/weights/tensorrt/modelInt8_clocks.engine", + description="yolox model path.", ), + DeclareLaunchArgument("p6", default_value="false", description="with p6."), DeclareLaunchArgument( - 'class_labels_path', - default_value='', - description='if use custom model, set class name labels. ' + "class_labels_path", + default_value="", + description="if use custom model, set class name labels. ", ), DeclareLaunchArgument( - 'num_classes', - default_value='80', - description='num classes.' + "num_classes", default_value="80", description="num classes." ), DeclareLaunchArgument( - 'tensorrt_device', - default_value='0', - description='GPU index. Set in string type. ex 0' + "tensorrt_device", + default_value="0", + description="GPU index. Set in string type. ex 0", ), DeclareLaunchArgument( - 'model_version', - default_value='0.1.1rc0', - description='yolox model version.' + "model_version", + default_value="0.1.1rc0", + description="yolox model version.", ), DeclareLaunchArgument( - 'conf', - default_value='0.30', - description='yolox confidence threshold.' + "conf", default_value="0.7", description="yolox confidence threshold." ), DeclareLaunchArgument( - 'nms', - default_value='0.45', - description='yolox nms threshold' + "nms", default_value="0.45", description="yolox nms threshold" ), + DeclareLaunchArgument("imshow_isshow", default_value="false", description=""), DeclareLaunchArgument( - 'imshow_isshow', - default_value='true', - description='' + "src_image_topic_name", + default_value="/camera/image", + description="topic name for source image", ), DeclareLaunchArgument( - 'src_image_topic_name', - default_value='/image_raw', - description='topic name for source image' + "publish_image_topic_name", + default_value="/yolox/image_raw", + description="topic name for publishing image with bounding box drawn", ), DeclareLaunchArgument( - 'publish_image_topic_name', - default_value='/yolox/image_raw', - description='topic name for publishing image with bounding box drawn' + "publish_boundingbox_topic_name", + default_value="/detections", + description="topic name for publishing bounding box message.", ), DeclareLaunchArgument( - 'publish_boundingbox_topic_name', - default_value='/yolox/bounding_boxes', - description='topic name for publishing bounding box message.' + "use_bbox_ex_msgs", + default_value="false", + description="use BoundingBoxArray message type.", ), DeclareLaunchArgument( - 'use_bbox_ex_msgs', - default_value='false', - description='use BoundingBoxArray message type.' - ), - DeclareLaunchArgument( - 'publish_resized_image', - default_value='false', - description='use BoundingBoxArray message type.' + "publish_resized_image", + default_value="false", + description="use BoundingBoxArray message type.", ), ] + (SetEnvironmentVariable(name="RCLCPP_EXECUTOR_THREAD_COUNT", value="2"),) container = ComposableNodeContainer( - name='yolox_container', - namespace='', - package='rclcpp_components', - executable='component_container', + name="yolox_container", + namespace="", + package="rclcpp_components", + executable="component_container", composable_node_descriptions=[ ComposableNode( - package='usb_cam', - plugin='usb_cam::UsbCamNode', - name='usb_cam_node', - parameters=[{ - 'video_device': LaunchConfiguration('video_device'), - 'brightness': 100 - }]), - ComposableNode( - package='yolox_ros_cpp', - plugin='yolox_ros_cpp::YoloXNode', - name='yolox_ros_cpp', - parameters=[{ - 'model_path': LaunchConfiguration('model_path'), - 'p6': LaunchConfiguration('p6'), - 'class_labels_path': LaunchConfiguration('class_labels_path'), - 'num_classes': LaunchConfiguration('num_classes'), - 'model_type': 'tensorrt', - 'model_version': LaunchConfiguration('model_version'), - 'tensorrt_device': LaunchConfiguration('tensorrt_device'), - 'conf': LaunchConfiguration('conf'), - 'nms': LaunchConfiguration('nms'), - 'imshow_isshow': LaunchConfiguration('imshow_isshow'), - 'src_image_topic_name': LaunchConfiguration('src_image_topic_name'), - 'publish_image_topic_name': LaunchConfiguration('publish_image_topic_name'), - 'publish_boundingbox_topic_name': LaunchConfiguration('publish_boundingbox_topic_name'), - 'publish_resized_image': LaunchConfiguration('publish_resized_image'), - 'use_bbox_ex_msgs': LaunchConfiguration('use_bbox_ex_msgs'), - }], + package="yolox_ros_cpp", + plugin="yolox_ros_cpp::YoloXNode", + name="yolox_ros_cpp", + parameters=[ + { + "model_path": LaunchConfiguration("model_path"), + "p6": LaunchConfiguration("p6"), + "class_labels_path": LaunchConfiguration("class_labels_path"), + "num_classes": LaunchConfiguration("num_classes"), + "model_type": "tensorrt", + "model_version": LaunchConfiguration("model_version"), + "tensorrt_device": LaunchConfiguration("tensorrt_device"), + "conf": LaunchConfiguration("conf"), + "nms": LaunchConfiguration("nms"), + "imshow_isshow": LaunchConfiguration("imshow_isshow"), + "src_image_topic_name": LaunchConfiguration( + "src_image_topic_name" + ), + "publish_image_topic_name": LaunchConfiguration( + "publish_image_topic_name" + ), + "publish_boundingbox_topic_name": LaunchConfiguration( + "publish_boundingbox_topic_name" + ), + "publish_resized_image": LaunchConfiguration( + "publish_resized_image" + ), + "use_bbox_ex_msgs": LaunchConfiguration("use_bbox_ex_msgs"), + } + ], ), ], - output='screen', + output="screen", ) - return launch.LaunchDescription( - launch_args + [container] - ) + return launch.LaunchDescription(launch_args + [container]) diff --git a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tflite.launch.py b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tflite.launch.py index 023f972..0fdfdf7 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tflite.launch.py +++ b/yolox_ros_cpp/yolox_ros_cpp/launch/yolox_tflite.launch.py @@ -8,127 +8,122 @@ def generate_launch_description(): launch_args = [ DeclareLaunchArgument( - 'video_device', - default_value='/dev/video0', - description='input video source' + "video_device", + default_value="/dev/video0", + description="input video source", ), DeclareLaunchArgument( - 'model_path', - default_value='./src/YOLOX-ROS/weights/tflite/model.tflite', - description='yolox model path.' + "model_path", + default_value="./src/YOLOX-ROS/weights/tflite/model.tflite", + description="yolox model path.", ), + DeclareLaunchArgument("p6", default_value="false", description="with p6."), DeclareLaunchArgument( - 'p6', - default_value='false', - description='with p6.' + "is_nchw", + default_value="true", + description="model input shape is NCHW or NWHC.", ), DeclareLaunchArgument( - 'is_nchw', - default_value='true', - description='model input shape is NCHW or NWHC.' + "class_labels_path", + default_value="", + description="if use custom model, set class name labels. ", ), DeclareLaunchArgument( - 'class_labels_path', - default_value='', - description='if use custom model, set class name labels. ' + "num_classes", default_value="1", description="num classes." ), DeclareLaunchArgument( - 'num_classes', - default_value='1', - description='num classes.' + "model_version", + default_value="0.1.1rc0", + description="yolox model version.", ), DeclareLaunchArgument( - 'model_version', - default_value='0.1.1rc0', - description='yolox model version.' + "tflite_num_threads", default_value="1", description="tflite num_threads." ), DeclareLaunchArgument( - 'tflite_num_threads', - default_value='1', - description='tflite num_threads.' + "conf", default_value="0.30", description="yolox confidence threshold." ), DeclareLaunchArgument( - 'conf', - default_value='0.30', - description='yolox confidence threshold.' + "nms", default_value="0.45", description="yolox nms threshold" ), + DeclareLaunchArgument("imshow_isshow", default_value="true", description=""), DeclareLaunchArgument( - 'nms', - default_value='0.45', - description='yolox nms threshold' + "src_image_topic_name", + default_value="/image_raw", + description="topic name for source image", ), DeclareLaunchArgument( - 'imshow_isshow', - default_value='true', - description='' + "publish_image_topic_name", + default_value="/yolox/image_raw", + description="topic name for publishing image with bounding box drawn", ), DeclareLaunchArgument( - 'src_image_topic_name', - default_value='/image_raw', - description='topic name for source image' + "publish_boundingbox_topic_name", + default_value="/yolox/bounding_boxes", + description="topic name for publishing bounding box message.", ), DeclareLaunchArgument( - 'publish_image_topic_name', - default_value='/yolox/image_raw', - description='topic name for publishing image with bounding box drawn' + "use_bbox_ex_msgs", + default_value="false", + description="use BoundingBoxArray message type.", ), DeclareLaunchArgument( - 'publish_boundingbox_topic_name', - default_value='/yolox/bounding_boxes', - description='topic name for publishing bounding box message.' - ), - DeclareLaunchArgument( - 'use_bbox_ex_msgs', - default_value='false', - description='use BoundingBoxArray message type.' - ), - DeclareLaunchArgument( - 'publish_resized_image', - default_value='false', - description='use BoundingBoxArray message type.' + "publish_resized_image", + default_value="false", + description="use BoundingBoxArray message type.", ), ] container = ComposableNodeContainer( - name='yolox_container', - namespace='', - package='rclcpp_components', - executable='component_container', + name="yolox_container", + namespace="", + package="rclcpp_components", + executable="component_container", composable_node_descriptions=[ ComposableNode( - package='usb_cam', - plugin='usb_cam::UsbCamNode', - name='usb_cam_node', - parameters=[{ - 'video_device': LaunchConfiguration('video_device'), - 'brightness': 100 - }]), + package="usb_cam", + plugin="usb_cam::UsbCamNode", + name="usb_cam_node", + parameters=[ + { + "video_device": LaunchConfiguration("video_device"), + "brightness": 100, + } + ], + ), ComposableNode( - package='yolox_ros_cpp', - plugin='yolox_ros_cpp::YoloXNode', - name='yolox_ros_cpp', - parameters=[{ - 'model_path': LaunchConfiguration('model_path'), - 'p6': LaunchConfiguration('p6'), - 'class_labels_path': LaunchConfiguration('class_labels_path'), - 'num_classes': LaunchConfiguration('num_classes'), - 'is_nchw': LaunchConfiguration('is_nchw'), - 'model_type': 'tflite', - 'model_version': LaunchConfiguration('model_version'), - 'tflite_num_threads': LaunchConfiguration('tflite_num_threads'), - 'conf': LaunchConfiguration('conf'), - 'nms': LaunchConfiguration('nms'), - 'imshow_isshow': LaunchConfiguration('imshow_isshow'), - 'src_image_topic_name': LaunchConfiguration('src_image_topic_name'), - 'publish_image_topic_name': LaunchConfiguration('publish_image_topic_name'), - 'publish_boundingbox_topic_name': LaunchConfiguration('publish_boundingbox_topic_name'), - 'publish_resized_image': LaunchConfiguration('publish_resized_image'), - 'use_bbox_ex_msgs': LaunchConfiguration('use_bbox_ex_msgs'), - }], + package="yolox_ros_cpp", + plugin="yolox_ros_cpp::YoloXNode", + name="yolox_ros_cpp", + parameters=[ + { + "model_path": LaunchConfiguration("model_path"), + "p6": LaunchConfiguration("p6"), + "class_labels_path": LaunchConfiguration("class_labels_path"), + "num_classes": LaunchConfiguration("num_classes"), + "is_nchw": LaunchConfiguration("is_nchw"), + "model_type": "tflite", + "model_version": LaunchConfiguration("model_version"), + "tflite_num_threads": LaunchConfiguration("tflite_num_threads"), + "conf": LaunchConfiguration("conf"), + "nms": LaunchConfiguration("nms"), + "imshow_isshow": LaunchConfiguration("imshow_isshow"), + "src_image_topic_name": LaunchConfiguration( + "src_image_topic_name" + ), + "publish_image_topic_name": LaunchConfiguration( + "publish_image_topic_name" + ), + "publish_boundingbox_topic_name": LaunchConfiguration( + "publish_boundingbox_topic_name" + ), + "publish_resized_image": LaunchConfiguration( + "publish_resized_image" + ), + "use_bbox_ex_msgs": LaunchConfiguration("use_bbox_ex_msgs"), + } + ], ), ], - output='screen', + output="screen", ) - return launch.LaunchDescription( - launch_args + [container] - ) + return launch.LaunchDescription(launch_args + [container]) diff --git a/yolox_ros_cpp/yolox_ros_cpp/package.xml b/yolox_ros_cpp/yolox_ros_cpp/package.xml index 1f6e181..ec7ebe5 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/package.xml +++ b/yolox_ros_cpp/yolox_ros_cpp/package.xml @@ -1,33 +1,34 @@ - yolox_ros_cpp - 0.4.1 - The yolox_ros_cpp package - fateshelled - Apache-2.0 License - fateshelled + yolox_ros_cpp + 0.4.1 + The yolox_ros_cpp package + fateshelled + Apache-2.0 License + fateshelled - ament_cmake_auto + ament_cmake_auto - bboxes_ex_msgs - cv_bridge - image_transport - libopencv-dev - rclcpp - rclcpp_components - sensor_msgs - std_msgs - vision_msgs - yolox_cpp - yolox_param + bboxes_ex_msgs + cv_bridge + image_transport + libopencv-dev + rclcpp + rclcpp_components + sensor_msgs + std_msgs + vision_msgs + yolox_cpp + yolox_param + tr_messages - usb_cam + usb_cam - ament_lint_auto - ament_lint_common + ament_lint_auto + ament_lint_common - - ament_cmake - - + + ament_cmake + + \ No newline at end of file diff --git a/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp b/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp index a5f5bcf..685fa5a 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp +++ b/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp @@ -1,207 +1,232 @@ #include "yolox_ros_cpp/yolox_ros_cpp.hpp" -namespace yolox_ros_cpp -{ - YoloXNode::YoloXNode(const rclcpp::NodeOptions &options) - : Node("yolox_ros_cpp", options) - { - using namespace std::chrono_literals; // NOLINT - this->init_timer_ = this->create_wall_timer( - 0s, std::bind(&YoloXNode::onInit, this)); - } - - void YoloXNode::onInit() - { - this->init_timer_->cancel(); - this->param_listener_ = std::make_shared( - this->get_node_parameters_interface()); - - this->params_ = this->param_listener_->get_params(); +namespace yolox_ros_cpp { +YoloXNode::YoloXNode(const rclcpp::NodeOptions &options) + : Node("yolox_ros_cpp", options) { + using namespace std::chrono_literals; // NOLINT + this->init_timer_ = + this->create_wall_timer(0s, std::bind(&YoloXNode::onInit, this)); +} - if (this->params_.imshow_isshow) - { - cv::namedWindow("yolox", cv::WINDOW_AUTOSIZE); - } +void YoloXNode::onInit() { + this->init = true; + this->d_image_ = nullptr; + this->d_output_ = nullptr; + cudaStreamCreate(&this->copy_stream_); + cudaStreamCreate(&this->resize_stream_); + rclcpp::on_shutdown([this]() { + cudaStreamDestroy(this->copy_stream_); + cudaStreamDestroy(this->resize_stream_); + cudaFree(this->d_image_); + cudaFree(this->d_output_); + this->copy_stream_ = nullptr; + this->resize_stream_ = nullptr; + }); + this->init_timer_->cancel(); + this->param_listener_ = std::make_shared( + this->get_node_parameters_interface()); + this->params_ = this->param_listener_->get_params(); + this->callback_group_reentrant_ = + this->create_callback_group(rclcpp::CallbackGroupType::Reentrant); + this->sub_options_ = std::make_shared(); + this->sub_options_->callback_group = callback_group_reentrant_; + this->sub_options_->use_intra_process_comm = + rclcpp::IntraProcessSetting::Enable; + if (this->params_.imshow_isshow) { + cv::namedWindow("yolox", cv::WINDOW_AUTOSIZE); + } - if (this->params_.class_labels_path != "") - { - RCLCPP_INFO(this->get_logger(), "read class labels from '%s'", this->params_.class_labels_path.c_str()); - this->class_names_ = yolox_cpp::utils::read_class_labels_file(this->params_.class_labels_path); - } - else - { - this->class_names_ = yolox_cpp::COCO_CLASSES; - } + if (this->params_.class_labels_path != "") { + RCLCPP_INFO(this->get_logger(), "read class labels from '%s'", + this->params_.class_labels_path.c_str()); + this->class_names_ = yolox_cpp::utils::read_class_labels_file( + this->params_.class_labels_path); + } else { + this->class_names_ = yolox_cpp::COCO_CLASSES; + } - if (this->params_.model_type == "tensorrt") - { + if (this->params_.model_type == "tensorrt") { #ifdef ENABLE_TENSORRT - RCLCPP_INFO(this->get_logger(), "Model Type is TensorRT"); - this->yolox_ = std::make_unique( - this->params_.model_path, this->params_.tensorrt_device, - this->params_.nms, this->params_.conf, this->params_.model_version, - this->params_.num_classes, this->params_.p6); + RCLCPP_INFO(this->get_logger(), "Model Type is TensorRT"); + this->yolox_ = std::make_unique( + this->params_.model_path, this->params_.tensorrt_device, + this->params_.nms, this->params_.conf, this->params_.model_version, + this->params_.num_classes, this->params_.p6); #else - RCLCPP_ERROR(this->get_logger(), "yolox_cpp is not built with TensorRT"); - rclcpp::shutdown(); + RCLCPP_ERROR(this->get_logger(), + "yolox_cpp is not built with TensorRT"); + rclcpp::shutdown(); #endif - } - else if (this->params_.model_type == "openvino") - { + } else if (this->params_.model_type == "openvino") { #ifdef ENABLE_OPENVINO - RCLCPP_INFO(this->get_logger(), "Model Type is OpenVINO"); - this->yolox_ = std::make_unique( - this->params_.model_path, this->params_.openvino_device, - this->params_.nms, this->params_.conf, this->params_.model_version, - this->params_.num_classes, this->params_.p6); + RCLCPP_INFO(this->get_logger(), "Model Type is OpenVINO"); + this->yolox_ = std::make_unique( + this->params_.model_path, this->params_.openvino_device, + this->params_.nms, this->params_.conf, this->params_.model_version, + this->params_.num_classes, this->params_.p6); #else - RCLCPP_ERROR(this->get_logger(), "yolox_cpp is not built with OpenVINO"); - rclcpp::shutdown(); + RCLCPP_ERROR(this->get_logger(), + "yolox_cpp is not built with OpenVINO"); + rclcpp::shutdown(); #endif - } - else if (this->params_.model_type == "onnxruntime") - { + } else if (this->params_.model_type == "onnxruntime") { #ifdef ENABLE_ONNXRUNTIME - RCLCPP_INFO(this->get_logger(), "Model Type is ONNXRuntime"); - this->yolox_ = std::make_unique( - this->params_.model_path, - this->params_.onnxruntime_intra_op_num_threads, - this->params_.onnxruntime_inter_op_num_threads, - this->params_.onnxruntime_use_cuda, this->params_.onnxruntime_device_id, - this->params_.onnxruntime_use_parallel, - this->params_.nms, this->params_.conf, this->params_.model_version, - this->params_.num_classes, this->params_.p6); + RCLCPP_INFO(this->get_logger(), "Model Type is ONNXRuntime"); + this->yolox_ = std::make_unique( + this->params_.model_path, + this->params_.onnxruntime_intra_op_num_threads, + this->params_.onnxruntime_inter_op_num_threads, + this->params_.onnxruntime_use_cuda, + this->params_.onnxruntime_device_id, + this->params_.onnxruntime_use_parallel, this->params_.nms, + this->params_.conf, this->params_.model_version, + this->params_.num_classes, this->params_.p6); #else - RCLCPP_ERROR(this->get_logger(), "yolox_cpp is not built with ONNXRuntime"); - rclcpp::shutdown(); + RCLCPP_ERROR(this->get_logger(), + "yolox_cpp is not built with ONNXRuntime"); + rclcpp::shutdown(); #endif - } - else if (this->params_.model_type == "tflite") - { + } else if (this->params_.model_type == "tflite") { #ifdef ENABLE_TFLITE - RCLCPP_INFO(this->get_logger(), "Model Type is tflite"); - this->yolox_ = std::make_unique( - this->params_.model_path, this->params_.tflite_num_threads, - this->params_.nms, this->params_.conf, this->params_.model_version, - this->params_.num_classes, this->params_.p6, this->params_.is_nchw); + RCLCPP_INFO(this->get_logger(), "Model Type is tflite"); + this->yolox_ = std::make_unique( + this->params_.model_path, this->params_.tflite_num_threads, + this->params_.nms, this->params_.conf, this->params_.model_version, + this->params_.num_classes, this->params_.p6, this->params_.is_nchw); #else - RCLCPP_ERROR(this->get_logger(), "yolox_cpp is not built with tflite"); - rclcpp::shutdown(); + RCLCPP_ERROR(this->get_logger(), "yolox_cpp is not built with tflite"); + rclcpp::shutdown(); #endif - } - RCLCPP_INFO(this->get_logger(), "model loaded"); - - this->sub_image_ = image_transport::create_subscription( - this, this->params_.src_image_topic_name, - std::bind(&YoloXNode::colorImageCallback, this, std::placeholders::_1), - "raw"); - - if (this->params_.use_bbox_ex_msgs) { - this->pub_bboxes_ = this->create_publisher( - this->params_.publish_boundingbox_topic_name, - 10); - } else { - this->pub_detection2d_ = this->create_publisher( - this->params_.publish_boundingbox_topic_name, - 10); - } - - if (this->params_.publish_resized_image) { - this->pub_image_ = image_transport::create_publisher(this, this->params_.publish_image_topic_name); - } + } + RCLCPP_INFO(this->get_logger(), "model loaded"); + + rclcpp::QoS qos_profile(5); // Queue depth of 5 for multithreading + this->sub_image_ = image_transport::create_subscription( + this, this->params_.src_image_topic_name, + std::bind(&YoloXNode::colorImageCallback, this, std::placeholders::_1), + "raw", qos_profile.get_rmw_qos_profile(), *this->sub_options_); + + if (this->params_.use_bbox_ex_msgs) { + this->pub_bboxes_ = + this->create_publisher( + this->params_.publish_boundingbox_topic_name, 10); + } else { + this->pub_detection2d_ = + this->create_publisher( + this->params_.publish_boundingbox_topic_name, 10); } - void YoloXNode::colorImageCallback(const sensor_msgs::msg::Image::ConstSharedPtr &ptr) - { - auto img = cv_bridge::toCvCopy(ptr, "bgr8"); - cv::Mat frame = img->image; - - auto now = std::chrono::system_clock::now(); - auto objects = this->yolox_->inference(frame); - auto end = std::chrono::system_clock::now(); - - auto elapsed = std::chrono::duration_cast(end - now); - RCLCPP_INFO(this->get_logger(), "Inference time: %5ld us", elapsed.count()); + if (this->params_.publish_resized_image) { + this->pub_image_ = image_transport::create_publisher( + this, this->params_.publish_image_topic_name); + } +} - yolox_cpp::utils::draw_objects(frame, objects, this->class_names_); - if (this->params_.imshow_isshow) - { - cv::imshow("yolox", frame); - auto key = cv::waitKey(1); - if (key == 27) - { - rclcpp::shutdown(); - } +void YoloXNode::colorImageCallback( + const sensor_msgs::msg::Image::ConstSharedPtr &ptr) { + auto now_noninf = std::chrono::system_clock::now(); + auto img = cv_bridge::toCvShare(ptr, "bgr8"); + + auto now = std::chrono::system_clock::now(); + // Initialization + if (this->init) { + size_t image_bytes = sizeof(uchar3) * img->image.cols * img->image.rows; + cudaMallocManaged(reinterpret_cast(&this->d_image_), + image_bytes); + size_t output_bytes = sizeof(float) * 416 * 416 * 3; + cudaMallocManaged(reinterpret_cast(&this->d_output_), + output_bytes); + this->init = false; + } + auto objects = this->yolox_->inference( + img->image, this->d_image_, this->d_output_, this->copy_stream_, + this->resize_stream_); // Use img->image + auto end = std::chrono::system_clock::now(); + auto elapsed = + std::chrono::duration_cast(end - now); + + if (this->params_.imshow_isshow) { + yolox_cpp::utils::draw_objects(img->image, objects, this->class_names_); + cv::imshow("yolox", img->image); + if (cv::waitKey(1) == 27) { + rclcpp::shutdown(); } + } - if (this->params_.use_bbox_ex_msgs) - { - if (this->pub_bboxes_ == nullptr) - { - RCLCPP_ERROR(this->get_logger(), "pub_bboxes_ is nullptr"); - return; - } - auto boxes = objects_to_bboxes(frame, objects, img->header); - this->pub_bboxes_->publish(boxes); + if (this->params_.use_bbox_ex_msgs) { + if (!this->pub_bboxes_) { + RCLCPP_ERROR(this->get_logger(), "pub_bboxes_ is nullptr"); + return; } - else - { - if (this->pub_detection2d_ == nullptr) - { - RCLCPP_ERROR(this->get_logger(), "pub_detection2d_ is nullptr"); - return; - } - vision_msgs::msg::Detection2DArray detections = objects_to_detection2d(objects, img->header); - this->pub_detection2d_->publish(detections); + auto boxes = + objects_to_bboxes(img->image, objects, img->header); // Fix here + this->pub_bboxes_->publish(boxes); + } else { + if (!this->pub_detection2d_) { + RCLCPP_ERROR(this->get_logger(), "pub_detection2d_ is nullptr"); + return; } - - if (this->params_.publish_resized_image) { - sensor_msgs::msg::Image::SharedPtr pub_img = - cv_bridge::CvImage(img->header, "bgr8", frame).toImageMsg(); - this->pub_image_.publish(pub_img); + vision_msgs::msg::Detection2DArray detections = + objects_to_detection2d(objects, img->header); + if (!detections.detections.empty()) { + tr_messages::msg::DetWithImg detwithimg; + // detwithimg.image = *ptr; // Copy unavoidable due to const shared + // ptr + detwithimg.detection_info.detections = detections.detections; + this->pub_detection2d_->publish(detwithimg); + } else { + RCLCPP_INFO(this->get_logger(), "no detections so not publishing"); } } + auto end_noninf = std::chrono::system_clock::now(); + auto elapsed_noninf = std::chrono::duration_cast( + end_noninf - now_noninf); + RCLCPP_INFO(this->get_logger(), + "Inference time: %5ld us Non Inference time: %5ld", + elapsed.count(), elapsed_noninf.count() - elapsed.count()); +} - bboxes_ex_msgs::msg::BoundingBoxes YoloXNode::objects_to_bboxes( - const cv::Mat &frame, const std::vector &objects, const std_msgs::msg::Header &header) - { - bboxes_ex_msgs::msg::BoundingBoxes boxes; - boxes.header = header; - for (const auto &obj : objects) - { - bboxes_ex_msgs::msg::BoundingBox box; - box.probability = obj.prob;; - box.class_id = std::to_string(obj.label); - box.xmin = obj.rect.x; - box.ymin = obj.rect.y; - box.xmax = (obj.rect.x + obj.rect.width); - box.ymax = (obj.rect.y + obj.rect.height); - box.img_width = frame.cols; - box.img_height = frame.rows; - boxes.bounding_boxes.emplace_back(box); - } - return boxes; +bboxes_ex_msgs::msg::BoundingBoxes +YoloXNode::objects_to_bboxes(const cv::Mat &frame, + const std::vector &objects, + const std_msgs::msg::Header &header) { + bboxes_ex_msgs::msg::BoundingBoxes boxes; + boxes.header = header; + for (const auto &obj : objects) { + bboxes_ex_msgs::msg::BoundingBox box; + box.probability = obj.prob; + ; + box.class_id = std::to_string(obj.label); + box.xmin = obj.rect.x; + box.ymin = obj.rect.y; + box.xmax = (obj.rect.x + obj.rect.width); + box.ymax = (obj.rect.y + obj.rect.height); + box.img_width = frame.cols; + box.img_height = frame.rows; + boxes.bounding_boxes.emplace_back(box); } + return boxes; +} - vision_msgs::msg::Detection2DArray YoloXNode::objects_to_detection2d(const std::vector &objects, const std_msgs::msg::Header &header) - { - vision_msgs::msg::Detection2DArray detection2d; - detection2d.header = header; - for (const auto &obj : objects) - { - vision_msgs::msg::Detection2D det; - det.bbox.center.position.x = obj.rect.x + obj.rect.width / 2; - det.bbox.center.position.y = obj.rect.y + obj.rect.height / 2; - det.bbox.size_x = obj.rect.width; - det.bbox.size_y = obj.rect.height; - - det.results.resize(1); - det.results[0].hypothesis.class_id = std::to_string(obj.label); - det.results[0].hypothesis.score = obj.prob; - detection2d.detections.emplace_back(det); - } - return detection2d; +vision_msgs::msg::Detection2DArray +YoloXNode::objects_to_detection2d(const std::vector &objects, + const std_msgs::msg::Header &header) { + vision_msgs::msg::Detection2DArray detection2d; + detection2d.header = header; + for (const auto &obj : objects) { + vision_msgs::msg::Detection2D det; + det.bbox.center.position.x = obj.rect.x + obj.rect.width / 2; + det.bbox.center.position.y = obj.rect.y + obj.rect.height / 2; + det.bbox.size_x = obj.rect.width; + det.bbox.size_y = obj.rect.height; + + det.results.resize(1); + det.results[0].hypothesis.class_id = std::to_string(obj.label); + det.results[0].hypothesis.score = obj.prob; + detection2d.detections.emplace_back(det); } + return detection2d; } +} // namespace yolox_ros_cpp RCLCPP_COMPONENTS_REGISTER_NODE(yolox_ros_cpp::YoloXNode)