5757// 7. Run the compute graph.
5858// 8. Transfer the output to the host and post-process it.
5959//
60- // Custom pipelines are simply functions which call the individual steps and extend them
61- // where needed. The implementation of the high-level API functions is a good starting point.
60+ // Custom pipelines can be created simply by writing a function that calls the
61+ // individual steps. As a starting point, check out or copy the implementation
62+ // of the high-level API functions. Then adapt them as needed.
6263// This allows you to:
6364// * load model weights from a different source
6465// * control exactly when allocation happens
7677
7778#include <array>
7879#include <span>
80+ #include <vector>
7981
8082namespace visp {
8183
82- // SWIN - vision transformer for feature extraction
84+ // SWIN v1 - vision transformer for feature extraction
8385
8486constexpr int swin_n_layers = 4 ;
8587
@@ -102,7 +104,7 @@ VISP_API swin_params swin_detect_params(model_file const&);
102104VISP_API swin_buffers swin_precompute (model_ref, i32x2 image_extent, swin_params const &);
103105VISP_API swin_result swin_encode (model_ref, tensor image, swin_params const &);
104106
105- // DINO - vision transformer for feature extraction
107+ // DINO v2 - vision transformer for feature extraction
106108
107109struct dino_params {
108110 int patch_size = 16 ;
@@ -169,7 +171,9 @@ VISP_API image_data sam_process_mask(
169171struct birefnet_model ;
170172
171173// Loads a BiRefNet model from GGUF file onto the backend device.
172- // * supports BiRefNet, BiRefNet_lite, BiRefNet_Matting variants at 1024px resolution
174+ // * supports BiRefNet, BiRefNet-lite, BiRefNet-Matting variants at 1024px resolution
175+ // * supports BiRefNet-HR variant at 2048px resolution
176+ // * supports BiRefNet-dynamic variant at arbitrary resolution
173177VISP_API birefnet_model birefnet_load_model (char const * filepath, backend_device const &);
174178
175179// Takes RGB input and computes an alpha mask with foreground as 1.0 and background as 0.0.
@@ -203,7 +207,12 @@ VISP_API tensor birefnet_predict(model_ref, tensor image, birefnet_params const&
203207
204208struct depthany_model ;
205209
210+ // Loads a Depth Anything V2 model from GGUF file onto the backend device.
211+ // * supports Small/Base/Large variants with flexible input resolution
206212VISP_API depthany_model depthany_load_model (char const * filepath, backend_device const &);
213+
214+ // Takes RGB input and computes estimated depth (distance from camera).
215+ // Output is a single-channel float32 image in range [0, 1.0].
207216VISP_API image_data depthany_compute (depthany_model&, image_view image);
208217
209218// --- Depth Anything pipeline
@@ -222,7 +231,7 @@ VISP_API i32x2 depthany_image_extent(i32x2 input_extent, depthany_params const&)
222231
223232VISP_API image_data depthany_process_input (image_view image, depthany_params const &);
224233image_data depthany_process_output (
225- span<float const > output_data, i32x2 target_extent, depthany_params const &);
234+ std:: span<float const > output_data, i32x2 target_extent, depthany_params const &);
226235
227236VISP_API tensor depthany_predict (model_ref, tensor image, depthany_params const &);
228237
0 commit comments