Skip to content

Commit 085b4d2

Browse files
authored
Preserving hardware memory during hw decoding, exporting/importing via dlpack (#2155)
Add DLPack support for zero-copy GPU frame exchange between PyAV and frameworks like PyTorch/CuPy, avoiding GPU->CPU->GPU round-trips when using cuvid decode + GPU processing + nvenc encode.

- Implement __dlpack__ and __dlpack_device__ on VideoPlane
- Add VideoFrame.from_dlpack for importing DLPack tensors (nv12/p010le/p016le)
- Add CudaContext for managing CUDA device/frames contexts
- Add HWAccel.is_hw_owned to preserve frames on GPU memory
- Switch frame buffer management from av_image_alloc to av_frame_get_buffer
- Handle hw_frames_ctx in planes, reformatter, and to_ndarray paths
1 parent f6de2c5 commit 085b4d2

File tree

14 files changed

+1305
-50
lines changed

14 files changed

+1305
-50
lines changed

av/codec/hwaccel.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ cdef class HWAccel:
1616
cdef readonly Codec codec
1717
cdef readonly HWConfig config
1818
cdef lib.AVBufferRef *ptr
19+
cdef readonly int device_id
20+
cdef readonly bint is_hw_owned
1921
cdef public bint allow_software_fallback
2022
cdef public dict options
2123
cdef public int flags

av/codec/hwaccel.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ def __init__(
110110
allow_software_fallback=True,
111111
options=None,
112112
flags=None,
113+
is_hw_owned=False,
113114
):
114115
if isinstance(device_type, HWDeviceType):
115116
self._device_type = device_type
@@ -120,9 +121,17 @@ def __init__(
120121
else:
121122
raise ValueError("Unknown type for device_type")
122123

123-
self._device = device
124+
self.is_hw_owned = is_hw_owned
125+
self.device_id = 0
126+
if self._device_type == HWDeviceType.cuda and device:
127+
self.device_id = int(device)
128+
129+
self._device = None if device is None else f"{device}"
124130
self.allow_software_fallback = allow_software_fallback
131+
125132
self.options = {} if not options else dict(options)
133+
if self._device_type == HWDeviceType.cuda and self.is_hw_owned:
134+
self.options.setdefault("primary_ctx", "1")
126135
self.flags = 0 if not flags else flags
127136
self.ptr = cython.NULL
128137
self.config = None
@@ -135,7 +144,7 @@ def _initialize_hw_context(self, codec: Codec):
135144
if self._device_type and config.device_type != self._device_type:
136145
continue
137146
break
138-
else:
147+
else: # nobreak
139148
raise NotImplementedError(f"No supported hardware config for {codec}")
140149

141150
self.config = config
@@ -155,7 +164,7 @@ def _initialize_hw_context(self, codec: Codec):
155164
)
156165
)
157166

158-
def create(self, codec: Codec):
167+
def create(self, codec: Codec) -> HWAccel:
159168
"""Create a new hardware accelerator context with the given codec"""
160169
if self.ptr:
161170
raise RuntimeError("Hardware context already initialized")
@@ -165,6 +174,7 @@ def create(self, codec: Codec):
165174
device=self._device,
166175
allow_software_fallback=self.allow_software_fallback,
167176
options=self.options,
177+
is_hw_owned=self.is_hw_owned,
168178
)
169179
ret._initialize_hw_context(codec)
170180
return ret

av/codec/hwaccel.pyi

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,20 @@ class HWConfig:
3737
def is_supported(self) -> bool: ...
3838

3939
class HWAccel:
40+
options: dict[str, object]
41+
42+
@property
43+
def is_hw_owned(self) -> bool: ...
44+
@property
45+
def device_id(self) -> int: ...
4046
def __init__(
4147
self,
4248
device_type: str | HWDeviceType,
43-
device: str | None = None,
49+
device: str | int | None = None,
4450
allow_software_fallback: bool = False,
4551
options: dict[str, object] | None = None,
4652
flags: int | None = None,
53+
is_hw_owned: bool = False,
4754
) -> None: ...
4855
def create(self, codec: Codec) -> HWAccel: ...
4956

av/video/codeccontext.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ def _transfer_hwframe(self, frame: Frame):
127127
# need to transfer.
128128
return frame
129129

130+
if self.hwaccel_ctx.is_hw_owned:
131+
cython.cast(VideoFrame, frame)._device_id = self.hwaccel_ctx.device_id
132+
return frame
133+
130134
frame_sw: Frame = self._alloc_next_frame()
131135
err_check(lib.av_hwframe_transfer_data(frame_sw.ptr, frame.ptr, 0))
132136
# TODO: Is there anything else to transfer?

av/video/frame.pxd

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,21 @@ from av.video.format cimport VideoFormat
66
from av.video.reformatter cimport VideoReformatter
77

88

9-
cdef class VideoFrame(Frame):
10-
# This is the buffer that is used to back everything in the AVFrame.
11-
# We don't ever actually access it directly.
12-
cdef uint8_t *_buffer
13-
cdef object _np_buffer
9+
cdef class CudaContext:
10+
cdef readonly int device_id
11+
cdef readonly bint primary_ctx
12+
cdef lib.AVBufferRef* _device_ref
13+
cdef dict _frames_cache
14+
cdef lib.AVBufferRef* _get_device_ref(self)
15+
cdef public lib.AVBufferRef* get_frames_ctx(
16+
self, lib.AVPixelFormat sw_fmt, int width, int height
17+
)
1418

19+
cdef class VideoFrame(Frame):
20+
cdef CudaContext _cuda_ctx
1521
cdef VideoReformatter reformatter
1622
cdef readonly VideoFormat format
17-
23+
cdef readonly int _device_id
1824
cdef _init(self, lib.AVPixelFormat format, unsigned int width, unsigned int height)
1925
cdef _init_user_attributes(self)
2026
cpdef save(self, object filepath)

0 commit comments

Comments
 (0)