Skip to content

Commit afae8b6

Browse files
authored
Reduce overhead of to_ndarray (#2165)
Move the pix-format dtype dict out of the function body to module level. Prevent some unneeded NumPy copies.
1 parent 80da805 commit afae8b6

File tree

1 file changed

+128
-141
lines changed

1 file changed

+128
-141
lines changed

av/video/frame.py

Lines changed: 128 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,90 @@ def _numpy_avbuffer_free(
264264
"yuyv422",
265265
}
266266

267+
# Mapping from format name to (itemsize, dtype) for formats where planes
268+
# are simply concatenated into shape (height, width, channels).
269+
_np_pix_fmt_dtypes: dict[str, tuple[cython.uint, str]] = {
270+
"abgr": (4, "uint8"),
271+
"argb": (4, "uint8"),
272+
"bayer_bggr8": (1, "uint8"),
273+
"bayer_gbrg8": (1, "uint8"),
274+
"bayer_grbg8": (1, "uint8"),
275+
"bayer_rggb8": (1, "uint8"),
276+
"bayer_bggr16le": (2, "uint16"),
277+
"bayer_bggr16be": (2, "uint16"),
278+
"bayer_gbrg16le": (2, "uint16"),
279+
"bayer_gbrg16be": (2, "uint16"),
280+
"bayer_grbg16le": (2, "uint16"),
281+
"bayer_grbg16be": (2, "uint16"),
282+
"bayer_rggb16le": (2, "uint16"),
283+
"bayer_rggb16be": (2, "uint16"),
284+
"bgr24": (3, "uint8"),
285+
"bgr48be": (6, "uint16"),
286+
"bgr48le": (6, "uint16"),
287+
"bgr8": (1, "uint8"),
288+
"bgra": (4, "uint8"),
289+
"bgra64be": (8, "uint16"),
290+
"bgra64le": (8, "uint16"),
291+
"gbrap": (1, "uint8"),
292+
"gbrap10be": (2, "uint16"),
293+
"gbrap10le": (2, "uint16"),
294+
"gbrap12be": (2, "uint16"),
295+
"gbrap12le": (2, "uint16"),
296+
"gbrap14be": (2, "uint16"),
297+
"gbrap14le": (2, "uint16"),
298+
"gbrap16be": (2, "uint16"),
299+
"gbrap16le": (2, "uint16"),
300+
"gbrapf32be": (4, "float32"),
301+
"gbrapf32le": (4, "float32"),
302+
"gbrp": (1, "uint8"),
303+
"gbrp10be": (2, "uint16"),
304+
"gbrp10le": (2, "uint16"),
305+
"gbrp12be": (2, "uint16"),
306+
"gbrp12le": (2, "uint16"),
307+
"gbrp14be": (2, "uint16"),
308+
"gbrp14le": (2, "uint16"),
309+
"gbrp16be": (2, "uint16"),
310+
"gbrp16le": (2, "uint16"),
311+
"gbrp9be": (2, "uint16"),
312+
"gbrp9le": (2, "uint16"),
313+
"gbrpf32be": (4, "float32"),
314+
"gbrpf32le": (4, "float32"),
315+
"gray": (1, "uint8"),
316+
"gray10be": (2, "uint16"),
317+
"gray10le": (2, "uint16"),
318+
"gray12be": (2, "uint16"),
319+
"gray12le": (2, "uint16"),
320+
"gray14be": (2, "uint16"),
321+
"gray14le": (2, "uint16"),
322+
"gray16be": (2, "uint16"),
323+
"gray16le": (2, "uint16"),
324+
"gray8": (1, "uint8"),
325+
"gray9be": (2, "uint16"),
326+
"gray9le": (2, "uint16"),
327+
"grayf32be": (4, "float32"),
328+
"grayf32le": (4, "float32"),
329+
"rgb24": (3, "uint8"),
330+
"rgb48be": (6, "uint16"),
331+
"rgb48le": (6, "uint16"),
332+
"rgb8": (1, "uint8"),
333+
"rgba": (4, "uint8"),
334+
"rgba64be": (8, "uint16"),
335+
"rgba64le": (8, "uint16"),
336+
"rgbaf16be": (8, "float16"),
337+
"rgbaf16le": (8, "float16"),
338+
"rgbaf32be": (16, "float32"),
339+
"rgbaf32le": (16, "float32"),
340+
"rgbf32be": (12, "float32"),
341+
"rgbf32le": (12, "float32"),
342+
"yuv444p": (1, "uint8"),
343+
"yuv444p16be": (2, "uint16"),
344+
"yuv444p16le": (2, "uint16"),
345+
"yuva444p16be": (2, "uint16"),
346+
"yuva444p16le": (2, "uint16"),
347+
"yuvj444p": (1, "uint8"),
348+
"yuyv422": (2, "uint8"),
349+
}
350+
267351

268352
@cython.cfunc
269353
def alloc_video_frame() -> VideoFrame:
@@ -354,9 +438,10 @@ def useful_array(
354438

355439
total_line_size: cython.size_t = abs(plane.line_size)
356440
useful_line_size: cython.size_t = plane.width * bytes_per_pixel
441+
if total_line_size == useful_line_size:
442+
return np.frombuffer(plane, dtype=dtype)
357443
arr = np.frombuffer(plane, np.uint8)
358-
if total_line_size != useful_line_size:
359-
arr = arr.reshape(-1, total_line_size)[:, 0:useful_line_size].reshape(-1)
444+
arr = arr.reshape(-1, total_line_size)[:, 0:useful_line_size].reshape(-1)
360445
return arr.view(np.dtype(dtype))
361446

362447

@@ -613,187 +698,89 @@ def to_ndarray(self, channel_last=False, **kwargs):
613698
.. note:: For ``gbrp`` formats, channels are flipped to RGB order.
614699
615700
"""
616-
kwargs2 = dict(kwargs)
617-
if self.ptr.hw_frames_ctx and "format" not in kwargs2:
701+
if self.ptr.hw_frames_ctx and "format" not in kwargs:
618702
frames_ctx: cython.pointer[lib.AVHWFramesContext] = cython.cast(
619703
cython.pointer[lib.AVHWFramesContext], self.ptr.hw_frames_ctx.data
620704
)
621-
kwargs2["format"] = get_video_format(
705+
kwargs = dict(kwargs)
706+
kwargs["format"] = get_video_format(
622707
frames_ctx.sw_format, self.ptr.width, self.ptr.height
623708
).name
624709

625-
frame: VideoFrame = self.reformat(**kwargs2)
710+
frame: VideoFrame = self.reformat(**kwargs)
626711
if frame.ptr.hw_frames_ctx:
627712
raise ValueError("Cannot convert a hardware frame to numpy directly.")
628713

629714
import numpy as np
630715

631716
# check size
632-
if frame.format.name in {
633-
"yuv420p",
634-
"yuvj420p",
635-
"yuyv422",
636-
"yuv422p10le",
637-
"yuv422p",
638-
}:
639-
assert frame.width % 2 == 0, (
640-
"the width has to be even for this pixel format"
641-
)
642-
assert frame.height % 2 == 0, (
643-
"the height has to be even for this pixel format"
644-
)
717+
format_name = frame.format.name
718+
height, width = frame.ptr.height, frame.ptr.width
719+
planes: tuple[VideoPlane, ...] = frame.planes
720+
if format_name in {"yuv420p", "yuvj420p", "yuyv422", "yuv422p10le", "yuv422p"}:
721+
assert width % 2 == 0, "the width has to be even for this pixel format"
722+
assert height % 2 == 0, "the height has to be even for this pixel format"
645723

646724
# cases planes are simply concatenated in shape (height, width, channels)
647-
itemsize, dtype = {
648-
"abgr": (4, "uint8"),
649-
"argb": (4, "uint8"),
650-
"bayer_bggr8": (1, "uint8"),
651-
"bayer_gbrg8": (1, "uint8"),
652-
"bayer_grbg8": (1, "uint8"),
653-
"bayer_rggb8": (1, "uint8"),
654-
"bayer_bggr16le": (2, "uint16"),
655-
"bayer_bggr16be": (2, "uint16"),
656-
"bayer_gbrg16le": (2, "uint16"),
657-
"bayer_gbrg16be": (2, "uint16"),
658-
"bayer_grbg16le": (2, "uint16"),
659-
"bayer_grbg16be": (2, "uint16"),
660-
"bayer_rggb16le": (2, "uint16"),
661-
"bayer_rggb16be": (2, "uint16"),
662-
"bgr24": (3, "uint8"),
663-
"bgr48be": (6, "uint16"),
664-
"bgr48le": (6, "uint16"),
665-
"bgr8": (1, "uint8"),
666-
"bgra": (4, "uint8"),
667-
"bgra64be": (8, "uint16"),
668-
"bgra64le": (8, "uint16"),
669-
"gbrap": (1, "uint8"),
670-
"gbrap10be": (2, "uint16"),
671-
"gbrap10le": (2, "uint16"),
672-
"gbrap12be": (2, "uint16"),
673-
"gbrap12le": (2, "uint16"),
674-
"gbrap14be": (2, "uint16"),
675-
"gbrap14le": (2, "uint16"),
676-
"gbrap16be": (2, "uint16"),
677-
"gbrap16le": (2, "uint16"),
678-
"gbrapf32be": (4, "float32"),
679-
"gbrapf32le": (4, "float32"),
680-
"gbrp": (1, "uint8"),
681-
"gbrp10be": (2, "uint16"),
682-
"gbrp10le": (2, "uint16"),
683-
"gbrp12be": (2, "uint16"),
684-
"gbrp12le": (2, "uint16"),
685-
"gbrp14be": (2, "uint16"),
686-
"gbrp14le": (2, "uint16"),
687-
"gbrp16be": (2, "uint16"),
688-
"gbrp16le": (2, "uint16"),
689-
"gbrp9be": (2, "uint16"),
690-
"gbrp9le": (2, "uint16"),
691-
"gbrpf32be": (4, "float32"),
692-
"gbrpf32le": (4, "float32"),
693-
"gray": (1, "uint8"),
694-
"gray10be": (2, "uint16"),
695-
"gray10le": (2, "uint16"),
696-
"gray12be": (2, "uint16"),
697-
"gray12le": (2, "uint16"),
698-
"gray14be": (2, "uint16"),
699-
"gray14le": (2, "uint16"),
700-
"gray16be": (2, "uint16"),
701-
"gray16le": (2, "uint16"),
702-
"gray8": (1, "uint8"),
703-
"gray9be": (2, "uint16"),
704-
"gray9le": (2, "uint16"),
705-
"grayf32be": (4, "float32"),
706-
"grayf32le": (4, "float32"),
707-
"rgb24": (3, "uint8"),
708-
"rgb48be": (6, "uint16"),
709-
"rgb48le": (6, "uint16"),
710-
"rgb8": (1, "uint8"),
711-
"rgba": (4, "uint8"),
712-
"rgba64be": (8, "uint16"),
713-
"rgba64le": (8, "uint16"),
714-
"rgbaf16be": (8, "float16"),
715-
"rgbaf16le": (8, "float16"),
716-
"rgbaf32be": (16, "float32"),
717-
"rgbaf32le": (16, "float32"),
718-
"rgbf32be": (12, "float32"),
719-
"rgbf32le": (12, "float32"),
720-
"yuv444p": (1, "uint8"),
721-
"yuv444p16be": (2, "uint16"),
722-
"yuv444p16le": (2, "uint16"),
723-
"yuva444p16be": (2, "uint16"),
724-
"yuva444p16le": (2, "uint16"),
725-
"yuvj444p": (1, "uint8"),
726-
"yuyv422": (2, "uint8"),
727-
}.get(frame.format.name, (None, None))
728-
if itemsize is not None:
729-
layers = [
730-
useful_array(plan, itemsize, dtype).reshape(
731-
frame.height, frame.width, -1
725+
if format_name in _np_pix_fmt_dtypes:
726+
itemsize: cython.uint
727+
itemsize, dtype = _np_pix_fmt_dtypes[format_name]
728+
if len(planes) == 1: # shortcut, avoid memory copy
729+
array = useful_array(planes[0], itemsize, dtype).reshape(
730+
height, width, -1
732731
)
733-
for plan in frame.planes
734-
]
735-
if len(layers) == 1: # shortcut, avoid memory copy
736-
array = layers[0]
737732
else: # general case
738-
array = np.concatenate(layers, axis=2)
739-
array = byteswap_array(array, frame.format.name.endswith("be"))
733+
array = np.empty((height, width, len(planes)), dtype=dtype)
734+
for i, plane in enumerate(planes):
735+
array[:, :, i] = useful_array(plane, itemsize, dtype).reshape(
736+
height, width
737+
)
738+
array = byteswap_array(array, format_name.endswith("be"))
740739
if array.shape[2] == 1: # skip last channel for gray images
741740
return array.squeeze(2)
742-
if frame.format.name.startswith("gbr"): # gbr -> rgb
743-
buffer = array[:, :, 0].copy()
744-
array[:, :, 0] = array[:, :, 2]
745-
array[:, :, 2] = array[:, :, 1]
746-
array[:, :, 1] = buffer
747-
if not channel_last and frame.format.name in {"yuv444p", "yuvj444p"}:
741+
if format_name.startswith("gbr"): # gbr -> rgb
742+
array[:, :, :3] = array[:, :, [2, 0, 1]]
743+
if not channel_last and format_name in {"yuv444p", "yuvj444p"}:
748744
array = np.moveaxis(array, 2, 0)
749745
return array
750746

751747
# special cases
752-
if frame.format.name in {"yuv420p", "yuvj420p", "yuv422p"}:
748+
if format_name in {"yuv420p", "yuvj420p", "yuv422p"}:
753749
return np.hstack(
754750
[
755-
useful_array(frame.planes[0]),
756-
useful_array(frame.planes[1]),
757-
useful_array(frame.planes[2]),
751+
useful_array(planes[0]),
752+
useful_array(planes[1]),
753+
useful_array(planes[2]),
758754
]
759-
).reshape(-1, frame.width)
760-
if frame.format.name == "yuv422p10le":
755+
).reshape(-1, width)
756+
if format_name == "yuv422p10le":
761757
# Read planes as uint16 at their original width
762-
y = useful_array(frame.planes[0], 2, "uint16").reshape(
763-
frame.height, frame.width
764-
)
765-
u = useful_array(frame.planes[1], 2, "uint16").reshape(
766-
frame.height, frame.width // 2
767-
)
768-
v = useful_array(frame.planes[2], 2, "uint16").reshape(
769-
frame.height, frame.width // 2
770-
)
758+
y = useful_array(planes[0], 2, "uint16").reshape(height, width)
759+
u = useful_array(planes[1], 2, "uint16").reshape(height, width // 2)
760+
v = useful_array(planes[2], 2, "uint16").reshape(height, width // 2)
771761

772762
# Double the width of U and V by repeating each value
773763
u_full = np.repeat(u, 2, axis=1)
774764
v_full = np.repeat(v, 2, axis=1)
775765
if channel_last:
776766
return np.stack([y, u_full, v_full], axis=2)
777767
return np.stack([y, u_full, v_full], axis=0)
778-
if frame.format.name == "pal8":
779-
image = useful_array(frame.planes[0]).reshape(frame.height, frame.width)
768+
if format_name == "pal8":
769+
image = useful_array(planes[0]).reshape(height, width)
780770
palette = (
781-
np.frombuffer(frame.planes[1], "i4")
771+
np.frombuffer(planes[1], "i4")
782772
.astype(">i4")
783773
.reshape(-1, 1)
784774
.view(np.uint8)
785775
)
786776
return image, palette
787-
if frame.format.name == "nv12":
777+
if format_name == "nv12":
788778
return np.hstack(
789-
[
790-
useful_array(frame.planes[0]),
791-
useful_array(frame.planes[1], 2),
792-
]
793-
).reshape(-1, frame.width)
779+
[useful_array(planes[0]), useful_array(planes[1], 2)]
780+
).reshape(-1, width)
794781

795782
raise ValueError(
796-
f"Conversion to numpy array with format `{frame.format.name}` is not yet supported"
783+
f"Conversion to numpy array with format `{format_name}` is not yet supported"
797784
)
798785

799786
def set_image(self, img):

0 commit comments

Comments
 (0)