@@ -264,6 +264,90 @@ def _numpy_avbuffer_free(
264264 "yuyv422" ,
265265}
266266
267+ # Maps a pixel format name to (itemsize, dtype) for formats whose planes are simply laid out as (height, width, channels).
268+ # dtype is a NumPy dtype name; itemsize is handed to useful_array() with it (presumably bytes per pixel of one plane; confirm).
269+ _np_pix_fmt_dtypes : dict [str , tuple [cython .uint , str ]] = {
270+ "abgr" : (4 , "uint8" ),
271+ "argb" : (4 , "uint8" ),
272+ "bayer_bggr8" : (1 , "uint8" ),
273+ "bayer_gbrg8" : (1 , "uint8" ),
274+ "bayer_grbg8" : (1 , "uint8" ),
275+ "bayer_rggb8" : (1 , "uint8" ),
276+ "bayer_bggr16le" : (2 , "uint16" ),
277+ "bayer_bggr16be" : (2 , "uint16" ),
278+ "bayer_gbrg16le" : (2 , "uint16" ),
279+ "bayer_gbrg16be" : (2 , "uint16" ),
280+ "bayer_grbg16le" : (2 , "uint16" ),
281+ "bayer_grbg16be" : (2 , "uint16" ),
282+ "bayer_rggb16le" : (2 , "uint16" ),
283+ "bayer_rggb16be" : (2 , "uint16" ),
284+ "bgr24" : (3 , "uint8" ),
285+ "bgr48be" : (6 , "uint16" ),
286+ "bgr48le" : (6 , "uint16" ),
287+ "bgr8" : (1 , "uint8" ),
288+ "bgra" : (4 , "uint8" ),
289+ "bgra64be" : (8 , "uint16" ),
290+ "bgra64le" : (8 , "uint16" ),
291+ "gbrap" : (1 , "uint8" ),
292+ "gbrap10be" : (2 , "uint16" ),
293+ "gbrap10le" : (2 , "uint16" ),
294+ "gbrap12be" : (2 , "uint16" ),
295+ "gbrap12le" : (2 , "uint16" ),
296+ "gbrap14be" : (2 , "uint16" ),
297+ "gbrap14le" : (2 , "uint16" ),
298+ "gbrap16be" : (2 , "uint16" ),
299+ "gbrap16le" : (2 , "uint16" ),
300+ "gbrapf32be" : (4 , "float32" ),
301+ "gbrapf32le" : (4 , "float32" ),
302+ "gbrp" : (1 , "uint8" ),
303+ "gbrp10be" : (2 , "uint16" ),
304+ "gbrp10le" : (2 , "uint16" ),
305+ "gbrp12be" : (2 , "uint16" ),
306+ "gbrp12le" : (2 , "uint16" ),
307+ "gbrp14be" : (2 , "uint16" ),
308+ "gbrp14le" : (2 , "uint16" ),
309+ "gbrp16be" : (2 , "uint16" ),
310+ "gbrp16le" : (2 , "uint16" ),
311+ "gbrp9be" : (2 , "uint16" ),
312+ "gbrp9le" : (2 , "uint16" ),
313+ "gbrpf32be" : (4 , "float32" ),
314+ "gbrpf32le" : (4 , "float32" ),
315+ "gray" : (1 , "uint8" ),
316+ "gray10be" : (2 , "uint16" ),
317+ "gray10le" : (2 , "uint16" ),
318+ "gray12be" : (2 , "uint16" ),
319+ "gray12le" : (2 , "uint16" ),
320+ "gray14be" : (2 , "uint16" ),
321+ "gray14le" : (2 , "uint16" ),
322+ "gray16be" : (2 , "uint16" ),
323+ "gray16le" : (2 , "uint16" ),
324+ "gray8" : (1 , "uint8" ),
325+ "gray9be" : (2 , "uint16" ),
326+ "gray9le" : (2 , "uint16" ),
327+ "grayf32be" : (4 , "float32" ),
328+ "grayf32le" : (4 , "float32" ),
329+ "rgb24" : (3 , "uint8" ),
330+ "rgb48be" : (6 , "uint16" ),
331+ "rgb48le" : (6 , "uint16" ),
332+ "rgb8" : (1 , "uint8" ),
333+ "rgba" : (4 , "uint8" ),
334+ "rgba64be" : (8 , "uint16" ),
335+ "rgba64le" : (8 , "uint16" ),
336+ "rgbaf16be" : (8 , "float16" ),
337+ "rgbaf16le" : (8 , "float16" ),
338+ "rgbaf32be" : (16 , "float32" ),
339+ "rgbaf32le" : (16 , "float32" ),
340+ "rgbf32be" : (12 , "float32" ),
341+ "rgbf32le" : (12 , "float32" ),
342+ "yuv444p" : (1 , "uint8" ),
343+ "yuv444p16be" : (2 , "uint16" ),
344+ "yuv444p16le" : (2 , "uint16" ),
345+ "yuva444p16be" : (2 , "uint16" ),
346+ "yuva444p16le" : (2 , "uint16" ),
347+ "yuvj444p" : (1 , "uint8" ),
348+ "yuyv422" : (2 , "uint8" ),
349+ }
350+
267351
268352@cython .cfunc
269353def alloc_video_frame () -> VideoFrame :
@@ -354,9 +438,10 @@ def useful_array(
354438
355439 total_line_size : cython .size_t = abs (plane .line_size )
356440 useful_line_size : cython .size_t = plane .width * bytes_per_pixel
441+ if total_line_size == useful_line_size :
442+ return np .frombuffer (plane , dtype = dtype )
357443 arr = np .frombuffer (plane , np .uint8 )
358- if total_line_size != useful_line_size :
359- arr = arr .reshape (- 1 , total_line_size )[:, 0 :useful_line_size ].reshape (- 1 )
444+ arr = arr .reshape (- 1 , total_line_size )[:, 0 :useful_line_size ].reshape (- 1 )
360445 return arr .view (np .dtype (dtype ))
361446
362447
@@ -613,187 +698,89 @@ def to_ndarray(self, channel_last=False, **kwargs):
613698 .. note:: For ``gbrp`` formats, channels are flipped to RGB order.
614699
615700 """
616- kwargs2 = dict (kwargs )
617- if self .ptr .hw_frames_ctx and "format" not in kwargs2 :
701+ if self .ptr .hw_frames_ctx and "format" not in kwargs :
618702 frames_ctx : cython .pointer [lib .AVHWFramesContext ] = cython .cast (
619703 cython .pointer [lib .AVHWFramesContext ], self .ptr .hw_frames_ctx .data
620704 )
621- kwargs2 ["format" ] = get_video_format (
705+ kwargs = dict (kwargs )
706+ kwargs ["format" ] = get_video_format (
622707 frames_ctx .sw_format , self .ptr .width , self .ptr .height
623708 ).name
624709
625- frame : VideoFrame = self .reformat (** kwargs2 )
710+ frame : VideoFrame = self .reformat (** kwargs )
626711 if frame .ptr .hw_frames_ctx :
627712 raise ValueError ("Cannot convert a hardware frame to numpy directly." )
628713
629714 import numpy as np
630715
631716 # check size
632- if frame .format .name in {
633- "yuv420p" ,
634- "yuvj420p" ,
635- "yuyv422" ,
636- "yuv422p10le" ,
637- "yuv422p" ,
638- }:
639- assert frame .width % 2 == 0 , (
640- "the width has to be even for this pixel format"
641- )
642- assert frame .height % 2 == 0 , (
643- "the height has to be even for this pixel format"
644- )
717+ format_name = frame .format .name
718+ height , width = frame .ptr .height , frame .ptr .width
719+ planes : tuple [VideoPlane , ...] = frame .planes
720+ if format_name in {"yuv420p" , "yuvj420p" , "yuyv422" , "yuv422p10le" , "yuv422p" }:
721+ assert width % 2 == 0 , "the width has to be even for this pixel format"
722+ assert height % 2 == 0 , "the height has to be even for this pixel format"
645723
646724 # cases planes are simply concatenated in shape (height, width, channels)
647- itemsize , dtype = {
648- "abgr" : (4 , "uint8" ),
649- "argb" : (4 , "uint8" ),
650- "bayer_bggr8" : (1 , "uint8" ),
651- "bayer_gbrg8" : (1 , "uint8" ),
652- "bayer_grbg8" : (1 , "uint8" ),
653- "bayer_rggb8" : (1 , "uint8" ),
654- "bayer_bggr16le" : (2 , "uint16" ),
655- "bayer_bggr16be" : (2 , "uint16" ),
656- "bayer_gbrg16le" : (2 , "uint16" ),
657- "bayer_gbrg16be" : (2 , "uint16" ),
658- "bayer_grbg16le" : (2 , "uint16" ),
659- "bayer_grbg16be" : (2 , "uint16" ),
660- "bayer_rggb16le" : (2 , "uint16" ),
661- "bayer_rggb16be" : (2 , "uint16" ),
662- "bgr24" : (3 , "uint8" ),
663- "bgr48be" : (6 , "uint16" ),
664- "bgr48le" : (6 , "uint16" ),
665- "bgr8" : (1 , "uint8" ),
666- "bgra" : (4 , "uint8" ),
667- "bgra64be" : (8 , "uint16" ),
668- "bgra64le" : (8 , "uint16" ),
669- "gbrap" : (1 , "uint8" ),
670- "gbrap10be" : (2 , "uint16" ),
671- "gbrap10le" : (2 , "uint16" ),
672- "gbrap12be" : (2 , "uint16" ),
673- "gbrap12le" : (2 , "uint16" ),
674- "gbrap14be" : (2 , "uint16" ),
675- "gbrap14le" : (2 , "uint16" ),
676- "gbrap16be" : (2 , "uint16" ),
677- "gbrap16le" : (2 , "uint16" ),
678- "gbrapf32be" : (4 , "float32" ),
679- "gbrapf32le" : (4 , "float32" ),
680- "gbrp" : (1 , "uint8" ),
681- "gbrp10be" : (2 , "uint16" ),
682- "gbrp10le" : (2 , "uint16" ),
683- "gbrp12be" : (2 , "uint16" ),
684- "gbrp12le" : (2 , "uint16" ),
685- "gbrp14be" : (2 , "uint16" ),
686- "gbrp14le" : (2 , "uint16" ),
687- "gbrp16be" : (2 , "uint16" ),
688- "gbrp16le" : (2 , "uint16" ),
689- "gbrp9be" : (2 , "uint16" ),
690- "gbrp9le" : (2 , "uint16" ),
691- "gbrpf32be" : (4 , "float32" ),
692- "gbrpf32le" : (4 , "float32" ),
693- "gray" : (1 , "uint8" ),
694- "gray10be" : (2 , "uint16" ),
695- "gray10le" : (2 , "uint16" ),
696- "gray12be" : (2 , "uint16" ),
697- "gray12le" : (2 , "uint16" ),
698- "gray14be" : (2 , "uint16" ),
699- "gray14le" : (2 , "uint16" ),
700- "gray16be" : (2 , "uint16" ),
701- "gray16le" : (2 , "uint16" ),
702- "gray8" : (1 , "uint8" ),
703- "gray9be" : (2 , "uint16" ),
704- "gray9le" : (2 , "uint16" ),
705- "grayf32be" : (4 , "float32" ),
706- "grayf32le" : (4 , "float32" ),
707- "rgb24" : (3 , "uint8" ),
708- "rgb48be" : (6 , "uint16" ),
709- "rgb48le" : (6 , "uint16" ),
710- "rgb8" : (1 , "uint8" ),
711- "rgba" : (4 , "uint8" ),
712- "rgba64be" : (8 , "uint16" ),
713- "rgba64le" : (8 , "uint16" ),
714- "rgbaf16be" : (8 , "float16" ),
715- "rgbaf16le" : (8 , "float16" ),
716- "rgbaf32be" : (16 , "float32" ),
717- "rgbaf32le" : (16 , "float32" ),
718- "rgbf32be" : (12 , "float32" ),
719- "rgbf32le" : (12 , "float32" ),
720- "yuv444p" : (1 , "uint8" ),
721- "yuv444p16be" : (2 , "uint16" ),
722- "yuv444p16le" : (2 , "uint16" ),
723- "yuva444p16be" : (2 , "uint16" ),
724- "yuva444p16le" : (2 , "uint16" ),
725- "yuvj444p" : (1 , "uint8" ),
726- "yuyv422" : (2 , "uint8" ),
727- }.get (frame .format .name , (None , None ))
728- if itemsize is not None :
729- layers = [
730- useful_array (plan , itemsize , dtype ).reshape (
731- frame .height , frame .width , - 1
725+ if format_name in _np_pix_fmt_dtypes :
726+ itemsize : cython .uint
727+ itemsize , dtype = _np_pix_fmt_dtypes [format_name ]
728+ if len (planes ) == 1 : # shortcut, avoid memory copy
729+ array = useful_array (planes [0 ], itemsize , dtype ).reshape (
730+ height , width , - 1
732731 )
733- for plan in frame .planes
734- ]
735- if len (layers ) == 1 : # shortcut, avoid memory copy
736- array = layers [0 ]
737732 else : # general case
738- array = np .concatenate (layers , axis = 2 )
739- array = byteswap_array (array , frame .format .name .endswith ("be" ))
733+ array = np .empty ((height , width , len (planes )), dtype = dtype )
734+ for i , plane in enumerate (planes ):
735+ array [:, :, i ] = useful_array (plane , itemsize , dtype ).reshape (
736+ height , width
737+ )
738+ array = byteswap_array (array , format_name .endswith ("be" ))
740739 if array .shape [2 ] == 1 : # skip last channel for gray images
741740 return array .squeeze (2 )
742- if frame .format .name .startswith ("gbr" ): # gbr -> rgb
743- buffer = array [:, :, 0 ].copy ()
744- array [:, :, 0 ] = array [:, :, 2 ]
745- array [:, :, 2 ] = array [:, :, 1 ]
746- array [:, :, 1 ] = buffer
747- if not channel_last and frame .format .name in {"yuv444p" , "yuvj444p" }:
741+ if format_name .startswith ("gbr" ): # gbr -> rgb
742+ array [:, :, :3 ] = array [:, :, [2 , 0 , 1 ]]
743+ if not channel_last and format_name in {"yuv444p" , "yuvj444p" }:
748744 array = np .moveaxis (array , 2 , 0 )
749745 return array
750746
751747 # special cases
752- if frame . format . name in {"yuv420p" , "yuvj420p" , "yuv422p" }:
748+ if format_name in {"yuv420p" , "yuvj420p" , "yuv422p" }:
753749 return np .hstack (
754750 [
755- useful_array (frame . planes [0 ]),
756- useful_array (frame . planes [1 ]),
757- useful_array (frame . planes [2 ]),
751+ useful_array (planes [0 ]),
752+ useful_array (planes [1 ]),
753+ useful_array (planes [2 ]),
758754 ]
759- ).reshape (- 1 , frame . width )
760- if frame . format . name == "yuv422p10le" :
755+ ).reshape (- 1 , width )
756+ if format_name == "yuv422p10le" :
761757 # Read planes as uint16 at their original width
762- y = useful_array (frame .planes [0 ], 2 , "uint16" ).reshape (
763- frame .height , frame .width
764- )
765- u = useful_array (frame .planes [1 ], 2 , "uint16" ).reshape (
766- frame .height , frame .width // 2
767- )
768- v = useful_array (frame .planes [2 ], 2 , "uint16" ).reshape (
769- frame .height , frame .width // 2
770- )
758+ y = useful_array (planes [0 ], 2 , "uint16" ).reshape (height , width )
759+ u = useful_array (planes [1 ], 2 , "uint16" ).reshape (height , width // 2 )
760+ v = useful_array (planes [2 ], 2 , "uint16" ).reshape (height , width // 2 )
771761
772762 # Double the width of U and V by repeating each value
773763 u_full = np .repeat (u , 2 , axis = 1 )
774764 v_full = np .repeat (v , 2 , axis = 1 )
775765 if channel_last :
776766 return np .stack ([y , u_full , v_full ], axis = 2 )
777767 return np .stack ([y , u_full , v_full ], axis = 0 )
778- if frame . format . name == "pal8" :
779- image = useful_array (frame . planes [0 ]).reshape (frame . height , frame . width )
768+ if format_name == "pal8" :
769+ image = useful_array (planes [0 ]).reshape (height , width )
780770 palette = (
781- np .frombuffer (frame . planes [1 ], "i4" )
771+ np .frombuffer (planes [1 ], "i4" )
782772 .astype (">i4" )
783773 .reshape (- 1 , 1 )
784774 .view (np .uint8 )
785775 )
786776 return image , palette
787- if frame . format . name == "nv12" :
777+ if format_name == "nv12" :
788778 return np .hstack (
789- [
790- useful_array (frame .planes [0 ]),
791- useful_array (frame .planes [1 ], 2 ),
792- ]
793- ).reshape (- 1 , frame .width )
779+ [useful_array (planes [0 ]), useful_array (planes [1 ], 2 )]
780+ ).reshape (- 1 , width )
794781
795782 raise ValueError (
796- f"Conversion to numpy array with format `{ frame . format . name } ` is not yet supported"
783+ f"Conversion to numpy array with format `{ format_name } ` is not yet supported"
797784 )
798785
799786 def set_image (self , img ):
0 commit comments