diff --git a/av/audio/frame.pyi b/av/audio/frame.pyi
index 7f61e4e6d..6aeb86b4d 100644
--- a/av/audio/frame.pyi
+++ b/av/audio/frame.pyi
@@ -35,13 +35,15 @@ class AudioFrame(Frame):
 
     def __init__(
         self,
-        format: str = "s16",
-        layout: str = "stereo",
+        format: AudioFormat | str = "s16",
+        layout: AudioLayout | str = "stereo",
         samples: int = 0,
         align: int = 1,
     ) -> None: ...
     @staticmethod
     def from_ndarray(
-        array: _SupportedNDarray, format: str = "s16", layout: str = "stereo"
+        array: _SupportedNDarray,
+        format: AudioFormat | str = "s16",
+        layout: AudioLayout | str = "stereo",
     ) -> AudioFrame: ...
     def to_ndarray(self) -> _SupportedNDarray: ...
diff --git a/av/audio/frame.pyx b/av/audio/frame.pyx
index 14356cb4e..cf3ab5934 100644
--- a/av/audio/frame.pyx
+++ b/av/audio/frame.pyx
@@ -1,11 +1,16 @@
-from av.audio.format cimport get_audio_format
-from av.audio.layout cimport get_audio_layout
-from av.audio.plane cimport AudioPlane
-from av.error cimport err_check
-from av.utils cimport check_ndarray
+import cython
+from cython.cimports.av.audio.format import get_audio_format
+from cython.cimports.av.audio.layout import get_audio_layout
+from cython.cimports.av.audio.plane import AudioPlane
+from cython.cimports.av.error import err_check
+from cython.cimports.av.utils import check_ndarray
 
+_cinit_bypass_sentinel = object()
 
-cdef object _cinit_bypass_sentinel
+
+@cython.cfunc
+def alloc_audio_frame() -> AudioFrame:
+    return AudioFrame(_cinit_bypass_sentinel)
 
 
 format_dtypes = {
@@ -22,30 +27,28 @@ format_dtypes = {
 }
 
 
-cdef AudioFrame alloc_audio_frame():
-    """Get a mostly uninitialized AudioFrame.
-
-    You MUST call AudioFrame._init(...) or AudioFrame._init_user_attributes()
-    before exposing to the user.
-
-    """
-    return AudioFrame.__new__(AudioFrame, _cinit_bypass_sentinel)
-
-
-cdef class AudioFrame(Frame):
+@cython.cclass
+class AudioFrame(Frame):
     """A frame of audio."""
 
     def __cinit__(self, format="s16", layout="stereo", samples=0, align=1):
         if format is _cinit_bypass_sentinel:
             return
 
-        cdef AudioFormat cy_format = AudioFormat(format)
-        cdef AudioLayout cy_layout = AudioLayout(layout)
+        cy_format: AudioFormat = AudioFormat(format)
+        cy_layout: AudioLayout = AudioLayout(layout)
         self._init(cy_format.sample_fmt, cy_layout.layout, samples, align)
 
-    cdef _init(self, lib.AVSampleFormat format, lib.AVChannelLayout layout, unsigned int nb_samples, unsigned int align):
+    @cython.cfunc
+    def _init(
+        self,
+        format: lib.AVSampleFormat,
+        layout: lib.AVChannelLayout,
+        nb_samples: cython.uint,
+        align: cython.uint,
+    ):
         self.ptr.nb_samples = nb_samples
-        self.ptr.format = <int>format
+        self.ptr.format = format
         self.ptr.ch_layout = layout
 
         # Sometimes this is called twice. Oh well.
@@ -53,41 +56,44 @@ cdef class AudioFrame(Frame):
 
         if self.layout.nb_channels != 0 and nb_samples:
             # Cleanup the old buffer.
-            lib.av_freep(&self._buffer)
+            lib.av_freep(cython.address(self._buffer))
 
             # Get a new one.
-            self._buffer_size = err_check(lib.av_samples_get_buffer_size(
-                NULL,
-                self.layout.nb_channels,
-                nb_samples,
-                format,
-                align
-            ))
-            self._buffer = <uint8_t *>lib.av_malloc(self._buffer_size)
+            self._buffer_size = err_check(
+                lib.av_samples_get_buffer_size(
+                    cython.NULL, self.layout.nb_channels, nb_samples, format, align
+                )
+            )
+            self._buffer = cython.cast(
+                cython.pointer[uint8_t], lib.av_malloc(self._buffer_size)
+            )
             if not self._buffer:
                 raise MemoryError("cannot allocate AudioFrame buffer")
 
             # Connect the data pointers to the buffer.
-            err_check(lib.avcodec_fill_audio_frame(
-                self.ptr,
-                self.layout.nb_channels,
-                <lib.AVSampleFormat>self.ptr.format,
-                self._buffer,
-                self._buffer_size,
-                align
-            ))
+            err_check(
+                lib.avcodec_fill_audio_frame(
+                    self.ptr,
+                    self.layout.nb_channels,
+                    cython.cast(lib.AVSampleFormat, self.ptr.format),
+                    self._buffer,
+                    self._buffer_size,
+                    align,
+                )
+            )
 
     def __dealloc__(self):
-        lib.av_freep(&self._buffer)
+        lib.av_freep(cython.address(self._buffer))
 
-    cdef _init_user_attributes(self):
+    @cython.cfunc
+    def _init_user_attributes(self):
         self.layout = get_audio_layout(self.ptr.ch_layout)
-        self.format = get_audio_format(<lib.AVSampleFormat>self.ptr.format)
+        self.format = get_audio_format(cython.cast(lib.AVSampleFormat, self.ptr.format))
 
     def __repr__(self):
         return (
-           f"<av.{self.__class__.__name__} pts={self.pts}, {self.samples} "
-           f"samples at {self.rate}Hz, {self.layout.name}, {self.format.name} at 0x{id(self):x}"
+            f"<av.{self.__class__.__name__} pts={self.pts}, {self.samples} "
+            f"samples at {self.rate}Hz, {self.layout.name}, {self.format.name} at 0x{id(self):x}>"
         )
 
     @staticmethod
@@ -97,6 +103,10 @@ cdef class AudioFrame(Frame):
         """
         import numpy as np
 
+        py_format = format if isinstance(format, AudioFormat) else AudioFormat(format)
+        py_layout = layout if isinstance(layout, AudioLayout) else AudioLayout(layout)
+        format = py_format.name
+
         # map avcodec type to numpy type
         try:
             dtype = np.dtype(format_dtypes[format])
@@ -106,18 +116,22 @@ cdef class AudioFrame(Frame):
             )
 
         # check input format
-        nb_channels = AudioLayout(layout).nb_channels
+        nb_channels = py_layout.nb_channels
         check_ndarray(array, dtype, 2)
-        if AudioFormat(format).is_planar:
+        if py_format.is_planar:
             if array.shape[0] != nb_channels:
-                raise ValueError(f"Expected planar `array.shape[0]` to equal `{nb_channels}` but got `{array.shape[0]}`")
+                raise ValueError(
+                    f"Expected planar `array.shape[0]` to equal `{nb_channels}` but got `{array.shape[0]}`"
+                )
             samples = array.shape[1]
         else:
             if array.shape[0] != 1:
-                raise ValueError(f"Expected packed `array.shape[0]` to equal `1` but got `{array.shape[0]}`")
+                raise ValueError(
+                    f"Expected packed `array.shape[0]` to equal `1` but got `{array.shape[0]}`"
+                )
             samples = array.shape[1] // nb_channels
 
-        frame = AudioFrame(format=format, layout=layout, samples=samples)
+        frame = AudioFrame(format=py_format, layout=py_layout, samples=samples)
         for i, plane in enumerate(frame.planes):
             plane.update(array[i, :])
         return frame
@@ -129,7 +143,7 @@ cdef class AudioFrame(Frame):
 
         :type: tuple
         """
-        cdef int plane_count = 0
+        plane_count: cython.int = 0
         while self.ptr.extended_data[plane_count]:
             plane_count += 1
 
@@ -177,11 +191,15 @@ cdef class AudioFrame(Frame):
         try:
             dtype = np.dtype(format_dtypes[self.format.name])
         except KeyError:
-            raise ValueError(f"Conversion from {self.format.name!r} format to numpy array is not supported.")
+            raise ValueError(
+                f"Conversion from {self.format.name!r} format to numpy array is not supported."
+            )
 
         if self.format.is_planar:
             count = self.samples
         else:
             count = self.samples * self.layout.nb_channels
 
-        return np.vstack([np.frombuffer(x, dtype=dtype, count=count) for x in self.planes])
+        return np.vstack(
+            [np.frombuffer(x, dtype=dtype, count=count) for x in self.planes]
+        )