From 36c222a8291ec7d90b8531ce76d9d7e2dc236dd5 Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Tue, 20 Jan 2026 21:28:19 +0100 Subject: [PATCH 1/2] TMP alpha mode --- src/backend_interface.rs | 17 +++++-- src/backends/cg.rs | 2 +- src/backends/wayland/buffer.rs | 22 ++++----- src/backends/wayland/mod.rs | 22 +++++++-- src/lib.rs | 85 +++++++++++++++++++++++++++++++++- 5 files changed, 127 insertions(+), 21 deletions(-) diff --git a/src/backend_interface.rs b/src/backend_interface.rs index 1eafbf8..687ccd9 100644 --- a/src/backend_interface.rs +++ b/src/backend_interface.rs @@ -1,6 +1,6 @@ //! Interface implemented by backends -use crate::{InitError, Rect, SoftBufferError}; +use crate::{AlphaMode, InitError, Rect, SoftBufferError}; use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; use std::num::NonZeroU32; @@ -22,12 +22,23 @@ pub(crate) trait SurfaceInterface &W; - /// Resize the internal buffer to the given width and height. - fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError>; + + fn alpha_mode(&self) -> AlphaMode; + + /// Reconfigure the internal buffer(s). + fn configure( + &mut self, + width: NonZeroU32, + height: NonZeroU32, + alpha_mode: AlphaMode, + ) -> Result<(), SoftBufferError>; + /// Get a mutable reference to the buffer. fn buffer_mut(&mut self) -> Result, SoftBufferError>; + /// Fetch the buffer from the window. fn fetch(&mut self) -> Result, SoftBufferError> { Err(SoftBufferError::Unimplemented) diff --git a/src/backends/cg.rs b/src/backends/cg.rs index f45779f..bd43430 100644 --- a/src/backends/cg.rs +++ b/src/backends/cg.rs @@ -252,7 +252,7 @@ impl SurfaceInterface for CGImpl< &self.window_handle } - fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError> { + fn configure(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError> { self.width = width.get() as usize; self.height = height.get() as usize; Ok(()) diff --git a/src/backends/wayland/buffer.rs b/src/backends/wayland/buffer.rs index e28653a..f8563d6 100644 --- a/src/backends/wayland/buffer.rs +++ b/src/backends/wayland/buffer.rs @@ -82,7 +82,13 @@ pub(super) struct WaylandBuffer { } impl WaylandBuffer { - pub fn new(shm: &wl_shm::WlShm, width: i32, height: i32, qh: &QueueHandle) -> Self { + pub fn new( + shm: &wl_shm::WlShm, + width: i32, + height: i32, + format: wl_shm::Format, + qh: &QueueHandle, + ) -> Self { // Calculate size to use for shm pool let pool_size = get_pool_size(width, height); @@ -94,15 +100,7 @@ impl WaylandBuffer { // Create wayland shm pool and buffer let pool = shm.create_pool(tempfile.as_fd(), pool_size, qh, ()); let released = Arc::new(AtomicBool::new(true)); - let buffer = pool.create_buffer( - 0, - width, - height, - width * 4, - wl_shm::Format::Xrgb8888, - qh, - released.clone(), - ); + let buffer = pool.create_buffer(0, width, height, width * 4, format, qh, released.clone()); Self { qh: qh.clone(), @@ -118,7 +116,7 @@ impl WaylandBuffer { } } - pub fn resize(&mut self, width: i32, height: i32) { + pub fn configure(&mut self, width: i32, height: i32, format: wl_shm::Format) { // If size is the same, there's nothing to do if self.width != width || self.height != height { // Destroy old buffer @@ -139,7 +137,7 @@ impl WaylandBuffer { width, height, width * 4, - wl_shm::Format::Xrgb8888, + format, &self.qh, self.released.clone(), ); diff --git a/src/backends/wayland/mod.rs b/src/backends/wayland/mod.rs index 77b7b87..dcf1ebe 100644 --- a/src/backends/wayland/mod.rs +++ 
b/src/backends/wayland/mod.rs @@ -1,7 +1,7 @@ use crate::{ backend_interface::*, error::{InitError, SwResultExt}, - Rect, SoftBufferError, + AlphaMode, Rect, SoftBufferError, }; use raw_window_handle::{HasDisplayHandle, HasWindowHandle, RawDisplayHandle, RawWindowHandle}; use std::{ @@ -81,6 +81,7 @@ pub struct WaylandImpl { surface: Option, buffers: Option<(WaylandBuffer, WaylandBuffer)>, size: Option<(NonZeroI32, NonZeroI32)>, + format: wl_shm::Format, /// The pointer to the window object. /// @@ -128,7 +129,12 @@ impl SurfaceInterface &self.window_handle } - fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError> { + fn configure( + &mut self, + width: NonZeroU32, + height: NonZeroU32, + alpha_mode: AlphaMode, + ) -> Result { self.size = Some( (|| { let width = NonZeroI32::try_from(width).ok()?; @@ -137,7 +143,13 @@ impl SurfaceInterface })() .ok_or(SoftBufferError::SizeOutOfRange { width, height })?, ); - Ok(()) + let (format, chosen_alpha_mode) = match alpha_mode { + AlphaMode::Opaque => (wl_shm::Format::Xrgb8888, AlphaMode::Opaque), + AlphaMode::PreMultiplied => (wl_shm::Format::Argb8888, AlphaMode::PreMultiplied), + AlphaMode::PostMultiplied => (wl_shm::Format::Argb8888, AlphaMode::PostMultiplied), + }; + self.format = format; + Ok(chosen_alpha_mode) } fn buffer_mut(&mut self) -> Result, SoftBufferError> { @@ -164,7 +176,7 @@ impl SurfaceInterface } // Resize, if buffer isn't large enough - back.resize(width.get(), height.get()); + back.configure(width.get(), height.get(), self.format); } else { // Allocate front and back buffer self.buffers = Some(( @@ -172,12 +184,14 @@ impl SurfaceInterface &self.display.shm, width.get(), height.get(), + self.format, &self.display.qh, ), WaylandBuffer::new( &self.display.shm, width.get(), height.get(), + self.format, &self.display.qh, ), )); diff --git a/src/lib.rs b/src/lib.rs index d54d2b9..6818e99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,6 +74,9 @@ pub struct Rect { /// A surface for drawing to a window with software buffers. #[derive(Debug)] pub struct Surface { + alpha_mode: AlphaMode, + /// A buffer that is used when `!supported_alpha_mode.contains(alpha_mode)`. + fallback_buffer: Option>, /// This is boxed so that `Surface` is the same size on every platform. surface_impl: Box>, _marker: PhantomData>, @@ -104,6 +107,11 @@ impl Surface { self.surface_impl.window() } + /// The current alpha mode of the surface. + pub fn alpha_mode(&self) -> AlphaMode { + self.surface_impl.alpha_mode() + } + /// Set the size of the buffer that will be returned by [`Surface::buffer_mut`]. /// /// If the size of the buffer does not match the size of the window, the buffer is drawn @@ -111,7 +119,16 @@ impl Surface { /// to have the buffer fill the entire window. Use your windowing library to find the size /// of the window. pub fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError> { - self.surface_impl.resize(width, height) + self.configure(width, height, self.alpha_mode()) + } + + pub fn configure( + &mut self, + width: NonZeroU32, + height: NonZeroU32, + alpha_mode: AlphaMode, + ) -> Result<(), SoftBufferError> { + self.surface_impl.configure(width, height, alpha_mode) } /// Copies the window contents into a buffer. @@ -424,6 +441,72 @@ impl Buffer<'_> { } } +/// Specifies how the alpha channel of the surface should be handled by the compositor. +/// +/// Whether this has an effect is dependent on whether [the pixel format](crate::Format) has an +/// alpha component. 
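
+///
+/// As a rough sketch of what pre-multiplication means (exact rounding is up to the producer of
+/// the buffer), a half-transparent red pixel with 8-bit channels is written with its color
+/// channels already scaled down by the alpha value:
+///
+/// ```
+/// // Widen to `u16` so the intermediate multiplication cannot overflow.
+/// let (r, g, b, a) = (0xffu16, 0x00u16, 0x00u16, 0x7fu16);
+/// let premultiplied = [(r * a / 0xff) as u8, (g * a / 0xff) as u8, (b * a / 0xff) as u8, a as u8];
+/// assert_eq!(premultiplied, [0x7f, 0x00, 0x00, 0x7f]);
+/// ```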
+/// +/// See [the WhatWG spec][whatwg-premultiplied] for a good description of the difference between +/// premultiplied and postmultiplied alpha. +/// +/// [whatwg-premultiplied]: https://html.spec.whatwg.org/multipage/canvas.html#premultiplied-alpha-and-the-2d-rendering-context +#[doc(alias = "Transparency")] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)] +pub enum AlphaMode { + /// The alpha channel is ignored. + /// + /// # Platform-specific + /// + /// This requires a copy on Web. + #[default] + Opaque, + /// The non-alpha channels are expected to already have been multiplied by the alpha channel. + /// + /// # Platform-specific + /// + /// This requires a copy on Web. + PreMultiplied, + /// The non-alpha channels are not expected to already be multiplied by the alpha channel; + /// instead, the compositor will multiply the non-alpha channels by the alpha channel during + /// compositing. + /// + /// Also known as "straight alpha". + /// + /// # Platform-specific + /// + /// This is unsupported on Wayland (TODO: And others?), so there Softbuffer creates a separate + /// buffer that is used later on for the conversion. + /// + /// Web: Works great. + #[doc(alias = "Straight")] + PostMultiplied, +} + +/// Convenience helpers. +impl AlphaMode { + /// Check if this is [`AlphaMode::Opaque`]. + pub fn is_opaque(self) -> bool { + matches!(self, Self::Opaque) + } + + /// Check if this is [`AlphaMode::PreMultiplied`]. + pub fn is_pre_multiplied(self) -> bool { + matches!(self, Self::PreMultiplied) + } + + /// Check if this is [`AlphaMode::PostMultiplied`]. + pub fn is_post_multiplied(self) -> bool { + matches!(self, Self::PostMultiplied) + } + + /// Whether the alpha mode allows the surface to be transparent. + /// + /// This is true for pre-multiplied and post-multiplied alpha modes. + pub fn has_transparency(self) -> bool { + !self.is_opaque() + } +} + /// There is no display handle. #[derive(Debug)] #[allow(dead_code)] From f2f1d6e11559538a993d8982120442ef06ca56f3 Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Sat, 17 Jan 2026 00:04:06 +0100 Subject: [PATCH 2/2] TMP --- Cargo.toml | 6 + src/backend_interface.rs | 7 +- src/backends/web.rs | 5 + src/convert.rs | 887 +++++++++++++++++++++++++++++++++++++++ src/format.rs | 304 ++++++++++++++ src/lib.rs | 40 +- 6 files changed, 1244 insertions(+), 5 deletions(-) create mode 100644 src/convert.rs create mode 100644 src/format.rs diff --git a/Cargo.toml b/Cargo.toml index e560eb0..dcef70f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ x11 = [ "winit/x11", ] x11-dlopen = ["tiny-xlib/dlopen", "x11rb/dl-libxcb"] +f16 = ["half"] [dependencies] raw_window_handle = { package = "raw-window-handle", version = "0.6", features = [ @@ -47,6 +48,11 @@ raw_window_handle = { package = "raw-window-handle", version = "0.6", features = ] } tracing = { version = "0.1.41", default-features = false } +# For f16 support in pixel conversions. +# TODO: Use standard library's f16 once stable: +# https://github.com/rust-lang/rust/issues/116909 +half = { version = "2.7.1", optional = true } + [target.'cfg(target_os = "android")'.dependencies] bytemuck = "1.12.3" ndk = "0.9.0" diff --git a/src/backend_interface.rs b/src/backend_interface.rs index 687ccd9..af33055 100644 --- a/src/backend_interface.rs +++ b/src/backend_interface.rs @@ -1,6 +1,6 @@ //! 
Interface implemented by backends
 
-use crate::{AlphaMode, InitError, Rect, SoftBufferError};
+use crate::{AlphaMode, InitError, PixelFormat, Rect, SoftBufferError};
 use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
 use std::num::NonZeroU32;
 
@@ -29,12 +29,15 @@ pub(crate) trait SurfaceInterface
     fn alpha_mode(&self) -> AlphaMode;
 
     /// Reconfigure the internal buffer(s).
+    ///
+    /// Returns `Some(byte_stride)` if the buffer was successfully resized, or `None` if the surface does not support the given pixel format.
     fn configure(
         &mut self,
         width: NonZeroU32,
         height: NonZeroU32,
         alpha_mode: AlphaMode,
-    ) -> Result<(), SoftBufferError>;
+        pixel_format: PixelFormat,
+    ) -> Result<Option<u32>, SoftBufferError>;
 
     /// Get a mutable reference to the buffer.
     fn buffer_mut(&mut self) -> Result<Self::Buffer<'_>, SoftBufferError>;
diff --git a/src/backends/web.rs b/src/backends/web.rs
index a829e5b..5ff3e6c 100644
--- a/src/backends/web.rs
+++ b/src/backends/web.rs
@@ -10,6 +10,7 @@ use web_sys::{CanvasRenderingContext2d, HtmlCanvasElement};
 use web_sys::{OffscreenCanvas, OffscreenCanvasRenderingContext2d};
 
 use crate::backend_interface::*;
+use crate::convert::FALLBACK_FORMAT;
 use crate::error::{InitError, SwResultExt};
 use crate::{util, NoDisplayHandle, NoWindowHandle, Rect, SoftBufferError};
 use std::marker::PhantomData;
@@ -231,6 +232,8 @@ impl SurfaceExtWeb for crate::Surface {
         let imple = crate::SurfaceDispatch::Web(WebImpl::from_canvas(canvas, NoWindowHandle(()))?);
 
         Ok(Self {
+            pixel_format: FALLBACK_FORMAT,
+            fallback_buffer: None,
             surface_impl: Box::new(imple),
             _marker: PhantomData,
         })
@@ -243,6 +246,8 @@ impl SurfaceExtWeb for crate::Surface {
         )?);
 
         Ok(Self {
+            pixel_format: FALLBACK_FORMAT,
+            fallback_buffer: None,
             surface_impl: Box::new(imple),
             _marker: PhantomData,
         })
diff --git a/src/convert.rs b/src/convert.rs
new file mode 100644
index 0000000..d8b407c
--- /dev/null
+++ b/src/convert.rs
@@ -0,0 +1,887 @@
+use std::iter;
+
+#[cfg(feature = "f16")]
+use half::f16;
+
+use crate::{AlphaMode, PixelFormat};
+
+#[derive(Debug)]
+pub(crate) struct Input<'a> {
+    data: &'a [u8],
+    byte_stride: usize,
+    alpha_mode: AlphaMode,
+    format: PixelFormat,
+}
+
+#[derive(Debug)]
+pub(crate) struct Output<'a> {
+    data: &'a mut [FallbackPixel],
+    stride: usize,
+    alpha_mode: AlphaMode,
+}
+
+/// Convert and write pixel data from one buffer to another.
+///
+/// This is primarily meant as a fallback, so while it may be fairly efficient, that is not the
+/// primary purpose. Users who need more performance should instead produce their pixels in a format that the surface supports natively.
+///
+/// # Strategy
+///
+/// Doing a generic conversion from data in one pixel format to another is difficult, so we allow
+/// ourselves to assume that the output format is [`FALLBACK_FORMAT`].
+///
+/// If we didn't do this, we'd have to introduce a round-trip to a format like `Rgba32f` that is
+/// (mostly) capable of storing all the other formats.
+///
+/// # Prior art
+///
+/// `SDL_ConvertPixels` + `SDL_PremultiplyAlpha` maybe?
+pub(crate) fn convert_fallback(i: Input<'_>, o: Output<'_>) {
+    // REMARK: We monomorphize a conversion implementation for each pixel format; this might not
+    // be the most desirable?
+    match i.format {
+        // Unpacked formats.
+ PixelFormat::Bgr8 => each_pixel::(i, o, |[b, g, r]| [r, g, b, u8::ALPHA_OPAQUE]), + PixelFormat::Rgb8 => each_pixel::(i, o, |[r, g, b]| [r, g, b, u8::ALPHA_OPAQUE]), + PixelFormat::Bgra8 => each_pixel::(i, o, |[b, g, r, a]| [r, g, b, a]), + PixelFormat::Rgba8 => each_pixel::(i, o, |[r, g, b, a]| [r, g, b, a]), + PixelFormat::Abgr8 => each_pixel::(i, o, |[a, b, g, r]| [r, g, b, a]), + PixelFormat::Argb8 => each_pixel::(i, o, |[a, r, g, b]| [r, g, b, a]), + PixelFormat::Bgr16 => each_pixel::(i, o, |[b, g, r]| [r, g, b, u16::ALPHA_OPAQUE]), + PixelFormat::Rgb16 => each_pixel::(i, o, |[r, g, b]| [r, g, b, u16::ALPHA_OPAQUE]), + PixelFormat::Bgra16 => each_pixel::(i, o, |[b, g, r, a]| [r, g, b, a]), + PixelFormat::Rgba16 => each_pixel::(i, o, |[r, g, b, a]| [r, g, b, a]), + PixelFormat::Abgr16 => each_pixel::(i, o, |[a, b, g, r]| [r, g, b, a]), + PixelFormat::Argb16 => each_pixel::(i, o, |[a, r, g, b]| [r, g, b, a]), + + // Grayscale formats. + PixelFormat::R1 => each_bitpacked_grayscale::<1>(i, o, |l| [l, l, l, u8::ALPHA_OPAQUE]), + PixelFormat::R2 => each_bitpacked_grayscale::<2>(i, o, |l| [l, l, l, u8::ALPHA_OPAQUE]), + PixelFormat::R4 => each_bitpacked_grayscale::<4>(i, o, |l| [l, l, l, u8::ALPHA_OPAQUE]), + PixelFormat::R8 => each_pixel::(i, o, |[l]| [l, l, l, u8::ALPHA_OPAQUE]), + PixelFormat::R16 => each_pixel::(i, o, |[l]| [l, l, l, u16::ALPHA_OPAQUE]), + + // Packed formats. + PixelFormat::B2g3r3 => each_packed::(i, o, |pixel| { + let b = pixel.extract_packed(6, 2); + let g = pixel.extract_packed(3, 3); + let r = pixel.extract_packed(0, 3); + [r, g, b, u8::ALPHA_OPAQUE] + }), + PixelFormat::R3g3b2 => each_packed::(i, o, |pixel| { + let r = pixel.extract_packed(5, 3); + let g = pixel.extract_packed(2, 3); + let b = pixel.extract_packed(0, 2); + [r, g, b, u8::ALPHA_OPAQUE] + }), + + PixelFormat::B5g6r5 => each_packed::(i, o, |pixel| { + let b = pixel.extract_packed(11, 5); + let g = pixel.extract_packed(5, 6); + let r = pixel.extract_packed(0, 5); + [r, g, b, u16::ALPHA_OPAQUE] + }), + PixelFormat::R5g6b5 => each_packed::(i, o, |pixel| { + let r = pixel.extract_packed(11, 5); + let g = pixel.extract_packed(5, 6); + let b = pixel.extract_packed(0, 5); + [r, g, b, u16::ALPHA_OPAQUE] + }), + + PixelFormat::Bgra4 => each_packed::(i, o, |pixel| { + let b = pixel.extract_packed(12, 4); + let g = pixel.extract_packed(8, 4); + let r = pixel.extract_packed(4, 4); + let a = pixel.extract_packed(0, 4); + [r, g, b, a] + }), + PixelFormat::Rgba4 => each_packed::(i, o, |pixel| { + let r = pixel.extract_packed(12, 4); + let g = pixel.extract_packed(8, 4); + let b = pixel.extract_packed(4, 4); + let a = pixel.extract_packed(0, 4); + [r, g, b, a] + }), + PixelFormat::Abgr4 => each_packed::(i, o, |pixel| { + let a = pixel.extract_packed(12, 4); + let b = pixel.extract_packed(8, 4); + let g = pixel.extract_packed(4, 4); + let r = pixel.extract_packed(0, 4); + [r, g, b, a] + }), + PixelFormat::Argb4 => each_packed::(i, o, |pixel| { + let a = pixel.extract_packed(12, 4); + let r = pixel.extract_packed(8, 4); + let g = pixel.extract_packed(4, 4); + let b = pixel.extract_packed(0, 4); + [r, g, b, a] + }), + + PixelFormat::Bgr5a1 => each_packed::(i, o, |pixel| { + let b = pixel.extract_packed(11, 5); + let g = pixel.extract_packed(6, 5); + let r = pixel.extract_packed(1, 5); + let a = pixel.extract_packed(0, 1); + [r, g, b, a] + }), + PixelFormat::Rgb5a1 => each_packed::(i, o, |pixel| { + let r = pixel.extract_packed(11, 5); + let g = pixel.extract_packed(6, 5); + let b = pixel.extract_packed(1, 5); + let a = 
pixel.extract_packed(0, 1); + [r, g, b, a] + }), + PixelFormat::A1bgr5 => each_packed::(i, o, |pixel| { + let a = pixel.extract_packed(15, 1); + let b = pixel.extract_packed(10, 5); + let g = pixel.extract_packed(5, 5); + let r = pixel.extract_packed(0, 5); + [r, g, b, a] + }), + PixelFormat::A1rgb5 => each_packed::(i, o, |pixel| { + let a = pixel.extract_packed(15, 1); + let r = pixel.extract_packed(10, 5); + let g = pixel.extract_packed(5, 5); + let b = pixel.extract_packed(0, 5); + [r, g, b, a] + }), + + PixelFormat::Bgr10a2 => each_packed::(i, o, |pixel| { + let b = pixel.extract_packed(22, 10); + let g = pixel.extract_packed(12, 10); + let r = pixel.extract_packed(2, 10); + let a = pixel.extract_packed(0, 2); + [r, g, b, a] + }), + PixelFormat::Rgb10a2 => each_packed::(i, o, |pixel| { + let r = pixel.extract_packed(22, 10); + let g = pixel.extract_packed(12, 10); + let b = pixel.extract_packed(2, 10); + let a = pixel.extract_packed(0, 2); + [r, g, b, a] + }), + PixelFormat::A2bgr10 => each_packed::(i, o, |pixel| { + let a = pixel.extract_packed(30, 2); + let b = pixel.extract_packed(20, 10); + let g = pixel.extract_packed(10, 10); + let r = pixel.extract_packed(0, 10); + [r, g, b, a] + }), + PixelFormat::A2rgb10 => each_packed::(i, o, |pixel| { + let a = pixel.extract_packed(30, 2); + let r = pixel.extract_packed(20, 10); + let g = pixel.extract_packed(10, 10); + let b = pixel.extract_packed(0, 10); + [r, g, b, a] + }), + + // Floating point formats. + #[cfg(feature = "f16")] + PixelFormat::Bgr16f => each_pixel::(i, o, |[b, g, r]| [r, g, b, f16::ALPHA_OPAQUE]), + #[cfg(feature = "f16")] + PixelFormat::Rgb16f => each_pixel::(i, o, |[r, g, b]| [r, g, b, f16::ALPHA_OPAQUE]), + #[cfg(feature = "f16")] + PixelFormat::Bgra16f => each_pixel::(i, o, |[b, g, r, a]| [r, g, b, a]), + #[cfg(feature = "f16")] + PixelFormat::Rgba16f => each_pixel::(i, o, |[r, g, b, a]| [r, g, b, a]), + #[cfg(feature = "f16")] + PixelFormat::Abgr16f => each_pixel::(i, o, |[a, b, g, r]| [r, g, b, a]), + #[cfg(feature = "f16")] + PixelFormat::Argb16f => each_pixel::(i, o, |[a, r, g, b]| [r, g, b, a]), + PixelFormat::Bgr32f => each_pixel::(i, o, |[b, g, r]| [r, g, b, f32::ALPHA_OPAQUE]), + PixelFormat::Rgb32f => each_pixel::(i, o, |[r, g, b]| [r, g, b, f32::ALPHA_OPAQUE]), + PixelFormat::Bgra32f => each_pixel::(i, o, |[b, g, r, a]| [r, g, b, a]), + PixelFormat::Rgba32f => each_pixel::(i, o, |[r, g, b, a]| [r, g, b, a]), + PixelFormat::Abgr32f => each_pixel::(i, o, |[a, b, g, r]| [r, g, b, a]), + PixelFormat::Argb32f => each_pixel::(i, o, |[a, r, g, b]| [r, g, b, a]), + } +} + +pub(crate) const FALLBACK_FORMAT: PixelFormat = + if cfg!(any(target_family = "wasm", target_os = "android")) { + PixelFormat::Rgba8 + } else { + PixelFormat::Bgra8 + }; + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)] +#[repr(C, align(4))] +pub(crate) struct FallbackPixel { + #[cfg(not(any(target_family = "wasm", target_os = "android")))] + b: u8, + #[cfg(any(target_family = "wasm", target_os = "android"))] + r: u8, + + g: u8, + + #[cfg(any(target_family = "wasm", target_os = "android"))] + b: u8, + #[cfg(not(any(target_family = "wasm", target_os = "android")))] + r: u8, + + a: u8, +} + +/// Convert the input data to a number of components with the given type `T`, loop over each pixel, +/// and convert it to RGBA using the provided closure. +/// +/// This routine cannot be used when multiple pixels are packed into a single byte (such as for +/// [`PixelFormat::R2`]). 
+fn each_pixel_raw( + i: Input<'_>, + o: Output<'_>, + convert_rgba: impl Fn([T; COMPONENTS]) -> [T; 4], +) { + let input_rows = i.data.chunks_exact(i.byte_stride); + let output_rows = o.data.chunks_exact_mut(o.stride); + for (input_row, output_row) in input_rows.zip(output_rows) { + // TODO: Make sure that the input stride is always multiple of the input pixel format's + // bytes per pixel, such that we can do this cast before the loop. + let input_row = T::cast(input_row); + + // Intentionally ignore trailing bytes, we might be working with stride-aligned rows where + // there can be a few extra bytes at the end that shouldn't be treated as pixel values. + let (input_row, _rest) = input_row.as_chunks::(); + + // The input should never be larger than the output, though the output is allowed to have + // more pixels (in cases where it's stride-aligned, there can be leftover pixels at the end). + debug_assert!(input_row.len() <= output_row.len()); + + for (input_pixel, output_pixel) in input_row.iter().zip(output_row) { + // Extract the components and convert alpha. + let [r, g, b, a] = convert_rgba(*input_pixel); + + // Scale each pixel down to the native `u8` format that we're using. + *output_pixel = FallbackPixel { + r: r.scale(), + g: g.scale(), + b: b.scale(), + a: a.scale(), + }; + } + } +} + +/// Extends [`each_pixel_raw`] with alpha conversion. +fn each_pixel( + i: Input<'_>, + o: Output<'_>, + convert_rgba: impl Fn([T; COMPONENTS]) -> [T; 4], +) { + // Convert alpha. + // + // We do this before scaling, to make sure that components with higher resolution + // than the target `u8` get scaled more precisely. + // + // NOTE: We monomorphize `each_pixel_raw` here depending on the required alpha conversion. This + // is nice because it allows the compiler to autovectorize the inner conversion loop (probably), + // though it does also cost a bit in terms of compile times and code size. + // + // TODO: Consider doing the alpha conversion in a separate pass afterwards, and then not worry + // about the multiplication precision? That would reduce the code-size a fair bit. + match (i.alpha_mode, o.alpha_mode) { + // No conversion. + (AlphaMode::Opaque, AlphaMode::Opaque) + | (AlphaMode::PostMultiplied, AlphaMode::PostMultiplied) + | (AlphaMode::PreMultiplied, AlphaMode::PreMultiplied) => { + each_pixel_raw(i, o, convert_rgba) + } + // Convert opaque -> transparent (i.e. ignore alpha / x component). + (AlphaMode::Opaque, AlphaMode::PostMultiplied | AlphaMode::PreMultiplied) => { + each_pixel_raw(i, o, |pixel| { + let [r, g, b, _a] = convert_rgba(pixel); + [r, g, b, T::ALPHA_OPAQUE] + }); + } + (AlphaMode::PostMultiplied, AlphaMode::PreMultiplied) => { + each_pixel_raw(i, o, |pixel| { + let [r, g, b, a] = convert_rgba(pixel); + [r.premultiply(a), g.premultiply(a), b.premultiply(a), a] + }); + } + (AlphaMode::PreMultiplied, AlphaMode::PostMultiplied) => { + each_pixel_raw(i, o, |pixel| { + let [r, g, b, a] = convert_rgba(pixel); + [ + r.unpremultiply(a), + g.unpremultiply(a), + b.unpremultiply(a), + a, + ] + }); + } + (AlphaMode::PostMultiplied | AlphaMode::PreMultiplied, AlphaMode::Opaque) => { + unreachable!("cannot convert alpha to opaque") // TODO + } + } +} + +/// Helper on top of [`each_pixel`] for when working with packed formats. +fn each_packed(i: Input<'_>, o: Output<'_>, convert: impl Fn(T) -> [T; 4]) { + each_pixel::(i, o, |[pixel]| convert(pixel)); +} + +/// Convert multiple grayscale pixels packed into a single byte. 
+/// +/// Kind of a special case, multiple pixels are packed into a single byte. +fn each_bitpacked_grayscale( + i: Input<'_>, + o: Output<'_>, + convert: impl Fn(u8) -> [u8; 4], +) { + // Assume pixels have a stride of at least 8 bits. + let input_rows = i.data.chunks_exact(i.byte_stride); + let output_rows = o.data.chunks_exact_mut(o.stride); + + for (input_row, output_row) in input_rows.zip(output_rows) { + let input_row = input_row + .iter() + // Split multiple pixels packed into a single byte up into several bytes. + .flat_map(|pixels| iter::repeat_n(pixels, 8 / BPP as usize).enumerate()) + // Extract the pixel from the byte. + .map(|(i, v)| v.extract_packed(((8 / BPP - 1) - i as u8) * BPP, BPP)); + + for (input_pixel, output_pixel) in input_row.zip(output_row) { + let [r, g, b, a] = convert(input_pixel); + *output_pixel = FallbackPixel { r, g, b, a }; + } + } +} + +/// A trait for representing the different kinds of data. +trait PixelComponent: Sized + Copy + std::fmt::Debug { + const ALPHA_OPAQUE: Self; + + fn cast(data: &[u8]) -> &[Self]; + + fn scale(self) -> u8; + + fn premultiply(self, alpha: Self) -> Self; + + fn unpremultiply(self, alpha: Self) -> Self; + + /// Extract part + fn extract_packed(self, position: u8, size: u8) -> Self; +} + +impl PixelComponent for u8 { + const ALPHA_OPAQUE: Self = 0xff; + + fn cast(data: &[u8]) -> &[u8] { + data + } + + fn scale(self) -> u8 { + self + } + + fn premultiply(self, alpha: u8) -> Self { + // TODO: Do we need to optimize this using bit shifts or similar? + ((self as u16 * alpha as u16) / 0xff) as u8 + } + + fn unpremultiply(self, alpha: u8) -> Self { + // TODO: Can we find a cleaner / more efficient way to implement this? + (self as u16 * u8::MAX as u16) + .checked_div(alpha as u16) + .unwrap_or(0) + .min(u8::MAX as u16) as u8 + } + + fn extract_packed(self, position: u8, size: u8) -> Self { + // The maximum value of the component. + // + // size=1 => 0b1 + // size=2 => 0b11 + // size=4 => 0b1111 + let max = (1 << size) - 1; + + // Extract the value of the component. + // + // self=0b11001011, position=0, max=0b11 => 0b11 + // self=0b11001011, position=2, max=0b11 => 0b10 + // self=0b11001011, position=4, max=0b00 => 0b00 + let value = (self >> position) & max; + + // Expand the value to fill the entire u8. + (value as u16 * u8::MAX as u16 / max as u16) as u8 + } +} + +impl PixelComponent for u16 { + const ALPHA_OPAQUE: Self = 0xffff; + + fn cast(data: &[u8]) -> &[u16] { + // SAFETY: `u8`s can be safely reinterpreted as a `u16`. + let ([], data, []) = (unsafe { data.align_to::() }) else { + unreachable!("data was not properly aligned"); + }; + data + } + + fn scale(self) -> u8 { + // Grab the high bytes of the value. + // + // NOTE: This truncates instead of rounding, which is a bit imprecise, but it's probably + // fine, we're going to be displaying the image right after. + (self >> 8) as u8 + } + + fn premultiply(self, alpha: u16) -> Self { + ((self as u32 * alpha as u32) / Self::MAX as u32) as u16 + } + + fn unpremultiply(self, alpha: u16) -> Self { + (self as u32 * u16::MAX as u32) + .checked_div(alpha as u32) + .unwrap_or(0) + .min(u16::MAX as u32) as u16 + } + + fn extract_packed(self, position: u8, size: u8) -> Self { + let max = (1 << size) - 1; + let value = (self >> position) & max; + (value as u32 * u16::MAX as u32 / max as u32) as u16 + } +} + +impl PixelComponent for u32 { + const ALPHA_OPAQUE: Self = 0xffffffff; + + fn cast(data: &[u8]) -> &[u32] { + // SAFETY: `u8`s can be safely reinterpreted as a `u32`. 
+ let ([], data, []) = (unsafe { data.align_to::() }) else { + unreachable!("data was not properly aligned"); + }; + data + } + + fn scale(self) -> u8 { + (self >> 24) as u8 + } + + fn premultiply(self, alpha: u32) -> Self { + ((self as u64 * alpha as u64) / Self::MAX as u64) as u32 + } + + fn unpremultiply(self, alpha: u32) -> Self { + (self as u64 * u32::MAX as u64) + .checked_div(alpha as u64) + .unwrap_or(0) + .min(u32::MAX as u64) as u32 + } + + fn extract_packed(self, position: u8, size: u8) -> Self { + let max = (1 << size) - 1; + let value = (self >> position) & max; + (value as u64 * u32::MAX as u64 / max as u64) as u32 + } +} + +#[cfg(feature = "f16")] +impl PixelComponent for f16 { + const ALPHA_OPAQUE: Self = f16::from_f32_const(1.0); + + fn cast(data: &[u8]) -> &[f16] { + // SAFETY: `u8`s can be safely reinterpreted as a `f16`. + let ([], data, []) = (unsafe { data.align_to::() }) else { + unreachable!("data was not properly aligned"); + }; + data + } + + fn scale(self) -> u8 { + let this: f32 = self.into(); + this.scale() + } + + fn premultiply(self, alpha: f16) -> Self { + self * alpha + } + + fn unpremultiply(self, alpha: f16) -> Self { + self / alpha + } + + fn extract_packed(self, _position: u8, _size: u8) -> Self { + unreachable!() + } +} + +impl PixelComponent for f32 { + const ALPHA_OPAQUE: Self = 1.0; + + fn cast(data: &[u8]) -> &[f32] { + // SAFETY: `u8`s can be safely reinterpreted as a `f32`. + let ([], data, []) = (unsafe { data.align_to::() }) else { + unreachable!("data was not properly aligned"); + }; + data + } + + fn scale(self) -> u8 { + (self * 255.0) as u8 + } + + fn premultiply(self, alpha: f32) -> Self { + self * alpha + } + + fn unpremultiply(self, alpha: f32) -> Self { + self / alpha + } + + fn extract_packed(self, _position: u8, _size: u8) -> Self { + unreachable!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[track_caller] + fn assert_converts_to( + input: &[[T; N]], + expected: &[[u8; 4]], + input_format: PixelFormat, + input_alpha_mode: AlphaMode, + output_alpha_mode: AlphaMode, + ) { + // SAFETY: All pixel components can be converted to `u8`. + let ([], input_row, []) = (unsafe { input.align_to::() }) else { + unreachable!() + }; + let mut output_row = vec![ + FallbackPixel::default(); + input_row.len() * 8 / input_format.bits_per_pixel() as usize + ]; + let input = Input { + data: input_row, + byte_stride: input_row.len(), + alpha_mode: input_alpha_mode, + format: input_format, + }; + let output_stride = output_row.len(); + let output = Output { + data: &mut output_row, + stride: output_stride, + alpha_mode: output_alpha_mode, + }; + convert_fallback(input, output); + + let expected: Vec = expected + .iter() + .cloned() + .map(|[r, g, b, a]| FallbackPixel { r, g, b, a }) + .collect(); + assert_eq!(output_row, expected); + } + + #[test] + fn rgb_alpha() { + let pixel_data: &[[u8; 3]] = &[[0x00, 0x11, 0x22], [0x33, 0x44, 0x55]]; + let expected = &[[0x00, 0x11, 0x22, 0xff], [0x33, 0x44, 0x55, 0xff]]; + let input_format = PixelFormat::Rgb8; + + assert_converts_to( + pixel_data, + expected, + input_format, + AlphaMode::Opaque, + AlphaMode::Opaque, + ); + + // The alpha mode doesn't matter for opaque formats such as `Format::Rgb8`. 
+ assert_converts_to( + pixel_data, + expected, + input_format, + AlphaMode::Opaque, + AlphaMode::PreMultiplied, + ); + assert_converts_to( + pixel_data, + expected, + input_format, + AlphaMode::PostMultiplied, + AlphaMode::PreMultiplied, + ); + assert_converts_to( + pixel_data, + expected, + input_format, + AlphaMode::PreMultiplied, + AlphaMode::PostMultiplied, + ); + } + + #[test] + fn rgba_alpha() { + // See the following link for expected values: + // https://html.spec.whatwg.org/multipage/canvas.html#premultiplied-alpha-and-the-2d-rendering-context + let pixel_data: &[[u8; 4]] = &[ + [0xff, 0x7f, 0x00, 0xff], + [0xff, 0x7f, 0x00, 0x7f], + [0xff, 0x7f, 0x00, 0x00], + ]; + let input_format = PixelFormat::Rgba8; + + assert_converts_to( + pixel_data, + pixel_data, + input_format, + AlphaMode::Opaque, + AlphaMode::Opaque, + ); + assert_converts_to( + pixel_data, + // Opaque -> alpha means make alpha channel opaque. + &[ + [0xff, 0x7f, 0x00, 0xff], + [0xff, 0x7f, 0x00, 0xff], + [0xff, 0x7f, 0x00, 0xff], + ], + input_format, + AlphaMode::Opaque, + AlphaMode::PreMultiplied, + ); + assert_converts_to( + pixel_data, + &[ + [0xff, 0x7f, 0x00, 0xff], // Opaque -> Values aren't changed. + [0x7f, 0x3f, 0x00, 0x7f], // Semi-transparent -> Values are scaled according to alpha. + [0x00, 0x00, 0x00, 0x00], // Fully transparent -> Fully transparent black. + ], + input_format, + AlphaMode::PostMultiplied, + AlphaMode::PreMultiplied, + ); + assert_converts_to( + pixel_data, + &[ + [0xff, 0x7f, 0x00, 0xff], // Opaque -> Values aren't changed. + [0xff, 0xff, 0x00, 0x7f], // Semi-transparent -> Pixels whose value are larger than the alpha are unrepresentable. + [0x00, 0x00, 0x00, 0x00], // Fully transparent -> Non-black pixels are unrepresentable. + ], + input_format, + AlphaMode::PreMultiplied, + AlphaMode::PostMultiplied, + ); + } + + #[test] + fn grayscale_alpha() { + let pixel_data: &[[u8; 1]] = &[[0b11_00_11_00], [0b00_01_10_11]]; + let expected = &[ + [0xff, 0xff, 0xff, 0xff], + [0x00, 0x00, 0x00, 0xff], + [0xff, 0xff, 0xff, 0xff], + [0x00, 0x00, 0x00, 0xff], + [0x00, 0x00, 0x00, 0xff], + [0x55, 0x55, 0x55, 0xff], + [0xaa, 0xaa, 0xaa, 0xff], + [0xff, 0xff, 0xff, 0xff], + ]; + let input_format = PixelFormat::R2; + + assert_converts_to( + pixel_data, + expected, + input_format, + AlphaMode::Opaque, + AlphaMode::Opaque, + ); + + // The alpha mode doesn't matter for grayscale formats. + assert_converts_to( + pixel_data, + expected, + input_format, + AlphaMode::Opaque, + AlphaMode::PreMultiplied, + ); + assert_converts_to( + pixel_data, + expected, + input_format, + AlphaMode::PostMultiplied, + AlphaMode::PreMultiplied, + ); + assert_converts_to( + pixel_data, + expected, + input_format, + AlphaMode::PreMultiplied, + AlphaMode::PostMultiplied, + ); + } + + /// Test a conversion in each format. + #[test] + fn all_formats() { + #[track_caller] + fn assert_convert( + input: [T; N], + expected: &[[u8; 4]], + input_format: PixelFormat, + ) { + assert_converts_to( + &[input], + expected, + input_format, + // Same alpha mode. 
+ AlphaMode::PreMultiplied, + AlphaMode::PreMultiplied, + ); + } + + let expected = &[[0x22, 0x44, 0x66, 0xff]]; + assert_convert::([0x66, 0x44, 0x22], expected, PixelFormat::Bgr8); + assert_convert::([0x22, 0x44, 0x66], expected, PixelFormat::Rgb8); + assert_convert::([0x66, 0x44, 0x22, 0xff], expected, PixelFormat::Bgra8); + assert_convert::([0x22, 0x44, 0x66, 0xff], expected, PixelFormat::Rgba8); + assert_convert::([0xff, 0x66, 0x44, 0x22], expected, PixelFormat::Abgr8); + assert_convert::([0xff, 0x22, 0x44, 0x66], expected, PixelFormat::Argb8); + assert_convert::([0x6655, 0x4433, 0x2211], expected, PixelFormat::Bgr16); + assert_convert::([0x2211, 0x4433, 0x6655], expected, PixelFormat::Rgb16); + assert_convert::( + [0x6655, 0x4433, 0x2211, 0xffee], + expected, + PixelFormat::Bgra16, + ); + assert_convert::( + [0x2211, 0x4433, 0x6655, 0xffee], + expected, + PixelFormat::Rgba16, + ); + assert_convert::( + [0xffee, 0x6655, 0x4433, 0x2211], + expected, + PixelFormat::Abgr16, + ); + assert_convert::( + [0xffee, 0x2211, 0x4433, 0x6655], + expected, + PixelFormat::Argb16, + ); + + assert_convert::( + [0b1_0_1_1_0_0_0_1], + &[ + [0xff, 0xff, 0xff, 0xff], + [0x00, 0x00, 0x00, 0xff], + [0xff, 0xff, 0xff, 0xff], + [0xff, 0xff, 0xff, 0xff], + [0x00, 0x00, 0x00, 0xff], + [0x00, 0x00, 0x00, 0xff], + [0x00, 0x00, 0x00, 0xff], + [0xff, 0xff, 0xff, 0xff], + ], + PixelFormat::R1, + ); + assert_convert::( + [0b10_11_00_01], + &[ + [0xaa, 0xaa, 0xaa, 0xff], + [0xff, 0xff, 0xff, 0xff], + [0x00, 0x00, 0x00, 0xff], + [0x55, 0x55, 0x55, 0xff], + ], + PixelFormat::R2, + ); + assert_convert::( + [0b1011_0001], + &[[0xbb, 0xbb, 0xbb, 0xff], [0x11, 0x11, 0x11, 0xff]], + PixelFormat::R4, + ); + assert_convert::([0x11], &[[0x11, 0x11, 0x11, 0xff]], PixelFormat::R8); + assert_convert::([0x11_22], &[[0x11, 0x11, 0x11, 0xff]], PixelFormat::R16); + + let expected = &[[0xb6, 0x48, 0xaa, 0xff]]; + assert_convert::([0b10_010_101], expected, PixelFormat::B2g3r3); + assert_convert::([0b101_010_10], expected, PixelFormat::R3g3b2); + let expected = &[[0xbd, 0xc7, 0xa5, 0xff]]; + assert_convert::([0b10100_110001_10111], expected, PixelFormat::B5g6r5); + assert_convert::([0b10111_110001_10100], expected, PixelFormat::R5g6b5); + let expected = &[[0x11, 0x22, 0x44, 0x88]]; + assert_convert::([0b0100_0010_0001_1000], expected, PixelFormat::Bgra4); + assert_convert::([0b0001_0010_0100_1000], expected, PixelFormat::Rgba4); + assert_convert::([0b1000_0100_0010_0001], expected, PixelFormat::Abgr4); + assert_convert::([0b1000_0001_0010_0100], expected, PixelFormat::Argb4); + let expected = &[[0xe7, 0x5a, 0xAd, 0xff]]; + assert_convert::([0b10101_01011_11100_1], expected, PixelFormat::Bgr5a1); + assert_convert::([0b11100_01011_10101_1], expected, PixelFormat::Rgb5a1); + assert_convert::([0b1_10101_01011_11100], expected, PixelFormat::A1bgr5); + assert_convert::([0b1_11100_01011_10101], expected, PixelFormat::A1rgb5); + let expected = &[[0xe3, 0x59, 0xAe, 0xaa]]; + assert_convert::( + [0b1010111011_0101100101_1110001111_10], + expected, + PixelFormat::Bgr10a2, + ); + assert_convert::( + [0b1110001111_0101100101_1010111011_10], + expected, + PixelFormat::Rgb10a2, + ); + assert_convert::( + [0b10_1010111011_0101100101_1110001111], + expected, + PixelFormat::A2bgr10, + ); + assert_convert::( + [0b10_1110001111_0101100101_1010111011], + expected, + PixelFormat::A2rgb10, + ); + + // [0.3, 0.5, 0.7, 1.0] + let expected = &[[0x4c, 0x7f, 0xb2, 0xff]]; + #[cfg(feature = "f16")] + assert_convert::( + [0.7, 0.5, 0.3].map(f16::from_f32), + expected, 

+            PixelFormat::Bgr16f,
+        );
+        #[cfg(feature = "f16")]
+        assert_convert::(
+            [0.3, 0.5, 0.7].map(f16::from_f32),
+            expected,
+            PixelFormat::Rgb16f,
+        );
+        #[cfg(feature = "f16")]
+        assert_convert::(
+            [0.7, 0.5, 0.3, 1.0].map(f16::from_f32),
+            expected,
+            PixelFormat::Bgra16f,
+        );
+        #[cfg(feature = "f16")]
+        assert_convert::(
+            [0.3, 0.5, 0.7, 1.0].map(f16::from_f32),
+            expected,
+            PixelFormat::Rgba16f,
+        );
+        #[cfg(feature = "f16")]
+        assert_convert::(
+            [1.0, 0.7, 0.5, 0.3].map(f16::from_f32),
+            expected,
+            PixelFormat::Abgr16f,
+        );
+        #[cfg(feature = "f16")]
+        assert_convert::(
+            [1.0, 0.3, 0.5, 0.7].map(f16::from_f32),
+            expected,
+            PixelFormat::Argb16f,
+        );
+        assert_convert::([0.7, 0.5, 0.3], expected, PixelFormat::Bgr32f);
+        assert_convert::([0.3, 0.5, 0.7], expected, PixelFormat::Rgb32f);
+        assert_convert::([0.7, 0.5, 0.3, 1.0], expected, PixelFormat::Bgra32f);
+        assert_convert::([0.3, 0.5, 0.7, 1.0], expected, PixelFormat::Rgba32f);
+        assert_convert::([1.0, 0.7, 0.5, 0.3], expected, PixelFormat::Abgr32f);
+        assert_convert::([1.0, 0.3, 0.5, 0.7], expected, PixelFormat::Argb32f);
+    }
+
+    #[test]
+    fn stride() {}
+}
diff --git a/src/format.rs b/src/format.rs
new file mode 100644
index 0000000..ec03992
--- /dev/null
+++ b/src/format.rs
@@ -0,0 +1,304 @@
+/// The pixel format of a surface and pixel buffer.
+///
+/// # Byte order
+///
+/// Non-packed formats (also called array formats) such as [`PixelFormat::Rgb8`] are stored with
+/// each component in byte order (in this case R in byte 0, G in byte 1 and B in byte 2).
+///
+/// The recommended way to work with these is to split rows in chunks of the number of components:
+///
+/// ```no_run
+/// // Fill a buffer that uses `PixelFormat::Rgb8` with red.
+/// # let buffer: softbuffer::Buffer<'_> = todo!();
+/// for row in buffer.rows_mut() {
+///     for [r, g, b] in row.as_chunks_mut::<3>().0 {
+///         *r = 0xff;
+///         *g = 0x00;
+///         *b = 0x00;
+///     }
+/// }
+/// ```
+///
+/// When components are larger than one byte, such as in [`PixelFormat::Rgb32f`], the data for each
+/// component is stored in the target platform's native endianness (in this case, on a little endian
+/// system, it would be stored in memory as `R3,R2,R1,R0,G3,G2,G1,G0,B3,B2,B1,B0`).
+///
+/// The recommended way to work with these is to first transmute the data to the component type (in
+/// this case `f32`), and then split rows in chunks of the number of components:
+///
+/// ```no_run
+/// // Fill a buffer that uses `PixelFormat::Rgb32f` with red.
+/// # let buffer: softbuffer::Buffer<'_> = todo!();
+/// for row in buffer.rows_f32_mut() {
+///     for [r, g, b] in row.as_chunks_mut::<3>().0 {
+///         *r = 1.0;
+///         *g = 0.0;
+///         *b = 0.0;
+///     }
+/// }
+/// ```
+///
+/// Packed formats such as [`PixelFormat::A1rgb5`] are stored as an integer of the same size, in the
+/// target platform's native endianness (in this case, on a little endian system, it would be stored
+/// in memory as `0bGGGBBBBB` in byte 0 and `0bARRRRRGG` in byte 1).
+///
+/// The recommended way to work with these is to first transmute the data to an integer that can
+/// contain the entire pixel (in this case `u16`), and then work with the data as an integer:
+///
+/// ```no_run
+/// // Fill a buffer that uses `PixelFormat::A1rgb5` with red.
+/// # let buffer: softbuffer::Buffer<'_> = todo!();
+/// for row in buffer.rows_u16_mut() {
+///     for pixel in row.iter_mut() {
+///         *pixel = 0b1_11111_00000_00000;
+///     }
+/// }
+/// ```
+///
+/// Finally, some formats such as [`PixelFormat::R2`] have multiple pixels packed into a single
+/// byte. These are stored with the first pixel in the most significant bits (so in this case
+/// `0b00112233`). TODO: Verify this!
+///
+/// The recommended way to work with these is to iterate over the data as a `u8`, and then use
+/// bitwise operators to write each pixel (or write them in bulk if you can):
+///
+/// ```no_run
+/// // Fill a buffer that uses `PixelFormat::R2` with gray.
+/// # let buffer: softbuffer::Buffer<'_> = todo!();
+/// for row in buffer.rows_mut() {
+///     for pixels in row.iter_mut() {
+///         *pixels = 0b00000000; // Clear
+///         for i in 0..4 {
+///             let pixel: u8 = 0b10;
+///             *pixels |= pixel << (i * 2);
+///         }
+///     }
+/// }
+/// ```
+///
+/// This is roughly the same naming scheme as WebGPU uses.
+///
+/// See also the [Pixel Format Guide](https://afrantzis.com/pixel-format-guide/).
+#[non_exhaustive]
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
+pub enum PixelFormat {
+    //
+    // Byte-aligned RGB formats
+    //
+    /// Blue, green, and red components. One `u8` per channel. Laid out in memory as `B0, G0, R0`. TODO.
+    #[doc(alias = "Rgb888")]
+    #[doc(alias = "VK_FORMAT_B8G8R8_UNORM")]
+    Bgr8,
+    #[doc(alias = "Bgr888")]
+    #[doc(alias = "VK_FORMAT_R8G8B8_UNORM")]
+    Rgb8,
+    #[doc(alias = "Argb8888")]
+    #[doc(alias = "Xrgb8888")]
+    #[doc(alias = "VK_FORMAT_B8G8R8A8_UNORM")]
+    Bgra8,
+    #[doc(alias = "Abgr8888")]
+    #[doc(alias = "Xbgr8888")]
+    #[doc(alias = "VK_FORMAT_R8G8B8A8_UNORM")]
+    Rgba8,
+    #[doc(alias = "Rgba8888")]
+    #[doc(alias = "Rgbx8888")]
+    Abgr8,
+    #[doc(alias = "Bgra8888")]
+    #[doc(alias = "Bgrx8888")]
+    Argb8,
+
+    Bgr16,
+    #[doc(alias = "VK_FORMAT_R16G16B16_UNORM")]
+    Rgb16,
+    #[doc(alias = "Argb16161616")]
+    #[doc(alias = "Xrgb16161616")]
+    Bgra16,
+    #[doc(alias = "Abgr16161616")]
+    #[doc(alias = "Xbgr16161616")]
+    #[doc(alias = "VK_FORMAT_R16G16B16A16_UNORM")]
+    Rgba16,
+    #[doc(alias = "Rgba16161616")]
+    #[doc(alias = "Rgbx16161616")]
+    Abgr16,
+    #[doc(alias = "Bgra16161616")]
+    #[doc(alias = "Bgrx16161616")]
+    Argb16,
+
+    //
+    // Grayscale formats
+    //
+    R1,
+    R2,
+    R4,
+    #[doc(alias = "VK_FORMAT_R8_UNORM")]
+    R8,
+    #[doc(alias = "VK_FORMAT_R16_UNORM")]
+    R16,
+    // TODO: R16f,
+    // TODO: R32f,
+
+    //
+    // Packed RGB formats
+    //
+    /// Packed into a `u8` as `0bBB_GGG_RRR`.
+    #[doc(alias = "Bgr233")]
+    B2g3r3,
+    /// Packed into a `u8` as `0bRRR_GGG_BB`.
+    #[doc(alias = "Rgb332")]
+    R3g3b2,
+
+    /// Packed into a `u16` as `0bBBBBB_GGGGGG_RRRRR`.
+    #[doc(alias = "Bgr565")]
+    #[doc(alias = "VK_FORMAT_B5G6R5_UNORM_PACK16")]
+    B5g6r5,
+    /// Packed into a `u16` as `0bRRRRR_GGGGGG_BBBBB`.
+    #[doc(alias = "Rgb565")]
+    #[doc(alias = "VK_FORMAT_R5G6B5_UNORM_PACK16")]
+    R5g6b5,
+
+    /// Packed into a `u16` as `0bBBBB_GGGG_RRRR_AAAA`.
+    #[doc(alias = "Bgra4444")]
+    #[doc(alias = "Bgrx4444")]
+    #[doc(alias = "VK_FORMAT_B4G4R4A4_UNORM_PACK16")]
+    Bgra4,
+    /// Packed into a `u16` as `0bRRRR_GGGG_BBBB_AAAA`.
+    #[doc(alias = "Rgba4444")]
+    #[doc(alias = "Rgbx4444")]
+    #[doc(alias = "VK_FORMAT_R4G4B4A4_UNORM_PACK16")]
+    Rgba4,
+    /// Packed into a `u16` as `0bAAAA_BBBB_GGGG_RRRR`.
+    #[doc(alias = "Abgr4444")]
+    #[doc(alias = "Xbgr4444")]
+    Abgr4,
+    /// Packed into a `u16` as `0bAAAA_RRRR_GGGG_BBBB`.
+ #[doc(alias = "Argb4444")] + #[doc(alias = "Xrgb4444")] + Argb4, + + /// Packed into a `u16` as `0bBBBBB_GGGGG_RRRRR_A`. + #[doc(alias = "Bgrx5551")] + #[doc(alias = "VK_FORMAT_B5G5R5A1_UNORM_PACK16")] + Bgr5a1, + /// Packed into a `u16` as `0bRRRRR_GGGGG_BBBBB_A`. + #[doc(alias = "Rgbx5551")] + #[doc(alias = "VK_FORMAT_R5G5B5A1_UNORM_PACK16")] + Rgb5a1, + /// Packed into a `u16` as `0bA_BBBBB_GGGGG_RRRRR`. + #[doc(alias = "Xbgr1555")] + A1bgr5, + /// Packed into a `u16` as `0bA_RRRRR_GGGGG_BBBBB`. + #[doc(alias = "Xrgb1555")] + #[doc(alias = "VK_FORMAT_A1R5G5B5_UNORM_PACK16")] + A1rgb5, + + /// Packed into a `u32` as `0bBBBBBBBBBB_GGGGGGGGGG_RRRRRRRRRR_AA`. + #[doc(alias = "Bgra1010102")] + #[doc(alias = "Bgrx1010102")] + Bgr10a2, + /// Packed into a `u32` as `0bRRRRRRRRRR_GGGGGGGGGG_BBBBBBBBBB_AA`. + #[doc(alias = "Rgba1010102")] + #[doc(alias = "Rgbx1010102")] + Rgb10a2, + /// Packed into a `u32` as `0bAA_BBBBBBBBBB_GGGGGGGGGG_RRRRRRRRRR`. + #[doc(alias = "Abgr2101010")] + #[doc(alias = "Xbgr2101010")] + #[doc(alias = "VK_FORMAT_A2B10G10R10_UNORM_PACK32")] + A2bgr10, + /// Packed into a `u32` as `0bAA_RRRRRRRRRR_GGGGGGGGGG_BBBBBBBBBB`. + #[doc(alias = "Argb2101010")] + #[doc(alias = "Xrgb2101010")] + #[doc(alias = "VK_FORMAT_A2R10G10B10_UNORM_PACK32")] + A2rgb10, + + // + // Floating point RGB formats. + // + #[cfg(feature = "f16")] + Bgr16f, + #[cfg(feature = "f16")] + Rgb16f, + #[doc(alias = "Argb16161616f")] + #[doc(alias = "Xrgb16161616f")] + #[cfg(feature = "f16")] + Bgra16f, + #[doc(alias = "Abgr16161616f")] + #[doc(alias = "Xbgr16161616f")] + #[cfg(feature = "f16")] + Rgba16f, + #[doc(alias = "Rgba16161616f")] + #[doc(alias = "Rgbx16161616f")] + #[cfg(feature = "f16")] + Abgr16f, + #[doc(alias = "Bgra16161616f")] + #[doc(alias = "Bgrx16161616f")] + #[cfg(feature = "f16")] + Argb16f, + + Bgr32f, + #[doc(alias = "VK_FORMAT_R32G32B32_SFLOAT")] + Rgb32f, + #[doc(alias = "Argb32323232f")] + #[doc(alias = "Xrgb32323232f")] + Bgra32f, + #[doc(alias = "Abgr32323232f")] + #[doc(alias = "Xbgr32323232f")] + #[doc(alias = "VK_FORMAT_R32G32B32A32_SFLOAT")] + Rgba32f, + #[doc(alias = "Rgba32323232f")] + #[doc(alias = "Rgbx32323232f")] + Abgr32f, + #[doc(alias = "Bgra32323232f")] + #[doc(alias = "Bgrx32323232f")] + Argb32f, + // TODO: AYCbCr formats? +} + +impl PixelFormat { + /// The number of bits wide that a single pixel in a buffer would be. + pub const fn bits_per_pixel(self) -> u8 { + match self { + Self::Rgb8 | Self::Bgr8 => 24, + Self::Bgra8 | Self::Rgba8 | Self::Abgr8 | Self::Argb8 => 32, + Self::Rgb16 | Self::Bgr16 => 48, + Self::Bgra16 | Self::Rgba16 | Self::Abgr16 | Self::Argb16 => 64, + Self::R1 => 1, + Self::R2 => 2, + Self::R4 => 4, + Self::R8 => 8, + Self::R16 => 16, + Self::B2g3r3 | Self::R3g3b2 => 8, + Self::B5g6r5 | Self::R5g6b5 => 16, + Self::Bgra4 | Self::Rgba4 | Self::Abgr4 | Self::Argb4 => 16, + Self::Bgr5a1 | Self::Rgb5a1 | Self::A1bgr5 | Self::A1rgb5 => 16, + Self::Bgr10a2 | Self::Rgb10a2 | Self::A2bgr10 | Self::A2rgb10 => 32, + #[cfg(feature = "f16")] + Self::Bgr16f | Self::Rgb16f => 48, + #[cfg(feature = "f16")] + Self::Bgra16f | Self::Rgba16f | Self::Abgr16f | Self::Argb16f => 64, + Self::Bgr32f | Self::Rgb32f => 96, + Self::Bgra32f | Self::Rgba32f | Self::Abgr32f | Self::Argb32f => 128, + } + } + + /// The number of components this format has. 
+ pub const fn components(self) -> u8 { + match self { + Self::Rgb8 | Self::Bgr8 => 3, + Self::Bgra8 | Self::Rgba8 | Self::Abgr8 | Self::Argb8 => 4, + Self::Rgb16 | Self::Bgr16 => 3, + Self::Bgra16 | Self::Rgba16 | Self::Abgr16 | Self::Argb16 => 4, + Self::R1 | Self::R2 | Self::R4 | Self::R8 | Self::R16 => 1, + Self::B2g3r3 | Self::R3g3b2 => 3, + Self::B5g6r5 | Self::R5g6b5 => 3, + Self::Bgra4 | Self::Rgba4 | Self::Abgr4 | Self::Argb4 => 4, + Self::Bgr5a1 | Self::Rgb5a1 | Self::A1bgr5 | Self::A1rgb5 => 4, + Self::Bgr10a2 | Self::Rgb10a2 | Self::A2bgr10 | Self::A2rgb10 => 4, + #[cfg(feature = "f16")] + Self::Bgr16f | Self::Rgb16f => 3, + #[cfg(feature = "f16")] + Self::Bgra16f | Self::Rgba16f | Self::Abgr16f | Self::Argb16f => 4, + Self::Bgr32f | Self::Rgb32f => 3, + Self::Bgra32f | Self::Rgba32f | Self::Abgr32f | Self::Argb32f => 4, + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 6818e99..f9d7c54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,8 @@ extern crate core; mod backend_dispatch; +mod convert; +mod format; use backend_dispatch::*; mod backend_interface; use backend_interface::*; @@ -19,6 +21,10 @@ use std::marker::PhantomData; use std::num::NonZeroU32; use std::sync::Arc; +use crate::convert::FALLBACK_FORMAT; + +pub use self::format::PixelFormat; + use error::InitError; pub use error::SoftBufferError; @@ -75,7 +81,10 @@ pub struct Rect { #[derive(Debug)] pub struct Surface { alpha_mode: AlphaMode, - /// A buffer that is used when `!supported_alpha_mode.contains(alpha_mode)`. + pixel_format: PixelFormat, + /// A buffer that is used when: + /// `!supported_alpha_mode.contains(alpha_mode)`. + /// or `!supported_formats.contains(pixel_format)`. fallback_buffer: Option>, /// This is boxed so that `Surface` is the same size on every platform. surface_impl: Box>, @@ -87,6 +96,8 @@ impl Surface { pub fn new(context: &Context, window: W) -> Result { match SurfaceDispatch::new(window, &context.context_impl) { Ok(surface_dispatch) => Ok(Self { + pixel_format: FALLBACK_FORMAT, + fallback_buffer: None, surface_impl: Box::new(surface_dispatch), _marker: PhantomData, }), @@ -119,16 +130,39 @@ impl Surface { /// to have the buffer fill the entire window. Use your windowing library to find the size /// of the window. pub fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError> { - self.configure(width, height, self.alpha_mode()) + self.configure(width, height, self.pixel_format, self.alpha_mode) } pub fn configure( &mut self, width: NonZeroU32, height: NonZeroU32, + pixel_format: PixelFormat, alpha_mode: AlphaMode, ) -> Result<(), SoftBufferError> { - self.surface_impl.configure(width, height, alpha_mode) + // TODO: Do we really want this optimization? And if we do, should we clear the buffers + // here too, for consistency / more predictability? + if self.width == width && self.height == height && self.pixel_format == pixel_format { + return Ok(()); + } + + if let Some(byte_stride) = self.surface_impl.configure(width, height, pixel_format)? { + self.byte_stride = byte_stride; + self.fallback_buffer = None; + } else { + // Needs fallback buffer + let bit_width = width as usize * pixel_format.bits_per_pixel() as usize; + let byte_stride = bit_width.next_multiple_of(32) / 8; + self.byte_stride = byte_stride as u32; + self.fallback_buffer = Some(vec![0x00; byte_stride * height as usize]); + } + + // We successfully configured the buffer. 

+        self.width = width;
+        self.height = height;
+        self.pixel_format = pixel_format;
+        self.alpha_mode = alpha_mode;
+
+        Ok(())
     }
 
     /// Copies the window contents into a buffer.