From badc098fc548a882dbedb88c07685b678938397e Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Mon, 19 Jan 2026 19:47:25 +0100 Subject: [PATCH 1/3] Add Buffer::byte_stride --- CHANGELOG.md | 1 + examples/animation.rs | 8 +++---- examples/fruit.rs | 4 ++-- src/backend_dispatch.rs | 10 ++++++++ src/backend_interface.rs | 1 + src/backends/android.rs | 4 ++++ src/backends/cg.rs | 4 ++++ src/backends/kms.rs | 4 ++++ src/backends/orbital.rs | 4 ++++ src/backends/wayland/mod.rs | 4 ++++ src/backends/web.rs | 4 ++++ src/backends/win32.rs | 9 ++++++++ src/backends/x11.rs | 34 ++++++++++----------------- src/lib.rs | 46 +++++++++++++++++++++++++++++-------- 14 files changed, 100 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59a21cf..b7c8e6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ - Added `Buffer::pixels()` for accessing the buffer's pixel data. - Added `Buffer::pixel_rows()` for iterating over rows of the buffer data. - Added `Buffer::pixels_iter()` for iterating over each pixel with its associated `x`/`y` coordinate. +- Added `Buffer::byte_stride()` for pixel buffers whose rows are aligned and may contain padding bytes at the end. Prefer to use the above helpers instead of accessing pixel data directly. - **Breaking:** Removed generic type parameters `D` and `W` from `Buffer<'_>` struct. - **Breaking:** Removed `Deref[Mut]` implementation on `Buffer<'_>`. Use `Buffer::pixels()` instead. diff --git a/examples/animation.rs b/examples/animation.rs index 1603883..1962ed9 100644 --- a/examples/animation.rs +++ b/examples/animation.rs @@ -22,7 +22,7 @@ fn main() { let window = util::make_window(event_loop, |w| w); let old_size = (0, 0); - let frames = pre_render_frames(0, 0); + let frames = pre_render_frames(0, 0, 0); (window, old_size, frames) }, @@ -65,7 +65,7 @@ fn main() { let size = (buffer.width().get(), buffer.height().get()); if size != *old_size { *old_size = size; - *frames = pre_render_frames(size.0, size.1); + *frames = pre_render_frames(buffer.byte_stride().get() / 4, size.0, size.1); } let frame = &frames[((elapsed * 60.0).round() as usize).clamp(0, 59)]; @@ -95,11 +95,11 @@ fn main() { util::run_app(event_loop, app); } -fn pre_render_frames(width: u32, height: u32) -> Vec> { +fn pre_render_frames(stride: u32, width: u32, height: u32) -> Vec> { let render = |frame_id| { let elapsed = ((frame_id as f64) / (60.0)) * 2.0 * PI; - let coords = (0..height).flat_map(|x| (0..width).map(move |y| (x, y))); + let coords = (0..height).flat_map(|x| (0..stride).map(move |y| (x, y))); coords .map(|(x, y)| { let y = (y as f64) / (height as f64); diff --git a/examples/fruit.rs b/examples/fruit.rs index c4fef13..8cf9e09 100644 --- a/examples/fruit.rs +++ b/examples/fruit.rs @@ -51,14 +51,14 @@ fn main() { }; let mut buffer = surface.buffer_mut().unwrap(); - let width = fruit.width(); + let stride = buffer.byte_stride().get() / 4; for (x, y, pixel) in fruit.pixels() { let red = pixel.0[0] as u32; let green = pixel.0[1] as u32; let blue = pixel.0[2] as u32; let color = blue | (green << 8) | (red << 16); - buffer.pixels()[(y * width + x) as usize] = color; + buffer.pixels()[(y * stride + x) as usize] = color; } buffer.present().unwrap(); diff --git a/src/backend_dispatch.rs b/src/backend_dispatch.rs index f2a9454..baac357 100644 --- a/src/backend_dispatch.rs +++ b/src/backend_dispatch.rs @@ -146,6 +146,16 @@ macro_rules! make_dispatch { } impl BufferInterface for BufferDispatch<'_> { + #[inline] + fn byte_stride(&self) -> NonZeroU32 { + match self { + $( + $(#[$attr])* + Self::$name(inner) => inner.byte_stride(), + )* + } + } + #[inline] fn width(&self) -> NonZeroU32 { match self { diff --git a/src/backend_interface.rs b/src/backend_interface.rs index 1eafbf8..00f1a36 100644 --- a/src/backend_interface.rs +++ b/src/backend_interface.rs @@ -35,6 +35,7 @@ pub(crate) trait SurfaceInterface NonZeroU32; fn width(&self) -> NonZeroU32; fn height(&self) -> NonZeroU32; fn pixels_mut(&mut self) -> &mut [u32]; diff --git a/src/backends/android.rs b/src/backends/android.rs index cc23b9c..d489d70 100644 --- a/src/backends/android.rs +++ b/src/backends/android.rs @@ -123,6 +123,10 @@ pub struct BufferImpl<'a> { unsafe impl Send for BufferImpl<'_> {} impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { NonZeroU32::new(self.native_window_buffer.width() as u32).unwrap() } diff --git a/src/backends/cg.rs b/src/backends/cg.rs index f45779f..9ae3e25 100644 --- a/src/backends/cg.rs +++ b/src/backends/cg.rs @@ -279,6 +279,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { NonZeroU32::new(self.width as u32).unwrap() } diff --git a/src/backends/kms.rs b/src/backends/kms.rs index a3a982d..eb64ec2 100644 --- a/src/backends/kms.rs +++ b/src/backends/kms.rs @@ -305,6 +305,10 @@ impl Drop for KmsImpl { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { self.size.0 } diff --git a/src/backends/orbital.rs b/src/backends/orbital.rs index 89a718d..64f7145 100644 --- a/src/backends/orbital.rs +++ b/src/backends/orbital.rs @@ -153,6 +153,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { NonZeroU32::new(self.width as u32).unwrap() } diff --git a/src/backends/wayland/mod.rs b/src/backends/wayland/mod.rs index 77b7b87..b8f6741 100644 --- a/src/backends/wayland/mod.rs +++ b/src/backends/wayland/mod.rs @@ -219,6 +219,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width as u32 * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { NonZeroU32::new(self.width as u32).unwrap() } diff --git a/src/backends/web.rs b/src/backends/web.rs index a829e5b..42a438f 100644 --- a/src/backends/web.rs +++ b/src/backends/web.rs @@ -306,6 +306,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { self.size .expect("must set size of surface before calling `width()` on the buffer") diff --git a/src/backends/win32.rs b/src/backends/win32.rs index 40bd3bd..90609c6 100644 --- a/src/backends/win32.rs +++ b/src/backends/win32.rs @@ -250,6 +250,15 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + let width = self.buffer.width.get() as u32; + let bit_count = 32; + // https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader#calculating-surface-stride + // When `bit_count == 32`, this is always just equal to `width * 4`. + let stride = ((width * bit_count + 31) & !31) >> 3; + NonZeroU32::new(stride).unwrap() + } + fn width(&self) -> NonZeroU32 { self.buffer.width.try_into().unwrap() } diff --git a/src/backends/x11.rs b/src/backends/x11.rs index 1ea9cec..3fbcece 100644 --- a/src/backends/x11.rs +++ b/src/backends/x11.rs @@ -327,10 +327,13 @@ impl SurfaceInterface fo height, }))?; + let byte_stride = width.get() * 4; + let num_bytes = byte_stride as usize * height.get() as usize; + if self.size != Some((width, height)) { self.buffer_presented = false; self.buffer - .resize(self.display.connection(), width.get(), height.get()) + .resize(self.display.connection(), num_bytes) .swbuf_err("Failed to resize X11 buffer")?; // We successfully resized the buffer. @@ -408,6 +411,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { self.size.unwrap().0.into() } @@ -531,17 +538,12 @@ impl BufferInterface for BufferImpl<'_> { } impl Buffer { - /// Resize the buffer to the given size. - fn resize( - &mut self, - conn: &impl Connection, - width: u16, - height: u16, - ) -> Result<(), PushBufferError> { + /// Resize the buffer to the given number of bytes. + fn resize(&mut self, conn: &impl Connection, num_bytes: usize) -> Result<(), PushBufferError> { match self { - Buffer::Shm(ref mut shm) => shm.alloc_segment(conn, total_len(width, height)), + Buffer::Shm(ref mut shm) => shm.alloc_segment(conn, num_bytes), Buffer::Wire(wire) => { - wire.resize(total_len(width, height) / 4, 0); + wire.resize(num_bytes / 4, 0); Ok(()) } } @@ -970,15 +972,3 @@ impl> PushResultExt for Result { self.map_err(Into::into) } } - -/// Get the length that a slice needs to be to hold a buffer of the given dimensions. -#[inline(always)] -fn total_len(width: u16, height: u16) -> usize { - let width: usize = width.into(); - let height: usize = height.into(); - - width - .checked_mul(height) - .and_then(|len| len.checked_mul(4)) - .unwrap_or_else(|| panic!("Dimensions are too large: ({} x {})", width, height)) -} diff --git a/src/lib.rs b/src/lib.rs index d54d2b9..453df11 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -111,7 +111,14 @@ impl Surface { /// to have the buffer fill the entire window. Use your windowing library to find the size /// of the window. pub fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError> { - self.surface_impl.resize(width, height) + if u32::MAX / 4 < width.get() { + // Stride would be too large. + return Err(SoftBufferError::SizeOutOfRange { width, height }); + } + + self.surface_impl.resize(width, height)?; + + Ok(()) } /// Copies the window contents into a buffer. @@ -137,11 +144,21 @@ impl Surface { /// sending another frame. pub fn buffer_mut(&mut self) -> Result, SoftBufferError> { let mut buffer_impl = self.surface_impl.buffer_mut()?; + + debug_assert_eq!( + buffer_impl.byte_stride().get() % 4, + 0, + "stride must be a multiple of 4" + ); debug_assert_eq!( - buffer_impl.height().get() as usize * buffer_impl.width().get() as usize, - buffer_impl.pixels_mut().len(), + buffer_impl.height().get() as usize * buffer_impl.byte_stride().get() as usize, + buffer_impl.pixels_mut().len() * 4, "buffer must be sized correctly" ); + debug_assert!( + buffer_impl.width().get() * 4 <= buffer_impl.byte_stride().get(), + "width * 4 must be less than or equal to stride" + ); Ok(Buffer { buffer_impl, @@ -244,6 +261,17 @@ pub struct Buffer<'a> { } impl Buffer<'_> { + /// The number of bytes wide each row in the buffer is. + /// + /// On some platforms, the buffer is slightly larger than `width * height` for performance + /// reasons. In your code, prefer to use [`Buffer::pixel_rows`] (which handles this correctly), + /// or failing that, make sure to chunk rows by the stride instead of the width. + /// + /// This is guaranteed to be `>= width * 4`, and is guaranteed to be a multiple of 4. + pub fn byte_stride(&self) -> NonZeroU32 { + self.buffer_impl.byte_stride() + } + /// The amount of pixels wide the buffer is. pub fn width(&self) -> NonZeroU32 { self.buffer_impl.width() @@ -300,7 +328,7 @@ impl Buffer<'_> { impl Buffer<'_> { /// Get a mutable reference to the buffer's pixels. /// - /// The size of the returned slice is `buffer.width() * buffer.height()`. + /// The size of the returned slice is `buffer.byte_stride() * buffer.height() / 4`. /// /// # Examples /// @@ -316,7 +344,7 @@ impl Buffer<'_> { /// Iterate over each row of pixels. /// - /// Each slice returned from the iterator has a length of `buffer.width()`. + /// Each slice returned from the iterator has a length of `buffer.byte_stride() / 4`. /// /// # Examples /// @@ -366,11 +394,11 @@ impl Buffer<'_> { pub fn pixel_rows( &mut self, ) -> impl DoubleEndedIterator + ExactSizeIterator { - let width = self.width().get() as usize; + let pixel_stride = self.byte_stride().get() as usize / 4; let pixels = self.pixels(); - assert_eq!(pixels.len() % width, 0, "buffer must be multiple of width"); - // NOTE: This won't panic because `width` is `NonZeroU32` - pixels.chunks_mut(width) + assert_eq!(pixels.len() % pixel_stride, 0, "must be multiple of stride"); + // NOTE: This won't panic because `pixel_stride` came from `NonZeroU32` + pixels.chunks_mut(pixel_stride) } /// Iterate over each pixel in the data. From a7abd65eddee4f438b0e23d679f6cc8dd35c0d8c Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Mon, 19 Jan 2026 19:39:35 +0100 Subject: [PATCH 2/3] Optimize Android backend a bit using buffer stride --- src/backends/android.rs | 34 +++++++++++++--------------------- src/lib.rs | 5 +++-- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/backends/android.rs b/src/backends/android.rs index d489d70..d032611 100644 --- a/src/backends/android.rs +++ b/src/backends/android.rs @@ -99,7 +99,7 @@ impl SurfaceInterface for Android )); } - let buffer = vec![0; native_window_buffer.width() * native_window_buffer.height()]; + let buffer = vec![0; native_window_buffer.stride() * native_window_buffer.height()]; Ok(BufferImpl { native_window_buffer, @@ -124,7 +124,7 @@ unsafe impl Send for BufferImpl<'_> {} impl BufferInterface for BufferImpl<'_> { fn byte_stride(&self) -> NonZeroU32 { - NonZeroU32::new(self.width().get() * 4).unwrap() + NonZeroU32::new(self.native_window_buffer.stride() as u32 * 4).unwrap() } fn width(&self) -> NonZeroU32 { @@ -147,26 +147,18 @@ impl BufferInterface for BufferImpl<'_> { // TODO: This function is pretty slow this way fn present(mut self) -> Result<(), SoftBufferError> { - let input_lines = self.buffer.chunks(self.native_window_buffer.width()); - for (output, input) in self - .native_window_buffer - .lines() - // Unreachable as we checked before that this is a valid, mappable format - .unwrap() - .zip(input_lines) - { - // .lines() removed the stride - assert_eq!(output.len(), input.len() * 4); - - for (i, pixel) in input.iter().enumerate() { - // Swizzle colors from BGR(A) to RGB(A) - let [b, g, r, a] = pixel.to_le_bytes(); - output[i * 4].write(r); - output[i * 4 + 1].write(g); - output[i * 4 + 2].write(b); - output[i * 4 + 3].write(a); - } + // Unreachable as we checked before that this is a valid, mappable format + let native_buffer = self.native_window_buffer.bytes().unwrap(); + + for (pixel, output) in self.buffer.iter().zip(native_buffer.chunks_mut(4)) { + // Swizzle colors from BGR(A) to RGB(A) + let [b, g, r, a] = pixel.to_le_bytes(); + output[0].write(r); + output[1].write(g); + output[2].write(b); + output[3].write(a); } + Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 453df11..7a44045 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -241,6 +241,7 @@ impl HasWindowHandle for Surface /// B: Blue channel /// /// # Platform dependent behavior +/// /// No-copy presentation is currently supported on: /// - Wayland /// - X, when XShm is available @@ -248,11 +249,11 @@ impl HasWindowHandle for Surface /// - Orbital, when buffer size matches window size /// /// Currently [`Buffer::present`] must block copying image data on: -/// - Web /// - AppKit /// - UIKit /// -/// Buffer copies an channel swizzling happen on: +/// Buffer copies and channel swizzling happen on: +/// - Web /// - Android #[derive(Debug)] pub struct Buffer<'a> { From c543d19734277ba6b697a197241f6df973e8020a Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Mon, 19 Jan 2026 20:26:42 +0100 Subject: [PATCH 3/3] Use higher stride on some platforms when cfg(debug_assertions) To help with debugging incorrect stride/width calculations, it helps that there are more common backends (than Android) that have `stride != width * 4`. --- src/backends/cg.rs | 7 ++++--- src/backends/wayland/buffer.rs | 7 ++++--- src/backends/wayland/mod.rs | 4 ++-- src/backends/win32.rs | 22 +++++++++++----------- src/util.rs | 16 ++++++++++++++++ 5 files changed, 37 insertions(+), 19 deletions(-) diff --git a/src/backends/cg.rs b/src/backends/cg.rs index 9ae3e25..a8e4979 100644 --- a/src/backends/cg.rs +++ b/src/backends/cg.rs @@ -259,8 +259,9 @@ impl SurfaceInterface for CGImpl< } fn buffer_mut(&mut self) -> Result, SoftBufferError> { + let buffer_size = util::byte_stride(self.width as u32) as usize * self.height / 4; Ok(BufferImpl { - buffer: util::PixelBuffer(vec![0; self.width * self.height]), + buffer: util::PixelBuffer(vec![0; buffer_size]), width: self.width, height: self.height, color_space: &self.color_space, @@ -280,7 +281,7 @@ pub struct BufferImpl<'a> { impl BufferInterface for BufferImpl<'_> { fn byte_stride(&self) -> NonZeroU32 { - NonZeroU32::new(self.width().get() * 4).unwrap() + NonZeroU32::new(util::byte_stride(self.width as u32)).unwrap() } fn width(&self) -> NonZeroU32 { @@ -342,7 +343,7 @@ impl BufferInterface for BufferImpl<'_> { self.height, 8, 32, - self.width * 4, + util::byte_stride(self.width as u32) as usize, Some(self.color_space), bitmap_info, Some(&data_provider), diff --git a/src/backends/wayland/buffer.rs b/src/backends/wayland/buffer.rs index e28653a..1fab892 100644 --- a/src/backends/wayland/buffer.rs +++ b/src/backends/wayland/buffer.rs @@ -15,6 +15,7 @@ use wayland_client::{ }; use super::State; +use crate::util; #[cfg(any(target_os = "linux", target_os = "freebsd"))] fn create_memfile() -> File { @@ -98,7 +99,7 @@ impl WaylandBuffer { 0, width, height, - width * 4, + util::byte_stride(width as u32) as i32, wl_shm::Format::Xrgb8888, qh, released.clone(), @@ -138,7 +139,7 @@ impl WaylandBuffer { 0, width, height, - width * 4, + util::byte_stride(width as u32) as i32, wl_shm::Format::Xrgb8888, &self.qh, self.released.clone(), @@ -158,7 +159,7 @@ impl WaylandBuffer { } fn len(&self) -> usize { - self.width as usize * self.height as usize + util::byte_stride(self.width as u32) as usize * self.height as usize / 4 } #[inline] diff --git a/src/backends/wayland/mod.rs b/src/backends/wayland/mod.rs index b8f6741..1f2ede5 100644 --- a/src/backends/wayland/mod.rs +++ b/src/backends/wayland/mod.rs @@ -1,7 +1,7 @@ use crate::{ backend_interface::*, error::{InitError, SwResultExt}, - Rect, SoftBufferError, + util, Rect, SoftBufferError, }; use raw_window_handle::{HasDisplayHandle, HasWindowHandle, RawDisplayHandle, RawWindowHandle}; use std::{ @@ -220,7 +220,7 @@ pub struct BufferImpl<'a> { impl BufferInterface for BufferImpl<'_> { fn byte_stride(&self) -> NonZeroU32 { - NonZeroU32::new(self.width as u32 * 4).unwrap() + NonZeroU32::new(util::byte_stride(self.width as u32)).unwrap() } fn width(&self) -> NonZeroU32 { diff --git a/src/backends/win32.rs b/src/backends/win32.rs index 90609c6..76b3759 100644 --- a/src/backends/win32.rs +++ b/src/backends/win32.rs @@ -116,12 +116,9 @@ impl Buffer { #[inline] fn pixels_mut(&mut self) -> &mut [u32] { - unsafe { - slice::from_raw_parts_mut( - self.pixels.as_ptr(), - i32::from(self.width) as usize * i32::from(self.height) as usize, - ) - } + let num_bytes = + byte_stride(self.width.get() as u32, 32) as usize * i32::from(self.height) as usize; + unsafe { slice::from_raw_parts_mut(self.pixels.as_ptr(), num_bytes / 4) } } } @@ -252,11 +249,7 @@ pub struct BufferImpl<'a> { impl BufferInterface for BufferImpl<'_> { fn byte_stride(&self) -> NonZeroU32 { let width = self.buffer.width.get() as u32; - let bit_count = 32; - // https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader#calculating-surface-stride - // When `bit_count == 32`, this is always just equal to `width * 4`. - let stride = ((width * bit_count + 31) & !31) >> 3; - NonZeroU32::new(stride).unwrap() + NonZeroU32::new(byte_stride(width, 32)).unwrap() } fn width(&self) -> NonZeroU32 { @@ -476,3 +469,10 @@ impl From for OnlyUsedFromOrigin { Self(t) } } + +#[inline] +fn byte_stride(width: u32, bit_count: u32) -> u32 { + // + // When `bit_count == 32`, this is always just equal to `width * 4`. + ((width * bit_count + 31) & !31) >> 3 +} diff --git a/src/util.rs b/src/util.rs index 5c29071..f8d6ccc 100644 --- a/src/util.rs +++ b/src/util.rs @@ -65,3 +65,19 @@ impl ops::DerefMut for PixelBuffer { &mut self.0 } } + +/// Compute the byte stride desired by Softbuffer when a platform can use any stride. +/// +/// TODO(madsmtm): This should take the pixel format / bit depth as input after: +/// +#[inline] +pub(crate) fn byte_stride(width: u32) -> u32 { + let row_alignment = if cfg!(debug_assertions) { + 16 // Use a higher alignment to help users catch issues with their stride calculations. + } else { + 4 // At least 4 is necessary for `Buffer` to return `&mut [u32]`. + }; + // TODO: Use `next_multiple_of` when in MSRV. + let mask = row_alignment * 4 - 1; + ((width * 32 + mask) & !mask) >> 3 +}