Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/backend/libc/thread/syscalls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,28 @@ pub(crate) fn sched_getcpu() -> usize {
r as usize
}

/// Backend implementation of `getcpu` that issues the `SYS_getcpu` system
/// call through `libc::syscall`.
///
/// Returns a `(cpu, node)` pair: the two `u32` out-values of the call, each
/// widened to `usize`.
#[cfg(linux_kernel)]
#[inline]
pub(crate) fn getcpu() -> (usize, usize) {
    let mut cpu_slot = core::mem::MaybeUninit::<u32>::uninit();
    let mut node_slot = core::mem::MaybeUninit::<u32>::uninit();

    // SAFETY: The two pointer arguments refer to the local `MaybeUninit<u32>`
    // slots declared above, which outlive the call; the third argument is a
    // null pointer.
    let status = unsafe {
        libc::syscall(
            libc::SYS_getcpu,
            cpu_slot.as_mut_ptr(),
            node_slot.as_mut_ptr(),
            core::ptr::null::<libc::c_void>(),
        )
    };

    // A negative return value is treated as impossible here; it is only
    // checked in debug builds.
    debug_assert!(status >= 0);

    // SAFETY: Relies on the system call above having written both slots.
    let cpu = unsafe { cpu_slot.assume_init() };
    // SAFETY: Same as above, for the node slot.
    let node = unsafe { node_slot.assume_init() };

    (cpu as usize, node as usize)
}

#[cfg(any(freebsdlike, linux_kernel, target_os = "fuchsia"))]
#[inline]
pub(crate) fn sched_getaffinity(pid: Option<Pid>, cpuset: &mut RawCpuSet) -> io::Result<()> {
Expand Down
30 changes: 28 additions & 2 deletions src/backend/linux_raw/thread/syscalls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ pub(crate) fn setgroups_thread(gids: &[crate::ugid::Gid]) -> io::Result<()> {
unsafe { ret(syscall_readonly!(__NR_setgroups, len, addr)) }
}

// `sched_getcpu` has special optimizations via the vDSO on some architectures.
// `sched_getcpu` and `getcpu` have special optimizations via the vDSO on some architectures.
#[cfg(any(
target_arch = "x86_64",
target_arch = "x86",
Expand All @@ -455,7 +455,30 @@ pub(crate) fn setgroups_thread(gids: &[crate::ugid::Gid]) -> io::Result<()> {
target_arch = "powerpc64",
target_arch = "s390x"
))]
pub(crate) use crate::backend::vdso_wrappers::sched_getcpu;
pub(crate) use crate::backend::vdso_wrappers::{getcpu, sched_getcpu};

// `getcpu` on platforms without a vDSO entry for it.
//
// Issues the `__NR_getcpu` system call directly and returns the two `u32`
// out-values as a `(cpu, numa_node)` pair widened to `usize`.
#[cfg(not(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x"
)))]
#[inline]
pub(crate) fn getcpu() -> (usize, usize) {
    let mut cpu_slot = MaybeUninit::<u32>::uninit();
    let mut node_slot = MaybeUninit::<u32>::uninit();

    // SAFETY: The first two arguments are mutable references to the local
    // slots declared above; the third argument is passed as zero.
    let outcome = unsafe { ret(syscall!(__NR_getcpu, &mut cpu_slot, &mut node_slot, zero())) };

    // Failure is treated as impossible here; it is only checked in debug
    // builds.
    debug_assert!(outcome.is_ok());

    // SAFETY: Relies on the system call above having written both slots.
    unsafe { (cpu_slot.assume_init() as usize, node_slot.assume_init() as usize) }
}

// `sched_getcpu` on platforms without a vDSO entry for it.
#[cfg(not(any(
Expand All @@ -468,6 +491,9 @@ pub(crate) use crate::backend::vdso_wrappers::sched_getcpu;
)))]
#[inline]
pub(crate) fn sched_getcpu() -> usize {
// We should not implement this function by using the `getcpu` function defined above
// because we want to provide exactly one pointer to the system call.

let mut cpu = MaybeUninit::<u32>::uninit();
unsafe {
let r = ret(syscall!(__NR_getcpu, &mut cpu, zero(), zero()));
Expand Down
33 changes: 33 additions & 0 deletions src/backend/linux_raw/vdso_wrappers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,35 @@ pub(crate) fn clock_gettime_dynamic(id: DynamicClockId<'_>) -> io::Result<Timesp
}
}

/// `getcpu` implemented through the function whose address is cached in
/// `GETCPU`, falling back to `init_getcpu` when no address is cached yet.
///
/// Returns a `(cpu, numa_node)` pair: the two `u32` out-values of the call,
/// each widened to `usize`.
#[cfg(feature = "thread")]
#[cfg(any(
    target_arch = "x86_64",
    target_arch = "x86",
    target_arch = "riscv64",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "s390x",
))]
#[inline]
pub(crate) fn getcpu() -> (usize, usize) {
    let mut cpu_slot = MaybeUninit::<u32>::uninit();
    let mut node_slot = MaybeUninit::<u32>::uninit();

    // SAFETY: `GETCPU` contains either null or the address of a function with
    // an ABI like libc `getcpu`, and calling it has the side effect of writing
    // to the result buffers, and no others.
    unsafe {
        // A null value in `GETCPU` transmutes to `None`, which selects the
        // initialization path.
        let vdso_fn = match transmute(GETCPU.load(Relaxed)) {
            Some(cached) => cached,
            None => init_getcpu(),
        };
        let status = vdso_fn(cpu_slot.as_mut_ptr(), node_slot.as_mut_ptr(), null_mut());

        // A nonzero status is treated as impossible here; it is only checked
        // in debug builds.
        debug_assert_eq!(status, 0);

        (cpu_slot.assume_init() as usize, node_slot.assume_init() as usize)
    }
}

#[cfg(feature = "thread")]
#[cfg(any(
target_arch = "x86_64",
Expand All @@ -128,6 +157,9 @@ pub(crate) fn clock_gettime_dynamic(id: DynamicClockId<'_>) -> io::Result<Timesp
))]
#[inline]
pub(crate) fn sched_getcpu() -> usize {
// We should not implement this function by using the `getcpu` function defined above
// because we want to provide exactly one pointer to the system call.

// SAFETY: `GETCPU` contains either null or the address of a function with
// an ABI like libc `getcpu`, and calling it has the side effect of writing
// to the result buffers, and no others.
Expand Down Expand Up @@ -308,6 +340,7 @@ fn init_clock_gettime() -> ClockGettimeType {
target_arch = "s390x",
))]
#[cold]
#[inline(never)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason for inline(never) here? It's already marked #[cold], which should have the desired effect. If a compiler decides it really wants to inline this, even given what we've told it, that seems fine.

Copy link
Author

@Eugene-Usachev Eugene-Usachev Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am still sure that non-inlining provides a better performance, but I agree with both points: the difference is tiny and the compiler can do it as it wants. But I want to explicitly tell the compiler to generate exactly one call method for the calling this function that should be called only once. If you don't like this, I can't roll back this change. After all, the main goal of the PR is adding getcpu.

fn init_getcpu() -> GetcpuType {
init();
// SAFETY: Load the function address from static storage that we just
Expand Down
16 changes: 8 additions & 8 deletions src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,8 @@ mod tests {
let nread = read(&input, &mut buf).unwrap();
assert_eq!(nread, buf.len());
assert_eq!(
&buf[..58],
b"//! Utilities for functions that return data via buffers.\n"
&buf[..57],
b"//! Utilities for functions that return data via buffers."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this code changing?

Copy link
Author

@Eugene-Usachev Eugene-Usachev Feb 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I develop with WSL, which adds \r\n at the end of lines, so I patched tests for not reading the end of lines. cargo test fails if we try to read 58 characters (with the end of line char).

);
input.seek(SeekFrom::End(-1)).unwrap();
let nread = read(&input, &mut buf).unwrap();
Expand All @@ -407,13 +407,13 @@ mod tests {
let (init, uninit) = read(&input, &mut buf).unwrap();
assert_eq!(uninit.len(), 0);
assert_eq!(
&init[..58],
b"//! Utilities for functions that return data via buffers.\n"
&init[..57],
b"//! Utilities for functions that return data via buffers."
);
assert_eq!(init.len(), buf.len());
assert_eq!(
unsafe { core::mem::transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(&mut buf[..58]) },
b"//! Utilities for functions that return data via buffers.\n"
unsafe { core::mem::transmute::<&mut [MaybeUninit<u8>], &mut [u8]>(&mut buf[..57]) },
b"//! Utilities for functions that return data via buffers."
);
input.seek(SeekFrom::End(-1)).unwrap();
let (init, uninit) = read(&input, &mut buf).unwrap();
Expand All @@ -440,8 +440,8 @@ mod tests {
assert_eq!(nread, buf.capacity());
assert_eq!(nread, buf.len());
assert_eq!(
&buf[..58],
b"//! Utilities for functions that return data via buffers.\n"
&buf[..57],
b"//! Utilities for functions that return data via buffers."
);
buf.clear();
input.seek(SeekFrom::End(-1)).unwrap();
Expand Down
26 changes: 24 additions & 2 deletions src/thread/sched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use core::{fmt, hash};
/// - [Linux]
///
/// [Linux]: https://man7.org/linux/man-pages/man3/CPU_SET.3.html
/// [`sched_setaffinity`]: crate::thread::sched_setaffinity
/// [`sched_getaffinity`]: crate::thread::sched_getaffinity
/// [`sched_setaffinity`]: sched_setaffinity
/// [`sched_getaffinity`]: sched_getaffinity
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct CpuSet {
Expand Down Expand Up @@ -159,3 +159,25 @@ pub fn sched_getaffinity(pid: Option<Pid>) -> io::Result<CpuSet> {
pub fn sched_getcpu() -> usize {
backend::thread::syscalls::sched_getcpu()
}

/// `getcpu()`—Get the CPU and NUMA node that the current thread is currently on.
///
/// # Example
///
/// ```rust
/// use rustix::thread::getcpu;
///
/// let (core, numa_node) = getcpu();
///
/// println!("The current thread was on the {core} core and {numa_node} numa node.");
/// ```
///
/// # References
/// - [Linux]
///
/// [Linux]: https://man7.org/linux/man-pages/man2/getcpu.2.html
#[cfg(linux_kernel)]
#[inline]
pub fn getcpu() -> (usize, usize) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the Linux docs, the values written by getcpu have type unsigned int.

   int getcpu(unsigned int *_Nullable cpu, unsigned int *_Nullable node);

Would it be better to reflect them here as u32, rather than usize?

I see that sched_getcpu already returns usize, but that appears to be an error.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to do the same as sched_getcpu. To be honest, I am not sure what users prefer more here. I use numa_node: usize in my code, but I can't say everyone does it. If you want me to change it I can do it but I think it is not important.

backend::thread::syscalls::getcpu()
}
Loading