From af30e4714ccb0dabff85298ee2305d84103b98d1 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 22 Dec 2025 11:24:08 -0500 Subject: [PATCH 1/3] [ruby/mmtk] Implement Ruby heap This heap emulates the growth characteristics of the Ruby default GC's heap. By default, the heap grows by 40%, requires at least 20% empty after a GC, and allows at most 65% empty before it shrinks the heap. This is all configurable via the same environment variables the default GC uses (`RUBY_GC_HEAP_FREE_SLOTS_GOAL_RATIO`, `RUBY_GC_HEAP_FREE_SLOTS_MIN_RATIO`, `RUBY_GC_HEAP_FREE_SLOTS_MAX_RATIO`, respectively). The Ruby heap can be enabled via the `MMTK_HEAP_MODE=ruby` environment variable. Compared to the dynamic heap in MMTk (which uses the MemBalancer algorithm), the Ruby heap allows the heap to grow more generously, which uses a bit more memory but offers significant performance gains because it runs GC much less frequently. We can see in the benchmarks below that this Ruby heap heap gives faster performance than the dynamic heap in every benchmark, with over 2x faster in many of them. We see that memory is often around 10-20% higher with certain outliers that use significantly more memory like hexapdf and erubi-rails. We can also see that this brings MMTk's Ruby heap much closer in performance to the default GC. Ruby heap benchmark results: -------------- -------------- ---------- --------- bench ruby heap (ms) stddev (%) RSS (MiB) activerecord 233.6 10.7 85.9 chunky-png 457.1 1.1 79.3 erubi-rails 1148.0 3.8 133.3 hexapdf 1570.5 2.4 403.0 liquid-c 42.8 5.3 43.4 liquid-compile 41.3 7.6 52.6 liquid-render 102.8 3.8 55.3 lobsters 651.9 8.0 426.3 mail 106.4 1.8 67.2 psych-load 1552.1 0.8 43.4 railsbench 1707.2 6.0 145.6 rubocop 127.2 15.3 148.8 ruby-lsp 136.6 11.7 113.7 sequel 47.2 5.9 44.4 shipit 1197.5 3.6 301.0 -------------- -------------- ---------- --------- Dynamic heap benchmark results: -------------- ----------------- ---------- --------- bench dynamic heap (ms) stddev (%) RSS (MiB) activerecord 845.3 3.1 76.7 chunky-png 525.9 0.4 38.9 erubi-rails 2694.9 3.4 115.8 hexapdf 2344.8 5.6 164.9 liquid-c 73.7 5.0 40.5 liquid-compile 107.1 6.8 40.3 liquid-render 147.2 1.7 39.5 lobsters 697.6 4.5 342.0 mail 224.6 2.1 64.0 psych-load 4326.7 0.6 37.4 railsbench 3218.0 5.5 124.7 rubocop 203.6 6.1 110.9 ruby-lsp 350.7 3.2 79.0 sequel 121.8 2.5 39.6 shipit 1510.1 3.1 220.8 -------------- ----------------- ---------- --------- Default GC benchmark results: -------------- --------------- ---------- --------- bench default GC (ms) stddev (%) RSS (MiB) activerecord 148.4 0.6 67.9 chunky-png 440.2 0.7 57.0 erubi-rails 722.7 0.3 97.8 hexapdf 1466.2 1.7 254.3 liquid-c 32.5 3.6 42.3 liquid-compile 31.2 1.9 35.4 liquid-render 88.3 0.7 30.8 lobsters 633.6 7.0 305.4 mail 76.6 1.6 53.2 psych-load 1166.2 1.3 29.1 railsbench 1262.9 2.3 114.7 rubocop 105.6 0.8 95.4 ruby-lsp 101.6 1.4 75.4 sequel 27.4 1.2 33.1 shipit 1083.1 1.5 163.4 -------------- --------------- ---------- --------- https://github.com/ruby/mmtk/commit/c0ca29922d --- gc/mmtk/src/api.rs | 64 +++++++++++++++- gc/mmtk/src/collection.rs | 6 ++ gc/mmtk/src/heap/mod.rs | 4 + gc/mmtk/src/heap/ruby_heap_trigger.rs | 104 ++++++++++++++++++++++++++ gc/mmtk/src/lib.rs | 1 + 5 files changed, 175 insertions(+), 4 deletions(-) create mode 100644 gc/mmtk/src/heap/mod.rs create mode 100644 gc/mmtk/src/heap/ruby_heap_trigger.rs diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs index b99cbdc939f151..d735a81cac810d 100644 --- a/gc/mmtk/src/api.rs +++ b/gc/mmtk/src/api.rs @@ -4,6 +4,7 @@ use mmtk::util::alloc::BumpPointer; use mmtk::util::alloc::ImmixAllocator; +use mmtk::util::conversions; use mmtk::util::options::PlanSelector; use std::str::FromStr; use std::sync::atomic::Ordering; @@ -13,6 +14,8 @@ use crate::abi::RubyBindingOptions; use crate::abi::RubyUpcalls; use crate::binding; use crate::binding::RubyBinding; +use crate::heap::RubyHeapTriggerConfig; +use crate::heap::RUBY_HEAP_TRIGGER_CONFIG; use crate::mmtk; use crate::utils::default_heap_max; use crate::utils::parse_capacity; @@ -79,6 +82,29 @@ fn mmtk_builder_default_parse_heap_max() -> usize { parse_env_var_with("MMTK_HEAP_MAX", parse_capacity).unwrap_or_else(default_heap_max) } +fn parse_float_env_var(key: &str, default: f64, min: f64, max: f64) -> f64 { + parse_env_var_with(key, |s| { + let mut float = f64::from_str(s).unwrap_or(default); + + if float <= min { + eprintln!( + "{key} has value {float} which must be greater than {min}, using default instead" + ); + float = default; + } + + if float >= max { + eprintln!( + "{key} has value {float} which must be less than {max}, using default instead" + ); + float = default; + } + + Some(float) + }) + .unwrap_or(default) +} + fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCTriggerSelector { let make_fixed = || GCTriggerSelector::FixedHeapSize(heap_max); let make_dynamic = || GCTriggerSelector::DynamicHeapSize(heap_min, heap_max); @@ -86,6 +112,25 @@ fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCT parse_env_var_with("MMTK_HEAP_MODE", |s| match s { "fixed" => Some(make_fixed()), "dynamic" => Some(make_dynamic()), + "ruby" => { + let min_ratio = parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MIN_RATIO", 0.2, 0.0, 1.0); + let goal_ratio = + parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_GOAL_RATIO", 0.4, min_ratio, 1.0); + let max_ratio = + parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MAX_RATIO", 0.65, goal_ratio, 1.0); + + crate::heap::RUBY_HEAP_TRIGGER_CONFIG + .set(RubyHeapTriggerConfig { + min_heap_pages: conversions::bytes_to_pages_up(heap_min), + max_heap_pages: conversions::bytes_to_pages_up(heap_max), + heap_pages_min_ratio: min_ratio, + heap_pages_goal_ratio: goal_ratio, + heap_pages_max_ratio: max_ratio, + }) + .unwrap_or_else(|_| panic!("RUBY_HEAP_TRIGGER_CONFIG is already set")); + + Some(GCTriggerSelector::Delegated) + } _ => None, }) .unwrap_or_else(make_dynamic) @@ -146,7 +191,7 @@ pub unsafe extern "C" fn mmtk_init_binding( crate::set_panic_hook(); - let builder = unsafe { Box::from_raw(builder) }; + let builder: Box = unsafe { Box::from_raw(builder) }; let binding_options = RubyBindingOptions { ractor_check_mode: false, suffix_size: 0, @@ -388,11 +433,12 @@ pub extern "C" fn mmtk_plan() -> *const u8 { pub extern "C" fn mmtk_heap_mode() -> *const u8 { static FIXED_HEAP: &[u8] = b"fixed\0"; static DYNAMIC_HEAP: &[u8] = b"dynamic\0"; + static RUBY_HEAP: &[u8] = b"ruby\0"; match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(_) => FIXED_HEAP.as_ptr(), GCTriggerSelector::DynamicHeapSize(_, _) => DYNAMIC_HEAP.as_ptr(), - _ => panic!("Unknown heap mode"), + GCTriggerSelector::Delegated => RUBY_HEAP.as_ptr(), } } @@ -401,7 +447,12 @@ pub extern "C" fn mmtk_heap_min() -> usize { match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(_) => 0, GCTriggerSelector::DynamicHeapSize(min_size, _) => min_size, - _ => panic!("Unknown heap mode"), + GCTriggerSelector::Delegated => conversions::pages_to_bytes( + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("RUBY_HEAP_TRIGGER_CONFIG not set") + .min_heap_pages, + ), } } @@ -410,7 +461,12 @@ pub extern "C" fn mmtk_heap_max() -> usize { match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(max_size) => max_size, GCTriggerSelector::DynamicHeapSize(_, max_size) => max_size, - _ => panic!("Unknown heap mode"), + GCTriggerSelector::Delegated => conversions::pages_to_bytes( + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("RUBY_HEAP_TRIGGER_CONFIG not set") + .max_heap_pages, + ), } } diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs index 41c508afd9115d..824747b04c4051 100644 --- a/gc/mmtk/src/collection.rs +++ b/gc/mmtk/src/collection.rs @@ -1,9 +1,11 @@ use crate::abi::GCThreadTLS; use crate::api::RubyMutator; +use crate::heap::RubyHeapTrigger; use crate::{mmtk, upcalls, Ruby}; use mmtk::memory_manager; use mmtk::scheduler::*; +use mmtk::util::heap::GCTriggerPolicy; use mmtk::util::{VMMutatorThread, VMThread, VMWorkerThread}; use mmtk::vm::{Collection, GCThreadContext}; use std::sync::atomic::Ordering; @@ -67,6 +69,10 @@ impl Collection for VMCollection { fn vm_live_bytes() -> usize { (upcalls().vm_live_bytes)() } + + fn create_gc_trigger() -> Box> { + Box::new(RubyHeapTrigger::default()) + } } impl VMCollection { diff --git a/gc/mmtk/src/heap/mod.rs b/gc/mmtk/src/heap/mod.rs new file mode 100644 index 00000000000000..6af7c1b2e50682 --- /dev/null +++ b/gc/mmtk/src/heap/mod.rs @@ -0,0 +1,4 @@ +mod ruby_heap_trigger; +pub use ruby_heap_trigger::RubyHeapTrigger; +pub use ruby_heap_trigger::RubyHeapTriggerConfig; +pub use ruby_heap_trigger::RUBY_HEAP_TRIGGER_CONFIG; diff --git a/gc/mmtk/src/heap/ruby_heap_trigger.rs b/gc/mmtk/src/heap/ruby_heap_trigger.rs new file mode 100644 index 00000000000000..9215e2ebb0ec04 --- /dev/null +++ b/gc/mmtk/src/heap/ruby_heap_trigger.rs @@ -0,0 +1,104 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +use mmtk::util::heap::GCTriggerPolicy; +use mmtk::util::heap::SpaceStats; +use mmtk::Plan; +use mmtk::MMTK; +use once_cell::sync::OnceCell; + +use crate::Ruby; + +pub static RUBY_HEAP_TRIGGER_CONFIG: OnceCell = OnceCell::new(); + +pub struct RubyHeapTriggerConfig { + /// Min heap size + pub min_heap_pages: usize, + /// Max heap size + pub max_heap_pages: usize, + /// Minimum ratio of empty space after a GC before the heap will grow + pub heap_pages_min_ratio: f64, + /// Ratio the heap will grow by + pub heap_pages_goal_ratio: f64, + /// Maximum ratio of empty space after a GC before the heap will shrink + pub heap_pages_max_ratio: f64, +} + +pub struct RubyHeapTrigger { + /// Target number of heap pages + target_heap_pages: AtomicUsize, +} + +impl GCTriggerPolicy for RubyHeapTrigger { + fn is_gc_required( + &self, + space_full: bool, + space: Option>, + plan: &dyn Plan, + ) -> bool { + // Let the plan decide + plan.collection_required(space_full, space) + } + + fn on_gc_end(&self, mmtk: &'static MMTK) { + if let Some(plan) = mmtk.get_plan().generational() { + if plan.is_current_gc_nursery() { + // Nursery GC + } else { + // Full GC + } + + panic!("TODO: support for generational GC not implemented") + } else { + let used_pages = mmtk.get_plan().get_used_pages(); + + let target_min = + (used_pages as f64 * (1.0 + Self::get_config().heap_pages_min_ratio)) as usize; + let target_max = + (used_pages as f64 * (1.0 + Self::get_config().heap_pages_max_ratio)) as usize; + let new_target = + (((used_pages as f64) * (1.0 + Self::get_config().heap_pages_goal_ratio)) as usize) + .clamp( + Self::get_config().min_heap_pages, + Self::get_config().max_heap_pages, + ); + + if used_pages < target_min || used_pages > target_max { + self.target_heap_pages.store(new_target, Ordering::Relaxed); + } + } + } + + fn is_heap_full(&self, plan: &dyn Plan) -> bool { + plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_current_heap_size_in_pages(&self) -> usize { + self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_max_heap_size_in_pages(&self) -> usize { + Self::get_config().max_heap_pages + } + + fn can_heap_size_grow(&self) -> bool { + self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages + } +} + +impl Default for RubyHeapTrigger { + fn default() -> Self { + let min_heap_pages = Self::get_config().min_heap_pages; + + Self { + target_heap_pages: AtomicUsize::new(min_heap_pages), + } + } +} + +impl RubyHeapTrigger { + fn get_config<'b>() -> &'b RubyHeapTriggerConfig { + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("Attempt to use RUBY_HEAP_TRIGGER_CONFIG before it is initialized") + } +} diff --git a/gc/mmtk/src/lib.rs b/gc/mmtk/src/lib.rs index 4bcafb597a2bb6..8647793522ed82 100644 --- a/gc/mmtk/src/lib.rs +++ b/gc/mmtk/src/lib.rs @@ -25,6 +25,7 @@ pub mod active_plan; pub mod api; pub mod binding; pub mod collection; +pub mod heap; pub mod object_model; pub mod reference_glue; pub mod scanning; From e69f41a0a88df1d843f5d94cc4e5e757b96300e0 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Mon, 22 Dec 2025 16:52:52 -0600 Subject: [PATCH 2/3] [DOC] Languages in Examples (#15697) * [DOC] Languages in Examples * Update doc/contributing/documentation_guide.md Co-authored-by: Jeremy Evans * Update doc/contributing/documentation_guide.md Co-authored-by: Jeremy Evans --------- Co-authored-by: Jeremy Evans --- doc/contributing/documentation_guide.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/contributing/documentation_guide.md b/doc/contributing/documentation_guide.md index 8a73543e6c9076..29a1c72b02f0de 100644 --- a/doc/contributing/documentation_guide.md +++ b/doc/contributing/documentation_guide.md @@ -326,6 +326,16 @@ Alternatives: - Example {source}[https://github.com/ruby/ruby/blob/34d802f32f00df1ac0220b62f72605827c16bad8/doc/contributing/glossary.md?plain=1]. - Corresponding {output}[https://docs.ruby-lang.org/en/master/contributing/glossary_md.html]. +### Languages in Examples + +For symbols and strings in documentation examples: + +- Prefer \English in \English documentation: 'Hello'. +- Prefer Japanese in Japanese documentation: 'こんにちは'. +- If a second language is needed (as, for example, characters with different byte-sizes), + prefer Japanese in \English documentation and \English in Japanese documentation. +- Use other languages examples only as necessary: see String#capitalize. + ## Documenting Classes and Modules The general structure of the class or module documentation should be: From 0b3199a65333a0ea8a3a653b3151674602ad4e84 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 22 Dec 2025 19:08:21 -0500 Subject: [PATCH 3/3] [DOC] Fix backticks in docs for Set#add --- set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/set.c b/set.c index 734a6ecaea2107..469078ee9968af 100644 --- a/set.c +++ b/set.c @@ -697,7 +697,7 @@ set_i_join(int argc, VALUE *argv, VALUE set) * call-seq: * add(obj) -> self * - * Adds the given object to the set and returns self. Use `merge` to + * Adds the given object to the set and returns self. Use Set#merge to * add many elements at once. * * Set[1, 2].add(3) #=> Set[1, 2, 3]