Skip to content

Commit c617965

Browse files
authored
feat(timings): derive concurrency data from unit data (#16350)
### What does this PR try to resolve? Added a post-processing that derives concurrency information (active/waiting/inactive units over time) from completed unit timing data. This enables eliminating runtime concurrency tracking. Part of <#15844>. ### How to test and review this PR? Generate timing report before and after this, and compare side by side. (With and without `-Zsection-timings` enabled)
2 parents 7a776fc + 7814230 commit c617965

File tree

3 files changed

+169
-52
lines changed

3 files changed

+169
-52
lines changed

src/cargo/core/compiler/job_queue/mod.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -887,11 +887,6 @@ impl<'gctx> DrainState<'gctx> {
887887
// Record some timing information if `--timings` is enabled, and
888888
// this'll end up being a noop if we're not recording this
889889
// information.
890-
self.timings.mark_concurrency(
891-
self.active.len(),
892-
self.pending_queue.len(),
893-
self.queue.len(),
894-
);
895890
self.timings.record_cpu();
896891

897892
let active_names = self

src/cargo/core/compiler/timings/mod.rs

Lines changed: 2 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,6 @@ pub struct Timings<'gctx> {
6262
/// Units that are in the process of being built.
6363
/// When they finished, they are moved to `unit_times`.
6464
active: HashMap<JobId, UnitTime>,
65-
/// Concurrency-tracking information. This is periodically updated while
66-
/// compilation progresses.
67-
concurrency: Vec<Concurrency>,
6865
/// Last recorded state of the system's CPUs and when it happened
6966
last_cpu_state: Option<State>,
7067
last_cpu_recording: Instant,
@@ -106,20 +103,6 @@ struct UnitTime {
106103
sections: IndexMap<String, CompilationSection>,
107104
}
108105

109-
/// Periodic concurrency tracking information.
110-
#[derive(serde::Serialize)]
111-
struct Concurrency {
112-
/// Time as an offset in seconds from `Timings::start`.
113-
t: f64,
114-
/// Number of units currently running.
115-
active: usize,
116-
/// Number of units that could run, but are waiting for a jobserver token.
117-
waiting: usize,
118-
/// Number of units that are not yet ready, because they are waiting for
119-
/// dependencies to finish.
120-
inactive: usize,
121-
}
122-
123106
/// Data for a single compilation unit, prepared for serialization to JSON.
124107
///
125108
/// This is used by the HTML report's JavaScript to render the pipeline graph.
@@ -161,7 +144,6 @@ impl<'gctx> Timings<'gctx> {
161144
unit_to_index: HashMap::new(),
162145
unit_times: Vec::new(),
163146
active: HashMap::new(),
164-
concurrency: Vec::new(),
165147
last_cpu_state: None,
166148
last_cpu_recording: Instant::now(),
167149
cpu_usage: Vec::new(),
@@ -214,7 +196,6 @@ impl<'gctx> Timings<'gctx> {
214196
unit_to_index,
215197
unit_times: Vec::new(),
216198
active: HashMap::new(),
217-
concurrency: Vec::new(),
218199
last_cpu_state,
219200
last_cpu_recording: Instant::now(),
220201
cpu_usage: Vec::new(),
@@ -384,20 +365,6 @@ impl<'gctx> Timings<'gctx> {
384365
}
385366
}
386367

387-
/// This is called periodically to mark the concurrency of internal structures.
388-
pub fn mark_concurrency(&mut self, active: usize, waiting: usize, inactive: usize) {
389-
if !self.enabled {
390-
return;
391-
}
392-
let c = Concurrency {
393-
t: self.start.elapsed().as_secs_f64(),
394-
active,
395-
waiting,
396-
inactive,
397-
};
398-
self.concurrency.push(c);
399-
}
400-
401368
/// Mark that a fresh unit was encountered. (No re-compile needed)
402369
pub fn add_fresh(&mut self) {
403370
self.total_fresh += 1;
@@ -444,7 +411,6 @@ impl<'gctx> Timings<'gctx> {
444411
if !self.enabled {
445412
return Ok(());
446413
}
447-
self.mark_concurrency(0, 0, 0);
448414
self.unit_times
449415
.sort_unstable_by(|a, b| a.start.partial_cmp(&b.start).unwrap());
450416
if self.report_html {
@@ -473,6 +439,7 @@ impl<'gctx> Timings<'gctx> {
473439
.collect::<Vec<_>>();
474440

475441
let unit_data = report::to_unit_data(&self.unit_times, &self.unit_to_index);
442+
let concurrency = report::compute_concurrency(&unit_data);
476443

477444
let ctx = report::RenderContext {
478445
start: self.start,
@@ -482,7 +449,7 @@ impl<'gctx> Timings<'gctx> {
482449
total_fresh: self.total_fresh,
483450
total_dirty: self.total_dirty,
484451
unit_data,
485-
concurrency: &self.concurrency,
452+
concurrency,
486453
cpu_usage: &self.cpu_usage,
487454
rustc_version,
488455
host: &build_runner.bcx.rustc().host,

src/cargo/core/compiler/timings/report.rs

Lines changed: 167 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use std::borrow::Cow;
44
use std::collections::HashMap;
5+
use std::collections::HashSet;
56
use std::io::Write;
67
use std::time::Instant;
78

@@ -10,7 +11,6 @@ use itertools::Itertools as _;
1011
use crate::CargoResult;
1112
use crate::core::compiler::Unit;
1213

13-
use super::Concurrency;
1414
use super::UnitData;
1515
use super::UnitTime;
1616

@@ -81,6 +81,20 @@ impl SectionData {
8181
}
8282
}
8383

84+
/// Concurrency tracking information.
85+
#[derive(serde::Serialize)]
86+
pub struct Concurrency {
87+
/// Time as an offset in seconds from `Timings::start`.
88+
t: f64,
89+
/// Number of units currently running.
90+
active: usize,
91+
/// Number of units that could run, but are waiting for a jobserver token.
92+
waiting: usize,
93+
/// Number of units that are not yet ready, because they are waiting for
94+
/// dependencies to finish.
95+
inactive: usize,
96+
}
97+
8498
pub struct RenderContext<'a> {
8599
/// When Cargo started.
86100
pub start: Instant,
@@ -100,7 +114,7 @@ pub struct RenderContext<'a> {
100114
pub unit_data: Vec<UnitData>,
101115
/// Concurrency-tracking information. This is periodically updated while
102116
/// compilation progresses.
103-
pub concurrency: &'a [Concurrency],
117+
pub concurrency: Vec<Concurrency>,
104118
/// Recorded CPU states, stored as tuples. First element is when the
105119
/// recording was taken and second element is percentage usage of the
106120
/// system.
@@ -346,7 +360,6 @@ pub(super) fn to_unit_data(
346360
unit_times: &[UnitTime],
347361
unit_map: &HashMap<Unit, u64>,
348362
) -> Vec<UnitData> {
349-
let round = |x: f64| (x * 100.0).round() / 100.0;
350363
unit_times
351364
.iter()
352365
.map(|ut| (unit_map[&ut.unit], ut))
@@ -382,8 +395,8 @@ pub(super) fn to_unit_data(
382395
mode,
383396
target: ut.target.clone(),
384397
features: ut.unit.features.iter().map(|f| f.to_string()).collect(),
385-
start: round(ut.start),
386-
duration: round(ut.duration),
398+
start: round_to_centisecond(ut.start),
399+
duration: round_to_centisecond(ut.duration),
387400
unblocked_units,
388401
unblocked_rmeta_units,
389402
sections,
@@ -392,6 +405,131 @@ pub(super) fn to_unit_data(
392405
.collect()
393406
}
394407

408+
/// Derives concurrency information from unit timing data.
409+
pub(super) fn compute_concurrency(unit_data: &[UnitData]) -> Vec<Concurrency> {
410+
if unit_data.is_empty() {
411+
return Vec::new();
412+
}
413+
414+
let unit_by_index: HashMap<_, _> = unit_data.iter().map(|u| (u.i, u)).collect();
415+
416+
enum UnblockedBy {
417+
Rmeta(u64),
418+
Full(u64),
419+
}
420+
421+
// unit_id -> unit that unblocks it.
422+
let mut unblocked_by: HashMap<_, _> = HashMap::new();
423+
for unit in unit_data {
424+
for id in unit.unblocked_rmeta_units.iter() {
425+
assert!(
426+
unblocked_by
427+
.insert(*id, UnblockedBy::Rmeta(unit.i))
428+
.is_none()
429+
);
430+
}
431+
432+
for id in unit.unblocked_units.iter() {
433+
assert!(
434+
unblocked_by
435+
.insert(*id, UnblockedBy::Full(unit.i))
436+
.is_none()
437+
);
438+
}
439+
}
440+
441+
let ready_time = |unit: &UnitData| -> Option<f64> {
442+
let dep = unblocked_by.get(&unit.i)?;
443+
match dep {
444+
UnblockedBy::Rmeta(id) => {
445+
let dep = unit_by_index.get(id)?;
446+
let duration = dep.sections.iter().flatten().find_map(|(name, section)| {
447+
matches!(name, SectionName::Frontend).then_some(section.end)
448+
});
449+
450+
Some(dep.start + duration.unwrap_or(dep.duration))
451+
}
452+
UnblockedBy::Full(id) => {
453+
let dep = unit_by_index.get(id)?;
454+
Some(dep.start + dep.duration)
455+
}
456+
}
457+
};
458+
459+
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
460+
enum State {
461+
Ready,
462+
Start,
463+
End,
464+
}
465+
466+
let mut events: Vec<_> = unit_data
467+
.iter()
468+
.flat_map(|unit| {
469+
// Adding rounded numbers may cause ready > start,
470+
// so cap with unit.start here to be defensive.
471+
let ready = ready_time(unit).unwrap_or(unit.start).min(unit.start);
472+
473+
[
474+
(ready, State::Ready, unit.i),
475+
(unit.start, State::Start, unit.i),
476+
(unit.start + unit.duration, State::End, unit.i),
477+
]
478+
})
479+
.collect();
480+
481+
events.sort_by(|a, b| {
482+
a.0.partial_cmp(&b.0)
483+
.unwrap()
484+
.then_with(|| a.1.cmp(&b.1))
485+
.then_with(|| a.2.cmp(&b.2))
486+
});
487+
488+
let mut concurrency: Vec<Concurrency> = Vec::new();
489+
let mut inactive: HashSet<u64> = unit_data.iter().map(|unit| unit.i).collect();
490+
let mut waiting: HashSet<u64> = HashSet::new();
491+
let mut active: HashSet<u64> = HashSet::new();
492+
493+
for (t, state, unit_id) in events {
494+
match state {
495+
State::Ready => {
496+
inactive.remove(&unit_id);
497+
waiting.insert(unit_id);
498+
active.remove(&unit_id);
499+
}
500+
State::Start => {
501+
inactive.remove(&unit_id);
502+
waiting.remove(&unit_id);
503+
active.insert(unit_id);
504+
}
505+
State::End => {
506+
inactive.remove(&unit_id);
507+
waiting.remove(&unit_id);
508+
active.remove(&unit_id);
509+
}
510+
}
511+
512+
let record = Concurrency {
513+
t,
514+
active: active.len(),
515+
waiting: waiting.len(),
516+
inactive: inactive.len(),
517+
};
518+
519+
if let Some(last) = concurrency.last_mut()
520+
&& last.t == t
521+
{
522+
// We don't want to draw long vertical lines at the same timestamp,
523+
// so we keep only the latest state.
524+
*last = record;
525+
} else {
526+
concurrency.push(record);
527+
}
528+
}
529+
530+
concurrency
531+
}
532+
395533
/// Aggregates section timing information from individual compilation sections.
396534
///
397535
/// We can have a bunch of situations here.
@@ -413,7 +551,13 @@ fn aggregate_sections(unit_time: &UnitTime) -> AggregatedSections {
413551
// The frontend section is currently implicit in rustc.
414552
// It is assumed to start at compilation start and end when codegen starts,
415553
// So we hard-code it here.
416-
vec![(SectionName::Frontend, SectionData { start: 0.0, end })],
554+
vec![(
555+
SectionName::Frontend,
556+
SectionData {
557+
start: 0.0,
558+
end: round_to_centisecond(end),
559+
},
560+
)],
417561
|mut sections, (name, section)| {
418562
let previous = sections.last_mut().unwrap();
419563
// Setting the end of previous to the start of the current.
@@ -422,8 +566,8 @@ fn aggregate_sections(unit_time: &UnitTime) -> AggregatedSections {
422566
sections.push((
423567
SectionName::Named(name),
424568
SectionData {
425-
start: section.start,
426-
end: section.end.unwrap_or(end),
569+
start: round_to_centisecond(section.start),
570+
end: round_to_centisecond(section.end.unwrap_or(end)),
427571
},
428572
));
429573

@@ -443,8 +587,8 @@ fn aggregate_sections(unit_time: &UnitTime) -> AggregatedSections {
443587
sections.push((
444588
SectionName::Other,
445589
SectionData {
446-
start: section.end,
447-
end,
590+
start: round_to_centisecond(section.end),
591+
end: round_to_centisecond(end),
448592
},
449593
));
450594
}
@@ -457,17 +601,28 @@ fn aggregate_sections(unit_time: &UnitTime) -> AggregatedSections {
457601
SectionName::Frontend,
458602
SectionData {
459603
start: 0.0,
460-
end: rmeta,
604+
end: round_to_centisecond(rmeta),
605+
},
606+
),
607+
(
608+
SectionName::Codegen,
609+
SectionData {
610+
start: round_to_centisecond(rmeta),
611+
end: round_to_centisecond(end),
461612
},
462613
),
463-
(SectionName::Codegen, SectionData { start: rmeta, end }),
464614
])
465615
} else {
466616
// We only know the total duration
467617
AggregatedSections::OnlyTotalDuration
468618
}
469619
}
470620

621+
/// Rounds seconds to 0.01s precision.
622+
fn round_to_centisecond(x: f64) -> f64 {
623+
(x * 100.0).round() / 100.0
624+
}
625+
471626
static HTML_TMPL: &str = r#"
472627
<html>
473628
<head>

0 commit comments

Comments
 (0)