//! Pass timing. //! //! This modules provides facilities for timing the execution of individual compilation passes. use core::fmt; use std::any::Any; use std::boxed::Box; use std::cell::{Cell, RefCell}; use std::mem; use std::time::{Duration, Instant}; // Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a // snake_case name and a plain text description used when printing out the timing report. // // This macro defines: // // - A C-style enum containing all the pass names and a `None` variant. // - A usize constant with the number of defined passes. // - A const array of pass descriptions. // - A public function per pass used to start the timing of that pass. macro_rules! define_passes { ($($pass:ident: $desc:expr,)+) => { /// A single profiled pass. #[allow(non_camel_case_types)] #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Pass { $(#[doc=$desc] $pass,)+ /// No active pass. None, } /// The amount of profiled passes. pub const NUM_PASSES: usize = Pass::None as usize; const DESCRIPTIONS: [&str; NUM_PASSES] = [ $($desc),+ ]; $( #[doc=$desc] #[must_use] pub fn $pass() -> Box { start_pass(Pass::$pass) } )+ } } // Pass definitions. define_passes! { // All these are used in other crates but defined here so they appear in the unified // `PassTimes` output. process_file: "Processing test file", parse_text: "Parsing textual Cranelift IR", wasm_translate_module: "Translate WASM module", wasm_translate_function: "Translate WASM function", verifier: "Verify Cranelift IR", compile: "Compilation passes", try_incremental_cache: "Try loading from incremental cache", store_incremental_cache: "Store in incremental cache", flowgraph: "Control flow graph", domtree: "Dominator tree", loop_analysis: "Loop analysis", preopt: "Pre-legalization rewriting", dce: "Dead code elimination", egraph: "Egraph based optimizations", gvn: "Global value numbering", licm: "Loop invariant code motion", unreachable_code: "Remove unreachable blocks", remove_constant_phis: "Remove constant phi-nodes", vcode_lower: "VCode lowering", vcode_emit: "VCode emission", vcode_emit_finish: "VCode emission finalization", regalloc: "Register allocation", regalloc_checker: "Register allocation symbolic verification", layout_renumber: "Layout full renumbering", canonicalize_nans: "Canonicalization of NaNs", } impl Pass { fn idx(self) -> usize { self as usize } /// Description of the pass. pub fn description(self) -> &'static str { match DESCRIPTIONS.get(self.idx()) { Some(s) => s, None => "", } } } impl fmt::Display for Pass { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(self.description()) } } /// A profiler. pub trait Profiler { /// Start a profiling pass. /// /// Will return a token which when dropped indicates the end of the pass. /// /// Multiple passes can be active at the same time, but they must be started and stopped in a /// LIFO fashion. fn start_pass(&self, pass: Pass) -> Box; } // Information about passes in a single thread. thread_local! { static PROFILER: RefCell> = RefCell::new(Box::new(DefaultProfiler)); } /// Set the profiler for the current thread. /// /// Returns the old profiler. pub fn set_thread_profiler(new_profiler: Box) -> Box { PROFILER.with(|profiler| std::mem::replace(&mut *profiler.borrow_mut(), new_profiler)) } /// Start timing `pass` as a child of the currently running pass, if any. /// /// This function is called by the publicly exposed pass functions. fn start_pass(pass: Pass) -> Box { PROFILER.with(|profiler| profiler.borrow().start_pass(pass)) } /// A timing token is responsible for timing the currently running pass. Timing starts when it /// is created and ends when it is dropped. /// /// Multiple passes can be active at the same time, but they must be started and stopped in a /// LIFO fashion. struct DefaultTimingToken { /// Start time for this pass. start: Instant, // Pass being timed by this token. pass: Pass, // The previously active pass which will be restored when this token is dropped. prev: Pass, } /// Accumulated timing information for a single pass. #[derive(Default, Copy, Clone)] struct PassTime { /// Total time spent running this pass including children. total: Duration, /// Time spent running in child passes. child: Duration, } /// Accumulated timing for all passes. pub struct PassTimes { pass: [PassTime; NUM_PASSES], } impl PassTimes { /// Add `other` to the timings of this `PassTimes`. pub fn add(&mut self, other: &Self) { for (a, b) in self.pass.iter_mut().zip(&other.pass[..]) { a.total += b.total; a.child += b.child; } } /// Returns the total amount of time taken by all the passes measured. pub fn total(&self) -> Duration { self.pass.iter().map(|p| p.total - p.child).sum() } } impl Default for PassTimes { fn default() -> Self { Self { pass: [Default::default(); NUM_PASSES], } } } impl fmt::Display for PassTimes { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { writeln!(f, "======== ======== ==================================")?; writeln!(f, " Total Self Pass")?; writeln!(f, "-------- -------- ----------------------------------")?; for (time, desc) in self.pass.iter().zip(&DESCRIPTIONS[..]) { // Omit passes that haven't run. if time.total == Duration::default() { continue; } // Write a duration as secs.millis, trailing space. fn fmtdur(mut dur: Duration, f: &mut fmt::Formatter) -> fmt::Result { // Round to nearest ms by adding 500us. dur += Duration::new(0, 500_000); let ms = dur.subsec_millis(); write!(f, "{:4}.{:03} ", dur.as_secs(), ms) } fmtdur(time.total, f)?; if let Some(s) = time.total.checked_sub(time.child) { fmtdur(s, f)?; } writeln!(f, " {}", desc)?; } writeln!(f, "======== ======== ==================================") } } // Information about passes in a single thread. thread_local! { static CURRENT_PASS: Cell = const { Cell::new(Pass::None) }; static PASS_TIME: RefCell = RefCell::new(Default::default()); } /// The default profiler. You can get the results using [`take_current`]. pub struct DefaultProfiler; impl Profiler for DefaultProfiler { fn start_pass(&self, pass: Pass) -> Box { let prev = CURRENT_PASS.with(|p| p.replace(pass)); log::debug!("timing: Starting {}, (during {})", pass, prev); Box::new(DefaultTimingToken { start: Instant::now(), pass, prev, }) } } /// Dropping a timing token indicated the end of the pass. impl Drop for DefaultTimingToken { fn drop(&mut self) { let duration = self.start.elapsed(); log::debug!("timing: Ending {}: {}ms", self.pass, duration.as_millis()); let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev)); debug_assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order"); PASS_TIME.with(|rc| { let mut table = rc.borrow_mut(); table.pass[self.pass.idx()].total += duration; if let Some(parent) = table.pass.get_mut(self.prev.idx()) { parent.child += duration; } }) } } /// Take the current accumulated pass timings and reset the timings for the current thread. /// /// Only applies when [`DefaultProfiler`] is used. pub fn take_current() -> PassTimes { PASS_TIME.with(|rc| mem::take(&mut *rc.borrow_mut())) } #[cfg(test)] mod tests { use super::*; use alloc::string::ToString; #[test] fn display() { assert_eq!(Pass::None.to_string(), ""); assert_eq!(Pass::regalloc.to_string(), "Register allocation"); } }