Skip to content

Commit 00b5064

Browse files
authored
Optimize: skip logging and stats collection calls if they are no-ops (#20839)
We had lots of logging that was fairly expensive even when disabled, since just calling the functions or constructing the log messages adds overhead, and calling `time.time()` to collect timings has a cost. Now put some of the most expensive logging/stats calls behind fast bool flag checks. Based on CPU profiles, this should improve the performance of sufficiently large, mostly cached incremental runs by at least 2%, and potentially by even more (hard to predict).
1 parent d52169d commit 00b5064

File tree

1 file changed

+93
-50
lines changed

1 file changed

+93
-50
lines changed

mypy/build.py

Lines changed: 93 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,12 @@ def __init__(
814814
if not isinstance(plugin, ChainedPlugin):
815815
plugin = ChainedPlugin(options, [plugin])
816816
self.plugin = plugin
817+
# These allow quickly skipping logging and stats collection calls. Note
818+
# that some stats impact mypy behavior, so be careful when skipping stats
819+
# collection calls.
820+
self.stats_enabled = self.options.dump_build_stats
821+
self.logging_enabled = self.options.verbosity >= 1
822+
self.tracing_enabled = self.options.verbosity >= 2
817823
# Set of namespaces (module or class) that are being populated during semantic
818824
# analysis and may have missing definitions.
819825
self.incomplete_namespaces: set[str] = set()
@@ -928,7 +934,7 @@ def __init__(
928934
self.known_partial_packages: dict[str, bool] = {}
929935

930936
def dump_stats(self) -> None:
931-
if self.options.dump_build_stats:
937+
if self.stats_enabled:
932938
print("Stats:")
933939
for key, value in sorted(self.stats_summary().items()):
934940
print(f"{key + ':':24}{value}")
@@ -1478,14 +1484,23 @@ def read_deps_cache(manager: BuildManager, graph: Graph) -> dict[str, FgDepMeta]
14781484
return module_deps_metas
14791485

14801486

1481-
def _load_ff_file(file: str, manager: BuildManager, log_error: str) -> bytes | None:
1482-
t0 = time.time()
1487+
def _load_ff_file(
1488+
file: str, manager: BuildManager, log_error_fmt: str, id: str | None
1489+
) -> bytes | None:
1490+
if manager.stats_enabled:
1491+
t0 = time.time()
14831492
try:
14841493
data = manager.metastore.read(file)
14851494
except OSError:
1486-
manager.log(log_error + file)
1495+
if manager.logging_enabled:
1496+
if id:
1497+
message = log_error_fmt.format(id) + file
1498+
else:
1499+
message = log_error_fmt + file
1500+
manager.log(message)
14871501
return None
1488-
manager.add_stats(metastore_read_time=time.time() - t0)
1502+
if manager.stats_enabled:
1503+
manager.add_stats(metastore_read_time=time.time() - t0)
14891504
return data
14901505

14911506

@@ -1653,11 +1668,15 @@ def find_cache_meta(
16531668
"""
16541669
# TODO: May need to take more build options into account
16551670
meta_file, data_file, _ = get_cache_names(id, path, manager.options)
1656-
manager.trace(f"Looking for {id} at {meta_file}")
1671+
if manager.tracing_enabled:
1672+
manager.trace(f"Looking for {id} at {meta_file}")
16571673
meta: bytes | dict[str, Any] | None
1658-
t0 = time.time()
1674+
if manager.stats_enabled:
1675+
t0 = time.time()
16591676
if manager.options.fixed_format_cache:
1660-
meta = _load_ff_file(meta_file, manager, log_error=f"Could not load cache for {id}: ")
1677+
meta = _load_ff_file(
1678+
meta_file, manager, log_error_fmt="Could not load cache for {}: ", id=id
1679+
)
16611680
if meta is None:
16621681
return None
16631682
else:
@@ -1674,7 +1693,8 @@ def find_cache_meta(
16741693
f"Could not load cache for {id}: meta cache is not a dict: {repr(meta)}"
16751694
)
16761695
return None
1677-
t1 = time.time()
1696+
if manager.stats_enabled:
1697+
t1 = time.time()
16781698
if isinstance(meta, bytes):
16791699
# If either low-level buffer format or high-level cache layout changed, we
16801700
# cannot use the cache files, even with --skip-version-check.
@@ -1689,10 +1709,11 @@ def find_cache_meta(
16891709
if m is None:
16901710
manager.log(f"Metadata abandoned for {id}: cannot deserialize data")
16911711
return None
1692-
t2 = time.time()
1693-
manager.add_stats(
1694-
load_meta_time=t2 - t0, load_meta_load_time=t1 - t0, load_meta_from_dict_time=t2 - t1
1695-
)
1712+
if manager.stats_enabled:
1713+
t2 = time.time()
1714+
manager.add_stats(
1715+
load_meta_time=t2 - t0, load_meta_load_time=t1 - t0, load_meta_from_dict_time=t2 - t1
1716+
)
16961717
if skip_validation:
16971718
return m
16981719

@@ -1769,7 +1790,8 @@ def validate_meta(
17691790
manager.log(f"Metadata abandoned for {id}: errors were previously ignored")
17701791
return None
17711792

1772-
t0 = time.time()
1793+
if manager.stats_enabled:
1794+
t0 = time.time()
17731795
bazel = manager.options.bazel
17741796
assert path is not None, "Internal error: meta was provided without a path"
17751797
if not manager.options.skip_cache_mtime_checks:
@@ -1794,7 +1816,8 @@ def validate_meta(
17941816
manager.log(f"Metadata abandoned for {id}: file or directory {path} does not exist")
17951817
return None
17961818

1797-
manager.add_stats(validate_stat_time=time.time() - t0)
1819+
if manager.stats_enabled:
1820+
manager.add_stats(validate_stat_time=time.time() - t0)
17981821

17991822
# When we are using a fine-grained cache, we want our initial
18001823
# build() to load all of the cache information and then do a
@@ -1850,25 +1873,31 @@ def validate_meta(
18501873
manager.log(f"Metadata abandoned for {id}: file {path} has different hash")
18511874
return None
18521875
else:
1853-
t0 = time.time()
1876+
if manager.stats_enabled:
1877+
t0 = time.time()
18541878
# Optimization: update mtime and path (otherwise, this mismatch will reappear).
18551879
meta.mtime = mtime
18561880
meta.path = path
18571881
meta.size = size
18581882
meta.options = options_snapshot(id, manager)
18591883
meta_file, _, _ = get_cache_names(id, path, manager.options)
1860-
manager.log(
1861-
"Updating mtime for {}: file {}, meta {}, mtime {}".format(
1862-
id, path, meta_file, meta.mtime
1884+
if manager.logging_enabled:
1885+
manager.log(
1886+
"Updating mtime for {}: file {}, meta {}, mtime {}".format(
1887+
id, path, meta_file, meta.mtime
1888+
)
18631889
)
1864-
)
18651890
write_cache_meta(meta, manager, meta_file)
1866-
t1 = time.time()
1867-
manager.add_stats(validate_update_time=time.time() - t1, validate_munging_time=t1 - t0)
1891+
if manager.stats_enabled:
1892+
t1 = time.time()
1893+
manager.add_stats(
1894+
validate_update_time=time.time() - t1, validate_munging_time=t1 - t0
1895+
)
18681896
return meta
18691897

18701898
# It's a match on (id, path, size, hash, mtime).
1871-
manager.log(f"Metadata fresh for {id}: file {path}")
1899+
if manager.logging_enabled:
1900+
manager.log(f"Metadata fresh for {id}: file {path}")
18721901
return meta
18731902

18741903

@@ -2337,9 +2366,11 @@ def new_state(
23372366
if path and source is None and manager.fscache.isdir(path):
23382367
source = ""
23392368

2340-
t0 = time.time()
2369+
if manager.stats_enabled:
2370+
t0 = time.time()
23412371
meta = validate_meta(meta, id, path, ignore_all, manager)
2342-
manager.add_stats(validate_meta_time=time.time() - t0)
2372+
if manager.stats_enabled:
2373+
manager.add_stats(validate_meta_time=time.time() - t0)
23432374

23442375
if meta:
23452376
# Make copies, since we may modify these and want to
@@ -2687,7 +2718,7 @@ def load_tree(self, temporary: bool = False) -> None:
26872718

26882719
data: bytes | dict[str, Any] | None
26892720
if self.options.fixed_format_cache:
2690-
data = _load_ff_file(data_file, self.manager, "Could not load tree: ")
2721+
data = _load_ff_file(data_file, self.manager, "Could not load tree: ", None)
26912722
else:
26922723
data = _load_json_file(data_file, self.manager, "Load tree ", "Could not load tree: ")
26932724
if data is None:
@@ -3364,19 +3395,23 @@ def exist_removed_submodules(dependencies: list[str], manager: BuildManager) ->
33643395

33653396
def find_module_simple(id: str, manager: BuildManager) -> str | None:
33663397
"""Find a filesystem path for module `id` or `None` if not found."""
3367-
t0 = time.time()
3398+
if manager.stats_enabled:
3399+
t0 = time.time()
33683400
x = manager.find_module_cache.find_module(id, fast_path=True)
3369-
manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1)
3401+
if manager.stats_enabled:
3402+
manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1)
33703403
if isinstance(x, ModuleNotFoundReason):
33713404
return None
33723405
return x
33733406

33743407

33753408
def find_module_with_reason(id: str, manager: BuildManager) -> ModuleSearchResult:
33763409
"""Find a filesystem path for module `id` or the reason it can't be found."""
3377-
t0 = time.time()
3410+
if manager.stats_enabled:
3411+
t0 = time.time()
33783412
x = manager.find_module_cache.find_module(id, fast_path=False)
3379-
manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1)
3413+
if manager.stats_enabled:
3414+
manager.add_stats(find_module_time=time.time() - t0, find_module_calls=1)
33803415
return x
33813416

33823417

@@ -3491,6 +3526,9 @@ def skipping_ancestor(manager: BuildManager, id: str, path: str, ancestor_for: S
34913526
def log_configuration(manager: BuildManager, sources: list[BuildSource]) -> None:
34923527
"""Output useful configuration information to LOG and TRACE"""
34933528

3529+
if not manager.logging_enabled:
3530+
return
3531+
34943532
config_file = manager.options.config_file
34953533
if config_file:
34963534
config_file = os.path.abspath(config_file)
@@ -3953,23 +3991,25 @@ def find_stale_sccs(
39533991
if stale_indirect is not None:
39543992
fresh = False
39553993

3956-
if fresh:
3957-
fresh_msg = "fresh"
3958-
elif stale_scc:
3959-
fresh_msg = "inherently stale"
3960-
if stale_scc != ascc.mod_ids:
3961-
fresh_msg += f" ({' '.join(sorted(stale_scc))})"
3962-
if stale_deps:
3963-
fresh_msg += f" with stale deps ({' '.join(sorted(stale_deps))})"
3964-
elif stale_deps:
3965-
fresh_msg = f"stale due to deps ({' '.join(sorted(stale_deps))})"
3966-
else:
3967-
assert stale_indirect is not None
3968-
fresh_msg = f"stale due to stale indirect dep(s): first {stale_indirect}"
3994+
if manager.logging_enabled:
3995+
if fresh:
3996+
fresh_msg = "fresh"
3997+
elif stale_scc:
3998+
fresh_msg = "inherently stale"
3999+
if stale_scc != ascc.mod_ids:
4000+
fresh_msg += f" ({' '.join(sorted(stale_scc))})"
4001+
if stale_deps:
4002+
fresh_msg += f" with stale deps ({' '.join(sorted(stale_deps))})"
4003+
elif stale_deps:
4004+
fresh_msg = f"stale due to deps ({' '.join(sorted(stale_deps))})"
4005+
else:
4006+
assert stale_indirect is not None
4007+
fresh_msg = f"stale due to stale indirect dep(s): first {stale_indirect}"
4008+
scc_str = " ".join(ascc.mod_ids)
39694009

3970-
scc_str = " ".join(ascc.mod_ids)
39714010
if fresh:
3972-
manager.trace(f"Found {fresh_msg} SCC ({scc_str})")
4011+
if manager.tracing_enabled:
4012+
manager.trace(f"Found {fresh_msg} SCC ({scc_str})")
39734013
# If there is at most one file with errors we can skip the ordering to save time.
39744014
mods_with_errors = [id for id in ascc.mod_ids if graph[id].error_lines]
39754015
if len(mods_with_errors) <= 1:
@@ -3986,11 +4026,14 @@ def find_stale_sccs(
39864026
manager.flush_errors(path, formatted, False)
39874027
fresh_sccs.append(ascc)
39884028
else:
3989-
size = len(ascc.mod_ids)
3990-
if size == 1:
3991-
manager.log(f"Scheduling SCC singleton ({scc_str}) as {fresh_msg}")
3992-
else:
3993-
manager.log("Scheduling SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg))
4029+
if manager.logging_enabled:
4030+
size = len(ascc.mod_ids)
4031+
if size == 1:
4032+
manager.log(f"Scheduling SCC singleton ({scc_str}) as {fresh_msg}")
4033+
else:
4034+
manager.log(
4035+
"Scheduling SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg)
4036+
)
39944037
stale_sccs.append(ascc)
39954038
return stale_sccs, fresh_sccs
39964039

0 commit comments

Comments
 (0)