diff --git a/BRANCH_COVERAGE_GUIDE.md b/BRANCH_COVERAGE_GUIDE.md new file mode 100644 index 000000000000..77ff84c3666c --- /dev/null +++ b/BRANCH_COVERAGE_GUIDE.md @@ -0,0 +1,61 @@ +## Add Branch Coverage + +### Step 1: Register in `mypy/branch_coverage.py` + +```python +BRANCH_COVERAGE = { + 'check_return_stmt': set(), + 'your_function_name': set(), # Add your function +} + +BRANCH_DESCRIPTIONS = { + 'your_function_name': { + 1: 'Function entry', + 2: 'if condition_x - TRUE', + 3: 'if condition_x - FALSE', + 4: 'elif condition_y - TRUE', + 5: 'else branch', + } +} +``` + +### Step 2: Instrument Your Function + +```python +def your_function_name(self, param): + from mypy.branch_coverage import record_branch + record_branch('your_function_name', 1) # Function entry + + if condition_x: + record_branch('your_function_name', 2) # TRUE + # code... + elif condition_y: + record_branch('your_function_name', 3) # FALSE from if + record_branch('your_function_name', 4) # TRUE for elif + # code... + else: + record_branch('your_function_name', 3) # FALSE from if + record_branch('your_function_name', 5) # else + # code... +``` + +**Important:** Import `record_branch` inside the function to avoid circular imports. + +## Run Tests + +**CRITICAL**: Must use `-n0` to disable parallel execution, or coverage data will not be collected! 
+ +```bash +# Activate virtual environment first +source venv/bin/activate + +# Run all tests +pytest mypy/test/testcheck.py -n0 + +# Run a specific test case +pytest mypy/test/testcheck.py::TypeCheckSuite::check-basic.test::testInvalidReturn -n0 +``` + +## View Reports + +Reports are automatically saved in the project root directory: `branch_coverage_report.txt` diff --git a/mypy/branch_coverage.py b/mypy/branch_coverage.py new file mode 100644 index 000000000000..768d63a8b795 --- /dev/null +++ b/mypy/branch_coverage.py @@ -0,0 +1,99 @@ +"""Branch Coverage Tracking Module""" + +from __future__ import annotations + +from pathlib import Path +from typing import Final + +BRANCH_COVERAGE: dict[str, set[int]] = {"check_return_stmt": set()} + +BRANCH_DESCRIPTIONS: Final[dict[str, dict[int, str]]] = { + "check_return_stmt": { + 1: "Function entry", + 2: "defn is not None - TRUE", + 3: "defn is not None - FALSE", + 4: "defn.is_generator - TRUE", + 5: "defn.is_generator - FALSE (else/elif)", + 6: "defn.is_coroutine - TRUE", + 7: "defn.is_coroutine - FALSE (else)", + 8: "isinstance(return_type, UninhabitedType) - TRUE", + 9: "isinstance(return_type, UninhabitedType) - FALSE", + 10: "not is_lambda and not return_type.ambiguous - TRUE", + 11: "not is_lambda and not return_type.ambiguous - FALSE", + 12: "s.expr - TRUE (has return value)", + 13: "s.expr - FALSE (empty return)", + 14: "isinstance(s.expr, (CallExpr, ...)) or isinstance(s.expr, AwaitExpr) - TRUE", + 15: "isinstance(s.expr, (CallExpr, ...)) or isinstance(s.expr, AwaitExpr) - FALSE", + 16: "isinstance(typ, Instance) and typ.type.fullname in NOT_IMPLEMENTED - TRUE", + 17: "isinstance(typ, Instance) and typ.type.fullname in NOT_IMPLEMENTED - FALSE", + 18: "defn.is_async_generator - TRUE", + 19: "defn.is_async_generator - FALSE", + 20: "isinstance(typ, AnyType) - TRUE", + 21: "isinstance(typ, AnyType) - FALSE", + 22: "warn_return_any conditions - TRUE (all conditions met)", + 23: "warn_return_any conditions - FALSE (at 
least one condition not met)", + 24: "declared_none_return - TRUE", + 25: "declared_none_return - FALSE", + 26: "is_lambda or isinstance(typ, NoneType) - TRUE", + 27: "is_lambda or isinstance(typ, NoneType) - FALSE", + 28: "defn.is_generator and not defn.is_coroutine and isinstance(return_type, AnyType) - TRUE", + 29: "defn.is_generator and not defn.is_coroutine and isinstance(return_type, AnyType) - FALSE", + 30: "isinstance(return_type, (NoneType, AnyType)) - TRUE", + 31: "isinstance(return_type, (NoneType, AnyType)) - FALSE", + 32: "self.in_checked_function() - TRUE", + 33: "self.in_checked_function() - FALSE", + } +} + + +def record_branch(function_name: str, branch_id: int) -> None: + """Record that a branch has been executed.""" + if function_name in BRANCH_COVERAGE: + BRANCH_COVERAGE[function_name].add(branch_id) + + +def get_coverage_report() -> str: + """Generate a coverage report as a string.""" + report: list[str] = [] + report.append("=" * 80) + report.append("BRANCH COVERAGE REPORT") + report.append("=" * 80) + + for func_name, covered_branches in BRANCH_COVERAGE.items(): + report.append(f"\n{'=' * 80}") + report.append(f"Function: {func_name}") + report.append(f"{'=' * 80}") + + descriptions = BRANCH_DESCRIPTIONS.get(func_name, {}) + total_branches = len(descriptions) + covered_count = len(covered_branches) + + report.append( + f"Coverage: {covered_count}/{total_branches} branches ({covered_count / total_branches * 100:.1f}%)" + ) + report.append("") + + for branch_id in sorted(descriptions.keys()): + status = "COVERED" if branch_id in covered_branches else "NOT COVERED" + desc = descriptions[branch_id] + report.append(f" Branch {branch_id:2d}: {status:15s} | {desc}") + + uncovered = set(descriptions.keys()) - covered_branches + if uncovered: + report.append("\n" + "=" * 80) + report.append("UNCOVERED BRANCHES:") + report.append("=" * 80) + for branch_id in sorted(uncovered): + report.append(f" Branch {branch_id:2d}: {descriptions[branch_id]}") + + 
report.append("\n" + "=" * 80) + return "\n".join(report) + + +def save_coverage_report(filename: str = "branch_coverage_report.txt") -> None: + """Save the coverage report to a file.""" + report = get_coverage_report() + output_path = Path.cwd() / filename + with open(output_path, "w", encoding="utf-8") as f: + f.write(report) + print(f"\nCoverage report saved to: {output_path}") diff --git a/mypy/test/conftest.py b/mypy/test/conftest.py new file mode 100644 index 000000000000..432c813866c4 --- /dev/null +++ b/mypy/test/conftest.py @@ -0,0 +1,44 @@ +""" +Pytest configuration for branch coverage collection +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from _pytest.main import Session + + +def pytest_sessionfinish(session: Session, exitstatus: int) -> None: + """ + Hook that runs after all tests complete + """ + try: + from mypy.branch_coverage import BRANCH_COVERAGE, get_coverage_report, save_coverage_report + + total_covered = sum(len(branches) for branches in BRANCH_COVERAGE.values()) + + if total_covered > 0: + print("\n" + "=" * 80) + print("BRANCH COVERAGE COLLECTION COMPLETED") + print("=" * 80) + print(f"Total branches covered: {total_covered}") + + save_coverage_report() + + print("\n" + get_coverage_report()) + + print("\n" + "=" * 80) + print("Coverage reports saved!") + print("=" * 80) + else: + print("\nWarning: No branch coverage data collected") + + except ImportError: + print("\nBranch coverage module not found - skipping coverage report") + except Exception as e: + print(f"\nError saving coverage report: {e}") + import traceback + + traceback.print_exc() diff --git a/report.md b/report.md new file mode 100644 index 000000000000..e0548094e414 --- /dev/null +++ b/report.md @@ -0,0 +1,171 @@ +# Report for assignment 3 + +This is a template for your report. You are free to modify it as needed. 
+It is not required to use markdown for your report either, but the report +has to be delivered in a standard, cross-platform format. + +## Project + +Name: + +URL: + +One or two sentences describing it + +## Onboarding experience + +Did it build and run as documented? + +See the assignment for details; if everything works out of the box, +there is no need to write much here. If the first project(s) you picked +ended up being unsuitable, you can describe the "onboarding experience" +for each project, along with reason(s) why you changed to a different one. + + +## Complexity + +1. What are your results for five complex functions? + * Did all methods (tools vs. manual count) get the same result? + * Are the results clear? +2. Are the functions just complex, or also long? +3. What is the purpose of the functions? +4. Are exceptions taken into account in the given measurements? +5. Is the documentation clear w.r.t. all the possible outcomes? + +### `solve_constraints@mypy/solve.py` +Lizard's output for `solve_constraints` in `mypy/solve.py` is as follows: +``` + NLOC CCN token PARAM length location +------------------------------------------------ + 57 30 423 5 86 solve_constraints@41-126@mypy/solve.py +``` + +By manually counting the number of `if`, `for` and `else` statements we got a CC of **18**, not matching the 30 reported by Lizard. The difference lies in logical clauses and list comprehensions, which were initially overlooked. + +By counting each logical clause (e.g., `and`, `or`) as well as the `for` and `if` statements inside of comprehensions, we arrive at a CC of **30**, matching Lizard's output. + +This function in particular is just complex, but not aggressively long, although 57 lines of code is at the upper end of what should be considered acceptable. 
Further, this function in particular clearly has a lot of different responsibilities (handling three separate boolean flags), which could be a sign that it is possible to reduce its complexity by refactoring those out as separate handlers. + +The purpose of the function is to solve type constraints for type variables in the checked program, which is a core part of the type checking process. + +There are no exceptions raised in this function, so they are not taken into account. + +The documentation for the function is not very clear. The code is somewhat descriptive but lacks a lot of comments given its complexity and the docstring only gives a very high-level overview. I also feel that the naming of a lot of stuff may be a little too generic; there is a lot of `solve_x`. + + + +### `is_overlapping_types@mypy/meet.py` +Lizard's output for `is_overlapping_types` in `mypy/meet.py` is as follows: +``` + NLOC CCN token PARAM length location +------------------------------------------------ + 168 81 1185 6 322 is_overlapping_types@336-657@./mypy/meet.py +``` + +By counting each logical clause (e.g., `and`, `or`) as well as the `for` and `if` statements inside of comprehensions, we arrive at a CC of **80**, being off by one compared to Lizard's output. The difference of 1 compared to Lizard’s result is likely due to how the tool counts certain Python-specific constructs; it might count the generator expression, for example. + +The purpose of is_overlapping_types() is to check whether two mypy types overlap at runtime, meaning whether it is possible for any value to be both left and right. It is used for reachability checks and for verifying whether overload variants or unions might match. 
Since it must handle many different type categories and each category requires its own branching rules and return paths, it is, in my opinion, justified that this function has a high cyclomatic complexity, but I think some of this logic should be moved out into helper functions to improve readability. + +There are no exceptions or try/catch blocks. The function contains a few asserts for paths that should never occur as sanity checks, but in general it attempts to explicitly handle every case and return early instead of relying on a generic try/catch. + +There is quite a bit of documentation around the different branches. + + +The quality of our own coverage measurement is quite limited as it's very annoying to add new paths and requires quite a bit of boilerplate. This function has no ternary operators or exceptions so that is not applicable here. I did not use an automated tool for this one. + +The result of the coverage analyzer was that 49/51 branches were already covered under the existing test suite. Of these 3, 2 were unreachable by design so impossible to test. So I added the one test for the branch I could reach and created 3 different tests for path coverage. It was also very difficult to look at the existing test suite and try to figure out if the path had already been covered so I did my best to try and figure it out, but it's almost impossible to actually check on such a large test suite. + +### `comparison_type_narrowing_helper@mypy/checker.py` +Lizard's output for `comparison_type_narrowing_helper` in `mypy/checker.py` is as follows: + +``` + NLOC CCN token PARAM length location +------------------------------------------------ + 126 30 618 2 171 comparison_type_narrowing_helper@6452-6622@mypy/checker.py +``` + +By manually counting the number of decision points (if, elif, logical and/or, and loop constructs) I obtained 29 decision points, resulting in a cyclomatic complexity of 30, which matches Lizard’s output. 
+ +Initially, some logical operators inside compound conditions were easy to overlook, but after including each logical clause as a decision point, the manual count aligned with the tool. Therefore, the results are consistent and clear. + +This function is both complex and fairly long. With an NLOC of 126 and total length of 171 lines, it is noticeably larger than what is typically considered simple or easy to maintain. The nested conditional structure contributes significantly to the high cyclomatic complexity. + +The purpose of comparison_type_narrowing_helper() is to support mypy’s type checker by analyzing comparison expressions (such as x == y, x is y, membership checks, and chained comparisons) and determining how variable types can be narrowed based on the comparison outcome. It returns type maps describing the narrowed types for different execution branches. Because Python comparisons and type relationships are rich and varied, the function must handle many special cases, which largely explains the high complexity. + +There are no try/except blocks in this function, so exception handling is not taken into account in the cyclomatic complexity measurement. + +The documentation of the function gives an overview of its purpose. However, given the large number of branches and special cases, the documentation does not fully describe all possible outcomes. Understanding the exact behavior in edge cases requires reading the implementation. Additional comments would help. + +The result of the coverage analysis for comparison_type_narrowing_helper() showed that most branches (13/14) were already exercised by the existing mypy test suite. However, Branch 2 was determined to be unreachable by design, making it impossible to cover through additional tests. I then chose to think about path coverage. To improve coverage, I therefore added two additional tests focused on path coverage, ensuring that different combinations of comparison and membership logic are exercised. 
Identifying whether particular paths were already covered was challenging due to the size and complexity of the existing test suite. + + + +## Refactoring + +Plan for refactoring complex code: + +Estimated impact of refactoring (lower CC, but other drawbacks?). + +Carried out refactoring (optional, P+): + +git diff ... + +## Coverage + +### Tools + +Document your experience in using a "new"/different coverage tool. + +How well was the tool documented? Was it possible/easy/difficult to +integrate it with your build environment? + +### Your own coverage tool + +Show a patch (or link to a branch) that shows the instrumented code to +gather coverage measurements. + +The patch is probably too long to be copied here, so please add +the git command that is used to obtain the patch instead: + +git diff ... + +What kinds of constructs does your tool support, and how accurate is +its output? + +### Evaluation + +1. How detailed is your coverage measurement? + +2. What are the limitations of your own tool? + +3. Are the results of your tool consistent with existing coverage tools? + +## Coverage improvement + +Show the comments that describe the requirements for the coverage. + +Report of old coverage: [link] + +Report of new coverage: [link] + +Test cases added: + +git diff ... + +Number of test cases added: two per team member (P) or at least four (P+). + +## Self-assessment: Way of working + +Current state according to the Essence standard: ... + +Was the self-assessment unanimous? Any doubts about certain items? + +How have you improved so far? + +Where is potential for improvement? + +## Overall experience + +What are your main take-aways from this project? What did you learn? + +Is there something special you want to mention here?