Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions .demo_qa.mk

This file was deleted.

24 changes: 0 additions & 24 deletions .github/workflows/pytest.yml

This file was deleted.

1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,3 @@ build/
.env.demo_qa
_demo_data/*/.runs/*
.coverage
.DS_Store
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ repos:
name: pytest
language: system
pass_filenames: false
entry: bash -lc 'source .venv/bin/activate PYTHONPATH=".:src:${PYTHONPATH}"; python -m pytest -q -m "not slow and not known_bad"'
entry: bash -lc 'source .venv/bin/activate PYTHONPATH=".:src:${PYTHONPATH}"; python -m pytest -q -m "not slow"'
26 changes: 1 addition & 25 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,6 @@ COMPARE_TAG_JUNIT ?= $(DATA)/.runs/diff.tags.junit.xml

MAX_FAILS ?= 5

PURGE_RUNS ?= 0
PRUNE_HISTORY ?= 0
PRUNE_CASE_HISTORY ?= 0
DRY ?= 0

# ==============================================================================
# 6) Настройки LLM-конфига (редактирование/просмотр)
# ==============================================================================
Expand Down Expand Up @@ -99,7 +94,7 @@ LIMIT_FLAG := $(if $(strip $(LIMIT)),--limit $(LIMIT),)
batch batch-tag batch-failed batch-failed-from \
batch-missed batch-missed-from batch-failed-tag batch-missed-tag \
batch-fail-fast batch-max-fails \
stats history-case report-tag report-tag-changes tags tag-rm case-run case-open compare compare-tag
stats history-case report-tag report-tag-changes tags case-run case-open compare compare-tag

# ==============================================================================
# help (на русском)
Expand Down Expand Up @@ -147,14 +142,6 @@ help:
@echo " make case-run CASE=case_42 - прогнать один кейс"
@echo " make case-open CASE=case_42 - открыть артефакты кейса"
@echo ""
@echo "Уборка:"
@echo " make tag-rm TAG=... [DRY=1] [PURGE_RUNS=1] [PRUNE_HISTORY=1] [PRUNE_CASE_HISTORY=1]"
@echo " - удаляет effective snapshot тега и tag-latest* указатели"
@echo " DRY=1 - dry-run: только показать, что будет удалено"
@echo " PURGE_RUNS=1 - дополнительно удалить все runs, где run_meta.tag == TAG"
@echo " PRUNE_HISTORY=1 - вычистить записи с этим тегом из $${DATA}/.runs/history.jsonl"
@echo " PRUNE_CASE_HISTORY=1 - вычистить записи с этим тегом из $${DATA}/.runs/runs/cases/*.jsonl"
@echo ""
@echo "Сравнение результатов:"
@echo " make compare BASE=... NEW=... [DIFF_OUT=...] [JUNIT=...]"
@echo " make compare-tag BASE_TAG=baseline NEW_TAG=... [COMPARE_TAG_OUT=...] [COMPARE_TAG_JUNIT=...]"
Expand Down Expand Up @@ -353,14 +340,3 @@ compare-tag: check
--new-tag "$(NEW_TAG)" \
--out "$(OUT)" \
--junit "$(JUNIT)"

# команды очистки

tag-rm:
@test -n "$(strip $(TAG))" || (echo "TAG обязателен: make tag-rm TAG=..." && exit 1)
@TAG="$(TAG)" DATA="$(DATA)" PURGE_RUNS="$(PURGE_RUNS)" PRUNE_HISTORY="$(PRUNE_HISTORY)" PRUNE_CASE_HISTORY="$(PRUNE_CASE_HISTORY)" DRY="$(DRY)" $(PYTHON) -m scripts.tag_rm





87 changes: 0 additions & 87 deletions caffeinate_make.sh

This file was deleted.

2 changes: 1 addition & 1 deletion examples/demo_qa/demo_qa.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[llm]
base_url = "http://localhost:8002/v1"
base_url = "http://localhost:8000/v1"
plan_model = "default"
synth_model = "default"
plan_temperature = 0.0
Expand Down
20 changes: 2 additions & 18 deletions examples/demo_qa/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,6 @@ def _stringify(value: object | None) -> str | None:
return str(value)


def _normalize_text(value: str) -> str:
return value.strip().casefold()


def _normalize_strings(values: Iterable[object]) -> list[str]:
return [_normalize_text(str(value)) for value in values]


def _match_expected(case: Case, answer: str | None) -> ExpectedCheck | None:
if not case.has_asserts:
return None
Expand All @@ -226,15 +218,7 @@ def _match_expected(case: Case, answer: str | None) -> ExpectedCheck | None:
return ExpectedCheck(mode="none", expected=expected_value, passed=False, detail="no answer")
if case.expected is not None:
expected_str = _stringify(case.expected) or ""
if isinstance(case.expected, (list, tuple, set)):
expected_items = _normalize_strings(case.expected)
answer_items = _normalize_strings(answer) if isinstance(answer, (list, tuple, set)) else []
if isinstance(case.expected, set) or isinstance(answer, set):
passed = set(expected_items) == set(answer_items)
else:
passed = expected_items == answer_items
else:
passed = _normalize_text(answer) == _normalize_text(expected_str)
passed = answer.strip() == expected_str.strip()
detail = None if passed else f"expected={expected_str!r}, got={answer!r}"
return ExpectedCheck(mode="exact", expected=expected_str, passed=passed, detail=detail)
if case.expected_regex is not None:
Expand All @@ -245,7 +229,7 @@ def _match_expected(case: Case, answer: str | None) -> ExpectedCheck | None:
return ExpectedCheck(mode="regex", expected=expected_regex, passed=passed, detail=detail)
if case.expected_contains is not None:
expected_contains = _stringify(case.expected_contains) or ""
passed = _normalize_text(expected_contains) in _normalize_text(answer)
passed = expected_contains in answer
detail = None if passed else f"expected to contain {expected_contains!r}"
return ExpectedCheck(mode="contains", expected=expected_contains, passed=passed, detail=detail)
return None
Expand Down
22 changes: 1 addition & 21 deletions examples/demo_qa/tests/test_demo_qa_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_match_expected_coerces_non_string_expected_values() -> None:
def test_match_expected_contains_pass_and_fail() -> None:
case = Case(id="c2", question="Q", expected_contains="bar")

match = _match_expected(case, "value BAR baz")
match = _match_expected(case, "value bar baz")
assert match is not None
assert match.passed is True

Expand All @@ -47,26 +47,6 @@ def test_match_expected_contains_pass_and_fail() -> None:
assert missing_answer.detail == "no answer"


def test_match_expected_equals_is_case_insensitive() -> None:
case = Case(id="c3", question="Q", expected="Alpha")

match = _match_expected(case, "alpha")
assert match is not None
assert match.passed is True


def test_match_expected_list_comparison_normalizes_elements() -> None:
case = Case(id="c4", question="Q", expected=["Foo", "Bar"])

match = _match_expected(case, cast(str, ["foo", "bar"]))
assert match is not None
assert match.passed is True

mismatch = _match_expected(case, cast(str, ["foo", "baz"]))
assert mismatch is not None
assert mismatch.passed is False


def test_diff_runs_tracks_regressions_and_improvements() -> None:
baseline = [
RunResult(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "fetchgraph"
version = "0.2.1"
version = "0.2.0"
description = "Graph-like planning → context fetching → synthesis agent (library-style)."
readme = "README.md"
requires-python = ">=3.11"
Expand Down
1 change: 0 additions & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ markers =
slow: slow tests (exclude via -m "not slow")
integration: integration tests (providers / IO / external deps)
e2e: end-to-end scenarios
known_bad: real-world TDD cases that are allowed to fail (excluded from CI by default)

# Удобные дефолты для логов в CI и локально
log_cli = true
Expand Down
Loading