From b2994ddf743de019bce6789b09cbb669b0090cbc Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Mon, 15 Jun 2026 15:48:28 +0800 Subject: [PATCH 01/17] test: re-add extracted multi-language tests for follow-up PR Stage the 35 test files that were split out of the code PR (commit 5e02cfc) on top of the latest pipeline code, including the review-fix commits. Basing this branch on the current pipeline HEAD keeps the eventual follow-up PR's diff limited to the test files once the code PR has merged to main. --- .../scripts/decoder_lang/tests/__init__.py | 0 .../decoder_lang/tests/test_c_cpp_backend.py | 172 +++++ .../tests/test_javascript_backend.py | 176 +++++ .../tests/test_phase1_propagation.py | 258 +++++++ .../tests/test_phase2_skeleton.py | 307 ++++++++ .../tests/test_phase3_code_structure.py | 343 +++++++++ .../tests/test_phase5_prompt_directive.py | 84 +++ .../decoder_lang/tests/test_python_backend.py | 297 ++++++++ .../decoder_lang/tests/test_unit_kind.py | 97 +++ .../tests/test_branch_name_sanitization.py | 67 ++ CoderMind/tests/test_code_gen_multilingual.py | 382 ++++++++++ CoderMind/tests/test_entry_reconciliation.py | 195 +++++ CoderMind/tests/test_feature_build.py | 68 ++ CoderMind/tests/test_final_test_repair.py | 161 ++++ .../tests/test_init_codebase_gitignore.py | 72 ++ CoderMind/tests/test_interface_coverage.py | 493 ++++++++++++ CoderMind/tests/test_lang_parser_c.py | 98 +++ CoderMind/tests/test_lang_parser_cpp.py | 100 +++ CoderMind/tests/test_lang_parser_fallback.py | 58 ++ CoderMind/tests/test_lang_parser_go.py | 138 ++++ .../tests/test_lang_parser_javascript.py | 80 ++ .../tests/test_lang_parser_python_parity.py | 238 ++++++ CoderMind/tests/test_lang_parser_registry.py | 237 ++++++ CoderMind/tests/test_lang_parser_rust.py | 151 ++++ .../tests/test_lang_parser_typescript.py | 142 ++++ .../tests/test_multilingual_code_unit.py | 76 ++ .../tests/test_multilingual_dep_graph.py | 702 ++++++++++++++++++ 
.../test_multilingual_encoder_pipeline.py | 264 +++++++ .../tests/test_multilingual_prompt_safety.py | 59 ++ .../tests/test_orphan_test_build_exclusion.py | 381 ++++++++++ CoderMind/tests/test_plan_language_support.py | 645 ++++++++++++++++ .../tests/test_repo_language_resolution.py | 146 ++++ CoderMind/tests/test_rpg_builder.py | 23 + CoderMind/tests/test_smoke_multilang.py | 99 +++ CoderMind/tests/test_zero_test_guard.py | 167 +++++ 35 files changed, 6976 insertions(+) create mode 100644 CoderMind/scripts/decoder_lang/tests/__init__.py create mode 100644 CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py create mode 100644 CoderMind/scripts/decoder_lang/tests/test_javascript_backend.py create mode 100644 CoderMind/scripts/decoder_lang/tests/test_phase1_propagation.py create mode 100644 CoderMind/scripts/decoder_lang/tests/test_phase2_skeleton.py create mode 100644 CoderMind/scripts/decoder_lang/tests/test_phase3_code_structure.py create mode 100644 CoderMind/scripts/decoder_lang/tests/test_phase5_prompt_directive.py create mode 100644 CoderMind/scripts/decoder_lang/tests/test_python_backend.py create mode 100644 CoderMind/scripts/decoder_lang/tests/test_unit_kind.py create mode 100644 CoderMind/tests/test_branch_name_sanitization.py create mode 100644 CoderMind/tests/test_code_gen_multilingual.py create mode 100644 CoderMind/tests/test_entry_reconciliation.py create mode 100644 CoderMind/tests/test_feature_build.py create mode 100644 CoderMind/tests/test_final_test_repair.py create mode 100644 CoderMind/tests/test_init_codebase_gitignore.py create mode 100644 CoderMind/tests/test_interface_coverage.py create mode 100644 CoderMind/tests/test_lang_parser_c.py create mode 100644 CoderMind/tests/test_lang_parser_cpp.py create mode 100644 CoderMind/tests/test_lang_parser_fallback.py create mode 100644 CoderMind/tests/test_lang_parser_go.py create mode 100644 CoderMind/tests/test_lang_parser_javascript.py create mode 100644 
CoderMind/tests/test_lang_parser_python_parity.py create mode 100644 CoderMind/tests/test_lang_parser_registry.py create mode 100644 CoderMind/tests/test_lang_parser_rust.py create mode 100644 CoderMind/tests/test_lang_parser_typescript.py create mode 100644 CoderMind/tests/test_multilingual_code_unit.py create mode 100644 CoderMind/tests/test_multilingual_dep_graph.py create mode 100644 CoderMind/tests/test_multilingual_encoder_pipeline.py create mode 100644 CoderMind/tests/test_multilingual_prompt_safety.py create mode 100644 CoderMind/tests/test_orphan_test_build_exclusion.py create mode 100644 CoderMind/tests/test_plan_language_support.py create mode 100644 CoderMind/tests/test_repo_language_resolution.py create mode 100644 CoderMind/tests/test_rpg_builder.py create mode 100644 CoderMind/tests/test_smoke_multilang.py create mode 100644 CoderMind/tests/test_zero_test_guard.py diff --git a/CoderMind/scripts/decoder_lang/tests/__init__.py b/CoderMind/scripts/decoder_lang/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py new file mode 100644 index 0000000..d5ab956 --- /dev/null +++ b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py @@ -0,0 +1,172 @@ +"""Tests for C and C++ decoder language backends.""" +from __future__ import annotations + +import sys +import unittest +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import patch + +_SCRIPTS_DIR = Path(__file__).resolve().parents[2] +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) + +from decoder_lang import ( # noqa: E402 + CBackend, + CppBackend, + ProjectTaskContext, + ToolchainUnavailable, + get_backend, + language_directive, + list_backends, +) + + +class CBackendTests(unittest.TestCase): + """C backend registry and parser-backed behaviour.""" + + def setUp(self) -> None: + self.backend = 
get_backend("c") + + def test_registered(self) -> None: + self.assertIn("c", list_backends()) + self.assertIsInstance(self.backend, CBackend) + + def test_file_classification(self) -> None: + self.assertTrue(self.backend.is_source_file("src/store.c")) + self.assertTrue(self.backend.is_source_file("include/store.h")) + self.assertFalse(self.backend.is_source_file("src/store.cpp")) + self.assertTrue(self.backend.is_test_file("tests/test_store.c")) + self.assertTrue(self.backend.is_test_file("src/store_test.c")) + + def test_identifier_rules(self) -> None: + self.assertTrue(self.backend.is_valid_module_identifier("task_store")) + self.assertFalse(self.backend.is_valid_module_identifier("struct")) + self.assertEqual(self.backend.sanitize_module_identifier("task-store"), "task_store") + self.assertEqual(self.backend.sanitize_module_identifier("1task"), "_1task") + + def test_code_units_imports_and_signature(self) -> None: + code = """ + #include "store.h" + + struct Task { int id; }; + + int load_task(int id); + + int add_task(int id) { + return id + 1; + } + """ + ok, error = self.backend.syntax_check(code, "src/store.c") + self.assertTrue(ok, error) + units = self.backend.list_code_units(code, "src/store.c") + names = {(unit.unit_type, unit.name) for unit in units} + self.assertIn(("struct", "Task"), names) + self.assertIn(("function", "load_task"), names) + self.assertIn(("function", "add_task"), names) + function = next(unit for unit in units if unit.name == "add_task") + self.assertIn("add_task", self.backend.format_signature(function)) + imports = self.backend.list_imports(code, "src/store.c") + self.assertEqual([dep.dst for dep in imports], ["store.h"]) + + def test_prompt_hints_and_project_tasks(self) -> None: + hints = self.backend.prompt_hints() + self.assertEqual(hints.display_name, "C") + self.assertEqual(hints.markdown_fence, "c") + self.assertIn("C99", hints.style_directive) + self.assertIn("Target language: C", language_directive(self.backend)) + 
templates = self.backend.project_task_templates( + ProjectTaskContext(repo_name="tasklite", repo_info="task cli", package_name="tasklite") + ) + self.assertIn("Makefile", templates.dependencies) + self.assertIn("src/main.c", templates.main_entry) + self.assertIn("C CLI", templates.readme) + + def test_missing_toolchain_raises(self) -> None: + with TemporaryDirectory() as temp_dir: + with patch("decoder_lang.c_backend.shutil.which", return_value=None): + with self.assertRaises(ToolchainUnavailable): + self.backend.ensure_env(Path(temp_dir)) + + +class CppBackendTests(unittest.TestCase): + """C++ backend registry and parser-backed behaviour.""" + + def setUp(self) -> None: + self.backend = get_backend("cpp") + + def test_registered(self) -> None: + self.assertIn("cpp", list_backends()) + self.assertIsInstance(self.backend, CppBackend) + + def test_file_classification(self) -> None: + self.assertTrue(self.backend.is_source_file("src/store.cpp")) + self.assertTrue(self.backend.is_source_file("include/store.hpp")) + self.assertTrue(self.backend.is_source_file("include/store.h")) + self.assertFalse(self.backend.is_source_file("src/store.c")) + self.assertTrue(self.backend.is_test_file("tests/store_test.cpp")) + self.assertTrue(self.backend.is_test_file("src/test_store.cc")) + + def test_identifier_rules(self) -> None: + self.assertTrue(self.backend.is_valid_module_identifier("TaskStore")) + self.assertFalse(self.backend.is_valid_module_identifier("class")) + self.assertEqual(self.backend.sanitize_module_identifier("task-store"), "task_store") + self.assertEqual(self.backend.sanitize_module_identifier("1task"), "_1task") + + def test_code_units_imports_and_signature(self) -> None: + code = """ + #include "store.hpp" + + int run_task(int id); + + class TaskStore { + public: + int add(int id) { return id + 1; } + }; + + int run() { + TaskStore store; + return store.add(1); + } + """ + ok, error = self.backend.syntax_check(code, "src/store.cpp") + self.assertTrue(ok, error) 
+ units = self.backend.list_code_units(code, "src/store.cpp") + names = {(unit.unit_type, unit.name) for unit in units} + self.assertIn(("class", "TaskStore"), names) + self.assertIn(("function", "run_task"), names) + self.assertIn(("function", "run"), names) + run = next(unit for unit in units if unit.name == "run") + self.assertIn("run", self.backend.format_signature(run)) + imports = self.backend.list_imports(code, "src/store.cpp") + self.assertEqual([dep.dst for dep in imports], ["store.hpp"]) + + def test_h_header_parses_as_cpp(self) -> None: + code = "class Reader { public: int value() const { return 1; } };\n" + ok, error = self.backend.syntax_check(code, "include/reader.h") + self.assertTrue(ok, error) + units = self.backend.list_code_units(code, "include/reader.h") + self.assertTrue(any(unit.name == "Reader" for unit in units)) + + def test_prompt_hints_and_project_tasks(self) -> None: + hints = self.backend.prompt_hints() + self.assertEqual(hints.display_name, "C++") + self.assertEqual(hints.markdown_fence, "cpp") + self.assertIn("C++17", hints.style_directive) + self.assertIn("Target language: C++", language_directive(self.backend)) + templates = self.backend.project_task_templates( + ProjectTaskContext(repo_name="tasklite", repo_info="task cli", package_name="tasklite") + ) + self.assertIn("CMakeLists.txt", templates.dependencies) + self.assertIn("src/main.cpp", templates.main_entry) + self.assertIn("C++ CLI", templates.readme) + + def test_missing_toolchain_raises(self) -> None: + with TemporaryDirectory() as temp_dir: + with patch("decoder_lang.cpp_backend.shutil.which", return_value=None): + with self.assertRaises(ToolchainUnavailable): + self.backend.ensure_env(Path(temp_dir)) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/CoderMind/scripts/decoder_lang/tests/test_javascript_backend.py b/CoderMind/scripts/decoder_lang/tests/test_javascript_backend.py new file mode 100644 index 0000000..54c7c1d --- 
/dev/null +++ b/CoderMind/scripts/decoder_lang/tests/test_javascript_backend.py @@ -0,0 +1,176 @@ +"""Tests for the JavaScript decoder backend. + +Run from ``scripts/`` (e.g. ``python -m pytest decoder_lang/tests``) so the +sibling ``common`` / ``lang_parser`` packages are importable. +""" +from __future__ import annotations + +import sys +import unittest +from pathlib import Path +from unittest.mock import patch + +_SCRIPTS_DIR = Path(__file__).resolve().parents[2] +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) + +from decoder_lang import ( # noqa: E402 + JavaScriptBackend, + ToolchainUnavailable, + get_backend, + list_backends, +) +from decoder_lang.test_result import EnvHandle # noqa: E402 + + +class JavaScriptBackendRegistrationTests(unittest.TestCase): + def test_registered(self) -> None: + self.assertIn("javascript", list_backends()) + + def test_get_backend_returns_singleton(self) -> None: + a = get_backend("javascript") + b = get_backend("javascript") + self.assertIs(a, b) + self.assertIsInstance(a, JavaScriptBackend) + + +class JavaScriptBackendBehaviourTests(unittest.TestCase): + def setUp(self) -> None: + self.backend = get_backend("javascript") + + # --- identity ---------------------------------------------------- + + def test_identity_fields(self) -> None: + self.assertEqual(self.backend.name, "javascript") + self.assertEqual(self.backend.display_name, "JavaScript") + self.assertEqual(self.backend.file_extension, ".js") + self.assertEqual(self.backend.markdown_fence, "javascript") + + # --- file classification ----------------------------------------- + + def test_is_source_file(self) -> None: + for path in ("src/index.js", "src/cli.mjs", "lib/store.cjs", "ui/app.jsx"): + with self.subTest(path=path): + self.assertTrue(self.backend.is_source_file(path)) + for path in ("README.md", "main.py", "src/app.ts", "main"): + with self.subTest(path=path): + self.assertFalse(self.backend.is_source_file(path)) + + def 
test_is_test_file(self) -> None: + for path in ("tests/cli.js", "src/store.test.js", "src/cli.spec.mjs"): + with self.subTest(path=path): + self.assertTrue(self.backend.is_test_file(path)) + for path in ("src/index.js", "lib/store.cjs"): + with self.subTest(path=path): + self.assertFalse(self.backend.is_test_file(path)) + + # --- package marker / identifiers -------------------------------- + + def test_no_package_marker(self) -> None: + self.assertIsNone(self.backend.package_marker_filename()) + self.assertIsNone(self.backend.package_marker_content("any/path")) + + def test_identifier_rules(self) -> None: + self.assertTrue(self.backend.is_valid_module_identifier("task-store")) + self.assertTrue(self.backend.is_valid_module_identifier("cli")) + self.assertFalse(self.backend.is_valid_module_identifier("")) + self.assertFalse(self.backend.is_valid_module_identifier("a/b")) + + def test_sanitize(self) -> None: + self.assertEqual(self.backend.sanitize_module_identifier("my mod"), "my-mod") + self.assertEqual(self.backend.sanitize_module_identifier("a/b/c"), "a-b-c") + s = self.backend.sanitize_module_identifier("x--y z") + self.assertEqual(self.backend.sanitize_module_identifier(s), s) # idempotent + + # --- code structure ---------------------------------------------- + + def test_syntax_check_ok(self) -> None: + ok, err = self.backend.syntax_check( + "// user's data — doesn't break\nexport function f() { return 1; }\n", + "src/a.js", + ) + self.assertTrue(ok, err) + + def test_syntax_check_failure(self) -> None: + ok, err = self.backend.syntax_check("export function broken(\n", "src/b.js") + self.assertFalse(ok) + self.assertIsNotNone(err) + + def test_list_code_units(self) -> None: + code = "export function foo() {}\nclass Bar { run() {} }\n" + units = self.backend.list_code_units(code, "src/c.js") + kinds = {(u.unit_type, u.name) for u in units} + self.assertIn(("function", "foo"), kinds) + self.assertIn(("class", "Bar"), kinds) + + def test_has_placeholder(self) 
-> None: + self.assertTrue(self.backend.has_placeholder( + 'export function f() { throw new Error("not implemented"); }\n' + )) + self.assertFalse(self.backend.has_placeholder( + "export function f() { return 42; }\n" + )) + + def test_list_imports(self) -> None: + code = "import { store } from './store.js';\nexport function f() {}\n" + imports = self.backend.list_imports(code, "src/c.js") + self.assertTrue(any(getattr(d, "relation", "") == "imports" for d in imports)) + + # --- test environment -------------------------------------------- + + def test_detect_env_none_when_node_missing(self) -> None: + with patch("decoder_lang.javascript_backend.shutil.which", return_value=None): + self.assertIsNone(self.backend.detect_env(Path("."))) + + def test_ensure_env_raises_when_node_missing(self) -> None: + with patch("decoder_lang.javascript_backend.shutil.which", return_value=None): + with self.assertRaises(ToolchainUnavailable): + self.backend.ensure_env(Path(".")) + + def test_ensure_env_creates_package_json(self) -> None: + from tempfile import TemporaryDirectory + + with TemporaryDirectory() as tmp: + root = Path(tmp) + with patch( + "decoder_lang.javascript_backend.shutil.which", + return_value="/usr/bin/npm", + ): + self.backend.ensure_env(root) + pkg = root / "package.json" + self.assertTrue(pkg.exists()) + self.assertIn('"type": "module"', pkg.read_text()) + self.assertNotIn("tsconfig", pkg.read_text()) + + def test_test_command_npm_vs_node(self) -> None: + npm_env = EnvHandle(project_root=Path("."), runtime_executable="/usr/bin/npm") + self.assertEqual(self.backend.test_command(npm_env), ["/usr/bin/npm", "test"]) + node_env = EnvHandle(project_root=Path("."), runtime_executable="/usr/bin/node") + self.assertEqual(self.backend.test_command(node_env), ["/usr/bin/node", "--test"]) + + # --- prompt hints / templates ------------------------------------ + + def test_prompt_hints_are_javascript(self) -> None: + hints = self.backend.prompt_hints() + 
self.assertEqual(hints.markdown_fence, "javascript") + self.assertIn(".js", hints.entrypoint_example) + # Must steer away from TypeScript. + self.assertIn("TypeScript", hints.style_directive) + + def test_project_task_templates_avoid_typescript(self) -> None: + from decoder_lang.project_tasks import ProjectTaskContext + + ctx = ProjectTaskContext( + repo_name="tasklite", + repo_info="A small task CLI", + package_name="tasklite", + ) + templates = self.backend.project_task_templates(ctx) + self.assertIsNotNone(templates) + self.assertIn("package.json", templates.dependencies) + self.assertIn("tsconfig", templates.dependencies) # mentioned as a "do NOT" + self.assertIn("src/index.js", templates.main_entry) + + +if __name__ == "__main__": + unittest.main() diff --git a/CoderMind/scripts/decoder_lang/tests/test_phase1_propagation.py b/CoderMind/scripts/decoder_lang/tests/test_phase1_propagation.py new file mode 100644 index 0000000..72f95a4 --- /dev/null +++ b/CoderMind/scripts/decoder_lang/tests/test_phase1_propagation.py @@ -0,0 +1,258 @@ +"""Tests for target-language propagation through decoder entry points. + +Focus: +* :func:`decoder_lang.resolve_decoder_language` priority chain. +* ``FeatureSpecOutput.meta.primary_language`` is optional and defaults + to None, so specs without the field load unchanged. +* ``FileDesigner`` accepts and stores the language; the resolved + backend is the registered :class:`PythonBackend` singleton in the + decoder pipeline. +""" +from __future__ import annotations + +import sys +import unittest +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock + +# Make ``scripts/`` importable for direct invocation. 
+_SCRIPTS_DIR = Path(__file__).resolve().parents[2] +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) + +from decoder_lang import ( # noqa: E402 + PythonBackend, + get_backend, + resolve_decoder_language, + resolve_target_language, +) + + +class ResolveDecoderLanguageTests(unittest.TestCase): + """The four-tier chain documented on ``resolve_decoder_language``.""" + + # --- Tier 0: feature_spec -------------------------------------- + + def test_tier_0_dict_feature_spec_wins_over_rpg(self) -> None: + result = resolve_decoder_language( + feature_spec={"meta": {"primary_language": "go"}}, + rpg_obj={"root": {"meta": {"language": "python"}}}, + ) + self.assertEqual(result, "go") + + def test_tier_0_object_feature_spec_wins_over_rpg(self) -> None: + spec = SimpleNamespace( + meta=SimpleNamespace(primary_language="rust", target_languages=[]) + ) + result = resolve_decoder_language( + feature_spec=spec, + rpg_obj={"root": {"meta": {"language": "python"}}}, + ) + self.assertEqual(result, "rust") + + def test_tier_0_skipped_when_feature_spec_lang_blank(self) -> None: + # Empty string is treated as "not specified" so we fall through + # to the RPG-meta tier rather than blowing up later in + # get_backend(""). 
+ result = resolve_decoder_language( + feature_spec={"meta": {"primary_language": ""}}, + rpg_obj={"root": {"meta": {"language": "go"}}}, + ) + self.assertEqual(result, "go") + + def test_tier_0_skipped_when_feature_spec_lang_none(self) -> None: + result = resolve_decoder_language( + feature_spec={"meta": {"primary_language": None}}, + rpg_obj={"root": {"meta": {"language": "typescript"}}}, + ) + self.assertEqual(result, "typescript") + + def test_tier_0_uses_first_target_languages_item(self) -> None: + result = resolve_decoder_language( + feature_spec={"meta": {"target_languages": ["go", "typescript"]}}, + rpg_obj={"root": {"meta": {"language": "python"}}}, + ) + self.assertEqual(result, "go") + + # --- Tier 1: RPG root meta ------------------------------------- + + def test_tier_1_rpg_meta_when_no_feature_spec(self) -> None: + result = resolve_decoder_language( + feature_spec=None, + rpg_obj={"root": {"meta": {"language": "c"}}}, + ) + self.assertEqual(result, "c") + + # --- Tier 3 default -------------------------------------------- + + def test_default_python_with_warning(self) -> None: + with self.assertLogs("decoder_lang.backend", level="WARNING"): + result = resolve_decoder_language() + self.assertEqual(result, "python") + + # --- Robustness ------------------------------------------------ + + def test_handles_missing_target_language_attr(self) -> None: + # Object without language metadata should fall through. + class _Bare: + pass + + with self.assertLogs("decoder_lang.backend", level="WARNING"): + result = resolve_decoder_language(feature_spec=_Bare()) + self.assertEqual(result, "python") + + def test_resolve_target_language_unchanged(self) -> None: + # The project-language resolver works without a feature_spec + # argument; callers that only have RPG metadata use this path. 
+ self.assertEqual( + resolve_target_language({"root": {"meta": {"language": "go"}}}), + "go", + ) + + +class FeatureSpecOutputSchemaTests(unittest.TestCase): + """Language metadata is optional and lives under ``meta``.""" + + def setUp(self) -> None: + from feature.schemas.spec import FeatureSpecOutput # noqa: E402 + + self.FeatureSpecOutput = FeatureSpecOutput + self.minimal_payload = { + "meta": { + "project_types": ["LIBRARY"], + "project_notes": "test", + "generated_at": "2026-06-04", + "source_documents": ["user_input"], + }, + "background_and_overview": [], + "non_functional_requirements": [], + "functional_requirements": [], + "repository_name": "demo-project", + "repository_purpose": "Test repository.", + } + + def test_payload_loads_without_language_metadata(self) -> None: + spec = self.FeatureSpecOutput.model_validate(self.minimal_payload) + self.assertIsNone(spec.target_language) + + def test_primary_language_round_trips_under_meta(self) -> None: + payload = { + **self.minimal_payload, + "meta": { + **self.minimal_payload["meta"], + "primary_language": "go", + }, + } + spec = self.FeatureSpecOutput.model_validate(payload) + self.assertEqual(spec.target_language, "go") + self.assertEqual(spec.target_languages, ["go"]) + round_tripped = self.FeatureSpecOutput.model_validate_json( + spec.model_dump_json() + ) + self.assertEqual(round_tripped.target_language, "go") + self.assertEqual(round_tripped.target_languages, ["go"]) + self.assertNotIn("target_language", spec.model_dump()) + + def test_language_aliases_are_canonicalized(self) -> None: + from common.language_meta import normalize_language_metadata # noqa: E402 + + primary, languages = normalize_language_metadata("C++", ["C++", "TS", "js"]) + + self.assertEqual(primary, "cpp") + self.assertEqual(languages, ["cpp", "typescript", "javascript"]) + + def test_target_languages_sets_primary_language(self) -> None: + payload = { + **self.minimal_payload, + "meta": { + **self.minimal_payload["meta"], + 
"target_languages": ["go", "typescript"], + }, + } + spec = self.FeatureSpecOutput.model_validate(payload) + self.assertEqual(spec.target_language, "go") + self.assertEqual(spec.target_languages, ["go", "typescript"]) + + def test_infers_go_from_requirement_text(self) -> None: + from feature.spec import InputSource, _infer_target_languages # noqa: E402 + + source = InputSource( + kind="user_input", + text=( + "TaskLite is a small command-line task tracker written in Go. " + "It validates the decoder pipeline for a non-Python project. " + "Run it with go test ./..." + ), + ) + + self.assertEqual(_infer_target_languages(source)[0], "go") + + +class FileDesignerWiringTests(unittest.TestCase): + """``FileDesigner.__init__`` resolves language + stores backend. + + Only checks constructor language resolution; the rest of the + designer pipeline is covered by skeleton-stage tests. + """ + + def _make_rpg(self, root_language: str | None = None): + """Build the minimum RPG-shaped object the new code path reads + (just ``rpg.repo_node.meta.language``). Using stubs keeps the + test independent of the full RPG construction path.""" + rpg = MagicMock() + if root_language is None: + rpg.repo_node = MagicMock() + rpg.repo_node.meta = MagicMock() + rpg.repo_node.meta.language = None + else: + rpg.repo_node = MagicMock() + rpg.repo_node.meta = MagicMock() + rpg.repo_node.meta.language = root_language + return rpg + + def _make_designer(self, *, rpg, target_language=None): + # Avoid the full FileDesigner import cost on test collection by + # importing inside the helper. + from skeleton.file_designer import FileDesigner # noqa: E402 + + # ``llm_client`` is supplied so the constructor doesn't try to + # build a real LLMClient (which would touch network config). 
+ return FileDesigner( + rpg=rpg, + llm_client=MagicMock(), + target_language=target_language, + ) + + def test_uses_explicit_target_language_kwarg(self) -> None: + # The kwarg wins over RPG meta and resolves to the registered + # Go backend. + from decoder_lang import GoBackend # local import to avoid + rpg = self._make_rpg(root_language="python") + designer = self._make_designer(rpg=rpg, target_language="go") + self.assertEqual(designer.target_language, "go") + self.assertIsInstance(designer.backend, GoBackend) + + def test_falls_back_to_rpg_root_meta_language(self) -> None: + rpg = self._make_rpg(root_language="python") + designer = self._make_designer(rpg=rpg) + self.assertEqual(designer.target_language, "python") + self.assertIs(designer.backend, get_backend("python")) + + def test_falls_back_to_python_default(self) -> None: + rpg = self._make_rpg(root_language=None) + with self.assertLogs("decoder_lang.backend", level="WARNING"): + designer = self._make_designer(rpg=rpg) + self.assertEqual(designer.target_language, "python") + + def test_backend_is_singleton(self) -> None: + rpg1 = self._make_rpg(root_language="python") + rpg2 = self._make_rpg(root_language="python") + d1 = self._make_designer(rpg=rpg1) + d2 = self._make_designer(rpg=rpg2) + # Both designers receive the same registered backend instance. + self.assertIs(d1.backend, d2.backend) + + +if __name__ == "__main__": + unittest.main() diff --git a/CoderMind/scripts/decoder_lang/tests/test_phase2_skeleton.py b/CoderMind/scripts/decoder_lang/tests/test_phase2_skeleton.py new file mode 100644 index 0000000..52e257e --- /dev/null +++ b/CoderMind/scripts/decoder_lang/tests/test_phase2_skeleton.py @@ -0,0 +1,307 @@ +"""Tests for backend-aware skeleton behaviour. + +Covers: + +* :class:`decoder_lang.GoBackend` registration + backend methods. 
+* :func:`skeleton.file_designer.validate_directory_structure` honors + the supplied backend's identifier rules; Python defaults apply when + ``backend=None`` (Python default). +* :meth:`skeleton_models.RepoSkeleton.add_init_files` is a no-op for + backends whose :meth:`package_marker_filename` returns ``None`` + (Go / Rust / TypeScript), and equivalent to the Python default + path otherwise. +* :class:`FileDesigner.backend` is the registered backend for the + resolved language (Go instance for a Go RPG, Python instance for a + Python RPG). +""" +from __future__ import annotations + +import sys +import unittest +from tempfile import TemporaryDirectory +from pathlib import Path +from unittest.mock import MagicMock, patch + +# Make ``scripts/`` importable for direct invocation. +_SCRIPTS_DIR = Path(__file__).resolve().parents[2] +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) + +from decoder_lang import ( # noqa: E402 + GoBackend, + PythonBackend, + ToolchainUnavailable, + get_backend, + list_backends, +) +from decoder_lang.test_result import EnvHandle # noqa: E402 + + +class GoBackendRegistrationTests(unittest.TestCase): + """Go backend is in the registry and returns the same instance.""" + + def test_go_backend_registered(self) -> None: + self.assertIn("go", list_backends()) + + def test_get_backend_go_returns_singleton(self) -> None: + a = get_backend("go") + b = get_backend("go") + self.assertIs(a, b) + self.assertIsInstance(a, GoBackend) + + +class GoBackendBehaviourTests(unittest.TestCase): + """GoBackend behaviour exposed through the decoder backend contract.""" + + def setUp(self) -> None: + self.backend = get_backend("go") + + # --- file classification ----------------------------------------- + + def test_is_source_file(self) -> None: + self.assertTrue(self.backend.is_source_file("cmd/myapp/main.go")) + self.assertTrue(self.backend.is_source_file("internal/core/core_test.go")) + for path in ("README.md", "main.py", "main.GO", 
"main"): + with self.subTest(path=path): + self.assertFalse(self.backend.is_source_file(path)) + + def test_is_test_file(self) -> None: + self.assertTrue(self.backend.is_test_file("foo_test.go")) + self.assertTrue(self.backend.is_test_file("internal/x/y_test.go")) + for path in ("foo.go", "tests/foo.go", "test_foo.go"): + # Note: Go convention is *_test.go, NOT test_*.go + with self.subTest(path=path): + self.assertFalse(self.backend.is_test_file(path)) + + # --- package marker ---------------------------------------------- + + def test_no_package_marker(self) -> None: + self.assertIsNone(self.backend.package_marker_filename()) + self.assertIsNone(self.backend.package_marker_content("any/path")) + + # --- identifier rules -------------------------------------------- + + def test_valid_identifiers(self) -> None: + for seg in ("auth", "auth_utils", "_internal", "Foo123"): + with self.subTest(seg=seg): + self.assertTrue(self.backend.is_valid_module_identifier(seg)) + + def test_invalid_identifiers(self) -> None: + for seg in ("", "1auth", "auth-utils", "auth utils", "package", "func"): + with self.subTest(seg=seg): + self.assertFalse(self.backend.is_valid_module_identifier(seg)) + + def test_sanitize(self) -> None: + self.assertEqual(self.backend.sanitize_module_identifier("auth-utils"), "auth_utils") + self.assertEqual(self.backend.sanitize_module_identifier("1auth"), "_1auth") + # Keyword collision avoided by suffix. 
+ self.assertEqual(self.backend.sanitize_module_identifier("func"), "func_") + # Idempotency + s = self.backend.sanitize_module_identifier("a-b-c") + self.assertEqual(self.backend.sanitize_module_identifier(s), s) + + # --- code structure ---------------------------------------------- + + def test_syntax_check(self) -> None: + ok, error = self.backend.syntax_check("package main\nfunc Run() {}\n") + self.assertTrue(ok, error) + ok, error = self.backend.syntax_check("func Run() {}\n") + self.assertFalse(ok) + self.assertIn("package", error or "") + + def test_has_placeholder(self) -> None: + code = 'package main\nfunc Run() string { return "TODO: implement" }\n' + self.assertTrue(self.backend.has_placeholder(code)) + self.assertFalse( + self.backend.has_placeholder('package main\nfunc Run() string { return "ok" }\n') + ) + + # --- test environment -------------------------------------------- + + def test_detect_env_none_when_go_missing(self) -> None: + with patch("decoder_lang.go_backend.shutil.which", return_value=None): + self.assertIsNone(self.backend.detect_env(Path("."))) + + def test_ensure_env_raises_when_go_missing(self) -> None: + with patch("decoder_lang.go_backend.shutil.which", return_value=None): + with self.assertRaises(ToolchainUnavailable): + self.backend.ensure_env(Path(".")) + + def test_ensure_env_creates_go_mod_when_toolchain_exists(self) -> None: + with TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + with patch("decoder_lang.go_backend.shutil.which", return_value="/usr/bin/go"): + env = self.backend.ensure_env(root) + self.assertEqual(env.runtime_executable, "/usr/bin/go") + self.assertEqual(env.extra.get("module"), f"codermind.local/{root.name.lower()}") + self.assertTrue((root / "go.mod").exists()) + self.assertIn("module codermind.local", (root / "go.mod").read_text()) + + def test_test_command(self) -> None: + cmd = self.backend.test_command( + EnvHandle(project_root=Path("."), runtime_executable="/usr/bin/go"), + 
selectors=["TestRun", "TestStop"], + ) + self.assertEqual(cmd, ["/usr/bin/go", "test", "-v", "-run", "TestRun|TestStop", "./..."]) + + def test_install_deps_command(self) -> None: + env = EnvHandle(project_root=Path("."), runtime_executable="/usr/bin/go") + self.assertIsNone(self.backend.install_deps_command(env, [])) + self.assertEqual( + self.backend.install_deps_command(env, ["github.com/acme/lib"]), + ["/usr/bin/go", "get", "github.com/acme/lib"], + ) + + def test_parse_test_output(self) -> None: + raw = "\n".join([ + "=== RUN TestRun", + "--- PASS: TestRun (0.01s)", + "=== RUN TestBroken", + " service_test.go:12: expected true", + "--- FAIL: TestBroken (0.02s)", + "FAIL\texample.com/demo\t0.03s", + ]) + result = self.backend.parse_test_output(raw, 1) + self.assertEqual(result.status, "failed") + self.assertEqual(result.passed_count, 1) + self.assertEqual(result.failed_count, 1) + self.assertEqual(result.failures[0].test_id, "TestBroken") + self.assertEqual(result.failures[0].file_path, "service_test.go") + self.assertEqual(result.failures[0].line, 12) + + def test_parse_test_output_without_test_failure_is_error(self) -> None: + result = self.backend.parse_test_output("FAIL\texample.com/demo\n", 1) + self.assertEqual(result.status, "errored") + self.assertEqual(result.error_count, 1) + + # --- prompt hints ------------------------------------------------ + + def test_prompt_hints(self) -> None: + hints = self.backend.prompt_hints() + self.assertEqual(hints.display_name, "Go") + self.assertEqual(hints.markdown_fence, "go") + self.assertEqual(hints.file_extension, ".go") + self.assertEqual(hints.test_framework_name, "go test") + self.assertIn("idiomatic Go", hints.style_directive) + + +class ValidateDirectoryStructureTests(unittest.TestCase): + """Backend-aware identifier validation in ``validate_directory_structure``.""" + + def setUp(self) -> None: + from skeleton.file_designer import validate_directory_structure # noqa + self.validate = 
validate_directory_structure + + def test_python_default_identifier_rules(self) -> None: + # No backend → Python identifier rules: hyphens are rejected. + ok, msg = self.validate( + {"comp": "src/my-pkg/utils"}, ["comp"], + ) + self.assertFalse(ok) + self.assertIn("my-pkg", msg) + self.assertIn("Python identifier", msg) + + def test_go_backend_accepts_lowercase_underscored(self) -> None: + ok, msg = self.validate( + {"comp": "internal/auth_utils/token"}, ["comp"], + backend=get_backend("go"), + ) + self.assertTrue(ok, msg) + + def test_go_backend_rejects_hyphen(self) -> None: + ok, msg = self.validate( + {"comp": "internal/auth-utils"}, ["comp"], + backend=get_backend("go"), + ) + self.assertFalse(ok) + self.assertIn("auth-utils", msg) + self.assertIn("Go identifier", msg) + + def test_go_backend_rejects_keyword(self) -> None: + ok, msg = self.validate( + {"comp": "internal/func"}, ["comp"], + backend=get_backend("go"), + ) + self.assertFalse(ok) + self.assertIn("func", msg) + + +class AddInitFilesTests(unittest.TestCase): + """Verify the behaviour-preservation contract on ``add_init_files``. + + Uses a small in-memory ``RepoSkeleton`` so the test runs without + touching the LLM pipeline. + """ + + def _make_skeleton(self): + from skeleton.skeleton_models import RepoSkeleton # noqa: E402 + + # RepoSkeleton accepts a flat ``{file_path: source_code}`` map + # and builds the directory tree automatically. We only need a + # single source file under a sub-directory so that + # ``add_init_files`` has at least one candidate directory. + return RepoSkeleton({"src/foo.py": ""}) + + def test_python_marker_added_without_backend(self) -> None: + # backend=None uses Python __init__.py emission. 
+ skel = self._make_skeleton() + added = skel.add_init_files() + self.assertEqual(added, 1) + self.assertIn("src/__init__.py", skel.path_to_node) + + def test_python_backend_matches_no_backend(self) -> None: + # Passing PythonBackend explicitly produces the same package + # markers as default backend resolution. + skel_a = self._make_skeleton() + a = skel_a.add_init_files() + + skel_b = self._make_skeleton() + b = skel_b.add_init_files(backend=get_backend("python")) + + self.assertEqual(a, b) + self.assertEqual( + set(skel_a.path_to_node), set(skel_b.path_to_node), + ) + + def test_go_backend_is_noop(self) -> None: + # backend whose package_marker_filename() is None makes the + # whole method a no-op: zero files added, registry unchanged. + skel = self._make_skeleton() + before = set(skel.path_to_node) + added = skel.add_init_files(backend=get_backend("go")) + self.assertEqual(added, 0) + self.assertEqual(set(skel.path_to_node), before) + + +class FileDesignerBackendInstanceTests(unittest.TestCase): + """``FileDesigner.backend`` is the right instance for the language + resolved from the RPG, including the registered Go backend.""" + + def _make_designer(self, root_language): + from skeleton.file_designer import FileDesigner # noqa + + rpg = MagicMock() + rpg.repo_node = MagicMock() + rpg.repo_node.meta = MagicMock() + rpg.repo_node.meta.language = root_language + return FileDesigner(rpg=rpg, llm_client=MagicMock()) + + def test_python_rpg_gets_python_backend(self) -> None: + d = self._make_designer("python") + self.assertIs(d.backend, get_backend("python")) + self.assertIsInstance(d.backend, PythonBackend) + + def test_go_rpg_gets_go_backend(self) -> None: + d = self._make_designer("go") + self.assertIs(d.backend, get_backend("go")) + self.assertIsInstance(d.backend, GoBackend) + + def test_fallback_filename_uses_backend_extension(self) -> None: + d = self._make_designer("go") + # We don't run the full designer pipeline; just assert the + # backend extension is 
what the misc-fallback code uses. + self.assertEqual(d.backend.file_extension, ".go") + + +if __name__ == "__main__": + unittest.main() diff --git a/CoderMind/scripts/decoder_lang/tests/test_phase3_code_structure.py b/CoderMind/scripts/decoder_lang/tests/test_phase3_code_structure.py new file mode 100644 index 0000000..f7050f1 --- /dev/null +++ b/CoderMind/scripts/decoder_lang/tests/test_phase3_code_structure.py @@ -0,0 +1,343 @@ +"""Tests for PythonBackend code-structure helpers. + +The suite covers ``list_code_units``, ``format_signature``, +``list_imports``, ``list_inheritance``, and ``find_main_block_lineno``. +Assertions focus on the shapes consumed by ``func_design`` and +code-generation prompts. +""" +from __future__ import annotations + +import ast +import sys +import unittest +from pathlib import Path + +# Make ``scripts/`` importable for direct invocation. +_SCRIPTS_DIR = Path(__file__).resolve().parents[2] +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) + +from decoder_lang import GoBackend, PythonBackend, get_backend # noqa: E402 + + +# Sample Python source that exercises top-level functions, classes +# with methods, nested functions, async, and decorators. 
+_SAMPLE_SRC = '''\ +"""Module docstring.""" +import os +from typing import Optional, List +from .util import helper as h + +CONST = 1 + +def top_level_func(a: int, b: str = "x") -> bool: + """Top-level.""" + return True + +async def top_level_async(data: bytes) -> None: + pass + +class Parser: + """Parser class.""" + + def __init__(self, path: str) -> None: + self.path = path + + def parse(self, data: bytes, *, strict: bool = False) -> List[int]: + return [] + + async def parse_async(self, x, y, z, w, extra) -> None: + pass + + def _private(self): + pass + +def outer(): + def inner(): + pass + return inner + +if __name__ == "__main__": + main() +''' + + +class ListCodeUnitsTests(unittest.TestCase): + """``list_code_units`` walks all nesting; matches ast.walk semantics + used by ``func_design/interface_agent``.""" + + def setUp(self) -> None: + self.backend: PythonBackend = get_backend("python") # type: ignore + self.units = self.backend.list_code_units(_SAMPLE_SRC, "demo.py") + + def test_returns_units_for_every_decl(self) -> None: + names = [u.name for u in self.units] + # Order is ast.walk's BFS; we only assert membership. + for expected in ( + "top_level_func", "top_level_async", "Parser", + "__init__", "parse", "parse_async", "_private", + "outer", "inner", + ): + with self.subTest(decl=expected): + self.assertIn(expected, names) + + def test_unit_types_assigned_correctly(self) -> None: + by_name = {u.name: u for u in self.units} + self.assertEqual(by_name["top_level_func"].unit_type, "function") + self.assertEqual(by_name["top_level_async"].unit_type, "function") + self.assertEqual(by_name["Parser"].unit_type, "class") + self.assertEqual(by_name["__init__"].unit_type, "method") + self.assertEqual(by_name["parse"].unit_type, "method") + self.assertEqual(by_name["parse_async"].unit_type, "method") + # Nested function is NOT a method: its parent is a function, + # not a class. Both ``outer`` and ``inner`` are functions. 
+ self.assertEqual(by_name["outer"].unit_type, "function") + self.assertEqual(by_name["inner"].unit_type, "function") + + def test_parent_populated_for_methods(self) -> None: + by_name = {u.name: u for u in self.units} + self.assertEqual(by_name["__init__"].parent, "Parser") + self.assertEqual(by_name["parse"].parent, "Parser") + # Top-level decls have no parent. + self.assertIsNone(by_name["top_level_func"].parent) + # Nested function has no class parent. + self.assertIsNone(by_name["inner"].parent) + + def test_line_numbers_populated(self) -> None: + by_name = {u.name: u for u in self.units} + for n in ("top_level_func", "Parser", "parse"): + with self.subTest(decl=n): + self.assertIsNotNone(by_name[n].line_start) + self.assertIsNotNone(by_name[n].line_end) + self.assertGreaterEqual(by_name[n].line_end, by_name[n].line_start) + + def test_ast_node_escape_hatch_preserved(self) -> None: + # PythonBackend stuffs the raw ast node into extra["ast_node"] + # so format_signature can use ast.unparse without re-parsing. + by_name = {u.name: u for u in self.units} + parse_unit = by_name["parse"] + node = parse_unit.extra.get("ast_node") + self.assertIsInstance(node, ast.FunctionDef) + self.assertEqual(node.name, "parse") + + def test_empty_on_syntax_error(self) -> None: + # Callers in func_design tolerate empty results; backend must + # not raise even on garbled source. + self.assertEqual(self.backend.list_code_units("def f(:\n pass\n"), []) + + def test_file_path_propagated(self) -> None: + # File path on every unit matches the path argument so callers + # can index by file without re-passing it. 
+ for u in self.units: + self.assertEqual(u.file_path, "demo.py") + + +class FormatSignatureTests(unittest.TestCase): + """Signature formatting matches interface-registry expectations.""" + + def setUp(self) -> None: + self.backend: PythonBackend = get_backend("python") # type: ignore + self.units = self.backend.list_code_units(_SAMPLE_SRC, "demo.py") + self.by_name = {u.name: u for u in self.units} + + def test_simple_function(self) -> None: + self.assertEqual( + self.backend.format_signature(self.by_name["top_level_func"]), + "top_level_func(a: int, b: str) -> bool", + ) + + def test_async_function(self) -> None: + self.assertEqual( + self.backend.format_signature(self.by_name["top_level_async"]), + "top_level_async(data: bytes) -> None", + ) + + def test_method_skips_self(self) -> None: + # ``self`` is excluded from rendered params. + self.assertEqual( + self.backend.format_signature(self.by_name["__init__"]), + "__init__(path: str) -> None", + ) + + def test_method_with_keyword_only(self) -> None: + # Keyword-only args are omitted from the rendered prompt + # signature, so ``strict`` does not appear. + sig = self.backend.format_signature(self.by_name["parse"]) + self.assertIn("data: bytes", sig) + self.assertNotIn("strict", sig) + self.assertTrue(sig.endswith(" -> List[int]")) + + def test_truncation_when_more_than_4_params(self) -> None: + # parse_async has 5 positional params after dropping ``self``. 
+ sig = self.backend.format_signature(self.by_name["parse_async"]) + self.assertIn(", ...", sig) + self.assertTrue(sig.endswith(" -> None")) + + def test_non_function_returns_name(self) -> None: + self.assertEqual( + self.backend.format_signature(self.by_name["Parser"]), + "Parser", + ) + + def test_none_safe(self) -> None: + self.assertEqual(self.backend.format_signature(None), "") + + +class ListImportsTests(unittest.TestCase): + """``list_imports`` matches lang_parser's dependency shape.""" + + def setUp(self) -> None: + self.backend: PythonBackend = get_backend("python") # type: ignore + self.deps = self.backend.list_imports(_SAMPLE_SRC, "demo.py") + + def test_all_imports_emitted(self) -> None: + # 3 statements → 1 + 2 + 1 = 4 entries (typing imports List + Optional). + modules = [d.extra.get("module") for d in self.deps] + self.assertIn("os", modules) + self.assertIn("typing", modules) + self.assertIn(".util", modules) + + def test_relation_is_imports(self) -> None: + for dep in self.deps: + self.assertEqual(dep.relation, "imports") + + def test_alias_recorded(self) -> None: + # ``from .util import helper as h`` → alias=h. 
+ util_deps = [d for d in self.deps if d.extra.get("module") == ".util"] + self.assertEqual(len(util_deps), 1) + self.assertEqual(util_deps[0].extra.get("alias"), "h") + self.assertEqual(util_deps[0].extra.get("imported"), "helper") + + def test_empty_on_syntax_error(self) -> None: + self.assertEqual(self.backend.list_imports("import"), []) + + +class ListInheritanceTests(unittest.TestCase): + """``list_inheritance`` yields uniform ``inherits`` edges per language.""" + + def test_python_derives_edges_from_class_bases(self) -> None: + backend = get_backend("python") + code = ( + "class Base:\n pass\n\n" + "class Mixin:\n pass\n\n" + "class Child(Base, Mixin):\n pass\n" + ) + edges = backend.list_inheritance(code, "m.py") + pairs = {(d.src, d.symbol) for d in edges} + self.assertEqual(pairs, {("Child", "Base"), ("Child", "Mixin")}) + for dep in edges: + self.assertEqual(dep.relation, "inherits") + + def test_python_empty_on_syntax_error(self) -> None: + self.assertEqual(get_backend("python").list_inheritance("class"), []) + + def test_rust_trait_impl_is_inheritance(self) -> None: + backend = get_backend("rust") + code = "struct Store;\ntrait Repo {}\nimpl Repo for Store {}\n" + edges = backend.list_inheritance(code, "m.rs") + pairs = {(d.src, d.symbol) for d in edges} + self.assertIn(("Store", "Repo"), pairs) + for dep in edges: + self.assertEqual(dep.relation, "inherits") + + def test_go_without_inheritance_is_empty(self) -> None: + backend = get_backend("go") + code = "package m\n\ntype S struct{}\n" + self.assertEqual(backend.list_inheritance(code, "m.go"), []) + + +class FindMainBlockLinenoTests(unittest.TestCase): + """``find_main_block_lineno`` is the Python-only hook + ``interface_review`` will call (others get None via getattr).""" + + def setUp(self) -> None: + self.backend: PythonBackend = get_backend("python") # type: ignore + + def test_finds_main_block(self) -> None: + ln = self.backend.find_main_block_lineno(_SAMPLE_SRC) + # The ``if __name__ == 
"__main__":`` line in the fixture is the + # 2nd-to-last line. We don't pin it absolutely — just check it + # points at an ``if`` line in the source. + self.assertIsNotNone(ln) + line_text = _SAMPLE_SRC.splitlines()[ln - 1] + self.assertIn("__name__", line_text) + + def test_none_when_absent(self) -> None: + src = "def foo():\n return 1\n" + self.assertIsNone(self.backend.find_main_block_lineno(src)) + + def test_none_on_syntax_error(self) -> None: + self.assertIsNone(self.backend.find_main_block_lineno("def f(:")) + + def test_not_in_protocol(self) -> None: + # Documented as a Python-only hook; non-Python backends don't + # expose it. Feature detection via getattr is the contract. + self.assertFalse(hasattr(get_backend("go"), "find_main_block_lineno")) + + +class GoBackendCodeStructureTests(unittest.TestCase): + """Go backend code-structure helpers delegate to ``lang_parser``.""" + + SAMPLE_GO = """\ +package server + +import ( + "fmt" + nethttp "net/http" +) + +type Server struct { + Name string +} + +func NewServer(name string) *Server { + return &Server{Name: name} +} + +func (s *Server) Handle() { + fmt.Println(s.Name) +} +""" + + def setUp(self) -> None: + self.backend: GoBackend = get_backend("go") # type: ignore + + def test_list_code_units(self) -> None: + units = self.backend.list_code_units(self.SAMPLE_GO, "server.go") + by_name = {unit.name: unit for unit in units} + self.assertEqual(by_name["Server"].unit_type, "struct") + self.assertEqual(by_name["NewServer"].unit_type, "function") + self.assertEqual(by_name["Handle"].unit_type, "method") + self.assertEqual(by_name["Handle"].parent, "Server") + + def test_list_code_units_empty_on_syntax_error(self) -> None: + self.assertEqual(self.backend.list_code_units("func broken(\n", "bad.go"), []) + + def test_format_signature(self) -> None: + units = self.backend.list_code_units(self.SAMPLE_GO, "server.go") + by_name = {unit.name: unit for unit in units} + self.assertEqual( + 
self.backend.format_signature(by_name["NewServer"]), + "func NewServer(name string) *Server", + ) + self.assertEqual( + self.backend.format_signature(by_name["Handle"]), + "func (s *Server) Handle()", + ) + self.assertEqual(self.backend.format_signature(by_name["Server"]), "Server") + self.assertEqual(self.backend.format_signature(None), "") + + def test_list_imports(self) -> None: + imports = self.backend.list_imports(self.SAMPLE_GO, "server.go") + self.assertEqual([dep.dst for dep in imports], ["fmt", "net/http"]) + self.assertEqual(imports[1].extra.get("alias"), "nethttp") + + def test_list_imports_empty_on_syntax_error(self) -> None: + self.assertEqual(self.backend.list_imports("func broken(\n", "bad.go"), []) + + def test_list_inheritance_empty_for_plain_struct(self) -> None: + self.assertEqual(self.backend.list_inheritance(self.SAMPLE_GO, "server.go"), []) + + +if __name__ == "__main__": + unittest.main() diff --git a/CoderMind/scripts/decoder_lang/tests/test_phase5_prompt_directive.py b/CoderMind/scripts/decoder_lang/tests/test_phase5_prompt_directive.py new file mode 100644 index 0000000..6322ecb --- /dev/null +++ b/CoderMind/scripts/decoder_lang/tests/test_phase5_prompt_directive.py @@ -0,0 +1,84 @@ +"""Tests for language-directive preambles in LLM prompts. + +Critical regression invariant: when the target language is Python +the directive is the empty string and prompt text is unchanged. +""" +from __future__ import annotations + +import sys +import unittest +from pathlib import Path + +# Make ``scripts/`` importable for direct invocation. 
+_SCRIPTS_DIR = Path(__file__).resolve().parents[2] +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) + +from decoder_lang import ( # noqa: E402 + get_backend, + language_directive, + with_language_directive, +) + + +class LanguageDirectiveTests(unittest.TestCase): + """``language_directive`` produces the right preamble per language.""" + + def test_python_directive_is_empty(self) -> None: + # Critical: Python prompts must render byte-identically. + self.assertEqual(language_directive(get_backend("python")), "") + + def test_none_backend_directive_is_empty(self) -> None: + # Defensive: callers without a backend supply None and should + # see no behavioural change. + self.assertEqual(language_directive(None), "") + + def test_go_directive_mentions_go(self) -> None: + d = language_directive(get_backend("go")) + self.assertTrue(d) + self.assertIn("Go", d) + # Markdown fence reminder helps the LLM emit the right code block. + self.assertIn("```go", d) + # Extension reminder. + self.assertIn(".go", d) + # Test framework hint. + self.assertIn("go test", d) + + def test_directive_ends_with_blank_line(self) -> None: + # When a directive is emitted, it must end with a blank line + # so the system prompt body after it is visually separated. + d = language_directive(get_backend("go")) + self.assertTrue(d.endswith("\n")) + + +class WithLanguageDirectiveTests(unittest.TestCase): + """``with_language_directive`` prepends correctly + is no-op for Python.""" + + def setUp(self) -> None: + self.body = "You are a helpful assistant.\nFollow the rules." + + def test_python_returns_body_unchanged(self) -> None: + result = with_language_directive(self.body, get_backend("python")) + self.assertEqual(result, self.body) + # ``is`` check confirms no allocation either when nothing to do. 
+ self.assertIs(result, self.body) + + def test_none_returns_body_unchanged(self) -> None: + result = with_language_directive(self.body, None) + self.assertEqual(result, self.body) + + def test_go_prepends_directive(self) -> None: + result = with_language_directive(self.body, get_backend("go")) + self.assertTrue(result.endswith(self.body)) + self.assertTrue(result.startswith("### Target language: Go")) + # The original body is preserved verbatim at the tail. + self.assertIn(self.body, result) + + def test_empty_body_handled(self) -> None: + # Edge: empty body + Go directive → just the directive. + result = with_language_directive("", get_backend("go")) + self.assertTrue(result.startswith("### Target language: Go")) + + +if __name__ == "__main__": + unittest.main() diff --git a/CoderMind/scripts/decoder_lang/tests/test_python_backend.py b/CoderMind/scripts/decoder_lang/tests/test_python_backend.py new file mode 100644 index 0000000..8e57001 --- /dev/null +++ b/CoderMind/scripts/decoder_lang/tests/test_python_backend.py @@ -0,0 +1,297 @@ +"""Tests for the decoder backend registry and Python backend contract. + +These tests focus on invariants relied on by code paths that already +route through :mod:`decoder_lang`. Unsupported methods are asserted to +raise ``NotImplementedError`` so accidental partial implementations are +visible. +""" +from __future__ import annotations + +import sys +import unittest +from pathlib import Path + +# Make ``scripts/`` importable when these tests are run directly. 
+_SCRIPTS_DIR = Path(__file__).resolve().parents[2] +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) + +from decoder_lang import ( # noqa: E402 + LanguageBackend, + PromptHints, + PythonBackend, + ToolchainUnavailable, + get_backend, + list_backends, + register_backend, +) +from decoder_lang.backend import resolve_target_language # noqa: E402 + + +class RegistryTests(unittest.TestCase): + """Backend registry behaviour.""" + + def test_python_backend_registered_by_default(self) -> None: + self.assertIn("python", list_backends()) + + def test_get_backend_returns_singleton(self) -> None: + a = get_backend("python") + b = get_backend("python") + self.assertIs(a, b) + + def test_unknown_language_falls_back_to_python_with_warning(self) -> None: + with self.assertLogs("decoder_lang.backend", level="WARNING") as cm: + backend = get_backend("nonexistent-language") + self.assertEqual(backend.name, "python") + self.assertTrue( + any("falling back" in msg for msg in cm.output), + f"expected fallback warning, got: {cm.output}", + ) + + def test_none_language_returns_default_silently(self) -> None: + # None is the explicit "no info" case; not a misconfiguration, + # so no warning expected. + backend = get_backend(None) + self.assertEqual(backend.name, "python") + + def test_python_backend_satisfies_protocol(self) -> None: + backend = get_backend("python") + # Runtime Protocol check confirms every required attribute exists. + self.assertIsInstance(backend, LanguageBackend) + + def test_register_backend_replaces_existing(self) -> None: + # Roundtrip: register a fake then restore. 
+ class _FakePython(PythonBackend): + name = "python" + + try: + register_backend(_FakePython) + self.assertIsInstance(get_backend("python"), _FakePython) + finally: + register_backend(PythonBackend) + self.assertNotIsInstance(get_backend("python"), _FakePython) + + +class FileLayoutTests(unittest.TestCase): + """Behaviour-preservation for the trial-wired + ``is_source_file`` path and surrounding layout helpers.""" + + def setUp(self) -> None: + self.backend = get_backend("python") + + # --- is_source_file equivalence with old suffix check ----------- + + def test_is_source_file_accepts_py(self) -> None: + self.assertTrue(self.backend.is_source_file("foo/bar.py")) + + def test_is_source_file_rejects_non_py(self) -> None: + for path in ("README.md", "data.json", "foo.pyc", "Makefile", + "src/no_ext", "foo.PY"): + with self.subTest(path=path): + self.assertFalse(self.backend.is_source_file(path)) + + # Cross-check: ``not is_source_file(p)`` == ``Path(p).suffix != ".py"`` + # — the exact predicate the original ``static_completeness_check`` + # used. Equivalence here is what makes the trial wiring safe. 
+ def test_is_source_file_equivalent_to_old_suffix_check(self) -> None: + from pathlib import PurePosixPath + for path in ("a.py", "a.PY", "a.pyi", "x/y.py", "Makefile", + "tests/test_x.py", "weird.py.bak"): + with self.subTest(path=path): + old = PurePosixPath(path).suffix != ".py" + new = not self.backend.is_source_file(path) + self.assertEqual(old, new, f"divergent for {path}") + + # --- is_test_file --------------------------------------------- + + def test_is_test_file_matches_pytest_conventions(self) -> None: + for path in ( + "tests/test_foo.py", + "src/pkg/tests/test_inner.py", + "test_root.py", + "foo_test.py", + ): + with self.subTest(path=path): + self.assertTrue(self.backend.is_test_file(path)) + + def test_is_test_file_rejects_regular_sources(self) -> None: + for path in ("src/pkg/core.py", "main.py", "tester.py"): + with self.subTest(path=path): + self.assertFalse(self.backend.is_test_file(path)) + + # --- package markers ----------------------------------------- + + def test_package_marker(self) -> None: + self.assertEqual(self.backend.package_marker_filename(), "__init__.py") + # Empty body matches the pre-existing skeleton emitter. 
+ self.assertEqual(self.backend.package_marker_content("pkg/sub"), "") + + # --- identifier rules ---------------------------------------- + + def test_is_valid_module_identifier(self) -> None: + self.assertTrue(self.backend.is_valid_module_identifier("auth")) + self.assertTrue(self.backend.is_valid_module_identifier("auth_utils")) + for bad in ("", "1auth", "auth-utils", "auth utils", "class", "def"): + with self.subTest(seg=bad): + self.assertFalse(self.backend.is_valid_module_identifier(bad)) + + def test_sanitize_module_identifier_is_idempotent(self) -> None: + cases = [ + ("auth-utils", "auth_utils"), + ("1stage", "_1stage"), + ("foo bar", "foo_bar"), + ("ok_name", "ok_name"), + ("", "_"), + ] + for raw, want in cases: + with self.subTest(raw=raw): + got = self.backend.sanitize_module_identifier(raw) + self.assertEqual(got, want) + # Idempotency: a second pass changes nothing. + self.assertEqual( + self.backend.sanitize_module_identifier(got), got, + ) + + +class CodeStructureTests(unittest.TestCase): + """``has_placeholder`` + ``syntax_check`` mirror the original + semantics inside ``static_completeness_check``.""" + + def setUp(self) -> None: + self.backend = get_backend("python") + + def test_has_placeholder_true_on_todo_return(self) -> None: + code = ( + "def f():\n" + " return 'TODO: implement me'\n" + ) + self.assertTrue(self.backend.has_placeholder(code)) + + def test_has_placeholder_true_on_placeholder_marker(self) -> None: + code = "def f():\n return 'PLACEHOLDER value'\n" + self.assertTrue(self.backend.has_placeholder(code)) + + def test_has_placeholder_true_on_not_implemented_string(self) -> None: + code = "def f():\n return 'Not implemented yet'\n" + self.assertTrue(self.backend.has_placeholder(code)) + + def test_has_placeholder_false_on_normal_code(self) -> None: + code = ( + "def add(a, b):\n" + " '''A docstring mentioning TODO is fine.'''\n" + " return a + b\n" + ) + self.assertFalse(self.backend.has_placeholder(code)) + + def 
test_has_placeholder_false_on_non_string_return(self) -> None: + self.assertFalse(self.backend.has_placeholder("def f(): return 42")) + + def test_has_placeholder_false_on_syntax_error(self) -> None: + # Garbled source must NOT be reported as containing a placeholder. + self.assertFalse(self.backend.has_placeholder("def f(:\n pass\n")) + + def test_syntax_check(self) -> None: + ok, err = self.backend.syntax_check("x = 1\n") + self.assertTrue(ok) + self.assertIsNone(err) + ok, err = self.backend.syntax_check("def f(:\n pass\n") + self.assertFalse(ok) + self.assertIsNotNone(err) + self.assertIn("SyntaxError", err or "") + + +class StubbedMethodsTests(unittest.TestCase): + """Unsupported methods must raise instead of returning bad data.""" + + def setUp(self) -> None: + self.backend = get_backend("python") + + def test_detect_env_stub(self) -> None: + with self.assertRaises(NotImplementedError): + self.backend.detect_env(Path(".")) + + def test_ensure_env_stub(self) -> None: + with self.assertRaises(NotImplementedError): + self.backend.ensure_env(Path(".")) + + def test_test_command_stub(self) -> None: + from decoder_lang.test_result import EnvHandle + with self.assertRaises(NotImplementedError): + self.backend.test_command(EnvHandle(project_root=Path("."))) + + def test_install_deps_command_stub(self) -> None: + from decoder_lang.test_result import EnvHandle + with self.assertRaises(NotImplementedError): + self.backend.install_deps_command( + EnvHandle(project_root=Path(".")), deps=["x"], + ) + + def test_parse_test_output_stub(self) -> None: + with self.assertRaises(NotImplementedError): + self.backend.parse_test_output("foo", 0) + + +class PromptHintsTests(unittest.TestCase): + """Prompt-hint fields are populated and the instance is cached.""" + + def test_prompt_hints_shape(self) -> None: + hints = get_backend("python").prompt_hints() + self.assertIsInstance(hints, PromptHints) + self.assertEqual(hints.display_name, "Python") + 
self.assertEqual(hints.markdown_fence, "python") + self.assertEqual(hints.file_extension, ".py") + self.assertEqual(hints.test_framework_name, "pytest") + # Non-empty guidance strings ensure templates don't render blanks. + self.assertTrue(hints.style_directive.strip()) + self.assertTrue(hints.module_naming_rule.strip()) + self.assertTrue(hints.package_layout_example.strip()) + + def test_prompt_hints_is_cached(self) -> None: + a = get_backend("python").prompt_hints() + b = get_backend("python").prompt_hints() + self.assertIs(a, b) + + +class ResolveTargetLanguageTests(unittest.TestCase): + """Three-tier target-language fallback chain.""" + + def test_tier_1_reads_root_meta_language(self) -> None: + rpg = {"root": {"meta": {"language": "go"}}} + self.assertEqual(resolve_target_language(rpg), "go") + + def test_tier_2_uses_dominant_language_when_root_missing(self) -> None: + # Without root.meta.language, fall back to dominant_language() + # over the provided file list. Use a Python-heavy list so we + # don't depend on whatever lang_parser ships for non-Python. + result = resolve_target_language( + rpg_obj={"root": {}}, + valid_files=["a.py", "b.py", "c.py"], + ) + self.assertEqual(result, "python") + + def test_tier_3_defaults_to_python_with_warning(self) -> None: + with self.assertLogs("decoder_lang.backend", level="WARNING") as cm: + result = resolve_target_language({}, valid_files=None) + self.assertEqual(result, "python") + self.assertTrue( + any("defaulting to 'python'" in msg for msg in cm.output), + ) + + def test_handles_bad_input_gracefully(self) -> None: + # None / non-dict shouldn't crash. 
+ with self.assertLogs("decoder_lang.backend", level="WARNING"): + self.assertEqual(resolve_target_language(None), "python") + with self.assertLogs("decoder_lang.backend", level="WARNING"): + self.assertEqual(resolve_target_language("garbage"), "python") + + +class ToolchainUnavailableTests(unittest.TestCase): + """:class:`ToolchainUnavailable` is a real exception type callers + can catch by name.""" + + def test_is_runtime_error(self) -> None: + self.assertTrue(issubclass(ToolchainUnavailable, RuntimeError)) + + +if __name__ == "__main__": + unittest.main() diff --git a/CoderMind/scripts/decoder_lang/tests/test_unit_kind.py b/CoderMind/scripts/decoder_lang/tests/test_unit_kind.py new file mode 100644 index 0000000..bb5a157 --- /dev/null +++ b/CoderMind/scripts/decoder_lang/tests/test_unit_kind.py @@ -0,0 +1,97 @@ +"""Tests for unit-name classification (callable vs type-like). + +Covers the shared :func:`decoder_lang.unit_kind.classify_unit_kind` +helper and the ``unit_kind`` / ``is_callable_unit`` backend methods. +Classification feeds orphan detection: callable units are subject to +the "no incoming edge => dead code" heuristic; type-like units are +exempt (a data structure legitimately has no incoming invocation edge). 
+""" +from __future__ import annotations + +import sys +import unittest +from pathlib import Path + +_SCRIPTS_DIR = Path(__file__).resolve().parents[2] +if str(_SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(_SCRIPTS_DIR)) + +from decoder_lang import get_backend, list_backends # noqa: E402 +from decoder_lang.unit_kind import classify_unit_kind # noqa: E402 + + +class TestClassifyUnitKind(unittest.TestCase): + def test_callable_prefixes(self): + for name in ("function f", "method m", "class C", "fn g", "func h"): + self.assertEqual(classify_unit_kind(name), "callable", name) + + def test_type_prefixes(self): + for name in ( + "struct S", "enum E", "interface I", "trait T", + "type Alias", "union U", "typedef Td", + ): + self.assertEqual(classify_unit_kind(name), "type", name) + + def test_unknown(self): + for name in ("", "weird X", "constant K", "noprefix"): + self.assertEqual(classify_unit_kind(name), "unknown", name) + + def test_case_insensitive(self): + self.assertEqual(classify_unit_kind("Function F"), "callable") + self.assertEqual(classify_unit_kind("STRUCT S"), "type") + + def test_custom_prefix_sets(self): + self.assertEqual( + classify_unit_kind( + "widget W", + callable_prefixes=frozenset({"widget"}), + type_prefixes=frozenset(), + ), + "callable", + ) + + +class TestBackendUnitKind(unittest.TestCase): + """Every registered backend exposes unit_kind / is_callable_unit.""" + + def test_all_backends_have_methods(self): + for name in list_backends(): + b = get_backend(name) + self.assertTrue(hasattr(b, "unit_kind"), name) + self.assertTrue(hasattr(b, "is_callable_unit"), name) + + def test_python_class_is_callable(self): + # Decision: Python class stays callable (zero regression — the + # encoder records Foo() instantiation as an invocation edge). 
+ b = get_backend("python") + self.assertEqual(b.unit_kind("class JsonTodoStore"), "callable") + self.assertTrue(b.is_callable_unit("class JsonTodoStore")) + + def test_go_struct_is_type(self): + # The Go false-positive case: struct Store / struct PageData must + # be exempt from orphan detection. + b = get_backend("go") + self.assertEqual(b.unit_kind("struct Store"), "type") + self.assertFalse(b.is_callable_unit("struct Store")) + self.assertTrue(b.is_callable_unit("function main")) + self.assertTrue(b.is_callable_unit("method ServeHTTP")) + + def test_rust_struct_enum_are_types(self): + b = get_backend("rust") + self.assertFalse(b.is_callable_unit("struct Config")) + self.assertFalse(b.is_callable_unit("enum Command")) + self.assertTrue(b.is_callable_unit("fn main")) + + def test_typescript_interface_is_type(self): + b = get_backend("typescript") + self.assertFalse(b.is_callable_unit("interface Todo")) + self.assertTrue(b.is_callable_unit("function render")) + + def test_cpp_class_callable_struct_type(self): + b = get_backend("cpp") + self.assertTrue(b.is_callable_unit("class Evaluator")) + self.assertFalse(b.is_callable_unit("struct Token")) + + +if __name__ == "__main__": + unittest.main() diff --git a/CoderMind/tests/test_branch_name_sanitization.py b/CoderMind/tests/test_branch_name_sanitization.py new file mode 100644 index 0000000..f28496a --- /dev/null +++ b/CoderMind/tests/test_branch_name_sanitization.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import sys +from pathlib import Path + + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from common.git_utils import sanitize_branch_component # noqa: E402 + + +def test_trailing_dot_after_truncation_is_removed() -> None: + # The exact id that produced an invalid 'batch/...state.' ref for C++. 
+ batch_id = "src_expression_calculator_syntax_expression_state.cpp_20260613_082726_e88325bc" + + safe = sanitize_branch_component(batch_id, max_len=50, fallback="batch") + + assert safe == "src_expression_calculator_syntax_expression_state" + assert not safe.endswith(".") + + +def test_empty_and_separator_only_values_use_fallback() -> None: + assert sanitize_branch_component("", fallback="batch") == "batch" + assert sanitize_branch_component(" ", fallback="task") == "task" + assert sanitize_branch_component("///", fallback="review") == "review" + assert sanitize_branch_component("...", fallback="batch") == "batch" + + +def test_unsafe_ref_characters_are_replaced() -> None: + assert sanitize_branch_component("unsafe@{name}") == "unsafe_name" + assert sanitize_branch_component("a b:c?d*e[f") == "a_b_c_d_e_f" + assert sanitize_branch_component("abc..def@@@ghi---jkl") == "abc_def_ghi_jkl" + + +def test_lock_suffix_is_stripped() -> None: + assert sanitize_branch_component("foo.lock") == "foo" + assert sanitize_branch_component("only.lock", fallback="batch") == "only" + + +def test_non_ascii_language_identifiers_stay_git_safe() -> None: + # Identifiers from non-English task names must still yield a valid ref. + safe = sanitize_branch_component("模块_state.go", fallback="batch") + + assert safe + assert ".." not in safe + assert not safe.endswith(".") + assert "/" not in safe + + +def test_result_is_idempotent() -> None: + once = sanitize_branch_component("Some Mixed/Name..value.lock") + twice = sanitize_branch_component(once) + + assert once == twice + + +def test_all_branch_prefixes_consume_the_shared_sanitizer() -> None: + # Guard against a future call site re-introducing ad-hoc truncation. 
+ from code_gen import git_ops + from code_gen import subtree_review + from common import git_utils + + for module in (git_ops, subtree_review, git_utils): + source = Path(module.__file__).read_text(encoding="utf-8") + assert "sanitize_branch_component" in source diff --git a/CoderMind/tests/test_code_gen_multilingual.py b/CoderMind/tests/test_code_gen_multilingual.py new file mode 100644 index 0000000..b8038b4 --- /dev/null +++ b/CoderMind/tests/test_code_gen_multilingual.py @@ -0,0 +1,382 @@ +from __future__ import annotations + +import json +import sys +from pathlib import Path +from types import SimpleNamespace + + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from code_gen import batch_prompts # noqa: E402 +from code_gen import context_collector # noqa: E402 +from code_gen import static_checks # noqa: E402 +from code_gen import test_runner # noqa: E402 +from common.execution_state import BatchExecutionState # noqa: E402 +from common.task_batch import PlannedTask # noqa: E402 +from decoder_lang import EnvHandle, TestRunResult as BackendTestRunResult, get_backend # noqa: E402 +import run_batch # noqa: E402 + + +def _task(file_path: str) -> PlannedTask: + return PlannedTask( + task="Implement the target unit.", + file_path=file_path, + units_key=["Unit"], + unit_to_code={"Unit": "interface code"}, + unit_to_features={"Unit": ["Feature/path"]}, + subtree="Core", + ) + + +def _state(task: PlannedTask) -> BatchExecutionState: + state = BatchExecutionState( + batch_id=task.task_id, + file_path=task.file_path, + subtree=task.subtree, + ) + state.test_prompt = "Write focused tests." + state.code_prompt = "Implement the code." 
+ return state + + +def _set_language(monkeypatch, tmp_path: Path, language: str) -> None: + spec_path = tmp_path / "feature_spec.json" + spec_path.write_text( + json.dumps({"meta": {"primary_language": language, "target_languages": [language]}}), + encoding="utf-8", + ) + monkeypatch.setattr(batch_prompts, "FEATURE_SPEC_FILE", spec_path) + monkeypatch.setattr(batch_prompts, "REPO_RPG_FILE", tmp_path / "missing_rpg.json") + monkeypatch.setattr(static_checks, "FEATURE_SPEC_FILE", spec_path) + monkeypatch.setattr(static_checks, "REPO_RPG_FILE", tmp_path / "missing_rpg.json") + monkeypatch.setattr(test_runner, "FEATURE_SPEC_FILE", spec_path) + monkeypatch.setattr(test_runner, "REPO_RPG_FILE", tmp_path / "missing_rpg.json") + + +def test_python_codegen_prompt_keeps_pytest_command(monkeypatch, tmp_path: Path) -> None: + _set_language(monkeypatch, tmp_path, "python") + task = _task("src/app/service.py") + + prompt = batch_prompts.build_tdd_prompt(_state(task), task, tmp_path) + + assert "Language: Python" in prompt + assert "python3 -m pytest" in prompt + assert "Read `requirements.txt`" in prompt + assert "Use snake_case file and directory names" in prompt + + +def test_go_codegen_prompt_uses_go_test(monkeypatch, tmp_path: Path) -> None: + _set_language(monkeypatch, tmp_path, "go") + task = _task("internal/task/store.go") + + prompt = batch_prompts.build_tdd_prompt(_state(task), task, tmp_path) + + assert "Language: Go" in prompt + assert "go test -v ./..." in prompt + assert "Read `go.mod`" in prompt + assert "go get " in prompt + assert "python3 -m pytest" not in prompt + # Go must not be instructed to manage Python deps. requirements.txt may now + # appear only inside the explicit FORBIDDEN list, never as an instruction. 
+ assert "Append the package to `requirements.txt`" not in prompt + assert "Update `requirements.txt`" not in prompt + + +def test_cpp_codegen_prompt_injects_cpp_context(monkeypatch, tmp_path: Path) -> None: + _set_language(monkeypatch, tmp_path, "cpp") + task = _task("src/tasklite_cli/task.cpp") + + prompt = batch_prompts.build_tdd_prompt(_state(task), task, tmp_path) + + assert "Language: C++" in prompt + assert "Source extension: `.cpp`" in prompt + assert "C++17" in prompt + # Non-Python projects get the strengthened prohibition, not the legacy line. + assert "NOT Python" in prompt + assert "Do NOT create ANY `.py` file" in prompt + assert "conftest.py" in prompt + assert "python3 -m pytest" not in prompt + + +def test_non_python_integration_prompt_uses_native_entry_point(monkeypatch, tmp_path: Path) -> None: + # Regression for the bug where every language was told "Do NOT create + # main.py", planting a Python file name into Go/JS/C projects. + _set_language(monkeypatch, tmp_path, "go") + task = PlannedTask( + task="Add the cross-module integration tests.", + file_path="", + units_key=["Core_integration_tests"], + unit_to_code={"Core_integration_tests": ""}, + unit_to_features={"Core_integration_tests": ["Feature/path"]}, + subtree="Core", + task_type="integration_test", + ) + + prompt = batch_prompts.build_tdd_prompt(_state(task), task, tmp_path) + + assert "main.go" in prompt # native entry point referenced + assert "create main.py" not in prompt # no Python file name planted + + +def test_javascript_codegen_prompt_forbids_python_files(monkeypatch, tmp_path: Path) -> None: + _set_language(monkeypatch, tmp_path, "javascript") + task = _task("src/store.js") + + prompt = batch_prompts.build_tdd_prompt(_state(task), task, tmp_path) + + assert "Language: JavaScript" in prompt + assert "npm test" in prompt + assert "Do NOT create ANY `.py` file" in prompt + assert "python3 -m pytest" not in prompt + + +def test_api_summary_uses_backend_for_non_python(monkeypatch, 
tmp_path: Path) -> None: + # Regression: _build_api_summary previously hardcoded the Python backend, + # so a Go/Rust/TS project's API signatures (used by test-writing batches) + # came back empty. It must resolve the project backend and render via + # backend.format_signature for non-Python. + _set_language(monkeypatch, tmp_path, "go") + (tmp_path / "internal").mkdir() + (tmp_path / "internal" / "store.go").write_text( + "package store\n\n" + "type Store struct{ path string }\n\n" + "func NewStore(path string) *Store { return &Store{path: path} }\n\n" + "func (s *Store) Save(id int) error { return nil }\n", + encoding="utf-8", + ) + + summary = batch_prompts._build_api_summary(tmp_path, ["internal/store.go"]) + + assert "internal/store.go" in summary + # Go declarations surface (not an empty Python-parsed result). + assert "Store" in summary + assert "NewStore" in summary + # No Python "def " rendering leaked in. + assert "def NewStore" not in summary + + +def test_dependency_context_base_class_summary_uses_backend(tmp_path: Path) -> None: + # Regression: _format_dependency_context previously parsed base-class code + # with the Python backend, so a Go/Rust base class surfaced as a + # "parse error — read file directly" line instead of its real + # struct/method summary. The backend must be resolved from the file path. + from code_gen import prompts # noqa: PLC0415 + + ctx = { + "base_classes": { + "base_classes": [ + { + "file_path": "internal/base.go", + "code": ( + "package store\n\n" + "type Store struct{ path string }\n\n" + "func (s *Store) Save(id int) error { return nil }\n" + ), + "subclasses": {}, + } + ] + } + } + + summary = prompts._format_dependency_context(ctx) + + assert "`Store` in `internal/base.go`" in summary + assert "Save" in summary + # The Python-backend fallback line must not appear for valid Go code. 
+ assert "parse error" not in summary + + +def test_run_project_tests_uses_backend_command(monkeypatch, tmp_path: Path) -> None: + seen: dict[str, object] = {} + + class FakeBackend: + """Minimal backend for exercising generic test execution.""" + + name = "go" + display_name = "Go" + + def detect_env(self, repo_root: Path) -> EnvHandle: + return EnvHandle(project_root=repo_root, runtime_executable="fake-go") + + def test_command(self, env: EnvHandle) -> list[str]: + return [env.runtime_executable or "go", "test", "./..."] + + def parse_test_output(self, raw: str, exit_code: int) -> BackendTestRunResult: + return BackendTestRunResult( + status="passed" if exit_code == 0 else "failed", + exit_code=exit_code, + passed_count=1, + raw_output=raw, + ) + + class FakeProcess: + """Subprocess stand-in that records the command and succeeds.""" + + returncode = 0 + pid = 12345 + + def __init__(self, cmd, **kwargs): + seen["cmd"] = cmd + seen["cwd"] = kwargs.get("cwd") + + def communicate(self, timeout=None): + seen["timeout"] = timeout + return "ok\n", "" + + monkeypatch.setattr(test_runner.subprocess, "Popen", FakeProcess) + + result = test_runner.run_project_tests( + tmp_path, + timeout=12, + backend=FakeBackend(), + ) + + assert result.success + assert result.passed == 1 + assert seen["cmd"] == ["fake-go", "test", "./..."] + assert seen["cwd"] == tmp_path + assert seen["timeout"] == 12 + + +def test_static_completeness_uses_c_backend(monkeypatch, tmp_path: Path) -> None: + _set_language(monkeypatch, tmp_path, "c") + source = tmp_path / "src" / "task.c" + source.parent.mkdir() + source.write_text("int task_count(void) { return 0; }\n", encoding="utf-8") + + assert static_checks.static_completeness_check(["src/task.c"], tmp_path) == [] + + source.write_text("int task_count(void) { abort(); }\n", encoding="utf-8") + + issues = static_checks.static_completeness_check(["src/task.c"], tmp_path) + + assert issues == ["PLACEHOLDER: src/task.c contains placeholder code"] + + +def 
test_c_backend_syntax_command_includes_repo_root(tmp_path: Path) -> None: + source = tmp_path / "src" / "task.c" + source.parent.mkdir() + source.write_text('#include "src/task.h"\nint task_count(void) { return 0; }\n', encoding="utf-8") + (tmp_path / "src" / "task.h").write_text("int task_count(void);\n", encoding="utf-8") + env = EnvHandle(project_root=tmp_path, extra={"cc": "cc"}) + + command = get_backend("c").test_command(env) + + assert command[:4] == ["cc", "-std=c99", "-I", str(tmp_path)] + assert str(source) in command + + +def test_cpp_backend_syntax_command_includes_repo_root(tmp_path: Path) -> None: + source = tmp_path / "configs" / "repository_layout.cpp" + source.parent.mkdir() + source.write_text( + '#include "configs/repository_layout.hpp"\nint layout_count() { return 0; }\n', + encoding="utf-8", + ) + (tmp_path / "configs" / "repository_layout.hpp").write_text( + "int layout_count();\n", + encoding="utf-8", + ) + env = EnvHandle(project_root=tmp_path, extra={"cxx": "c++"}) + + command = get_backend("cpp").test_command(env) + + assert command[:4] == ["c++", "-std=c++17", "-I", str(tmp_path)] + assert str(source) in command + + +def test_write_interface_skeletons_keeps_c_code_unchanged(tmp_path: Path) -> None: + interfaces_path = tmp_path / "interfaces.json" + interfaces_path.write_text( + json.dumps({ + "meta": {"primary_language": "c", "target_languages": ["c"]}, + "subtrees": { + "Core": { + "interfaces": { + "src/task.c": {"file_code": "int task_count(void);\n"} + } + } + }, + }), + encoding="utf-8", + ) + repo = tmp_path / "repo" + + result = context_collector.write_interface_skeletons(interfaces_path, repo) + + assert result == {"written": ["src/task.c"], "skipped": []} + assert (repo / "src" / "task.c").read_text(encoding="utf-8") == "int task_count(void);\n" + + +def test_run_batch_skips_python_env_for_non_python(monkeypatch, tmp_path: Path) -> None: + class FakeBackend: + name = "go" + display_name = "Go" + + monkeypatch.setattr(run_batch, 
"resolve_test_backend", lambda *_a, **_k: FakeBackend()) + monkeypatch.setattr( + run_batch, + "ensure_dev_venv", + lambda _repo: (_ for _ in ()).throw(AssertionError("venv should not run")), + ) + monkeypatch.setattr( + run_batch, + "ensure_deps_installed", + lambda _repo: (_ for _ in ()).throw(AssertionError("deps should not run")), + ) + + run_batch._setup_codegen_environment(tmp_path) + + +def test_run_batch_keeps_python_env_setup(monkeypatch, tmp_path: Path) -> None: + calls: list[str] = [] + + class FakeBackend: + name = "python" + display_name = "Python" + + monkeypatch.setattr(run_batch, "resolve_test_backend", lambda *_a, **_k: FakeBackend()) + monkeypatch.setattr( + run_batch, + "ensure_dev_venv", + lambda _repo: (calls.append("venv") or False, tmp_path / ".venv_dev"), + ) + monkeypatch.setattr( + run_batch, + "ensure_deps_installed", + lambda _repo: calls.append("deps"), + ) + + run_batch._setup_codegen_environment(tmp_path) + + assert calls == ["venv", "deps"] + + +def test_run_batch_loop_honors_max_batches(monkeypatch) -> None: + calls: list[int] = [] + + def fake_run_batch(**_kwargs): + calls.append(len(calls) + 1) + return { + "success": True, + "type": "batch_complete", + "batch_id": f"batch-{len(calls)}", + "attempts_used": 1, + "total_duration": 0, + "stats": {"completed": len(calls), "total": 10, "failed": 0}, + } + + monkeypatch.setattr(run_batch, "run_batch", fake_run_batch) + args = SimpleNamespace( + merge_file=False, + max_units=0, + agent_timeout=1, + max_batches=2, + json=True, + ) + + assert run_batch._run_loop(args) == 0 + assert calls == [1, 2] diff --git a/CoderMind/tests/test_entry_reconciliation.py b/CoderMind/tests/test_entry_reconciliation.py new file mode 100644 index 0000000..b561ff9 --- /dev/null +++ b/CoderMind/tests/test_entry_reconciliation.py @@ -0,0 +1,195 @@ +"""Regression tests for language-agnostic entry-point reconciliation. 
+ +Locks in the fix for the dual-``main`` bug: the program entry was placed +by TWO uncoordinated deciders — the LLM-built skeleton (free to choose a +path) and the synthetic ```` task (the backend's canonical +path). When they differed (C++ skeleton ``src/cli/main.cpp`` vs canonical +``src/main.cpp``), two ``main`` files were produced. Reconciliation was +only implemented for Go; these tests assert it now works for all 7 +languages through the ``backend.find_existing_entry`` / +``entry_point_candidates`` protocol, with no hardcoded ``backend.name`` +branch in the planner. +""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +_SCRIPTS = Path(__file__).resolve().parents[1] / "scripts" +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +from decoder_lang import get_backend # noqa: E402 +from decoder_lang.project_tasks import ProjectTaskContext # noqa: E402 + +_LANGS = ["python", "javascript", "typescript", "go", "c", "cpp", "rust"] + + +def _interfaces_with(file_path: str) -> dict: + return { + "subtrees": { + "Some Subtree": { + "interfaces": { + file_path: {"units": ["function main"], "units_to_features": {}}, + }, + }, + }, + } + + +class TestProtocolPresence: + @pytest.mark.parametrize("language", _LANGS) + def test_all_backends_implement_new_methods(self, language): + b = get_backend(language) + assert callable(b.find_existing_entry) + assert callable(b.entry_point_candidates) + assert callable(b.prepare_test_env) + + +class TestDefaultFindExistingEntry: + def test_reuses_off_canonical_entry_by_filename(self): + # The cpp case: skeleton placed main.cpp at src/cli/main.cpp, + # canonical is src/main.cpp. Filename match must reuse the former. 
+ cpp = get_backend("cpp") + itf = _interfaces_with("src/cli/main.cpp") + assert cpp.find_existing_entry(itf) == "src/cli/main.cpp" + + def test_returns_none_when_no_entry_in_skeleton(self): + cpp = get_backend("cpp") + itf = _interfaces_with("src/calc/evaluator.cpp") # not an entry + assert cpp.find_existing_entry(itf) is None + + def test_empty_interfaces_returns_none(self): + assert get_backend("rust").find_existing_entry({}) is None + assert get_backend("c").find_existing_entry({"subtrees": {}}) is None + + @pytest.mark.parametrize( + ("language", "entry_path"), + [ + ("python", "app/main.py"), + ("javascript", "lib/index.js"), + ("typescript", "lib/index.ts"), + ("c", "src/cli/main.c"), + ("rust", "bin/main.rs"), + ], + ) + def test_each_language_reuses_off_canonical_entry(self, language, entry_path): + b = get_backend(language) + itf = _interfaces_with(entry_path) + assert b.find_existing_entry(itf) == entry_path + + +class TestGoEntryReconciliation: + def test_reuses_existing_cmd_main(self): + go = get_backend("go") + itf = _interfaces_with("cmd/todoapp/main.go") + assert go.find_existing_entry(itf) == "cmd/todoapp/main.go" + + def test_ignores_non_cmd_main_go(self): + # A main.go NOT under cmd/*/ is not a Go command entry.
+ go = get_backend("go") + itf = _interfaces_with("internal/main.go") + assert go.find_existing_entry(itf) is None + + def test_no_cmd_package_returns_none(self): + go = get_backend("go") + itf = _interfaces_with("internal/store/store.go") + assert go.find_existing_entry(itf) is None + + +class TestEntryPointCandidates: + def test_go_uses_glob(self): + assert get_backend("go").entry_point_candidates() == ["cmd/*/main.go"] + + @pytest.mark.parametrize( + ("language", "expected"), + [ + ("python", "main.py"), + ("javascript", "src/index.js"), + ("typescript", "src/index.ts"), + ("c", "src/main.c"), + ("cpp", "src/main.cpp"), + ("rust", "src/main.rs"), + ], + ) + def test_fixed_path_languages(self, language, expected): + assert get_backend(language).entry_point_candidates() == [expected] + + +class TestTemplatesConsumeReconciledEntry: + @pytest.mark.parametrize( + ("language", "off_canonical"), + [ + ("javascript", "lib/index.js"), + ("typescript", "lib/index.ts"), + ("go", "cmd/todoapp/main.go"), + ("c", "src/cli/main.c"), + ("cpp", "src/cli/main.cpp"), + ("rust", "bin/main.rs"), + ], + ) + def test_main_entry_template_uses_reconciled_path(self, language, off_canonical): + # The template must reference the reconciled entry (not the + # canonical hardcoded path) and forbid a second entry file. 
+ b = get_backend(language) + ctx = ProjectTaskContext( + repo_name="demo", + repo_info="purpose", + package_name="demo", + entry_point_path=off_canonical, + ) + templates = b.project_task_templates(ctx) + assert templates is not None + assert off_canonical in templates.main_entry + assert "extend it in place" in templates.main_entry + + @pytest.mark.parametrize( + ("language", "canonical"), + [ + ("javascript", "src/index.js"), + ("typescript", "src/index.ts"), + ("c", "src/main.c"), + ("cpp", "src/main.cpp"), + ("rust", "src/main.rs"), + ], + ) + def test_main_entry_falls_back_to_canonical_when_none(self, language, canonical): + b = get_backend(language) + ctx = ProjectTaskContext( + repo_name="demo", + repo_info="purpose", + package_name="demo", + entry_point_path=None, + ) + templates = b.project_task_templates(ctx) + assert canonical in templates.main_entry + + +class TestPrepareTestEnvNoOp: + @pytest.mark.parametrize("language", ["python", "javascript", "typescript", "go", "rust"]) + def test_no_op_for_non_compiled_cmake(self, language): + # Must not raise even with a bogus env handle. + get_backend(language).prepare_test_env(object()) + + +class TestNoLanguageNameBranchInPlanner: + """Guard: the planner's entry reconciliation must not re-introduce a + per-language ``backend.name == "go"`` branch.""" + + def test_reconciled_entry_point_path_has_no_go_branch(self): + src = (_SCRIPTS / "plan_tasks.py").read_text(encoding="utf-8") + # Locate the method body and assert it delegates to the backend. + start = src.index("def _reconciled_entry_point_path") + end = src.index("def _build_requirements_task", start) + body = src[start:end] + assert "find_existing_entry" in body + assert 'backend.name == "go"' not in body + + def test_check_code_gen_entry_has_no_go_branch(self): + src = (_SCRIPTS / "check_code_gen.py").read_text(encoding="utf-8") + # The MAIN_ENTRY artifact check must use entry_point_candidates, + # not a go-only glob branch. 
+ assert "entry_point_candidates" in src + assert 'backend.name == "go"' not in src diff --git a/CoderMind/tests/test_feature_build.py b/CoderMind/tests/test_feature_build.py new file mode 100644 index 0000000..8258f8e --- /dev/null +++ b/CoderMind/tests/test_feature_build.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import json +import sys +from pathlib import Path + + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from feature_build import _load_feature_data, apply_changes # noqa: E402 + + +def test_apply_changes_promotes_dict_list_leaf_to_branch() -> None: + tree = { + "ui": { + "homepage": [ + { + "name": "render recent todos", + "description": "Render existing todo items.", + "children": [], + } + ] + } + } + + result = apply_changes( + tree, + ["ui/homepage/render recent todos/escape todo title"], + ) + + assert result["ui"]["homepage"] == { + "render recent todos": ["escape todo title"] + } + assert tree["ui"]["homepage"][0]["name"] == "render recent todos" + + +def test_apply_changes_preserves_single_key_dict_leaf_when_promoted() -> None: + tree = {"storage": {"file": [{"load dataset": []}]}} + + result = apply_changes(tree, ["storage/file/load dataset/handle corrupt json"]) + + assert result["storage"]["file"] == { + "load dataset": ["handle corrupt json"] + } + + +def test_load_feature_data_preserves_target_languages(tmp_path) -> None: + feature_spec = tmp_path / "feature_spec.json" + feature_build = tmp_path / "feature_build.json" + feature_spec.write_text( + json.dumps({ + "repository_name": "tasklite", + "repository_purpose": "Go CLI task tracker.", + "meta": { + "primary_language": "go", + "target_languages": ["go"], + }, + "functional_requirements": [], + }), + encoding="utf-8", + ) + + data = _load_feature_data(feature_build, feature_spec) + + assert data["meta"]["primary_language"] == "go" + assert data["meta"]["target_languages"] == ["go"] 
\ No newline at end of file diff --git a/CoderMind/tests/test_final_test_repair.py b/CoderMind/tests/test_final_test_repair.py new file mode 100644 index 0000000..eb63801 --- /dev/null +++ b/CoderMind/tests/test_final_test_repair.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +import sys +from pathlib import Path + + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from code_gen import final_validation as fv # noqa: E402 +from code_gen.test_runner import TestResult as _TestResult # noqa: E402 + + +def _fail_result(output: str = "FAILED tests/test_x.py::t - assert ...") -> _TestResult: + return _TestResult( + success=False, + return_code=1, + output=output, + test_files=[], + passed=10, + failed=1, + ) + + +def _pass_result() -> _TestResult: + return _TestResult( + success=True, + return_code=0, + output="", + test_files=[], + passed=11, + failed=0, + ) + + +class _Backend: + name = "python" + display_name = "Python" + + +def _patch_common(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(fv, "ensure_on_main", lambda *_a, **_k: None) + monkeypatch.setattr(fv, "GitRunner", lambda *_a, **_k: object()) + monkeypatch.setattr(fv, "resolve_test_backend", lambda *_a, **_k: _Backend()) + monkeypatch.setattr(fv, "ensure_deps_installed", lambda *_a, **_k: None) + monkeypatch.setattr(fv, "get_dev_python", lambda *_a, **_k: "python3") + # build_batch_pytest_cmd is imported lazily inside the loop. + import code_gen.batch_prompts as bp + monkeypatch.setattr(bp, "build_batch_pytest_cmd", lambda *_a, **_k: "pytest -q") + + +def test_final_test_repairs_failing_suite(monkeypatch, tmp_path): + _patch_common(monkeypatch, tmp_path) + + runs = {"n": 0} + + def fake_run_tests(*_a, **_k): + runs["n"] += 1 + # First run fails, second (post-repair) passes. 
+ return _fail_result() if runs["n"] == 1 else _pass_result() + + dispatched = {"n": 0, "prompt": None} + + def fake_dispatch(prompt, repo_path, timeout=0, purpose=""): + dispatched["n"] += 1 + dispatched["prompt"] = prompt + dispatched["purpose"] = purpose + return "BATCH_RESULT: PASS", None + + monkeypatch.setattr(fv, "run_project_tests", fake_run_tests) + monkeypatch.setattr(fv, "dispatch_sub_agent", fake_dispatch) + # Skip smoke step on the success path for this unit test. + monkeypatch.setattr(fv, "save_stage_result", lambda *_a, **_k: None) + monkeypatch.setitem(sys.modules, "smoke_test", type(sys)("smoke_test")) + sys.modules["smoke_test"].run_smoke_test = lambda: type( + "S", (), {"findings": [], "to_dict": lambda self: {"findings": [], "error_count": 0}} + )() + + out = fv.final_test(repo_path=tmp_path, max_repair_iters=2) + + assert out["success"] is True + assert out["final_test_repair_attempts"] == 1 + assert out["final_test_repaired"] is True + assert dispatched["n"] == 1 + assert dispatched["purpose"] == "final_test_repair" + assert "Do NOT delete, skip, or weaken any test." in dispatched["prompt"] + + +def test_final_test_fails_loudly_on_zero_tests_executed(monkeypatch, tmp_path): + # A no-op final test (the go-test-found-no-packages case): exit-0 but zero + # tests executed. It must fail with a clear diagnostic and must NOT dispatch + # a code-repair agent (which cannot fix a "no tests ran" state). 
+ _patch_common(monkeypatch, tmp_path) + + def fake_run_tests(*_a, **_k): + return _TestResult( + success=False, return_code=0, output="", + test_files=[], passed=0, failed=0, + ) + + dispatched = {"n": 0} + + def fake_dispatch(*_a, **_k): + dispatched["n"] += 1 + return "BATCH_RESULT: PASS", None + + monkeypatch.setattr(fv, "run_project_tests", fake_run_tests) + monkeypatch.setattr(fv, "dispatch_sub_agent", fake_dispatch) + monkeypatch.setattr(fv, "save_stage_result", lambda *_a, **_k: None) + + out = fv.final_test(repo_path=tmp_path, max_repair_iters=2) + + assert out["success"] is False + assert out["no_tests_executed"] is True + assert dispatched["n"] == 0 + + +def test_final_test_repair_bounded_when_still_failing(monkeypatch, tmp_path): + _patch_common(monkeypatch, tmp_path) + + def fake_run_tests(*_a, **_k): + return _fail_result() # always fails + + dispatched = {"n": 0} + + def fake_dispatch(prompt, repo_path, timeout=0, purpose=""): + dispatched["n"] += 1 + return "BATCH_RESULT: PASS", None + + monkeypatch.setattr(fv, "run_project_tests", fake_run_tests) + monkeypatch.setattr(fv, "dispatch_sub_agent", fake_dispatch) + monkeypatch.setattr(fv, "save_stage_result", lambda *_a, **_k: None) + + out = fv.final_test(repo_path=tmp_path, max_repair_iters=2) + + assert out["success"] is False + # Bounded: exactly max_repair_iters dispatches, no infinite loop. 
+ assert dispatched["n"] == 2 + assert out["final_test_repair_attempts"] == 2 + assert out["final_test_repaired"] is False + + +def test_final_test_no_repair_when_first_pass(monkeypatch, tmp_path): + _patch_common(monkeypatch, tmp_path) + monkeypatch.setattr(fv, "run_project_tests", lambda *_a, **_k: _pass_result()) + monkeypatch.setattr(fv, "save_stage_result", lambda *_a, **_k: None) + monkeypatch.setitem(sys.modules, "smoke_test", type(sys)("smoke_test")) + sys.modules["smoke_test"].run_smoke_test = lambda: type( + "S", (), {"findings": [], "to_dict": lambda self: {"findings": [], "error_count": 0}} + )() + + def fake_dispatch(*_a, **_k): + raise AssertionError("repair must not be dispatched when tests pass") + + monkeypatch.setattr(fv, "dispatch_sub_agent", fake_dispatch) + + out = fv.final_test(repo_path=tmp_path, max_repair_iters=2) + + assert out["success"] is True + assert "final_test_repair_attempts" not in out diff --git a/CoderMind/tests/test_init_codebase_gitignore.py b/CoderMind/tests/test_init_codebase_gitignore.py new file mode 100644 index 0000000..215422a --- /dev/null +++ b/CoderMind/tests/test_init_codebase_gitignore.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +"""Tests for ``init_codebase.create_gitignore`` dev-env coverage. + +A fixture- or hand-authored ``.gitignore`` may already carry ``.cmind/`` and a +Python cache block while predating the throwaway-venv rules. The updater must +still append ``.venv_dev/`` so codegen scratch environments are never committed. 
+""" + +import os +import sys + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +import init_codebase + + +def _read(path): + return path.read_text(encoding="utf-8") + + +class TestCreateGitignoreDevEnv: + def test_greenfield_includes_venv_dev(self, tmp_path): + changed = init_codebase.create_gitignore(tmp_path) + assert changed is True + content = _read(tmp_path / ".gitignore") + assert ".venv_dev/" in content + assert ".cmind/" in content + assert "__pycache__/" in content + + def test_cmind_and_python_present_but_missing_venv_dev_appends_dev_env(self, tmp_path): + # Mirrors the fixture-shipped gitignore that fooled the old detection. + gi = tmp_path / ".gitignore" + gi.write_text( + "build/\n*.o\n.cmind/\n__pycache__/\n*.py[cod]\n", + encoding="utf-8", + ) + changed = init_codebase.create_gitignore(tmp_path) + assert changed is True + content = _read(gi) + assert ".venv_dev/" in content + assert ".cmind_dev_env/" in content + # Existing user entries are preserved. + assert "build/" in content + assert "*.o" in content + # The full CoderMind block is not duplicated (only the dev-env subset). 
+ assert content.count(".cmind/") == 1 + + def test_fully_configured_is_noop(self, tmp_path): + gi = tmp_path / ".gitignore" + gi.write_text( + ".cmind/\n__pycache__/\n.venv_dev/\n.cmind_dev_env/\n", + encoding="utf-8", + ) + changed = init_codebase.create_gitignore(tmp_path) + assert changed is False + + def test_dev_env_detection_accepts_unslashed_form(self, tmp_path): + gi = tmp_path / ".gitignore" + gi.write_text( + ".cmind/\n__pycache__/\n.venv_dev\n", + encoding="utf-8", + ) + changed = init_codebase.create_gitignore(tmp_path) + assert changed is False + + def test_idempotent_after_dev_env_append(self, tmp_path): + gi = tmp_path / ".gitignore" + gi.write_text(".cmind/\n__pycache__/\n", encoding="utf-8") + assert init_codebase.create_gitignore(tmp_path) is True + # Second run sees venv_dev now present → no further change. + assert init_codebase.create_gitignore(tmp_path) is False diff --git a/CoderMind/tests/test_interface_coverage.py b/CoderMind/tests/test_interface_coverage.py new file mode 100644 index 0000000..9639663 --- /dev/null +++ b/CoderMind/tests/test_interface_coverage.py @@ -0,0 +1,493 @@ +from __future__ import annotations + +import importlib.util +import json +import sys +from pathlib import Path + +_REPO = Path(__file__).resolve().parents[1] +_SCRIPTS = _REPO / "scripts" + +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +from func_design.interface_agent import GlobalInterfaceRegistry, InterfaceOrchestrator +from decoder_lang import get_backend as get_backend_for + +_SPEC = importlib.util.spec_from_file_location( + "design_interfaces_script", + _SCRIPTS / "design_interfaces.py", +) +assert _SPEC is not None and _SPEC.loader is not None +design_interfaces = importlib.util.module_from_spec(_SPEC) +sys.modules["design_interfaces_script"] = design_interfaces +_SPEC.loader.exec_module(design_interfaces) + + +def test_file_coverage_success_requires_all_features() -> None: + coverage = 
InterfaceOrchestrator._new_coverage_status() + InterfaceOrchestrator._record_file_coverage( + coverage_status=coverage, + subtree_name="Task Store", + file_node={ + "path": "src/store.ts", + "feature_paths": ["Task Store/Add", "Task Store/List"], + }, + result={ + "units": ["class TaskStore"], + "units_to_features": { + "class TaskStore": ["Task Store/Add", "Task Store/List"], + }, + }, + ) + + assert coverage["expected_files"] == 1 + assert coverage["successful_files"] == 1 + assert coverage["covered_features"] == 2 + assert coverage["missing_features"] == 0 + assert coverage["issues"] == [] + + +def test_file_coverage_records_partial_result() -> None: + coverage = InterfaceOrchestrator._new_coverage_status() + InterfaceOrchestrator._record_file_coverage( + coverage_status=coverage, + subtree_name="Task Store", + file_node={ + "path": "src/store.rs", + "feature_paths": ["Task Store/Add", "Task Store/List"], + }, + result={ + "units": ["struct TaskStore"], + "units_to_features": {"struct TaskStore": ["Task Store/Add"]}, + }, + ) + + assert coverage["successful_files"] == 0 + assert coverage["covered_features"] == 1 + assert coverage["missing_features"] == 1 + assert coverage["failed_files"] == ["src/store.rs"] + assert coverage["issues"] == [ + { + "subtree": "Task Store", + "file_path": "src/store.rs", + "reason": "missing features", + "missing_features": ["Task Store/List"], + } + ] + + +def test_build_result_marks_coverage_issues_unsuccessful() -> None: + coverage = InterfaceOrchestrator._new_coverage_status() + InterfaceOrchestrator._record_missing_subtree(coverage, "CLI") + orchestrator = InterfaceOrchestrator( + llm_client=object(), + target_language="typescript", + ) + + result = orchestrator._build_result({}, ["CLI"], {}, coverage) + + assert result["success"] is False + assert result["coverage"]["missing_subtrees"] == ["CLI"] + + +def test_design_interfaces_main_fails_on_incomplete_coverage( + tmp_path: Path, + monkeypatch, +) -> None: + skeleton_path = 
tmp_path / "skeleton.json" + data_flow_path = tmp_path / "data_flow.json" + base_classes_path = tmp_path / "base_classes.json" + output_path = tmp_path / "interfaces.json" + skeleton_path.write_text(json.dumps({"root": {"type": "directory", "children": []}})) + data_flow_path.write_text(json.dumps({})) + base_classes_path.write_text(json.dumps({})) + + class FakeDesigner: + def __init__(self, *args, **kwargs): + pass + + def build(self, skeleton, data_flow, base_classes): + return { + "success": False, + "subtrees": {}, + "subtree_order": [], + "coverage": { + "issues": [ + { + "subtree": "CLI", + "file_path": "src/main.ts", + "reason": "no units", + "missing_features": ["CLI/Run"], + } + ] + }, + } + + def print_summary(self, result): + pass + + monkeypatch.setattr(design_interfaces, "InterfaceDesigner", FakeDesigner) + monkeypatch.setattr( + sys, + "argv", + [ + "design_interfaces.py", + "--skeleton", + str(skeleton_path), + "--data-flow", + str(data_flow_path), + "--base-classes", + str(base_classes_path), + "--output", + str(output_path), + "--no-trajectory", + ], + ) + + assert design_interfaces.main() == 1 + saved = json.loads(output_path.read_text()) + assert saved["success"] is False + + +def _callable_by_prefix(unit_name: str) -> bool: + return unit_name.split(" ", 1)[0] in {"function", "method", "class"} + + +def test_global_review_reconciles_retained_orphans() -> None: + # A single isolated callable unit (no edges) that the orphan review + # explicitly RETAINED must not fail the verdict. 
+ interfaces_data = { + "subtrees": { + "App": { + "interfaces": { + "src/app.py": { + "units": ["function main"], + "units_to_features": {"function main": ["App/run"]}, + } + } + } + } + } + global_review = { + "feature_orphans_count": 1, + "orphan_units_count": 1, + "blocking_unapplied_fixes_count": 0, + "passed": False, + } + + design_interfaces._finalize_global_review_verdict( + global_review=global_review, + interfaces_data=interfaces_data, + enhanced_data_flow={"invocation_edges": []}, + entry_points=[], + is_callable=_callable_by_prefix, + retained_keys={"src/app.py::function main"}, + ) + + assert global_review["passed"] is True + assert global_review["orphan_units_count"] == 0 + assert global_review["feature_orphans_count"] == 0 + + +def test_global_review_keeps_unresolved_orphans_failing() -> None: + # Two isolated callable units; only one is retained, so the other + # remains an orphan and the verdict stays failing. + interfaces_data = { + "subtrees": { + "App": { + "interfaces": { + "src/app.py": { + "units": ["function main", "function unused"], + "units_to_features": { + "function main": ["App/run"], + "function unused": ["App/unused"], + }, + } + } + } + } + } + global_review = { + "feature_orphans_count": 2, + "orphan_units_count": 2, + "blocking_unapplied_fixes_count": 0, + "passed": False, + } + + design_interfaces._finalize_global_review_verdict( + global_review=global_review, + interfaces_data=interfaces_data, + enhanced_data_flow={"invocation_edges": []}, + entry_points=[], + is_callable=_callable_by_prefix, + retained_keys={"src/app.py::function main"}, + ) + + assert global_review["passed"] is False + assert global_review["orphan_units_count"] == 1 + assert global_review["orphan_units_count"] == 1 + assert global_review["feature_orphans_count"] == 1 + assert global_review["unresolved_orphan_units"] == ["src/app.py::function unused"] + + +def _store_skeleton_and_interfaces(): + """A skeleton feature missing from interfaces, plus its file 
block.""" + skeleton = { + "root": { + "type": "directory", + "name": "root", + "path": ".", + "children": [ + { + "type": "file", + "name": "schema.js", + "path": "src/store/schema.js", + "feature_paths": [ + "Data/schema/define store structure", + "Data/schema/define todo object schema", + ], + } + ], + } + } + interfaces = { + "subtrees": { + "Data": { + "interfaces": { + "src/store/schema.js": { + "units": ["function parseTodoRecord"], + "units_to_features": { + "function parseTodoRecord": [ + "Data/schema/define store structure" + ] + }, + } + } + } + } + } + return skeleton, interfaces + + +def test_backfill_attributes_missing_feature() -> None: + skeleton, interfaces = _store_skeleton_and_interfaces() + audit = design_interfaces.backfill_uncovered_features(skeleton, interfaces) + + # The orphan feature is attributed to the file's existing unit. + assert len(audit["backfilled"]) == 1 + assert audit["backfilled"][0]["feature"] == "Data/schema/define todo object schema" + assert audit["backfilled"][0]["file_path"] == "src/store/schema.js" + assert audit["unbackfilled"] == [] + + covered = design_interfaces._collect_interface_features(interfaces) + assert "Data/schema/define todo object schema" in covered + # Coverage now equals the skeleton (the bench consistency gate passes). + assert design_interfaces.collect_skeleton_features(skeleton) - covered == set() + + +def test_backfill_noop_when_fully_covered() -> None: + skeleton, interfaces = _store_skeleton_and_interfaces() + # Pre-attribute the missing feature so nothing is uncovered. 
+ u2f = interfaces["subtrees"]["Data"]["interfaces"]["src/store/schema.js"]["units_to_features"] + u2f["function parseTodoRecord"].append("Data/schema/define todo object schema") + + audit = design_interfaces.backfill_uncovered_features(skeleton, interfaces) + assert audit["backfilled"] == [] + assert audit["unbackfilled"] == [] + + +def test_backfill_reports_unbackfillable_when_file_absent() -> None: + skeleton, interfaces = _store_skeleton_and_interfaces() + # Remove the interface file block so the feature has nowhere to attach. + interfaces["subtrees"]["Data"]["interfaces"] = {} + + audit = design_interfaces.backfill_uncovered_features(skeleton, interfaces) + assert audit["backfilled"] == [] + reasons = {item["reason"] for item in audit["unbackfilled"]} + assert reasons == {"file not in interfaces"} + + +def test_restore_completed_subtrees_reuses_only_complete_prefix(tmp_path: Path) -> None: + output_path = tmp_path / "interfaces.json" + output_path.write_text(json.dumps({ + "subtrees": { + "Core": { + "files_order": ["core.go"], + "interfaces": { + "core.go": { + "file_code": "package core\n\ntype Core struct{}\n", + "units": ["struct Core"], + "units_to_features": {"struct Core": ["Core/run"]}, + } + }, + }, + "Store": { + "files_order": ["store.go"], + "interfaces": {}, + }, + } + })) + skeleton = { + "root": { + "type": "directory", + "children": [ + {"type": "file", "path": "core.go", "feature_paths": ["Core/run"]}, + {"type": "file", "path": "store.go", "feature_paths": ["Store/load"]}, + ], + } + } + orchestrator = InterfaceOrchestrator( + llm_client=object(), + output_path=str(output_path), + target_language="go", + ) + all_interfaces = {} + implemented_subtrees = {} + coverage = InterfaceOrchestrator._new_coverage_status() + registry = GlobalInterfaceRegistry() + + restored = orchestrator._restore_completed_subtrees( + skeleton=skeleton, + subtree_order=["Core", "Store"], + all_interfaces=all_interfaces, + implemented_subtrees=implemented_subtrees, + 
coverage_status=coverage, + global_registry=registry, + ) + + assert restored == {"Core"} + assert list(all_interfaces) == ["Core"] + assert implemented_subtrees["Core"][0]["path"] == "core.go" + assert coverage["expected_features"] == 1 + assert coverage["covered_features"] == 1 + + +def test_subtree_complete_allows_cross_file_feature_mapping() -> None: + file_nodes = [ + {"path": "cmd/main.go", "feature_paths": ["CLI/run"]}, + {"path": "cmd/usage.go", "feature_paths": ["CLI/help"]}, + ] + file_container = { + "cmd/main.go": { + "units": ["function Run"], + "units_to_features": {"function Run": ["CLI/run", "CLI/help"]}, + }, + "cmd/usage.go": {"units": [], "units_to_features": {}}, + } + + assert InterfaceOrchestrator._subtree_interfaces_complete( + file_nodes, + file_container, + ) + + +# --------------------------------------------------------------------------- +# Global interface review — multilingual (G4 stage 1) +# --------------------------------------------------------------------------- + +class _StubLLM: + """Minimal LLMClient stand-in (review fixes are applied directly).""" + + +def _make_reviewer(language: str): + from func_design.interface_review import InterfaceReviewer + + return InterfaceReviewer(llm_client=_StubLLM(), target_language=language) + + +def test_apply_fixes_add_dependency_is_language_agnostic() -> None: + reviewer = _make_reviewer("go") + enhanced_data_flow: dict = {"invocation_edges": []} + registry = GlobalInterfaceRegistry(backend=get_backend_for("go")) + + stats = reviewer._apply_fixes( + fixes=[{ + "action": "add_dependency", + "file_path": "internal/cli.go", + "unit_name": "function Run", + "calls_to_add": [ + {"callee": "NewStore", "callee_file": "internal/store.go"}, + ], + }], + interfaces_data={"subtrees": {}}, + enhanced_data_flow=enhanced_data_flow, + global_registry=registry, + ) + + assert stats["applied_edges"] == 1 + assert stats["unapplied"] == [] + assert enhanced_data_flow["invocation_edges"][0]["callee"] == 
"NewStore" + + +def test_apply_fixes_skips_add_interface_for_non_python() -> None: + reviewer = _make_reviewer("go") + registry = GlobalInterfaceRegistry(backend=get_backend_for("go")) + interfaces_data = { + "subtrees": { + "Core": { + "interfaces": { + "internal/cli.go": {"units": [], "units_to_features": {}, "file_code": ""}, + } + } + } + } + + stats = reviewer._apply_fixes( + fixes=[{ + "action": "add_interface", + "file_path": "internal/cli.go", + "unit_name": "function Run", + "signature": "func Run() error", + "docstring": "Run the CLI.", + "feature_path": "CLI/run", + }], + interfaces_data=interfaces_data, + enhanced_data_flow={"invocation_edges": []}, + global_registry=registry, + skeleton_features={"CLI/run"}, + rpg_features={"CLI/run"}, + ) + + # add_interface is skipped for non-Python and NOT counted as unapplied, + # so the review can still pass on structural grounds. + assert stats["applied_fixes"] == 0 + assert stats["applied_edges"] == 0 + assert stats["unapplied"] == [] + # No Go stub was injected into the interface file. 
+ cli = interfaces_data["subtrees"]["Core"]["interfaces"]["internal/cli.go"] + assert cli["units"] == [] + + +def test_apply_fixes_applies_add_interface_for_python() -> None: + reviewer = _make_reviewer("python") + registry = GlobalInterfaceRegistry(backend=get_backend_for("python")) + interfaces_data = { + "subtrees": { + "Core": { + "interfaces": { + "src/cli.py": {"units": [], "units_to_features": {}, "file_code": ""}, + } + } + } + } + + stats = reviewer._apply_fixes( + fixes=[{ + "action": "add_interface", + "file_path": "src/cli.py", + "unit_name": "function run", + "signature": "def run() -> None:", + "docstring": "Run the CLI.", + "feature_path": "CLI/run", + }], + interfaces_data=interfaces_data, + enhanced_data_flow={"invocation_edges": []}, + global_registry=registry, + skeleton_features={"CLI/run"}, + rpg_features={"CLI/run"}, + ) + + assert stats["applied_fixes"] == 1 + cli = interfaces_data["subtrees"]["Core"]["interfaces"]["src/cli.py"] + assert "function run" in cli["units"] diff --git a/CoderMind/tests/test_lang_parser_c.py b/CoderMind/tests/test_lang_parser_c.py new file mode 100644 index 0000000..cb713aa --- /dev/null +++ b/CoderMind/tests/test_lang_parser_c.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +"""Tests for the C language parser.""" + +import os +import sys +import textwrap + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from lang_parser import parse_file, validate_syntax + + +C_SOURCE = textwrap.dedent( + """\ + #include "math_utils.h" + #include <string.h> + + struct Point { int x; int y; }; + + static int add(int a, int b) { + return a + b; + } + + int run(void) { + return add(1, 2); + } + """ +) + + +def _unit_map(result): + return {(unit.unit_type, unit.name): unit for unit in result.units} + + +class TestCParser: + def test_extracts_includes_struct_and_functions(self): + result = parse_file("src/math.c", C_SOURCE) + assert
result.file_path == "src/math.c" + assert result.language == "c" + assert result.syntax_error is None + + units = _unit_map(result) + assert ("import", "math_utils.h") in units + assert ("import", "string.h") in units + assert units[("import", "math_utils.h")].extra["include_style"] == "quote" + assert units[("import", "string.h")].extra["include_style"] == "angle" + assert ("struct", "Point") in units + assert ("function", "add") in units + assert ("function", "run") in units + + def test_units_preserve_language_and_line_metadata(self): + result = parse_file("src/math.c", C_SOURCE) + assert result.units + for unit in result.units: + assert unit.language == "c" + assert unit.line_start is not None + assert unit.line_end is not None + assert unit.extra["language"] == "c" + assert unit.extra["line_start"] == unit.line_start + assert unit.extra["line_end"] == unit.line_end + + def test_dependencies_are_recorded_for_includes_and_invokes(self): + result = parse_file("src/math.c", C_SOURCE) + imports = [dep for dep in result.dependencies if dep.relation == "imports"] + assert [(dep.dst, dep.extra["include_style"]) for dep in imports] == [ + ("math_utils.h", "quote"), + ("string.h", "angle"), + ] + + invokes = [dep for dep in result.dependencies if dep.relation == "invokes"] + assert [(dep.src, dep.symbol, dep.dst, dep.extra["call_kind"]) for dep in invokes] == [ + ("src/math.c:run", "add", "add", "direct"), + ] + + def test_builtin_calls_are_not_emitted_as_invokes(self): + source = textwrap.dedent( + """\ + #include <stdio.h> + + int run(void) { + printf("hello"); + return 0; + } + """ + ) + result = parse_file("src/main.c", source) + invokes = [dep for dep in result.dependencies if dep.relation == "invokes"] + assert invokes == [] + + def test_invalid_source_returns_syntax_error_without_crashing(self): + result = parse_file("bad.c", "int broken(\n") + assert result.language == "c" + assert result.syntax_error is not None + valid, error = validate_syntax("bad.c", "int broken(\n")
+ assert valid is False + assert error is not None diff --git a/CoderMind/tests/test_lang_parser_cpp.py b/CoderMind/tests/test_lang_parser_cpp.py new file mode 100644 index 0000000..f953a32 --- /dev/null +++ b/CoderMind/tests/test_lang_parser_cpp.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +"""Tests for the C++ language parser.""" + +import os +import sys +import textwrap + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from lang_parser import parse_file, validate_syntax + + +CPP_SOURCE = textwrap.dedent( + """\ + #include "model.hpp" + #include <vector> + + class Widget { + public: + Widget(); + int value() const { return helper(); } + private: + int helper() const { return 1; } + }; + + int Widget::make() { + Widget* widget = new Widget(); + return value(); + } + + int run() { + return Widget::make(); + } + """ +) + + +def _unit_map(result): + return {(unit.unit_type, unit.name, unit.parent): unit for unit in result.units} + + +class TestCppParser: + def test_extracts_includes_class_methods_and_functions(self): + result = parse_file("src/model.cpp", CPP_SOURCE) + assert result.file_path == "src/model.cpp" + assert result.language == "cpp" + assert result.syntax_error is None + + units = _unit_map(result) + assert ("import", "model.hpp", None) in units + assert ("import", "vector", None) in units + assert ("class", "Widget", None) in units + assert ("method", "Widget", "Widget") in units + assert ("method", "value", "Widget") in units + assert ("method", "helper", "Widget") in units + assert ("method", "make", "Widget") in units + assert ("function", "run", None) in units + + def test_dependencies_include_constructor_static_and_direct_calls(self): + result = parse_file("src/model.cpp", CPP_SOURCE) + invokes = [dep for dep in result.dependencies if dep.relation == "invokes"] + observed = {(dep.src, dep.symbol, dep.dst, dep.extra["call_kind"]) for dep in
invokes} + assert ("src/model.cpp:Widget.value", "helper", "helper", "direct") in observed + assert ("src/model.cpp:Widget.make", "Widget", "Widget", "constructor") in observed + assert ("src/model.cpp:Widget.make", "value", "value", "direct") in observed + assert ("src/model.cpp:run", "make", "Widget", "static") in observed + + def test_units_preserve_language_and_line_metadata(self): + result = parse_file("src/model.cpp", CPP_SOURCE) + assert result.units + for unit in result.units: + assert unit.language == "cpp" + assert unit.line_start is not None + assert unit.line_end is not None + assert unit.extra["language"] == "cpp" + assert unit.extra["line_start"] == unit.line_start + assert unit.extra["line_end"] == unit.line_end + + def test_struct_definition_is_class_like_unit(self): + source = textwrap.dedent( + """\ + struct Packet { + int size() const { return 1; } + }; + """ + ) + result = parse_file("include/packet.hpp", source) + units = _unit_map(result) + assert ("struct", "Packet", None) in units + assert ("method", "size", "Packet") in units + + def test_invalid_source_returns_syntax_error_without_crashing(self): + result = parse_file("bad.cpp", "class Broken {\n") + assert result.language == "cpp" + assert result.syntax_error is not None + valid, error = validate_syntax("bad.cpp", "class Broken {\n") + assert valid is False + assert error is not None diff --git a/CoderMind/tests/test_lang_parser_fallback.py b/CoderMind/tests/test_lang_parser_fallback.py new file mode 100644 index 0000000..a169bb3 --- /dev/null +++ b/CoderMind/tests/test_lang_parser_fallback.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +"""Tests for the fallback delimiter/syntax scanner. + +The scanner in ``lang_parser.extractors.fallback`` is only exercised when a +tree-sitter backend is unavailable, so these cases call it directly to lock in +its handling of block comments, char literals, and Rust lifetimes — historical +sources of spurious "Unterminated string literal" errors. 
+""" + +import os +import sys + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from lang_parser.extractors.fallback import delimiter_syntax_error + + +class TestDelimiterSyntaxError: + def test_apostrophe_in_block_comment_is_not_an_error(self): + src = "/* the store's document — doesn't crash */\nint main(void){return 0;}\n" + assert delimiter_syntax_error(src) is None + + def test_apostrophe_in_line_comment_is_not_an_error(self): + src = "// user's request — see spec\nint x = 1;\n" + assert delimiter_syntax_error(src) is None + + def test_valid_char_literals_are_not_errors(self): + src = "char c = 'a';\nchar n = '\\n';\nchar q = '\\'';\n" + assert delimiter_syntax_error(src) is None + + def test_rust_lifetimes_are_not_unterminated_strings(self): + src = "fn foo<'a>(x: &'a str) -> &'a str { x }\n" + assert delimiter_syntax_error(src) is None + + def test_rust_static_lifetime_is_not_an_error(self): + src = 'static S: &\'static str = "hi";\n' + assert delimiter_syntax_error(src) is None + + def test_multiline_block_comment_with_quotes_is_not_an_error(self): + src = "/*\n * The store's codec — handles \"json\" persistence\n */\nint x = 1;\n" + assert delimiter_syntax_error(src) is None + + def test_unterminated_block_comment_is_flagged(self): + src = "/* never closed\nint x = 1;\n" + assert delimiter_syntax_error(src) == "Unterminated block comment" + + def test_brace_imbalance_is_flagged(self): + src = "int main(void){ return 0;\n" + assert delimiter_syntax_error(src) is not None + + def test_unmatched_closing_delimiter_is_flagged(self): + src = "int main(void){ return 0; }}\n" + assert delimiter_syntax_error(src) is not None + + def test_balanced_source_is_clean(self): + src = "int add(int a, int b){ return a + b; }\n" + assert delimiter_syntax_error(src) is None diff --git a/CoderMind/tests/test_lang_parser_go.py b/CoderMind/tests/test_lang_parser_go.py new file mode 100644 index 
0000000..e4c529b --- /dev/null +++ b/CoderMind/tests/test_lang_parser_go.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +"""Tests for the Go language parser.""" + +import os +import sys +import textwrap + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from lang_parser import parse_file, validate_syntax + + +GO_SOURCE = textwrap.dedent( + """\ + package server + + import ( + "fmt" + nethttp "net/http" + ) + + type Server struct { + Name string + } + + func NewServer(name string) *Server { + return &Server{Name: name} + } + + func (s *Server) Handle() { + fmt.Println(s.Name) + } + """ +) + + +def _unit_map(result): + return {(unit.unit_type, unit.name): unit for unit in result.units} + + +class TestGoParser: + def test_extracts_package_import_function_struct_and_receiver_method(self): + result = parse_file("internal/server/main.go", GO_SOURCE) + assert result.file_path == "internal/server/main.go" + assert result.language == "go" + assert result.syntax_error is None + + units = _unit_map(result) + assert ("package", "server") in units + assert ("import", "fmt") in units + assert ("import", "net/http") in units + assert ("struct", "Server") in units + assert ("function", "NewServer") in units + assert ("method", "Handle") in units + assert units[("method", "Handle")].parent == "Server" + + def test_units_preserve_language_and_line_metadata(self): + result = parse_file("main.go", GO_SOURCE) + assert result.units + for unit in result.units: + assert unit.language == "go" + assert unit.line_start is not None + assert unit.line_end is not None + assert unit.extra["language"] == "go" + assert unit.extra["line_start"] == unit.line_start + assert unit.extra["line_end"] == unit.line_end + + def test_dependencies_are_recorded_for_imports(self): + result = parse_file("main.go", GO_SOURCE) + imports = [dep for dep in result.dependencies if dep.relation == "imports"] + 
assert [dep.dst for dep in imports] == ["fmt", "net/http"] + + def test_invokes_include_same_package_direct_and_imported_selector_calls(self): + source = textwrap.dedent( + """\ + package app + + import "github.com/example/project/constraints" + + func Run() { + AllC() + constraints.Check() + } + + func AllC() bool { + return true + } + """ + ) + result = parse_file("cmd/app/app.go", source) + + invokes = [dep for dep in result.dependencies if dep.relation == "invokes"] + assert [(dep.symbol, dep.dst, dep.extra["call_kind"]) for dep in invokes] == [ + ("AllC", "AllC", "direct"), + ("Check", "github.com/example/project/constraints", "selector"), + ] + assert invokes[1].extra["qualifier"] == "constraints" + assert invokes[1].extra["module"] == "github.com/example/project/constraints" + + def test_generic_functions_and_receiver_methods_are_parsed(self): + source = textwrap.dedent( + """\ + package collections + + func All[T any](items []T) bool { + return AllC(items) + } + + func AllC[T any](items []T) bool { + return true + } + + func (s Set[T]) Add(value T) {} + func (s *Set[T]) Remove(value T) {} + func (s Set[T]) Map[U any](f func(T) U) []U { return nil } + """ + ) + result = parse_file("collections/set.go", source) + + units = _unit_map(result) + assert ("function", "All") in units + assert ("function", "AllC") in units + assert ("method", "Add") in units + assert ("method", "Remove") in units + assert ("method", "Map") in units + assert units[("method", "Add")].parent == "Set" + assert units[("method", "Remove")].parent == "Set" + assert units[("method", "Map")].parent == "Set" + + def test_invalid_source_returns_syntax_error_without_crashing(self): + result = parse_file("bad.go", "package main\nfunc broken(\n") + assert result.language == "go" + assert result.syntax_error is not None + valid, error = validate_syntax("bad.go", "package main\nfunc broken(\n") + assert valid is False + assert error is not None diff --git 
a/CoderMind/tests/test_lang_parser_javascript.py b/CoderMind/tests/test_lang_parser_javascript.py new file mode 100644 index 0000000..c4b18d3 --- /dev/null +++ b/CoderMind/tests/test_lang_parser_javascript.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +"""Tests for the JavaScript language parser.""" + +import os +import sys +import textwrap + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from lang_parser import parse_file, validate_syntax + + +JS_SOURCE = textwrap.dedent( + """\ + import fs from "fs"; + + export class Loader { + load(path) { + return fs.readFileSync(path, "utf8"); + } + + static create() { + return new Loader(); + } + } + + export function makeLoader() { + return Loader.create(); + } + + const normalizePath = (path) => path.trim(); + """ +) + + +def _unit_map(result): + return {(unit.unit_type, unit.name): unit for unit in result.units} + + +class TestJavaScriptParser: + def test_extracts_imports_classes_functions_and_methods(self): + result = parse_file("src/loader.js", JS_SOURCE) + assert result.file_path == "src/loader.js" + assert result.language == "javascript" + assert result.syntax_error is None + + units = _unit_map(result) + assert ("import", "fs") in units + assert ("class", "Loader") in units + assert ("method", "load") in units + assert ("method", "create") in units + assert ("function", "makeLoader") in units + assert ("function", "normalizePath") in units + assert units[("method", "load")].parent == "Loader" + + def test_jsx_extension_uses_javascript_language(self): + result = parse_file("src/view.jsx", "import React from 'react';\nexport function View() { return
; }\n") + assert result.language == "javascript" + assert any(unit.unit_type == "function" and unit.name == "View" for unit in result.units) + + def test_units_preserve_language_and_line_metadata(self): + result = parse_file("src/loader.js", JS_SOURCE) + assert result.units + for unit in result.units: + assert unit.language == "javascript" + assert unit.line_start is not None + assert unit.line_end is not None + assert unit.extra["language"] == "javascript" + assert unit.extra["line_start"] == unit.line_start + assert unit.extra["line_end"] == unit.line_end + + def test_invalid_source_returns_syntax_error_without_crashing(self): + result = parse_file("bad.js", "export function broken(\n") + assert result.language == "javascript" + assert result.syntax_error is not None + valid, error = validate_syntax("bad.js", "export function broken(\n") + assert valid is False + assert error is not None diff --git a/CoderMind/tests/test_lang_parser_python_parity.py b/CoderMind/tests/test_lang_parser_python_parity.py new file mode 100644 index 0000000..b4c3e89 --- /dev/null +++ b/CoderMind/tests/test_lang_parser_python_parity.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +""" +Tests for PythonParser and ParsedFile parity with the existing AST semantics. 
+""" + +import ast +import inspect +import os +import sys +import textwrap +from types import SimpleNamespace +from typing import Optional + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from lang_parser import parse_file, validate_syntax +from lang_parser.python_parser import PythonParser +from rpg.code_unit import CodeSnippetBuilder, CodeUnit, ParsedFile + + +SAMPLE_CODE = textwrap.dedent( + """\ + import os + from pathlib import Path as P + + CONSTANT = 1 + typed_value: int = 2 + + def top_function(x: int = 1) -> int: + return x + CONSTANT + + async def fetch_data(): + return None + + class Example(Base): + class_attr = "value" + typed_attr: str = "typed" + + def __init__(self, value): + self.value = value + + async def run(self): + return self.value + """ +) + + +def _extract_assignment_name(node) -> Optional[str]: + if isinstance(node, ast.Assign): + if node.targets and isinstance(node.targets[0], ast.Name): + return node.targets[0].id + elif isinstance(node, ast.AnnAssign): + if isinstance(node.target, ast.Name): + return node.target.id + return None + + +def _legacy_units(code: str, file_path: str) -> list[CodeUnit]: + tree = ast.parse(code) + units: list[CodeUnit] = [] + for node in tree.body: + if isinstance(node, (ast.Import, ast.ImportFrom)): + units.append(CodeUnit(ast.unparse(node).strip(), node, "import", file_path)) + elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + units.append(CodeUnit(node.name, node, "function", file_path)) + elif isinstance(node, ast.ClassDef): + units.append(CodeUnit(node.name, node, "class", file_path)) + for sub_node in node.body: + if isinstance(sub_node, (ast.FunctionDef, ast.AsyncFunctionDef)): + units.append(CodeUnit(sub_node.name, sub_node, "method", file_path, parent=node.name)) + elif isinstance(sub_node, (ast.Assign, ast.AnnAssign)): + units.append( + CodeUnit( + 
_extract_assignment_name(sub_node), + sub_node, + "assignment", + file_path, + parent=node.name, + ) + ) + elif isinstance(node, (ast.Assign, ast.AnnAssign)): + units.append(CodeUnit(_extract_assignment_name(node), node, "assignment", file_path)) + return units + + +def _unit_summary(units): + return [ + ( + unit.name, + unit.unit_type, + unit.parent, + unit.lineno, + unit.end_lineno, + unit.unparse(), + ) + for unit in units + ] + + +def _lp_summary(units): + return [ + ( + unit.name, + unit.unit_type, + unit.parent, + unit.line_start, + unit.line_end, + unit.code, + unit.language, + ) + for unit in units + ] + + +class TestPythonParserResult: + def test_valid_python_parser_result(self): + result = PythonParser().parse_file("pkg/sample.py", SAMPLE_CODE) + assert result.file_path == "pkg/sample.py" + assert result.language == "python" + assert result.syntax_error is None + + unit_types = [unit.unit_type for unit in result.units] + assert unit_types == [ + "import", + "import", + "assignment", + "assignment", + "function", + "function", + "class", + "assignment", + "assignment", + "method", + "method", + ] + assert all(unit.language == "python" for unit in result.units) + assert all(unit.line_start is not None and unit.line_end is not None for unit in result.units) + assert result.units[0].code == "import os" + assert result.units[1].code == "from pathlib import Path as P" + assert result.units[4].code.startswith("def top_function") + assert result.units[6].code.startswith("class Example") + assert len(result.dependencies) == 2 + assert [dependency.relation for dependency in result.dependencies] == ["imports", "imports"] + + def test_public_parse_file_matches_parser(self): + direct = PythonParser().parse_file("pkg/sample.py", SAMPLE_CODE) + public = parse_file("pkg/sample.py", SAMPLE_CODE) + assert _lp_summary(public.units) == _lp_summary(direct.units) + + def test_invalid_python_parser_result(self): + invalid_code = "def broken(\n" + result = 
PythonParser().parse_file("bad.py", invalid_code) + assert result.file_path == "bad.py" + assert result.language == "python" + assert result.syntax_error is not None + assert result.units == [] + assert result.dependencies == [] + assert validate_syntax("bad.py", invalid_code)[0] is False + + def test_validate_syntax_matches_ast_parse(self): + assert PythonParser().validate_syntax("ok.py", SAMPLE_CODE) == (True, None) + invalid_code = "def broken(\n" + parser_valid, parser_error = PythonParser().validate_syntax("bad.py", invalid_code) + try: + ast.parse(invalid_code) + except SyntaxError as exc: + ast_error = str(exc) + else: + ast_error = None + assert parser_valid is False + assert parser_error == ast_error + + +class TestParsedFileParity: + def test_constructor_signature_is_unchanged(self): + signature = inspect.signature(ParsedFile.__init__) + assert list(signature.parameters) == ["self", "code", "file_path"] + assert signature.parameters["code"].annotation is str + assert signature.parameters["file_path"].annotation is str + + def test_parsed_file_units_match_legacy_extraction(self): + parsed = ParsedFile(SAMPLE_CODE, "pkg/sample.py") + legacy = _legacy_units(SAMPLE_CODE, "pkg/sample.py") + assert parsed.has_error() is False + assert isinstance(parsed.tree, ast.Module) + assert _unit_summary(parsed.units) == _unit_summary(legacy) + + def test_parsed_file_queries_match_legacy_extraction(self): + parsed = ParsedFile(SAMPLE_CODE, "pkg/sample.py") + legacy = _legacy_units(SAMPLE_CODE, "pkg/sample.py") + legacy_by_name = {unit.name: unit for unit in legacy if unit.name is not None} + + for name in ["CONSTANT", "typed_value", "top_function", "fetch_data", "Example", "__init__", "run"]: + parsed_unit = parsed.get_unit_by_name(name) + assert parsed_unit is not None + assert parsed_unit.unit_type == legacy_by_name[name].unit_type + assert parsed_unit.parent == legacy_by_name[name].parent + + assert [unit.name for unit in parsed.get_units_by_type("method")] == 
["__init__", "run"] + assert [unit.name for unit in parsed.get_units_by_type("assignment")] == [ + "CONSTANT", + "typed_value", + "class_attr", + "typed_attr", + ] + + def test_snippet_and_count_line_behavior_match_legacy_extraction(self): + path = "pkg/sample.py" + parsed = ParsedFile(SAMPLE_CODE, path) + legacy_units = _legacy_units(SAMPLE_CODE, path) + + parsed_function = parsed.get_unit_by_name("top_function") + legacy_function = next(unit for unit in legacy_units if unit.name == "top_function") + assert parsed_function.count_lines(original=True, return_code=True) == legacy_function.count_lines( + original=True, + return_code=True, + ) + assert parsed_function.count_lines(original=False, return_code=True) == legacy_function.count_lines( + original=False, + return_code=True, + ) + + parsed_builder = CodeSnippetBuilder({path: SAMPLE_CODE}, {path: parsed}) + legacy_builder = CodeSnippetBuilder({path: SAMPLE_CODE}, {path: SimpleNamespace(units=legacy_units)}) + assert parsed_builder.generate_code_snippet(SAMPLE_CODE, parsed.units) == legacy_builder.generate_code_snippet( + SAMPLE_CODE, + legacy_units, + ) + + def test_invalid_python_matches_existing_error_behavior(self): + parsed = ParsedFile("def broken(\n", "bad.py") + assert parsed.has_error() is True + assert isinstance(parsed.error, SyntaxError) + assert isinstance(parsed.tree, ast.Module) + assert parsed.tree.body == [] + assert parsed.units == [] diff --git a/CoderMind/tests/test_lang_parser_registry.py b/CoderMind/tests/test_lang_parser_registry.py new file mode 100644 index 0000000..a95f827 --- /dev/null +++ b/CoderMind/tests/test_lang_parser_registry.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python3 +""" +Tests for the language parser registry. 
+""" + +import ast +import os +import sys +from dataclasses import is_dataclass +from pathlib import Path + +import pytest + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +import lang_parser +from lang_parser import ( + BaseLanguageParser, + LPFileResult, + NotSupported, + detect_language, + get_config, + get_config_for_path, + get_parser, + get_parser_for_file, + is_supported_source, + is_test_file, + markdown_fence_for_path, + parse_file, + validate_syntax, +) + + +class TestLangParserRegistry: + def test_import_and_public_api_exports(self): + assert lang_parser.detect_language is detect_language + assert lang_parser.parse_file is parse_file + assert lang_parser.validate_syntax is validate_syntax + + def test_python_config_lookup(self): + config = get_config("python") + assert is_dataclass(config) + assert config.name == "python" + assert config.display_name == "Python" + assert config.extensions == (".py",) + assert config.markdown_fence == "python" + assert config.tree_sitter_language is None + assert config.module_path_style == "python" + assert config.default_test_command == ("uv", "run", "pytest") + + @pytest.mark.parametrize( + ("language", "extensions", "fence", "tree_sitter_language", "style"), + [ + ("go", (".go",), "go", "go", "go"), + ("typescript", (".ts", ".tsx"), "typescript", "typescript", "node"), + ("javascript", (".js", ".jsx"), "javascript", "javascript", "node"), + ("c", (".c", ".h"), "c", "c", "c"), + ("cpp", (".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx"), "cpp", "cpp", "c"), + ("rust", (".rs",), "rust", "rust", "rust"), + ], + ) + def test_multilingual_config_lookup(self, language, extensions, fence, tree_sitter_language, style): + config = get_config(language) + assert is_dataclass(config) + assert config.name == language + assert config.extensions == extensions + assert config.markdown_fence == fence + assert 
config.tree_sitter_language == tree_sitter_language + assert config.module_path_style == style + assert config.source_globs + assert config.test_globs + + def test_unknown_config_raises(self): + with pytest.raises(NotSupported): + get_config("ruby") + + def test_detects_supported_paths(self): + assert detect_language("x.py") == "python" + assert detect_language("pkg/module.py") == "python" + assert detect_language("./nested/pkg/module.py") == "python" + assert detect_language("pkg/module.py:SomeClass.method") == "python" + assert detect_language("main.go") == "go" + assert detect_language("src/app.ts") == "typescript" + assert detect_language("src/app.tsx") == "typescript" + assert detect_language("src/app.js") == "javascript" + assert detect_language("src/app.jsx") == "javascript" + assert detect_language("src/math.c") == "c" + assert detect_language("include/math_utils.h") == "c" + assert detect_language("src/model.cpp") == "cpp" + assert detect_language("include/model.hpp") == "cpp" + assert detect_language("src/main.rs") == "rust" + assert detect_language("crates/foo/src/lib.rs") == "rust" + + def test_unsupported_paths_are_not_supported_source(self): + unsupported = [ + "README.md", + "notes.txt", + "Makefile", + "pkg/module", + "src/app.java", + ] + for path in unsupported: + assert detect_language(path) is None + assert get_config_for_path(path) is None + assert get_parser_for_file(path) is None + assert is_supported_source(path) is False + + def test_supported_source_includes_phase_b_languages(self): + supported = [ + "main.py", + "pkg/core.py", + "pkg/core.py:helper", + "main.go", + "src/app.ts", + "src/app.tsx", + "src/app.js", + "src/app.jsx", + "src/math.c", + "include/math_utils.h", + "src/model.cpp", + "include/model.hpp", + "src/main.rs", + "crates/foo/src/lib.rs", + ] + for path in supported: + assert is_supported_source(path) is True + + def test_multilingual_test_file_detection(self): + test_files = [ + "tests/test_example.py", + 
"pkg/foo_test.py", + "src/test_utils.py", + "testing/helpers.py", + "server/server_test.go", + "tests/helper.go", + "src/foo.test.ts", + "src/foo.spec.ts", + "src/foo.test.tsx", + "src/foo.spec.tsx", + "src/foo.test.js", + "src/foo.spec.js", + "src/foo.test.jsx", + "src/foo.spec.jsx", + "src/__tests__/helper.ts", + "src/tests/helper.js", + "server/server_test.c", + "tests/helper.c", + "src/app_test.cpp", + "tests/helper.cpp", + "tests/helper.rs", + "crates/foo/tests/integration.rs", + "examples/demo.rs", + "benches/bench.rs", + ] + for path in test_files: + assert is_test_file(path) is True + + def test_test_file_detection_avoids_false_positives_and_unsupported(self): + assert is_test_file("src/contest.py") is False + assert is_test_file("src/core.py") is False + assert is_test_file("src/testimonial.ts") is False + assert is_test_file("src/protest.js") is False + assert is_test_file("tests/readme.md") is False + + def test_parser_lookup(self): + for language, path in [ + ("python", "pkg/mod.py"), + ("go", "main.go"), + ("typescript", "src/app.ts"), + ("javascript", "src/app.js"), + ("c", "src/math.c"), + ("cpp", "src/model.cpp"), + ("rust", "src/main.rs"), + ]: + parser = get_parser(language) + assert isinstance(parser, BaseLanguageParser) + assert get_parser_for_file(path) is parser + with pytest.raises(NotSupported): + get_parser("ruby") + + def test_parse_file_public_api_python(self): + result = parse_file("pkg/mod.py", "import os\n\nx = 1\n") + assert isinstance(result, LPFileResult) + assert result.file_path == "pkg/mod.py" + assert result.language == "python" + assert result.syntax_error is None + assert [unit.unit_type for unit in result.units] == ["import", "assignment"] + + def test_validate_syntax_public_api(self): + assert validate_syntax("pkg/mod.py", "x = 1\n") == (True, None) + valid, error = validate_syntax("pkg/mod.py", "def broken(\n") + assert valid is False + assert error is not None + unsupported_valid, unsupported_error = 
validate_syntax("README.md", "text") + assert unsupported_valid is False + assert "Unsupported source file" in unsupported_error + + def test_parse_file_unsupported_raises(self): + with pytest.raises(NotSupported): + parse_file("README.md", "# docs\n") + + def test_markdown_fence(self): + assert markdown_fence_for_path("x.py") == "python" + assert markdown_fence_for_path("main.go") == "go" + assert markdown_fence_for_path("src/app.ts") == "typescript" + assert markdown_fence_for_path("src/app.tsx") == "typescript" + assert markdown_fence_for_path("src/app.js") == "javascript" + assert markdown_fence_for_path("src/app.jsx") == "javascript" + assert markdown_fence_for_path("src/math.c") == "c" + assert markdown_fence_for_path("include/model.hpp") == "cpp" + assert markdown_fence_for_path("src/main.rs") == "rust" + assert markdown_fence_for_path("README.md") == "text" + + def test_no_top_level_grammar_package_imports(self): + forbidden = { + "tree_sitter_go", + "tree_sitter_typescript", + "tree_sitter_javascript", + "tree_sitter_c", + "tree_sitter_cpp", + "tree_sitter_rust", + "tree_sitter_language_pack", + "tree_sitter_languages", + } + parser_root = Path(_project_root) / "scripts" / "lang_parser" + for path in parser_root.rglob("*.py"): + tree = ast.parse(path.read_text()) + for node in tree.body: + if isinstance(node, ast.Import): + imported = {alias.name.split(".")[0] for alias in node.names} + elif isinstance(node, ast.ImportFrom) and node.module: + imported = {node.module.split(".")[0]} + else: + continue + assert imported.isdisjoint(forbidden), f"{path} imports {imported & forbidden} at module scope" diff --git a/CoderMind/tests/test_lang_parser_rust.py b/CoderMind/tests/test_lang_parser_rust.py new file mode 100644 index 0000000..0fa1a07 --- /dev/null +++ b/CoderMind/tests/test_lang_parser_rust.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +"""Tests for the Rust language parser.""" + +import os +import sys +import textwrap + +_project_root = 
os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from lang_parser import parse_file, validate_syntax + + +RUST_SOURCE = textwrap.dedent( + """\ + use crate::error::GwsError; + use std::collections::HashMap; + + pub struct Client { + url: String, + } + + pub enum Status { + Ok, + Error(String), + } + + pub trait Provider { + fn get(&self) -> String; + } + + pub fn build_client() -> Result { + todo!() + } + + impl Client { + pub fn new(url: String) -> Self { + Client { url } + } + + pub fn fetch(&self) -> Status { + build_client().unwrap(); + Status::Ok + } + } + """ +) + + +def _unit_map(result): + return {(unit.unit_type, unit.name): unit for unit in result.units} + + +class TestRustParser: + def test_extracts_struct_enum_trait_function_and_methods(self): + result = parse_file("src/client.rs", RUST_SOURCE) + assert result.file_path == "src/client.rs" + assert result.language == "rust" + assert result.syntax_error is None + + units = _unit_map(result) + assert ("import", "crate::error::GwsError") in units + assert ("import", "std::collections::HashMap") in units + assert ("struct", "Client") in units + assert ("enum", "Status") in units + assert ("trait", "Provider") in units + assert ("function", "build_client") in units + assert ("method", "new") in units + assert ("method", "fetch") in units + assert units[("method", "new")].parent == "Client" + assert units[("method", "fetch")].parent == "Client" + + def test_trait_methods_are_parented_to_trait(self): + result = parse_file("src/client.rs", RUST_SOURCE) + trait_methods = [unit for unit in result.units if unit.unit_type == "method" and unit.parent == "Provider"] + assert [unit.name for unit in trait_methods] == ["get"] + + def test_dependencies_are_recorded_for_use_declarations(self): + result = parse_file("src/client.rs", RUST_SOURCE) + imports = [dep for dep in result.dependencies if dep.relation == "imports"] + assert 
[(dep.dst, dep.extra["import_kind"]) for dep in imports] == [ + ("crate::error::GwsError", "rust_use"), + ("std::collections::HashMap", "rust_use"), + ] + + def test_invokes_include_direct_calls_but_not_macros_or_enum_variants(self): + result = parse_file("src/client.rs", RUST_SOURCE) + invokes = [dep for dep in result.dependencies if dep.relation == "invokes"] + invoke_keys = {(dep.src, dep.symbol, dep.extra["call_kind"]) for dep in invokes} + assert ("src/client.rs:Client.fetch", "build_client", "direct") in invoke_keys + assert all(dep.symbol != "todo" for dep in invokes) + assert all(dep.symbol != "Ok" for dep in invokes) + + def test_mod_decl_produces_import_unit_and_dependency(self): + result = parse_file("src/lib.rs", "mod error;\npub mod services;\n") + units = _unit_map(result) + assert ("import", "error") in units + assert ("import", "services") in units + imports = [dep for dep in result.dependencies if dep.relation == "imports"] + assert [(dep.dst, dep.extra["import_kind"]) for dep in imports] == [ + ("error", "rust_mod_decl"), + ("services", "rust_mod_decl"), + ] + + def test_grouped_use_imports_expand_to_multiple_units(self): + result = parse_file("src/lib.rs", "use crate::foo::{A, B};\n") + imports = [unit.name for unit in result.units if unit.unit_type == "import"] + assert imports == ["crate::foo::A", "crate::foo::B"] + + def test_trait_impl_emits_high_confidence_inherits_dependency(self): + source = textwrap.dedent( + """\ + pub trait Provider { + fn get(&self) -> String; + } + + pub struct Client; + + impl Provider for Client { + fn get(&self) -> String { + String::new() + } + } + """ + ) + result = parse_file("src/client.rs", source) + inherits = [dep for dep in result.dependencies if dep.relation == "inherits"] + assert len(inherits) == 1 + assert inherits[0].src == "Client" + assert inherits[0].dst == "Provider" + assert inherits[0].confidence == "high" + + def test_units_preserve_language_and_line_metadata(self): + result = 
parse_file("src/client.rs", RUST_SOURCE) + assert result.units + for unit in result.units: + assert unit.language == "rust" + assert unit.line_start is not None + assert unit.line_end is not None + assert unit.extra["language"] == "rust" + assert unit.extra["line_start"] == unit.line_start + assert unit.extra["line_end"] == unit.line_end + + def test_invalid_source_returns_syntax_error_without_crashing(self): + result = parse_file("bad.rs", "pub fn broken(\n") + assert result.language == "rust" + assert result.syntax_error is not None + valid, error = validate_syntax("bad.rs", "pub fn broken(\n") + assert valid is False + assert error is not None diff --git a/CoderMind/tests/test_lang_parser_typescript.py b/CoderMind/tests/test_lang_parser_typescript.py new file mode 100644 index 0000000..7172dda --- /dev/null +++ b/CoderMind/tests/test_lang_parser_typescript.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +"""Tests for the TypeScript language parser.""" + +import os +import sys +import textwrap + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from lang_parser import parse_file, validate_syntax + + +TS_SOURCE = textwrap.dedent( + """\ + import { User } from "./models"; + + export class Greeter { + constructor(private user: User) {} + + greet(name: string): string { + return `hello ${name}`; + } + } + + export function makeGreeter(user: User): Greeter { + return new Greeter(user); + } + + export const formatName = (name: string): string => { + return name.trim(); + }; + """ +) + + +def _unit_map(result): + return {(unit.unit_type, unit.name): unit for unit in result.units} + + +class TestTypeScriptParser: + def test_extracts_imports_classes_functions_and_methods(self): + result = parse_file("src/greeter.ts", TS_SOURCE) + assert result.file_path == "src/greeter.ts" + assert result.language == "typescript" + assert result.syntax_error is None + + units = 
_unit_map(result) + assert ("import", "./models") in units + assert ("class", "Greeter") in units + assert ("method", "constructor") in units + assert ("method", "greet") in units + assert ("function", "makeGreeter") in units + assert ("function", "formatName") in units + assert units[("method", "greet")].parent == "Greeter" + + def test_tsx_extension_uses_typescript_language(self): + result = parse_file("src/component.tsx", "import React from 'react';\nexport function View() { return
; }\n") + assert result.language == "typescript" + assert any(unit.unit_type == "function" and unit.name == "View" for unit in result.units) + + def test_consecutive_semicolonless_imports_stay_separate(self): + source = textwrap.dedent( + """\ + import { A } from "./a" + import B from "./b" + export { C } from "./c" + """ + ) + result = parse_file("src/app.ts", source) + + import_units = [unit for unit in result.units if unit.unit_type == "import"] + assert [unit.extra["module"] for unit in import_units] == ["./a", "./b", "./c"] + assert [dep.dst for dep in result.dependencies if dep.relation == "imports"] == ["./a", "./b", "./c"] + + def test_multiline_import_stays_single_dependency(self): + source = textwrap.dedent( + """\ + import { + A, + B, + } from "./types" + import { C } from "./c" + """ + ) + result = parse_file("src/app.ts", source) + + import_units = [unit for unit in result.units if unit.unit_type == "import"] + assert [unit.extra["module"] for unit in import_units] == ["./types", "./c"] + assert import_units[0].line_start == 1 + assert import_units[0].line_end == 4 + + def test_invokes_include_imported_function_and_constructor_calls(self): + source = textwrap.dedent( + """\ + import { getDebugOption } from "./debug" + import { ChromeRemote } from "./remote" + + export function boot() { + getDebugOption(); + return new ChromeRemote(); + } + """ + ) + result = parse_file("src/app.ts", source) + + invokes = [dep for dep in result.dependencies if dep.relation == "invokes"] + assert [(dep.symbol, dep.extra["module"], dep.extra["call_kind"]) for dep in invokes] == [ + ("getDebugOption", "./debug", "function"), + ("ChromeRemote", "./remote", "constructor"), + ] + + def test_default_exported_class_and_function_units_are_marked(self): + class_result = parse_file("src/local.ts", "export default class ActualClass {}\n") + class_units = _unit_map(class_result) + assert ("class", "ActualClass") in class_units + assert class_units[("class", 
"ActualClass")].extra["export_default"] is True + + function_result = parse_file("src/factory.ts", "export default function createActual() { return true; }\n") + function_units = _unit_map(function_result) + assert ("function", "createActual") in function_units + assert function_units[("function", "createActual")].extra["export_default"] is True + + def test_units_preserve_language_and_line_metadata(self): + result = parse_file("src/greeter.ts", TS_SOURCE) + assert result.units + for unit in result.units: + assert unit.language == "typescript" + assert unit.line_start is not None + assert unit.line_end is not None + assert unit.extra["language"] == "typescript" + assert unit.extra["line_start"] == unit.line_start + assert unit.extra["line_end"] == unit.line_end + + def test_invalid_source_returns_syntax_error_without_crashing(self): + result = parse_file("bad.ts", "export function broken(\n") + assert result.language == "typescript" + assert result.syntax_error is not None + valid, error = validate_syntax("bad.ts", "export function broken(\n") + assert valid is False + assert error is not None diff --git a/CoderMind/tests/test_multilingual_code_unit.py b/CoderMind/tests/test_multilingual_code_unit.py new file mode 100644 index 0000000..76b156d --- /dev/null +++ b/CoderMind/tests/test_multilingual_code_unit.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +"""Tests for multilingual ParsedFile and CodeSnippetBuilder behavior.""" + +import os +import sys +from unittest.mock import patch + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from rpg.code_unit import CodeSnippetBuilder, ParsedFile + + +def test_parsed_file_handles_go_without_ast_parse_crash(): + code = "package main\n\nimport \"fmt\"\n\ntype Server struct {}\nfunc (s *Server) Handle() { fmt.Println(\"ok\") }\n" + parsed = ParsedFile(code, "main.go") + + assert parsed.has_error() is False + # Go struct 
is normalised to ``class`` so semantic_parsing.py's + # class-vs-function grouping picks it up; the original kind is kept + # in ``extra['lp_kind']`` for callers that need the raw taxonomy. + assert [(unit.unit_type, unit.name, unit.parent) for unit in parsed.units] == [ + ("package", "main", None), + ("import", "fmt", None), + ("class", "Server", None), + ("method", "Handle", "Server"), + ] + struct_unit = parsed.get_unit_by_name("Server") + assert struct_unit.extra["lp_kind"] == "struct" + method = parsed.get_unit_by_name("Handle") + assert method.lineno == 6 + assert method.end_lineno == 6 + assert method.extra["language"] == "go" + + +def test_snippet_builder_uses_go_fence_and_skips_ast_parse_for_go(): + path = "main.go" + code = "package main\n\nimport \"fmt\"\n\ntype Server struct {}\nfunc (s *Server) Handle() { fmt.Println(\"ok\") }\n" + parsed = ParsedFile(code, path) + builder = CodeSnippetBuilder({path: code}, {path: parsed}) + units = [unit for unit in parsed.units if unit.unit_type in {"class", "method"}] + + with patch("ast.parse", side_effect=AssertionError("ast.parse should not run for Go")): + snippet = builder.build(units) + + assert snippet.startswith("```go") + assert "type Server struct" in snippet + assert "func (s *Server) Handle" in snippet + + +def test_snippet_builder_uses_typescript_and_javascript_fences(): + ts_path = "src/app.ts" + ts_code = "import { x } from './x';\nexport function run(): number { return x; }\n" + js_path = "src/app.jsx" + js_code = "import React from 'react';\nexport function View() { return
; }\n" + ts_parsed = ParsedFile(ts_code, ts_path) + js_parsed = ParsedFile(js_code, js_path) + builder = CodeSnippetBuilder( + {ts_path: ts_code, js_path: js_code}, + {ts_path: ts_parsed, js_path: js_parsed}, + ) + + assert builder.build(ts_parsed.units).startswith("```typescript") + assert builder.build(js_parsed.units).startswith("```javascript") + + +def test_python_snippet_behavior_still_uses_python_fence(): + path = "pkg/mod.py" + code = "import os\n\ndef helper():\n return os.getcwd()\n" + parsed = ParsedFile(code, path) + builder = CodeSnippetBuilder({path: code}, {path: parsed}) + snippet = builder.build(parsed.units) + + assert snippet.startswith("```python") + assert "def helper" in snippet diff --git a/CoderMind/tests/test_multilingual_dep_graph.py b/CoderMind/tests/test_multilingual_dep_graph.py new file mode 100644 index 0000000..03febe5 --- /dev/null +++ b/CoderMind/tests/test_multilingual_dep_graph.py @@ -0,0 +1,702 @@ +#!/usr/bin/env python3 +"""Tests for multilingual DependencyGraph parsing.""" + +import os +import sys +import textwrap + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, _project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from rpg import EdgeType, NodeType +from rpg.dep_graph import DependencyGraph, _exclude_irrelevant_for_parse + + +def _parse_repo(repo_path): + graph = DependencyGraph(str(repo_path)) + graph.build() + graph.parse() + return graph + + +def _edge_attrs(graph, src, dst, edge_type): + edge_data = graph.G.get_edge_data(src, dst, default={}) + return [attrs for attrs in edge_data.values() if attrs.get("type") == edge_type] + + +def test_parse_filter_accepts_supported_sources_and_rejects_tests(): + assert _exclude_irrelevant_for_parse("cmd/server/main.go") is True + assert _exclude_irrelevant_for_parse("src/app.ts") is True + assert _exclude_irrelevant_for_parse("src/view.tsx") is True + assert _exclude_irrelevant_for_parse("src/math.c") is True + assert 
_exclude_irrelevant_for_parse("include/math_utils.h") is True + assert _exclude_irrelevant_for_parse("src/model.cpp") is True + assert _exclude_irrelevant_for_parse("include/model.hpp") is True + assert _exclude_irrelevant_for_parse("src/main.rs") is True + assert _exclude_irrelevant_for_parse("crates/foo/src/lib.rs") is True + assert _exclude_irrelevant_for_parse("src/config.json") is False + assert _exclude_irrelevant_for_parse("pkg/server/server_test.go") is False + assert _exclude_irrelevant_for_parse("src/app.test.ts") is False + assert _exclude_irrelevant_for_parse("tests/test_main.py") is False + assert _exclude_irrelevant_for_parse("server/server_test.c") is False + assert _exclude_irrelevant_for_parse("tests/helper.cpp") is False + assert _exclude_irrelevant_for_parse("tests/helper.rs") is False + assert _exclude_irrelevant_for_parse("examples/demo.rs") is False + + +def test_go_graph_structure_and_receiver_containment(tmp_path): + source = textwrap.dedent( + """\ + package server + + import "fmt" + + type Server struct { + Name string + } + + func NewServer(name string) *Server { + return &Server{Name: name} + } + + func (s *Server) Handle() { + fmt.Println(s.Name) + } + """ + ) + server_dir = tmp_path / "internal" / "server" + server_dir.mkdir(parents=True) + (server_dir / "server.go").write_text(source) + + graph = _parse_repo(tmp_path) + + file_id = "internal/server/server.go" + struct_id = f"{file_id}:Server" + function_id = f"{file_id}:NewServer" + method_id = f"{file_id}:Server.Handle" + + assert graph.G.nodes[file_id]["language"] == "go" + assert graph.G.nodes[file_id]["unit_type"] == "file" + assert graph.G.nodes[struct_id]["type"] == NodeType.CLASS + assert graph.G.nodes[struct_id]["unit_type"] == "struct" + assert graph.G.nodes[struct_id]["language"] == "go" + assert graph.G.nodes[function_id]["type"] == NodeType.FUNCTION + assert graph.G.nodes[function_id]["language"] == "go" + assert graph.G.nodes[method_id]["type"] == NodeType.METHOD + assert 
graph.G.nodes[method_id]["language"] == "go" + assert graph.G.nodes[method_id]["receiver_type"] == "Server" + assert graph.G.nodes[method_id]["code"].startswith("func (s *Server) Handle") + + assert _edge_attrs(graph, file_id, struct_id, EdgeType.CONTAINS) + assert _edge_attrs(graph, struct_id, method_id, EdgeType.CONTAINS) + assert any( + attrs.get("type") == NodeType.IMPORT and attrs.get("language") == "go" + for _, attrs in graph.G.nodes(data=True) + ) + assert any( + attrs.get("type") == NodeType.PACKAGE and attrs.get("language") == "go" + for _, attrs in graph.G.nodes(data=True) + ) + + +def test_typescript_graph_structure_and_resolvable_import_edge(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "models.ts").write_text( + textwrap.dedent( + """\ + export class User { + constructor(public name: string) {} + } + """ + ) + ) + (src / "app.ts").write_text( + textwrap.dedent( + """\ + import { User } from "./models"; + + export class Greeter { + greet(user: User): string { + return `hello ${user.name}`; + } + } + + export function makeGreeter(): Greeter { + return new Greeter(); + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + file_id = "src/app.ts" + class_id = f"{file_id}:Greeter" + method_id = f"{file_id}:Greeter.greet" + function_id = f"{file_id}:makeGreeter" + + assert graph.G.nodes[file_id]["language"] == "typescript" + assert graph.G.nodes[class_id]["type"] == NodeType.CLASS + assert graph.G.nodes[class_id]["language"] == "typescript" + assert graph.G.nodes[method_id]["type"] == NodeType.METHOD + assert graph.G.nodes[method_id]["language"] == "typescript" + assert graph.G.nodes[function_id]["type"] == NodeType.FUNCTION + assert graph.G.nodes[function_id]["language"] == "typescript" + assert _edge_attrs(graph, class_id, method_id, EdgeType.CONTAINS) + + import_edges = _edge_attrs(graph, file_id, "src/models.ts", EdgeType.IMPORTS) + assert import_edges + assert import_edges[0]["resolved"] is True + assert import_edges[0]["confidence"] == 
"resolved" + assert import_edges[0]["import_module"] == "./models" + + +def test_typescript_imported_and_same_file_invokes_resolve(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "debug.ts").write_text( + textwrap.dedent( + """\ + export function getDebugOption(): boolean { + return true; + } + """ + ) + ) + (src / "remote.ts").write_text( + textwrap.dedent( + """\ + export class ChromeRemote { + start(): void {} + } + """ + ) + ) + (src / "app.ts").write_text( + textwrap.dedent( + """\ + import { getDebugOption } from "./debug" + import { ChromeRemote } from "./remote" + + export function localHelper(): void {} + + export function boot(): ChromeRemote { + localHelper(); + getDebugOption(); + return new ChromeRemote(); + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + source_id = "src/app.ts:boot" + assert _edge_attrs(graph, source_id, "src/app.ts:localHelper", EdgeType.INVOKES) + assert _edge_attrs(graph, source_id, "src/debug.ts:getDebugOption", EdgeType.INVOKES) + assert _edge_attrs(graph, source_id, "src/remote.ts:ChromeRemote", EdgeType.INVOKES) + + +def test_typescript_default_import_alias_constructor_resolves_to_default_export(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "local.ts").write_text( + textwrap.dedent( + """\ + export default class ActualClass { + start(): void {} + } + """ + ) + ) + (src / "app.ts").write_text( + textwrap.dedent( + """\ + import LocalAlias from "./local" + + export function boot(): ActualClass { + return new LocalAlias(); + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + assert graph.G.nodes["src/local.ts:ActualClass"]["export_default"] is True + assert _edge_attrs(graph, "src/app.ts:boot", "src/local.ts:ActualClass", EdgeType.INVOKES) + + +def test_go_module_prefix_import_and_invokes_resolve(tmp_path): + (tmp_path / "go.mod").write_text("module github.com/example/project\n") + constraints_dir = tmp_path / "constraints" + constraints_dir.mkdir() + (constraints_dir / 
"doc.go").write_text("package constraints\n") + (constraints_dir / "check.go").write_text( + textwrap.dedent( + """\ + package constraints + + func Check() bool { + return true + } + """ + ) + ) + cmd_dir = tmp_path / "cmd" + cmd_dir.mkdir() + (cmd_dir / "helpers.go").write_text( + textwrap.dedent( + """\ + package cmd + + func AllC() bool { + return true + } + """ + ) + ) + (cmd_dir / "app.go").write_text( + textwrap.dedent( + """\ + package cmd + + import "github.com/example/project/constraints" + + func Run() { + AllC() + constraints.Check() + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + assert _edge_attrs(graph, "cmd/app.go", "constraints/doc.go", EdgeType.IMPORTS) + source_id = "cmd/app.go:Run" + assert _edge_attrs(graph, source_id, "cmd/helpers.go:AllC", EdgeType.INVOKES) + assert _edge_attrs(graph, source_id, "constraints/check.go:Check", EdgeType.INVOKES) + + +def test_go_same_package_generic_wrapper_invokes_resolve_across_files(tmp_path): + channels_dir = tmp_path / "channels" + channels_dir.mkdir() + (channels_dir / "channel.go").write_text( + textwrap.dedent( + """\ + package channels + + func All[T any](c <-chan T) bool { + return AllC(c) + } + + func Any[T any](c <-chan T) bool { + return AnyC(c) + } + """ + ) + ) + (channels_dir / "channel_ctx.go").write_text( + textwrap.dedent( + """\ + package channels + + func AllC[T any](c <-chan T) bool { + return true + } + + func AnyC[T any](c <-chan T) bool { + return false + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + assert _edge_attrs(graph, "channels/channel.go:All", "channels/channel_ctx.go:AllC", EdgeType.INVOKES) + assert _edge_attrs(graph, "channels/channel.go:Any", "channels/channel_ctx.go:AnyC", EdgeType.INVOKES) + + +def test_unresolved_typescript_import_is_represented_with_metadata(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "app.ts").write_text( + textwrap.dedent( + """\ + import { Missing } from "./missing"; + + export function run(value: Missing): Missing { + 
return value; + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + file_id = "src/app.ts" + import_id = f"{file_id}:import:1" + assert graph.G.nodes[import_id]["type"] == NodeType.IMPORT + assert graph.G.nodes[import_id]["language"] == "typescript" + assert graph.G.nodes[import_id]["resolved"] is False + assert graph.G.nodes[import_id]["confidence"] == "unresolved" + assert graph.G.nodes[import_id]["heuristic"] is True + + import_edges = _edge_attrs(graph, file_id, import_id, EdgeType.IMPORTS) + assert import_edges + assert import_edges[0]["resolved"] is False + assert import_edges[0]["confidence"] == "unresolved" + assert import_edges[0]["heuristic"] is True + + +def test_incremental_update_keeps_typescript_import_edges(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "models.ts").write_text("export class User {}\n") + app = src / "app.ts" + app.write_text( + textwrap.dedent( + """\ + import { User } from "./models"; + + export function makeUser(): User { + return new User(); + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + app.write_text( + textwrap.dedent( + """\ + import { User } from "./models"; + + export function run(): User { + return new User(); + } + """ + ) + ) + + stats = graph.update_files(["src/app.ts"]) + + assert stats["modified"] == 1 + assert "src/app.ts:run" in graph.G + import_edges = _edge_attrs(graph, "src/app.ts", "src/models.ts", EdgeType.IMPORTS) + assert import_edges + assert import_edges[0]["resolved"] is True + + +def test_non_python_syntax_error_metadata_does_not_abort_parsing(tmp_path): + (tmp_path / "bad.go").write_text("package main\nfunc broken(\n") + (tmp_path / "good.go").write_text( + textwrap.dedent( + """\ + package main + + func Works() string { + return "ok" + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + assert graph.G.nodes["bad.go"]["language"] == "go" + assert graph.G.nodes["bad.go"]["unit_type"] == "file" + assert graph.G.nodes["bad.go"].get("syntax_error") + assert "good.go:Works" in graph.G + 
assert graph.G.nodes["good.go:Works"]["language"] == "go" + + +def test_c_graph_resolves_local_include_and_direct_cross_file_call(tmp_path): + (tmp_path / "util.h").write_text( + textwrap.dedent( + """\ + struct Counter { int value; }; + int add_one(int value); + """ + ) + ) + (tmp_path / "util.c").write_text( + textwrap.dedent( + """\ + #include "util.h" + + int add_one(int value) { + return value + 1; + } + """ + ) + ) + (tmp_path / "main.c").write_text( + textwrap.dedent( + """\ + #include "util.h" + + int main(void) { + return add_one(1); + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + assert graph.G.nodes["main.c"]["language"] == "c" + assert graph.G.nodes["util.h"]["language"] == "c" + assert graph.G.nodes["util.h:Counter"]["type"] == NodeType.CLASS + assert graph.G.nodes["util.c:add_one"]["type"] == NodeType.FUNCTION + assert graph.G.nodes["main.c:main"]["type"] == NodeType.FUNCTION + + import_edges = _edge_attrs(graph, "main.c", "util.h", EdgeType.IMPORTS) + assert import_edges + assert import_edges[0]["resolved"] is True + assert import_edges[0]["include_style"] == "quote" + assert _edge_attrs(graph, "main.c:main", "util.c:add_one", EdgeType.INVOKES) + + +def test_c_system_include_remains_unresolved_placeholder(tmp_path): + (tmp_path / "main.c").write_text( + textwrap.dedent( + """\ + #include + + int main(void) { + return 0; + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + import_id = "main.c:import:1" + assert graph.G.nodes[import_id]["type"] == NodeType.IMPORT + assert graph.G.nodes[import_id]["language"] == "c" + assert graph.G.nodes[import_id]["resolved"] is False + assert graph.G.nodes[import_id]["confidence"] == "unresolved" + assert graph.G.nodes[import_id]["heuristic"] is True + + import_edges = _edge_attrs(graph, "main.c", import_id, EdgeType.IMPORTS) + assert import_edges + assert import_edges[0]["resolved"] is False + assert import_edges[0]["include_style"] == "angle" + + +def 
test_cpp_graph_resolves_class_methods_constructor_and_static_call(tmp_path): + (tmp_path / "model.hpp").write_text( + textwrap.dedent( + """\ + class Widget { + public: + int value() const { return 1; } + }; + """ + ) + ) + (tmp_path / "model.cpp").write_text( + textwrap.dedent( + """\ + #include "model.hpp" + + int Widget::make() { + Widget* widget = new Widget(); + return 0; + } + """ + ) + ) + (tmp_path / "main.cpp").write_text( + textwrap.dedent( + """\ + #include "model.hpp" + + int run() { + return Widget::make(); + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + assert graph.G.nodes["model.hpp"]["language"] == "cpp" + assert graph.G.nodes["model.hpp:Widget"]["type"] == NodeType.CLASS + assert graph.G.nodes["model.hpp:Widget.value"]["type"] == NodeType.METHOD + assert graph.G.nodes["model.cpp:Widget.make"]["type"] == NodeType.METHOD + assert graph.G.nodes["main.cpp:run"]["type"] == NodeType.FUNCTION + + assert _edge_attrs(graph, "main.cpp", "model.hpp", EdgeType.IMPORTS) + assert _edge_attrs(graph, "model.cpp", "model.hpp", EdgeType.IMPORTS) + assert _edge_attrs(graph, "model.cpp:Widget.make", "model.hpp:Widget", EdgeType.INVOKES) + assert _edge_attrs(graph, "main.cpp:run", "model.cpp:Widget.make", EdgeType.INVOKES) + + +def test_c_syntax_error_metadata_does_not_abort_parsing(tmp_path): + (tmp_path / "bad.c").write_text("int broken(\n") + (tmp_path / "good.c").write_text( + textwrap.dedent( + """\ + int works(void) { + return 0; + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + assert graph.G.nodes["bad.c"]["language"] == "c" + assert graph.G.nodes["bad.c"].get("syntax_error") + assert "good.c:works" in graph.G + assert graph.G.nodes["good.c:works"]["language"] == "c" + + +def test_rust_graph_structure_trait_impl_and_containment(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "lib.rs").write_text( + textwrap.dedent( + """\ + pub trait Provider { + fn get(&self) -> String; + } + + pub struct Client; + + pub enum Status { + Ok, + } + + 
impl Provider for Client { + fn get(&self) -> String { + String::new() + } + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + file_id = "src/lib.rs" + trait_id = f"{file_id}:Provider" + struct_id = f"{file_id}:Client" + enum_id = f"{file_id}:Status" + trait_method_id = f"{file_id}:Provider.get" + impl_method_id = f"{file_id}:Client.get" + + assert graph.G.nodes[file_id]["language"] == "rust" + assert graph.G.nodes[struct_id]["type"] == NodeType.CLASS + assert graph.G.nodes[struct_id]["unit_type"] == "struct" + assert graph.G.nodes[enum_id]["type"] == NodeType.CLASS + assert graph.G.nodes[enum_id]["unit_type"] == "enum" + assert graph.G.nodes[trait_id]["type"] == NodeType.INTERFACE + assert graph.G.nodes[trait_id]["unit_type"] == "trait" + assert graph.G.nodes[trait_method_id]["type"] == NodeType.METHOD + assert graph.G.nodes[impl_method_id]["type"] == NodeType.METHOD + assert _edge_attrs(graph, trait_id, trait_method_id, EdgeType.CONTAINS) + assert _edge_attrs(graph, struct_id, impl_method_id, EdgeType.CONTAINS) + assert _edge_attrs(graph, struct_id, trait_id, EdgeType.INHERITS) + + +def test_rust_mod_decl_resolves_to_sibling_file(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "lib.rs").write_text( + textwrap.dedent( + """\ + mod error; + + pub fn run() { + crate::error::make_error(); + } + """ + ) + ) + (src / "error.rs").write_text("pub fn make_error() {}\n") + + graph = _parse_repo(tmp_path) + + import_edges = _edge_attrs(graph, "src/lib.rs", "src/error.rs", EdgeType.IMPORTS) + assert import_edges + assert import_edges[0]["resolved"] is True + assert import_edges[0]["import_kind"] == "rust_mod_decl" + assert _edge_attrs(graph, "src/lib.rs:run", "src/error.rs:make_error", EdgeType.INVOKES) + + +def test_rust_crate_use_resolves_to_file(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "lib.rs").write_text("pub mod a;\npub mod b;\n") + (src / "a.rs").write_text( + textwrap.dedent( + """\ + use crate::b::Worker; + + pub fn make() { + 
crate::b::build(); + } + """ + ) + ) + (src / "b.rs").write_text( + textwrap.dedent( + """\ + pub struct Worker; + + pub fn build() {} + """ + ) + ) + + graph = _parse_repo(tmp_path) + + import_edges = _edge_attrs(graph, "src/a.rs", "src/b.rs", EdgeType.IMPORTS) + assert import_edges + assert import_edges[0]["resolved"] is True + assert import_edges[0]["import_kind"] == "rust_use" + assert _edge_attrs(graph, "src/a.rs:make", "src/b.rs:build", EdgeType.INVOKES) + + +def test_rust_grouped_use_import_nodes_are_distinct(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "lib.rs").write_text("pub mod foo;\n") + (src / "foo.rs").write_text("pub struct A;\npub struct B;\n") + (src / "app.rs").write_text("use crate::foo::{A, B};\n") + + graph = _parse_repo(tmp_path) + + assert "src/app.rs:import:1:1" in graph.G + assert "src/app.rs:import:1:2" in graph.G + assert graph.G.nodes["src/app.rs:import:1:1"]["import_module"] == "crate::foo::A" + assert graph.G.nodes["src/app.rs:import:1:2"]["import_module"] == "crate::foo::B" + assert _edge_attrs(graph, "src/app.rs", "src/foo.rs", EdgeType.IMPORTS) + + +def test_rust_direct_invoke_resolves_within_file(tmp_path): + src = tmp_path / "src" + src.mkdir() + (src / "main.rs").write_text( + textwrap.dedent( + """\ + fn helper() {} + + fn caller() { + helper(); + } + """ + ) + ) + + graph = _parse_repo(tmp_path) + + assert _edge_attrs(graph, "src/main.rs:caller", "src/main.rs:helper", EdgeType.INVOKES) diff --git a/CoderMind/tests/test_multilingual_encoder_pipeline.py b/CoderMind/tests/test_multilingual_encoder_pipeline.py new file mode 100644 index 0000000..3e16611 --- /dev/null +++ b/CoderMind/tests/test_multilingual_encoder_pipeline.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python3 +"""Tests for multilingual encoder discovery and semantic parsing entry.""" + +import os +import sys +import textwrap +from unittest.mock import MagicMock, patch + +_project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, 
_project_root) +sys.path.insert(0, os.path.join(_project_root, "scripts")) + +from rpg import NodeType +from rpg.code_unit import ParsedFile +from rpg_encoder.refactor_tree import RefactorTree +from rpg_encoder.rpg_encoding import RPGParser +from rpg_encoder.semantic_parsing import ParseFeatures + + +GO_SOURCE = textwrap.dedent( + """\ + package main + + type Server struct {} + + func (s *Server) Handle() {} + + func NewServer() *Server { + return &Server{} + } + """ +) + + +TS_SOURCE = textwrap.dedent( + """\ + import { User } from "./model"; + + export class Greeter { + greet(user: User): string { + return user.name; + } + } + + export function makeGreeter(): Greeter { + return new Greeter(); + } + """ +) + + +def _make_parse_features(tmp_path, valid_files, responses): + mock_llm = MagicMock() + mock_llm.generate_with_memory.side_effect = responses + parser = ParseFeatures( + repo_dir=str(tmp_path), + repo_info="test repo", + repo_skeleton="\n".join(valid_files), + valid_files=valid_files, + repo_name="test-repo", + llm_client=mock_llm, + ) + return parser, mock_llm + + +def test_rpg_parser_skeleton_includes_supported_languages_and_excludes_tests(tmp_path): + for rel_path, content in { + "pkg/mod.py": "def helper():\n return 1\n", + "main.go": "package main\nfunc Run() {}\n", + "src/app.ts": "export function run(): number { return 1; }\n", + "src/component.tsx": "export function View() { return
; }\n", + "web/app.js": "export function run() { return 1; }\n", + "web/view.jsx": "export function View() { return
; }\n", + "main_test.go": "package main\nfunc TestRun() {}\n", + "src/app.test.ts": "export function testRun() {}\n", + "web/app.spec.js": "export function specRun() {}\n", + "README.md": "# docs\n", + }.items(): + full_path = tmp_path / rel_path + full_path.parent.mkdir(parents=True, exist_ok=True) + full_path.write_text(content) + + with patch.object(RPGParser, "__init__", lambda self, **kw: None): + parser = RPGParser.__new__(RPGParser) + parser.repo_dir = str(tmp_path) + parser.repo_name = "test" + parser.logger = MagicMock() + skeleton, valid_files = parser._load_skeleton_from_repo() + + assert "pkg/mod.py" in valid_files + assert "main.go" in valid_files + assert "src/app.ts" in valid_files + assert "src/component.tsx" in valid_files + assert "web/app.js" in valid_files + assert "web/view.jsx" in valid_files + assert "main_test.go" not in valid_files + assert "src/app.test.ts" not in valid_files + assert "web/app.spec.js" not in valid_files + assert "README.md" not in valid_files + assert "README.md" in skeleton + + +def test_go_repo_enters_semantic_parsing_with_non_empty_units(tmp_path): + (tmp_path / "main.go").write_text(GO_SOURCE) + abs_path = str(tmp_path / "main.go") + parsed = ParsedFile(GO_SOURCE, abs_path) + assert parsed.units + + responses = [ + '{"Server": {"Handle": ["serve request"]}}', + '{"NewServer": ["create server"]}', + f'{{"{abs_path}": "server runtime"}}', + ] + parser, mock_llm = _make_parse_features(tmp_path, ["main.go", "main_test.go", "README.md"], responses) + + features, trajectories = parser.parse_repo(max_workers=1, max_iterations=1) + + assert "main.go" in features + assert features["main.go"]["class Server"] == {"Handle": ["serve request"]} + assert features["main.go"]["function NewServer"] == ["create server"] + assert features["main.go"]["_file_summary_"] == "server runtime" + assert trajectories + assert mock_llm.generate_with_memory.call_count == 3 + + +def 
test_typescript_repo_enters_semantic_parsing_with_non_empty_units(tmp_path): + source_path = tmp_path / "src" / "greeter.ts" + source_path.parent.mkdir(parents=True, exist_ok=True) + source_path.write_text(TS_SOURCE) + (tmp_path / "src" / "greeter.test.ts").write_text("export function testGreeter() {}\n") + abs_path = str(source_path) + parsed = ParsedFile(TS_SOURCE, abs_path) + assert parsed.units + + responses = [ + '{"Greeter": {"greet": ["format greeting"]}}', + '{"makeGreeter": ["create greeter"]}', + f'{{"{abs_path}": "greeting utilities"}}', + ] + parser, mock_llm = _make_parse_features( + tmp_path, + ["src/greeter.ts", "src/greeter.test.ts", "notes.txt"], + responses, + ) + + features, _ = parser.parse_repo(max_workers=1, max_iterations=1) + + assert "src/greeter.ts" in features + assert "src/greeter.test.ts" not in features + assert features["src/greeter.ts"]["class Greeter"] == {"greet": ["format greeting"]} + assert features["src/greeter.ts"]["function makeGreeter"] == ["create greeter"] + assert features["src/greeter.ts"]["_file_summary_"] == "greeting utilities" + assert mock_llm.generate_with_memory.call_count == 3 + + +def test_refactor_tree_assigns_language_metadata_to_go_and_typescript_nodes(tmp_path): + refactor_go_source = textwrap.dedent( + """\ + package main + + type Server struct {} + type Handler struct {} + + func (h *Handler) Handle() {} + + func NewServer() *Server { + return &Server{} + } + """ + ) + refactor_ts_source = textwrap.dedent( + """\ + export class Greeter { + greet(): string { + return "hello"; + } + } + + export class Helper {} + + export function makeGreeter(): Greeter { + return new Greeter(); + } + """ + ) + + go_path = tmp_path / "cmd" / "server.go" + go_path.parent.mkdir(parents=True, exist_ok=True) + go_path.write_text(refactor_go_source) + + ts_path = tmp_path / "frontend" / "greeter.ts" + ts_path.parent.mkdir(parents=True, exist_ok=True) + ts_path.write_text(refactor_ts_source) + + parsed_tree = { + "cmd/server.go": 
{ + "_file_summary_": "server runtime", + "class Server": ["server model"], + "class Handler": {"Handle": ["handle request"]}, + "function NewServer": ["create server"], + }, + "frontend/greeter.ts": { + "_file_summary_": "greeting utilities", + "class Greeter": {"greet": ["format greeting"]}, + "class Helper": ["helper model"], + "function makeGreeter": ["create greeter"], + }, + } + + def fake_process_folder( + self, + functional_areas, + folder_path, + cur_feature_tree, + dir_file2node, + area_update, + parsed_tree, + context_window, + max_iters, + ): + area_name = functional_areas[0] + area_update.setdefault(area_name, {}) + for file_node in dir_file2node.values(): + area_update[area_name][f"{area_name}/Source/{file_node.name}"] = file_node + return cur_feature_tree, [] + + refactor = RefactorTree( + repo_dir=str(tmp_path), + repo_info="test repo", + repo_skeleton="cmd/server.go\nfrontend/greeter.ts", + repo_name="test-repo", + llm_client=MagicMock(), + language="python", + language_map={"cmd/": "go", "frontend/": "typescript"}, + ) + + with patch.object(RefactorTree, "plan_functional_areas", return_value={"final_plan": ["Core"]}), \ + patch.object(RefactorTree, "process_folder", fake_process_folder), \ + patch.object(RefactorTree, "_estimate_batch_tokens_for_process_folder", return_value=1): + _, _, rpg = refactor.run(parsed_tree, max_iters=1) + + language_by_type = { + node.meta.type_name: node.meta.language + for node in rpg.nodes.values() + if node.meta + and node.meta.type_name in {NodeType.FILE, NodeType.CLASS, NodeType.FUNCTION, NodeType.METHOD} + and node.meta.path + and str(node.meta.path).startswith("cmd/") + } + assert language_by_type[NodeType.FILE] == "go" + assert language_by_type[NodeType.CLASS] == "go" + assert language_by_type[NodeType.FUNCTION] == "go" + assert language_by_type[NodeType.METHOD] == "go" + + ts_nodes = [ + node + for node in rpg.nodes.values() + if node.meta + and node.meta.type_name in {NodeType.FILE, NodeType.CLASS, 
NodeType.FUNCTION, NodeType.METHOD} + and node.meta.path + and str(node.meta.path).startswith("frontend/") + ] + assert ts_nodes + assert {node.meta.language for node in ts_nodes} == {"typescript"} diff --git a/CoderMind/tests/test_multilingual_prompt_safety.py b/CoderMind/tests/test_multilingual_prompt_safety.py new file mode 100644 index 0000000..4656435 --- /dev/null +++ b/CoderMind/tests/test_multilingual_prompt_safety.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +"""Tests for language-neutral encoder prompt wording.""" + +import os +import sys +from pathlib import Path + +_project_root = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(_project_root)) +sys.path.insert(0, str(_project_root / "scripts")) + +from rpg_encoder.prompts import EXCLUDE_FILES, PARSE_CLASS, PARSE_FUNCTION + + +PROMPT_FILES = [ + _project_root / "scripts" / "rpg_encoder" / "prompts" / "parse_prompts.py", + _project_root / "scripts" / "rpg_encoder" / "prompts" / "encoding_prompts.py", +] + +FORBIDDEN_ENCODER_PROMPT_TERMS = [ + ".py only", + "Python classes", + "Python repository", + "__init__", + "pandas.DataFrame", + "pyarrow.Table", +] + + +def test_prompt_files_do_not_contain_forbidden_python_only_terms(): + prompt_text = "\n".join(path.read_text() for path in PROMPT_FILES) + for term in FORBIDDEN_ENCODER_PROMPT_TERMS: + assert term not in prompt_text + + +def test_prompt_files_do_not_scope_exclusion_to_python_extensions(): + prompt_text = "\n".join(path.read_text() for path in PROMPT_FILES) + assert "Consider only:\n1) `.py` files" not in prompt_text + assert "Directories containing `.py` files" not in prompt_text + + +def test_solution_output_schemas_are_preserved(): + # The prompt was updated to emit a richer ``{feature: description}`` + # mapping (instead of the legacy ``[feature1, feature2]`` array). 
+ # The multilingual scrub must not regress the example payloads — + # downstream parsers (``semantic_parsing.py``) rely on these exact + # shapes when validating LLM output. + assert "" in PARSE_CLASS + assert "" in PARSE_CLASS + # Class examples: dict-of-dict with method -> {feature: description}. + assert '"method_1": {{' in PARSE_CLASS + assert '"feature 1": "description of feature 1"' in PARSE_CLASS + assert "" in PARSE_FUNCTION + assert "" in PARSE_FUNCTION + # Function examples: dict-of-dict with func_name -> {feature: description}. + assert '"func_name_1": {{' in PARSE_FUNCTION + assert '"feature one": "description of feature one"' in PARSE_FUNCTION + assert "" in EXCLUDE_FILES + assert "" in EXCLUDE_FILES diff --git a/CoderMind/tests/test_orphan_test_build_exclusion.py b/CoderMind/tests/test_orphan_test_build_exclusion.py new file mode 100644 index 0000000..6db2c51 --- /dev/null +++ b/CoderMind/tests/test_orphan_test_build_exclusion.py @@ -0,0 +1,381 @@ +"""Regression tests for excluding test/build units from orphan detection. + +Reproduces the plan-stage WARN seen across languages +(``global_review.passed=false ... N orphan feature(s)``) where the orphan +heuristic flagged TEST functions and BUILD targets. Those units are +callable, so the type-like (``is_callable``) exclusion does not cover +them, yet they have no incoming *production* invocation edge — they are +invoked by an external runner (test framework / ``make``), so flagging +them as dead code is a false positive. + +Exclusion uses two complementary signals: + * language-agnostic: the feature path / subtree category + (``Testing`` / ``Build System`` / ...); + * per-language: ``backend.is_test_file`` on the unit's file. + +Real production dead code (a production-category callable with no edges) +must STILL be flagged, so the gate keeps its value. 
+""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +_SCRIPTS = Path(__file__).resolve().parents[1] / "scripts" +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +from decoder_lang import get_backend # noqa: E402 +from func_design.interface_review import ( # noqa: E402 + _is_non_production_feature, + check_call_graph_connectivity, + check_feature_dependency_coverage, +) + + +def _interfaces(units_to_features: dict, *, subtree: str, file_path: str) -> dict: + """Build a minimal interfaces_data with one subtree/file/units block. + + Includes both the ``units`` list (consumed by ``build_call_graph`` to + register graph nodes) and ``units_to_features`` (consumed by the + feature-coverage check), so both orphan detectors see the unit. + """ + return { + "subtrees": { + subtree: { + "interfaces": { + file_path: { + "units": list(units_to_features.keys()), + "units_to_features": units_to_features, + }, + }, + }, + }, + } + + +_EMPTY_FLOW: dict = { + "invocation_edges": [], + "inheritance_edges": [], + "reference_edges": [], +} + + +class TestIsNonProductionFeature: + @pytest.mark.parametrize( + "features,subtree", + [ + (["Testing/error reporting/verify division by zero"], "Testing"), + (["Build System/make targets/run test suite"], "Build System"), + ([], "Test Infrastructure"), + (["Tooling/lint/run linter"], "Tooling"), + ], + ) + def test_test_and_build_categories_are_non_production(self, features, subtree): + assert _is_non_production_feature(features, subtree) is True + + @pytest.mark.parametrize( + "features,subtree", + [ + (["Task Store/add/append todo"], "Task Store"), + (["Web Routes/handle add"], "Web Routes"), + ([], "Data Layer"), + ], + ) + def test_production_categories_are_production(self, features, subtree): + assert _is_non_production_feature(features, subtree) is False + + def test_case_insensitive_and_path_head(self): + assert _is_non_production_feature(["TESTING/x/y"], "") is True 
+ assert _is_non_production_feature(["tests/unit/foo"], "") is True + + +class TestFeatureCoverageExcludesTestBuild: + def test_test_function_not_flagged_by_category(self): + # A callable test function with no incoming edge: previously an + # orphan, now excluded by the Testing category (no backend needed). + data = _interfaces( + {"function test_division_by_zero": ["Testing/error reporting/div by zero"]}, + subtree="Testing", + file_path="tests/test_errors.c", + ) + orphans = check_feature_dependency_coverage( + data, _EMPTY_FLOW, entry_points=[], + is_callable=get_backend("c").is_callable_unit, + ) + assert orphans == [] + + def test_build_target_not_flagged_by_category(self): + data = _interfaces( + {"function build_run_tests": ["Build System/make targets/run test suite"]}, + subtree="Build System", + file_path="build/Makefile", + ) + orphans = check_feature_dependency_coverage( + data, _EMPTY_FLOW, entry_points=[], + is_callable=get_backend("c").is_callable_unit, + ) + assert orphans == [] + + def test_test_file_excluded_even_with_production_category(self): + # Defence in depth: a unit in a test file is excluded via + # is_test_file even if its feature category were not recognised. + data = _interfaces( + {"function helper_in_test": ["Some Category/x/y"]}, + subtree="Some Category", + file_path="internal/store/store_test.go", + ) + orphans = check_feature_dependency_coverage( + data, _EMPTY_FLOW, entry_points=[], + is_callable=get_backend("go").is_callable_unit, + is_test_file=get_backend("go").is_test_file, + ) + assert orphans == [] + + def test_real_production_dead_code_still_flagged(self): + # A production-category callable with no incoming edge must STILL + # be an orphan — the gate keeps its value. 
+ data = _interfaces( + {"function unused_helper": ["Data Layer/transform/normalize"]}, + subtree="Data Layer", + file_path="src/data.c", + ) + orphans = check_feature_dependency_coverage( + data, _EMPTY_FLOW, entry_points=[], + is_callable=get_backend("c").is_callable_unit, + is_test_file=get_backend("c").is_test_file, + ) + assert len(orphans) == 1 + assert orphans[0]["unit_name"] == "function unused_helper" + + def test_outgoing_production_root_is_not_feature_orphan(self): + data = _interfaces( + { + "function build_app": ["Runtime/bootstrap/wire dependencies"], + "function make_store": [], + }, + subtree="Runtime", + file_path="src/app.c", + ) + flow = { + "invocation_edges": [ + { + "caller": "function build_app", + "caller_file": "src/app.c", + "callee": "function make_store", + "callee_file": "src/app.c", + } + ], + "inheritance_edges": [], + "reference_edges": [], + } + + orphans = check_feature_dependency_coverage( + data, flow, entry_points=[], + is_callable=get_backend("c").is_callable_unit, + is_test_file=get_backend("c").is_test_file, + ) + + assert orphans == [] + + def test_bare_entry_point_name_matches_prefixed_unit(self): + data = _interfaces( + {"function RunMain": ["Runtime/startup/run main package"]}, + subtree="Runtime", + file_path="internal/app/main.go", + ) + entry_points = [ + {"file_path": "internal/app/main.go", "unit_name": "RunMain"} + ] + + connectivity = check_call_graph_connectivity( + data, _EMPTY_FLOW, entry_points=entry_points, + is_callable=get_backend("go").is_callable_unit, + is_test_file=get_backend("go").is_test_file, + ) + feature_orphans = check_feature_dependency_coverage( + data, _EMPTY_FLOW, entry_points=entry_points, + is_callable=get_backend("go").is_callable_unit, + is_test_file=get_backend("go").is_test_file, + ) + + assert connectivity["orphan_units"] == [] + assert feature_orphans == [] + + def test_receiver_entry_point_name_matches_method_unit(self): + data = _interfaces( + {"method ServeHTTP": ["Web/server/serve 
request"]}, + subtree="Web", + file_path="internal/web/handler.go", + ) + entry_points = [ + { + "file_path": "internal/web/handler.go", + "unit_name": "(*HomeHandler).ServeHTTP", + } + ] + + connectivity = check_call_graph_connectivity( + data, _EMPTY_FLOW, entry_points=entry_points, + is_callable=get_backend("go").is_callable_unit, + is_test_file=get_backend("go").is_test_file, + ) + feature_orphans = check_feature_dependency_coverage( + data, _EMPTY_FLOW, entry_points=entry_points, + is_callable=get_backend("go").is_callable_unit, + is_test_file=get_backend("go").is_test_file, + ) + + assert connectivity["orphan_units"] == [] + assert feature_orphans == [] + + def test_ambiguous_entry_point_alias_without_file_is_not_overmatched(self): + data = { + "subtrees": { + "Web": { + "interfaces": { + "internal/web/home.go": { + "units": ["method ServeHTTP"], + "units_to_features": { + "method ServeHTTP": ["Web/home/serve request"] + }, + }, + "internal/web/action.go": { + "units": ["method ServeHTTP"], + "units_to_features": { + "method ServeHTTP": ["Web/action/serve request"] + }, + }, + }, + }, + }, + } + + connectivity = check_call_graph_connectivity( + data, _EMPTY_FLOW, entry_points=[{"unit_name": "ServeHTTP"}], + is_callable=get_backend("go").is_callable_unit, + is_test_file=get_backend("go").is_test_file, + ) + + assert len(connectivity["orphan_units"]) == 2 + + def test_unit_and_feature_detectors_agree_on_same_graph(self): + # The two orphan detectors must share one definition of "orphan" + # so the published verdict can never drift from the structural + # gate. A production root with an outgoing edge is an orphan for + # NEITHER; a genuinely isolated production unit is an orphan for + # BOTH. 
+ data = _interfaces( + { + "function wired_root": ["Runtime/bootstrap/wire app"], + "function reachable_leaf": ["Runtime/bootstrap/make store"], + "function dead_unit": ["Runtime/extra/never wired"], + }, + subtree="Runtime", + file_path="src/app.c", + ) + flow = { + "invocation_edges": [ + { + "caller": "function wired_root", + "caller_file": "src/app.c", + "callee": "function reachable_leaf", + "callee_file": "src/app.c", + } + ], + "inheritance_edges": [], + "reference_edges": [], + } + backend = get_backend("c") + + unit_orphans = check_call_graph_connectivity( + data, flow, entry_points=[], + is_callable=backend.is_callable_unit, + is_test_file=backend.is_test_file, + )["orphan_units"] + feature_orphans = check_feature_dependency_coverage( + data, flow, entry_points=[], + is_callable=backend.is_callable_unit, + is_test_file=backend.is_test_file, + ) + + unit_keys = {o["unit_key"] for o in unit_orphans} + feature_keys = { + f"{f['file_path']}::{f['unit_name']}" for f in feature_orphans + } + # Only the genuinely isolated unit is an orphan, and both + # detectors agree on exactly that set. + assert unit_keys == {"src/app.c::function dead_unit"} + assert feature_keys == unit_keys + + def test_legacy_no_predicates_preserves_behaviour(self): + # With no is_callable/is_test_file, the category check still applies + # but file-level does not; production dead code is still flagged. 
+ data = _interfaces( + {"function unused_helper": ["Data Layer/x/y"]}, + subtree="Data Layer", + file_path="src/data.py", + ) + orphans = check_feature_dependency_coverage(data, _EMPTY_FLOW, entry_points=[]) + assert len(orphans) == 1 + + +class TestConnectivityExcludesTestBuild: + def test_isolated_test_function_not_orphan_unit(self): + data = _interfaces( + {"function test_addition": ["Testing/eval/verify addition"]}, + subtree="Testing", + file_path="tests/test_eval.c", + ) + result = check_call_graph_connectivity( + data, _EMPTY_FLOW, entry_points=[], + is_callable=get_backend("c").is_callable_unit, + is_test_file=get_backend("c").is_test_file, + ) + assert result["orphan_units"] == [] + + def test_isolated_production_function_still_orphan_unit(self): + data = _interfaces( + {"function unused": ["Data Layer/x/y"]}, + subtree="Data Layer", + file_path="src/data.c", + ) + result = check_call_graph_connectivity( + data, _EMPTY_FLOW, entry_points=[], + is_callable=get_backend("c").is_callable_unit, + is_test_file=get_backend("c").is_test_file, + ) + assert len(result["orphan_units"]) == 1 + + +class TestPerLanguageTestFileExclusion: + @pytest.mark.parametrize( + "language,test_path", + [ + ("python", "tests/test_store.py"), + ("go", "internal/store/store_test.go"), + ("rust", "tests/integration_test.rs"), + ("javascript", "test/store.test.js"), + ("typescript", "test/store.test.ts"), + ("c", "tests/test_eval.c"), + ("cpp", "tests/test_eval.cpp"), + ], + ) + def test_units_in_test_files_excluded(self, language, test_path): + # Use a production-looking category so ONLY is_test_file can exclude it. 
+ data = _interfaces( + {"function some_unit": ["Feature Area/x/y"]}, + subtree="Feature Area", + file_path=test_path, + ) + backend = get_backend(language) + if not backend.is_test_file(test_path): + pytest.skip(f"{language} backend does not classify {test_path} as a test file") + orphans = check_feature_dependency_coverage( + data, _EMPTY_FLOW, entry_points=[], + is_callable=backend.is_callable_unit, + is_test_file=backend.is_test_file, + ) + assert orphans == [] diff --git a/CoderMind/tests/test_plan_language_support.py b/CoderMind/tests/test_plan_language_support.py new file mode 100644 index 0000000..a15ca39 --- /dev/null +++ b/CoderMind/tests/test_plan_language_support.py @@ -0,0 +1,645 @@ +from __future__ import annotations + +import sys +from pathlib import Path + + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from decoder_lang import ProjectTaskTemplates, get_backend # noqa: E402 +from func_design.base_class_agent import ( # noqa: E402 + BaseClassOutput, + validate_base_classes_model, + validate_data_structures, +) +from func_design.interface_agent import ( # noqa: E402 + DependencyCollector, + SubtreeInterfaceAgent, + SubtreeInterfaceOutput, + validate_interface, +) +from func_design.interface_prompts import SUBTREE_INTERFACE_PROMPT # noqa: E402 +from plan_tasks import TaskPlanner # noqa: E402 + + +def test_dependency_collector_extracts_rust_inheritance() -> None: + # Regression for G3: non-Python inheritance edges must be extracted via + # the backend's list_inheritance (Rust trait impls emit `inherits`), + # not the Python-only AST path that silently produced nothing before. 
+ collector = DependencyCollector( + known_base_classes={"Repo"}, + known_types=set(), + target_language="rust", + ) + code = "struct Store;\ntrait Repo {}\nimpl Repo for Store {}\n" + collector.analyze_code_dependencies( + code=code, + file_path="src/store.rs", + base_class_files={"Repo": "src/repo.rs"}, + ) + assert any( + e["child"] == "Store" and e["parent"] == "Repo" + and e["parent_file"] == "src/repo.rs" + for e in collector.inheritance_edges + ), collector.inheritance_edges + + +def test_dependency_collector_python_inheritance_still_works() -> None: + # The Python AST-derived path keeps producing inheritance edges. + collector = DependencyCollector( + known_base_classes={"Base"}, + known_types=set(), + target_language="python", + ) + code = "class Base:\n pass\n\nclass Child(Base):\n pass\n" + collector.analyze_code_dependencies( + code=code, + file_path="pkg/child.py", + base_class_files={"Base": "pkg/base.py"}, + ) + assert any( + e["child"] == "Child" and e["parent"] == "Base" + for e in collector.inheritance_edges + ), collector.inheritance_edges + + +def test_base_class_validation_accepts_go_source() -> None: + backend = get_backend("go") + model = BaseClassOutput.model_validate({ + "base_classes": [ + { + "file_path": "internal/task/store.go", + "code": "package task\n\ntype Store interface {\n\tLoad() error\n}\n", + "scope": "Task Store", + "subclasses": {"Store": ["FileStore", "MemoryStore"]}, + } + ], + "data_structures": [], + }) + + ok, error = validate_base_classes_model( + model, + valid_subtrees=["Task Store"], + backend=backend, + ) + + assert ok, error + + +def test_data_structure_validation_accepts_go_source() -> None: + backend = get_backend("go") + ok, error = validate_data_structures( + [ + { + "code": "package task\n\ntype TaskRecord struct {\n\tID int\n}\n", + "subtree": "Task Store", + "data_flow_types": ["TaskRecord"], + } + ], + ["TaskRecord"], + valid_subtrees=["Task Store"], + backend=backend, + ) + + assert ok, error + + +def 
test_interface_validation_accepts_go_declaration() -> None: + backend = get_backend("go") + ok, error, info = validate_interface( + { + "features": ["Task Lifecycle Management/task/create"], + "code": "package task\n\ntype Task struct {\n\tTitle string\n}\n", + }, + {"Task Lifecycle Management/task/create"}, + set(), + backend=backend, + ) + + assert ok, error + assert "struct Task" in info["declarations"] + + +def test_subtree_interface_output_accepts_common_file_aliases() -> None: + model = SubtreeInterfaceOutput.model_validate({ + "files": [ + { + "path": "src/tasklite_cli/task/task.c", + "features": ["Task Domain Model/task schema/define record"], + "code": "int task_record_init(void);\n", + } + ] + }) + + assert model.files[0].file_path == "src/tasklite_cli/task/task.c" + assert model.files[0].interfaces[0].features == [ + "Task Domain Model/task schema/define record" + ] + + +def test_interface_validation_filters_non_target_and_duplicate_features() -> None: + backend = get_backend("c") + interface = { + "features": [ + "Task Domain Model/task schema/define record", + "Task Domain Model/glue/generated helper", + "Task Domain Model/task schema/already covered", + ], + "code": "int task_record_init(void);\n", + } + + ok, error, info = validate_interface( + interface, + { + "Task Domain Model/task schema/define record", + "Task Domain Model/task schema/already covered", + }, + {"Task Domain Model/task schema/already covered"}, + backend=backend, + ) + + assert ok, error + assert interface["features"] == ["Task Domain Model/task schema/define record"] + assert "function task_record_init" in info["declarations"] + + +def test_subtree_agent_adds_c_fallback_for_remaining_features() -> None: + agent = SubtreeInterfaceAgent(target_language="c") + feature = "Task Domain Model/task schema/status representation/encode completion flag" + state = { + "target_features": {feature}, + "covered_features": set(), + "all_interfaces": [], + "all_code_blocks": [], + } + + 
agent._complete_remaining_c_family_features("src/tasklite_cli/task/task.c", state) + result, _new_features = agent._build_file_result( + file_path="src/tasklite_cli/task/task.c", + all_interfaces=state["all_interfaces"], + all_code_blocks=state["all_code_blocks"], + target_features=state["target_features"], + covered_features=state["covered_features"], + ) + + assert result["success"] + assert feature in next(iter(result["units_to_features"].values())) + assert "int task" in result["file_code"] + + +def test_subtree_agent_adds_cpp_fallback_for_empty_file_result() -> None: + agent = SubtreeInterfaceAgent(target_language="cpp") + features = { + "CLI Entry and Dispatch/storage/options/use local tasks file", + "CLI Entry and Dispatch/storage/options/resolve store path", + } + state = { + "target_features": features, + "covered_features": set(), + "all_interfaces": [], + "all_code_blocks": [], + } + + agent._complete_remaining_c_family_features( + "src/tasklite_cli/cli/store_path_options.cpp", + state, + ) + result, _new_features = agent._build_file_result( + file_path="src/tasklite_cli/cli/store_path_options.cpp", + all_interfaces=state["all_interfaces"], + all_code_blocks=state["all_code_blocks"], + target_features=state["target_features"], + covered_features=state["covered_features"], + ) + + assert result["success"] + assert set(next(iter(result["units_to_features"].values()))) == features + assert "namespace tasklite" in result["file_code"] + + +def test_subtree_agent_uses_cpp_fallback_for_verification_subtree() -> None: + class FailingLLM: + def call_structured(self, **_kwargs): + raise AssertionError("LLM should not run for C++ verification fallback") + + agent = SubtreeInterfaceAgent( + llm_client=FailingLLM(), + target_language="cpp", + ) + files = [ + { + "path": "tests/store_test.cpp", + "feature_paths": [ + "Verification and Test Isolation/store/loading coverage/verify missing file loading", + "Verification and Test Isolation/store/corruption coverage/verify 
corrupt json handling", + ], + }, + { + "path": "tests/cli_test.cpp", + "feature_paths": [ + "Verification and Test Isolation/cli/list coverage/verify task list output", + ], + }, + ] + + result = agent.design_subtree_interfaces( + file_nodes=files, + file_order=["tests/store_test.cpp", "tests/cli_test.cpp"], + repo_info="TaskLite C++ CLI", + data_flow_str="", + base_classes_str="", + upstream_context="", + subtree_name="Verification and Test Isolation", + ) + + assert result["tests/store_test.cpp"]["success"] + assert result["tests/cli_test.cpp"]["success"] + assert len(result["tests/store_test.cpp"]["units"]) == 1 + assert len(result["tests/cli_test.cpp"]["units"]) == 1 + + +def test_interface_validation_strips_markdown_fence() -> None: + backend = get_backend("go") + ok, error, info = validate_interface( + { + "features": ["Runtime Architecture Constraints/layout/packages/use fixed package layout"], + "code": "```go\npackage app\n\ntype AppLayout struct {\n\tStorePath string\n}\n```", + }, + {"Runtime Architecture Constraints/layout/packages/use fixed package layout"}, + set(), + backend=backend, + ) + + assert ok, error + assert "struct AppLayout" in info["declarations"] + + +def test_interface_validation_accepts_python_backend_docstring() -> None: + backend = get_backend("python") + ok, error, info = validate_interface( + { + "features": ["Application Infrastructure/server bootstrap/application factory setup"], + "code": ( + "from flask import Flask\n\n" + "def create_app() -> Flask:\n" + " \"\"\"Create and configure the Flask application.\"\"\"\n" + " ...\n" + ), + }, + {"Application Infrastructure/server bootstrap/application factory setup"}, + set(), + backend=backend, + ) + + assert ok, error + assert "function create_app" in info["declarations"] + + +def test_subtree_interface_prompt_is_language_neutral() -> None: + assert "with `pass` bodies" not in SUBTREE_INTERFACE_PROMPT + assert "All function/method bodies must use `pass`" not in 
SUBTREE_INTERFACE_PROMPT + assert "target-language declaration stubs" in SUBTREE_INTERFACE_PROMPT + + +def test_typescript_subtree_prompt_omits_python_import_convention() -> None: + agent = SubtreeInterfaceAgent(target_language="typescript") + prompt = agent._build_subtree_user_prompt( + remaining_files=["src/tasklite-cli/cli/main.ts"], + file_states={ + "src/tasklite-cli/cli/main.ts": { + "target_features": {"CLI Application/startup/process bootstrap"}, + "covered_features": set(), + "all_code_blocks": [], + } + }, + file_info_map={ + "src/tasklite-cli/cli/main.ts": { + "path": "src/tasklite-cli/cli/main.ts", + "feature_paths": ["CLI Application/startup/process bootstrap"], + } + }, + repo_info="TypeScript CLI task tracker.", + data_flow_str="No data flow.", + base_classes_str="No base classes.", + upstream_context="No upstream interfaces.", + last_error="", + ) + + assert "Import Convention" not in prompt + assert "from src.tasklite-cli" not in prompt + + +def test_task_planner_project_tasks_use_go_conventions() -> None: + planner = TaskPlanner( + interfaces={"meta": {"primary_language": "go", "target_languages": ["go"]}}, + data_flow={"meta": {"primary_language": "go", "target_languages": ["go"]}}, + repo_name="tasklite", + repo_info="Go CLI task tracker.", + ) + + requirements = planner._build_requirements_task() + main_entry = planner._build_main_entry_task() + readme = planner._build_readme_task() + + assert "go.mod" in requirements + assert "requirements.txt" not in requirements + assert "cmd/tasklite/main.go" in main_entry + assert "main.py" not in main_entry + assert "go test ./..." 
in readme + assert "pytest" not in readme + + +def test_task_planner_prefers_backend_project_task_templates(monkeypatch) -> None: + planner = TaskPlanner( + interfaces={"meta": {"primary_language": "go", "target_languages": ["go"]}}, + data_flow={"meta": {"primary_language": "go", "target_languages": ["go"]}}, + repo_name="tasklite", + repo_info="Go CLI task tracker.", + ) + + def fake_templates(context): + return ProjectTaskTemplates( + dependencies=f"deps for {context.package_name}", + main_entry=f"main for {context.package_name}", + readme=f"readme for {context.package_name}", + ) + + monkeypatch.setattr(planner.backend, "project_task_templates", fake_templates) + + assert planner._build_requirements_task() == "deps for tasklite" + assert planner._build_main_entry_task() == "main for tasklite" + assert planner._build_readme_task() == "readme for tasklite" + + +def test_task_planner_project_tasks_use_rust_conventions() -> None: + planner = TaskPlanner( + interfaces={"meta": {"primary_language": "rust", "target_languages": ["rust"]}}, + data_flow={"meta": {"primary_language": "rust", "target_languages": ["rust"]}}, + repo_name="tasklite", + repo_info="Rust CLI task tracker.", + ) + + requirements = planner._build_requirements_task() + main_entry = planner._build_main_entry_task() + readme = planner._build_readme_task() + + assert "Cargo.toml" in requirements + assert "requirements.txt" not in requirements + assert "src/main.rs" in main_entry + assert "main.py" not in main_entry + assert "cargo test" in readme + assert "pytest" not in readme + + +def test_task_planner_project_tasks_use_typescript_conventions() -> None: + planner = TaskPlanner( + interfaces={ + "meta": { + "primary_language": "typescript", + "target_languages": ["typescript"], + } + }, + data_flow={ + "meta": { + "primary_language": "typescript", + "target_languages": ["typescript"], + } + }, + repo_name="tasklite", + repo_info="TypeScript CLI task tracker.", + ) + + requirements = 
planner._build_requirements_task() + main_entry = planner._build_main_entry_task() + readme = planner._build_readme_task() + + assert "package.json" in requirements + assert "requirements.txt" not in requirements + assert "src/index.ts" in main_entry + assert "main.py" not in main_entry + assert "npm test" in readme + assert "pytest" not in readme + + +def test_task_planner_special_tasks_are_language_neutral() -> None: + planner = TaskPlanner( + interfaces={"meta": {"primary_language": "rust", "target_languages": ["rust"]}}, + data_flow={ + "meta": {"primary_language": "rust", "target_languages": ["rust"]}, + "data_flow": [ + {"source": "Core", "target": "CLI", "data_type": "Payload"}, + ], + }, + repo_name="tasklite", + repo_info="Rust CLI task tracker.", + ) + planned_tasks: dict = {"Core": {}} + agent_results: dict = {"Core": {}} + + planner._add_special_tasks(planned_tasks, agent_results, ["Core"]) + text = "\n".join( + task["task"] + for files in planned_tasks.values() + for tasks in files.values() + for task in tasks + ) + + assert "main.py" not in text + assert "styles.py" not in text + + +def test_go_main_entry_reuses_existing_command_package() -> None: + # The skeleton already placed the entry under cmd/todo/main.go. The + # MAIN_ENTRY task must reuse that path, not generate a second + # cmd//main.go (which would yield two func main()). 
+ interfaces = { + "meta": {"primary_language": "go", "target_languages": ["go"]}, + "subtrees": { + "Server": { + "interfaces": { + "cmd/todo/main.go": {"units": ["function main"]}, + "internal/store/store.go": {"units": ["struct Store"]}, + } + } + }, + } + planner = TaskPlanner( + interfaces=interfaces, + data_flow={"meta": {"primary_language": "go", "target_languages": ["go"]}}, + repo_name="demo-go-web-todo", + repo_info="Go web todo.", + ) + + assert planner._resolve_go_command_path() == "cmd/todo/main.go" + main_entry = planner._build_main_entry_task() + assert "cmd/todo/main.go" in main_entry + assert "cmd/demo-go-web-todo/main.go" not in main_entry + + +def test_go_main_entry_falls_back_when_no_command_package() -> None: + # No cmd/*/main.go in the skeleton → fall back to the canonical + # cmd//main.go from the backend. + interfaces = { + "meta": {"primary_language": "go", "target_languages": ["go"]}, + "subtrees": { + "Core": {"interfaces": {"internal/store/store.go": {"units": ["struct Store"]}}} + }, + } + planner = TaskPlanner( + interfaces=interfaces, + data_flow={"meta": {"primary_language": "go", "target_languages": ["go"]}}, + repo_name="tasklite", + repo_info="Go CLI.", + ) + + assert planner._resolve_go_command_path() == "cmd/tasklite/main.go" + + +def test_rust_backend_accepts_basic_declarations() -> None: + backend = get_backend("rust") + code = "pub struct Task {\n pub title: String,\n}\n\npub fn run() {}\n" + + ok, error = backend.syntax_check(code, "src/lib.rs") + units = backend.list_code_units(code, "src/lib.rs") + + assert ok, error + assert {unit.unit_type for unit in units} >= {"struct", "function"} + assert backend.prompt_hints().test_framework_name == "cargo test" + + +def test_typescript_backend_accepts_basic_declarations() -> None: + backend = get_backend("typescript") + code = """ +export interface Task { title: string } +export type TaskId = number; +export declare function run(task: Task): void; +export declare class TaskCli { 
run(task: Task): void; } +""" + + ok, error = backend.syntax_check(code, "src/index.ts") + units = backend.list_code_units(code, "src/index.ts") + + assert ok, error + declarations = [f"{unit.unit_type} {unit.name}" for unit in units] + assert "interface Task" in declarations + assert "type TaskId" in declarations + assert "function run" in declarations + assert "class TaskCli" in declarations + assert any(unit.name == "run" for unit in units) + assert backend.prompt_hints().test_framework_name == "npm test" + + +def test_interface_validation_accepts_typescript_interface() -> None: + backend = get_backend("typescript") + ok, error, info = validate_interface( + { + "features": ["Task Domain Model/task schema"], + "code": "export interface Task { title: string }\n", + }, + {"Task Domain Model/task schema"}, + set(), + backend=backend, + ) + + assert ok, error + assert "interface Task" in info["declarations"] + + +def test_interface_validation_accepts_typescript_declare_function() -> None: + backend = get_backend("typescript") + ok, error, info = validate_interface( + { + "features": ["CLI Application/startup/process bootstrap/bootstrap main command"], + "code": "export declare function runTasklite(argv: readonly string[]): Promise;\n", + }, + {"CLI Application/startup/process bootstrap/bootstrap main command"}, + set(), + backend=backend, + ) + + assert ok, error + assert "function runTasklite" in info["declarations"] + + +def test_interface_validation_accepts_typescript_jsdoc_backticks() -> None: + backend = get_backend("typescript") + ok, error, info = validate_interface( + { + "features": [ + "CLI Application/store file/path selection/select default file", + "CLI Application/store file/path selection/select override file", + ], + "code": """```typescript +import { homedir } from 'node:os'; +import { join } from 'node:path'; + +/** + * Resolve the filesystem path for the JSON task store. + * + * When an override path is supplied, it is returned as-is. 
+ * Otherwise the default path is computed as `~/.tasklite.json`. + * Empty-string values are treated as "no override". + * + * @param override - Optional explicit path to the store file. + * @returns Absolute filesystem path to the JSON store file. + */ +export function resolveStorePath(override?: string): string; +```""", + }, + { + "CLI Application/store file/path selection/select default file", + "CLI Application/store file/path selection/select override file", + }, + set(), + backend=backend, + ) + + assert ok, error + assert "function resolveStorePath" in info["declarations"] + + +def test_file_ordering_uses_imports_for_go() -> None: + # Regression: non-Python file ordering previously degraded to the raw LLM + # order because dependency extraction used Python AST only. Go imports must + # now drive the topological sort (store before its cli importer). + from plan_tasks import correct_intra_subtree_file_order + + interfaces = { + "internal/store/store.go": { + "file_code": "package store\n\ntype Store struct{}\nfunc New() *Store { return &Store{} }\n", + }, + "cmd/app/cli.go": { + "file_code": "package main\n\nimport \"tasklite/internal/store\"\n\nfunc main(){ _ = store.New() }\n", + }, + } + corrected, diag = correct_intra_subtree_file_order( + subtree_name="Core", + files_order=["cmd/app/cli.go", "internal/store/store.go"], + subtree_interfaces=interfaces, + language="go", + ) + + assert corrected == ["internal/store/store.go", "cmd/app/cli.go"] + assert diag["changed"] is True + assert diag["reason"] == "import_toposort_by_stem" + + +def test_file_ordering_keeps_python_dotted_module_path() -> None: + from plan_tasks import correct_intra_subtree_file_order + + interfaces = { + "src/app/store.py": {"file_code": "class Store:\n pass\n"}, + "src/app/cli.py": {"file_code": "from app.store import Store\n"}, + } + corrected, diag = correct_intra_subtree_file_order( + subtree_name="Core", + files_order=["src/app/cli.py", "src/app/store.py"], + 
subtree_interfaces=interfaces, + language="python", + ) + + assert corrected == ["src/app/store.py", "src/app/cli.py"] + assert diag["reason"] == "import_toposort" + diff --git a/CoderMind/tests/test_repo_language_resolution.py b/CoderMind/tests/test_repo_language_resolution.py new file mode 100644 index 0000000..2093592 --- /dev/null +++ b/CoderMind/tests/test_repo_language_resolution.py @@ -0,0 +1,146 @@ +"""Regression tests for on-disk repository language resolution. + +These lock in the fix for the verification blind spot where the +final-test / smoke-test / global-review stages resolved the project +language from encoder metadata alone (``feature_spec.json`` / +``rpg.json``). When that metadata was missing or unreadable at the path +the stage computed, resolution silently fell back to ``python`` — so a +non-python project's final gate ran ``pytest`` over zero files and +"passed" trivially. + +The canonical resolver guarantees an on-disk source scan tier, so the +language is inferred from the real files when metadata is absent. The +scan is language-agnostic (extension set lives in :mod:`lang_parser`), so +adding a language needs no change here. +""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +_SCRIPTS = Path(__file__).resolve().parents[1] / "scripts" +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +from decoder_lang import resolve_repo_backend, scan_repo_source_files # noqa: E402 + + +def _write(path: Path, content: str = "") -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + + +class TestScanRepoSourceFiles: + def test_detects_sources_and_skips_build_and_dependency_dirs(self, tmp_path): + _write(tmp_path / "internal" / "store" / "store.go", "package store\n") + _write(tmp_path / "cmd" / "app" / "main.go", "package main\n") + # Build / dependency / VCS dirs must be pruned, not voted for. 
+ _write(tmp_path / "target" / "junk.rs", "fn main() {}\n") + _write(tmp_path / "node_modules" / "dep.js", "module.exports = {}\n") + _write(tmp_path / ".git" / "hooks" / "pre-commit.py", "x = 1\n") + + found = scan_repo_source_files(tmp_path) + + assert sorted(found) == ["cmd/app/main.go", "internal/store/store.go"] + + def test_ignores_files_with_unknown_extensions(self, tmp_path): + _write(tmp_path / "README.md", "# docs\n") + _write(tmp_path / "demo-go-web-todo", "") # no extension + _write(tmp_path / "data.json", "{}\n") + + assert scan_repo_source_files(tmp_path) == [] + + def test_missing_path_returns_empty(self, tmp_path): + assert scan_repo_source_files(tmp_path / "does-not-exist") == [] + + +class TestResolveRepoBackend: + def test_infers_language_from_disk_without_metadata(self, tmp_path): + # The core regression: no feature_spec / rpg metadata, only Go + # sources on disk. Resolution must NOT default to python. + _write(tmp_path / "internal" / "store" / "store.go", "package store\n") + _write(tmp_path / "cmd" / "app" / "main.go", "package main\n") + + backend = resolve_repo_backend(tmp_path) + + assert backend.name == "go" + + @pytest.mark.parametrize( + ("relpath", "expected"), + [ + ("src/main.rs", "rust"), + ("src/index.js", "javascript"), + ("src/app.ts", "typescript"), + ("src/calc.c", "c"), + ("src/model.cpp", "cpp"), + ], + ) + def test_infers_each_supported_language(self, tmp_path, relpath, expected): + _write(tmp_path / relpath, "\n") + + assert resolve_repo_backend(tmp_path).name == expected + + def test_explicit_feature_spec_metadata_wins_over_disk(self, tmp_path): + # Disk says Go, but the encoder explicitly declared Rust. The + # authoritative metadata tier must win over the scan fallback. 
+ _write(tmp_path / "cmd" / "app" / "main.go", "package main\n") + feature_spec = {"meta": {"primary_language": "rust"}} + + backend = resolve_repo_backend(tmp_path, feature_spec=feature_spec) + + assert backend.name == "rust" + + def test_explicit_rpg_metadata_wins_over_disk(self, tmp_path): + _write(tmp_path / "cmd" / "app" / "main.go", "package main\n") + rpg_obj = {"root": {"meta": {"language": "typescript"}}} + + backend = resolve_repo_backend(tmp_path, rpg_obj=rpg_obj) + + assert backend.name == "typescript" + + def test_empty_repo_defaults_to_python(self, tmp_path): + # Graceful default preserved for a genuinely empty / unknown repo. + assert resolve_repo_backend(tmp_path).name == "python" + + +class TestResolveTestBackendRepoPath: + """The test_runner wrapper is the path final_test / global_review use.""" + + def test_repo_path_infers_non_python_when_metadata_absent( + self, tmp_path, monkeypatch + ): + from code_gen import test_runner + + # Force the metadata tiers to miss (as they did at final_test time + # in the failing bench run) so only the on-disk scan can resolve. 
+ monkeypatch.setattr( + test_runner, "FEATURE_SPEC_FILE", tmp_path / "absent_feature_spec.json" + ) + monkeypatch.setattr( + test_runner, "REPO_RPG_FILE", tmp_path / "absent_rpg.json" + ) + + repo = tmp_path / "repo" + _write(repo / "cmd" / "app" / "main.go", "package main\n") + + backend = test_runner.resolve_test_backend(repo_path=repo) + + assert backend.name == "go" + + def test_scoped_valid_files_still_take_precedence(self, tmp_path, monkeypatch): + from code_gen import test_runner + + monkeypatch.setattr( + test_runner, "FEATURE_SPEC_FILE", tmp_path / "absent_feature_spec.json" + ) + monkeypatch.setattr( + test_runner, "REPO_RPG_FILE", tmp_path / "absent_rpg.json" + ) + + backend = test_runner.resolve_test_backend( + valid_files=["src/app.ts", "src/store.ts"] + ) + + assert backend.name == "typescript" diff --git a/CoderMind/tests/test_rpg_builder.py b/CoderMind/tests/test_rpg_builder.py new file mode 100644 index 0000000..7327273 --- /dev/null +++ b/CoderMind/tests/test_rpg_builder.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import sys +from pathlib import Path + + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from rpg.builder import create_initial_rpg # noqa: E402 + + +def test_create_initial_rpg_preserves_target_language() -> None: + rpg = create_initial_rpg({ + "repository_name": "tasklite", + "repository_purpose": "Go CLI task tracker.", + "meta": {"primary_language": "Go", "target_languages": ["Go"]}, + "components": [], + }) + + assert rpg.repo_node is not None + assert rpg.repo_node.meta.language == "go" diff --git a/CoderMind/tests/test_smoke_multilang.py b/CoderMind/tests/test_smoke_multilang.py new file mode 100644 index 0000000..468f204 --- /dev/null +++ b/CoderMind/tests/test_smoke_multilang.py @@ -0,0 +1,99 @@ +"""Tests for the language-aware smoke-test entry probe. 
+ +Covers the multilang `check_entry_point` path: the run command comes +from the backend, runs in a CLEAN subprocess (no PYTHONPATH bridging), +and the Python-only import/stub layers are skipped for other languages. +""" +from __future__ import annotations + +import sys +import textwrap +import unittest +from pathlib import Path +from tempfile import TemporaryDirectory + +_SCRIPTS = Path(__file__).resolve().parents[1] / "scripts" +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +import smoke_test # noqa: E402 + + +class TestEntryPointCleanEnv(unittest.TestCase): + def test_src_layout_without_bridge_fails(self): + # Reproduces the P3 bug: main.py imports a package under src/ but + # there is no pyproject/path bridge → ModuleNotFoundError in a + # clean env. The smoke entry probe must catch it. + with TemporaryDirectory() as d: + repo = Path(d) + (repo / "src" / "pkg").mkdir(parents=True) + (repo / "src" / "pkg" / "__init__.py").write_text("") + (repo / "src" / "pkg" / "app.py").write_text("def run():\n return 0\n") + (repo / "main.py").write_text( + textwrap.dedent( + """\ + import argparse + from pkg.app import run + + def main(): + argparse.ArgumentParser().parse_args() + return run() + + if __name__ == "__main__": + raise SystemExit(main()) + """ + ) + ) + result = smoke_test.SmokeResult() + layer = smoke_test.check_entry_point(repo, result) + self.assertFalse(layer.get("passed")) + self.assertTrue( + any(f.check == "help_fails" for f in result.findings), + [f.check for f in result.findings], + ) + + def test_src_layout_with_path_bridge_passes(self): + # Same layout, but main.py adds the sys.path bridge → --help works. 
+ with TemporaryDirectory() as d: + repo = Path(d) + (repo / "src" / "pkg").mkdir(parents=True) + (repo / "src" / "pkg" / "__init__.py").write_text("") + (repo / "src" / "pkg" / "app.py").write_text("def run():\n return 0\n") + (repo / "main.py").write_text( + textwrap.dedent( + """\ + import sys, pathlib + sys.path.insert(0, str(pathlib.Path(__file__).parent / "src")) + import argparse + from pkg.app import run + + def main(): + argparse.ArgumentParser().parse_args() + return run() + + if __name__ == "__main__": + raise SystemExit(main()) + """ + ) + ) + result = smoke_test.SmokeResult() + layer = smoke_test.check_entry_point(repo, result) + self.assertTrue(layer.get("passed"), [f.message for f in result.findings]) + + +class TestPythonOnlyLayersSkipped(unittest.TestCase): + def test_non_python_skips_import_and_stub_layers(self): + # A Go-flagged repo must skip the ast-based import/stub layers. + with TemporaryDirectory() as d: + repo = Path(d) + (repo / ".cmind" / "data").mkdir(parents=True) + (repo / ".cmind" / "data" / "rpg.json").write_text( + '{"root": {"meta": {"language": "go"}}}' + ) + res = smoke_test.run_smoke_test(repo_path=repo) + self.assertTrue(res.layers["imports"].get("skipped")) + self.assertTrue(res.layers["stubs"].get("skipped")) + + +if __name__ == "__main__": + unittest.main() diff --git a/CoderMind/tests/test_zero_test_guard.py b/CoderMind/tests/test_zero_test_guard.py new file mode 100644 index 0000000..00a7d7b --- /dev/null +++ b/CoderMind/tests/test_zero_test_guard.py @@ -0,0 +1,167 @@ +"""Regression tests for the zero-test "no-op pass" guard. + +A verification gate that executed zero tests is not a pass — it is a +non-result. Before this guard every non-Python backend reported +``status = "passed"`` whenever the test command exited 0, so a no-op run +(e.g. ``go test ./...`` matching no packages, or a runner invoked before +the sources were in the tree) silently satisfied the final gate. 
These +tests lock in that an exit-0 run with no executed tests is reported as +``errored`` (non-success), while real passes and real failures are +unaffected, across every language backend. +""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +_SCRIPTS = Path(__file__).resolve().parents[1] / "scripts" +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +from decoder_lang import get_backend # noqa: E402 +from decoder_lang.test_result import ran_no_tests # noqa: E402 + + +class TestRanNoTestsHelper: + def test_nonzero_exit_is_never_a_no_op(self): + # A non-zero exit is already a failure; the no-op concept does not apply. + assert ran_no_tests(1, "") is False + assert ran_no_tests(2, "boom") is False + + def test_empty_output_exit0_is_no_op(self): + assert ran_no_tests(0, "") is True + assert ran_no_tests(0, " \n\t ") is True + + def test_marker_phrase_is_no_op(self): + assert ran_no_tests( + 0, "ctest: No tests were found!!!", + no_tests_markers=("No tests were found",), + ) is True + + def test_reliable_zero_count_is_no_op(self): + assert ran_no_tests(0, "some banner output", observed_tests=0) is True + + def test_positive_count_is_not_a_no_op(self): + assert ran_no_tests(0, "anything", observed_tests=5) is False + + def test_nonempty_unknown_output_is_not_a_no_op(self): + # Fail-safe: unrecognized but non-empty output (no count, no marker) + # must be treated as a real run, never a false failure. + assert ran_no_tests(0, "weird tool output") is False + + def test_empty_output_opt_out_for_compile_check(self): + # C / C++ fall back to a clean ``-fsyntax-only`` compile that emits + # no output; that is a legitimate pass, not a no-op. + assert ran_no_tests(0, "", empty_output_is_no_op=False) is False + + +class TestGoVerdict: + def setup_method(self): + self.backend = get_backend("go") + + def test_empty_output_exit0_is_errored(self): + # The exact bench failure: go test matched no packages → no-op. 
+ result = self.backend.parse_test_output("", 0) + assert result.status == "errored" + + def test_real_pass_with_events(self): + raw = "=== RUN TestAdd\n--- PASS: TestAdd (0.00s)\nok \tpkg\t0.01s\n" + result = self.backend.parse_test_output(raw, 0) + assert result.status == "passed" + assert result.passed_count == 1 + + def test_nonempty_output_without_parsed_counts_still_passes(self): + # ``-json`` output the text regexes don't parse → 0 counts, but the + # non-empty stream proves a run happened: must not false-fail. + raw = '{"Action":"pass","Package":"pkg","Test":"TestAdd"}\n' + result = self.backend.parse_test_output(raw, 0) + assert result.status == "passed" + + def test_real_failure(self): + raw = "=== RUN TestAdd\n--- FAIL: TestAdd (0.00s)\nFAIL\tpkg\t0.01s\n" + result = self.backend.parse_test_output(raw, 1) + assert result.status == "failed" + + def test_test_command_requests_verbose_output(self): + # ``-v`` is what makes go emit the per-test lines parse_test_output + # counts; without it a real run reports passed_count 0 and looks like + # a no-op. Lock the flag into the command. + from decoder_lang.test_result import EnvHandle + + cmd = self.backend.test_command(EnvHandle(project_root=Path("/tmp/x"))) + assert "-v" in cmd + assert cmd[-1] == "./..." 
+ + +class TestNodeBackendsVerdict: + @pytest.mark.parametrize("language", ["javascript", "typescript"]) + def test_empty_output_exit0_is_errored(self, language): + result = get_backend(language).parse_test_output("", 0) + assert result.status == "errored" + + @pytest.mark.parametrize("language", ["javascript", "typescript"]) + def test_real_pass_reports_counts(self, language): + raw = "# tests 74\n# pass 74\n# fail 0\n" + result = get_backend(language).parse_test_output(raw, 0) + assert result.status == "passed" + assert result.passed_count == 74 + + @pytest.mark.parametrize("language", ["javascript", "typescript"]) + def test_zero_tests_summary_is_errored(self, language): + result = get_backend(language).parse_test_output("# tests 0\n# pass 0\n", 0) + assert result.status == "errored" + + @pytest.mark.parametrize("language", ["javascript", "typescript"]) + def test_real_failure(self, language): + raw = "# tests 5\n# pass 4\n# fail 1\n" + result = get_backend(language).parse_test_output(raw, 1) + assert result.status == "failed" + assert result.failed_count == 1 + + +class TestRustVerdict: + def setup_method(self): + self.backend = get_backend("rust") + + def test_empty_output_exit0_is_errored(self): + assert self.backend.parse_test_output("", 0).status == "errored" + + def test_real_pass_sums_counts(self): + raw = ( + "test result: ok. 5 passed; 0 failed; 0 ignored\n" + "test result: ok. 3 passed; 0 failed; 1 ignored\n" + ) + result = self.backend.parse_test_output(raw, 0) + assert result.status == "passed" + assert result.passed_count == 8 + + def test_zero_tests_result_is_errored(self): + raw = "test result: ok. 
0 passed; 0 failed; 0 ignored\n" + assert self.backend.parse_test_output(raw, 0).status == "errored" + + +class TestCompiledBackendsVerdict: + """C / C++ fall back to a compile check, so empty output is a real pass.""" + + @pytest.mark.parametrize("language", ["c", "cpp"]) + def test_empty_output_is_pass_not_no_op(self, language): + # A clean ``-fsyntax-only`` compile emits nothing and exits 0. + result = get_backend(language).parse_test_output("", 0) + assert result.status == "passed" + + @pytest.mark.parametrize("language", ["c", "cpp"]) + def test_ctest_no_tests_marker_is_errored(self, language): + raw = "Test project /tmp/build\nNo tests were found!!!\n" + assert get_backend(language).parse_test_output(raw, 0).status == "errored" + + @pytest.mark.parametrize("language", ["c", "cpp"]) + def test_ctest_real_pass(self, language): + raw = "100% tests passed, 0 tests failed out of 19\n" + assert get_backend(language).parse_test_output(raw, 0).status == "passed" + + @pytest.mark.parametrize("language", ["c", "cpp"]) + def test_compile_failure_is_failed(self, language): + raw = "error: expected ';' before '}' token\n" + assert get_backend(language).parse_test_output(raw, 1).status == "failed" From 68b69c59fda0d7d9ecf4acb52ed023729da78e93 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Mon, 15 Jun 2026 17:20:15 +0800 Subject: [PATCH 02/17] fix(plan): dedup interface source in planner prompt Non-Python interface units are LPCodeUnit instances, which have no count_lines method. The except branch in interface synthesis then stores the whole interface block as every unit's code, so interfaces.json's file_code embeds the entire file once per unit. On large modules that O(units x file_size) blow-up pushes the plan_tasks prompt past the 128 KB single-argument limit, crashing the planner with "Argument list too long" and producing an incomplete tasks.json. Collapse identical per-unit blocks before building the planner prompt. 
Keeping one copy reconstructs the original complete file (imports plus each unit once), so the planner sees valid source while distinct per-unit slices are preserved. Measured 55-67% reduction on a real Rust subtree. --- CoderMind/scripts/plan_tasks.py | 34 ++++++++++++++++- CoderMind/tests/test_plan_prompt_dedup.py | 46 +++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 CoderMind/tests/test_plan_prompt_dedup.py diff --git a/CoderMind/scripts/plan_tasks.py b/CoderMind/scripts/plan_tasks.py index 0c0e73e..d5d1511 100644 --- a/CoderMind/scripts/plan_tasks.py +++ b/CoderMind/scripts/plan_tasks.py @@ -496,6 +496,38 @@ def validate_tasks( return True, f"Planned {len(tasks)} tasks covering all {total_units} units across {len(file_unit_keys)} files.", tasks +def _dedup_interface_source(fdata: Dict[str, Any]) -> str: + """Return a file's interface source with per-unit duplication collapsed. + + ``interfaces.json`` stores ``file_code`` as ``"\n\n".join(unit codes)``. + For non-Python units the ``count_lines`` call in interface synthesis + raises (``LPCodeUnit`` has no such method), and the fallback stores the + whole interface block as *every* unit's code, so a file with N units + embeds the entire file N times — an O(units x file_size) blow-up that + pushes the planner prompt past the 128 KB single-argument limit on large + modules. + + Collapsing identical blocks reconstructs the original single file: because + each duplicate copy carries the module header and imports, keeping exactly + one copy yields a valid, complete file rather than a header-less + concatenation of bodies. Genuinely distinct per-unit slices are preserved + unchanged. 
+ """ + unit_codes = list(fdata.get("units_to_code", {}).values()) + if not unit_codes: + return fdata.get("file_code", "") + seen: Set[str] = set() + unique: List[str] = [] + for code in unit_codes: + stripped = code.strip() + if stripped and stripped not in seen: + seen.add(stripped) + unique.append(stripped) + if not unique: + return fdata.get("file_code", "") + return "\n\n".join(unique) + + # ============================================================================ # Task Planner Agent (per subtree) # ============================================================================ @@ -551,7 +583,7 @@ def plan_subtree_tasks( files_context_parts.append( f"### File {i + 1}: {fp}\n" f"Units ({len(unit_keys)}): {json.dumps(unit_keys)}\n\n" - f"Source code (interfaces only):\n{fdata.get('file_code', '')}\n" + f"Source code (interfaces only):\n{_dedup_interface_source(fdata)}\n" ) files_context = "\n---\n".join(files_context_parts) diff --git a/CoderMind/tests/test_plan_prompt_dedup.py b/CoderMind/tests/test_plan_prompt_dedup.py new file mode 100644 index 0000000..ff3ea38 --- /dev/null +++ b/CoderMind/tests/test_plan_prompt_dedup.py @@ -0,0 +1,46 @@ +"""Tests for plan_tasks interface-source deduplication. + +The interface synthesis stores the whole-file text as every unit's code, so +``file_code`` (built as ``"\n\n".join(unit codes)``) repeats the entire file +once per unit. On large modules that O(units x file_size) blow-up pushes the +planner prompt past the 128 KB single-argument limit and crashes ``plan_tasks``. +``_dedup_interface_source`` collapses the duplication while preserving genuinely +distinct per-unit slices. +""" +from __future__ import annotations + +import sys +from pathlib import Path + +_SCRIPTS = Path(__file__).resolve().parents[1] / "scripts" +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +from plan_tasks import _dedup_interface_source # noqa: E402 + + +def test_collapses_whole_file_repeated_per_unit(): + whole = "//! 
mod doc\nuse std::path::Path;\n\nstruct A {}\nfn new() {}\nfn run() {}\n" + fdata = { + "units_to_code": {"struct A": whole, "fn new": whole, "fn run": whole}, + "file_code": "\n\n".join([whole, whole, whole]), + } + out = _dedup_interface_source(fdata) + assert out.count("//! mod doc") == 1 + assert out.count("use std::path::Path;") == 1 + assert len(out) < len(fdata["file_code"]) + + +def test_preserves_distinct_unit_slices(): + fdata = { + "units_to_code": {"a": "fn a() {}", "b": "fn b() {}", "c": "fn c() {}"}, + "file_code": "fn a() {}\n\nfn b() {}\n\nfn c() {}", + } + out = _dedup_interface_source(fdata) + for symbol in ("fn a()", "fn b()", "fn c()"): + assert symbol in out + + +def test_empty_units_falls_back_to_file_code(): + fdata = {"units_to_code": {}, "file_code": "raw source"} + assert _dedup_interface_source(fdata) == "raw source" From c1d9c2d536372263b2489cedf082050dbd866b38 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Mon, 15 Jun 2026 19:17:19 +0800 Subject: [PATCH 03/17] fix(lang): classify header-heavy C++ repos as C++ not C MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dominant_language votes one ballot per file and detect_language maps every .h to C (the only config owning .h). A C++ repo that uses .h headers — googletest has 2018 .h vs 1062 .cc — therefore gets more C votes than C++ and is misclassified as C, which fails the encoder's dominant_language expectation and poisons every downstream language decision (backend, test_command, entry_point). Fold C votes into C++ whenever both appear: a pure C repo never carries .cc/.cpp/.hpp sources, so the presence of any C++-only extension means the repo is C++ and its .h files are C++ headers. Pure C repos (only .c/.h) and C mixed with unrelated languages are unaffected. 
--- CoderMind/scripts/lang_parser/registry.py | 10 +++++++++ CoderMind/tests/test_lang_parser_registry.py | 23 ++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/CoderMind/scripts/lang_parser/registry.py b/CoderMind/scripts/lang_parser/registry.py index 93e8be8..a3e5092 100644 --- a/CoderMind/scripts/lang_parser/registry.py +++ b/CoderMind/scripts/lang_parser/registry.py @@ -139,6 +139,14 @@ def dominant_language(paths) -> str | None: skipped, not voted for ``None``. ``None`` is returned only when *every* path is unknown (empty or assets-only input). + A ``.h`` header is detected as C (it is the only config owning ``.h``), + but C++ projects routinely use ``.h`` for headers. When a repository + contains any C++-only extension (``.cpp``/``.cc``/``.cxx``/``.hpp``/ + ``.hh``/``.hxx``) alongside ``.h`` files, the repo is C++ and those + ``.h`` votes belong to C++; a pure C repo never carries C++ sources. + So C votes fold into C++ whenever both appear, preventing header-heavy + C++ repos (e.g. googletest) from being misclassified as C. + Tie-breaking is deterministic on CPython (insertion order) but callers that care about precise behaviour in mixed-language repos should pass a curated ``language_map`` to the consumer instead. 
@@ -150,4 +158,6 @@ def dominant_language(paths) -> str | None: counts[lang] = counts.get(lang, 0) + 1 if not counts: return None + if "cpp" in counts and "c" in counts: + counts["cpp"] += counts.pop("c") return max(counts.items(), key=lambda kv: kv[1])[0] diff --git a/CoderMind/tests/test_lang_parser_registry.py b/CoderMind/tests/test_lang_parser_registry.py index a95f827..f4cc118 100644 --- a/CoderMind/tests/test_lang_parser_registry.py +++ b/CoderMind/tests/test_lang_parser_registry.py @@ -21,6 +21,7 @@ LPFileResult, NotSupported, detect_language, + dominant_language, get_config, get_config_for_path, get_parser, @@ -93,6 +94,28 @@ def test_detects_supported_paths(self): assert detect_language("src/main.rs") == "rust" assert detect_language("crates/foo/src/lib.rs") == "rust" + def test_dominant_language_basic_majority(self): + assert dominant_language(["a.py", "b.py", "c.go"]) == "python" + assert dominant_language(["a.go"] * 3 + ["b.py"]) == "go" + assert dominant_language([]) is None + assert dominant_language(["x.png", "y.md"]) is None + + def test_dominant_language_cpp_with_c_headers(self): + # A C++ repo that uses .h headers (e.g. googletest): .h detects as C, + # but the C++-only extensions mean the repo is C++, so C votes fold + # into C++ rather than letting header count win. + paths = ["h%d.h" % i for i in range(2018)] + ["s%d.cc" % i for i in range(1062)] + assert dominant_language(paths) == "cpp" + # Even a single C++ source flips a header-only-looking repo to C++. + assert dominant_language(["a.h", "b.h", "c.h", "d.cpp"]) == "cpp" + + def test_dominant_language_pure_c_unaffected(self): + # No C++ extension present → stays C (no regression). + assert dominant_language(["a.c", "b.h", "c.h"]) == "c" + assert dominant_language(["main.c"]) == "c" + # C alongside an unrelated language must not fold into C++. 
+ assert dominant_language(["a.c"] * 5 + ["b.go"] * 2) == "c" + def test_unsupported_paths_are_not_supported_source(self): unsupported = [ "README.md", From 9bfc2aec7e50e38812d2c9014835fa1e7c08e589 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Mon, 15 Jun 2026 19:41:30 +0800 Subject: [PATCH 04/17] fix(interfaces): dedup file_code at the serialization source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The planner-side dedup (1f22944) only repaired the prompt; interfaces.json itself still stored file_code as the whole file repeated once per unit, and context_collector writes that file_code straight to disk as the code-gen seed source — so generated repos were seeded with N duplicate definitions. Add a shared common/code_dedup helper and apply it at every point that rebuilds file_code from per-unit code: InterfacesStore.to_interfaces_json (serialization) and interface_review prune (regeneration). The planner helper now reuses it as a consumer-side safety net for older artifacts. Keeping one copy reconstructs the original single file (imports plus each unit once); units_to_code is left untouched so per-unit stubs stay valid. --- CoderMind/scripts/common/code_dedup.py | 43 +++++++++ .../scripts/func_design/interface_review.py | 14 +-- .../scripts/func_design/interfaces_store.py | 3 +- CoderMind/scripts/plan_tasks.py | 19 ++-- .../tests/test_interface_source_dedup.py | 90 +++++++++++++++++++ 5 files changed, 147 insertions(+), 22 deletions(-) create mode 100644 CoderMind/scripts/common/code_dedup.py create mode 100644 CoderMind/tests/test_interface_source_dedup.py diff --git a/CoderMind/scripts/common/code_dedup.py b/CoderMind/scripts/common/code_dedup.py new file mode 100644 index 0000000..7e1907a --- /dev/null +++ b/CoderMind/scripts/common/code_dedup.py @@ -0,0 +1,43 @@ +"""Shared helpers for collapsing duplicated interface source blocks. 
+ +Interface synthesis stores each unit's code as the whole-file text for +non-Python units (``LPCodeUnit`` has no ``count_lines`` slicing), so a +file with N units repeats the entire file N times when those blocks are +joined into ``file_code``. These helpers collapse identical blocks so the +joined source reconstructs the original single file (imports plus each +unit once) instead of an O(units x file_size) blow-up. +""" +from __future__ import annotations + +from typing import Iterable, List + + +def dedup_code_blocks(codes: Iterable[str]) -> List[str]: + """Return ``codes`` with blank and duplicate (stripped) blocks removed. + + Order of first appearance is preserved. Comparison is on the + whitespace-stripped block so trivially different indentation does not + defeat dedup; the stripped form is returned so the join is clean. + """ + seen: set[str] = set() + unique: List[str] = [] + for code in codes: + stripped = code.strip() + if stripped and stripped not in seen: + seen.add(stripped) + unique.append(stripped) + return unique + + +def dedup_file_code(unit_codes: Iterable[str], fallback: str = "") -> str: + """Build ``file_code`` from per-unit code blocks with duplication removed. + + ``unit_codes`` are the values of ``units_to_code``. When every block is + an identical whole-file copy, the result is that single file; when + blocks are genuinely distinct per-unit slices they are all kept. Falls + back to ``fallback`` when no non-empty block survives. 
+ """ + unique = dedup_code_blocks(unit_codes) + if not unique: + return fallback + return "\n\n".join(unique) diff --git a/CoderMind/scripts/func_design/interface_review.py b/CoderMind/scripts/func_design/interface_review.py index db9c7f8..0be8637 100644 --- a/CoderMind/scripts/func_design/interface_review.py +++ b/CoderMind/scripts/func_design/interface_review.py @@ -22,6 +22,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from common import LLMClient +from common.code_dedup import dedup_file_code # AST inspection routes through the Python backend's # ``find_main_block_lineno`` helper so entry-point splicing shares the @@ -1716,13 +1717,12 @@ def prune_orphan_interfaces( # All units pruned → remove the entire file entry files_to_remove.append(file_path) else: - # Regenerate file_code from surviving units - code_parts = [] - for uname in units: - code = units_to_code.get(uname, "") - if code: - code_parts.append(code) - file_data["file_code"] = "\n\n".join(code_parts) + # Regenerate file_code from surviving units, collapsing the + # whole-file-per-unit duplication so the rebuilt source is a + # single clean file rather than N copies. 
+ file_data["file_code"] = dedup_file_code( + units_to_code.get(uname, "") for uname in units + ) for fp in files_to_remove: del file_interfaces[fp] diff --git a/CoderMind/scripts/func_design/interfaces_store.py b/CoderMind/scripts/func_design/interfaces_store.py index 2c4dd3e..5d8b3a2 100644 --- a/CoderMind/scripts/func_design/interfaces_store.py +++ b/CoderMind/scripts/func_design/interfaces_store.py @@ -17,6 +17,7 @@ from pathlib import Path from typing import Dict, List, Optional, Set, Any, Union, Tuple +from common.code_dedup import dedup_file_code from decoder_lang.unit_kind import classify_unit_kind logger = logging.getLogger(__name__) @@ -1208,7 +1209,7 @@ def to_interfaces_json(self) -> Dict[str, Any]: "units": [u.name for u in units], "units_to_features": {u.name: u.features for u in units}, "units_to_code": {u.name: u.code for u in units}, - "file_code": "\n\n".join(u.code for u in units), + "file_code": dedup_file_code(u.code for u in units), } # Preserve handler-added tag for downstream diagnostics # and so a subsequent ``from_legacy_format`` round-trip diff --git a/CoderMind/scripts/plan_tasks.py b/CoderMind/scripts/plan_tasks.py index d5d1511..cf8f2c2 100644 --- a/CoderMind/scripts/plan_tasks.py +++ b/CoderMind/scripts/plan_tasks.py @@ -22,6 +22,7 @@ from common.trajectory import Trajectory, load_or_create_trajectory from common import LLMClient +from common.code_dedup import dedup_file_code from common.language_meta import extract_language_metadata, metadata_with_languages from decoder_lang import FileDependencyEdge, ProjectTaskContext, get_backend, infer_language_from_path from rpg import uuid8 @@ -511,21 +512,11 @@ def _dedup_interface_source(fdata: Dict[str, Any]) -> str: each duplicate copy carries the module header and imports, keeping exactly one copy yields a valid, complete file rather than a header-less concatenation of bodies. Genuinely distinct per-unit slices are preserved - unchanged. + unchanged. 
This is a consumer-side safety net; freshly serialized + ``interfaces.json`` is already deduplicated at the source. """ - unit_codes = list(fdata.get("units_to_code", {}).values()) - if not unit_codes: - return fdata.get("file_code", "") - seen: Set[str] = set() - unique: List[str] = [] - for code in unit_codes: - stripped = code.strip() - if stripped and stripped not in seen: - seen.add(stripped) - unique.append(stripped) - if not unique: - return fdata.get("file_code", "") - return "\n\n".join(unique) + unit_codes = fdata.get("units_to_code", {}).values() + return dedup_file_code(unit_codes, fallback=fdata.get("file_code", "")) # ============================================================================ diff --git a/CoderMind/tests/test_interface_source_dedup.py b/CoderMind/tests/test_interface_source_dedup.py new file mode 100644 index 0000000..2a8579e --- /dev/null +++ b/CoderMind/tests/test_interface_source_dedup.py @@ -0,0 +1,90 @@ +"""Tests for shared interface-source dedup and its source-level wiring. + +``interfaces.json`` stores ``file_code`` as the join of every unit's code. +Non-Python interface synthesis stores the whole-file text as each unit's +code, so the join repeats the file once per unit. ``code_dedup`` collapses +that duplication; ``InterfacesStore.to_interfaces_json`` applies it at the +source so the serialized artifact (and the code-gen seed file written from +it) is a clean single file. +""" +from __future__ import annotations + +import sys +from pathlib import Path + +_SCRIPTS = Path(__file__).resolve().parents[1] / "scripts" +if str(_SCRIPTS) not in sys.path: + sys.path.insert(0, str(_SCRIPTS)) + +from common.code_dedup import dedup_code_blocks, dedup_file_code # noqa: E402 +from func_design.interfaces_store import InterfaceUnit, InterfacesStore # noqa: E402 + + +_WHOLE = "//! 
mod doc\nuse std::path::Path;\n\nstruct A {}\nfn new() {}\nfn run() {}\n" + + +class TestDedupHelpers: + def test_collapses_repeated_whole_file(self): + assert dedup_file_code([_WHOLE, _WHOLE, _WHOLE]) == _WHOLE.strip() + + def test_preserves_distinct_slices(self): + out = dedup_file_code(["fn a() {}", "fn b() {}", "fn c() {}"]) + assert out == "fn a() {}\n\nfn b() {}\n\nfn c() {}" + + def test_blank_and_empty(self): + assert dedup_file_code(["", " ", "\n"]) == "" + assert dedup_file_code([], fallback="FB") == "FB" + assert dedup_file_code(["", ""], fallback="FB") == "FB" + + def test_dedup_code_blocks_order_preserved(self): + assert dedup_code_blocks(["b", "a", "b", "c", "a"]) == ["b", "a", "c"] + + +class TestSerializationDedup: + def test_to_interfaces_json_collapses_whole_file_per_unit(self): + # Non-Python synthesis stores the whole file as every unit's code. + store = InterfacesStore() + for name in ("struct A", "function new", "function run"): + store.add_unit( + InterfaceUnit( + name=name, + file_path="src/a.rs", + subtree_name="Core", + features=["f"], + code=_WHOLE, + ) + ) + store.subtree_order = ["Core"] + + data = store.to_interfaces_json() + fc = data["subtrees"]["Core"]["interfaces"]["src/a.rs"]["file_code"] + + # file_code is the single file, not three concatenated copies. + assert fc.count("//! mod doc") == 1 + assert fc.count("use std::path::Path;") == 1 + # units_to_code is untouched (still per-unit entries, valid as stubs). 
+ utc = data["subtrees"]["Core"]["interfaces"]["src/a.rs"]["units_to_code"] + assert set(utc) == {"struct A", "function new", "function run"} + + def test_to_interfaces_json_keeps_distinct_unit_slices(self): + store = InterfacesStore() + slices = { + "function a": "fn a() {}", + "function b": "fn b() {}", + } + for name, code in slices.items(): + store.add_unit( + InterfaceUnit( + name=name, + file_path="src/b.rs", + subtree_name="Core", + features=["f"], + code=code, + ) + ) + store.subtree_order = ["Core"] + + data = store.to_interfaces_json() + fc = data["subtrees"]["Core"]["interfaces"]["src/b.rs"]["file_code"] + assert "fn a() {}" in fc + assert "fn b() {}" in fc From 4ba22f53738dc933e65edc8e33ad8b870eb14339 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Thu, 18 Jun 2026 14:34:29 +0800 Subject: [PATCH 05/17] fix(parser): Prevent regex backtracking in string stripping Exclude backslashes from the normal string-literal branch so escaped characters have one matching path. This prevents zod-like commented regex literals from hanging TypeScript encoding. --- .../lang_parser/extractors/fallback.py | 5 ++- .../tests/test_lang_parser_typescript.py | 32 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/CoderMind/scripts/lang_parser/extractors/fallback.py b/CoderMind/scripts/lang_parser/extractors/fallback.py index 248698d..f4bd20b 100644 --- a/CoderMind/scripts/lang_parser/extractors/fallback.py +++ b/CoderMind/scripts/lang_parser/extractors/fallback.py @@ -47,7 +47,10 @@ def line_end_for_statement(lines: list[str], start_index: int) -> int: def strip_string_literals(line: str) -> str: - return re.sub(r"(['\"`])(?:\\.|(?!\1).)*\1", "", line) + # Exclude backslashes from the normal-character branch so escapes have a + # single matching path. Otherwise, unterminated quoted text with many + # escapes can trigger catastrophic backtracking. 
+ return re.sub(r"(['\"`])(?:\\.|(?!\1)[^\\])*\1", "", line) def delimiter_syntax_error(source: str) -> str | None: diff --git a/CoderMind/tests/test_lang_parser_typescript.py b/CoderMind/tests/test_lang_parser_typescript.py index 7172dda..320ed72 100644 --- a/CoderMind/tests/test_lang_parser_typescript.py +++ b/CoderMind/tests/test_lang_parser_typescript.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """Tests for the TypeScript language parser.""" +import multiprocessing as mp import os import sys import textwrap @@ -12,6 +13,12 @@ from lang_parser import parse_file, validate_syntax +def _strip_string_literals_worker(line, result_queue): + from lang_parser.extractors.fallback import strip_string_literals + + result_queue.put(strip_string_literals(line)) + + TS_SOURCE = textwrap.dedent( """\ import { User } from "./models"; @@ -140,3 +147,28 @@ def test_invalid_source_returns_syntax_error_without_crashing(self): valid, error = validate_syntax("bad.ts", "export function broken(\n") assert valid is False assert error is not None + + def test_comment_with_unterminated_quote_and_many_escapes_does_not_hang(self): + zod_like_line = ( + "// const emailRegex = /^([!#\\$%&'" + + ("\\d" * 20) + + "_`{|}~]/" + ) + result_queue = mp.Queue() + process = mp.Process( + target=_strip_string_literals_worker, + args=(zod_like_line, result_queue), + ) + + process.start() + process.join(3) + + if process.is_alive(): + process.terminate() + process.join() + raise AssertionError( + "strip_string_literals hung on zod-like escaped regex comment" + ) + + assert process.exitcode == 0 + assert result_queue.get_nowait() == zod_like_line From 1457a20c0bf13ac353cd257bc15daa1fd3581e3c Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Thu, 18 Jun 2026 21:08:09 +0800 Subject: [PATCH 06/17] fix(interfaces): converge Python review for split files and method calls Allow Python add_interface fixes to materialize a new interface file when the feature root maps to an 
existing subtree. Add deterministic same-file Python invocation edges for self calls, private helper fan-out, and constructor-composed class dependencies so review orphan checks can converge. --- .../scripts/func_design/interface_agent.py | 90 +++++++++++++++++++ .../scripts/func_design/interface_review.py | 45 ++++++++-- CoderMind/tests/test_interface_coverage.py | 54 +++++++++++ CoderMind/tests/test_plan_language_support.py | 45 ++++++++++ 4 files changed, 225 insertions(+), 9 deletions(-) diff --git a/CoderMind/scripts/func_design/interface_agent.py b/CoderMind/scripts/func_design/interface_agent.py index 5ac77bb..039d947 100644 --- a/CoderMind/scripts/func_design/interface_agent.py +++ b/CoderMind/scripts/func_design/interface_agent.py @@ -302,8 +302,58 @@ def analyze_code_dependencies( # Type references from annotations — Python-specific rich # extraction. Other languages cover this via LLM ``uses_types``. if self.backend.name == "python": + self._analyze_python_invocations(code, file_path) self._analyze_python_type_references(code, file_path, base_class_files) + def _analyze_python_invocations(self, code: str, file_path: str) -> None: + """Add same-file Python invocation edges from function bodies.""" + units = self.backend.list_code_units(code, file_path) + local_callables: Dict[str, List[str]] = defaultdict(list) + caller_nodes: List[Tuple[str, ast.AST, Optional[str]]] = [] + + for unit in units: + if unit.unit_type not in ("function", "method", "class"): + continue + if unit.unit_type == "method": + prefix = "method" + elif unit.unit_type == "class": + prefix = "class" + else: + prefix = "function" + unit_name = f"{prefix} {unit.name}" + local_callables[unit.name].append(unit_name) + node = (unit.extra or {}).get("ast_node") + if node is not None and unit.unit_type in ("function", "method"): + owner_class = None + if unit.unit_type == "method" and unit.name == "__init__": + parent = getattr(unit, "parent", None) + if parent: + owner_class = f"class 
{parent}" + caller_nodes.append((unit_name, node, owner_class)) + + local_calls: Dict[str, Set[str]] = defaultdict(set) + for caller, node, owner_class in caller_nodes: + for child in ast.walk(node): + if not isinstance(child, ast.Call): + continue + callee_name = _python_call_name(child.func) + if not callee_name: + continue + candidates = local_callables.get(callee_name, []) + if len(candidates) != 1: + continue + callee = candidates[0] + local_calls[caller].add(callee) + if owner_class and callee.startswith("class "): + local_calls[owner_class].add(callee) + + for caller, callees in local_calls.items(): + for callee in callees: + self.add_invocation(caller, callee, file_path, file_path) + if _is_private_python_unit(callee): + for target in _public_targets_reached_via_private(callee, local_calls): + self.add_invocation(caller, target, file_path, file_path) + def _analyze_python_type_references( self, code: str, @@ -523,6 +573,46 @@ def _extract_name_from_node(node: ast.expr) -> Optional[str]: return None +def _python_call_name(node: ast.expr) -> Optional[str]: + """Return a local callee name for safe same-file call edges.""" + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute) and _attribute_root_is_self(node): + return node.attr + return None + + +def _attribute_root_is_self(node: ast.Attribute) -> bool: + value = node.value + while isinstance(value, ast.Attribute): + value = value.value + return isinstance(value, ast.Name) and value.id == "self" + + +def _is_private_python_unit(unit_name: str) -> bool: + bare_name = unit_name.split(" ", 1)[-1] + return bare_name.startswith("_") + + +def _public_targets_reached_via_private( + start: str, + local_calls: Dict[str, Set[str]], +) -> Set[str]: + targets: Set[str] = set() + seen: Set[str] = set() + stack = list(local_calls.get(start, set())) + while stack: + unit_name = stack.pop() + if unit_name in seen: + continue + seen.add(unit_name) + if _is_private_python_unit(unit_name): + 
stack.extend(local_calls.get(unit_name, set())) + else: + targets.add(unit_name) + return targets + + def _extract_type_names(node: ast.expr) -> List[str]: """Extract all type names from a type annotation AST node.""" names = [] diff --git a/CoderMind/scripts/func_design/interface_review.py b/CoderMind/scripts/func_design/interface_review.py index 0be8637..a82b6b8 100644 --- a/CoderMind/scripts/func_design/interface_review.py +++ b/CoderMind/scripts/func_design/interface_review.py @@ -1278,7 +1278,8 @@ def _apply_add_interface( """Materialise an LLM-requested new interface unit into interfaces_data. Required fix fields: - - ``file_path``: must already exist as a key under some subtree + - ``file_path``: existing key under some subtree, or a new file + whose feature root names an existing subtree - ``unit_name``: prefixed with ``"function "`` or ``"class "`` - ``signature``: full Python signature (e.g. ``def foo() -> None:``) - ``docstring``: non-empty, 1-3 sentences @@ -1349,18 +1350,44 @@ def _apply_add_interface( subtrees = interfaces_data.get("subtrees") or interfaces_data.get("components") or {} target_subtree: Optional[str] = None file_entry: Optional[Dict[str, Any]] = None + file_container: Optional[Dict[str, Any]] = None for st_name, st_data in subtrees.items(): - file_container = st_data.get("interfaces") or st_data.get("files") or {} - if file_path in file_container: + container = st_data.get("interfaces") + if container is None: + container = st_data.get("files") + if container is not None and file_path in container: target_subtree = st_name - file_entry = file_container[file_path] + file_container = container + file_entry = container[file_path] break if file_entry is None: - return ( - False, - f"file_path '{file_path}' not found in any subtree's interfaces", - 0, - ) + feature_root = feature_path.split("/", 1)[0].strip() + target_data = subtrees.get(feature_root) if feature_root else None + if target_data is None: + return ( + False, + f"file_path 
'{file_path}' not found in any subtree's interfaces", + 0, + ) + + target_subtree = feature_root + file_container = target_data.get("interfaces") + if file_container is None: + file_container = target_data.get("files") + if file_container is None: + file_container = {} + target_data["interfaces"] = file_container + + file_entry = { + "units": [], + "units_to_features": {}, + "units_to_code": {}, + "file_code": "", + } + file_container[file_path] = file_entry + files_order = target_data.setdefault("files_order", []) + if isinstance(files_order, list) and file_path not in files_order: + files_order.append(file_path) # --- Idempotency check -------------------------------------------- existing_units = file_entry.setdefault("units", []) diff --git a/CoderMind/tests/test_interface_coverage.py b/CoderMind/tests/test_interface_coverage.py index 9639663..adca079 100644 --- a/CoderMind/tests/test_interface_coverage.py +++ b/CoderMind/tests/test_interface_coverage.py @@ -491,3 +491,57 @@ def test_apply_fixes_applies_add_interface_for_python() -> None: assert stats["applied_fixes"] == 1 cli = interfaces_data["subtrees"]["Core"]["interfaces"]["src/cli.py"] assert "function run" in cli["units"] + + +def test_apply_fixes_can_create_python_interface_file_in_feature_subtree() -> None: + reviewer = _make_reviewer("python") + registry = GlobalInterfaceRegistry(backend=get_backend_for("python")) + interfaces_data = { + "subtrees": { + "Todo Display": { + "files_order": ["src/todo_web_app/views/todo_list.py"], + "interfaces": { + "src/todo_web_app/views/todo_list.py": { + "units": ["function render_todo_items"], + "units_to_features": { + "function render_todo_items": [ + "Todo Display/list rendering/items/render all items" + ] + }, + "units_to_code": {}, + "file_code": "", + } + }, + } + } + } + feature_path = "Todo Display/list rendering/page/render complete page" + + stats = reviewer._apply_fixes( + fixes=[{ + "action": "add_interface", + "file_path": 
"src/todo_web_app/views/render.py", + "unit_name": "function render_todo_page", + "signature": "def render_todo_page(todos: list[dict]) -> str:", + "docstring": "Render the complete todo page.", + "feature_path": feature_path, + "incoming_calls_from": ["list_todos"], + }], + interfaces_data=interfaces_data, + enhanced_data_flow={"invocation_edges": []}, + global_registry=registry, + skeleton_features={feature_path}, + rpg_features={feature_path}, + ) + + assert stats["applied_fixes"] == 1 + files = interfaces_data["subtrees"]["Todo Display"]["interfaces"] + render_file = files["src/todo_web_app/views/render.py"] + assert render_file["units"] == ["function render_todo_page"] + assert render_file["units_to_features"]["function render_todo_page"] == [ + feature_path + ] + assert "def render_todo_page(todos: list[dict]) -> str:" in render_file["file_code"] + assert "src/todo_web_app/views/render.py" in interfaces_data["subtrees"][ + "Todo Display" + ]["files_order"] diff --git a/CoderMind/tests/test_plan_language_support.py b/CoderMind/tests/test_plan_language_support.py index a15ca39..53c64c7 100644 --- a/CoderMind/tests/test_plan_language_support.py +++ b/CoderMind/tests/test_plan_language_support.py @@ -65,6 +65,51 @@ def test_dependency_collector_python_inheritance_still_works() -> None: ), collector.inheritance_edges +def test_dependency_collector_python_same_file_method_calls() -> None: + collector = DependencyCollector( + known_base_classes=set(), + known_types=set(), + target_language="python", + ) + code = """ +class RecordFactory: + def create_record(self, title: str) -> dict: + return {"title": title} + + +class Planner: + def __init__(self, record_factory: RecordFactory) -> None: + self._record_factory = record_factory or RecordFactory() + + def resolve_action(self, action: str) -> str: + return action.strip() + + def _require_action(self, action: str) -> str: + return self.resolve_action(action) + + def plan_add(self, title: str) -> dict: + action = 
self._require_action("add") + record = self._record_factory.create_record(title) + return {"action": action, "record": record} +""" + + collector.analyze_code_dependencies( + code=code, + file_path="src/domain/todo.py", + base_class_files={}, + ) + + assert { + (edge["caller"], edge["callee"]) + for edge in collector.invocation_edges + } >= { + ("method __init__", "class RecordFactory"), + ("class Planner", "class RecordFactory"), + ("method plan_add", "method resolve_action"), + ("method plan_add", "method create_record"), + } + + def test_base_class_validation_accepts_go_source() -> None: backend = get_backend("go") model = BaseClassOutput.model_validate({ From dfdcf306716f62932daf094cb26a0e58ed2cc2bd Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Sat, 20 Jun 2026 01:36:03 +0800 Subject: [PATCH 07/17] fix(decoder): run CTest from build dir and reject partial interfaces Run C++ ctest with --test-dir build so post-verification sees the CMake-generated test registry. Also make plan_tasks reject interfaces.json files that do not cover skeleton.json features, and update the plan command template so warning states are treated as incomplete. 
--- .../decoder_lang/tests/test_c_cpp_backend.py | 20 ++++++ CoderMind/scripts/plan_tasks.py | 61 ++++++++++++++++ CoderMind/templates/commands/plan.md | 8 ++- CoderMind/tests/test_plan_prompt_dedup.py | 71 ++++++++++++++++++- 4 files changed, 158 insertions(+), 2 deletions(-) diff --git a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py index d5ab956..8071259 100644 --- a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py +++ b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py @@ -14,6 +14,7 @@ from decoder_lang import ( # noqa: E402 CBackend, CppBackend, + EnvHandle, ProjectTaskContext, ToolchainUnavailable, get_backend, @@ -161,6 +162,25 @@ def test_prompt_hints_and_project_tasks(self) -> None: self.assertIn("src/main.cpp", templates.main_entry) self.assertIn("C++ CLI", templates.readme) + def test_cmake_test_command_runs_ctest_in_build_dir(self) -> None: + with TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + (root / "CMakeLists.txt").write_text("cmake_minimum_required(VERSION 3.16)\n") + env = EnvHandle( + project_root=root, + extra={"ctest": "/usr/bin/ctest"}, + ) + + self.assertEqual( + self.backend.test_command(env), + [ + "/usr/bin/ctest", + "--test-dir", + str(root / "build"), + "--output-on-failure", + ], + ) + def test_missing_toolchain_raises(self) -> None: with TemporaryDirectory() as temp_dir: with patch("decoder_lang.cpp_backend.shutil.which", return_value=None): diff --git a/CoderMind/scripts/plan_tasks.py b/CoderMind/scripts/plan_tasks.py index cf8f2c2..2ecc350 100644 --- a/CoderMind/scripts/plan_tasks.py +++ b/CoderMind/scripts/plan_tasks.py @@ -31,6 +31,7 @@ from common.paths import ( DATA_FLOW_FILE, INTERFACES_FILE, + SKELETON_FILE, REPO_RPG_FILE as RPG_FILE, REPO_INFO_FILE, TASKS_FILE as OUTPUT_FILE, @@ -1625,6 +1626,65 @@ def load_repo_info() -> tuple[str, str]: return repo_name, repo_info +def _collect_skeleton_features(skeleton: Dict[str, 
Any]) -> Set[str]: + features: Set[str] = set() + + def visit(node: Any) -> None: + if not isinstance(node, dict): + return + node_features = node.get("feature_paths") or [] + if isinstance(node_features, list): + features.update(str(feature) for feature in node_features if feature) + for child in node.get("children") or []: + visit(child) + + visit(skeleton.get("root")) + return features + + +def _collect_interface_features(interfaces: Dict[str, Any]) -> Set[str]: + features: Set[str] = set() + subtrees = interfaces.get("subtrees") or interfaces.get("components") or {} + if not isinstance(subtrees, dict): + return features + for subtree_data in subtrees.values(): + if not isinstance(subtree_data, dict): + continue + files = subtree_data.get("interfaces") or subtree_data.get("files") or {} + if not isinstance(files, dict): + continue + for file_data in files.values(): + if not isinstance(file_data, dict): + continue + units_to_features = file_data.get("units_to_features") or {} + if not isinstance(units_to_features, dict): + continue + for unit_features in units_to_features.values(): + if isinstance(unit_features, list): + features.update(str(feature) for feature in unit_features if feature) + return features + + +def _validate_interfaces_cover_skeleton_features( + interfaces: Dict[str, Any], + skeleton_path: Path = SKELETON_FILE, +) -> None: + if not skeleton_path.exists(): + return + with open(skeleton_path, "r", encoding="utf-8") as f: + skeleton = json.load(f) + missing = sorted( + _collect_skeleton_features(skeleton) - _collect_interface_features(interfaces) + ) + if missing: + preview = ", ".join(repr(feature) for feature in missing[:5]) + raise ValueError( + "interfaces.json is incomplete for skeleton.json: " + f"{len(missing)} feature(s) missing from interfaces.json: " + f"[{preview}]. Re-run design_interfaces before plan_tasks." 
+ ) + + # ============================================================================ # Main Entry Point # ============================================================================ @@ -1691,6 +1751,7 @@ def main(): with open(args.interfaces, 'r', encoding='utf-8') as f: interfaces = json.load(f) + _validate_interfaces_cover_skeleton_features(interfaces) with open(args.data_flow, 'r', encoding='utf-8') as f: data_flow = json.load(f) diff --git a/CoderMind/templates/commands/plan.md b/CoderMind/templates/commands/plan.md index c52b7f8..1c383b6 100644 --- a/CoderMind/templates/commands/plan.md +++ b/CoderMind/templates/commands/plan.md @@ -36,11 +36,17 @@ cmind script plan.py --check-only --json Parse the JSON. The fields you need: * `total` — total number of stages (always 5) -* `done` — count of stages whose `type` is `update` or `warning` +* `done` — count of stages whose `type` is `update` * `next` — name of the first not-done stage (or `null` if all done) * `stages[*].name`, `stages[*].type` (`update` / `warning` / `init` / `error`), `stages[*].done` +Treat `warning` as **not done**. A warning means the artifact exists but +violates a cross-stage contract (for example, `interfaces.json` does not +cover all `skeleton.json` features). Do not skip the stage, do not run a +later stage directly, and do not create downstream artifacts from a +warning-state input. 
+ ### Step 2: One decision (the only prompt of this command) Choose **exactly one** case based on `done` vs `total`: diff --git a/CoderMind/tests/test_plan_prompt_dedup.py b/CoderMind/tests/test_plan_prompt_dedup.py index ff3ea38..ad6ddb7 100644 --- a/CoderMind/tests/test_plan_prompt_dedup.py +++ b/CoderMind/tests/test_plan_prompt_dedup.py @@ -16,7 +16,10 @@ if str(_SCRIPTS) not in sys.path: sys.path.insert(0, str(_SCRIPTS)) -from plan_tasks import _dedup_interface_source # noqa: E402 +from plan_tasks import ( # type: ignore[import-not-found] # noqa: E402 + _dedup_interface_source, + _validate_interfaces_cover_skeleton_features, +) def test_collapses_whole_file_repeated_per_unit(): @@ -44,3 +47,69 @@ def test_preserves_distinct_unit_slices(): def test_empty_units_falls_back_to_file_code(): fdata = {"units_to_code": {}, "file_code": "raw source"} assert _dedup_interface_source(fdata) == "raw source" + + +def test_rejects_partial_interfaces_against_skeleton(tmp_path): + skeleton_path = tmp_path / "skeleton.json" + skeleton_path.write_text( + """ + { + "root": { + "type": "directory", + "children": [ + {"type": "file", "path": "src/a.cpp", "feature_paths": ["Core/a"]}, + {"type": "file", "path": "src/b.cpp", "feature_paths": ["Core/b"]} + ] + } + } + """, + encoding="utf-8", + ) + interfaces = { + "subtrees": { + "Core": { + "interfaces": { + "src/a.cpp": { + "units_to_features": {"function a": ["Core/a"]}, + } + } + } + } + } + + try: + _validate_interfaces_cover_skeleton_features(interfaces, skeleton_path) + except ValueError as exc: + assert "Core/b" in str(exc) + else: + raise AssertionError("partial interfaces should be rejected") + + +def test_accepts_complete_interfaces_against_skeleton(tmp_path): + skeleton_path = tmp_path / "skeleton.json" + skeleton_path.write_text( + """ + { + "root": { + "type": "directory", + "children": [ + {"type": "file", "path": "src/a.cpp", "feature_paths": ["Core/a"]} + ] + } + } + """, + encoding="utf-8", + ) + interfaces = { + 
"subtrees": { + "Core": { + "interfaces": { + "src/a.cpp": { + "units_to_features": {"function a": ["Core/a"]}, + } + } + } + } + } + + _validate_interfaces_cover_skeleton_features(interfaces, skeleton_path) From 07f2cd42f7d1e573eec70a4e122aa888473efbf7 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Sat, 20 Jun 2026 17:40:24 +0800 Subject: [PATCH 08/17] fix(decoder): harden C++ interface and syntax verification Keep interface generation progress in a .partial file until complete, add design_interfaces heartbeat output, exclude skipped directories from C/C++ syntax source discovery, and make C/C++ prompt test commands discover sources at run time with standalone translation-unit guidance. --- CoderMind/scripts/code_gen/batch_prompts.py | 41 ++++++++++++++++++- CoderMind/scripts/decoder_lang/c_backend.py | 8 +++- CoderMind/scripts/decoder_lang/cpp_backend.py | 6 ++- .../decoder_lang/tests/test_c_cpp_backend.py | 28 +++++++++++++ CoderMind/scripts/design_interfaces.py | 24 ++++++++++- .../scripts/func_design/interface_agent.py | 30 +++++++++++--- CoderMind/tests/test_code_gen_multilingual.py | 5 +++ CoderMind/tests/test_interface_coverage.py | 29 +++++++++++++ 8 files changed, 161 insertions(+), 10 deletions(-) diff --git a/CoderMind/scripts/code_gen/batch_prompts.py b/CoderMind/scripts/code_gen/batch_prompts.py index fc0282b..ac6d622 100644 --- a/CoderMind/scripts/code_gen/batch_prompts.py +++ b/CoderMind/scripts/code_gen/batch_prompts.py @@ -413,6 +413,35 @@ def _fallback_test_command(backend: LanguageBackend) -> List[str]: return list(_FALLBACK_TEST_COMMANDS.get(backend.name, [backend.prompt_hints().test_framework_name])) +def _dynamic_c_family_syntax_command( + backend: LanguageBackend, + command: List[str], +) -> str: + compiler = shlex.quote(str(command[0])) + include_flags: List[str] = [] + for index, part in enumerate(command): + if part == "-I" and index + 1 < len(command): + include_flags.extend(["-I", "$PWD"]) + 
standard = "-std=c++17" if backend.name == "cpp" else "-std=c99" + patterns = ( + r'\( -name "*.cpp" -o -name "*.cc" -o -name "*.cxx" \)' + if backend.name == "cpp" + else r'-name "*.c"' + ) + include_text = " ".join(shlex.quote(part) for part in include_flags) + return ( + "bash -lc " + + shlex.quote( + "mapfile -d '' sources < <(find . " + r"\( -path './.git' -o -path './.cmind' -o -path './build' " + r"-o -path './node_modules' -o -path './target' \) -prune " + f"-o -type f {patterns} -print0); " + f"if (( ${{#sources[@]}} == 0 )); then echo 'No {backend.prompt_hints().display_name} source files found' >&2; exit 1; fi; " + f"{compiler} {standard} {include_text} -Wall -Wextra -fsyntax-only \"${{sources[@]}}\"" + ) + ) + + def _build_backend_test_cmd( backend: LanguageBackend, repo_path: Path, @@ -425,7 +454,10 @@ def _build_backend_test_cmd( env = backend.detect_env(repo_path) or EnvHandle(project_root=repo_path.resolve()) try: - return _shell_join(backend.test_command(env)) + command = backend.test_command(env) + if backend.name in {"c", "cpp"} and "-fsyntax-only" in command: + return _dynamic_c_family_syntax_command(backend, command) + return _shell_join(command) except (ToolchainUnavailable, NotImplementedError, OSError): return _shell_join(_fallback_test_command(backend)) @@ -542,6 +574,13 @@ def _build_language_context(backend: LanguageBackend, test_command: str) -> str: f"- Run tests ONLY with `{test_command}` ({hints.test_framework_name}). Do NOT wrap, " "re-implement, or drive the test suite through pytest or any Python script.\n" ) + if backend.name in {"c", "cpp"}: + context += ( + "- C/C++ tests and examples must be valid standalone translation units. 
" + "If a test or example calls a helper implemented in another `.c`/`.cpp` file, " + "create or update a matching header and include that header; do NOT rely on " + "transitive `.cpp` inclusion or undeclared functions.\n" + ) else: context += ( "- Do NOT introduce Python-specific files, packages, or pytest conventions unless this is a Python project.\n" diff --git a/CoderMind/scripts/decoder_lang/c_backend.py b/CoderMind/scripts/decoder_lang/c_backend.py index ada7f9e..7064b3c 100644 --- a/CoderMind/scripts/decoder_lang/c_backend.py +++ b/CoderMind/scripts/decoder_lang/c_backend.py @@ -6,6 +6,8 @@ from pathlib import Path from typing import Any +from common.utils import path_has_skip_dir + from .backend import ToolchainUnavailable from .file_deps import FileDependencyEdge, resolved_c_family_edges from .prompt_hints import PromptHints @@ -171,7 +173,11 @@ def test_command(self, env: EnvHandle, selectors: list[str] | None = None) -> li cc = env.extra.get("cc") if env.extra else None if not cc: raise ToolchainUnavailable("C compiler is not available on PATH") - sources = sorted(str(path) for path in env.project_root.rglob("*.c")) + sources = sorted( + str(path) + for path in env.project_root.rglob("*.c") + if not path_has_skip_dir(path.relative_to(env.project_root).as_posix()) + ) return [ cc, "-std=c99", diff --git a/CoderMind/scripts/decoder_lang/cpp_backend.py b/CoderMind/scripts/decoder_lang/cpp_backend.py index 1767a2f..b369eea 100644 --- a/CoderMind/scripts/decoder_lang/cpp_backend.py +++ b/CoderMind/scripts/decoder_lang/cpp_backend.py @@ -6,6 +6,8 @@ from pathlib import Path from typing import Any +from common.utils import path_has_skip_dir + from .backend import ToolchainUnavailable from .file_deps import FileDependencyEdge, resolved_c_family_edges from .prompt_hints import PromptHints @@ -183,8 +185,10 @@ def test_command(self, env: EnvHandle, selectors: list[str] | None = None) -> li if not cxx: raise ToolchainUnavailable("C++ compiler is not available on 
PATH") sources = sorted( - str(path) for ext in ("*.cpp", "*.cc", "*.cxx") + str(path) + for ext in ("*.cpp", "*.cc", "*.cxx") for path in env.project_root.rglob(ext) + if not path_has_skip_dir(path.relative_to(env.project_root).as_posix()) ) return [ cxx, diff --git a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py index 8071259..a157e84 100644 --- a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py +++ b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py @@ -89,6 +89,20 @@ def test_missing_toolchain_raises(self) -> None: with self.assertRaises(ToolchainUnavailable): self.backend.ensure_env(Path(temp_dir)) + def test_syntax_fallback_skips_git_refs(self) -> None: + with TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + (root / "src").mkdir() + (root / "src" / "main.c").write_text("int main(void) { return 0; }\n") + (root / ".git" / "refs" / "heads" / "batch").mkdir(parents=True) + (root / ".git" / "refs" / "heads" / "batch" / "main.c").write_text("not c") + env = EnvHandle(project_root=root, extra={"cc": "/usr/bin/cc"}) + + cmd = self.backend.test_command(env) + + self.assertIn(str(root / "src" / "main.c"), cmd) + self.assertNotIn(str(root / ".git" / "refs" / "heads" / "batch" / "main.c"), cmd) + class CppBackendTests(unittest.TestCase): """C++ backend registry and parser-backed behaviour.""" @@ -181,6 +195,20 @@ def test_cmake_test_command_runs_ctest_in_build_dir(self) -> None: ], ) + def test_syntax_fallback_skips_git_refs(self) -> None: + with TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + (root / "src").mkdir() + (root / "src" / "main.cpp").write_text("int main() { return 0; }\n") + (root / ".git" / "refs" / "heads" / "batch").mkdir(parents=True) + (root / ".git" / "refs" / "heads" / "batch" / "main.cpp").write_text("not cpp") + env = EnvHandle(project_root=root, extra={"cxx": "/usr/bin/c++"}) + + cmd = self.backend.test_command(env) + + 
self.assertIn(str(root / "src" / "main.cpp"), cmd) + self.assertNotIn(str(root / ".git" / "refs" / "heads" / "batch" / "main.cpp"), cmd) + def test_missing_toolchain_raises(self) -> None: with TemporaryDirectory() as temp_dir: with patch("decoder_lang.cpp_backend.shutil.which", return_value=None): diff --git a/CoderMind/scripts/design_interfaces.py b/CoderMind/scripts/design_interfaces.py index ec9f5f8..5e94227 100644 --- a/CoderMind/scripts/design_interfaces.py +++ b/CoderMind/scripts/design_interfaces.py @@ -21,6 +21,8 @@ import json import logging import argparse +import threading +import time from pathlib import Path from typing import Callable, Dict, Any, List, Optional, Set @@ -57,6 +59,21 @@ import re from common import get_project_background_context from common.language_meta import extract_language_metadata, metadata_with_languages + + +def _start_heartbeat(label: str, interval_sec: int = 120): + """Print periodic progress while a long-running design step is active.""" + stop = threading.Event() + started = time.monotonic() + + def run() -> None: + while not stop.wait(interval_sec): + elapsed = int(time.monotonic() - started) + print(f"[heartbeat] {label} still running ({elapsed}s elapsed)", flush=True) + + thread = threading.Thread(target=run, name=f"{label}-heartbeat", daemon=True) + thread.start() + return stop, thread from decoder_lang import get_backend from func_design.interface_review import review_orphan_units @@ -1672,7 +1689,12 @@ def main(): output_path=str(args.output) ) - result = designer.build(skeleton, data_flow, base_classes) + heartbeat_stop, heartbeat_thread = _start_heartbeat("design_interfaces") + try: + result = designer.build(skeleton, data_flow, base_classes) + finally: + heartbeat_stop.set() + heartbeat_thread.join(timeout=1) # Extract internal keys before JSON serialisation result.pop("_surviving_feature_paths", None) diff --git a/CoderMind/scripts/func_design/interface_agent.py b/CoderMind/scripts/func_design/interface_agent.py 
index 039d947..85c98e4 100644 --- a/CoderMind/scripts/func_design/interface_agent.py +++ b/CoderMind/scripts/func_design/interface_agent.py @@ -2365,14 +2365,16 @@ def design_all_interfaces( } implemented_subtrees[subtree_name] = subtree_implemented - # Save after each subtree + # Save resume data after each subtree without publishing a partial + # interfaces.json as the final artifact. self._save_interfaces( self._build_result( all_interfaces, subtree_order, implemented_subtrees, coverage_status, - ) + ), + partial=True, ) self._print_coverage_progress( coverage_status, @@ -2509,7 +2511,12 @@ def _record_file_coverage( "missing_features": missing_features, }) - def _save_interfaces(self, result: Dict[str, Any]) -> None: + def _partial_output_path(self) -> Optional[Path]: + if not self.output_path: + return None + return Path(f"{self.output_path}.partial") + + def _save_interfaces(self, result: Dict[str, Any], partial: bool = False) -> None: """Save current interfaces result to output_path (if configured). 
Strips internal keys (prefixed with '_') that contain non-serializable @@ -2518,7 +2525,9 @@ def _save_interfaces(self, result: Dict[str, Any]) -> None: if not self.output_path: return try: - output = Path(self.output_path) + output = self._partial_output_path() if partial else Path(self.output_path) + if output is None: + return output.parent.mkdir(parents=True, exist_ok=True) # Filter out non-serializable internal keys serializable = { @@ -2528,6 +2537,10 @@ def _save_interfaces(self, result: Dict[str, Any]) -> None: with open(output, "w", encoding="utf-8") as f: json.dump(serializable, f, indent=2, ensure_ascii=False) self.logger.info(f"[InterfaceOrchestrator] Saved interfaces to {output}") + if not partial: + partial_path = self._partial_output_path() + if partial_path and partial_path.exists(): + partial_path.unlink() except Exception as e: self.logger.warning(f"[InterfaceOrchestrator] Failed to save interfaces: {e}") @@ -2560,8 +2573,13 @@ def _load_existing_interfaces(self) -> Optional[Dict[str, Any]]: """Load an existing interfaces file for subtree-level resume.""" if not self.output_path: return None - path = Path(self.output_path) - if not path.exists(): + candidates = [] + partial_path = self._partial_output_path() + if partial_path is not None: + candidates.append(partial_path) + candidates.append(Path(self.output_path)) + path = next((candidate for candidate in candidates if candidate.exists()), None) + if path is None: return None try: with path.open("r", encoding="utf-8") as handle: diff --git a/CoderMind/tests/test_code_gen_multilingual.py b/CoderMind/tests/test_code_gen_multilingual.py index b8038b4..81dae85 100644 --- a/CoderMind/tests/test_code_gen_multilingual.py +++ b/CoderMind/tests/test_code_gen_multilingual.py @@ -94,10 +94,15 @@ def test_cpp_codegen_prompt_injects_cpp_context(monkeypatch, tmp_path: Path) -> assert "Language: C++" in prompt assert "Source extension: `.cpp`" in prompt assert "C++17" in prompt + assert "mapfile -d" in prompt + 
assert "sources < <(find ." in prompt + assert "No C++ source files found" in prompt # Non-Python projects get the strengthened prohibition, not the legacy line. assert "NOT Python" in prompt assert "Do NOT create ANY `.py` file" in prompt assert "conftest.py" in prompt + assert "standalone translation units" in prompt + assert "create or update a matching header" in prompt assert "python3 -m pytest" not in prompt diff --git a/CoderMind/tests/test_interface_coverage.py b/CoderMind/tests/test_interface_coverage.py index adca079..ef419b0 100644 --- a/CoderMind/tests/test_interface_coverage.py +++ b/CoderMind/tests/test_interface_coverage.py @@ -545,3 +545,32 @@ def test_apply_fixes_can_create_python_interface_file_in_feature_subtree() -> No assert "src/todo_web_app/views/render.py" in interfaces_data["subtrees"][ "Todo Display" ]["files_order"] + + +def test_interface_orchestrator_writes_partial_resume_file(tmp_path) -> None: + output_path = tmp_path / "interfaces.json" + orchestrator = InterfaceOrchestrator(output_path=str(output_path)) + partial_result = { + "subtrees": {"Core": {"interfaces": {}}}, + "subtree_order": ["Core", "UI"], + "implemented_subtrees": {"Core": []}, + "coverage": {"issues": [{"subtree": "UI"}]}, + "success": False, + } + final_result = { + "subtrees": {"Core": {"interfaces": {}}, "UI": {"interfaces": {}}}, + "subtree_order": ["Core", "UI"], + "implemented_subtrees": {"Core": [], "UI": []}, + "coverage": {"issues": []}, + "success": True, + } + + orchestrator._save_interfaces(partial_result, partial=True) + + assert not output_path.exists() + assert Path(f"{output_path}.partial").exists() + + orchestrator._save_interfaces(final_result) + + assert output_path.exists() + assert not Path(f"{output_path}.partial").exists() From 9724df847a8e7a16a63960307b86cb4583428240 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Sat, 20 Jun 2026 19:56:56 +0800 Subject: [PATCH 09/17] fix(decoder): harden C++ interface 
and syntax verification Keep interface generation progress in a .partial file until complete, add design_interfaces heartbeat output, exclude skipped directories from C/C++ syntax source discovery, make C/C++ prompt test commands discover sources at run time, and document the syntax-only success summary fallback. --- CoderMind/scripts/code_gen/batch_prompts.py | 12 ++++++++++++ CoderMind/tests/test_code_gen_multilingual.py | 1 + 2 files changed, 13 insertions(+) diff --git a/CoderMind/scripts/code_gen/batch_prompts.py b/CoderMind/scripts/code_gen/batch_prompts.py index ac6d622..9b7c034 100644 --- a/CoderMind/scripts/code_gen/batch_prompts.py +++ b/CoderMind/scripts/code_gen/batch_prompts.py @@ -212,6 +212,7 @@ for example `5 passed in 0.42s`, `ok ./...`, or `test result: ok`. Copy it verbatim from the run you just performed; do NOT invent it. This lets the runner cross-check your claim against an independent re-run. +{summary_fallback_rule} ## ── Capabilities ───────────────────────────────────────── @@ -545,6 +546,16 @@ def _test_timeout_rule(backend: LanguageBackend) -> str: return "- Run long-lived servers, watchers, or interactive commands instead of the exact test command" +def _summary_fallback_rule(backend: LanguageBackend, test_command: str) -> str: + if backend.name in {"c", "cpp"} and "-fsyntax-only" in test_command: + return ( + "\nFor C/C++ syntax-only commands: if the exact command exits 0 " + "and prints no summary line, use exactly " + "`PYTEST_SUMMARY: syntax check passed`.\n" + ) + return "" + + def _build_language_context(backend: LanguageBackend, test_command: str) -> str: """Build the target-language prompt section.""" hints = backend.prompt_hints() @@ -925,6 +936,7 @@ def build_tdd_prompt( dependency_install_capability=_dependency_install_capability(backend, repo_path), dependency_management=_dependency_management_text(backend, repo_path), test_timeout_rule=_test_timeout_rule(backend), + summary_fallback_rule=_summary_fallback_rule(backend, 
pytest_cmd), import_convention=import_convention, language_context=_build_language_context(backend, pytest_cmd), dependency_context=dep_ctx_str, diff --git a/CoderMind/tests/test_code_gen_multilingual.py b/CoderMind/tests/test_code_gen_multilingual.py index 81dae85..b7bb3b6 100644 --- a/CoderMind/tests/test_code_gen_multilingual.py +++ b/CoderMind/tests/test_code_gen_multilingual.py @@ -97,6 +97,7 @@ def test_cpp_codegen_prompt_injects_cpp_context(monkeypatch, tmp_path: Path) -> assert "mapfile -d" in prompt assert "sources < <(find ." in prompt assert "No C++ source files found" in prompt + assert "PYTEST_SUMMARY: syntax check passed" in prompt # Non-Python projects get the strengthened prohibition, not the legacy line. assert "NOT Python" in prompt assert "Do NOT create ANY `.py` file" in prompt From aa88b4c3f05a7f40644ae5eaee6f9554d4578710 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Sun, 21 Jun 2026 10:12:43 +0800 Subject: [PATCH 10/17] fix(decoder): build CMake targets before CTest verification Run cmake --build build during C/C++ test environment preparation so ctest --test-dir build can execute generated test binaries instead of failing on missing executables. --- CoderMind/scripts/decoder_lang/backend.py | 12 ++++++-- .../decoder_lang/tests/test_c_cpp_backend.py | 29 +++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/CoderMind/scripts/decoder_lang/backend.py b/CoderMind/scripts/decoder_lang/backend.py index fb7964c..5a61c3c 100644 --- a/CoderMind/scripts/decoder_lang/backend.py +++ b/CoderMind/scripts/decoder_lang/backend.py @@ -476,7 +476,7 @@ def resolve_decoder_language( def cmake_reconfigure(env: Any) -> None: - """Reconfigure a CMake build dir so a later ``ctest`` sees a fresh test set. + """Prepare a CMake build dir so a later ``ctest`` can execute tests. 
The C/C++ test command runs ``ctest`` against a ``build/`` directory whose registered test set is materialised by ``cmake``. When sources @@ -484,7 +484,9 @@ def cmake_reconfigure(env: Any) -> None: observe a STALE / partial test set (the post-verify "ran 1 test" false-failure that failed an otherwise-green C++ stage). Running ``cmake -S -B build`` here regenerates the test registration - against the current tree before tests run. + against the current tree before tests run. Running + ``cmake --build build`` then materialises the executable files that + ``ctest --test-dir build`` launches. No-op (silently) when there is no ``CMakeLists.txt`` or no ``cmake`` on PATH — the project then uses ``make`` / direct compile, which has @@ -510,6 +512,12 @@ def cmake_reconfigure(env: Any) -> None: capture_output=True, timeout=120, ) + subprocess.run( + [cmake, "--build", str(root / "build")], + cwd=str(root), + capture_output=True, + timeout=300, + ) except Exception: # noqa: BLE001 - reconfigure is best-effort return diff --git a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py index a157e84..d67c7f5 100644 --- a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py +++ b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py @@ -195,6 +195,35 @@ def test_cmake_test_command_runs_ctest_in_build_dir(self) -> None: ], ) + def test_prepare_test_env_configures_and_builds_cmake_project(self) -> None: + with TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + (root / "CMakeLists.txt").write_text("cmake_minimum_required(VERSION 3.16)\n") + env = EnvHandle( + project_root=root, + extra={"cmake": "/usr/bin/cmake"}, + ) + calls = [] + + def fake_run(args, **kwargs): + calls.append(args) + + class Result: + returncode = 0 + + return Result() + + with patch("subprocess.run", side_effect=fake_run): + self.backend.prepare_test_env(env) + + self.assertEqual( + calls, + [ + ["/usr/bin/cmake", "-S", 
str(root), "-B", str(root / "build")], + ["/usr/bin/cmake", "--build", str(root / "build")], + ], + ) + def test_syntax_fallback_skips_git_refs(self) -> None: with TemporaryDirectory() as temp_dir: root = Path(temp_dir) From 9aef1e189a5e890e03b3f206f070cbe7f74612ea Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Sun, 21 Jun 2026 17:56:50 +0800 Subject: [PATCH 11/17] fix(decoder): Enforce generated artifact hygiene Add a shared generated-artifact policy for prompt guidance, local git excludes, post-verification, and merge-time checks. Align CMake prompt verification with configure, build, and ctest so agents repair source and build configuration instead of generated build files. --- CoderMind/scripts/code_gen/batch_prompts.py | 22 ++ CoderMind/scripts/code_gen/git_ops.py | 13 + CoderMind/scripts/code_gen/post_verify.py | 27 +- .../scripts/common/generated_artifacts.py | 245 ++++++++++++++++++ CoderMind/scripts/run_batch.py | 2 + CoderMind/tests/test_code_gen_multilingual.py | 13 + .../tests/test_generated_artifact_hygiene.py | 237 +++++++++++++++++ 7 files changed, 558 insertions(+), 1 deletion(-) create mode 100644 CoderMind/scripts/common/generated_artifacts.py create mode 100644 CoderMind/tests/test_generated_artifact_hygiene.py diff --git a/CoderMind/scripts/code_gen/batch_prompts.py b/CoderMind/scripts/code_gen/batch_prompts.py index 9b7c034..d2cf1e6 100644 --- a/CoderMind/scripts/code_gen/batch_prompts.py +++ b/CoderMind/scripts/code_gen/batch_prompts.py @@ -26,6 +26,7 @@ from typing import Any, Dict, List, Optional from common.execution_state import BatchExecutionState, load_code_gen_state +from common.generated_artifacts import generated_artifact_prompt_rule from common.import_normalizer import build_import_convention_snippet from common.paths import ( CODE_GEN_STATE_FILE as STATE_FILE, @@ -443,6 +444,18 @@ def _dynamic_c_family_syntax_command( ) +def _cmake_c_family_test_command(command: List[str]) -> str: + ctest = 
shlex.quote(str(command[0])) + return ( + "bash -lc " + + shlex.quote( + "cmake -S . -B build && " + "cmake --build build && " + f"{ctest} --test-dir build --output-on-failure" + ) + ) + + def _build_backend_test_cmd( backend: LanguageBackend, repo_path: Path, @@ -456,6 +469,8 @@ def _build_backend_test_cmd( env = backend.detect_env(repo_path) or EnvHandle(project_root=repo_path.resolve()) try: command = backend.test_command(env) + if backend.name in {"c", "cpp"} and command and "ctest" in Path(str(command[0])).name: + return _cmake_c_family_test_command(command) if backend.name in {"c", "cpp"} and "-fsyntax-only" in command: return _dynamic_c_family_syntax_command(backend, command) return _shell_join(command) @@ -569,6 +584,13 @@ def _build_language_context(backend: LanguageBackend, test_command: str) -> str: f"- Module naming: {hints.module_naming_rule}\n" f"- Style: {hints.style_directive}\n" ) + artifact_extra = "" + if backend.name in {"c", "cpp"}: + artifact_extra = ( + "If CTest needs arguments or target wiring, change source files " + "such as `CMakeLists.txt` or the test source instead." 
+ ) + context += generated_artifact_prompt_rule(artifact_extra) if backend.name != "python": # The decoder's defaults are Python-centric; without an explicit # prohibition the sub-agent tends to add Python helpers (a main.py diff --git a/CoderMind/scripts/code_gen/git_ops.py b/CoderMind/scripts/code_gen/git_ops.py index 06491c9..c05aef3 100644 --- a/CoderMind/scripts/code_gen/git_ops.py +++ b/CoderMind/scripts/code_gen/git_ops.py @@ -20,6 +20,10 @@ from pathlib import Path from typing import List, Optional, Tuple +from common.generated_artifacts import ( + find_persisted_generated_artifact_changes, + format_generated_artifact_violation, +) from common.git_utils import GitRunner, sanitize_branch_component logger = logging.getLogger(__name__) @@ -141,6 +145,15 @@ def merge_batch_branch( ) return False, "branch_missing" + generated_artifact_changes = find_persisted_generated_artifact_changes( + git.repo_path, + base_ref=git.main_branch, + ) + if generated_artifact_changes: + summary = format_generated_artifact_violation(generated_artifact_changes) + logger.error("Cannot merge generated artifact changes:\n%s", summary) + return False, summary + # Commit any leftover changes if git.has_uncommitted_changes(): git.stage_and_commit(f"batch: final changes for {batch_id}") diff --git a/CoderMind/scripts/code_gen/post_verify.py b/CoderMind/scripts/code_gen/post_verify.py index d04772f..a73cc4b 100644 --- a/CoderMind/scripts/code_gen/post_verify.py +++ b/CoderMind/scripts/code_gen/post_verify.py @@ -24,6 +24,11 @@ from pathlib import Path from typing import Tuple +from common.generated_artifacts import ( + ensure_generated_artifact_excludes, + find_persisted_generated_artifact_changes, + format_generated_artifact_violation, +) from common.git_utils import GitRunner from common.task_batch import PlannedTask from code_gen.prompts import is_project_docs_batch @@ -61,7 +66,18 @@ def post_verify( Returns: ``(passed, test_output_summary)`` """ - # Skip verification for docs batches 
+ ensure_generated_artifact_excludes(repo_path) + + generated_artifact_changes = find_persisted_generated_artifact_changes( + repo_path, + base_ref=GitRunner.MAIN_BRANCH, + ) + if generated_artifact_changes: + summary = format_generated_artifact_violation(generated_artifact_changes) + logger.warning("Post-verification rejected generated artifact changes:\n%s", summary) + return False, summary + + # Skip test execution for docs batches after shared git hygiene gates pass. if is_project_docs_batch(task): logger.info("Skipping post-verification for docs batch") return True, "Documentation batch — no tests." @@ -137,6 +153,15 @@ def _git_diff_test_files(prefix: str = "tests/") -> list: backend=backend, ) + generated_artifact_changes = find_persisted_generated_artifact_changes( + repo_path, + base_ref=GitRunner.MAIN_BRANCH, + ) + if generated_artifact_changes: + summary = format_generated_artifact_violation(generated_artifact_changes) + logger.warning("Post-verification rejected generated artifact changes:\n%s", summary) + return False, summary + # Build summary summary_lines = [ f"passed={result.passed} failed={result.failed} " diff --git a/CoderMind/scripts/common/generated_artifacts.py b/CoderMind/scripts/common/generated_artifacts.py new file mode 100644 index 0000000..65f503a --- /dev/null +++ b/CoderMind/scripts/common/generated_artifacts.py @@ -0,0 +1,245 @@ +"""Generated artifact hygiene rules for CoderMind-managed repositories.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path, PurePosixPath +import subprocess + + +_EXCLUDE_MARKER_BEGIN = "# BEGIN CoderMind generated artifact hygiene" +_EXCLUDE_MARKER_END = "# END CoderMind generated artifact hygiene" + +GIT_LOCAL_EXCLUDE_PATTERNS: tuple[str, ...] 
= ( + ".cmind/", + "__pycache__/", + ".pytest_cache/", + ".mypy_cache/", + ".ruff_cache/", + ".venv/", + "venv/", + "env/", + "node_modules/", + ".next/", + ".nuxt/", + "target/", + "build/", + "dist/", + "coverage/", + "cmake-build-debug/", + "cmake-build-release/", + "CMakeFiles/", + "Testing/", + "*.pyc", + "*.pyo", + "*.pyd", + "*.o", + "*.obj", + "*.a", + "*.so", + "*.dylib", + "*.dll", + "*.exe", + "*.rlib", + "*.rmeta", + "CMakeCache.txt", + "CTestTestfile.cmake", + "cmake_install.cmake", + "compile_commands.json", + ".ninja_log", + ".ninja_deps", +) + +GENERATED_ARTIFACT_DIRS = frozenset( + pattern.rstrip("/") + for pattern in GIT_LOCAL_EXCLUDE_PATTERNS + if pattern.endswith("/") +) +GENERATED_ARTIFACT_FILES = frozenset( + pattern + for pattern in GIT_LOCAL_EXCLUDE_PATTERNS + if not pattern.endswith("/") and not pattern.startswith("*.") +) +GENERATED_ARTIFACT_SUFFIXES = tuple( + pattern[1:] + for pattern in GIT_LOCAL_EXCLUDE_PATTERNS + if pattern.startswith("*.") +) + +_PROMPT_EXAMPLES = ( + "build/", + "target/", + "dist/", + "node_modules/", + "CMakeFiles/", + "CTestTestfile.cmake", + "cmake_install.cmake", + "compile_commands.json", + "__pycache__/", +) + + +@dataclass(frozen=True) +class GeneratedArtifactChange: + """A generated artifact path found in a persistent git change scope.""" + + path: str + scope: str + + +def _normalize_repo_path(path: str) -> str: + normalized = path.replace("\\", "/").strip() + while normalized.startswith("./"): + normalized = normalized[2:] + return normalized.strip("/") + + +def is_generated_artifact_path(path: str) -> bool: + """Return True when ``path`` belongs to a generated output/cache area.""" + normalized = _normalize_repo_path(path) + if not normalized: + return False + + parts = tuple(part for part in PurePosixPath(normalized).parts if part not in {"", "."}) + if not parts: + return False + + if any(part in GENERATED_ARTIFACT_DIRS for part in parts): + return True + + name = parts[-1] + if name in 
GENERATED_ARTIFACT_FILES: + return True + return name.endswith(GENERATED_ARTIFACT_SUFFIXES) + + +def _run_git( + repo_path: Path, + args: list[str], + timeout: float = 10.0, +) -> subprocess.CompletedProcess[str] | None: + try: + return subprocess.run( + ["git", *args], + cwd=repo_path, + capture_output=True, + text=True, + timeout=timeout, + check=False, + ) + except (FileNotFoundError, OSError, subprocess.TimeoutExpired): + return None + + +def _git_changed_artifacts( + repo_path: Path, + args: list[str], + scope: str, +) -> list[GeneratedArtifactChange]: + result = _run_git(repo_path, args) + if result is None or result.returncode != 0: + return [] + changes: list[GeneratedArtifactChange] = [] + for raw_path in result.stdout.splitlines(): + path = _normalize_repo_path(raw_path) + if path and is_generated_artifact_path(path): + changes.append(GeneratedArtifactChange(path=path, scope=scope)) + return changes + + +def find_persisted_generated_artifact_changes( + repo_path: Path, + *, + base_ref: str | None = None, +) -> list[GeneratedArtifactChange]: + """Find generated artifacts in committed, staged, or tracked diffs. + + Untracked ignored build outputs are intentionally ignored; CMake, Cargo, + npm, and pytest legitimately create those while tests run. The local git + exclude policy prevents them from being staged by ordinary ``git add -A``. 
+ """ + repo_path = Path(repo_path) + candidates: list[GeneratedArtifactChange] = [] + if base_ref: + candidates.extend( + _git_changed_artifacts( + repo_path, + ["diff", "--name-only", f"{base_ref}..HEAD"], + "branch", + ) + ) + candidates.extend( + _git_changed_artifacts(repo_path, ["diff", "--name-only", "--cached"], "staged") + ) + candidates.extend( + _git_changed_artifacts(repo_path, ["diff", "--name-only"], "worktree") + ) + + unique: dict[str, GeneratedArtifactChange] = {} + for change in candidates: + unique.setdefault(change.path, change) + return list(unique.values()) + + +def format_generated_artifact_violation(changes: list[GeneratedArtifactChange]) -> str: + """Format a human-readable violation message for codegen agents.""" + shown = changes[:10] + lines = [ + "Generated build/dependency/cache artifacts are not valid source changes.", + "Update source, tests, manifests, or build configuration files instead.", + "Invalid paths:", + ] + lines.extend(f"- {change.path} ({change.scope})" for change in shown) + if len(changes) > len(shown): + lines.append(f"- ... {len(changes) - len(shown)} more") + return "\n".join(lines) + + +def generated_artifact_prompt_rule(extra_guidance: str = "") -> str: + """Return the prompt rule that mirrors the verification policy.""" + examples = ", ".join(f"`{example}`" for example in _PROMPT_EXAMPLES) + text = ( + "- Do NOT edit or commit generated build, dependency, cache, or test-output " + f"artifacts such as {examples}. Update source files, test files, dependency " + "manifests, or build configuration instead." + ) + if extra_guidance: + text += f" {extra_guidance.strip()}" + return text + "\n" + + +def _managed_exclude_block() -> str: + return "\n".join( + (_EXCLUDE_MARKER_BEGIN, *GIT_LOCAL_EXCLUDE_PATTERNS, _EXCLUDE_MARKER_END) + ) + + +def ensure_generated_artifact_excludes(repo_path: Path) -> bool: + """Install CoderMind's generated-artifact ignores in ``.git/info/exclude``. 
+ + Returns True when the local exclude file was changed. Returns False when + the repository is not a git worktree or the current block is present. + """ + repo_path = Path(repo_path) + result = _run_git(repo_path, ["rev-parse", "--git-path", "info/exclude"]) + if result is None or result.returncode != 0 or not result.stdout.strip(): + return False + + exclude_path = Path(result.stdout.strip()) + if not exclude_path.is_absolute(): + exclude_path = repo_path / exclude_path + exclude_path.parent.mkdir(parents=True, exist_ok=True) + content = exclude_path.read_text(encoding="utf-8") if exclude_path.exists() else "" + block = _managed_exclude_block() + if _EXCLUDE_MARKER_BEGIN in content and _EXCLUDE_MARKER_END in content: + before, rest = content.split(_EXCLUDE_MARKER_BEGIN, 1) + _, after = rest.split(_EXCLUDE_MARKER_END, 1) + updated = f"{before}{block}{after}" + if updated == content: + return False + exclude_path.write_text(updated, encoding="utf-8") + return True + + prefix = "" if not content or content.endswith("\n") else "\n" + exclude_path.write_text(f"{content}{prefix}{block}\n", encoding="utf-8") + return True \ No newline at end of file diff --git a/CoderMind/scripts/run_batch.py b/CoderMind/scripts/run_batch.py index bf34e3b..d2ed385 100644 --- a/CoderMind/scripts/run_batch.py +++ b/CoderMind/scripts/run_batch.py @@ -47,6 +47,7 @@ complete_batch as state_complete_batch, skip_current_batch as state_skip_batch, ) +from common.generated_artifacts import ensure_generated_artifact_excludes from common.git_utils import GitRunner from common.task_batch import PlannedTask, get_task_by_id from common.paths import ( @@ -630,6 +631,7 @@ def run_batch( return _error(f"Failed to create branch for batch '{batch_id}'", scripts) logger.info("Branch: %s (initial_commit=%s)", branch_name, initial_commit[:8] if initial_commit else "none") + ensure_generated_artifact_excludes(repo_path) # ── Step 4: Setup language environment ────────────────────────── diff --git 
a/CoderMind/tests/test_code_gen_multilingual.py b/CoderMind/tests/test_code_gen_multilingual.py index b7bb3b6..6c77ebd 100644 --- a/CoderMind/tests/test_code_gen_multilingual.py +++ b/CoderMind/tests/test_code_gen_multilingual.py @@ -104,9 +104,22 @@ def test_cpp_codegen_prompt_injects_cpp_context(monkeypatch, tmp_path: Path) -> assert "conftest.py" in prompt assert "standalone translation units" in prompt assert "create or update a matching header" in prompt + assert "Do NOT edit or commit generated build, dependency, cache" in prompt assert "python3 -m pytest" not in prompt +def test_cpp_codegen_prompt_aligns_cmake_command_with_post_verify(monkeypatch, tmp_path: Path) -> None: + _set_language(monkeypatch, tmp_path, "cpp") + (tmp_path / "CMakeLists.txt").write_text("cmake_minimum_required(VERSION 3.16)\n") + task = _task("src/tasklite_cli/task.cpp") + + prompt = batch_prompts.build_tdd_prompt(_state(task), task, tmp_path) + + assert "cmake -S . -B build" in prompt + assert "cmake --build build" in prompt + assert "ctest --test-dir build --output-on-failure" in prompt + + def test_non_python_integration_prompt_uses_native_entry_point(monkeypatch, tmp_path: Path) -> None: # Regression for the bug where every language was told "Do NOT create # main.py", planting a Python file name into Go/JS/C projects. 
diff --git a/CoderMind/tests/test_generated_artifact_hygiene.py b/CoderMind/tests/test_generated_artifact_hygiene.py new file mode 100644 index 0000000..35af678 --- /dev/null +++ b/CoderMind/tests/test_generated_artifact_hygiene.py @@ -0,0 +1,237 @@ +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +import pytest + + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +from code_gen import post_verify # noqa: E402 +from code_gen.git_ops import merge_batch_branch # noqa: E402 +from code_gen.test_runner import TestResult as CodegenTestResult # noqa: E402 +from common.generated_artifacts import ( # noqa: E402 + ensure_generated_artifact_excludes, + find_persisted_generated_artifact_changes, + generated_artifact_prompt_rule, + is_generated_artifact_path, +) +from common.git_utils import GitRunner # noqa: E402 +from common.task_batch import PlannedTask # noqa: E402 + + +def _run_git(repo_path: Path, *args: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + ["git", *args], + cwd=repo_path, + capture_output=True, + text=True, + check=True, + ) + + +def _init_repo(repo_path: Path) -> Path: + repo_path.mkdir(parents=True, exist_ok=True) + _run_git(repo_path, "init", "-b", "main") + _run_git(repo_path, "config", "user.email", "coder@example.com") + _run_git(repo_path, "config", "user.name", "CoderMind Test") + source = repo_path / "src" / "app.py" + source.parent.mkdir() + source.write_text("def run():\n return 1\n", encoding="utf-8") + _run_git(repo_path, "add", "src/app.py") + _run_git(repo_path, "commit", "-m", "init") + return repo_path + + +def _task() -> PlannedTask: + return PlannedTask( + task="Implement the source unit.", + file_path="src/app.py", + units_key=["function run"], + unit_to_code={"function run": "def run(): ..."}, + unit_to_features={"function run": ["Core/run"]}, + subtree="Core", + ) + + +def 
test_generated_artifact_path_policy_covers_common_outputs() -> None: + blocked = [ + "build/CTestTestfile.cmake", + "src/CMakeFiles/app.dir/main.cpp.o", + "target/debug/libapp.rlib", + "node_modules/pkg/index.js", + "app.pyc", + "__pycache__/app.cpython-312.pyc", + "compile_commands.json", + "dist/bundle.js", + ] + allowed = [ + "src/app.py", + "src/build_config.py", + "CMakeLists.txt", + "Makefile", + "package.json", + "Cargo.toml", + ] + + assert all(is_generated_artifact_path(path) for path in blocked) + assert not any(is_generated_artifact_path(path) for path in allowed) + + +def test_generated_artifact_prompt_rule_uses_policy_examples() -> None: + rule = generated_artifact_prompt_rule("Change `CMakeLists.txt` instead.") + + assert "build/" in rule + assert "target/" in rule + assert "node_modules/" in rule + assert "CTestTestfile.cmake" in rule + assert "Change `CMakeLists.txt` instead." in rule + + +def test_local_excludes_prevent_git_add_a_from_staging_outputs(tmp_path: Path) -> None: + repo_path = _init_repo(tmp_path / "repo") + + assert ensure_generated_artifact_excludes(repo_path) + (repo_path / "build").mkdir() + (repo_path / "build" / "CTestTestfile.cmake").write_text("generated\n", encoding="utf-8") + source = repo_path / "src" / "feature.py" + source.write_text("def feature():\n return 2\n", encoding="utf-8") + + _run_git(repo_path, "add", "-A") + status = _run_git(repo_path, "status", "--porcelain").stdout + + assert "src/feature.py" in status + assert "build/CTestTestfile.cmake" not in status + + +def test_local_excludes_upgrade_existing_managed_block(tmp_path: Path) -> None: + repo_path = _init_repo(tmp_path / "repo") + exclude_path = repo_path / ".git" / "info" / "exclude" + exclude_path.write_text( + "# keep\n" + "# BEGIN CoderMind generated artifact hygiene\n" + "old-output/\n" + "# END CoderMind generated artifact hygiene\n" + "# tail\n", + encoding="utf-8", + ) + + assert ensure_generated_artifact_excludes(repo_path) + content = 
exclude_path.read_text(encoding="utf-8") + + assert "# keep" in content + assert "# tail" in content + assert "old-output/" not in content + assert "build/" in content + assert "target/" in content + + +def test_persisted_generated_artifact_changes_are_reported(tmp_path: Path) -> None: + repo_path = _init_repo(tmp_path / "repo") + _run_git(repo_path, "checkout", "-b", "batch/generated-artifact") + (repo_path / "build").mkdir() + (repo_path / "build" / "CTestTestfile.cmake").write_text("generated\n", encoding="utf-8") + + _run_git(repo_path, "add", "-f", "build/CTestTestfile.cmake") + staged = find_persisted_generated_artifact_changes(repo_path, base_ref="main") + + assert [(change.path, change.scope) for change in staged] == [ + ("build/CTestTestfile.cmake", "staged") + ] + + _run_git(repo_path, "commit", "-m", "bad artifact") + committed = find_persisted_generated_artifact_changes(repo_path, base_ref="main") + + assert [(change.path, change.scope) for change in committed] == [ + ("build/CTestTestfile.cmake", "branch") + ] + + +def test_post_verify_rejects_persisted_generated_artifacts_before_tests( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + repo_path = _init_repo(tmp_path / "repo") + _run_git(repo_path, "checkout", "-b", "batch/generated-artifact") + (repo_path / "build").mkdir() + (repo_path / "build" / "CTestTestfile.cmake").write_text("generated\n", encoding="utf-8") + _run_git(repo_path, "add", "-f", "build/CTestTestfile.cmake") + _run_git(repo_path, "commit", "-m", "bad artifact") + + monkeypatch.setattr( + post_verify, + "run_project_tests", + lambda *_args, **_kwargs: pytest.fail("tests should not run"), + ) + + passed, output = post_verify.post_verify(repo_path, _task()) + + assert not passed + assert "Generated build/dependency/cache artifacts" in output + assert "build/CTestTestfile.cmake" in output + + +def test_post_verify_allows_untracked_ignored_build_outputs( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + 
repo_path = _init_repo(tmp_path / "repo") + _run_git(repo_path, "checkout", "-b", "batch/generated-artifact") + ensure_generated_artifact_excludes(repo_path) + (repo_path / "build").mkdir() + (repo_path / "build" / "CTestTestfile.cmake").write_text("generated\n", encoding="utf-8") + + class FakeBackend: + name = "go" + display_name = "Go" + + monkeypatch.setattr( + post_verify, + "resolve_test_backend", + lambda **_kwargs: FakeBackend(), + ) + monkeypatch.setattr( + post_verify, + "run_project_tests", + lambda *_args, **_kwargs: CodegenTestResult( + success=True, + return_code=0, + output="ok\n", + test_files=[], + passed=1, + ), + ) + + passed, output = post_verify.post_verify(repo_path, _task()) + + assert passed + assert output == "passed=1 failed=0 errors=0 skipped=0" + + +def test_merge_batch_rejects_committed_generated_artifacts(tmp_path: Path) -> None: + repo_path = _init_repo(tmp_path / "repo") + _run_git(repo_path, "checkout", "-b", "batch/generated-artifact") + (repo_path / "build").mkdir() + (repo_path / "build" / "CTestTestfile.cmake").write_text("generated\n", encoding="utf-8") + _run_git(repo_path, "add", "-f", "build/CTestTestfile.cmake") + _run_git(repo_path, "commit", "-m", "bad artifact") + + success, error = merge_batch_branch( + GitRunner(str(repo_path)), + "batch/generated-artifact", + "task-id", + file_path="src/app.py", + units=["function run"], + ) + + assert not success + assert error is not None + assert "Generated build/dependency/cache artifacts" in error + assert ( + _run_git(repo_path, "branch", "--show-current").stdout.strip() + == "batch/generated-artifact" + ) \ No newline at end of file From 910e5ec9cfb76d1c538999a63232528cdaaa9dea Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Mon, 22 Jun 2026 16:33:58 +0800 Subject: [PATCH 12/17] fix(decoder): Harden native verification gates Run post-verification for documentation batches so README and docs edits cannot bypass native test suites. 
Treat smoke validation failures and C/C++ no-op make test output as validation failures. Narrow generated artifact matching to root build directories so source paths such as configs/build remain valid. --- .../scripts/code_gen/final_validation.py | 60 +++++++++++++++++-- CoderMind/scripts/code_gen/post_verify.py | 6 -- .../scripts/common/generated_artifacts.py | 21 ++++--- CoderMind/scripts/decoder_lang/c_backend.py | 23 +++++-- CoderMind/scripts/decoder_lang/cpp_backend.py | 25 ++++++-- .../decoder_lang/tests/test_c_cpp_backend.py | 30 ++++++++++ CoderMind/tests/test_final_test_repair.py | 26 ++++++++ .../tests/test_generated_artifact_hygiene.py | 52 ++++++++++++++++ 8 files changed, 217 insertions(+), 26 deletions(-) diff --git a/CoderMind/scripts/code_gen/final_validation.py b/CoderMind/scripts/code_gen/final_validation.py index b3edf9e..b9b97fe 100644 --- a/CoderMind/scripts/code_gen/final_validation.py +++ b/CoderMind/scripts/code_gen/final_validation.py @@ -46,6 +46,32 @@ ) +def _fail_final_test_for_smoke_error( + result_dict: Dict[str, Any], + message: str, + *, + smoke_dict: Optional[Dict[str, Any]] = None, +) -> None: + """Mark final validation failed because smoke validation failed.""" + result_dict["success"] = False + result_dict["errors"] = max(int(result_dict.get("errors", 0) or 0), 1) + result_dict["output"] = message + result_dict["next_action"] = ( + "Unit tests passed, but smoke validation failed. Fix the smoke " + "failure and re-run final validation." 
+ ) + result_dict["smoke_test_error"] = message + if smoke_dict is None: + smoke_dict = { + "success": False, + "type": "smoke_test", + "findings": [{"severity": "error", "message": message}], + "error_count": 1, + "warning_count": 0, + } + result_dict["smoke_test"] = smoke_dict + + def final_test( repo_path: Optional[Path] = None, state_path: Path = STATE_FILE, @@ -238,6 +264,8 @@ def final_test( actionable = [f for f in smoke_result.findings if f.severity == "error"] if actionable: + remaining = actionable + recheck_success = True findings_desc = "\n".join( f"- [{f.severity}] {f.message}" for f in actionable ) @@ -293,6 +321,7 @@ def final_test( result_dict["smoke_test"] = smoke_result_2.to_dict() result_dict["smoke_repair_attempted"] = True result_dict["post_repair_tests_pass"] = recheck.success + recheck_success = recheck.success remaining = [ f for f in smoke_result_2.findings if f.severity == "error" @@ -303,18 +332,39 @@ def final_test( len(remaining), len(actionable), "PASS" if recheck.success else "FAIL", ) + if remaining or not recheck_success: + smoke_dict = result_dict.get("smoke_test") + if not isinstance(smoke_dict, dict): + smoke_dict = {} + message = ( + "Smoke validation failed after unit tests passed. " + f"Remaining smoke errors: {len(remaining)}; " + f"post-repair tests pass: {recheck_success}." 
+ ) + _fail_final_test_for_smoke_error( + result_dict, + message, + smoke_dict=smoke_dict, + ) except ImportError: logger.debug("smoke_test module not available, skipping") except Exception as exc: logger.warning("Smoke test / repair failed: %s", exc) + _fail_final_test_for_smoke_error( + result_dict, + f"Smoke test failed to run: {exc}", + ) # Save per-stage results for global_review context save_stage_result("final_test", { - "success": result.success, - "passed": result.passed, - "failed": result.failed, - "errors": result.errors, - "output_tail": "\n".join(result.output.splitlines()[-40:]) if not result.success else "", + "success": bool(result_dict.get("success")), + "passed": result_dict.get("passed", result.passed), + "failed": result_dict.get("failed", result.failed), + "errors": result_dict.get("errors", result.errors), + "output_tail": ( + "\n".join(str(result_dict.get("output", "")).splitlines()[-40:]) + if not result_dict.get("success") else "" + ), }) smoke_data = result_dict.get("smoke_test") if isinstance(smoke_data, dict): diff --git a/CoderMind/scripts/code_gen/post_verify.py b/CoderMind/scripts/code_gen/post_verify.py index a73cc4b..c5de7ee 100644 --- a/CoderMind/scripts/code_gen/post_verify.py +++ b/CoderMind/scripts/code_gen/post_verify.py @@ -31,7 +31,6 @@ ) from common.git_utils import GitRunner from common.task_batch import PlannedTask -from code_gen.prompts import is_project_docs_batch from code_gen.test_runner import ( ensure_deps_installed, find_related_test_files, @@ -77,11 +76,6 @@ def post_verify( logger.warning("Post-verification rejected generated artifact changes:\n%s", summary) return False, summary - # Skip test execution for docs batches after shared git hygiene gates pass. - if is_project_docs_batch(task): - logger.info("Skipping post-verification for docs batch") - return True, "Documentation batch — no tests." - # Use the global safety-net timeout for all task types. 
# Per-test hang prevention is handled by pytest-timeout (--timeout=DEFAULT_TEST_TIMEOUT). if timeout == 0: diff --git a/CoderMind/scripts/common/generated_artifacts.py b/CoderMind/scripts/common/generated_artifacts.py index 65f503a..b2e0768 100644 --- a/CoderMind/scripts/common/generated_artifacts.py +++ b/CoderMind/scripts/common/generated_artifacts.py @@ -20,12 +20,12 @@ "venv/", "env/", "node_modules/", - ".next/", - ".nuxt/", - "target/", - "build/", - "dist/", - "coverage/", + "/.next/", + "/.nuxt/", + "/target/", + "/build/", + "/dist/", + "/coverage/", "cmake-build-debug/", "cmake-build-release/", "CMakeFiles/", @@ -53,7 +53,12 @@ GENERATED_ARTIFACT_DIRS = frozenset( pattern.rstrip("/") for pattern in GIT_LOCAL_EXCLUDE_PATTERNS - if pattern.endswith("/") + if pattern.endswith("/") and not pattern.startswith("/") +) +ROOT_GENERATED_ARTIFACT_DIRS = frozenset( + pattern.strip("/") + for pattern in GIT_LOCAL_EXCLUDE_PATTERNS + if pattern.startswith("/") and pattern.endswith("/") ) GENERATED_ARTIFACT_FILES = frozenset( pattern @@ -104,6 +109,8 @@ def is_generated_artifact_path(path: str) -> bool: if not parts: return False + if parts[0] in ROOT_GENERATED_ARTIFACT_DIRS: + return True if any(part in GENERATED_ARTIFACT_DIRS for part in parts): return True diff --git a/CoderMind/scripts/decoder_lang/c_backend.py b/CoderMind/scripts/decoder_lang/c_backend.py index 7064b3c..9b922b7 100644 --- a/CoderMind/scripts/decoder_lang/c_backend.py +++ b/CoderMind/scripts/decoder_lang/c_backend.py @@ -20,6 +20,11 @@ _PLACEHOLDER_RE = re.compile( r"(?is)\b(?:TODO|PLACEHOLDER|NOT IMPLEMENTED|abort\s*\(|assert\s*\(\s*0\s*\))" ) +_COMPILE_ONLY_COMMAND_RE = re.compile(r"(?m)^\s*\S*(?:cc|gcc|clang)(?=\s).*\s-c\s") +_TEST_EXECUTION_RE = re.compile( + r"(?im)(^|\s)(?:PASS|FAIL)(?:\s|:)|^\s*ok\b|^\s*1\.\.|" + r"test result:|\btests? passed\b|^\s*running\s+\d+\s+tests?" 
+) _C_SOURCE_EXTENSIONS = (".c", ".h") _C_KEYWORDS = frozenset({ "auto", "break", "case", "char", "const", "continue", "default", @@ -201,9 +206,9 @@ def parse_test_output(self, raw: str, exit_code: int) -> TestRunResult: observed = int(out_of.group(1)) if out_of else None if ran_no_tests( exit_code, raw, observed_tests=observed, - no_tests_markers=("No tests were found",), + no_tests_markers=("No tests were found", "Nothing to be done for 'test'"), empty_output_is_no_op=False, - ): + ) or self._looks_like_compile_only_make_test(raw): status = "errored" else: status = "passed" if exit_code == 0 else "failed" @@ -229,6 +234,13 @@ def parse_test_output(self, raw: str, exit_code: int) -> TestRunResult: extra={"tool": "make test or compiler syntax check"}, ) + @staticmethod + def _looks_like_compile_only_make_test(raw: str) -> bool: + """Return True when make output compiled tests but ran none.""" + if not _COMPILE_ONLY_COMMAND_RE.search(raw): + return False + return not _TEST_EXECUTION_RE.search(raw) + _PROMPT_HINTS_SINGLETON: PromptHints | None = None def prompt_hints(self) -> PromptHints: @@ -277,7 +289,9 @@ def project_task_templates(self, context: ProjectTaskContext) -> ProjectTaskTemp 1. Prefer standard C99 and the C standard library. 2. Keep compiler flags strict: `-std=c99 -Wall -Wextra`. 3. Provide `make`, `make test`, and `make clean` targets. -4. Keep generated binaries and test artefacts out of source control. +4. `make test` must build and execute real test binaries; it must not only + compile `.o` files or print "Nothing to be done". +5. Keep generated binaries and test artefacts out of source control. **Important:** - Do NOT create Python dependency files for a C project. @@ -303,7 +317,8 @@ def project_task_templates(self, context: ProjectTaskContext) -> ProjectTaskTemp 1. Provide `int main(int argc, char **argv)`. 2. Implement `--help` and documented commands/options. 3. Delegate storage and task lifecycle behavior to existing C modules. -4. 
Verify with `make` and `make test`. +4. Verify with `make` and `make test`; `make test` must execute tests and + return non-zero when any test fails. **Important:** - Read `docs/` first and faithfully expose the requested behavior. diff --git a/CoderMind/scripts/decoder_lang/cpp_backend.py b/CoderMind/scripts/decoder_lang/cpp_backend.py index b369eea..6dcef41 100644 --- a/CoderMind/scripts/decoder_lang/cpp_backend.py +++ b/CoderMind/scripts/decoder_lang/cpp_backend.py @@ -20,6 +20,13 @@ _PLACEHOLDER_RE = re.compile( r"(?is)\b(?:TODO|PLACEHOLDER|NOT IMPLEMENTED|throw\s+std::logic_error|abort\s*\()" ) +_COMPILE_ONLY_COMMAND_RE = re.compile( + r"(?m)^\s*\S*(?:c\+\+|g\+\+|clang\+\+)(?=\s).*\s-c\s" +) +_TEST_EXECUTION_RE = re.compile( + r"(?im)(^|\s)(?:PASS|FAIL)(?:\s|:)|^\s*ok\b|^\s*1\.\.|" + r"test result:|\btests? passed\b|^\s*running\s+\d+\s+tests?" +) _CPP_SOURCE_EXTENSIONS = (".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx", ".h") _CPP_KEYWORDS = frozenset({ "alignas", "alignof", "and", "asm", "auto", "bool", "break", @@ -213,9 +220,9 @@ def parse_test_output(self, raw: str, exit_code: int) -> TestRunResult: observed = int(out_of.group(1)) if out_of else None if ran_no_tests( exit_code, raw, observed_tests=observed, - no_tests_markers=("No tests were found",), + no_tests_markers=("No tests were found", "Nothing to be done for 'test'"), empty_output_is_no_op=False, - ): + ) or self._looks_like_compile_only_make_test(raw): status = "errored" else: status = "passed" if exit_code == 0 else "failed" @@ -241,6 +248,13 @@ def parse_test_output(self, raw: str, exit_code: int) -> TestRunResult: extra={"tool": "ctest, make test, or compiler syntax check"}, ) + @staticmethod + def _looks_like_compile_only_make_test(raw: str) -> bool: + """Return True when make output compiled tests but ran none.""" + if not _COMPILE_ONLY_COMMAND_RE.search(raw): + return False + return not _TEST_EXECUTION_RE.search(raw) + _PROMPT_HINTS_SINGLETON: PromptHints | None = None def prompt_hints(self) 
-> PromptHints: @@ -288,7 +302,10 @@ def project_task_templates(self, context: ProjectTaskContext) -> ProjectTaskTemp 1. Prefer the C++ standard library. 2. Use C++17 unless implemented code requires a newer standard. 3. Provide build and test instructions compatible with CMake and ctest. -4. Keep generated binaries and build directories out of source control. +4. The test target must build and execute real test binaries. For Makefile + fallback projects, `make test` must not only compile `.o` files or print + "Nothing to be done". +5. Keep generated binaries and build directories out of source control. **Important:** - Do NOT create Python dependency files for a C++ project. @@ -314,7 +331,7 @@ def project_task_templates(self, context: ProjectTaskContext) -> ProjectTaskTemp 1. Provide `int main(int argc, char **argv)`. 2. Implement `--help` and documented commands/options. 3. Delegate storage and task lifecycle behavior to existing C++ modules. -4. Verify with a CMake build and `ctest`, or with `make test` when the project uses a Makefile. +4. Verify with a CMake build and `ctest`, or with `make test` when the project uses a Makefile. The verification command must execute tests and return non-zero when any test fails. **Important:** - Read `docs/` first and faithfully expose the requested behavior. 
diff --git a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py index d67c7f5..4404ed9 100644 --- a/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py +++ b/CoderMind/scripts/decoder_lang/tests/test_c_cpp_backend.py @@ -80,7 +80,10 @@ def test_prompt_hints_and_project_tasks(self) -> None: ProjectTaskContext(repo_name="tasklite", repo_info="task cli", package_name="tasklite") ) self.assertIn("Makefile", templates.dependencies) + self.assertIn("must build and execute real test binaries", templates.dependencies) + self.assertIn("must not only", templates.dependencies) self.assertIn("src/main.c", templates.main_entry) + self.assertIn("must execute tests", templates.main_entry) self.assertIn("C CLI", templates.readme) def test_missing_toolchain_raises(self) -> None: @@ -103,6 +106,18 @@ def test_syntax_fallback_skips_git_refs(self) -> None: self.assertIn(str(root / "src" / "main.c"), cmd) self.assertNotIn(str(root / ".git" / "refs" / "heads" / "batch" / "main.c"), cmd) + def test_make_test_compile_only_output_is_not_pass(self) -> None: + raw = "cc -Isrc -std=c99 -Wall -Wextra -c tests/test_engine.c -o build/tests/test_engine.o\n" + + result = self.backend.parse_test_output(raw, 0) + + self.assertEqual(result.status, "errored") + + def test_make_test_nothing_to_do_is_not_pass(self) -> None: + result = self.backend.parse_test_output("make: Nothing to be done for 'test'.\n", 0) + + self.assertEqual(result.status, "errored") + class CppBackendTests(unittest.TestCase): """C++ backend registry and parser-backed behaviour.""" @@ -173,7 +188,10 @@ def test_prompt_hints_and_project_tasks(self) -> None: ProjectTaskContext(repo_name="tasklite", repo_info="task cli", package_name="tasklite") ) self.assertIn("CMakeLists.txt", templates.dependencies) + self.assertIn("must build and execute real test binaries", templates.dependencies) + self.assertIn("must not only", templates.dependencies) 
self.assertIn("src/main.cpp", templates.main_entry) + self.assertIn("must execute tests", templates.main_entry) self.assertIn("C++ CLI", templates.readme) def test_cmake_test_command_runs_ctest_in_build_dir(self) -> None: @@ -238,6 +256,18 @@ def test_syntax_fallback_skips_git_refs(self) -> None: self.assertIn(str(root / "src" / "main.cpp"), cmd) self.assertNotIn(str(root / ".git" / "refs" / "heads" / "batch" / "main.cpp"), cmd) + def test_make_test_compile_only_output_is_not_pass(self) -> None: + raw = "c++ -std=c++17 -c tests/parser_test.cpp -o build/tests/parser_test.o\n" + + result = self.backend.parse_test_output(raw, 0) + + self.assertEqual(result.status, "errored") + + def test_make_test_nothing_to_do_is_not_pass(self) -> None: + result = self.backend.parse_test_output("make: Nothing to be done for 'test'.\n", 0) + + self.assertEqual(result.status, "errored") + def test_missing_toolchain_raises(self) -> None: with TemporaryDirectory() as temp_dir: with patch("decoder_lang.cpp_backend.shutil.which", return_value=None): diff --git a/CoderMind/tests/test_final_test_repair.py b/CoderMind/tests/test_final_test_repair.py index eb63801..267b875 100644 --- a/CoderMind/tests/test_final_test_repair.py +++ b/CoderMind/tests/test_final_test_repair.py @@ -159,3 +159,29 @@ def fake_dispatch(*_a, **_k): assert out["success"] is True assert "final_test_repair_attempts" not in out + + +def test_final_test_fails_when_smoke_test_crashes(monkeypatch, tmp_path): + _patch_common(monkeypatch, tmp_path) + monkeypatch.setattr(fv, "run_project_tests", lambda *_a, **_k: _pass_result()) + + saved = {} + + def fake_save_stage_result(name, data): + saved[name] = data + + monkeypatch.setattr(fv, "save_stage_result", fake_save_stage_result) + monkeypatch.setitem(sys.modules, "smoke_test", type(sys)("smoke_test")) + + def crash_smoke_test(): + raise FileNotFoundError("python") + + sys.modules["smoke_test"].run_smoke_test = crash_smoke_test + + out = fv.final_test(repo_path=tmp_path, 
max_repair_iters=2) + + assert out["success"] is False + assert out["errors"] == 1 + assert "Smoke test failed to run" in out["smoke_test_error"] + assert saved["final_test"]["success"] is False + assert saved["smoke_test"]["error_count"] == 1 diff --git a/CoderMind/tests/test_generated_artifact_hygiene.py b/CoderMind/tests/test_generated_artifact_hygiene.py index 35af678..a3f071b 100644 --- a/CoderMind/tests/test_generated_artifact_hygiene.py +++ b/CoderMind/tests/test_generated_artifact_hygiene.py @@ -68,10 +68,13 @@ def test_generated_artifact_path_policy_covers_common_outputs() -> None: "__pycache__/app.cpython-312.pyc", "compile_commands.json", "dist/bundle.js", + "build/generated_source.c", ] allowed = [ "src/app.py", "src/build_config.py", + "configs/build/build_contract.c", + "configs/build/build_contract.h", "CMakeLists.txt", "Makefile", "package.json", @@ -100,11 +103,15 @@ def test_local_excludes_prevent_git_add_a_from_staging_outputs(tmp_path: Path) - (repo_path / "build" / "CTestTestfile.cmake").write_text("generated\n", encoding="utf-8") source = repo_path / "src" / "feature.py" source.write_text("def feature():\n return 2\n", encoding="utf-8") + build_contract = repo_path / "configs" / "build" / "build_contract.c" + build_contract.parent.mkdir(parents=True) + build_contract.write_text("int build_contract(void) { return 0; }\n", encoding="utf-8") _run_git(repo_path, "add", "-A") status = _run_git(repo_path, "status", "--porcelain").stdout assert "src/feature.py" in status + assert "configs/build/build_contract.c" in status assert "build/CTestTestfile.cmake" not in status @@ -212,6 +219,51 @@ class FakeBackend: assert output == "passed=1 failed=0 errors=0 skipped=0" +def test_post_verify_runs_project_tests_for_docs_batches( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + repo_path = _init_repo(tmp_path / "repo") + docs_task = PlannedTask( + task="Update README.", + file_path="README.md", + units_key=["README"], + 
unit_to_code={"README": "# Demo"}, + unit_to_features={"README": ["Docs/readme"]}, + task_type="project_docs", + ) + + class FakeBackend: + name = "cpp" + display_name = "C++" + + monkeypatch.setattr( + post_verify, + "resolve_test_backend", + lambda **_kwargs: FakeBackend(), + ) + + calls = {"count": 0} + + def fake_run_project_tests(*_args, **_kwargs): + calls["count"] += 1 + return CodegenTestResult( + success=False, + return_code=8, + output="ctest failed after README update", + test_files=[], + failed=1, + ) + + monkeypatch.setattr(post_verify, "run_project_tests", fake_run_project_tests) + + passed, output = post_verify.post_verify(repo_path, docs_task) + + assert calls["count"] == 1 + assert not passed + assert "ctest failed after README update" in output + + def test_merge_batch_rejects_committed_generated_artifacts(tmp_path: Path) -> None: repo_path = _init_repo(tmp_path / "repo") _run_git(repo_path, "checkout", "-b", "batch/generated-artifact") From c7a6e6c9d15283e1dccd86b369fa557f884fd871 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Thu, 25 Jun 2026 20:04:57 +0800 Subject: [PATCH 13/17] fix(verify): Address review feedback Keep shell expansion for C/C++ syntax-check include paths and root-anchor virtualenv artifact directories to avoid false positives. Also cover the false-positive resume prompt summary interpolation with a regression test. 
--- CoderMind/scripts/code_gen/batch_prompts.py | 6 ++--- .../scripts/common/generated_artifacts.py | 6 ++--- CoderMind/tests/test_code_gen_multilingual.py | 24 +++++++++++++++++++ .../tests/test_generated_artifact_hygiene.py | 3 +++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/CoderMind/scripts/code_gen/batch_prompts.py b/CoderMind/scripts/code_gen/batch_prompts.py index d2cf1e6..319c7a4 100644 --- a/CoderMind/scripts/code_gen/batch_prompts.py +++ b/CoderMind/scripts/code_gen/batch_prompts.py @@ -423,14 +423,14 @@ def _dynamic_c_family_syntax_command( include_flags: List[str] = [] for index, part in enumerate(command): if part == "-I" and index + 1 < len(command): - include_flags.extend(["-I", "$PWD"]) + include_flags.append('-I "$PWD"') standard = "-std=c++17" if backend.name == "cpp" else "-std=c99" patterns = ( r'\( -name "*.cpp" -o -name "*.cc" -o -name "*.cxx" \)' if backend.name == "cpp" else r'-name "*.c"' ) - include_text = " ".join(shlex.quote(part) for part in include_flags) + include_text = " ".join(include_flags) return ( "bash -lc " + shlex.quote( @@ -1011,7 +1011,7 @@ def build_resume_prompt( post_verify_section = ( "\n\n## ⚠ False-positive PASS detected\n" "Your previous attempt ended with `BATCH_RESULT: PASS` and the\n" - "PYTEST_SUMMARY line {agent_summary_repr}, but the runner's\n" + f"PYTEST_SUMMARY line {agent_summary_repr}, but the runner's\n" "independent test-command re-run reported the failure shown below.\n" "Possible causes you must investigate:\n" "* You did not actually run the exact test command before declaring PASS.\n" diff --git a/CoderMind/scripts/common/generated_artifacts.py b/CoderMind/scripts/common/generated_artifacts.py index b2e0768..10c6d3c 100644 --- a/CoderMind/scripts/common/generated_artifacts.py +++ b/CoderMind/scripts/common/generated_artifacts.py @@ -16,9 +16,9 @@ ".pytest_cache/", ".mypy_cache/", ".ruff_cache/", - ".venv/", - "venv/", - "env/", + "/.venv/", + "/venv/", + "/env/", "node_modules/", 
"/.next/", "/.nuxt/", diff --git a/CoderMind/tests/test_code_gen_multilingual.py b/CoderMind/tests/test_code_gen_multilingual.py index 6c77ebd..ce62b59 100644 --- a/CoderMind/tests/test_code_gen_multilingual.py +++ b/CoderMind/tests/test_code_gen_multilingual.py @@ -120,6 +120,30 @@ def test_cpp_codegen_prompt_aligns_cmake_command_with_post_verify(monkeypatch, t assert "ctest --test-dir build --output-on-failure" in prompt +def test_cpp_syntax_prompt_expands_repo_include_path(monkeypatch, tmp_path: Path) -> None: + _set_language(monkeypatch, tmp_path, "cpp") + task = _task("src/tasklite_cli/task.cpp") + + prompt = batch_prompts.build_tdd_prompt(_state(task), task, tmp_path) + + assert '-I "$PWD"' in prompt + assert "-I '$PWD'" not in prompt + + +def test_resume_prompt_includes_agent_pytest_summary() -> None: + prompt = batch_prompts.build_resume_prompt( + "Original prompt", + attempt_number=2, + failure_reason="Post-verification failed", + last_test_output="1 failed in 0.10s", + sub_agent_claimed_pass=True, + agent_pytest_summary="1 passed in 0.10s", + ) + + assert "PYTEST_SUMMARY line `1 passed in 0.10s`" in prompt + assert "{agent_summary_repr}" not in prompt + + def test_non_python_integration_prompt_uses_native_entry_point(monkeypatch, tmp_path: Path) -> None: # Regression for the bug where every language was told "Do NOT create # main.py", planting a Python file name into Go/JS/C projects. 
diff --git a/CoderMind/tests/test_generated_artifact_hygiene.py b/CoderMind/tests/test_generated_artifact_hygiene.py index a3f071b..95c5d13 100644 --- a/CoderMind/tests/test_generated_artifact_hygiene.py +++ b/CoderMind/tests/test_generated_artifact_hygiene.py @@ -73,6 +73,9 @@ def test_generated_artifact_path_policy_covers_common_outputs() -> None: allowed = [ "src/app.py", "src/build_config.py", + "src/env/config.py", + "src/venv/settings.py", + "src/.venv/config.py", "configs/build/build_contract.c", "configs/build/build_contract.h", "CMakeLists.txt", From b53ff6315740ac15a2885c74001c711cb2308c22 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Fri, 26 Jun 2026 17:18:14 +0800 Subject: [PATCH 14/17] refactor(interfaces): drop dedup from deprecated pruner prune_orphan_interfaces is a deprecated helper with no production callers; the active design flow uses InterfacesStore.find_orphan_units and prune_units, and file_code is already deduplicated at the serialization source. Restore the plain join so the legacy helper no longer carries duplicate dedup logic, and drop the now-unused dedup_file_code import. 
--- CoderMind/scripts/func_design/interface_review.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CoderMind/scripts/func_design/interface_review.py b/CoderMind/scripts/func_design/interface_review.py index a82b6b8..b667170 100644 --- a/CoderMind/scripts/func_design/interface_review.py +++ b/CoderMind/scripts/func_design/interface_review.py @@ -22,7 +22,6 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from common import LLMClient -from common.code_dedup import dedup_file_code # AST inspection routes through the Python backend's # ``find_main_block_lineno`` helper so entry-point splicing shares the @@ -1744,12 +1743,13 @@ def prune_orphan_interfaces( # All units pruned → remove the entire file entry files_to_remove.append(file_path) else: - # Regenerate file_code from surviving units, collapsing the - # whole-file-per-unit duplication so the rebuilt source is a - # single clean file rather than N copies. - file_data["file_code"] = dedup_file_code( - units_to_code.get(uname, "") for uname in units - ) + # Regenerate file_code from surviving units + code_parts = [] + for uname in units: + code = units_to_code.get(uname, "") + if code: + code_parts.append(code) + file_data["file_code"] = "\n\n".join(code_parts) for fp in files_to_remove: del file_interfaces[fp] From b04ea057f9a81016180c9bb80f00490868942070 Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Fri, 26 Jun 2026 17:18:15 +0800 Subject: [PATCH 15/17] test: realign re-added multilingual decoder tests The re-added follow-up tests encoded pre-refactor behavior that no longer matches the merged decoder pipeline: - add_interface for a non-Python project is recorded as an advisory manual follow-up rather than silently skipped. - correct_intra_subtree_file_order reports reason "backend_file_dependencies", and Go ordering resolves module imports through go.mod. - Go command-path resolution moved to go_backend.find_existing_entry. 
Update the assertions to match, and remove the standalone C++ ctest backend test, whose behavior is fully covered by the comprehensive decoder_lang backend suite. --- CoderMind/tests/test_decoder_lang_backends.py | 24 ------------------- CoderMind/tests/test_interface_coverage.py | 15 ++++++++---- CoderMind/tests/test_plan_language_support.py | 23 ++++++++++++------ 3 files changed, 27 insertions(+), 35 deletions(-) delete mode 100644 CoderMind/tests/test_decoder_lang_backends.py diff --git a/CoderMind/tests/test_decoder_lang_backends.py b/CoderMind/tests/test_decoder_lang_backends.py deleted file mode 100644 index 119065e..0000000 --- a/CoderMind/tests/test_decoder_lang_backends.py +++ /dev/null @@ -1,24 +0,0 @@ -import os -import sys - -# Ensure scripts/ is importable when tests run from the project root. -_project_root = os.path.join(os.path.dirname(__file__), "..") -sys.path.insert(0, os.path.join(_project_root, "scripts")) - -from decoder_lang.cpp_backend import CppBackend -from decoder_lang.test_result import EnvHandle - - -def test_cpp_cmake_ctest_command_targets_build_dir(tmp_path): - (tmp_path / "CMakeLists.txt").write_text( - "cmake_minimum_required(VERSION 3.20)\nproject(sample)\nenable_testing()\n", - encoding="utf-8", - ) - env = EnvHandle(project_root=tmp_path, extra={"ctest": "/usr/bin/ctest"}) - - assert CppBackend().test_command(env) == [ - "/usr/bin/ctest", - "--test-dir", - str(tmp_path / "build"), - "--output-on-failure", - ] diff --git a/CoderMind/tests/test_interface_coverage.py b/CoderMind/tests/test_interface_coverage.py index ef419b0..dc62943 100644 --- a/CoderMind/tests/test_interface_coverage.py +++ b/CoderMind/tests/test_interface_coverage.py @@ -420,7 +420,7 @@ def test_apply_fixes_add_dependency_is_language_agnostic() -> None: assert enhanced_data_flow["invocation_edges"][0]["callee"] == "NewStore" -def test_apply_fixes_skips_add_interface_for_non_python() -> None: +def test_apply_fixes_records_advisory_add_interface_for_non_python() 
-> None: reviewer = _make_reviewer("go") registry = GlobalInterfaceRegistry(backend=get_backend_for("go")) interfaces_data = { @@ -449,11 +449,18 @@ def test_apply_fixes_skips_add_interface_for_non_python() -> None: rpg_features={"CLI/run"}, ) - # add_interface is skipped for non-Python and NOT counted as unapplied, - # so the review can still pass on structural grounds. + # add_interface stub synthesis is Python-only. For other languages the + # request is recorded as an advisory manual follow-up rather than silently + # dropped, so the review can still pass on structural grounds. assert stats["applied_fixes"] == 0 assert stats["applied_edges"] == 0 - assert stats["unapplied"] == [] + assert len(stats["unapplied"]) == 1 + advisory = stats["unapplied"][0] + assert advisory["action"] == "add_interface" + assert advisory["unit_name"] == "function Run" + assert advisory["advisory"] is True + assert advisory["manual_follow_up"] is True + assert advisory["unsupported_for_language"] == "go" # No Go stub was injected into the interface file. cli = interfaces_data["subtrees"]["Core"]["interfaces"]["internal/cli.go"] assert cli["units"] == [] diff --git a/CoderMind/tests/test_plan_language_support.py b/CoderMind/tests/test_plan_language_support.py index 53c64c7..aeecd59 100644 --- a/CoderMind/tests/test_plan_language_support.py +++ b/CoderMind/tests/test_plan_language_support.py @@ -517,7 +517,10 @@ def test_go_main_entry_reuses_existing_command_package() -> None: repo_info="Go web todo.", ) - assert planner._resolve_go_command_path() == "cmd/todo/main.go" + # Go command-path resolution moved to the backend + # (``go_backend.find_existing_entry``); the planner reuses it when + # building the synthetic MAIN_ENTRY task. 
+ assert get_backend("go").find_existing_entry(interfaces) == "cmd/todo/main.go" main_entry = planner._build_main_entry_task() assert "cmd/todo/main.go" in main_entry assert "cmd/demo-go-web-todo/main.go" not in main_entry @@ -539,7 +542,10 @@ def test_go_main_entry_falls_back_when_no_command_package() -> None: repo_info="Go CLI.", ) - assert planner._resolve_go_command_path() == "cmd/tasklite/main.go" + # No cmd/*/main.go in the skeleton → the backend reports no existing entry + # and the planner falls back to the canonical cmd//main.go. + assert get_backend("go").find_existing_entry(interfaces) is None + assert "cmd/tasklite/main.go" in planner._build_main_entry_task() def test_rust_backend_accepts_basic_declarations() -> None: @@ -648,10 +654,13 @@ def test_interface_validation_accepts_typescript_jsdoc_backticks() -> None: def test_file_ordering_uses_imports_for_go() -> None: # Regression: non-Python file ordering previously degraded to the raw LLM # order because dependency extraction used Python AST only. Go imports must - # now drive the topological sort (store before its cli importer). + # now drive the topological sort (store before its cli importer). The + # backend resolves module imports through go.mod, so the module manifest + # is part of the ordered file set. 
from plan_tasks import correct_intra_subtree_file_order interfaces = { + "go.mod": {"file_code": "module tasklite\n\ngo 1.21\n"}, "internal/store/store.go": { "file_code": "package store\n\ntype Store struct{}\nfunc New() *Store { return &Store{} }\n", }, @@ -661,14 +670,14 @@ def test_file_ordering_uses_imports_for_go() -> None: } corrected, diag = correct_intra_subtree_file_order( subtree_name="Core", - files_order=["cmd/app/cli.go", "internal/store/store.go"], + files_order=["cmd/app/cli.go", "internal/store/store.go", "go.mod"], subtree_interfaces=interfaces, language="go", ) - assert corrected == ["internal/store/store.go", "cmd/app/cli.go"] + assert corrected == ["internal/store/store.go", "cmd/app/cli.go", "go.mod"] assert diag["changed"] is True - assert diag["reason"] == "import_toposort_by_stem" + assert diag["reason"] == "backend_file_dependencies" def test_file_ordering_keeps_python_dotted_module_path() -> None: @@ -686,5 +695,5 @@ def test_file_ordering_keeps_python_dotted_module_path() -> None: ) assert corrected == ["src/app/store.py", "src/app/cli.py"] - assert diag["reason"] == "import_toposort" + assert diag["reason"] == "backend_file_dependencies" From 8c0f3385e547dde8656d46cc8fa361170c67b2df Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Fri, 26 Jun 2026 17:48:56 +0800 Subject: [PATCH 16/17] fix(decoder): address Copilot review findings Second Copilot review pass flagged three correctness issues in the restored hardening code: - code_dedup: dedup_code_blocks returned the stripped block, dropping leading indentation from indented unit slices and corrupting the file_code used as a codegen seed. Dedup on the stripped key but keep the block's own indentation (trim only trailing whitespace). - interface_agent: _analyze_python_invocations walked the whole AST, attributing calls inside nested def/class bodies to the enclosing caller. Walk only the caller's own scope. 
- decoder_lang.backend: cmake_reconfigure now skips the build step when configure fails so ctest surfaces the real error instead of running against a stale build directory. --- CoderMind/scripts/common/code_dedup.py | 16 +++++++++------- CoderMind/scripts/decoder_lang/backend.py | 6 +++++- .../scripts/func_design/interface_agent.py | 17 +++++++++++++---- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/CoderMind/scripts/common/code_dedup.py b/CoderMind/scripts/common/code_dedup.py index 7e1907a..cc60314 100644 --- a/CoderMind/scripts/common/code_dedup.py +++ b/CoderMind/scripts/common/code_dedup.py @@ -13,19 +13,21 @@ def dedup_code_blocks(codes: Iterable[str]) -> List[str]: - """Return ``codes`` with blank and duplicate (stripped) blocks removed. + """Return ``codes`` with blank and duplicate blocks removed. - Order of first appearance is preserved. Comparison is on the + Order of first appearance is preserved. Duplicates are detected on the whitespace-stripped block so trivially different indentation does not - defeat dedup; the stripped form is returned so the join is clean. + defeat dedup, but each surviving block keeps its own leading indentation + (only trailing whitespace is trimmed) so indented unit slices stay valid + when joined into ``file_code``. 
""" seen: set[str] = set() unique: List[str] = [] for code in codes: - stripped = code.strip() - if stripped and stripped not in seen: - seen.add(stripped) - unique.append(stripped) + key = code.strip() + if key and key not in seen: + seen.add(key) + unique.append(code.rstrip()) return unique diff --git a/CoderMind/scripts/decoder_lang/backend.py b/CoderMind/scripts/decoder_lang/backend.py index 5a61c3c..39c3927 100644 --- a/CoderMind/scripts/decoder_lang/backend.py +++ b/CoderMind/scripts/decoder_lang/backend.py @@ -506,12 +506,16 @@ def cmake_reconfigure(env: Any) -> None: if not cmake or not (root / "CMakeLists.txt").exists(): return try: - subprocess.run( + configure = subprocess.run( [cmake, "-S", str(root), "-B", str(root / "build")], cwd=str(root), capture_output=True, timeout=120, ) + if configure.returncode != 0: + # A failed configure leaves a stale/partial build dir; skip the + # build so a later ctest surfaces the real configure failure. + return subprocess.run( [cmake, "--build", str(root / "build")], cwd=str(root), diff --git a/CoderMind/scripts/func_design/interface_agent.py b/CoderMind/scripts/func_design/interface_agent.py index 85c98e4..3f652ef 100644 --- a/CoderMind/scripts/func_design/interface_agent.py +++ b/CoderMind/scripts/func_design/interface_agent.py @@ -14,7 +14,7 @@ import logging import ast import re -from typing import Dict, List, Optional, Tuple, Any, Set +from typing import Dict, List, Optional, Tuple, Any, Set, Iterator from collections import defaultdict, deque from pydantic import BaseModel, Field, model_validator @@ -333,9 +333,7 @@ def _analyze_python_invocations(self, code: str, file_path: str) -> None: local_calls: Dict[str, Set[str]] = defaultdict(set) for caller, node, owner_class in caller_nodes: - for child in ast.walk(node): - if not isinstance(child, ast.Call): - continue + for child in _iter_calls_in_own_scope(node): callee_name = _python_call_name(child.func) if not callee_name: continue @@ -573,6 +571,17 @@ def 
_extract_name_from_node(node: ast.expr) -> Optional[str]: return None +def _iter_calls_in_own_scope(scope_node: ast.AST) -> "Iterator[ast.Call]": + """Yield ``Call`` nodes inside ``scope_node`` without descending into + nested function/class/lambda scopes, whose calls belong to them.""" + for child in ast.iter_child_nodes(scope_node): + if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)): + continue + if isinstance(child, ast.Call): + yield child + yield from _iter_calls_in_own_scope(child) + + def _python_call_name(node: ast.expr) -> Optional[str]: """Return a local callee name for safe same-file call edges.""" if isinstance(node, ast.Name): From cac32f445981cdf9fa8e783349a6947e3611314d Mon Sep 17 00:00:00 2001 From: Yasen Hu <74404492+HuYaSen@users.noreply.github.com> Date: Fri, 26 Jun 2026 18:09:51 +0800 Subject: [PATCH 17/17] fix(decoder): close compile-only guard and scope gaps A second Copilot review pass surfaced three correctness gaps in the restored C/C++ verification and Python invocation analysis: - c/cpp backend: the compile-only `make test` guard regex did not match version-suffixed compilers (gcc-13, clang-18, g++-13, clang++-18, c++-14), so a compile-only run could be reported as a passing test run. Allow an optional version suffix. - batch_prompts: the C/C++ syntax-only find now also prunes dist/coverage/.venv/venv/CMakeFiles so generated sources are not pulled into the syntax check. - interface_agent: lambdas are not separate units, so keep their bodies in the enclosing caller's scope rather than dropping their same-file calls (nested def/class scopes are still excluded). 
--- CoderMind/scripts/code_gen/batch_prompts.py | 4 +++- CoderMind/scripts/decoder_lang/c_backend.py | 2 +- CoderMind/scripts/decoder_lang/cpp_backend.py | 2 +- CoderMind/scripts/func_design/interface_agent.py | 6 ++++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CoderMind/scripts/code_gen/batch_prompts.py b/CoderMind/scripts/code_gen/batch_prompts.py index 319c7a4..3028170 100644 --- a/CoderMind/scripts/code_gen/batch_prompts.py +++ b/CoderMind/scripts/code_gen/batch_prompts.py @@ -436,7 +436,9 @@ def _dynamic_c_family_syntax_command( + shlex.quote( "mapfile -d '' sources < <(find . " r"\( -path './.git' -o -path './.cmind' -o -path './build' " - r"-o -path './node_modules' -o -path './target' \) -prune " + r"-o -path './node_modules' -o -path './target' " + r"-o -path './dist' -o -path './coverage' -o -path './.venv' " + r"-o -path './venv' -o -path './CMakeFiles' \) -prune " f"-o -type f {patterns} -print0); " f"if (( ${{#sources[@]}} == 0 )); then echo 'No {backend.prompt_hints().display_name} source files found' >&2; exit 1; fi; " f"{compiler} {standard} {include_text} -Wall -Wextra -fsyntax-only \"${{sources[@]}}\"" diff --git a/CoderMind/scripts/decoder_lang/c_backend.py b/CoderMind/scripts/decoder_lang/c_backend.py index 9b922b7..3dc3b11 100644 --- a/CoderMind/scripts/decoder_lang/c_backend.py +++ b/CoderMind/scripts/decoder_lang/c_backend.py @@ -20,7 +20,7 @@ _PLACEHOLDER_RE = re.compile( r"(?is)\b(?:TODO|PLACEHOLDER|NOT IMPLEMENTED|abort\s*\(|assert\s*\(\s*0\s*\))" ) -_COMPILE_ONLY_COMMAND_RE = re.compile(r"(?m)^\s*\S*(?:cc|gcc|clang)(?=\s).*\s-c\s") +_COMPILE_ONLY_COMMAND_RE = re.compile(r"(?m)^\s*\S*(?:cc|gcc|clang)(?:-[\w.]+)?(?=\s).*\s-c\s") _TEST_EXECUTION_RE = re.compile( r"(?im)(^|\s)(?:PASS|FAIL)(?:\s|:)|^\s*ok\b|^\s*1\.\.|" r"test result:|\btests? passed\b|^\s*running\s+\d+\s+tests?" 
diff --git a/CoderMind/scripts/decoder_lang/cpp_backend.py b/CoderMind/scripts/decoder_lang/cpp_backend.py index 6dcef41..864fd95 100644 --- a/CoderMind/scripts/decoder_lang/cpp_backend.py +++ b/CoderMind/scripts/decoder_lang/cpp_backend.py @@ -21,7 +21,7 @@ r"(?is)\b(?:TODO|PLACEHOLDER|NOT IMPLEMENTED|throw\s+std::logic_error|abort\s*\()" ) _COMPILE_ONLY_COMMAND_RE = re.compile( - r"(?m)^\s*\S*(?:c\+\+|g\+\+|clang\+\+)(?=\s).*\s-c\s" + r"(?m)^\s*\S*(?:c\+\+|g\+\+|clang\+\+)(?:-[\w.]+)?(?=\s).*\s-c\s" ) _TEST_EXECUTION_RE = re.compile( r"(?im)(^|\s)(?:PASS|FAIL)(?:\s|:)|^\s*ok\b|^\s*1\.\.|" diff --git a/CoderMind/scripts/func_design/interface_agent.py b/CoderMind/scripts/func_design/interface_agent.py index 3f652ef..1fe55ba 100644 --- a/CoderMind/scripts/func_design/interface_agent.py +++ b/CoderMind/scripts/func_design/interface_agent.py @@ -573,9 +573,11 @@ def _extract_name_from_node(node: ast.expr) -> Optional[str]: def _iter_calls_in_own_scope(scope_node: ast.AST) -> "Iterator[ast.Call]": """Yield ``Call`` nodes inside ``scope_node`` without descending into - nested function/class/lambda scopes, whose calls belong to them.""" + nested ``def``/``class`` scopes, whose calls are attributed to their own + unit. Lambdas are not separate units, so their calls remain attributed to + the enclosing scope.""" for child in ast.iter_child_nodes(scope_node): - if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)): + if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): continue if isinstance(child, ast.Call): yield child