Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 77 additions & 2 deletions CoderMind/scripts/code_gen/batch_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from typing import Any, Dict, List, Optional

from common.execution_state import BatchExecutionState, load_code_gen_state
from common.generated_artifacts import generated_artifact_prompt_rule
from common.import_normalizer import build_import_convention_snippet
from common.paths import (
CODE_GEN_STATE_FILE as STATE_FILE,
Expand Down Expand Up @@ -212,6 +213,7 @@
for example `5 passed in 0.42s`, `ok ./...`, or `test result: ok`. Copy it
verbatim from the run you just performed; do NOT invent it. This lets the
runner cross-check your claim against an independent re-run.
{summary_fallback_rule}

## ── Capabilities ─────────────────────────────────────────

Expand Down Expand Up @@ -413,6 +415,49 @@ def _fallback_test_command(backend: LanguageBackend) -> List[str]:
return list(_FALLBACK_TEST_COMMANDS.get(backend.name, [backend.prompt_hints().test_framework_name]))


def _dynamic_c_family_syntax_command(
    backend: LanguageBackend,
    command: List[str],
) -> str:
    """Wrap a C/C++ ``-fsyntax-only`` check in a source-discovering shell command.

    The result is a ``bash -lc`` one-liner that, at run time:

    1. collects source files below the current directory with ``find``,
       pruning VCS, build-output and dependency directories;
    2. prints a message to stderr and exits 1 when no source file exists;
    3. passes every discovered file to the compiler with
       ``-Wall -Wextra -fsyntax-only``.

    Args:
        backend: Language backend. ``backend.name == "cpp"`` selects the
            C++ standard flag and file patterns; any other name selects the
            C ones. ``backend.prompt_hints().display_name`` is used in the
            "no source files" message.
        command: The backend's test command. ``command[0]`` is taken as the
            compiler executable. Each ``-I`` token that has a following
            argument contributes one ``-I "$PWD"`` flag; the directory that
            followed it in ``command`` is not reused.

    Returns:
        The complete shell command as a single string.
    """
    compiler = shlex.quote(str(command[0]))

    # One flag per "-I <dir>" pair; a trailing "-I" with nothing after it
    # is ignored.
    last_index = len(command) - 1
    include_text = " ".join(
        '-I "$PWD"'
        for position, token in enumerate(command)
        if token == "-I" and position < last_index
    )

    if backend.name == "cpp":
        standard = "-std=c++17"
        patterns = r'\( -name "*.cpp" -o -name "*.cc" -o -name "*.cxx" \)'
    else:
        standard = "-std=c99"
        patterns = r'-name "*.c"'

    display_name = backend.prompt_hints().display_name

    # Directories that `find` must not descend into.
    pruned_dirs = (
        "./.git",
        "./.cmind",
        "./build",
        "./node_modules",
        "./target",
        "./dist",
        "./coverage",
        "./.venv",
        "./venv",
        "./CMakeFiles",
    )
    prune_expr = " -o ".join(f"-path '{directory}'" for directory in pruned_dirs)

    # Stage 1: read the NUL-delimited file list into the `sources` array.
    find_sources = (
        "mapfile -d '' sources < <(find . "
        rf"\( {prune_expr} \) -prune "
        f"-o -type f {patterns} -print0); "
    )
    # Stage 2: fail loudly instead of invoking the compiler with no inputs.
    require_sources = (
        "if (( ${#sources[@]} == 0 )); then "
        f"echo 'No {display_name} source files found' >&2; exit 1; fi; "
    )
    # Stage 3: syntax-check everything that was found.
    compile_sources = (
        f"{compiler} {standard} {include_text} "
        '-Wall -Wextra -fsyntax-only "${sources[@]}"'
    )

    script = find_sources + require_sources + compile_sources
    return "bash -lc " + shlex.quote(script)


def _cmake_c_family_test_command(command: List[str]) -> str:
    """Build a configure, build, then CTest shell pipeline for a CMake project.

    Args:
        command: The backend's test command; only ``command[0]`` (the CTest
            executable) is used, shell-quoted.

    Returns:
        A ``bash -lc`` command string that configures into ``build``, builds
        it, and runs CTest against ``build`` with ``--output-on-failure``.
        The steps are chained with ``&&`` so a failing step stops the rest.
    """
    ctest_executable = shlex.quote(str(command[0]))
    steps = [
        "cmake -S . -B build",
        "cmake --build build",
        f"{ctest_executable} --test-dir build --output-on-failure",
    ]
    pipeline = " && ".join(steps)
    return f"bash -lc {shlex.quote(pipeline)}"


def _build_backend_test_cmd(
backend: LanguageBackend,
repo_path: Path,
Expand All @@ -425,7 +470,12 @@ def _build_backend_test_cmd(

env = backend.detect_env(repo_path) or EnvHandle(project_root=repo_path.resolve())
try:
return _shell_join(backend.test_command(env))
command = backend.test_command(env)
if backend.name in {"c", "cpp"} and command and "ctest" in Path(str(command[0])).name:
return _cmake_c_family_test_command(command)
if backend.name in {"c", "cpp"} and "-fsyntax-only" in command:
return _dynamic_c_family_syntax_command(backend, command)
return _shell_join(command)
except (ToolchainUnavailable, NotImplementedError, OSError):
return _shell_join(_fallback_test_command(backend))

Expand Down Expand Up @@ -513,6 +563,16 @@ def _test_timeout_rule(backend: LanguageBackend) -> str:
return "- Run long-lived servers, watchers, or interactive commands instead of the exact test command"


def _summary_fallback_rule(backend: LanguageBackend, test_command: str) -> str:
    """Return the prompt rule for compilers that print nothing on success.

    Args:
        backend: Language backend; only ``backend.name`` is consulted.
        test_command: The test command string shown to the agent.

    Returns:
        For a ``c``/``cpp`` backend whose command contains
        ``-fsyntax-only``, a newline-wrapped instruction telling the agent
        which ``PYTEST_SUMMARY`` line to report when the command exits 0
        without a summary line. Otherwise the empty string.
    """
    # Check the backend first so `test_command` is only inspected for C/C++.
    if backend.name not in {"c", "cpp"}:
        return ""
    if "-fsyntax-only" not in test_command:
        return ""

    rule_parts = (
        "\nFor C/C++ syntax-only commands: if the exact command exits 0 ",
        "and prints no summary line, use exactly ",
        "`PYTEST_SUMMARY: syntax check passed`.\n",
    )
    return "".join(rule_parts)


def _build_language_context(backend: LanguageBackend, test_command: str) -> str:
"""Build the target-language prompt section."""
hints = backend.prompt_hints()
Expand All @@ -526,6 +586,13 @@ def _build_language_context(backend: LanguageBackend, test_command: str) -> str:
f"- Module naming: {hints.module_naming_rule}\n"
f"- Style: {hints.style_directive}\n"
)
artifact_extra = ""
if backend.name in {"c", "cpp"}:
artifact_extra = (
"If CTest needs arguments or target wiring, change source files "
"such as `CMakeLists.txt` or the test source instead."
)
context += generated_artifact_prompt_rule(artifact_extra)
if backend.name != "python":
# The decoder's defaults are Python-centric; without an explicit
# prohibition the sub-agent tends to add Python helpers (a main.py
Expand All @@ -542,6 +609,13 @@ def _build_language_context(backend: LanguageBackend, test_command: str) -> str:
f"- Run tests ONLY with `{test_command}` ({hints.test_framework_name}). Do NOT wrap, "
"re-implement, or drive the test suite through pytest or any Python script.\n"
)
if backend.name in {"c", "cpp"}:
context += (
"- C/C++ tests and examples must be valid standalone translation units. "
"If a test or example calls a helper implemented in another `.c`/`.cpp` file, "
"create or update a matching header and include that header; do NOT rely on "
"transitive `.cpp` inclusion or undeclared functions.\n"
)
else:
context += (
"- Do NOT introduce Python-specific files, packages, or pytest conventions unless this is a Python project.\n"
Expand Down Expand Up @@ -886,6 +960,7 @@ def build_tdd_prompt(
dependency_install_capability=_dependency_install_capability(backend, repo_path),
dependency_management=_dependency_management_text(backend, repo_path),
test_timeout_rule=_test_timeout_rule(backend),
summary_fallback_rule=_summary_fallback_rule(backend, pytest_cmd),
import_convention=import_convention,
language_context=_build_language_context(backend, pytest_cmd),
dependency_context=dep_ctx_str,
Expand Down Expand Up @@ -938,7 +1013,7 @@ def build_resume_prompt(
post_verify_section = (
"\n\n## ⚠ False-positive PASS detected\n"
"Your previous attempt ended with `BATCH_RESULT: PASS` and the\n"
"PYTEST_SUMMARY line {agent_summary_repr}, but the runner's\n"
f"PYTEST_SUMMARY line {agent_summary_repr}, but the runner's\n"
"independent test-command re-run reported the failure shown below.\n"
"Possible causes you must investigate:\n"
"* You did not actually run the exact test command before declaring PASS.\n"
Expand Down
60 changes: 55 additions & 5 deletions CoderMind/scripts/code_gen/final_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,32 @@
)


def _fail_final_test_for_smoke_error(
    result_dict: Dict[str, Any],
    message: str,
    *,
    smoke_dict: Optional[Dict[str, Any]] = None,
) -> None:
    """Mark final validation failed because smoke validation failed.

    Mutates ``result_dict`` in place:

    * ``success`` becomes ``False``;
    * ``errors`` becomes at least 1 (a larger existing count is kept);
    * ``output`` and ``smoke_test_error`` are set to ``message``;
    * ``next_action`` is set to the fixed remediation hint;
    * ``smoke_test`` is set to ``smoke_dict``, or to a synthetic failure
      record carrying ``message`` when ``smoke_dict`` is ``None`` or empty.

    Args:
        result_dict: The final-test result mapping to update.
        message: Human-readable description of the smoke failure.
        smoke_dict: Serialized smoke-test result to attach. ``None`` or an
            empty dict means no usable result exists, so a synthetic record
            is attached instead.
    """
    # This helper is also called from an exception handler, so a malformed
    # `errors` value must not raise here; fall back to zero recorded errors.
    try:
        recorded_errors = int(result_dict.get("errors", 0) or 0)
    except (TypeError, ValueError, OverflowError):
        recorded_errors = 0

    result_dict["success"] = False
    result_dict["errors"] = max(recorded_errors, 1)
    result_dict["output"] = message
    result_dict["next_action"] = (
        "Unit tests passed, but smoke validation failed. Fix the smoke "
        "failure and re-run final validation."
    )
    result_dict["smoke_test_error"] = message

    # An empty dict carries no findings or counts, so it is treated like a
    # missing result rather than stored as an uninformative `{}`.
    if not smoke_dict:
        smoke_dict = {
            "success": False,
            "type": "smoke_test",
            "findings": [{"severity": "error", "message": message}],
            "error_count": 1,
            "warning_count": 0,
        }
    result_dict["smoke_test"] = smoke_dict


def final_test(
repo_path: Optional[Path] = None,
state_path: Path = STATE_FILE,
Expand Down Expand Up @@ -238,6 +264,8 @@ def final_test(
actionable = [f for f in smoke_result.findings if f.severity == "error"]

if actionable:
remaining = actionable
recheck_success = True
findings_desc = "\n".join(
f"- [{f.severity}] {f.message}" for f in actionable
)
Expand Down Expand Up @@ -293,6 +321,7 @@ def final_test(
result_dict["smoke_test"] = smoke_result_2.to_dict()
result_dict["smoke_repair_attempted"] = True
result_dict["post_repair_tests_pass"] = recheck.success
recheck_success = recheck.success
remaining = [
f for f in smoke_result_2.findings
if f.severity == "error"
Expand All @@ -303,18 +332,39 @@ def final_test(
len(remaining), len(actionable),
"PASS" if recheck.success else "FAIL",
)
if remaining or not recheck_success:
smoke_dict = result_dict.get("smoke_test")
if not isinstance(smoke_dict, dict):
smoke_dict = {}
message = (
"Smoke validation failed after unit tests passed. "
f"Remaining smoke errors: {len(remaining)}; "
f"post-repair tests pass: {recheck_success}."
)
_fail_final_test_for_smoke_error(
result_dict,
message,
smoke_dict=smoke_dict,
)
except ImportError:
logger.debug("smoke_test module not available, skipping")
except Exception as exc:
logger.warning("Smoke test / repair failed: %s", exc)
_fail_final_test_for_smoke_error(
result_dict,
f"Smoke test failed to run: {exc}",
)

# Save per-stage results for global_review context
save_stage_result("final_test", {
"success": result.success,
"passed": result.passed,
"failed": result.failed,
"errors": result.errors,
"output_tail": "\n".join(result.output.splitlines()[-40:]) if not result.success else "",
"success": bool(result_dict.get("success")),
"passed": result_dict.get("passed", result.passed),
"failed": result_dict.get("failed", result.failed),
"errors": result_dict.get("errors", result.errors),
"output_tail": (
"\n".join(str(result_dict.get("output", "")).splitlines()[-40:])
if not result_dict.get("success") else ""
),
})
smoke_data = result_dict.get("smoke_test")
if isinstance(smoke_data, dict):
Expand Down
13 changes: 13 additions & 0 deletions CoderMind/scripts/code_gen/git_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
from pathlib import Path
from typing import List, Optional, Tuple

from common.generated_artifacts import (
find_persisted_generated_artifact_changes,
format_generated_artifact_violation,
)
from common.git_utils import GitRunner, sanitize_branch_component

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -141,6 +145,15 @@ def merge_batch_branch(
)
return False, "branch_missing"

generated_artifact_changes = find_persisted_generated_artifact_changes(
git.repo_path,
base_ref=git.main_branch,
)
if generated_artifact_changes:
summary = format_generated_artifact_violation(generated_artifact_changes)
logger.error("Cannot merge generated artifact changes:\n%s", summary)
return False, summary

# Commit any leftover changes
if git.has_uncommitted_changes():
git.stage_and_commit(f"batch: final changes for {batch_id}")
Expand Down
29 changes: 24 additions & 5 deletions CoderMind/scripts/code_gen/post_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,13 @@
from pathlib import Path
from typing import Tuple

from common.generated_artifacts import (
ensure_generated_artifact_excludes,
find_persisted_generated_artifact_changes,
format_generated_artifact_violation,
)
from common.git_utils import GitRunner
from common.task_batch import PlannedTask
from code_gen.prompts import is_project_docs_batch
from code_gen.test_runner import (
ensure_deps_installed,
find_related_test_files,
Expand Down Expand Up @@ -61,10 +65,16 @@ def post_verify(
Returns:
``(passed, test_output_summary)``
"""
# Skip verification for docs batches
if is_project_docs_batch(task):
logger.info("Skipping post-verification for docs batch")
return True, "Documentation batch — no tests."
ensure_generated_artifact_excludes(repo_path)

generated_artifact_changes = find_persisted_generated_artifact_changes(
repo_path,
base_ref=GitRunner.MAIN_BRANCH,
)
if generated_artifact_changes:
summary = format_generated_artifact_violation(generated_artifact_changes)
logger.warning("Post-verification rejected generated artifact changes:\n%s", summary)
return False, summary

# Use the global safety-net timeout for all task types.
# Per-test hang prevention is handled by pytest-timeout (--timeout=DEFAULT_TEST_TIMEOUT).
Expand Down Expand Up @@ -137,6 +147,15 @@ def _git_diff_test_files(prefix: str = "tests/") -> list:
backend=backend,
)

generated_artifact_changes = find_persisted_generated_artifact_changes(
repo_path,
base_ref=GitRunner.MAIN_BRANCH,
)
if generated_artifact_changes:
summary = format_generated_artifact_violation(generated_artifact_changes)
logger.warning("Post-verification rejected generated artifact changes:\n%s", summary)
return False, summary

# Build summary
summary_lines = [
f"passed={result.passed} failed={result.failed} "
Expand Down
45 changes: 45 additions & 0 deletions CoderMind/scripts/common/code_dedup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Shared helpers for collapsing duplicated interface source blocks.

Interface synthesis stores each unit's code as the whole-file text for
non-Python units (``LPCodeUnit`` has no ``count_lines`` slicing), so a
file with N units repeats the entire file N times when those blocks are
joined into ``file_code``. These helpers collapse identical blocks so the
joined source reconstructs the original single file (imports plus each
unit once) instead of an O(units x file_size) blow-up.
"""
from __future__ import annotations

from typing import Iterable, List


def dedup_code_blocks(codes: Iterable[str]) -> List[str]:
    """Drop blank and repeated blocks from ``codes``, keeping first-seen order.

    Two blocks count as the same when they are equal after stripping
    leading and trailing whitespace. The first occurrence is the one
    retained, and it is stored with only its trailing whitespace removed,
    so any leading indentation it had is preserved.

    Args:
        codes: Code blocks to filter; the iterable is consumed once.

    Returns:
        The surviving blocks in order of first appearance.
    """
    # Maps the whitespace-stripped fingerprint to the block kept for it.
    # Dict insertion order provides the first-appearance ordering.
    kept_by_fingerprint: dict = {}
    for block_text in codes:
        fingerprint = block_text.strip()
        if not fingerprint:
            continue  # blank or whitespace-only block
        kept_by_fingerprint.setdefault(fingerprint, block_text.rstrip())
    return list(kept_by_fingerprint.values())


def dedup_file_code(unit_codes: Iterable[str], fallback: str = "") -> str:
    """Join per-unit code blocks into ``file_code`` without repeated blocks.

    The blocks are first filtered through ``dedup_code_blocks``. Identical
    copies therefore collapse to one, while distinct blocks are all kept in
    their original order.

    Args:
        unit_codes: Per-unit code blocks (the values of ``units_to_code``).
        fallback: Text returned when no non-blank block remains.

    Returns:
        The surviving blocks separated by one blank line, or ``fallback``
        when there are none.
    """
    surviving_blocks = dedup_code_blocks(unit_codes)
    return "\n\n".join(surviving_blocks) if surviving_blocks else fallback
Loading
Loading