microsoft · HuYaSen · Jun 26, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/CoderMind/scripts/code_gen/batch_prompts.py b/CoderMind/scripts/code_gen/batch_prompts.py
@@ -26,6 +26,7 @@
 from typing import Any, Dict, List, Optional
 
 from common.execution_state import BatchExecutionState, load_code_gen_state
+from common.generated_artifacts import generated_artifact_prompt_rule
 from common.import_normalizer import build_import_convention_snippet
 from common.paths import (
     CODE_GEN_STATE_FILE as STATE_FILE,
@@ -212,6 +213,7 @@
 for example `5 passed in 0.42s`, `ok ./...`, or `test result: ok`. Copy it
 verbatim from the run you just performed; do NOT invent it. This lets the
 runner cross-check your claim against an independent re-run.
+{summary_fallback_rule}
 
 ## ── Capabilities ─────────────────────────────────────────
 
@@ -413,6 +415,49 @@ def _fallback_test_command(backend: LanguageBackend) -> List[str]:
     return list(_FALLBACK_TEST_COMMANDS.get(backend.name, [backend.prompt_hints().test_framework_name]))
 
 
+def _dynamic_c_family_syntax_command(
+    backend: LanguageBackend,
+    command: List[str],
+) -> str:
+    """Build a ``bash -lc`` syntax check that discovers sources via ``find``.
+
+    ``command[0]`` is the compiler; every ``-I <value>`` pair found in
+    ``command`` is emitted as ``-I "$PWD"`` rather than the original value.
+    """
+    cc = shlex.quote(str(command[0]))
+    is_cpp = backend.name == "cpp"
+    std_flag = "-std=c++17" if is_cpp else "-std=c99"
+    cpp_names = r'\( -name "*.cpp" -o -name "*.cc" -o -name "*.cxx" \)'
+    name_filter = cpp_names if is_cpp else r'-name "*.c"'
+    # A trailing "-I" with nothing after it is not a pair, so skip the last slot.
+    include_pairs = sum(1 for token in command[:-1] if token == "-I")
+    includes = " ".join(['-I "$PWD"'] * include_pairs)
+    label = backend.prompt_hints().display_name
+    script = (
+        "mapfile -d '' sources < <(find . "
+        r"\( -path './.git' -o -path './.cmind' -o -path './build' "
+        r"-o -path './node_modules' -o -path './target' "
+        r"-o -path './dist' -o -path './coverage' -o -path './.venv' "
+        r"-o -path './venv' -o -path './CMakeFiles' \) -prune "
+        f"-o -type f {name_filter} -print0); "
+        f"if (( ${{#sources[@]}} == 0 )); then echo 'No {label} source files found' >&2; exit 1; fi; "
+        f"{cc} {std_flag} {includes} -Wall -Wextra -fsyntax-only \"${{sources[@]}}\""
+    )
+    return "bash -lc " + shlex.quote(script)
+
+
+def _cmake_c_family_test_command(command: List[str]) -> str:
+    """Return a ``bash -lc`` line that configures, builds, then runs CTest."""
+    # command[0] is the ctest executable; quote it for the inner shell script.
+    ctest_bin = shlex.quote(str(command[0]))
+    steps = (
+        "cmake -S . -B build",
+        "cmake --build build",
+        f"{ctest_bin} --test-dir build --output-on-failure",
+    )
+    return "bash -lc " + shlex.quote(" && ".join(steps))
+
+
 def _build_backend_test_cmd(
     backend: LanguageBackend,
     repo_path: Path,
@@ -425,7 +470,12 @@ def _build_backend_test_cmd(
 
     env = backend.detect_env(repo_path) or EnvHandle(project_root=repo_path.resolve())
     try:
-        return _shell_join(backend.test_command(env))
+        command = backend.test_command(env)
+        if backend.name in {"c", "cpp"} and command and "ctest" in Path(str(command[0])).name:
+            return _cmake_c_family_test_command(command)
+        if backend.name in {"c", "cpp"} and "-fsyntax-only" in command:
+            return _dynamic_c_family_syntax_command(backend, command)
+        return _shell_join(command)
     except (ToolchainUnavailable, NotImplementedError, OSError):
         return _shell_join(_fallback_test_command(backend))
 
@@ -513,6 +563,16 @@ def _test_timeout_rule(backend: LanguageBackend) -> str:
     return "- Run long-lived servers, watchers, or interactive commands instead of the exact test command"
 
 
+def _summary_fallback_rule(backend: LanguageBackend, test_command: str) -> str:
+    """Return the extra summary rule for C/C++ syntax-only commands, else ``""``."""
+    if backend.name not in {"c", "cpp"} or "-fsyntax-only" not in test_command:
+        return ""
+    return (
+        "\nFor C/C++ syntax-only commands: if the exact command exits 0 "
+        "and prints no summary line, use exactly `PYTEST_SUMMARY: syntax check passed`.\n"
+    )
+
+
 def _build_language_context(backend: LanguageBackend, test_command: str) -> str:
     """Build the target-language prompt section."""
     hints = backend.prompt_hints()
@@ -526,6 +586,13 @@ def _build_language_context(backend: LanguageBackend, test_command: str) -> str:
         f"- Module naming: {hints.module_naming_rule}\n"
         f"- Style: {hints.style_directive}\n"
     )
+    artifact_extra = ""
+    if backend.name in {"c", "cpp"}:
+        artifact_extra = (
+            "If CTest needs arguments or target wiring, change source files "
+            "such as `CMakeLists.txt` or the test source instead."
+        )
+    context += generated_artifact_prompt_rule(artifact_extra)
     if backend.name != "python":
         # The decoder's defaults are Python-centric; without an explicit
         # prohibition the sub-agent tends to add Python helpers (a main.py
@@ -542,6 +609,13 @@ def _build_language_context(backend: LanguageBackend, test_command: str) -> str:
             f"- Run tests ONLY with `{test_command}` ({hints.test_framework_name}). Do NOT wrap, "
             "re-implement, or drive the test suite through pytest or any Python script.\n"
         )
+        if backend.name in {"c", "cpp"}:
+            context += (
+                "- C/C++ tests and examples must be valid standalone translation units. "
+                "If a test or example calls a helper implemented in another `.c`/`.cpp` file, "
+                "create or update a matching header and include that header; do NOT rely on "
+                "transitive `.cpp` inclusion or undeclared functions.\n"
+            )
     else:
         context += (
             "- Do NOT introduce Python-specific files, packages, or pytest conventions unless this is a Python project.\n"
@@ -886,6 +960,7 @@ def build_tdd_prompt(
         dependency_install_capability=_dependency_install_capability(backend, repo_path),
         dependency_management=_dependency_management_text(backend, repo_path),
         test_timeout_rule=_test_timeout_rule(backend),
+        summary_fallback_rule=_summary_fallback_rule(backend, pytest_cmd),
         import_convention=import_convention,
         language_context=_build_language_context(backend, pytest_cmd),
         dependency_context=dep_ctx_str,
@@ -938,7 +1013,7 @@ def build_resume_prompt(
         post_verify_section = (
             "\n\n## ⚠ False-positive PASS detected\n"
             "Your previous attempt ended with `BATCH_RESULT: PASS` and the\n"
-            "PYTEST_SUMMARY line {agent_summary_repr}, but the runner's\n"
+            f"PYTEST_SUMMARY line {agent_summary_repr}, but the runner's\n"
             "independent test-command re-run reported the failure shown below.\n"
             "Possible causes you must investigate:\n"
             "* You did not actually run the exact test command before declaring PASS.\n"

diff --git a/CoderMind/scripts/code_gen/final_validation.py b/CoderMind/scripts/code_gen/final_validation.py
@@ -46,6 +46,32 @@
 )
 
 
+def _fail_final_test_for_smoke_error(
+    result_dict: Dict[str, Any],
+    message: str,
+    *,
+    smoke_dict: Optional[Dict[str, Any]] = None,
+) -> None:
+    """Flip ``result_dict`` in place to a failure caused by smoke validation."""
+    if smoke_dict is None:
+        # No smoke result supplied: synthesize one whose only finding is ``message``.
+        finding = {"severity": "error", "message": message}
+        smoke_dict = dict(success=False, type="smoke_test", findings=[finding],
+                          error_count=1, warning_count=0)
+    result_dict["success"] = False
+    prior_errors = int(result_dict.get("errors", 0) or 0)
+    result_dict.update(
+        errors=max(prior_errors, 1),  # a failed result reports at least one error
+        output=message,
+        next_action=(
+            "Unit tests passed, but smoke validation failed. Fix the smoke "
+            "failure and re-run final validation."
+        ),
+        smoke_test_error=message,
+        smoke_test=smoke_dict,
+    )
+
+
 def final_test(
     repo_path: Optional[Path] = None,
     state_path: Path = STATE_FILE,
@@ -238,6 +264,8 @@ def final_test(
             actionable = [f for f in smoke_result.findings if f.severity == "error"]
 
             if actionable:
+                remaining = actionable
+                recheck_success = True
                 findings_desc = "\n".join(
                     f"- [{f.severity}] {f.message}" for f in actionable
                 )
@@ -293,6 +321,7 @@ def final_test(
                     result_dict["smoke_test"] = smoke_result_2.to_dict()
                     result_dict["smoke_repair_attempted"] = True
                     result_dict["post_repair_tests_pass"] = recheck.success
+                    recheck_success = recheck.success
                     remaining = [
                         f for f in smoke_result_2.findings
                         if f.severity == "error"
@@ -303,18 +332,39 @@ def final_test(
                         len(remaining), len(actionable),
                         "PASS" if recheck.success else "FAIL",
                     )
+                if remaining or not recheck_success:
+                    smoke_dict = result_dict.get("smoke_test")
+                    if not isinstance(smoke_dict, dict):
+                        smoke_dict = {}
+                    message = (
+                        "Smoke validation failed after unit tests passed. "
+                        f"Remaining smoke errors: {len(remaining)}; "
+                        f"post-repair tests pass: {recheck_success}."
+                    )
+                    _fail_final_test_for_smoke_error(
+                        result_dict,
+                        message,
+                        smoke_dict=smoke_dict,
+                    )
         except ImportError:
             logger.debug("smoke_test module not available, skipping")
         except Exception as exc:
             logger.warning("Smoke test / repair failed: %s", exc)
+            _fail_final_test_for_smoke_error(
+                result_dict,
+                f"Smoke test failed to run: {exc}",
+            )
 
     # Save per-stage results for global_review context
     save_stage_result("final_test", {
-        "success": result.success,
-        "passed": result.passed,
-        "failed": result.failed,
-        "errors": result.errors,
-        "output_tail": "\n".join(result.output.splitlines()[-40:]) if not result.success else "",
+        "success": bool(result_dict.get("success")),
+        "passed": result_dict.get("passed", result.passed),
+        "failed": result_dict.get("failed", result.failed),
+        "errors": result_dict.get("errors", result.errors),
+        "output_tail": (
+            "\n".join(str(result_dict.get("output", "")).splitlines()[-40:])
+            if not result_dict.get("success") else ""
+        ),
     })
     smoke_data = result_dict.get("smoke_test")
     if isinstance(smoke_data, dict):

diff --git a/CoderMind/scripts/code_gen/git_ops.py b/CoderMind/scripts/code_gen/git_ops.py
@@ -20,6 +20,10 @@
 from pathlib import Path
 from typing import List, Optional, Tuple
 
+from common.generated_artifacts import (
+    find_persisted_generated_artifact_changes,
+    format_generated_artifact_violation,
+)
 from common.git_utils import GitRunner, sanitize_branch_component
 
 logger = logging.getLogger(__name__)
@@ -141,6 +145,15 @@ def merge_batch_branch(
             )
         return False, "branch_missing"
 
+    generated_artifact_changes = find_persisted_generated_artifact_changes(
+        git.repo_path,
+        base_ref=git.main_branch,
+    )
+    if generated_artifact_changes:
+        summary = format_generated_artifact_violation(generated_artifact_changes)
+        logger.error("Cannot merge generated artifact changes:\n%s", summary)
+        return False, summary
+
     # Commit any leftover changes
     if git.has_uncommitted_changes():
         git.stage_and_commit(f"batch: final changes for {batch_id}")

diff --git a/CoderMind/scripts/code_gen/post_verify.py b/CoderMind/scripts/code_gen/post_verify.py
@@ -24,9 +24,13 @@
 from pathlib import Path
 from typing import Tuple
 
+from common.generated_artifacts import (
+    ensure_generated_artifact_excludes,
+    find_persisted_generated_artifact_changes,
+    format_generated_artifact_violation,
+)
 from common.git_utils import GitRunner
 from common.task_batch import PlannedTask
-from code_gen.prompts import is_project_docs_batch
 from code_gen.test_runner import (
     ensure_deps_installed,
     find_related_test_files,
@@ -61,10 +65,16 @@ def post_verify(
     Returns:
         ``(passed, test_output_summary)``
     """
-    # Skip verification for docs batches
-    if is_project_docs_batch(task):
-        logger.info("Skipping post-verification for docs batch")
-        return True, "Documentation batch — no tests."
+    ensure_generated_artifact_excludes(repo_path)
+
+    generated_artifact_changes = find_persisted_generated_artifact_changes(
+        repo_path,
+        base_ref=GitRunner.MAIN_BRANCH,
+    )
+    if generated_artifact_changes:
+        summary = format_generated_artifact_violation(generated_artifact_changes)
+        logger.warning("Post-verification rejected generated artifact changes:\n%s", summary)
+        return False, summary
 
     # Use the global safety-net timeout for all task types.
     # Per-test hang prevention is handled by pytest-timeout (--timeout=DEFAULT_TEST_TIMEOUT).
@@ -137,6 +147,15 @@ def _git_diff_test_files(prefix: str = "tests/") -> list:
         backend=backend,
     )
 
+    generated_artifact_changes = find_persisted_generated_artifact_changes(
+        repo_path,
+        base_ref=GitRunner.MAIN_BRANCH,
+    )
+    if generated_artifact_changes:
+        summary = format_generated_artifact_violation(generated_artifact_changes)
+        logger.warning("Post-verification rejected generated artifact changes:\n%s", summary)
+        return False, summary
+
     # Build summary
     summary_lines = [
         f"passed={result.passed} failed={result.failed} "

diff --git a/CoderMind/scripts/common/code_dedup.py b/CoderMind/scripts/common/code_dedup.py
@@ -0,0 +1,45 @@
+"""Shared helpers for collapsing duplicated interface source blocks.
+
+Interface synthesis stores each unit's code as the whole-file text for
+non-Python units (``LPCodeUnit`` has no ``count_lines`` slicing), so a
+file with N units repeats the entire file N times when those blocks are
+joined into ``file_code``. These helpers collapse identical blocks so the
+joined source reconstructs the original single file (imports plus each
+unit once) instead of an O(units x file_size) blow-up.
+"""
+from __future__ import annotations
+
+from typing import Iterable, List
+
+
+def dedup_code_blocks(codes: Iterable[str]) -> List[str]:
+    """Drop blank and repeated blocks from ``codes``, keeping first-seen order.
+
+    Two blocks count as duplicates when they are equal after ``str.strip()``,
+    so differing outer whitespace does not defeat dedup. The first block of
+    each group survives with only trailing whitespace removed, which keeps
+    its leading indentation intact for joining into ``file_code``.
+    """
+    # Insertion-ordered mapping: stripped text -> right-trimmed first occurrence.
+    # ``setdefault`` leaves an existing entry alone, so later duplicates are ignored.
+    first_by_key: dict[str, str] = {}
+    for block in codes:
+        stripped = block.strip()
+        if not stripped:
+            continue  # whitespace-only blocks are discarded
+        first_by_key.setdefault(stripped, block.rstrip())
+    return list(first_by_key.values())
+
+
+def dedup_file_code(unit_codes: Iterable[str], fallback: str = "") -> str:
+    """Join the distinct blocks of ``unit_codes`` into one ``file_code`` string.
+
+    ``unit_codes`` are the ``units_to_code`` values. Blocks are deduplicated
+    with ``dedup_code_blocks`` and the survivors are joined with a blank line
+    between them, so identical whole-file copies collapse to one copy while
+    distinct per-unit slices are all kept. ``fallback`` is returned when no
+    non-empty block survives.
+    """
+    distinct = dedup_code_blocks(unit_codes)
+    # An empty list means every block was blank (or none were given).
+    return "\n\n".join(distinct) if distinct else fallback