diff --git a/api/export_api.py b/api/export_api.py index 7651897..1186a13 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -13,7 +13,9 @@ from datetime import datetime from pathlib import Path -from flask import Blueprint, Response, current_app, jsonify, request +from flask import Blueprint, Response, jsonify, request + +from api.flask_config import exclusion_rules from utils.workspace_path import resolve_workspace_path from utils.path_helpers import to_epoch_ms @@ -21,15 +23,15 @@ from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.cursor_md_exporter import cursor_ide_chat_to_markdown from services.workspace_db import ( - _build_composer_id_to_workspace_id, - _collect_workspace_entries, + build_composer_id_to_workspace_id, + collect_workspace_entries, load_bubble_map, load_code_block_diff_map, - _open_global_db, + open_global_db, ) from services.workspace_resolver import ( - _get_workspace_display_name, - _create_project_name_to_workspace_id_map, + create_project_name_to_workspace_id_map, + lookup_workspace_display_name, ) bp = Blueprint("export_api", __name__) @@ -47,8 +49,12 @@ def _get_export_state() -> dict: try: with open(state_path, "r", encoding="utf-8") as f: return json.load(f) - except Exception: - pass + except (json.JSONDecodeError, ValueError, OSError) as e: + _logger.warning( + "Could not read export state from %s: %s", + state_path, + e, + ) return {} @@ -96,25 +102,25 @@ def export_chats(): last_export_ms = to_epoch_ms(ts_str) # ── Workspace scanning via service layer ────────────────────────────── - workspace_entries = _collect_workspace_entries(workspace_path) - composer_id_to_ws = _build_composer_id_to_workspace_id(workspace_path, workspace_entries) - project_name_map = _create_project_name_to_workspace_id_map(workspace_entries) + workspace_entries = collect_workspace_entries(workspace_path) + composer_id_to_ws = build_composer_id_to_workspace_id(workspace_path, workspace_entries) + project_name_map = create_project_name_to_workspace_id_map(workspace_entries) # Build display-name and slug maps ws_id_to_slug: dict[str, str] = {} ws_id_to_display_name: dict[str, str] = {} for e in workspace_entries: - display = _get_workspace_display_name(workspace_path, e["name"]) + display = lookup_workspace_display_name(workspace_path, e["name"]) if display != e["name"]: ws_id_to_display_name[e["name"]] = display ws_id_to_slug[e["name"]] = slug(display) today = datetime.now().strftime("%Y-%m-%d") exported = [] - rules = current_app.config.get("EXCLUSION_RULES") or [] + rules = exclusion_rules() # ── Database reading via service layer ──────────────────────────────── - with _open_global_db(workspace_path) as (global_db, global_db_path): + with open_global_db(workspace_path) as (global_db, _): if global_db is None: return jsonify({"error": "Cursor global storage not found"}), 404 @@ -138,7 +144,11 @@ def export_chats(): if not headers: continue - updated_at_ms = to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or 0 + updated_at_ms = to_epoch_ms(cd.get("lastUpdatedAt")) + if updated_at_ms is None: + updated_at_ms = to_epoch_ms(cd.get("createdAt")) + if updated_at_ms is None: + updated_at_ms = 0 if since == "last" and updated_at_ms and updated_at_ms <= last_export_ms: continue diff --git a/api/flask_config.py b/api/flask_config.py new file mode 100644 index 0000000..30bf4ae --- /dev/null +++ b/api/flask_config.py @@ -0,0 +1,10 @@ +"""Shared Flask request/config helpers for API blueprints.""" + +from __future__ import annotations + +from flask import current_app + + +def exclusion_rules() -> list: + """Return loaded exclusion rules from app config (empty list when unset).""" + return current_app.config.get("EXCLUSION_RULES") or [] diff --git a/api/workspaces.py b/api/workspaces.py index 1f9128b..b322eef 100644 --- a/api/workspaces.py +++ b/api/workspaces.py @@ -11,7 +11,9 @@ import os from datetime import datetime, timezone -from flask import Blueprint, current_app, jsonify +from flask import Blueprint, jsonify + +from api.flask_config import exclusion_rules from utils.workspace_path import resolve_workspace_path, get_cli_chats_path from utils.cli_chat_reader import list_cli_projects @@ -22,16 +24,10 @@ ) from utils.workspace_descriptor import read_json_file from services.workspace_resolver import ( - _infer_workspace_name_from_context, - # Re-exported for back-compat with existing tests that import from api.workspaces - # directly (test_invalid_workspace_aliases, test_workspace_assignment_fallback, - # test_workspace_name_inference, test_models_wired_at_read_sites). - # Production callers should import from services.workspace_resolver instead. - _determine_project_for_conversation, # noqa: F401 - _infer_invalid_workspace_aliases, # noqa: F401 - _get_workspace_display_name, # noqa: F401 + infer_workspace_name_from_context, + lookup_workspace_display_name, ) -from services.cli_tabs import _get_cli_workspace_tabs +from services.cli_tabs import get_cli_workspace_tabs from services.workspace_listing import list_workspace_projects from services.workspace_tabs import assemble_workspace_tabs @@ -54,7 +50,7 @@ def list_workspaces(): try: workspace_path = resolve_workspace_path() - rules = current_app.config.get("EXCLUSION_RULES") or [] + rules = exclusion_rules() projects, warnings = list_workspace_projects(workspace_path, rules) payload: dict = {"projects": projects} if warnings: @@ -121,12 +117,12 @@ def get_workspace(workspace_id): if derived_name: workspace_name = derived_name elif workspace_name == workspace_id: - inferred = _infer_workspace_name_from_context(workspace_path, workspace_id) + inferred = infer_workspace_name_from_context(workspace_path, workspace_id) if inferred: workspace_name = inferred except Exception as e: warn_workspace_json_read(_logger, workspace_id, e) - inferred = _infer_workspace_name_from_context(workspace_path, workspace_id) + inferred = infer_workspace_name_from_context(workspace_path, workspace_id) if inferred: workspace_name = inferred @@ -150,10 +146,14 @@ def get_workspace(workspace_id): @bp.route("/api/workspaces//tabs") def get_workspace_tabs(workspace_id): if workspace_id.startswith("cli:"): - return _get_cli_workspace_tabs(workspace_id) + try: + return get_cli_workspace_tabs(workspace_id, exclusion_rules()) + except Exception: + _logger.exception("Failed to get CLI workspace tabs") + return jsonify({"error": "Failed to get workspace tabs"}), 500 try: workspace_path = resolve_workspace_path() - rules = current_app.config.get("EXCLUSION_RULES") or [] + rules = exclusion_rules() payload, status = assemble_workspace_tabs(workspace_id, workspace_path, rules) return jsonify(payload), status except Exception: diff --git a/scripts/export.py b/scripts/export.py index 6cea025..371cf81 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -39,7 +39,6 @@ extract_text_from_bubble, slug, ) -from utils.tool_parser import parse_tool_call # noqa: E402 from utils.workspace_path import ( # noqa: E402 get_cli_chats_path, resolve_workspace_path, @@ -55,26 +54,26 @@ ) from models import ExportEntry, SchemaError # noqa: E402 from services.workspace_db import ( # noqa: E402 - _build_composer_id_to_workspace_id, - _collect_invalid_workspace_ids, - _collect_workspace_entries, + build_composer_id_to_workspace_id, + collect_invalid_workspace_ids, + collect_workspace_entries, load_bubble_map, load_code_block_diff_map, load_project_layouts_map, - _open_global_db, + open_global_db, ) from services.workspace_resolver import ( # noqa: E402 - _determine_project_for_conversation, - _get_workspace_display_name, - _infer_invalid_workspace_aliases, - _create_project_name_to_workspace_id_map, - _create_workspace_path_to_id_map, + create_project_name_to_workspace_id_map, + create_workspace_path_to_id_map, + determine_project_for_conversation, + infer_invalid_workspace_aliases, + lookup_workspace_display_name, ) _logger = logging.getLogger(__name__) -def _configure_cli_logging() -> None: +def configure_cli_logging() -> None: """Route log records to stderr so stdout stays for export progress lines.""" root = logging.getLogger() if root.handlers: @@ -86,7 +85,7 @@ def _configure_cli_logging() -> None: ) -def _json_dump_safe(value) -> str: +def json_dump_safe(value) -> str: """Best-effort JSON serialization for exclusion matching.""" try: return json.dumps(value, ensure_ascii=False, sort_keys=True) @@ -94,7 +93,7 @@ def _json_dump_safe(value) -> str: return str(value) if value is not None else "" -def _load_manifest_entries(manifest_path: str) -> dict: +def load_manifest_entries(manifest_path: str) -> dict: """Load manifest entries keyed by log_id from a JSONL file.""" existing: dict = {} if not os.path.isfile(manifest_path): @@ -117,7 +116,7 @@ def _load_manifest_entries(manifest_path: str) -> dict: return existing -def _write_manifest_entries(manifest_path: str, entries_by_id: dict): +def write_manifest_entries(manifest_path: str, entries_by_id: dict): """Write manifest entries to JSONL.""" os.makedirs(os.path.dirname(manifest_path), exist_ok=True) with open(manifest_path, "w", encoding="utf-8") as f: @@ -177,7 +176,7 @@ def parse_args(): def main(): - _configure_cli_logging() + configure_cli_logging() opts = parse_args() since = opts["since"] out_dir = os.path.abspath(opts["out_dir"]) @@ -197,15 +196,18 @@ def main(): ts = st.get("lastExportTime") if ts: last_export = int(datetime.fromisoformat(ts.replace("Z", "+00:00")).timestamp() * 1000) - except Exception: - pass + except (json.JSONDecodeError, ValueError, OSError) as e: + _logger.warning( + "Could not read last export timestamp; defaulting to full export: %s", + e, + ) # ── Workspace scanning via service layer ────────────────────────────────── - workspace_entries = _collect_workspace_entries(workspace_path) - invalid_workspace_ids = _collect_invalid_workspace_ids(workspace_entries) - project_name_map = _create_project_name_to_workspace_id_map(workspace_entries) - workspace_path_map = _create_workspace_path_to_id_map(workspace_entries) - composer_id_to_ws = _build_composer_id_to_workspace_id(workspace_path, workspace_entries) + workspace_entries = collect_workspace_entries(workspace_path) + invalid_workspace_ids = collect_invalid_workspace_ids(workspace_entries) + project_name_map = create_project_name_to_workspace_id_map(workspace_entries) + workspace_path_map = create_workspace_path_to_id_map(workspace_entries) + composer_id_to_ws = build_composer_id_to_workspace_id(workspace_path, workspace_entries) # Build display-name and slug maps from workspace entries. # Entries whose workspace.json cannot be resolved are omitted so the @@ -213,11 +215,11 @@ def main(): # behaviour where unresolvable workspaces were skipped. workspace_id_to_display_name: dict[str, str] = {} workspace_id_to_slug: dict[str, str] = {} - for e in workspace_entries: - display = _get_workspace_display_name(workspace_path, e["name"]) - if display != e["name"]: # successfully resolved a human-readable name - workspace_id_to_display_name[e["name"]] = display - workspace_id_to_slug[e["name"]] = slug(display) + for entry in workspace_entries: + display = lookup_workspace_display_name(workspace_path, entry["name"]) + if display != entry["name"]: # successfully resolved a human-readable name + workspace_id_to_display_name[entry["name"]] = display + workspace_id_to_slug[entry["name"]] = slug(display) # ── Database reading via service layer ──────────────────────────────────── project_layouts_map: dict = {} @@ -226,7 +228,7 @@ def main(): ide_composer_rows: list = [] invalid_workspace_aliases: dict = {} - with _open_global_db(workspace_path) as (global_db, global_db_path): + with open_global_db(workspace_path) as (global_db, global_db_path): if global_db is None: _logger.info( "Cursor IDE global storage not found at %s — skipping IDE chats.", @@ -245,7 +247,7 @@ def main(): except sqlite3.Error: pass - invalid_workspace_aliases = _infer_invalid_workspace_aliases( + invalid_workspace_aliases = infer_invalid_workspace_aliases( composer_rows=ide_composer_rows, project_layouts_map=project_layouts_map, project_name_map=project_name_map, @@ -266,19 +268,28 @@ def main(): composer_id = row["key"].split(":")[1] try: cd = json.loads(row["value"]) - except Exception: + except (json.JSONDecodeError, ValueError) as parse_err: + _logger.debug( + "Skipping corrupt composerData row %s: %s", + composer_id, + parse_err, + ) continue headers = cd.get("fullConversationHeadersOnly") or [] if not headers: continue - updated_at = to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or 0 + updated_at = to_epoch_ms(cd.get("lastUpdatedAt")) + if updated_at is None: + updated_at = to_epoch_ms(cd.get("createdAt")) + if updated_at is None: + updated_at = 0 if since == "last" and updated_at <= last_export: continue # Workspace assignment via service layer - pid = _determine_project_for_conversation( + pid = determine_project_for_conversation( cd, composer_id, project_layouts_map, project_name_map, workspace_path_map, workspace_entries, bubble_map, composer_id_to_ws, invalid_workspace_ids, @@ -307,9 +318,9 @@ def main(): text = extract_text_from_bubble(b) if text: bubble_texts.append(text) - bubble_meta_parts.append(_json_dump_safe(b)) + bubble_meta_parts.append(json_dump_safe(b)) - code_diff_parts = [_json_dump_safe(d) for d in code_block_diff_map.get(composer_id, [])] + code_diff_parts = [json_dump_safe(d) for d in code_block_diff_map.get(composer_id, [])] searchable = build_searchable_text( project_name=ws_display_name, chat_title=title, @@ -320,7 +331,7 @@ def main(): bubble_texts + bubble_meta_parts + code_diff_parts - + [_json_dump_safe(model_config), _json_dump_safe(cd)] + + [json_dump_safe(model_config), json_dump_safe(cd)] ) if p ), @@ -484,7 +495,7 @@ def main(): f.write(entry["content"]) manifest_path = os.path.join(out_dir, "manifest.jsonl") - existing = _load_manifest_entries(manifest_path) + existing = load_manifest_entries(manifest_path) for entry in exported: existing[entry["id"]] = { "log_id": entry["id"], @@ -494,10 +505,10 @@ def main(): "updated_at": datetime.fromtimestamp(entry["updatedAt"] / 1000).isoformat() if entry["updatedAt"] else datetime.now().isoformat(), } if existing: - _write_manifest_entries(manifest_path, existing) + write_manifest_entries(manifest_path, existing) global_manifest_path = os.path.join(state_dir, "manifest.jsonl") - global_existing = _load_manifest_entries(global_manifest_path) + global_existing = load_manifest_entries(global_manifest_path) for entry in exported: global_existing[entry["id"]] = { "log_id": entry["id"], @@ -507,7 +518,7 @@ def main(): "updated_at": datetime.fromtimestamp(entry["updatedAt"] / 1000).isoformat() if entry["updatedAt"] else datetime.now().isoformat(), } if global_existing: - _write_manifest_entries(global_manifest_path, global_existing) + write_manifest_entries(global_manifest_path, global_existing) print(f"Exported {count} chat(s) to {out_dir}") state = { diff --git a/services/cli_tabs.py b/services/cli_tabs.py index f28aafc..f036aa2 100644 --- a/services/cli_tabs.py +++ b/services/cli_tabs.py @@ -3,17 +3,28 @@ import logging from datetime import datetime -from flask import current_app, jsonify - -_logger = logging.getLogger(__name__) +from flask import jsonify from utils.cli_chat_reader import list_cli_projects, messages_to_bubbles, traverse_blobs from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.workspace_path import get_cli_chats_path +_logger = logging.getLogger(__name__) + + +def get_cli_workspace_tabs(workspace_id: str, rules: list): + """Return Flask JSON response with tabs for a Cursor CLI project. + + Args: + workspace_id: Workspace id with ``cli:`` prefix (e.g. ``cli:proj-1``). + rules: Exclusion rule token lists (same as :func:`services.workspace_tabs.assemble_workspace_tabs`). -def _get_cli_workspace_tabs(workspace_id: str): - """Return tabs for a Cursor CLI project (workspace_id starts with "cli:").""" + Returns: + ``flask.Response | tuple[flask.Response, int]`` suitable for a Flask route + handler. Success returns ``jsonify({"tabs": ...})`` (plain ``Response``, + status 200). Errors return ``(jsonify({"error": ...}), status)`` with + 404 when the project is missing or 500 on unexpected failure. + """ try: project_id = workspace_id[4:] cli_projects = list_cli_projects(get_cli_chats_path()) @@ -27,7 +38,6 @@ def _get_cli_workspace_tabs(workspace_id: str): if project is None: return jsonify({"error": "CLI project not found"}), 404 - rules = current_app.config.get("EXCLUSION_RULES") or [] ws_name = project.get("workspace_name") or project_id[:12] sessions = project.get("sessions") or [] if not isinstance(sessions, list): diff --git a/services/workspace_db.py b/services/workspace_db.py index f4ffcac..bfe7c0b 100644 --- a/services/workspace_db.py +++ b/services/workspace_db.py @@ -15,7 +15,7 @@ # ── Global-DB KV loaders ──────────────────────────────────────────────────── # Each function accepts an already-opened sqlite3.Connection (row_factory must -# be set to sqlite3.Row by the caller, as _open_global_db does) and returns +# be set to sqlite3.Row by the caller, as open_global_db does) and returns # a populated dict. sqlite3.Error is caught internally so a missing or # corrupt table cannot propagate to callers. @@ -113,8 +113,16 @@ def load_code_block_diff_map(global_db) -> dict[str, list]: return diff_map -def _collect_workspace_entries(workspace_path: str) -> list[dict]: - """Scan workspace directory and return entries with workspace.json.""" +def collect_workspace_entries(workspace_path: str) -> list[dict]: + """Scan workspace directory and return entries with workspace.json. + + Args: + workspace_path: Cursor workspace storage root (parent of per-workspace folders). + + Returns: + List of dicts with keys ``name`` (folder id) and ``workspaceJsonPath``. + Returns an empty list if ``workspace_path`` is missing or unreadable. + """ entries = [] try: for name in os.listdir(workspace_path): @@ -131,8 +139,15 @@ def _collect_workspace_entries(workspace_path: str) -> list[dict]: return entries -def _collect_invalid_workspace_ids(workspace_entries: list[dict]) -> set[str]: - """Workspace IDs whose descriptors have no resolvable folder paths.""" +def collect_invalid_workspace_ids(workspace_entries: list[dict]) -> set[str]: + """Return workspace IDs whose descriptors have no resolvable folder paths. + + Args: + workspace_entries: Output of :func:`collect_workspace_entries`. + + Returns: + Set of workspace folder names that cannot be mapped to a folder path. + """ invalid: set[str] = set() for entry in workspace_entries: try: @@ -149,8 +164,19 @@ def _collect_invalid_workspace_ids(workspace_entries: list[dict]) -> set[str]: return invalid -def _build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: list) -> dict: - """Build mapping: composerId -> workspaceId from per-workspace state.vscdb.""" +def build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: list) -> dict: + """Build mapping from composer ID to workspace folder name. + + Reads ``composer.composerData`` from each workspace's ``state.vscdb``. + Skips workspaces with missing databases or malformed JSON. + + Args: + workspace_path: Cursor workspace storage root. + workspace_entries: Output of :func:`collect_workspace_entries`. + + Returns: + Dict mapping ``composerId`` strings to workspace folder names. + """ mapping: dict = {} for entry in workspace_entries: db_path = os.path.join(workspace_path, entry["name"], "state.vscdb") @@ -187,8 +213,17 @@ def _build_composer_id_to_workspace_id(workspace_path: str, workspace_entries: l @contextmanager -def _open_global_db(workspace_path: str): - """Yield (conn, path) for the global-storage SQLite db (read-only); (None, path) if the file is missing.""" +def open_global_db(workspace_path: str): + """Open Cursor global storage SQLite database read-only. + + Args: + workspace_path: Cursor workspace storage root. + + Yields: + ``(conn, path)`` where ``conn`` is a :class:`sqlite3.Connection` with + ``row_factory=sqlite3.Row``, or ``None`` if the database file is missing + or cannot be opened. ``path`` is always the resolved global DB path. + """ global_db_path = os.path.join(workspace_path, "..", "globalStorage", "state.vscdb") global_db_path = os.path.normpath(global_db_path) if not os.path.isfile(global_db_path): diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 7be214b..879710c 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -20,37 +20,50 @@ from utils.workspace_path import get_cli_chats_path from models import Composer, ParseWarningCollector, SchemaError from services.workspace_db import ( - _build_composer_id_to_workspace_id, - _collect_invalid_workspace_ids, - _collect_workspace_entries, + build_composer_id_to_workspace_id, + collect_invalid_workspace_ids, + collect_workspace_entries, load_bubble_map, load_project_layouts_map, - _open_global_db, + open_global_db, ) from services.workspace_resolver import ( - _create_project_name_to_workspace_id_map, - _create_workspace_path_to_id_map, - _determine_project_for_conversation, - _get_workspace_display_name, - _infer_invalid_workspace_aliases, - _infer_workspace_name_from_context, + create_project_name_to_workspace_id_map, + create_workspace_path_to_id_map, + determine_project_for_conversation, + infer_invalid_workspace_aliases, + infer_workspace_name_from_context, + lookup_workspace_display_name, ) def list_workspace_projects(workspace_path: str, rules: list) -> tuple[list[dict], list[dict]]: - """Return (projects, warnings) for GET /api/workspaces.""" + """List workspace projects for GET /api/workspaces. + + Args: + workspace_path: Cursor ``workspaceStorage`` root. + rules: Exclusion rule token lists from :func:`utils.exclusion_rules.load_rules`. + + Returns: + ``(projects, warnings)``. Each project dict has ``id``, ``name``, + ``path`` (``workspace.json`` path), ``conversationCount``, + ``lastModified`` (ISO 8601), and optional ``aliasIds`` / ``source`` + (``"cli"`` for Cursor CLI projects). *warnings* is a list of structured + parse-error dicts (``type``, ``count``, ``detail``) from + :meth:`models.ParseWarningCollector.to_api_list`; empty when no skips. + """ parse_warnings = ParseWarningCollector() - workspace_entries = _collect_workspace_entries(workspace_path) - invalid_workspace_ids = _collect_invalid_workspace_ids(workspace_entries) + workspace_entries = collect_workspace_entries(workspace_path) + invalid_workspace_ids = collect_invalid_workspace_ids(workspace_entries) - project_name_map = _create_project_name_to_workspace_id_map(workspace_entries) - workspace_path_map = _create_workspace_path_to_id_map(workspace_entries) - composer_id_to_ws = _build_composer_id_to_workspace_id(workspace_path, workspace_entries) + project_name_map = create_project_name_to_workspace_id_map(workspace_entries) + workspace_path_map = create_workspace_path_to_id_map(workspace_entries) + composer_id_to_ws = build_composer_id_to_workspace_id(workspace_path, workspace_entries) conversation_map: dict[str, list] = {} # closing semantics now baked into the context manager (issue #17). - with _open_global_db(workspace_path) as (global_db, _): + with open_global_db(workspace_path) as (global_db, _): if global_db: def _safe_fetchall(query: str, params: tuple = ()) -> list: try: @@ -65,7 +78,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: project_layouts_map: dict[str, list] = load_project_layouts_map(global_db) bubble_map: dict[str, dict] = load_bubble_map(global_db) - invalid_workspace_aliases = _infer_invalid_workspace_aliases( + invalid_workspace_aliases = infer_invalid_workspace_aliases( composer_rows=composer_rows, project_layouts_map=project_layouts_map, project_name_map=project_name_map, @@ -107,7 +120,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: continue cd = composer.raw try: - pid = _determine_project_for_conversation( + pid = determine_project_for_conversation( cd, cid, project_layouts_map, project_name_map, workspace_path_map, workspace_entries, bubble_map, composer_id_to_ws, invalid_workspace_ids, @@ -192,9 +205,9 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: ) mtime = 0 - workspace_name = _get_workspace_display_name(workspace_path, primary["name"]) + workspace_name = lookup_workspace_display_name(workspace_path, primary["name"]) if workspace_name == primary["name"]: - inferred = _infer_workspace_name_from_context(workspace_path, primary["name"]) + inferred = infer_workspace_name_from_context(workspace_path, primary["name"]) workspace_name = inferred or f"Project {primary['name'][:8]}" if is_excluded_by_rules(rules, workspace_name): diff --git a/services/workspace_resolver.py b/services/workspace_resolver.py index e76a8c0..faf5a8b 100644 --- a/services/workspace_resolver.py +++ b/services/workspace_resolver.py @@ -18,12 +18,21 @@ warn_workspace_json_read, ) from utils.workspace_descriptor import basename_from_pathish, read_json_file -from services.workspace_db import _open_global_db +from services.workspace_db import open_global_db from models import SchemaError, Workspace -def _get_workspace_display_name(workspace_path: str, workspace_id: str) -> str: - """Return human-readable workspace name; "Other chats" for global, workspace_id if unreadable.""" +def lookup_workspace_display_name(workspace_path: str, workspace_id: str) -> str: + """Resolve a display name for a workspace folder from storage. + + Args: + workspace_path: Cursor workspace storage root. + workspace_id: Workspace folder name, or ``"global"`` for unassigned chats. + + Returns: + Human-readable name from ``workspace.json`` when parseable; ``"Other chats"`` + for ``global``; otherwise ``workspace_id``. + """ if workspace_id == "global": return "Other chats" wj_path = os.path.join(workspace_path, workspace_id, "workspace.json") @@ -41,8 +50,17 @@ def _get_workspace_display_name(workspace_path: str, workspace_id: str) -> str: return workspace_id -def _infer_workspace_name_from_context(workspace_path: str, workspace_id: str) -> str | None: - """Infer workspace name from projectLayouts when workspace.json is opaque.""" +def infer_workspace_name_from_context(workspace_path: str, workspace_id: str) -> str | None: + """Infer workspace name from ``projectLayouts`` when ``workspace.json`` is opaque. + + Args: + workspace_path: Cursor workspace storage root. + workspace_id: Workspace folder name (not ``"global"``). + + Returns: + Most common folder basename from global ``messageRequestContext`` rows, + or ``None`` when inference fails. + """ if workspace_id == "global": return "Other chats" @@ -77,7 +95,7 @@ def _infer_workspace_name_from_context(workspace_path: str, workspace_id: str) - # Gather folder-name hints from global messageRequestContext.projectLayouts counts: dict[str, int] = {} - with _open_global_db(workspace_path) as (gconn, _): + with open_global_db(workspace_path) as (gconn, _): if not gconn: return None for cid in composer_ids: @@ -93,7 +111,12 @@ def _infer_workspace_name_from_context(workspace_path: str, workspace_id: str) - continue try: ctx = json.loads(row[0]) - except Exception: + except (json.JSONDecodeError, ValueError) as e: + _logger.debug( + "Skipping malformed messageRequestContext for %s: %s", + cid, + e, + ) continue layouts = ctx.get("projectLayouts") if not isinstance(layouts, list): @@ -103,7 +126,12 @@ def _infer_workspace_name_from_context(workspace_path: str, workspace_id: str) - if isinstance(layout, str): try: obj = json.loads(layout) - except Exception: + except (json.JSONDecodeError, ValueError) as e: + _logger.debug( + "Skipping malformed projectLayout for %s: %s", + cid, + e, + ) obj = None elif isinstance(layout, dict): obj = layout @@ -118,10 +146,19 @@ def _infer_workspace_name_from_context(workspace_path: str, workspace_id: str) - return max(counts.items(), key=lambda kv: kv[1])[0] -def _get_project_from_file_path( +def get_project_from_file_path( file_path: str, workspace_entries: list[dict], ) -> str | None: + """Map a file path to the workspace folder that contains it. + + Args: + file_path: Absolute or URI-style file path. + workspace_entries: Output of :func:`services.workspace_db.collect_workspace_entries`. + + Returns: + Workspace folder name with the longest matching root path, or ``None``. + """ normalized_path = normalize_file_path(file_path) best_match = None best_len = 0 @@ -142,7 +179,15 @@ def _get_project_from_file_path( return best_match -def _create_project_name_to_workspace_id_map(workspace_entries): +def create_project_name_to_workspace_id_map(workspace_entries): + """Map workspace folder basenames to workspace folder names. + + Args: + workspace_entries: Output of :func:`services.workspace_db.collect_workspace_entries`. + + Returns: + Dict mapping last path segment (folder name) to workspace id. + """ mapping = {} for entry in workspace_entries: try: @@ -158,7 +203,15 @@ def _create_project_name_to_workspace_id_map(workspace_entries): return mapping -def _create_workspace_path_to_id_map(workspace_entries): +def create_workspace_path_to_id_map(workspace_entries): + """Map normalized workspace root paths to workspace folder names. + + Args: + workspace_entries: Output of :func:`services.workspace_db.collect_workspace_entries`. + + Returns: + Dict mapping normalized folder paths to workspace ids. + """ out = {} for entry in workspace_entries: try: @@ -171,7 +224,7 @@ def _create_workspace_path_to_id_map(workspace_entries): return out -def _determine_project_for_conversation( +def determine_project_for_conversation( composer_data: dict, composer_id: str, project_layouts_map: dict, @@ -182,6 +235,24 @@ def _determine_project_for_conversation( composer_id_to_workspace_id: dict | None = None, invalid_workspace_ids: set[str] | None = None, ) -> str | None: + """Resolve which workspace folder owns a composer conversation. + + Args: + composer_data: Parsed ``composerData`` JSON for *composer_id*. + composer_id: Composer UUID from the global DB key. + project_layouts_map: ``{composer_id: [root_path, ...]}`` from global KV. + project_name_to_workspace_id: Basename-to-workspace-folder map. + workspace_path_to_id: Normalized root path to workspace folder map. + workspace_entries: Output of :func:`services.workspace_db.collect_workspace_entries`. + bubble_map: ``{bubble_id: bubble_dict}`` from global KV. + composer_id_to_workspace_id: Definitive per-workspace composer map; when + ``None``, layout and path heuristics are used without this shortcut. + invalid_workspace_ids: Workspace folders marked invalid; mapped IDs in + this set are ignored when using *composer_id_to_workspace_id*. + + Returns: + Workspace folder name, or ``None`` when no project can be determined. + """ # Primary: definitive per-workspace mapping if composer_id_to_workspace_id and composer_id in composer_id_to_workspace_id: mapped = composer_id_to_workspace_id[composer_id] @@ -205,7 +276,7 @@ def _determine_project_for_conversation( for file_entry in newly: uri = file_entry.get("uri") if isinstance(file_entry, dict) else None if isinstance(uri, dict) and uri.get("path"): - pid = _get_project_from_file_path(uri["path"], workspace_entries) + pid = get_project_from_file_path(uri["path"], workspace_entries) if pid: return pid @@ -213,7 +284,7 @@ def _determine_project_for_conversation( cbd = composer_data.get("codeBlockData") if isinstance(cbd, dict): for fp in cbd.keys(): - pid = _get_project_from_file_path(re.sub(r"^file://", "", fp), workspace_entries) + pid = get_project_from_file_path(re.sub(r"^file://", "", fp), workspace_entries) if pid: return pid @@ -227,19 +298,19 @@ def _determine_project_for_conversation( continue for fp in (bubble.get("relevantFiles") or []): if fp: - pid = _get_project_from_file_path(fp, workspace_entries) + pid = get_project_from_file_path(fp, workspace_entries) if pid: return pid for uri in (bubble.get("attachedFileCodeChunksUris") or []): if isinstance(uri, dict) and uri.get("path"): - pid = _get_project_from_file_path(uri["path"], workspace_entries) + pid = get_project_from_file_path(uri["path"], workspace_entries) if pid: return pid for fs_entry in (bubble.get("context", {}).get("fileSelections") or []): if isinstance(fs_entry, dict): uri = fs_entry.get("uri") if isinstance(uri, dict) and uri.get("path"): - pid = _get_project_from_file_path(uri["path"], workspace_entries) + pid = get_project_from_file_path(uri["path"], workspace_entries) if pid: return pid @@ -299,7 +370,7 @@ def _determine_project_for_conversation( return None -def _infer_invalid_workspace_aliases( +def infer_invalid_workspace_aliases( composer_rows: list, project_layouts_map: dict, project_name_map: dict, @@ -309,7 +380,27 @@ def _infer_invalid_workspace_aliases( composer_id_to_ws: dict, invalid_workspace_ids: set[str], ) -> dict[str, str]: - """Majority-vote each invalid workspace ID to its most likely valid replacement.""" + """Map invalid workspace IDs to valid replacements by majority vote. + + For each composer assigned to an *invalid_workspace_ids* entry, calls + :func:`determine_project_for_conversation` without the definitive composer map + and counts votes for inferred valid workspace folders. + + Args: + composer_rows: Global ``composerData:*`` SQLite rows. + project_layouts_map: Layout map passed to :func:`determine_project_for_conversation`. + project_name_map: Basename map for path resolution. + workspace_path_map: Normalized path map for path resolution. + workspace_entries: Workspace folder entries from storage scan. + bubble_map: Bubble KV map for path resolution. + composer_id_to_ws: Composer-to-workspace map (may point at invalid IDs). + invalid_workspace_ids: Workspace folder names to reassign. + + Returns: + ``{invalid_id: replacement_id}`` for IDs with at least one vote. Ties + break by choosing the replacement with the highest vote count (first + max in iteration order). Returns ``{}`` when no invalid ID receives votes. + """ votes: dict[str, dict[str, int]] = {} for row in composer_rows: cid = row["key"].split(":")[1] @@ -332,7 +423,7 @@ def _infer_invalid_workspace_aliases( type(cd).__name__, ) continue - inferred = _determine_project_for_conversation( + inferred = determine_project_for_conversation( cd, cid, project_layouts_map, diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 3c83e18..f11a94b 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -18,22 +18,22 @@ ) from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.text_extract import extract_text_from_bubble -from utils.tool_parser import parse_tool_call as _parse_tool_call +from utils.tool_parser import parse_tool_call from utils.workspace_descriptor import read_json_file from models import Bubble, Composer, ParseWarningCollector, SchemaError from services.workspace_db import ( - _build_composer_id_to_workspace_id, - _collect_invalid_workspace_ids, - _collect_workspace_entries, + build_composer_id_to_workspace_id, + collect_invalid_workspace_ids, + collect_workspace_entries, load_code_block_diff_map, - _open_global_db, + open_global_db, ) from services.workspace_resolver import ( - _create_project_name_to_workspace_id_map, - _create_workspace_path_to_id_map, - _determine_project_for_conversation, - _get_workspace_display_name, - _infer_invalid_workspace_aliases, + create_project_name_to_workspace_id_map, + create_workspace_path_to_id_map, + determine_project_for_conversation, + infer_invalid_workspace_aliases, + lookup_workspace_display_name, ) @@ -82,15 +82,28 @@ def assemble_workspace_tabs( workspace_path: str, rules: list, ) -> tuple[dict, int]: - """Build (payload, status) for GET /api/workspaces//tabs; status=404 if global storage is missing.""" + """Build tabs payload for GET /api/workspaces//tabs (IDE workspaces). + + Args: + workspace_id: Workspace folder name, or ``"global"`` for unassigned chats. + workspace_path: Cursor ``workspaceStorage`` root. + rules: Exclusion rule token lists from :func:`utils.exclusion_rules.load_rules`. + + Returns: + ``(payload, status)``. On success (``200``), *payload* contains ``tabs`` + (list of tab dicts with ``id``, ``title``, ``timestamp``, ``bubbles``, + optional ``metadata`` / ``codeBlockDiffs``) and optional ``warnings`` + when parse failures were skipped. On failure (``404``), *payload* is + ``{"error": "Global storage not found"}``. + """ parse_warnings = ParseWarningCollector() response: dict = {"tabs": []} - workspace_entries = _collect_workspace_entries(workspace_path) - invalid_workspace_ids = _collect_invalid_workspace_ids(workspace_entries) - project_name_map = _create_project_name_to_workspace_id_map(workspace_entries) - workspace_path_map = _create_workspace_path_to_id_map(workspace_entries) - composer_id_to_ws = _build_composer_id_to_workspace_id(workspace_path, workspace_entries) + workspace_entries = collect_workspace_entries(workspace_path) + invalid_workspace_ids = collect_invalid_workspace_ids(workspace_entries) + project_name_map = create_project_name_to_workspace_id_map(workspace_entries) + workspace_path_map = create_workspace_path_to_id_map(workspace_entries) + composer_id_to_ws = build_composer_id_to_workspace_id(workspace_path, workspace_entries) # Build set of all workspace IDs that share the same folder as workspace_id # (handles Cursor creating multiple workspace entries for the same project) @@ -121,11 +134,11 @@ def assemble_workspace_tabs( code_block_diff_map: dict[str, list] = {} message_request_context_map: dict[str, list] = {} - with _open_global_db(workspace_path) as (global_db, _): + with open_global_db(workspace_path) as (global_db, _): if global_db is None: return {"error": "Global storage not found"}, 404 - workspace_display_name = _get_workspace_display_name(workspace_path, workspace_id) + workspace_display_name = lookup_workspace_display_name(workspace_path, workspace_id) def _safe_fetchall(query: str, params: tuple = ()) -> list: try: @@ -215,7 +228,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: " AND value NOT LIKE '%fullConversationHeadersOnly\":[]%'" ) - invalid_workspace_aliases = _infer_invalid_workspace_aliases( + invalid_workspace_aliases = infer_invalid_workspace_aliases( composer_rows=composer_rows, project_layouts_map=project_layouts_map, project_name_map=project_name_map, @@ -259,7 +272,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: cd = composer.raw # Determine project - pid = _determine_project_for_conversation( + pid = determine_project_for_conversation( cd, composer_id, project_layouts_map, project_name_map, workspace_path_map, workspace_entries, bubble_map, composer_id_to_ws, invalid_workspace_ids, @@ -342,7 +355,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: tool_calls = None tfd = raw.get("toolFormerData") if isinstance(tfd, dict): - tool_call = _parse_tool_call(tfd) + tool_call = parse_tool_call(tfd) if isinstance(tool_call, dict): tool_calls = [tool_call] @@ -359,7 +372,12 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: # Context window ctx_window = raw.get("contextWindowStatusAtCreation") or {} - ctx_pct = ctx_window.get("percentageRemainingFloat") or ctx_window.get("percentageRemaining") + ctx_pct = None + if isinstance(ctx_window, dict): + if ctx_window.get("percentageRemainingFloat") is not None: + ctx_pct = ctx_window.get("percentageRemainingFloat") + elif ctx_window.get("percentageRemaining") is not None: + ctx_pct = ctx_window.get("percentageRemaining") # Display text fallbacks display_text = full_text.strip() diff --git a/tests/test_api_endpoints.py b/tests/test_api_endpoints.py index 4c2aee5..906a838 100644 --- a/tests/test_api_endpoints.py +++ b/tests/test_api_endpoints.py @@ -2,7 +2,7 @@ from app import create_app from tests._fixture_ids import HAPPY_BUBBLE_ID, HAPPY_COMPOSER_ID, HAPPY_WORKSPACE_ID -from utils.exclusion_rules import _tokenize_rule +from utils.exclusion_rules import tokenize_rule # --------------------------------------------------------------------------- @@ -155,7 +155,7 @@ def _client_with_rules(rule_lines): overrides the config with parsed rules — exercising the same code path a real `exclusion-rules.txt` file would. """ - parsed = [_tokenize_rule(line) for line in rule_lines] + parsed = [tokenize_rule(line) for line in rule_lines] app = create_app() app.config["TESTING"] = True app.config["EXCLUSION_RULES"] = [r for r in parsed if r] diff --git a/tests/test_cli_chat_reader.py b/tests/test_cli_chat_reader.py index ce07d42..de236d7 100644 --- a/tests/test_cli_chat_reader.py +++ b/tests/test_cli_chat_reader.py @@ -19,10 +19,10 @@ sys.path.insert(0, str(_root)) from utils.cli_chat_reader import ( - _content_to_text, + content_to_text, extract_blob_refs, - _extract_tool_calls, - _strip_user_info, + extract_tool_calls, + strip_user_info, aggregate_session_stats, extract_workspace_path, iter_sessions, @@ -106,53 +106,53 @@ def test_partial_tag_at_end_ignored(self): # --------------------------------------------------------------------------- -# _content_to_text +# content_to_text # --------------------------------------------------------------------------- class TestContentToText(unittest.TestCase): def test_string_passthrough(self): - self.assertEqual(_content_to_text("hello"), "hello") + self.assertEqual(content_to_text("hello"), "hello") def test_list_with_text_parts(self): content = [{"type": "text", "text": "foo"}, {"type": "text", "text": "bar"}] - result = _content_to_text(content) + result = content_to_text(content) self.assertIn("foo", result) self.assertIn("bar", result) def test_list_with_tool_result(self): content = [{"type": "tool-result", "result": "output here"}] - self.assertIn("output here", _content_to_text(content)) + self.assertIn("output here", content_to_text(content)) def test_empty_list(self): - self.assertEqual(_content_to_text([]), "") + self.assertEqual(content_to_text([]), "") def test_unknown_type_ignored(self): content = [{"type": "image", "url": "http://example.com/img.png"}] - self.assertEqual(_content_to_text(content), "") + self.assertEqual(content_to_text(content), "") def test_non_string_non_list(self): - self.assertEqual(_content_to_text(None), "") - self.assertEqual(_content_to_text(42), "") + self.assertEqual(content_to_text(None), "") + self.assertEqual(content_to_text(42), "") # --------------------------------------------------------------------------- -# _extract_tool_calls +# extract_tool_calls # --------------------------------------------------------------------------- class TestExtractToolCalls(unittest.TestCase): def test_non_list_returns_empty(self): - self.assertEqual(_extract_tool_calls("text"), []) - self.assertEqual(_extract_tool_calls(None), []) + self.assertEqual(extract_tool_calls("text"), []) + self.assertEqual(extract_tool_calls(None), []) def test_list_without_tool_call_type(self): content = [{"type": "text", "text": "hello"}] - self.assertEqual(_extract_tool_calls(content), []) + self.assertEqual(extract_tool_calls(content), []) def test_single_tool_call(self): content = [ {"type": "tool-call", "toolName": "Shell", "args": {"command": "ls"}, "toolCallId": "tc-1"} ] - calls = _extract_tool_calls(content) + calls = extract_tool_calls(content) self.assertEqual(len(calls), 1) self.assertEqual(calls[0]["name"], "Shell") self.assertEqual(calls[0]["args"], {"command": "ls"}) @@ -163,7 +163,7 @@ def test_mixed_content(self): {"type": "text", "text": "I will run a command."}, {"type": "tool-call", "toolName": "Grep", "args": {"pattern": "foo"}, "toolCallId": "tc-2"}, ] - calls = _extract_tool_calls(content) + calls = extract_tool_calls(content) self.assertEqual(len(calls), 1) self.assertEqual(calls[0]["name"], "Grep") @@ -203,22 +203,22 @@ def test_returns_first_match(self): # --------------------------------------------------------------------------- -# _strip_user_info +# strip_user_info # --------------------------------------------------------------------------- class TestStripUserInfo(unittest.TestCase): def test_extracts_user_query_tag(self): text = "some preamble\nmy actual question" - self.assertEqual(_strip_user_info(text), "my actual question") + self.assertEqual(strip_user_info(text), "my actual question") def test_strips_user_info_block_when_no_query_tag(self): text = "preamble stuff\nActual message here." - result = _strip_user_info(text) + result = strip_user_info(text) self.assertNotIn("", result) self.assertIn("Actual message here.", result) def test_passthrough_when_no_user_info(self): - self.assertEqual(_strip_user_info("plain text"), "plain text") + self.assertEqual(strip_user_info("plain text"), "plain text") # --------------------------------------------------------------------------- diff --git a/tests/test_cli_tabs.py b/tests/test_cli_tabs.py index 620c370..58723e2 100644 --- a/tests/test_cli_tabs.py +++ b/tests/test_cli_tabs.py @@ -11,7 +11,7 @@ if REPO_ROOT not in sys.path: sys.path.insert(0, REPO_ROOT) -from services.cli_tabs import _get_cli_workspace_tabs +from services.cli_tabs import get_cli_workspace_tabs def _make_app(): @@ -59,7 +59,7 @@ def fake_traverse_blobs(db_path): patch("services.cli_tabs.list_cli_projects", return_value=[project]), \ patch("services.cli_tabs.traverse_blobs", side_effect=fake_traverse_blobs), \ patch("services.cli_tabs.messages_to_bubbles", side_effect=fake_messages_to_bubbles): - response = _get_cli_workspace_tabs("cli:proj-1") + response = get_cli_workspace_tabs("cli:proj-1", []) self.assertEqual(response.status_code, 200) payload = response.get_json() @@ -86,7 +86,7 @@ def fake_messages_to_bubbles(messages, created_ms): patch("services.cli_tabs.list_cli_projects", return_value=[project]), \ patch("services.cli_tabs.traverse_blobs", side_effect=fake_traverse_blobs), \ patch("services.cli_tabs.messages_to_bubbles", side_effect=fake_messages_to_bubbles): - response = _get_cli_workspace_tabs("cli:proj-1") + response = get_cli_workspace_tabs("cli:proj-1", []) self.assertEqual(response.status_code, 200) payload = response.get_json() @@ -113,7 +113,7 @@ def fake_messages_to_bubbles(messages, created_ms): patch("services.cli_tabs.list_cli_projects", return_value=garbage_then_real), \ patch("services.cli_tabs.traverse_blobs", side_effect=fake_traverse_blobs), \ patch("services.cli_tabs.messages_to_bubbles", side_effect=fake_messages_to_bubbles): - response = _get_cli_workspace_tabs("cli:proj-1") + response = get_cli_workspace_tabs("cli:proj-1", []) self.assertEqual(response.status_code, 200) payload = response.get_json() @@ -131,7 +131,7 @@ def test_project_missing_workspace_name_uses_fallback(self) -> None: patch("services.cli_tabs.traverse_blobs", return_value=["ok"]), \ patch("services.cli_tabs.messages_to_bubbles", return_value=[{"type": "user", "text": "hi", "timestamp": 1}]): - response = _get_cli_workspace_tabs("cli:proj-min") + response = get_cli_workspace_tabs("cli:proj-min", []) self.assertEqual(response.status_code, 200) # Tab still rendered — ws_name fallback (project_id[:12]) used for searchable text. @@ -144,7 +144,7 @@ def test_project_missing_sessions_returns_200_empty_tabs(self) -> None: with app.test_request_context("/api/workspaces/cli:proj-empty/tabs"), \ patch("services.cli_tabs.list_cli_projects", return_value=[project]): - response = _get_cli_workspace_tabs("cli:proj-empty") + response = get_cli_workspace_tabs("cli:proj-empty", []) self.assertEqual(response.status_code, 200) self.assertEqual(response.get_json(), {"tabs": []}) diff --git a/tests/test_exclusion_rules.py b/tests/test_exclusion_rules.py index c11d6d1..3d82d4b 100644 --- a/tests/test_exclusion_rules.py +++ b/tests/test_exclusion_rules.py @@ -111,8 +111,8 @@ def test_implicit_and_adjacent_terms(self): def test_unclosed_quote_treated_as_word(self): """An unclosed double-quote falls back to a plain word/substring match.""" # Tokenizer produces ("word", "unclosed phrase") for `"unclosed phrase` - from utils.exclusion_rules import _tokenize_rule - tokens = _tokenize_rule('"unclosed phrase') + from utils.exclusion_rules import tokenize_rule + tokens = tokenize_rule('"unclosed phrase') self.assertEqual(len(tokens), 1) self.assertEqual(tokens[0][0], "word") rules = [tokens] @@ -121,14 +121,14 @@ def test_unclosed_quote_treated_as_word(self): def test_quoted_logical_operator_is_literal(self): """A quoted "AND" or "OR" is a literal term, not a boolean operator.""" - from utils.exclusion_rules import _tokenize_rule + from utils.exclusion_rules import tokenize_rule # "AND" (quoted) should produce a phrase token, not the "AND" string - tokens_and = _tokenize_rule('"AND"') + tokens_and = tokenize_rule('"AND"') self.assertEqual(len(tokens_and), 1) self.assertIsInstance(tokens_and[0], tuple) self.assertEqual(tokens_and[0][1], "AND") - tokens_or = _tokenize_rule('"OR"') + tokens_or = tokenize_rule('"OR"') self.assertEqual(len(tokens_or), 1) self.assertIsInstance(tokens_or[0], tuple) self.assertEqual(tokens_or[0][1], "OR") diff --git a/tests/test_invalid_workspace_aliases.py b/tests/test_invalid_workspace_aliases.py index 1daa30a..9f963a5 100644 --- a/tests/test_invalid_workspace_aliases.py +++ b/tests/test_invalid_workspace_aliases.py @@ -5,7 +5,7 @@ import json import unittest -from api.workspaces import _infer_invalid_workspace_aliases +from services.workspace_resolver import infer_invalid_workspace_aliases from utils.path_helpers import normalize_file_path @@ -36,7 +36,7 @@ def test_majority_vote_alias_selection(self): normalize_file_path(r"d:\_cpp_digest\team-brain"): "team-ws", } - aliases = _infer_invalid_workspace_aliases( + aliases = infer_invalid_workspace_aliases( composer_rows=composer_rows, project_layouts_map=project_layouts_map, project_name_map={}, @@ -73,7 +73,7 @@ def test_drifted_composer_does_not_skew_vote(self): normalize_file_path(r"d:\_cpp_digest\team-brain"): "team-ws", } - aliases = _infer_invalid_workspace_aliases( + aliases = infer_invalid_workspace_aliases( composer_rows=composer_rows, project_layouts_map=project_layouts_map, project_name_map={}, @@ -104,7 +104,7 @@ def test_non_dict_composer_json_skipped_without_crash(self) -> None: normalize_file_path(r"d:\_cpp_digest\team-brain"): "team-ws", } - aliases = _infer_invalid_workspace_aliases( + aliases = infer_invalid_workspace_aliases( composer_rows=composer_rows, project_layouts_map=project_layouts_map, project_name_map={}, diff --git a/tests/test_models_wired_at_read_sites.py b/tests/test_models_wired_at_read_sites.py index 0e80c89..bdda6e0 100644 --- a/tests/test_models_wired_at_read_sites.py +++ b/tests/test_models_wired_at_read_sites.py @@ -216,13 +216,14 @@ def test_composers_endpoint_calls_workspace_from_dict(self): ) def test_workspace_display_name_calls_workspace_from_dict(self): + from services.workspace_resolver import lookup_workspace_display_name import api.workspaces as workspaces_mod with patch.object(workspaces_mod.Workspace, "from_dict", wraps=workspaces_mod.Workspace.from_dict) as spy: - name = workspaces_mod._get_workspace_display_name(self.workspace_path, WORKSPACE_ID) + name = lookup_workspace_display_name(self.workspace_path, WORKSPACE_ID) self.assertIsInstance(name, str) self.assertGreaterEqual( spy.call_count, 1, - msg="Workspace.from_dict was never called from _get_workspace_display_name", + msg="Workspace.from_dict was never called from lookup_workspace_display_name", ) def test_list_composers_sort_reads_typed_last_updated_at_not_raw_dict(self): @@ -412,12 +413,12 @@ def test_load_manifest_entries_calls_export_entry_from_dict(self): export_mod.ExportEntry, "from_dict", wraps=export_mod.ExportEntry.from_dict, ) as spy: - entries = export_mod._load_manifest_entries(manifest_path) + entries = export_mod.load_manifest_entries(manifest_path) self.assertIn("log-wired", entries) self.assertGreaterEqual( spy.call_count, 1, msg="ExportEntry.from_dict was never called from " - "_load_manifest_entries — model is defined but not " + "load_manifest_entries — model is defined but not " "wired at the production read site", ) @@ -444,7 +445,7 @@ def test_load_manifest_entries_skips_pre_pr30_entries(self): }) + "\n") from scripts import export as export_mod - entries = export_mod._load_manifest_entries(manifest_path) + entries = export_mod.load_manifest_entries(manifest_path) self.assertNotIn("legacy", entries, msg="pre-PR-30 entries must be skipped") self.assertIn("modern", entries, msg="new-schema entries must still load") diff --git a/tests/test_project_path_boundary.py b/tests/test_project_path_boundary.py index baa9ad9..2868129 100644 --- a/tests/test_project_path_boundary.py +++ b/tests/test_project_path_boundary.py @@ -10,7 +10,7 @@ if REPO_ROOT not in sys.path: sys.path.insert(0, REPO_ROOT) -from services.workspace_resolver import _get_project_from_file_path +from services.workspace_resolver import get_project_from_file_path def _write_workspace_json(parent: str, name: str, folder: str) -> dict: @@ -37,7 +37,7 @@ def test_sibling_prefix_does_not_match(self): file_in_app2 = os.path.join(app2, "src", "main.py") self.assertEqual( - _get_project_from_file_path(file_in_app2, entries), + get_project_from_file_path(file_in_app2, entries), "ws-app2", ) @@ -48,7 +48,7 @@ def test_file_outside_any_workspace_returns_none(self): entries = [_write_workspace_json(tmp, "ws-app", app)] unrelated = os.path.join(tmp, "elsewhere", "file.py") - self.assertIsNone(_get_project_from_file_path(unrelated, entries)) + self.assertIsNone(get_project_from_file_path(unrelated, entries)) def test_file_inside_workspace_still_matches(self): with tempfile.TemporaryDirectory() as tmp: @@ -57,7 +57,7 @@ def test_file_inside_workspace_still_matches(self): entries = [_write_workspace_json(tmp, "ws-app", app)] inside = os.path.join(app, "src", "main.py") - self.assertEqual(_get_project_from_file_path(inside, entries), "ws-app") + self.assertEqual(get_project_from_file_path(inside, entries), "ws-app") if __name__ == "__main__": diff --git a/tests/test_workspace_assignment_fallback.py b/tests/test_workspace_assignment_fallback.py index 119bf80..32f8c1e 100644 --- a/tests/test_workspace_assignment_fallback.py +++ b/tests/test_workspace_assignment_fallback.py @@ -4,7 +4,7 @@ import unittest -from api.workspaces import _determine_project_for_conversation +from services.workspace_resolver import determine_project_for_conversation from utils.path_helpers import normalize_file_path @@ -24,7 +24,7 @@ def test_ignores_invalid_composer_to_workspace_mapping(self): composer_id_to_workspace_id = {"cmp-123": "broken-ws"} invalid_workspace_ids = {"broken-ws"} - assigned = _determine_project_for_conversation( + assigned = determine_project_for_conversation( composer_data=composer_data, composer_id=composer_id, project_layouts_map=project_layouts_map, diff --git a/tests/test_workspace_db_special_paths.py b/tests/test_workspace_db_special_paths.py index e948a72..7555518 100644 --- a/tests/test_workspace_db_special_paths.py +++ b/tests/test_workspace_db_special_paths.py @@ -13,8 +13,8 @@ sys.path.insert(0, REPO_ROOT) from services.workspace_db import ( - _build_composer_id_to_workspace_id, - _open_global_db, + build_composer_id_to_workspace_id, + open_global_db, ) @@ -57,13 +57,13 @@ def test_build_composer_id_to_workspace_id_handles_spaces(self): with tempfile.TemporaryDirectory() as tmp: ws_root = self._build_fixture(tmp) entries = [{"name": "ws-with spaces", "workspaceJsonPath": ""}] - mapping = _build_composer_id_to_workspace_id(ws_root, entries) + mapping = build_composer_id_to_workspace_id(ws_root, entries) self.assertEqual(mapping, {"cid-space": "ws-with spaces"}) def test_open_global_db_handles_spaces(self): with tempfile.TemporaryDirectory() as tmp: ws_root = self._build_fixture(tmp) - with _open_global_db(ws_root) as (conn, path): + with open_global_db(ws_root) as (conn, _): self.assertIsNotNone(conn) row = conn.execute( "SELECT key FROM cursorDiskKV WHERE key = 'composerData:probe'" @@ -95,7 +95,7 @@ def test_non_dict_entries_skipped_healthy_one_mapped(self): conn.close() entries = [{"name": "ws-mixed", "workspaceJsonPath": ""}] - mapping = _build_composer_id_to_workspace_id(tmp, entries) + mapping = build_composer_id_to_workspace_id(tmp, entries) self.assertEqual(mapping, {"cid-real": "ws-mixed"}) @@ -112,7 +112,7 @@ def test_sqlite_connect_error_yields_none_conn(self): "services.workspace_db.sqlite3.connect", side_effect=sqlite3.OperationalError("simulated open failure"), ): - with _open_global_db(ws_root) as (conn, path): + with open_global_db(ws_root) as (conn, path): self.assertIsNone(conn) self.assertTrue(path.endswith("state.vscdb")) @@ -137,7 +137,7 @@ def test_corrupt_state_vscdb_skipped_healthy_one_mapped(self): {"name": "ws-ok", "workspaceJsonPath": ""}, {"name": "ws-bad", "workspaceJsonPath": ""}, ] - mapping = _build_composer_id_to_workspace_id(tmp, entries) + mapping = build_composer_id_to_workspace_id(tmp, entries) self.assertEqual(mapping, {"cid-ok": "ws-ok"}) diff --git a/tests/test_workspace_name_db_errors.py b/tests/test_workspace_name_db_errors.py index 9b49f71..74e5323 100644 --- a/tests/test_workspace_name_db_errors.py +++ b/tests/test_workspace_name_db_errors.py @@ -13,7 +13,7 @@ if REPO_ROOT not in sys.path: sys.path.insert(0, REPO_ROOT) -from services.workspace_resolver import _infer_workspace_name_from_context +from services.workspace_resolver import infer_workspace_name_from_context def _seed_local_state(workspace_path: str, workspace_id: str) -> None: @@ -37,7 +37,7 @@ def _seed_local_state(workspace_path: str, workspace_id: str) -> None: class TestGlobalQueryErrorSwallowed(unittest.TestCase): def test_corrupt_cursordiskkv_does_not_propagate(self) -> None: with tempfile.TemporaryDirectory() as tmp: - # _open_global_db reads ``/../globalStorage`` — so + # open_global_db reads ``/../globalStorage`` — so # workspaceStorage must be a child of tmp, not tmp itself. ws_root = os.path.join(tmp, "workspaceStorage") os.makedirs(ws_root, exist_ok=True) @@ -48,14 +48,14 @@ def test_corrupt_cursordiskkv_does_not_propagate(self) -> None: gdb = os.path.join(global_dir, "state.vscdb") conn = sqlite3.connect(gdb) # Schema deliberately missing cursorDiskKV so the LIKE query - # inside _infer_workspace_name_from_context raises + # inside infer_workspace_name_from_context raises # sqlite3.OperationalError("no such table"). conn.execute("CREATE TABLE other (x INTEGER)") conn.commit() conn.close() try: - result = _infer_workspace_name_from_context(ws_root, "ws-corrupt") + result = infer_workspace_name_from_context(ws_root, "ws-corrupt") except sqlite3.Error: self.fail("query error should be caught, not propagated") self.assertIsNone(result) @@ -78,7 +78,7 @@ def test_corrupt_local_state_vscdb_returns_none_not_raises(self) -> None: sqlite3.connect(os.path.join(global_dir, "state.vscdb")).close() try: - result = _infer_workspace_name_from_context(ws_root, "ws-bad-local") + result = infer_workspace_name_from_context(ws_root, "ws-bad-local") except sqlite3.Error: self.fail("local query error should be caught, not propagated") self.assertIsNone(result) diff --git a/tests/test_workspace_name_inference.py b/tests/test_workspace_name_inference.py index eba1a48..4537a0d 100644 --- a/tests/test_workspace_name_inference.py +++ b/tests/test_workspace_name_inference.py @@ -8,7 +8,7 @@ import tempfile import unittest -from api.workspaces import _infer_workspace_name_from_context +from services.workspace_resolver import infer_workspace_name_from_context class TestWorkspaceNameInference(unittest.TestCase): @@ -77,7 +77,7 @@ def test_infers_name_from_project_layouts(self): gconn.close() self.assertEqual( - _infer_workspace_name_from_context(workspace_path, ws_id), + infer_workspace_name_from_context(workspace_path, ws_id), "boostbacklog", ) diff --git a/tests/test_workspace_tabs_malformed_nested.py b/tests/test_workspace_tabs_malformed_nested.py index f811949..914f353 100644 --- a/tests/test_workspace_tabs_malformed_nested.py +++ b/tests/test_workspace_tabs_malformed_nested.py @@ -201,10 +201,10 @@ def test_non_dict_parse_result_does_not_drop_composer(self) -> None: with tempfile.TemporaryDirectory() as tmp: ws_root = _seed_workspace_with_tool_former(tmp) - # Force _parse_tool_call to return None — the previous code + # Force parse_tool_call to return None — the previous code # would have stored ``tool_calls = [None]`` and crashed in the # display-text fallback with ``NoneType.get``. - with patch("services.workspace_tabs._parse_tool_call", return_value=None): + with patch("services.workspace_tabs.parse_tool_call", return_value=None): with app.test_request_context("/api/workspaces/global/tabs"): payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) diff --git a/utils/cli_chat_reader.py b/utils/cli_chat_reader.py index 5c744c4..b87a5d1 100644 --- a/utils/cli_chat_reader.py +++ b/utils/cli_chat_reader.py @@ -175,7 +175,7 @@ def traverse_blobs(db_path: str) -> list[dict]: _WORKSPACE_PATH_RE = re.compile(r"Workspace Path:\s*(.+?)(?:\n|$)") -def _content_to_text(content) -> str: +def content_to_text(content) -> str: """Flatten Vercel AI SDK content (string or typed-part array) to plain text.""" if isinstance(content, str): return content @@ -192,7 +192,7 @@ def _content_to_text(content) -> str: return "" -def _extract_tool_calls(content) -> list[dict]: +def extract_tool_calls(content) -> list[dict]: """Extract tool-call parts from assistant message content.""" if not isinstance(content, list): return [] @@ -214,14 +214,14 @@ def extract_workspace_path(messages: list[dict]) -> str | None: if msg.get("role") != "user": continue content = msg.get("content", "") - text = content if isinstance(content, str) else _content_to_text(content) + text = content if isinstance(content, str) else content_to_text(content) m = _WORKSPACE_PATH_RE.search(text) if m: return m.group(1).strip() return None -def _strip_user_info(text: str) -> str: +def strip_user_info(text: str) -> str: """Remove the ```` preamble and return only the query text. If a ```` tag is present, its content is returned directly. @@ -287,18 +287,18 @@ def messages_to_bubbles(messages: list[dict], created_at_ms: int) -> list[dict]: seq += 1 if role == "user": - text = _content_to_text(content) if isinstance(content, list) else (content or "") + text = content_to_text(content) if isinstance(content, list) else (content or "") # Skip pure preamble messages (contain but no ). if "" in text and "" not in text: continue - text = _strip_user_info(text) + text = strip_user_info(text) if not text: continue bubbles.append({"type": "user", "text": text, "timestamp": ts}) elif role == "assistant": - text = _content_to_text(content) if isinstance(content, list) else (content or "") - tool_calls = _extract_tool_calls(content) + text = content_to_text(content) if isinstance(content, list) else (content or "") + tool_calls = extract_tool_calls(content) if not text.strip() and not tool_calls: continue diff --git a/utils/exclusion_rules.py b/utils/exclusion_rules.py index 18bc54d..c0ff9b2 100644 --- a/utils/exclusion_rules.py +++ b/utils/exclusion_rules.py @@ -65,7 +65,7 @@ def resolve_exclusion_rules_path(cli_path: str | None) -> str | None: return None -def _tokenize_rule(line: str) -> list: +def tokenize_rule(line: str) -> list: """ Tokenize a rule line into terms and operators. @@ -172,7 +172,7 @@ def load_rules(path: str | None) -> list[list]: Load and parse the exclusion rule file at *path*. Returns a list of tokenized rules (each rule is a list of tokens as - produced by :func:`_tokenize_rule`). Returns an empty list when *path* + produced by :func:`tokenize_rule`). Returns an empty list when *path* is ``None``, the file doesn't exist, or the file cannot be read. """ if not path or not os.path.isfile(path): @@ -184,7 +184,7 @@ def load_rules(path: str | None) -> list[list]: line = line.strip() if not line or line.startswith("#"): continue - tokens = _tokenize_rule(line) + tokens = tokenize_rule(line) if tokens: rules.append(tokens) except (OSError, UnicodeDecodeError) as e: