From b32fdee0870bfd55ea4839df16582e17931d0037 Mon Sep 17 00:00:00 2001 From: Brent Rager Date: Wed, 20 May 2026 15:55:14 -0400 Subject: [PATCH 1/3] Fix #189: don't fall back to fork() on macOS; redirect worker stderr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes for the macOS Tahoe segfault reported in issue #189: 1. process_worker_client.py: drop the spawn -> fork fallback when running on darwin. macOS Tahoe (26+) widened the set of system libraries that abort if initialized after fork() — CoreFoundation, Security.framework, libsystem_trace, libsystem_info — so any library the parent has loaded (psycopg, getaddrinfo, setproctitle, NSCharacterSet) can crash the forked child. Letting spawn fail and surfacing it to the caller lets the in-process fallback in query_execution take over instead. Cost: query cancellation isn't available when spawn fails. Gain: no hard crashes ("Worker connection closed.") on macOS. 2. process_worker.py: redirect the worker's stdout/stderr to a log file and enable faulthandler at entry. The worker can SIGPIPE on libpq notice writes when its inherited FDs point to a Textual-managed pipe, and any future C-level fault on the worker side is otherwise invisible to the parent (the pipe just dies). The log path honors $SQLIT_WORKER_LOG and defaults to /sqlit-worker.log. Refs the celery (Tahoe + setproctitle, celery#9894) and gunicorn (Tahoe + NSCharacterSet, gunicorn#3526) reports for the broader fork- safety regression on Tahoe. --- .../process_worker/app/process_worker.py | 33 +++++++++++++++++++ .../app/process_worker_client.py | 8 +++++ 2 files changed, 41 insertions(+) diff --git a/sqlit/domains/process_worker/app/process_worker.py b/sqlit/domains/process_worker/app/process_worker.py index e1d55bfc..31008e7c 100644 --- a/sqlit/domains/process_worker/app/process_worker.py +++ b/sqlit/domains/process_worker/app/process_worker.py @@ -2,14 +2,39 @@ from __future__ import annotations +import faulthandler +import os import pickle +import sys +import tempfile import threading import time from collections import deque from dataclasses import dataclass, field from multiprocessing.connection import Connection +from pathlib import Path from typing import Any + +def _open_worker_log() -> Any | None: + """Open the worker log file, honoring SQLIT_WORKER_LOG when set. + + The parent process may attach this worker to a Textual-managed pipe; + libpq notice writes or unexpected stderr output would then SIGPIPE the + child. Redirecting both streams to a real file avoids that and gives a + place to land C-level fault tracebacks for future diagnosis. + """ + override = os.environ.get("SQLIT_WORKER_LOG") + if override: + path = Path(override).expanduser() + else: + path = Path(tempfile.gettempdir()) / "sqlit-worker.log" + try: + path.parent.mkdir(parents=True, exist_ok=True) + return path.open("a", buffering=1) + except OSError: + return None + from sqlit.domains.connections.domain.config import ConnectionConfig from sqlit.domains.connections.providers.catalog import get_provider from sqlit.domains.connections.providers.config_service import normalize_connection_config @@ -493,6 +518,14 @@ def _get_provider(self, db_type: str) -> Any | None: def run_process_worker(conn: Connection) -> None: """Process entrypoint for query execution.""" + log_file = _open_worker_log() + if log_file is not None: + sys.stdout = log_file + sys.stderr = log_file + try: + faulthandler.enable(file=log_file) + except (RuntimeError, ValueError): + pass state = _WorkerState(conn=conn) try: while True: diff --git a/sqlit/domains/process_worker/app/process_worker_client.py b/sqlit/domains/process_worker/app/process_worker_client.py index b1de75de..3b45e710 100644 --- a/sqlit/domains/process_worker/app/process_worker_client.py +++ b/sqlit/domains/process_worker/app/process_worker_client.py @@ -63,6 +63,14 @@ def _maybe_fallback_start(self, error: Exception) -> None: raise error if os.name != "posix" or sys.platform.startswith("win"): raise error + # macOS (especially Tahoe 26+) aborts in the child whenever a forked + # process touches CoreFoundation / Security.framework / os_log — + # which happens inside psycopg, getaddrinfo, and many other stdlib + # paths. Forking here causes hard segfaults (issue #189), so we + # surface the spawn failure and let the caller fall back to + # in-process execution. + if sys.platform == "darwin": + raise error try: self._start_with_context(get_context("fork")) except Exception: From 5e21d2255727f3c190c251ef796dd5f7a5e072a2 Mon Sep 17 00:00:00 2001 From: Brent Rager Date: Wed, 20 May 2026 16:16:03 -0400 Subject: [PATCH 2/3] Pre-spawn process worker in cli.py to avoid the fds_to_keep race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first two commits made the macOS Tahoe segfault non-fatal by disabling the fork fallback, but every spawn after Textual booted still failed with `bad value(s) in fds_to_keep` — leaving the user with a "Process worker unavailable; falling back" popup and the slower in-process executor on every query. Root cause: `multiprocessing.spawn` snapshots the parent's open file descriptors at spawn time. Once Textual's `App.__init__` has registered its signal-handler pipes and event-loop FDs, several of those are not inheritable, and spawn aborts before the child even runs. The previous lazy creation in the idle scheduler always hit this race. Fix: spawn the worker in `cli.py` *before* `SSMSTUI(...)` is constructed. That's the latest moment at which the parent's FD set is still clean. The new `_prewarm_process_worker(runtime)` helper returns a live `ProcessWorkerClient`, or `None` if the worker is disabled / mocked / spawn raised — in which case we fall through to the lazy path inside the UI (with the in-process fallback on darwin). The pre-spawned client is passed into `SSMSTUI` via a new `process_worker_client` kwarg and stashed as `self._process_worker_client`. `ProcessWorkerLifecycleMixin` already returns the cached client first, so no other changes are needed. Verified on macOS Tahoe 26.4.1 / Python 3.14 / PostgreSQL over TLS: the popup no longer appears and queries run in the worker process. --- sqlit/cli.py | 37 ++++++++++++++++++++++++++++++++- sqlit/domains/shell/app/main.py | 7 +++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/sqlit/cli.py b/sqlit/cli.py index 977205a4..f5b838f4 100644 --- a/sqlit/cli.py +++ b/sqlit/cli.py @@ -177,6 +177,32 @@ def _sane_tty() -> None: pass +def _prewarm_process_worker(runtime: RuntimeConfig) -> Any | None: + """Spawn the process worker before the Textual App is constructed. + + `multiprocessing.spawn` collects the parent's open file descriptors at + spawn time. Textual's `App.__init__` registers signal-handler pipes + and other non-inheritable FDs, which cause spawn to abort with + `ValueError: bad value(s) in fds_to_keep` (see issue #189). Spawning + here — before the App is constructed — is the latest point at which + the FD set is still clean. + + Returns the live client, or None if spawn fails or the worker is + disabled. On failure we fall through to the lazy path inside the UI; + on macOS that path raises and the in-process executor takes over. + """ + if not runtime.process_worker: + return None + if runtime.mock.enabled: + return None + try: + from sqlit.domains.process_worker.app.process_worker_client import ProcessWorkerClient + + return ProcessWorkerClient() + except Exception: + return None + + def _run_app(app: Any) -> int: exit_code: int | None = None handled_signals = [signal.SIGINT, signal.SIGTERM] @@ -843,10 +869,14 @@ def main() -> int: print(f"Error: {exc}") return 1 + # Spawn the worker before the Textual App is constructed; see + # _prewarm_process_worker for why this matters on macOS. + process_worker_client = _prewarm_process_worker(runtime) app = SSMSTUI( services=services, startup_connection=startup_config, exclusive_connection=exclusive_connection, + process_worker_client=process_worker_client, ) exit_code = _run_app(app) if exit_code != 0: @@ -907,7 +937,12 @@ def main() -> int: print(f"Error: {alert_error}") return 1 - app = SSMSTUI(services=services, startup_connection=temp_config) + process_worker_client = _prewarm_process_worker(runtime) + app = SSMSTUI( + services=services, + startup_connection=temp_config, + process_worker_client=process_worker_client, + ) return _run_app(app) if args.command in {"connections", "connection"}: diff --git a/sqlit/domains/shell/app/main.py b/sqlit/domains/shell/app/main.py index aaffd636..e98c4c2c 100644 --- a/sqlit/domains/shell/app/main.py +++ b/sqlit/domains/shell/app/main.py @@ -100,9 +100,16 @@ def __init__( runtime: RuntimeConfig | None = None, startup_connection: ConnectionConfig | None = None, exclusive_connection: bool = False, + process_worker_client: Any | None = None, ): super().__init__() self.services = services or build_app_services(runtime or RuntimeConfig.from_env()) + # A pre-spawned worker handed in from cli.py, before Textual's + # FD set was contaminated. ProcessWorkerLifecycleMixin already + # checks self._process_worker_client first, so seeding it here + # short-circuits the lazy spawn path. + if process_worker_client is not None: + self._process_worker_client = process_worker_client from sqlit.core.connection_manager import ConnectionManager self._connection_manager = ConnectionManager(self.services) From 71c36356d0bf158eb1bb163d612414c5e35cd822 Mon Sep 17 00:00:00 2001 From: Brent Rager Date: Wed, 20 May 2026 16:32:27 -0400 Subject: [PATCH 3/3] Add unit tests for the macOS fallback guard and the cli pre-spawn helper Covers the load-bearing behaviors introduced by the prior commits: - _maybe_fallback_start re-raises on darwin (commit 1) - _maybe_fallback_start still re-raises non-fds_to_keep errors verbatim - _open_worker_log honors $SQLIT_WORKER_LOG and survives an unwritable path - _prewarm_process_worker respects runtime.process_worker, runtime.mock, and swallows spawn exceptions so startup never crashes (commit 3) - SSMSTUI(process_worker_client=client) stashes the client on self._process_worker_client, which is what ProcessWorkerLifecycleMixin consults first --- tests/unit/test_cli_prewarm.py | 64 ++++++++++++++++++++ tests/unit/test_process_worker_fallback.py | 68 ++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 tests/unit/test_cli_prewarm.py create mode 100644 tests/unit/test_process_worker_fallback.py diff --git a/tests/unit/test_cli_prewarm.py b/tests/unit/test_cli_prewarm.py new file mode 100644 index 00000000..19040629 --- /dev/null +++ b/tests/unit/test_cli_prewarm.py @@ -0,0 +1,64 @@ +"""Tests for the pre-spawn helper in cli.py and the SSMSTUI kwarg plumbing.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from sqlit.cli import _prewarm_process_worker +from sqlit.shared.app.runtime import MockConfig, RuntimeConfig + + +def test_prewarm_skipped_when_worker_disabled() -> None: + runtime = RuntimeConfig(process_worker=False) + with patch("sqlit.domains.process_worker.app.process_worker_client.ProcessWorkerClient") as klass: + result = _prewarm_process_worker(runtime) + assert result is None + klass.assert_not_called() + + +def test_prewarm_skipped_when_mock_enabled() -> None: + runtime = RuntimeConfig(process_worker=True, mock=MockConfig(enabled=True)) + with patch("sqlit.domains.process_worker.app.process_worker_client.ProcessWorkerClient") as klass: + result = _prewarm_process_worker(runtime) + assert result is None + klass.assert_not_called() + + +def test_prewarm_returns_client_when_enabled() -> None: + runtime = RuntimeConfig(process_worker=True) + sentinel = MagicMock(name="process-worker-client") + with patch( + "sqlit.domains.process_worker.app.process_worker_client.ProcessWorkerClient", + return_value=sentinel, + ): + result = _prewarm_process_worker(runtime) + assert result is sentinel + + +def test_prewarm_swallows_spawn_errors() -> None: + """A failing pre-spawn must not break startup; lazy path handles fallback.""" + runtime = RuntimeConfig(process_worker=True) + with patch( + "sqlit.domains.process_worker.app.process_worker_client.ProcessWorkerClient", + side_effect=ValueError("bad value(s) in fds_to_keep"), + ): + result = _prewarm_process_worker(runtime) + assert result is None + + +def test_ssmstui_stashes_prewarmed_worker() -> None: + """A client passed via the kwarg should land on self._process_worker_client. + + ProcessWorkerLifecycleMixin returns that attribute first, so this is + what makes the pre-spawn actually short-circuit the lazy path. + """ + from sqlit.domains.shell.app.main import SSMSTUI + from tests.ui.mocks import MockConnectionStore, MockSettingsStore, build_test_services, create_test_connection + + services = build_test_services( + connection_store=MockConnectionStore([create_test_connection("test-db", "sqlite")]), + settings_store=MockSettingsStore({}), + ) + sentinel = MagicMock(name="process-worker-client") + app = SSMSTUI(services=services, process_worker_client=sentinel) + assert app._process_worker_client is sentinel diff --git a/tests/unit/test_process_worker_fallback.py b/tests/unit/test_process_worker_fallback.py new file mode 100644 index 00000000..b5f38c76 --- /dev/null +++ b/tests/unit/test_process_worker_fallback.py @@ -0,0 +1,68 @@ +"""Tests for the macOS fork-fallback guard and the worker log redirect.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from sqlit.domains.process_worker.app.process_worker import _open_worker_log +from sqlit.domains.process_worker.app.process_worker_client import ProcessWorkerClient + + +def _make_client_without_init() -> ProcessWorkerClient: + """Build a bare ProcessWorkerClient instance without running __init__. + + __init__ spawns a real subprocess; we only want to exercise the + in-process branching logic in _maybe_fallback_start. + """ + return ProcessWorkerClient.__new__(ProcessWorkerClient) + + +def test_fork_fallback_disabled_on_darwin(monkeypatch: pytest.MonkeyPatch) -> None: + """On macOS the fallback path must re-raise instead of forking.""" + monkeypatch.setattr("sys.platform", "darwin") + client = _make_client_without_init() + + error = ValueError("bad value(s) in fds_to_keep") + with pytest.raises(ValueError, match="fds_to_keep"): + client._maybe_fallback_start(error) + + +def test_fork_fallback_skips_non_fds_errors(monkeypatch: pytest.MonkeyPatch) -> None: + """Errors unrelated to fds_to_keep should always re-raise verbatim.""" + monkeypatch.setattr("sys.platform", "linux") + client = _make_client_without_init() + + error = ValueError("totally unrelated message") + with pytest.raises(ValueError, match="totally unrelated"): + client._maybe_fallback_start(error) + + +def test_worker_log_honors_env_override( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """SQLIT_WORKER_LOG should redirect the worker log to a custom path.""" + target = tmp_path / "subdir" / "worker.log" + monkeypatch.setenv("SQLIT_WORKER_LOG", str(target)) + + log_file = _open_worker_log() + assert log_file is not None + try: + log_file.write("hello\n") + finally: + log_file.close() + + assert target.exists() + assert "hello" in target.read_text() + + +def test_worker_log_returns_none_on_unwritable_path( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """If the log path can't be opened, the worker should still boot.""" + # A path whose parent can't be created (root-owned, non-existent). + monkeypatch.setenv("SQLIT_WORKER_LOG", "/proc/definitely/not/writable/here.log") + + log_file = _open_worker_log() + assert log_file is None