fix(stability): plug local cache watcher leak + stop local_bridge cascade aborts (v0.7.39)
All checks were successful
boundary-lint / PR boundary-claim (Lint (push) Has been skipped
boundary-lint / ban-list lint (Lint (push) Successful in 18s
ci / mutation test (broker) (push) Has been skipped
ci / test-health gate (push) Successful in 17s
boundary-lint / duplication-deadline (Layer 1/2) (push) Successful in 19s
Release Publish (Gitea session_helper) / verify-release-tag (push) Successful in 17s
ci / rust release (push) Successful in 2m39s
ci / rust debug (push) Successful in 3m11s
Release Publish (Gitea session_helper) / publish-linux-x86_64 (push) Successful in 4m0s
ci / python (push) Successful in 1m32s

Two independent stability fixes prompted by a macOS Sublime Text crash
investigation. Neither is proven to be the root cause of the user-
reported intermittent malloc abort ("pointer being freed was not
allocated") — that signature predates the v0.7.32 watcher and a
parallel FFI ownership audit found the Rust side clean. But both are
genuine bugs the audit surfaced and both reduce future debugging noise.

1. Local cache watcher leak (sublime/sessions/commands.py)
----------------------------------------------------------

``_stop_local_cache_watcher`` has been defined ever since the v0.7.32
``feat(sync): PR-C — cross-platform local cache filesystem watcher``
landed, but it was **never called from anywhere**.
``_start_local_cache_watcher`` only early-returns when Python still
holds a handle for the cache_key; after a plugin reload Python has lost
that handle, so every plugin reload instantiated a fresh handle on the
Python side while the previous ``WatchEntry`` (containing the live
``RecommendedWatcher``) sat in the Rust ``OnceLock<WatcherRegistry>``
forever — the macOS FSEvents thread, the Linux inotify thread, or the
Windows ReadDirectoryChangesW thread kept running until the Sublime
process itself exited.

Fix: add ``_stop_all_local_cache_watchers()`` and call it from
``sessions_plugin_shutdown`` (which already runs from
``plugin.py::plugin_unloaded``). Each shutdown drains the Python
handle dict, asks Rust to drop each ``WatchEntry``, and clears the
dict. Rust ``stop(handle)`` is idempotent — calling it twice on the
same handle just returns ``false`` the second time.

Three regression tests in ``test_bridge_lifecycle``:
  * shutdown stops every queued handle and clears the dict
  * Rust-side ABI exception still clears Python state (so the next
    plugin load starts from a coherent registry)
  * second shutdown call is a no-op (no duplicate ``stop(handle)``)

2. ``local_bridge`` eprintln cascade abort
------------------------------------------

When the parent (Sublime + Python ctypes) dies first, the bridge
subprocess inherits a broken stderr pipe. Three ``eprintln!`` sites
in ``main`` would then panic on EPIPE — and because the workspace
sets ``panic = "abort"``, the process SIGABRT'd, generating a
secondary ``DiagnosticReport`` (``local_bridge-*.ips``) that masked
the upstream Sublime crash report and made post-mortems harder to
read end-to-end.

Fix: replace the three ``eprintln!`` calls with ``let _ = writeln!(
io::stderr(), ...)`` so that a failed write (EPIPE) is silently
discarded and execution falls through to the ``exit(1)`` that always
followed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-03 22:31:58 +09:00
parent 5c8a29efa5
commit b44f708892
7 changed files with 92 additions and 12 deletions

View File

@@ -1,6 +1,6 @@
[project]
name = "sessions-sublime"
version = "0.7.38"
version = "0.7.39"
description = "Sublime-facing Python code for Sessions."
requires-python = ">=3.8"
license = {text = "MIT"}

12
rust/Cargo.lock generated
View File

@@ -221,7 +221,7 @@ checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
[[package]]
name = "local_bridge"
version = "0.7.38"
version = "0.7.39"
dependencies = [
"base64",
"glob",
@@ -432,7 +432,7 @@ dependencies = [
[[package]]
name = "session_helper"
version = "0.7.38"
version = "0.7.39"
dependencies = [
"base64",
"notify",
@@ -443,7 +443,7 @@ dependencies = [
[[package]]
name = "session_protocol"
version = "0.7.38"
version = "0.7.39"
dependencies = [
"base64",
"serde",
@@ -452,14 +452,14 @@ dependencies = [
[[package]]
name = "sessions_askpass"
version = "0.7.38"
version = "0.7.39"
dependencies = [
"tempfile",
]
[[package]]
name = "sessions_native"
version = "0.7.38"
version = "0.7.39"
dependencies = [
"base64",
"notify",
@@ -773,7 +773,7 @@ dependencies = [
[[package]]
name = "workspace_identity"
version = "0.7.38"
version = "0.7.39"
[[package]]
name = "zmij"

View File

@@ -12,7 +12,7 @@ resolver = "2"
[workspace.package]
edition = "2024"
license = "MIT"
version = "0.7.38"
version = "0.7.39"
authors = ["Myeongseon Choi <key262yek@gmail.com>"]
repository = "https://git.teahaven.kr/sublime-rs/sessions"
homepage = "https://git.teahaven.kr/sublime-rs/sessions"

View File

@@ -54,7 +54,13 @@ fn main() {
}
if args.first().map(String::as_str) == Some("lsp-stdio") {
if let Err(error) = run_lsp_stdio(&args[1..]) {
eprintln!("{error}");
// ``eprintln!`` panics on EPIPE (and ``panic = "abort"`` would then
// SIGABRT the process). When the parent (Sublime + Python ctypes)
// dies first the bridge inherits a broken stderr pipe, and a
// secondary abort here only adds a phantom crash report that
// hides the real upstream failure. Use ``writeln!`` + ``let _``
// so EPIPE silently fails through to ``exit(1)``.
let _ = writeln!(std::io::stderr(), "{error}");
std::process::exit(1);
}
return;
@@ -65,12 +71,15 @@ fn main() {
println!("{encoded}");
}
Err(error) => {
eprintln!("local_bridge output serialization failed: {error}");
let _ = writeln!(
std::io::stderr(),
"local_bridge output serialization failed: {error}"
);
std::process::exit(1);
}
},
Err(error) => {
eprintln!("{error}");
let _ = writeln!(std::io::stderr(), "{error}");
std::process::exit(1);
}
}

View File

@@ -3617,6 +3617,26 @@ def _stop_local_cache_watcher(cache_key: str) -> None:
_trace_event("local_watcher.stopped", cache_key=cache_key, handle=handle)
def _stop_all_local_cache_watchers() -> None:
    """Stop every active local cache watcher (plugin shutdown / reload).

    Without this, the Rust ``WatchEntry`` registry retains the
    ``RecommendedWatcher`` handle across plugin re-imports — Python loses
    the handle but the OS-level FSEvents / inotify / ReadDirectoryChangesW
    thread keeps running until the process exits. Bound to plugin
    shutdown so each reload starts from a clean registry.

    The handle dict is snapshotted and cleared under the lock *before* any
    FFI call is made, so Python state is reset even when ``stop`` raises and
    a second call finds nothing left to stop. A failing ``stop`` never
    propagates; it is reported as ``local_watcher.stop_failed`` instead of
    being traced as a successful ``local_watcher.stopped``.
    """
    with _LOCAL_WATCHER_LOCK:
        # Snapshot, then clear, so the FFI calls below run without the lock.
        handles = list(_LOCAL_WATCHER_HANDLES.items())
        _LOCAL_WATCHER_HANDLES.clear()
    for cache_key, handle in handles:
        try:
            _rust_ffi.local_watcher.stop(handle)
        except Exception:  # noqa: BLE001
            # Best-effort shutdown: keep draining the remaining handles, but
            # do not claim this watcher was stopped.
            _trace_event("local_watcher.stop_failed", cache_key=cache_key, handle=handle)
            continue
        _trace_event("local_watcher.stopped", cache_key=cache_key, handle=handle)
def _schedule_eager_hydrate_if_needed(
window: object,
context: "_WorkspaceContext",
@@ -6346,6 +6366,7 @@ def sessions_plugin_shutdown() -> None:
clear_bridge_handshake_listeners()
shutdown_all_persistent_bridges()
_marimo_session_manager().stop_all()
_stop_all_local_cache_watchers()
def _open_connected_host_window(

View File

@@ -96,6 +96,56 @@ def test_sessions_plugin_shutdown_clears_refs_and_bridges(monkeypatch) -> None:
assert shutdown_calls == 1
def test_sessions_plugin_shutdown_stops_local_cache_watchers(monkeypatch) -> None:
    """``sessions_plugin_shutdown`` must stop and forget every watcher handle.

    Regression guard: ``_stop_local_cache_watcher`` went several releases
    without a single call site, so handles leaked across plugin reloads
    until the Sublime process exited. The shutdown hook now drains them.
    """
    stopped: list[int] = []

    def record_stop(handle: int) -> bool:
        # Stand-in for the Rust ``stop`` ABI: remember the handle, report success.
        stopped.append(handle)
        return True

    monkeypatch.setattr(commands, "shutdown_all_persistent_bridges", lambda: None)
    monkeypatch.setattr(commands._rust_ffi.local_watcher, "stop", record_stop)

    # Seed the registry with two live handles.
    with commands._LOCAL_WATCHER_LOCK:
        commands._LOCAL_WATCHER_HANDLES.clear()
        commands._LOCAL_WATCHER_HANDLES.update({"cache-A": 11, "cache-B": 22})

    commands.sessions_plugin_shutdown()

    assert sorted(stopped) == [11, 22]
    assert commands._LOCAL_WATCHER_HANDLES == {}
def test_stop_all_local_cache_watchers_swallows_rust_errors(monkeypatch) -> None:
    """A raising Rust ABI (e.g. symbol missing on a fresh dylib) must not
    prevent shutdown from clearing Python state, so the next plugin load
    starts clean."""

    def failing_stop(_handle: int) -> bool:
        raise RuntimeError("simulated abi failure")

    monkeypatch.setattr(commands._rust_ffi.local_watcher, "stop", failing_stop)

    # Seed the registry with a single handle whose stop call will blow up.
    with commands._LOCAL_WATCHER_LOCK:
        commands._LOCAL_WATCHER_HANDLES.clear()
        commands._LOCAL_WATCHER_HANDLES.update({"cache-X": 99})

    commands._stop_all_local_cache_watchers()

    assert commands._LOCAL_WATCHER_HANDLES == {}
def test_stop_all_local_cache_watchers_idempotent(monkeypatch) -> None:
    """A repeated call (e.g. plugin double-unload) must not stop a handle twice."""
    stopped: list[int] = []

    def record_stop(handle: int) -> bool:
        # Stand-in for the Rust ``stop`` ABI: remember the handle, report success.
        stopped.append(handle)
        return True

    monkeypatch.setattr(commands._rust_ffi.local_watcher, "stop", record_stop)

    with commands._LOCAL_WATCHER_LOCK:
        commands._LOCAL_WATCHER_HANDLES.clear()
        commands._LOCAL_WATCHER_HANDLES.update({"cache-Y": 77})

    # The first call drains the registry; the second must find nothing to do.
    for _ in range(2):
        commands._stop_all_local_cache_watchers()

    assert stopped == [77]
def test_bridge_window_add_ref_skips_empty_host_alias() -> None:
commands._BRIDGE_HOST_WINDOW_IDS.clear()
commands._bridge_window_add_ref(FakeWindow(window_id=201), "")

2
uv.lock generated
View File

@@ -854,7 +854,7 @@ wheels = [
[[package]]
name = "sessions-sublime"
version = "0.7.38"
version = "0.7.39"
source = { virtual = "." }
[package.dev-dependencies]