From 0da9a4d9fa63bb1fee016292cd4e4eb5a75f7a0c Mon Sep 17 00:00:00 2001 From: chen Date: Tue, 23 Jun 2026 09:34:29 +0800 Subject: [PATCH 1/5] Address export consolidation review findings --- api/export_api.py | 283 +++++++++++++++++++++++----------------------- 1 file changed, 142 insertions(+), 141 deletions(-) diff --git a/api/export_api.py b/api/export_api.py index a2bed91..f5cad64 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -1,141 +1,142 @@ -""" -API route for export — produces per-chat Markdown in a zip download. -POST /api/export { since: "all"|"last", zip: true } -GET /api/export/state — returns last export time -""" - -from __future__ import annotations - -import io -import json -import logging -import os -import zipfile -from datetime import datetime -from pathlib import Path -from typing import Any, Literal - -from flask import Blueprint, Response, request - -from api.flask_config import exclusion_rules, json_response -from services.export_engine import collect_export_entries, read_last_export_ms -from services.workspace_db import global_storage_db_path -from utils.workspace_path import resolve_workspace_path - -bp = Blueprint("export_api", __name__) -_logger = logging.getLogger(__name__) - - -def _get_state_dir() -> str: - return os.path.join(str(Path.home()), ".cursor-chat-browser") - - -def _get_export_state() -> dict[str, Any]: - """Read the export state file.""" - state_path = os.path.join(_get_state_dir(), "export_state.json") - if os.path.isfile(state_path): - try: - with open(state_path, "r", encoding="utf-8") as f: - parsed = json.load(f) - if isinstance(parsed, dict): - return parsed - _logger.warning( - "Export state in %s is not a JSON object (got %s); ignoring", - state_path, - type(parsed).__name__, - ) - except (json.JSONDecodeError, ValueError, OSError) as e: - _logger.warning( - "Could not read export state from %s: %s", - state_path, - e, - ) - return {} - - -def _save_export_state(count: int) -> None: - """Save export state after an export.""" - state_dir = _get_state_dir() - os.makedirs(state_dir, exist_ok=True) - state = { - "lastExportTime": datetime.now().isoformat(), - "exportedCount": count, - } - state_path = os.path.join(state_dir, "export_state.json") - with open(state_path, "w", encoding="utf-8") as f: - json.dump(state, f, indent=2) - - -@bp.route("/api/export/state") -def get_export_state() -> Response: - """Return the last export timestamp.""" - state = _get_export_state() - return json_response(state) - - -@bp.route("/api/export", methods=["POST"]) -def export_chats() -> tuple[Response, int] | Response: - """Export chats as a zip archive. - - Exclusion rules (``EXCLUSION_RULES`` app config key) are evaluated against - each chat's project name, title, and model. Rules are loaded once at - application startup; an app restart is required to pick up changes to the - exclusion rules file. - """ - try: - body = request.get_json(silent=True) - if not isinstance(body, dict): - return json_response({"error": "request body must be a JSON object"}, 400) - since: Literal["all", "last"] = ( - "last" if body.get("since") == "last" else "all" - ) - - workspace_path = resolve_workspace_path() - gdb = global_storage_db_path(workspace_path) - if not os.path.isfile(gdb): - return json_response({"error": "Cursor global storage not found"}, 404) - - exported = collect_export_entries( - workspace_path=workspace_path, - exclusion_rules=exclusion_rules(), - since=since, - last_export_ms=read_last_export_ms(since, state=_get_export_state()), - out_dir="", - include_composer=True, - include_cli=False, - ) - count = len(exported) - if count == 0: - return json_response( - {"error": "No conversations to export" + ( - " since last export" if since == "last" else "" - )}, - 404, - ) - - buf = io.BytesIO() - with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: - for entry in exported: - zf.writestr(entry["rel_path"], entry["content"]) - - buf.seek(0) - _save_export_state(count) - - filename = "cursor-export.zip" - return Response( - buf.getvalue(), - mimetype="application/zip", - headers={ - "Content-Disposition": f'attachment; filename="{filename}"', - "X-Export-Count": str(count), - }, - ) - - except Exception as e: - _logger.error( - "Export failed: %s (%s)", - e, - type(e).__name__, - exc_info=True, - ) - return json_response({"error": "Export failed"}, 500) +""" +API route for export — produces per-chat Markdown in a zip download. +POST /api/export { since: "all"|"last", zip: true } +GET /api/export/state — returns last export time +""" + +from __future__ import annotations + +import io +import json +import logging +import os +import zipfile +from datetime import datetime +from pathlib import Path +from typing import Any, Literal + +from flask import Blueprint, Response, request + +from api.flask_config import exclusion_rules, json_response +from services.export_engine import collect_export_entries, read_last_export_ms +from services.workspace_db import global_storage_db_path +from utils.workspace_path import resolve_workspace_path + +bp = Blueprint("export_api", __name__) +_logger = logging.getLogger(__name__) + + +def _get_state_dir() -> str: + return os.path.join(str(Path.home()), ".cursor-chat-browser") + + +def _get_export_state() -> dict[str, Any]: + """Read the export state file.""" + state_path = os.path.join(_get_state_dir(), "export_state.json") + if os.path.isfile(state_path): + try: + with open(state_path, "r", encoding="utf-8") as f: + parsed = json.load(f) + if isinstance(parsed, dict): + return parsed + _logger.warning( + "Export state in %s is not a JSON object (got %s); ignoring", + state_path, + type(parsed).__name__, + ) + except (json.JSONDecodeError, ValueError, OSError) as e: + _logger.warning( + "Could not read export state from %s: %s", + state_path, + e, + ) + return {} + + +def _save_export_state(count: int) -> None: + """Save export state after an export.""" + state_dir = _get_state_dir() + os.makedirs(state_dir, exist_ok=True) + state = { + "lastExportTime": datetime.now().isoformat(), + "exportedCount": count, + } + state_path = os.path.join(state_dir, "export_state.json") + with open(state_path, "w", encoding="utf-8") as f: + json.dump(state, f, indent=2) + + +@bp.route("/api/export/state") +def get_export_state() -> Response: + """Return the last export timestamp.""" + state = _get_export_state() + return json_response(state) + + +@bp.route("/api/export", methods=["POST"]) +def export_chats() -> tuple[Response, int] | Response: + """Export chats as a zip archive. + + Exclusion rules (``EXCLUSION_RULES`` app config key) are evaluated against + each chat's project name, title, and model. Rules are loaded once at + application startup; an app restart is required to pick up changes to the + exclusion rules file. + """ + try: + body = request.get_json(silent=True) + if not isinstance(body, dict): + return json_response({"error": "request body must be a JSON object"}, 400) + since: Literal["all", "last"] = ( + "last" if body.get("since") == "last" else "all" + ) + + workspace_path = resolve_workspace_path() + gdb = global_storage_db_path(workspace_path) + if not os.path.isfile(gdb): + return json_response({"error": "Cursor global storage not found"}, 404) + + exported = collect_export_entries( + workspace_path=workspace_path, + exclusion_rules=exclusion_rules(), + since=since, + last_export_ms=read_last_export_ms(since, state=_get_export_state()), + out_dir="", + include_composer=True, + include_cli=False, + ) + count = len(exported) + if count == 0: + return json_response( + {"error": "No conversations to export" + ( + " since last export" if since == "last" else "" + )}, + 404, + ) + + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: + for entry in exported: + zf.writestr(entry["rel_path"], entry["content"]) + + buf.seek(0) + _save_export_state(count) + + filename = "cursor-export.zip" + return Response( + buf.getvalue(), + mimetype="application/zip", + headers={ + "Content-Disposition": f'attachment; filename="{filename}"', + "X-Export-Count": str(count), + }, + ) + + except Exception as e: + _logger.error( + "Export failed: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) + return json_response({"error": "Export failed"}, 500) + \ No newline at end of file From 96d196c45fb2c11ddfe5ca9c75c472f7c7e774fb Mon Sep 17 00:00:00 2001 From: chen Date: Tue, 23 Jun 2026 09:38:24 +0800 Subject: [PATCH 2/5] Add regression tests for --base-dir workspace path override --- tests/test_export_base_dir_override.py | 79 ++++++++++++++++++++++++++ utils/workspace_path.py | 3 +- 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 tests/test_export_base_dir_override.py diff --git a/tests/test_export_base_dir_override.py b/tests/test_export_base_dir_override.py new file mode 100644 index 0000000..904d029 --- /dev/null +++ b/tests/test_export_base_dir_override.py @@ -0,0 +1,79 @@ +"""Regression tests for issue #4 — --base-dir must not mutate WORKSPACE_PATH.""" + +from __future__ import annotations + +import os +import sys +import unittest +from unittest.mock import patch + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if REPO_ROOT not in sys.path: + sys.path.insert(0, REPO_ROOT) + +from scripts import export as export_script # noqa: E402 + + +class TestExportBaseDirOverride(unittest.TestCase): + def test_main_passes_base_dir_as_resolve_override(self): + opts = { + "since": "all", + "out_dir": ".", + "include_composer": False, + "zip": True, + "exclusion_rules_path": None, + "base_dir": "/custom/workspace", + } + with patch.object(export_script, "parse_args", return_value=opts): + with patch.object( + export_script, + "collect_export_entries", + return_value=[], + ) as mock_collect: + with patch.object( + export_script, + "resolve_workspace_path", + return_value="/resolved/workspace", + ) as mock_resolve: + with self.assertRaises(SystemExit) as ctx: + export_script.main() + self.assertEqual(ctx.exception.code, 0) + mock_resolve.assert_called_once_with(override="/custom/workspace") + mock_collect.assert_called_once() + self.assertEqual( + mock_collect.call_args.kwargs["workspace_path"], + "/resolved/workspace", + ) + + def test_base_dir_does_not_mutate_workspace_path_env(self): + opts = { + "since": "all", + "out_dir": ".", + "include_composer": False, + "zip": True, + "exclusion_rules_path": None, + "base_dir": "/custom/workspace", + } + sentinel = "/original/env/workspace" + prior = os.environ.get("WORKSPACE_PATH") + os.environ["WORKSPACE_PATH"] = sentinel + try: + with patch.object(export_script, "parse_args", return_value=opts): + with patch.object(export_script, "collect_export_entries", return_value=[]): + with patch.object( + export_script, + "resolve_workspace_path", + return_value="/resolved/workspace", + ): + with self.assertRaises(SystemExit): + export_script.main() + self.assertEqual(os.environ.get("WORKSPACE_PATH"), sentinel) + finally: + if prior is None: + os.environ.pop("WORKSPACE_PATH", None) + else: + os.environ["WORKSPACE_PATH"] = prior + + +if __name__ == "__main__": + unittest.main() diff --git a/utils/workspace_path.py b/utils/workspace_path.py index 447b9e1..a4cdb03 100644 --- a/utils/workspace_path.py +++ b/utils/workspace_path.py @@ -9,7 +9,8 @@ from .path_helpers import expand_tilde_path -# Module-level override set via POST /api/set-workspace (or --base-dir). +# Module-level override set via POST /api/set-workspace. +# CLI ``--base-dir`` passes resolve_workspace_path(override=...) instead. # Reads and writes are serialized by _workspace_path_lock so threaded WSGI # workers (gunicorn --threads, waitress, etc.) always see the latest override # from another thread and resolve_workspace_path's snapshot+expand stays consistent. From ee9e90f616074b87690b0ae915f602b629da6ed2 Mon Sep 17 00:00:00 2001 From: chen Date: Tue, 23 Jun 2026 10:04:17 +0800 Subject: [PATCH 3/5] Harden export state parsing and listing cache fast path --- services/export_engine.py | 5 +++-- services/workspace_listing.py | 26 ++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/services/export_engine.py b/services/export_engine.py index ea05fa6..1722d1e 100644 --- a/services/export_engine.py +++ b/services/export_engine.py @@ -233,7 +233,7 @@ def _collect_ide_export_entries( composer_id = row["key"].split(":")[1] try: cd = json.loads(row["value"]) - except (json.JSONDecodeError, ValueError) as parse_err: + except (json.JSONDecodeError, TypeError, ValueError) as parse_err: _logger.debug( "Skipping corrupt composerData row %s: %s", composer_id, @@ -385,7 +385,8 @@ def _collect_cli_export_entries( for session in cp["sessions"]: meta = session.get("meta", {}) session_id = session["session_id"] - created_ms: int = meta.get("createdAt") or int( + created_raw = meta.get("createdAt") + created_ms = to_epoch_ms(created_raw) if created_raw else int( datetime.now().timestamp() * 1000, ) session_name = meta.get("name") or f"Session {session_id[:8]}" diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 9cab01a..10d3e55 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -21,12 +21,15 @@ from models import Bubble, ParseWarningCollector from services.export_engine import WorkspaceOrchestration, prepare_workspace_orchestration from services.summary_cache import ( + fingerprint_workspace_storage, get_cached_projects, nocache_enabled, set_cached_projects, ) from services.workspace_db import ( COMPOSER_ROWS_WITH_HEADERS_SQL, + collect_workspace_entries, + global_storage_db_path, load_project_layouts_for_composer, load_project_layouts_map, open_global_db, @@ -91,14 +94,29 @@ def list_workspace_projects( :meth:`models.ParseWarningCollector.to_api_list`; empty when no skips. """ effective_nocache = nocache_enabled(request_nocache=nocache) - orch = prepare_workspace_orchestration( - workspace_path, rules, nocache=effective_nocache, - ) + workspace_entries: list[dict[str, Any]] | None = None if not effective_nocache: - cached = get_cached_projects(orch.fingerprint) + workspace_entries = collect_workspace_entries(workspace_path) + gdb = global_storage_db_path(workspace_path) + cli_path = get_cli_chats_path() + fingerprint = fingerprint_workspace_storage( + workspace_path, + workspace_entries, + global_db_path=gdb if os.path.isfile(gdb) else None, + rules=rules, + cli_chats_path=cli_path if os.path.isdir(cli_path) else None, + ) + cached = get_cached_projects(fingerprint) if cached is not None: return cached + orch = prepare_workspace_orchestration( + workspace_path, + rules, + nocache=effective_nocache, + workspace_entries=workspace_entries, + ) + projects, warnings = _build_workspace_projects_uncached( workspace_path, rules, orch, ) From dbec81e97645a69a4e4426f18b72d8af3526384b Mon Sep 17 00:00:00 2001 From: chen Date: Wed, 24 Jun 2026 01:38:21 +0800 Subject: [PATCH 4/5] Flatten nested mocks in base_dir override tests --- tests/test_export_base_dir_override.py | 42 ++++++++++++++------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/tests/test_export_base_dir_override.py b/tests/test_export_base_dir_override.py index 904d029..f11b138 100644 --- a/tests/test_export_base_dir_override.py +++ b/tests/test_export_base_dir_override.py @@ -24,19 +24,21 @@ def test_main_passes_base_dir_as_resolve_override(self): "exclusion_rules_path": None, "base_dir": "/custom/workspace", } - with patch.object(export_script, "parse_args", return_value=opts): - with patch.object( + with ( + patch.object(export_script, "parse_args", return_value=opts), + patch.object( export_script, "collect_export_entries", return_value=[], - ) as mock_collect: - with patch.object( - export_script, - "resolve_workspace_path", - return_value="/resolved/workspace", - ) as mock_resolve: - with self.assertRaises(SystemExit) as ctx: - export_script.main() + ) as mock_collect, + patch.object( + export_script, + "resolve_workspace_path", + return_value="/resolved/workspace", + ) as mock_resolve, + self.assertRaises(SystemExit) as ctx, + ): + export_script.main() self.assertEqual(ctx.exception.code, 0) mock_resolve.assert_called_once_with(override="/custom/workspace") mock_collect.assert_called_once() @@ -58,15 +60,17 @@ def test_base_dir_does_not_mutate_workspace_path_env(self): prior = os.environ.get("WORKSPACE_PATH") os.environ["WORKSPACE_PATH"] = sentinel try: - with patch.object(export_script, "parse_args", return_value=opts): - with patch.object(export_script, "collect_export_entries", return_value=[]): - with patch.object( - export_script, - "resolve_workspace_path", - return_value="/resolved/workspace", - ): - with self.assertRaises(SystemExit): - export_script.main() + with ( + patch.object(export_script, "parse_args", return_value=opts), + patch.object(export_script, "collect_export_entries", return_value=[]), + patch.object( + export_script, + "resolve_workspace_path", + return_value="/resolved/workspace", + ), + self.assertRaises(SystemExit), + ): + export_script.main() self.assertEqual(os.environ.get("WORKSPACE_PATH"), sentinel) finally: if prior is None: From 0587356ab8897337ae3b826fe28318b0dca294cd Mon Sep 17 00:00:00 2001 From: chen Date: Wed, 24 Jun 2026 03:31:44 +0800 Subject: [PATCH 5/5] Harden export_engine: skip malformed composer keys, guard CLI session meta --- services/export_engine.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/services/export_engine.py b/services/export_engine.py index 1722d1e..ef6ad63 100644 --- a/services/export_engine.py +++ b/services/export_engine.py @@ -230,7 +230,14 @@ def _collect_ide_export_entries( ctx = orch.ctx exported: list[CollectedExportEntry] = [] for row in db_data.ide_composer_rows: - composer_id = row["key"].split(":")[1] + row_key = row["key"] + if ":" not in row_key: + _logger.debug( + "Skipping composer row with malformed key %r", + row_key, + ) + continue + composer_id = row_key.split(":", 1)[1] try: cd = json.loads(row["value"]) except (json.JSONDecodeError, TypeError, ValueError) as parse_err: @@ -383,7 +390,8 @@ def _collect_cli_export_entries( continue for session in cp["sessions"]: - meta = session.get("meta", {}) + raw_meta = session.get("meta") + meta = raw_meta if isinstance(raw_meta, dict) else {} session_id = session["session_id"] created_raw = meta.get("createdAt") created_ms = to_epoch_ms(created_raw) if created_raw else int(