Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 33 additions & 129 deletions api/export_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,23 @@
GET /api/export/state — returns last export time
"""

from __future__ import annotations

import io
import json
import logging
import os
import sqlite3
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import Any, Literal

from flask import Blueprint, Response, request

from api.flask_config import exclusion_rules, json_response

from services.export_engine import collect_export_entries, read_last_export_ms
from services.workspace_db import global_storage_db_path
from utils.workspace_path import resolve_workspace_path
from utils.path_helpers import to_epoch_ms
from utils.text_extract import extract_text_from_bubble, slug
from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules
from utils.cursor_md_exporter import cursor_ide_chat_to_markdown
from services.workspace_context import resolve_workspace_context_minimal
from services.workspace_db import (
load_bubble_map,
load_code_block_diff_map,
open_global_db,
)
from services.workspace_resolver import lookup_workspace_display_name

bp = Blueprint("export_api", __name__)
_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -92,127 +83,40 @@ def export_chats() -> tuple[Response, int] | Response:
exclusion rules file.
"""
try:
body = request.get_json(silent=True) or {}
since = "last" if body.get("since") == "last" else "all"
body = request.get_json(silent=True)
if not isinstance(body, dict):
return json_response({"error": "request body must be a JSON object"}, 400)
since: Literal["all", "last"] = (
"last" if body.get("since") == "last" else "all"
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

workspace_path = resolve_workspace_path()

# Determine last export timestamp for filtering
last_export_ms = 0
if since == "last":
state = _get_export_state()
ts_str = state.get("lastExportTime")
if ts_str:
last_export_ms = to_epoch_ms(ts_str)

# ── Workspace scanning via service layer ──────────────────────────────
ctx = resolve_workspace_context_minimal(workspace_path)
workspace_entries = ctx.workspace_entries
composer_id_to_ws = ctx.composer_id_to_workspace_id

# Build display-name and slug maps
ws_id_to_slug: dict[str, str] = {}
ws_id_to_display_name: dict[str, str] = {}
for e in workspace_entries:
display = lookup_workspace_display_name(workspace_path, e["name"])
if display != e["name"]:
ws_id_to_display_name[e["name"]] = display
ws_id_to_slug[e["name"]] = slug(display)

today = datetime.now().strftime("%Y-%m-%d")
exported: list[dict[str, Any]] = []
rules = exclusion_rules()

# ── Database reading via service layer ────────────────────────────────
with open_global_db(workspace_path) as (global_db, _):
if global_db is None:
return json_response({"error": "Cursor global storage not found"}, 404)
bubble_map = load_bubble_map(global_db)
code_block_diff_map = load_code_block_diff_map(global_db)

try:
composer_rows = global_db.execute(
"SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'"
" AND value LIKE '%fullConversationHeadersOnly%'"
" AND value NOT LIKE '%fullConversationHeadersOnly\":[]%'"
).fetchall()
except sqlite3.Error:
composer_rows = []

for row in composer_rows:
composer_id = row["key"].split(":")[1]
try:
cd = json.loads(row["value"])
headers = cd.get("fullConversationHeadersOnly") or []
if not headers:
continue

updated_at_ms = to_epoch_ms(cd.get("lastUpdatedAt"))
if updated_at_ms is None:
updated_at_ms = to_epoch_ms(cd.get("createdAt"))
if updated_at_ms is None:
updated_at_ms = 0
if since == "last" and updated_at_ms and updated_at_ms <= last_export_ms:
continue

ws_id = composer_id_to_ws.get(composer_id, "global")
ws_slug = "other-chats" if ws_id == "global" else (ws_id_to_slug.get(ws_id) or slug(ws_id[:12]))
ws_display_name = "Other chats" if ws_id == "global" else (ws_id_to_display_name.get(ws_id) or ws_slug)
title = cd.get("name") or f"Chat {composer_id[:8]}"
model_config = cd.get("modelConfig") or {}
model_name = model_config.get("modelName")
model_names = [model_name] if model_name and model_name != "default" else None

bubble_texts = []
for h in headers:
b = bubble_map.get(h.get("bubbleId"))
if b:
bt = extract_text_from_bubble(b)
if bt:
bubble_texts.append(bt)

searchable = build_searchable_text(
project_name=ws_display_name,
chat_title=title,
model_names=model_names,
chat_content_snippet="\n\n".join(bubble_texts) if bubble_texts else None,
)
if is_excluded_by_rules(rules, searchable):
continue

title_slug = slug(title)
ts_ms = updated_at_ms or int(datetime.now().timestamp() * 1000)
ts_str = datetime.fromtimestamp(ts_ms / 1000).strftime("%Y-%m-%dT%H-%M-%S")
filename = f"{ts_str}__{title_slug}__{composer_id[:8]}.md"
rel_path = os.path.join(today, ws_slug, "chat", filename)

md = cursor_ide_chat_to_markdown(
composer_data=cd,
composer_id=composer_id,
bubble_map=bubble_map,
code_block_diff_map=code_block_diff_map,
workspace_info={"ws_slug": ws_slug, "ws_display_name": ws_display_name},
)
exported.append({"path": rel_path, "content": md, "updatedAt": updated_at_ms})

except Exception as e:
_logger.error(
"Error processing composer %s for export: %s (%s)",
composer_id,
e,
type(e).__name__,
exc_info=True,
)

gdb = global_storage_db_path(workspace_path)
if not os.path.isfile(gdb):
return json_response({"error": "Cursor global storage not found"}, 404)

exported = collect_export_entries(
workspace_path=workspace_path,
exclusion_rules=exclusion_rules(),
since=since,
last_export_ms=read_last_export_ms(since, state=_get_export_state()),
out_dir="",
include_composer=True,
include_cli=False,
)
count = len(exported)
if count == 0:
return json_response({"error": "No conversations to export" + (
" since last export" if since == "last" else ""
)}, 404)
return json_response(
{"error": "No conversations to export" + (
" since last export" if since == "last" else ""
)},
404,
)

buf = io.BytesIO()
with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
for entry in exported:
zf.writestr(entry["path"], entry["content"])
zf.writestr(entry["rel_path"], entry["content"])

buf.seek(0)
_save_export_state(count)
Expand All @@ -234,4 +138,4 @@ def export_chats() -> tuple[Response, int] | Response:
type(e).__name__,
exc_info=True,
)
return json_response({"error": "Export failed"}, 500)
return json_response({"error": "Export failed"}, 500)
3 changes: 2 additions & 1 deletion models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from models.conversation import Bubble, Composer, Conversation, WorkspaceLocalComposer
from models.errors import SchemaError
from models.parse_warnings import ParseWarningCollector
from models.export import ExportEntry
from models.export import CollectedExportEntry, ExportEntry
from models.search import ConversationSummary, SearchResult
from models.workspace import Workspace

Expand All @@ -16,6 +16,7 @@
"Composer",
"Conversation",
"ConversationSummary",
"CollectedExportEntry",
"ExportEntry",
"ParseWarningCollector",
"SchemaError",
Expand Down
14 changes: 13 additions & 1 deletion models/export.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any
from typing import Any, TypedDict

from models.from_dict_validation import require_dict, require_non_empty_str_fields


class CollectedExportEntry(TypedDict):
    """One exportable conversation with rendered markdown (engine/CLI collection).

    All keys are required (the TypedDict is declared with the default
    totality). The per-key notes below are reviewer assumptions unless a
    usage is named explicitly; confirm them against the code that builds
    these entries.
    """

    # Identifier of the exported conversation.
    # NOTE(review): presumably the composer / session id -- confirm.
    id: str
    # Path of the entry relative to the export root.
    # NOTE(review): the HTTP export route appears to use this as the zip
    # member name (zf.writestr(entry["rel_path"], ...)) -- confirm.
    rel_path: str
    # Rendered markdown text of the conversation (see the class docstring).
    content: str
    # NOTE(review): presumably the destination path derived from the
    # caller-supplied output directory -- confirm.
    out_path: str
    # Last-update timestamp of the conversation.
    # NOTE(review): presumably epoch milliseconds -- confirm the unit.
    updatedAt: int
    # Title of the conversation.
    title: str
    # Workspace the conversation is attributed to.
    # NOTE(review): unclear from here whether this is a display name or
    # a slug -- confirm.
    workspace: str


@dataclass(frozen=True)
class ExportEntry:
"""One line of manifest.jsonl; log_id / title / workspace required, timestamps optional."""
Expand Down
6 changes: 0 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,6 @@ pretty = true
# Anchored regexes — unanchored `venv/` would match any path segment containing "venv/".
exclude = ["^venv/", "^\\.venv/", "^build/", "^dist/"]

# Standalone CLI export script (~985 LOC) duplicates utils/ helpers; typed
# incrementally — issue #100 allows per-module override until consolidated.
[[tool.mypy.overrides]]
module = "scripts.export"
ignore_errors = true

# Test modules use unittest/pytest patterns that are not worth strict-checking
# alongside production code; route handlers and utils are fully strict.
[[tool.mypy.overrides]]
Expand Down
Loading
Loading