def _scan_file_progress(path: Path, chain: dict[str, Optional[str]]) -> None:
    """Record ``uuid -> parentUuid`` for every progress entry in one JSONL file.

    Args:
        path: JSONL transcript file to scan.
        chain: Mapping updated in place. A later entry with the same uuid
            overwrites an earlier one.

    The scan is best-effort: malformed lines, non-object JSON values and
    paths that cannot be opened or read are skipped rather than aborting
    the whole conversion.
    """
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                if "progress" not in line:  # Fast pre-filter before JSON parsing
                    continue
                try:
                    raw = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(raw, dict):
                    continue
                d = cast(dict[str, Any], raw)
                if d.get("type") != "progress":
                    continue
                uuid = d.get("uuid")
                if not isinstance(uuid, str):
                    continue
                # Keep the mapping honest: anything that is not a string
                # (missing, null, or malformed) means "no parent".
                parent = d.get("parentUuid")
                chain[uuid] = parent if isinstance(parent, str) else None
    except OSError:
        # Covers the deletion race (FileNotFoundError) as well as glob hits
        # that are directories or unreadable files.
        pass


def _scan_progress_chains(*paths: Path) -> dict[str, Optional[str]]:
    """Fast scan of JSONL files for progress entry uuid->parentUuid mappings.

    Args:
        *paths: Files are scanned directly. Directories contribute their
            ``*.jsonl`` files plus ``*/subagents/*.jsonl``. Paths that are
            neither are ignored.

    Returns:
        Mapping of progress-entry uuid to its parent uuid (``None`` when the
        entry has no usable parent).
    """
    chain: dict[str, Optional[str]] = {}
    for path in paths:
        if path.is_file():
            _scan_file_progress(path, chain)
        elif path.is_dir():
            for f in path.glob("*.jsonl"):
                _scan_file_progress(f, chain)
            # Also scan subagent directories
            for f in path.glob("*/subagents/*.jsonl"):
                _scan_file_progress(f, chain)
    return chain
def _scan_sidechain_uuids(directory: Path) -> set[str]:
    """Collect UUIDs from sidechain/subagent files not loaded into the DAG.

    Some subagent files (e.g. aprompt_suggestion) are never referenced
    via agentId in the main session, so they aren't loaded by
    load_transcript(). Their UUIDs are needed to suppress false orphan
    warnings when main-chain entries reference sidechain parents.

    Args:
        directory: Project directory; ``*/subagents/*.jsonl`` below it is
            scanned.

    Returns:
        Every string ``uuid`` found on a JSON-object line. The scan is
        best-effort: malformed lines and paths that cannot be opened or
        read are skipped.
    """
    uuids: set[str] = set()
    for f in directory.glob("*/subagents/*.jsonl"):
        try:
            with open(f, "r", encoding="utf-8", errors="replace") as fh:
                for line in fh:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        raw = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(raw, dict):
                        continue
                    uuid = cast(dict[str, Any], raw).get("uuid")
                    if isinstance(uuid, str):
                        uuids.add(uuid)
        except OSError:
            # Deleted mid-scan, a directory matching the glob, or an
            # unreadable file: none of these should abort the conversion.
            continue
    return uuids
def _repair_parent_chains(
    messages: list[TranscriptEntry],
    progress_chain: dict[str, Optional[str]],
) -> None:
    """Repair parentUuid fields that point to progress entries.

    Walks the progress chain to find the nearest non-progress ancestor.
    Mutates entries in place (Pydantic v2 models are mutable by default).

    Args:
        messages: Entries to repair. Entries without a ``parentUuid``
            attribute, or whose parent is not a progress entry, are left
            untouched.
        progress_chain: ``uuid -> parentUuid`` for progress entries. It is
            only read, never modified.

    A parent that sits on a cyclic progress chain, or whose chain ends in a
    missing or non-string value, is repaired to ``None``.
    """
    if not progress_chain:
        return

    # progress uuid -> nearest non-progress ancestor. Many messages hang off
    # the same progress run, so each run is walked only once.
    resolved: dict[str, Optional[str]] = {}

    def _resolve(start: str) -> Optional[str]:
        on_trail: set[str] = set()
        current: Optional[str] = start
        while current is not None and current in progress_chain:
            if current in resolved:
                current = resolved[current]
                break
            if current in on_trail:
                # Cycle among progress entries: no usable ancestor.
                current = None
                break
            on_trail.add(current)
            nxt = progress_chain[current]
            current = nxt if isinstance(nxt, str) else None
        for uuid in on_trail:
            resolved[uuid] = current
        return current

    for msg in messages:
        parent = getattr(msg, "parentUuid", None)
        if isinstance(parent, str) and parent and parent in progress_chain:
            msg.parentUuid = _resolve(parent)  # type: ignore[union-attr]
+ """ all_messages: list[TranscriptEntry] = [] # Find all .jsonl files, excluding agent files (they are loaded via load_transcript @@ -331,14 +437,35 @@ def load_directory_transcripts( ) all_messages.extend(messages) - # Sort all messages chronologically - def get_timestamp(entry: TranscriptEntry) -> str: - if hasattr(entry, "timestamp"): - return entry.timestamp # type: ignore - return "" + # Repair parent chains: progress entries create UUID gaps + progress_chain = _scan_progress_chains(directory_path) + _repair_parent_chains(all_messages, progress_chain) - all_messages.sort(key=get_timestamp) - return all_messages + # Partition: sidechain entries excluded from DAG (Phase C scope) + sidechain_entries = [e for e in all_messages if getattr(e, "isSidechain", False)] + main_entries = [e for e in all_messages if not getattr(e, "isSidechain", False)] + + # Collect sidechain UUIDs so DAG build can suppress orphan warnings + # for parents that exist in sidechain data (will be integrated in Phase C) + sidechain_uuids: set[str] = { + e.uuid for e in sidechain_entries if isinstance(e, BaseTranscriptEntry) + } + # Also scan unloaded subagent files (e.g. 
aprompt_suggestion agents + # that are never referenced via agentId in the main session) + sidechain_uuids |= _scan_sidechain_uuids(directory_path) + + # Build DAG and traverse (entries grouped by session, depth-first) + tree = build_dag_from_entries(main_entries, sidechain_uuids=sidechain_uuids) + dag_ordered = traverse_session_tree(tree) + + # Re-add summaries/queue-ops (excluded from DAG since they lack uuid) + non_dag_entries: list[TranscriptEntry] = [ + e + for e in main_entries + if isinstance(e, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)) + ] + + return dag_ordered + sidechain_entries + non_dag_entries, tree # ============================================================================= @@ -406,9 +533,11 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr content_key = item.tool_use_id break else: - # No tool result found - this is a user text message + # No tool result found - this is a user text message. + # Use uuid to keep distinct messages (even at same timestamp) + # so DAG parent references remain valid. is_user_text = True - # content_key stays empty (dedupe by timestamp alone) + content_key = message.uuid elif isinstance(message, SummaryTranscriptEntry): # Summaries have no timestamp or uuid - use leafUuid to keep them distinct content_key = message.leafUuid @@ -698,6 +827,7 @@ def _generate_paginated_html( session_data: Dict[str, SessionCacheData], working_directories: List[str], silent: bool = False, + session_tree: Optional[SessionTree] = None, ) -> Path: """Generate paginated HTML files for combined transcript. 
@@ -856,6 +986,7 @@ def _generate_paginated_html( page_title, page_info=page_info, page_stats=page_stats, + session_tree=session_tree, ) page_file.write_text(html_content, encoding="utf-8") @@ -948,11 +1079,18 @@ def convert_jsonl_to( # Initialize working_directories for both branches (used by pagination in directory mode) working_directories: List[str] = [] + # session_tree is populated in directory mode (DAG already built); + # None in single-file mode (renderer builds it on demand) + session_tree: Optional[SessionTree] = None + if input_path.is_file(): # Single file mode - cache only available for directory mode if output_path is None: output_path = input_path.with_suffix(f".{ext}") messages = load_transcript(input_path, silent=silent) + # Repair progress chain gaps for single-file mode + progress_chain = _scan_progress_chains(input_path) + _repair_parent_chains(messages, progress_chain) title = f"Claude Transcript - {input_path.stem}" cache_was_updated = False # No cache in single file mode else: @@ -988,7 +1126,7 @@ def convert_jsonl_to( return output_path # Phase 2: Load messages (will use fresh cache when available) - messages = load_directory_transcripts( + messages, session_tree = load_directory_transcripts( input_path, cache_manager, from_date, to_date, silent ) @@ -1065,6 +1203,7 @@ def convert_jsonl_to( session_data, working_directories, silent=silent, + session_tree=session_tree, ) else: # Use single-file generation for small projects or filtered views @@ -1091,7 +1230,9 @@ def convert_jsonl_to( if should_regenerate: # For referenced images, pass the output directory output_dir = output_path.parent - content = renderer.generate(messages, title, output_dir=output_dir) + content = renderer.generate( + messages, title, output_dir=output_dir, session_tree=session_tree + ) assert content is not None output_path.write_text(content, encoding="utf-8") @@ -1117,44 +1258,12 @@ def convert_jsonl_to( cache_was_updated, image_export_mode, silent=silent, + 
session_tree=session_tree, ) return output_path -def has_cache_changes( - project_dir: Path, - cache_manager: Optional[CacheManager], - from_date: Optional[str] = None, - to_date: Optional[str] = None, -) -> bool: - """Check if cache needs updating (fast mtime comparison only). - - Returns True if there are modified files or cache is stale. - Does NOT load any messages - that's deferred to ensure_fresh_cache. - """ - if cache_manager is None: - return True # No cache means we need to process - - jsonl_files = list(project_dir.glob("*.jsonl")) - if not jsonl_files: - return False - - # Get cached project data - cached_project_data = cache_manager.get_cached_project_data() - - # Check various invalidation conditions - modified_files = cache_manager.get_modified_files(jsonl_files) - - return ( - cached_project_data is None - or from_date is not None - or to_date is not None - or bool(modified_files) - or (cached_project_data.total_message_count == 0 and bool(jsonl_files)) - ) - - def ensure_fresh_cache( project_dir: Path, cache_manager: Optional[CacheManager], @@ -1165,7 +1274,6 @@ def ensure_fresh_cache( """Ensure cache is fresh and populated. Returns True if cache was updated. This does the heavy lifting of loading and parsing files. - Call has_cache_changes() first for a fast check. """ if cache_manager is None: return False @@ -1201,7 +1309,7 @@ def ensure_fresh_cache( # Load and process messages to populate cache if not silent: print(f"Updating cache for {project_dir.name}...") - messages = load_directory_transcripts( + messages, _tree = load_directory_transcripts( project_dir, cache_manager, from_date, to_date, silent ) @@ -1479,6 +1587,7 @@ def _generate_individual_session_files( cache_was_updated: bool = False, image_export_mode: Optional[str] = None, silent: bool = False, + session_tree: Optional[SessionTree] = None, ) -> int: """Generate individual files for each session in the specified format. 
@@ -1577,7 +1686,12 @@ def _generate_individual_session_files( if should_regenerate_session: # Generate session content session_content = renderer.generate_session( - messages, session_id, session_title, cache_manager, output_dir + messages, + session_id, + session_title, + cache_manager, + output_dir, + session_tree=session_tree, ) assert session_content is not None # Write session file @@ -1897,7 +2011,7 @@ def process_projects_hierarchy( print( f"Warning: No cached data available for {project_dir.name}, using fallback processing" ) - messages = load_directory_transcripts( + messages, _tree = load_directory_transcripts( project_dir, cache_manager, from_date, to_date, silent=silent ) # Ensure cache is populated with session data (including working directories) diff --git a/claude_code_log/dag.py b/claude_code_log/dag.py new file mode 100644 index 0000000..c07edb0 --- /dev/null +++ b/claude_code_log/dag.py @@ -0,0 +1,647 @@ +"""DAG-based message ordering for Claude Code transcripts. + +Replaces timestamp-based ordering with parentUuid → uuid graph traversal. +Works at the TranscriptEntry level (before factory/rendering). + +See dev-docs/dag.md for the full architecture spec. 
@dataclass
class MessageNode:
    """A deduplicated message in the DAG."""

    uuid: str
    # Parent message uuid; build_dag() resets it to None for orphans and
    # when breaking a cycle.
    parent_uuid: Optional[str]
    # Owning session; _walk_session_with_forks() overwrites it with a
    # synthetic branch ID for nodes on a fork branch.
    session_id: str
    timestamp: str
    entry: TranscriptEntry
    # Filled in by build_dag(); empty until then.
    children_uuids: list[str] = field(default_factory=lambda: [])


@dataclass
class SessionDAGLine:
    """A session's ordered chain of unique messages."""

    session_id: str
    uuids: list[str]  # Ordered by parent→child chain traversal
    first_timestamp: str
    parent_session_id: Optional[str] = None
    attachment_uuid: Optional[str] = None  # UUID in parent where this attaches
    is_branch: bool = False  # True for within-session fork branches
    original_session_id: Optional[str] = None  # Original session_id before fork split


@dataclass
class JunctionPoint:
    """A message where other sessions fork or continue."""

    uuid: str
    session_id: str  # The session this message belongs to
    # Sessions attaching at this message; build_session_tree() sorts them
    # by first_timestamp.
    target_sessions: list[str] = field(default_factory=lambda: [])


@dataclass
class SessionTree:
    """The complete session hierarchy for a project."""

    nodes: dict[str, MessageNode]  # Keyed by message uuid
    sessions: dict[str, SessionDAGLine]  # Keyed by session (or branch) ID
    roots: list[str]  # Root session IDs (no parent session)
    junction_points: dict[str, JunctionPoint]  # Keyed by attachment uuid
def build_message_index(
    entries: list[TranscriptEntry],
) -> dict[str, MessageNode]:
    """Index transcript entries by uuid, keeping one node per uuid.

    Summary and queue-operation entries are ignored. When the same uuid
    shows up in several sessions, the copy belonging to the session that
    started earliest (smallest first-entry timestamp) is kept. On a tie
    the copy seen first stays.
    """
    # Pass 1: when did each session start?
    session_start: dict[str, str] = {}
    for entry in entries:
        if isinstance(entry, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)):
            continue
        if (
            entry.sessionId not in session_start
            or entry.timestamp < session_start[entry.sessionId]
        ):
            session_start[entry.sessionId] = entry.timestamp

    # Pass 2: one node per uuid; a duplicate only displaces the incumbent
    # when its session started strictly earlier.
    index: dict[str, MessageNode] = {}
    for entry in entries:
        if isinstance(entry, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)):
            continue
        incumbent = index.get(entry.uuid)
        if incumbent is not None:
            challenger_start = session_start.get(entry.sessionId, "")
            incumbent_start = session_start.get(incumbent.session_id, "")
            if not challenger_start < incumbent_start:
                continue
        index[entry.uuid] = MessageNode(
            uuid=entry.uuid,
            parent_uuid=entry.parentUuid,
            session_id=entry.sessionId,
            timestamp=entry.timestamp,
            entry=entry,
        )

    return index
def build_dag(
    nodes: dict[str, MessageNode],
    sidechain_uuids: set[str] | None = None,
) -> None:
    """Populate children_uuids on each node. Mutates nodes in place.

    Warns about orphan nodes (parentUuid points outside loaded data)
    and validates acyclicity. Parents known to be in sidechain data
    (Phase C scope) are silently promoted to root without warning.

    Args:
        nodes: Message index keyed by uuid.
        sidechain_uuids: UUIDs known to live in sidechain data. A dangling
            parent found here is cleared without a warning.

    After this returns, every ``parent_uuid`` is either ``None`` or a key of
    ``nodes``, and the parent and child links describe the same forest: a
    node cut loose to break a cycle is also removed from its former
    parent's ``children_uuids``.
    """
    _sidechain_uuids = sidechain_uuids or set()

    # Clear existing children
    for node in nodes.values():
        node.children_uuids = []

    # Build parent→children links
    for node in nodes.values():
        if node.parent_uuid is None:
            continue
        parent = nodes.get(node.parent_uuid)
        if parent is not None:
            parent.children_uuids.append(node.uuid)
            continue
        if node.parent_uuid not in _sidechain_uuids:
            logger.warning(
                "Orphan node %s: parentUuid %s not found in loaded"
                " data (promoting to root)",
                node.uuid,
                node.parent_uuid,
            )
        # Clear the dangling parent so this node becomes a root
        # and can participate in DAG walks
        node.parent_uuid = None

    # Validate: no cycles. `acyclic` holds nodes already proven to reach a
    # root, so each ancestor chain is walked only once overall.
    acyclic: set[str] = set()
    for node in nodes.values():
        on_path: set[str] = set()
        current: Optional[str] = node.uuid
        while current is not None and current not in acyclic:
            if current in on_path:
                logger.warning("Cycle detected in parent chain at uuid %s", current)
                culprit = nodes[current]
                former_parent = (
                    nodes.get(culprit.parent_uuid)
                    if culprit.parent_uuid is not None
                    else None
                )
                # Cut the child link too; otherwise a walk over
                # children_uuids would still go round the cycle forever.
                if (
                    former_parent is not None
                    and current in former_parent.children_uuids
                ):
                    former_parent.children_uuids.remove(current)
                culprit.parent_uuid = None
                break
            on_path.add(current)
            holder = nodes.get(current)
            if holder is None:
                break
            current = holder.parent_uuid
        # Everything on this path now leads to a root.
        acyclic.update(on_path)


def _collect_descendants(
    uuid: str,
    session_uuids: set[str],
    nodes: dict[str, MessageNode],
    result: set[str],
) -> None:
    """Add a node and all its same-session descendants to ``result``.

    Args:
        uuid: Starting node. It is added even if it is not in ``session_uuids``.
        session_uuids: Only children in this set are followed.
        nodes: Message index keyed by uuid.
        result: Output set, updated in place. UUIDs already present are
            not expanded again.

    Uses an explicit stack instead of recursion so that long chains cannot
    hit the interpreter's recursion limit.
    """
    pending: list[str] = [uuid]
    while pending:
        current = pending.pop()
        if current in result:
            continue
        result.add(current)
        node = nodes.get(current)
        if node is None:
            continue
        pending.extend(c for c in node.children_uuids if c in session_uuids)
def _is_subtree_dead_end(
    uuid: str,
    session_uuids: set[str],
    nodes: dict[str, MessageNode],
    max_depth: int = 20,
) -> bool:
    """Report whether the same-session subtree under ``uuid`` ends within ``max_depth``.

    Only children that belong to ``session_uuids`` are followed. The answer
    is ``False`` as soon as a node ``max_depth`` levels below the start
    still has same-session children (too deep to tell, so it is assumed to
    continue). Otherwise every branch ran out and the answer is ``True``.
    """
    pending: list[tuple[str, int]] = [(uuid, 0)]
    while pending:
        node_id, level = pending.pop()
        kids = [k for k in nodes[node_id].children_uuids if k in session_uuids]
        if kids and level >= max_depth:
            return False
        # A leaf contributes nothing; the remaining stack is still checked.
        pending.extend((k, level + 1) for k in kids)
    return True
def _stitch_tool_results(
    children: list[str],
    session_uuids: set[str],
    nodes: dict[str, MessageNode],
) -> Optional[list[str]]:
    """Detect and stitch tool-result side-branches into a linear chain.

    When the assistant makes multiple tool calls in one turn, the JSONL
    records both the next tool_use and the tool_result as children of the
    current tool_use entry, creating a false fork. Two variants:

    Variant 1 — User child is immediate dead end:
        A(tool_use) → U(tool_result)  [dead-end side-branch]
                    → A(next tool_use)  [main chain continues]

    Variant 2 — User child continues, Assistant subtree dead-ends:
        A(tool_use) → U(tool_result) → A(response) → ...  [main chain]
                    → A(tool_use) → ... → dead ends  [progress artifact]

    Args:
        children: Same-session child uuids of the fork node.
        session_uuids: UUIDs belonging to the session being walked.
        nodes: Message index keyed by uuid.

    Returns:
        A stitched ordering placing dead-end children first (sorted by
        timestamp), then the single continuation child as the last
        element. Returns None if the pattern doesn't match.
    """
    # Separate into user (tool_result) and assistant (continuation) children
    # NOTE(review): children that are neither user nor assistant entries
    # fall into neither list and are therefore absent from any stitched
    # result — confirm that is intended.
    user_children = [
        c for c in children if isinstance(nodes[c].entry, UserTranscriptEntry)
    ]
    assistant_children = [
        c for c in children if isinstance(nodes[c].entry, AssistantTranscriptEntry)
    ]

    if not user_children or not assistant_children:
        return None  # Not the tool_result pattern

    # Check variant 1: all user children are immediate dead ends
    # (no child of theirs belongs to this session)
    user_all_dead = all(
        not any(c in session_uuids for c in nodes[uc].children_uuids)
        for uc in user_children
    )

    if user_all_dead:
        # Variant 1: user dead ends + single assistant continuation
        if len(assistant_children) != 1:
            return None
        user_children.sort(key=lambda c: nodes[c].timestamp)
        return user_children + assistant_children

    # Check variant 2: assistant subtrees are dead ends,
    # exactly one user child continues
    user_with_cont = [
        uc
        for uc in user_children
        if any(c in session_uuids for c in nodes[uc].children_uuids)
    ]
    if len(user_with_cont) != 1:
        return None  # Ambiguous — multiple user continuations

    # Verify all assistant children's subtrees are dead ends
    for ac in assistant_children:
        if not _is_subtree_dead_end(ac, session_uuids, nodes):
            return None

    # Verify remaining user children (without continuation) are dead ends
    # (by construction these have no same-session children at all)
    user_dead = [uc for uc in user_children if uc not in user_with_cont]
    for uc in user_dead:
        if not _is_subtree_dead_end(uc, session_uuids, nodes):
            return None

    # Stitch: dead-end children first, then the continuing user child
    dead_ends = user_dead + assistant_children
    dead_ends.sort(key=lambda c: nodes[c].timestamp)
    return dead_ends + user_with_cont
def _walk_session_with_forks(
    root: MessageNode,
    session_id: str,
    session_uuids: set[str],
    nodes: dict[str, MessageNode],
) -> tuple[list[SessionDAGLine], set[str]]:
    """Walk a session's DAG from root, splitting into separate DAG-lines at fork points.

    Uses a queue-based approach to handle nested forks:
    1. Start with (root_uuid, session_id, None) in the queue
    2. Walk chain following single same-session children
    3. On fork (multiple same-session children): stop chain at fork point,
       push each child as a new branch
    4. Update MessageNode.session_id for branch nodes

    Args:
        root: Node to start from.
        session_id: ID of the session being walked. The line that starts at
            ``root`` keeps this ID; branches get ``"<line_id>@<uuid[:12]>"``.
        session_uuids: UUIDs belonging to the session. Children outside
            this set are ignored.
        nodes: Message index keyed by uuid. ``session_id`` of nodes placed
            on a branch is overwritten with the branch ID.

    Returns:
        Tuple of (DAG-line list, set of UUIDs intentionally skipped). The
        skipped set holds compaction replays with their descendants, and
        stitched dead-end children with their descendants (the dead-end
        children themselves also appear in the chain).
    """
    # Queue entries: (start_uuid, dag_line_id, parent_dag_line_id)
    queue: list[tuple[str, str, Optional[str]]] = [(root.uuid, session_id, None)]
    result: list[SessionDAGLine] = []
    skipped: set[str] = set()  # Compaction replay UUIDs

    while queue:
        # FIFO: lines come out in the order their forks were discovered
        start_uuid, line_id, parent_line_id = queue.pop(0)
        chain: list[str] = []
        current: Optional[MessageNode] = nodes[start_uuid]
        is_branch = line_id != session_id

        while current is not None:
            chain.append(current.uuid)
            # Update session_id for branch nodes (needed for build_session_tree)
            if is_branch:
                current.session_id = line_id

            # Find children in the original session
            same_session_children = [
                c for c in current.children_uuids if c in session_uuids
            ]
            if len(same_session_children) == 0:
                current = None
            elif len(same_session_children) == 1:
                current = nodes[same_session_children[0]]
            else:
                # Multiple same-session children. Distinguish real forks
                # from artifacts (see dev-docs/dag.md caveats).
                same_session_children.sort(key=lambda c: nodes[c].timestamp)

                stitched = _stitch_tool_results(
                    same_session_children, session_uuids, nodes
                )
                if stitched is not None:
                    # Tool-result side-branches were stitched into the
                    # chain. The last element is the continuation; all
                    # others are dead-end nodes whose subtree descendants
                    # must be skipped.
                    for su in stitched[:-1]:
                        if is_branch:
                            nodes[su].session_id = line_id
                        # Adds su itself as well as its descendants
                        _collect_descendants(su, session_uuids, nodes, skipped)
                    chain.extend(stitched[:-1])
                    current = nodes[stitched[-1]]
                else:
                    unique_timestamps = {
                        nodes[c].timestamp for c in same_session_children
                    }
                    if len(unique_timestamps) == 1:
                        # Same timestamp = compaction replay: follow only
                        # the first child (original chain), skip replays
                        # and all their descendants.
                        current = nodes[same_session_children[0]]
                        for sc in same_session_children[1:]:
                            _collect_descendants(sc, session_uuids, nodes, skipped)
                    else:
                        # Different timestamps = real fork (rewind).
                        # Stop chain here, push each child as a branch.
                        for child_uuid in same_session_children:
                            branch_id = f"{line_id}@{child_uuid[:12]}"
                            queue.append((child_uuid, branch_id, line_id))
                        current = None

        if chain:
            first_ts = nodes[chain[0]].timestamp
            dag_line = SessionDAGLine(
                session_id=line_id,
                uuids=chain,
                first_timestamp=first_ts,
                is_branch=is_branch,
                original_session_id=session_id if is_branch else None,
            )
            # Set parent/attachment for branches: a branch attaches at the
            # parent of its first node, i.e. the fork point
            if is_branch and parent_line_id is not None:
                parent_uuid = nodes[chain[0]].parent_uuid
                dag_line.parent_session_id = parent_line_id
                dag_line.attachment_uuid = parent_uuid
            result.append(dag_line)

    return result, skipped
def _timestamp_fallback_line(
    session_id: str, snodes: list[MessageNode]
) -> SessionDAGLine:
    """Build one DAG-line holding all of a session's nodes in timestamp order."""
    # A partial walk may already have relabelled some nodes with synthetic
    # branch IDs. Those branches are not emitted by the fallback, so put the
    # nodes back under the real session ID.
    for n in snodes:
        n.session_id = session_id
    ordered = sorted(snodes, key=lambda n: n.timestamp)
    return SessionDAGLine(
        session_id=session_id,
        uuids=[n.uuid for n in ordered],
        first_timestamp=ordered[0].timestamp,
    )


def extract_session_dag_lines(
    nodes: dict[str, MessageNode],
) -> dict[str, SessionDAGLine]:
    """Extract per-session ordered chains from the DAG.

    For each session, finds the root node(s) (parent_uuid is null or points
    to a different session), then walks forward via children_uuids filtering
    to same-session children.

    Within-session forks (multiple same-session children) produce additional
    DAG-lines with synthetic IDs (e.g., "s1@child_uuid12").
    Falls back to timestamp sort when no root is found, or when the walk
    plus the intentionally skipped nodes do not cover the whole session.

    Args:
        nodes: Message index keyed by uuid, with parent and child links
            already populated. Node ``session_id`` values may be rewritten
            to branch IDs.

    Returns:
        Mapping of session (or branch) ID to its DAG-line.
    """
    # Group nodes by session
    session_nodes: dict[str, list[MessageNode]] = {}
    for node in nodes.values():
        session_nodes.setdefault(node.session_id, []).append(node)

    sessions: dict[str, SessionDAGLine] = {}
    for session_id, snodes in session_nodes.items():
        session_uuids = {n.uuid for n in snodes}

        # Find root(s): nodes whose parent_uuid is null or outside this session
        roots = [
            n
            for n in snodes
            if n.parent_uuid is None or n.parent_uuid not in session_uuids
        ]

        if not roots:
            logger.warning(
                "Session %s: no root found, falling back to timestamp sort",
                session_id,
            )
            sessions[session_id] = _timestamp_fallback_line(session_id, snodes)
            continue

        # Sort roots by timestamp (earliest first = primary root)
        roots.sort(key=lambda n: n.timestamp)
        if len(roots) > 1:
            logger.warning(
                "Session %s: %d roots found, walking all from earliest (%s)",
                session_id,
                len(roots),
                roots[0].uuid,
            )

        # Walk from ALL roots to maximize coverage (orphan-promoted roots
        # create disconnected subtrees that must each be walked)
        dag_lines: list[SessionDAGLine] = []
        walked_uuids: set[str] = set()
        skipped_uuids: set[str] = set()
        for root in roots:
            if root.uuid in walked_uuids:
                continue
            root_lines, root_skipped = _walk_session_with_forks(
                root, session_id, session_uuids, nodes
            )
            for dl in root_lines:
                walked_uuids.update(dl.uuids)
            skipped_uuids.update(root_skipped)
            dag_lines.extend(root_lines)

        # Check coverage: walked + intentionally skipped (compaction replays)
        covered = len(walked_uuids | skipped_uuids)
        if covered < len(snodes):
            logger.warning(
                "Session %s: DAG walk covers %d of %d nodes, "
                "falling back to timestamp sort",
                session_id,
                covered,
                len(snodes),
            )
            sessions[session_id] = _timestamp_fallback_line(session_id, snodes)
            continue

        # Merge non-branch DAG-lines that share the same session_id
        # (happens when multiple roots exist due to orphan promotion)
        trunk_lines = [dl for dl in dag_lines if dl.session_id == session_id]
        branch_lines = [dl for dl in dag_lines if dl.session_id != session_id]
        if trunk_lines:
            # Merge all trunk lines into one, ordered by first_timestamp
            trunk_lines.sort(key=lambda dl: dl.first_timestamp)
            merged_uuids: list[str] = []
            for tl in trunk_lines:
                merged_uuids.extend(tl.uuids)
            sessions[session_id] = SessionDAGLine(
                session_id=session_id,
                uuids=merged_uuids,
                first_timestamp=trunk_lines[0].first_timestamp,
            )
        for dag_line in branch_lines:
            sessions[dag_line.session_id] = dag_line

    return sessions
def build_session_tree(
    nodes: dict[str, MessageNode],
    sessions: dict[str, SessionDAGLine],
) -> SessionTree:
    """Arrange DAG-lines into a session hierarchy and record junction points.

    The parent of a line's first message decides where the line hangs:
    - no parent, parent not loaded, or parent in the same session → root
    - parent in another session → child of that session, attached at
      the parent message

    Lines with no messages are listed as roots and otherwise left as they
    are. Roots and each junction's targets are ordered by first_timestamp.
    """
    root_ids: list[str] = []
    junctions: dict[str, JunctionPoint] = {}

    for sid, line in sessions.items():
        if not line.uuids:
            root_ids.append(sid)
            continue

        anchor_uuid = nodes[line.uuids[0]].parent_uuid
        anchor = None if anchor_uuid is None else nodes.get(anchor_uuid)
        if anchor_uuid is None or anchor is None or anchor.session_id == sid:
            # Nothing in another session to hang from: this line is a root
            root_ids.append(sid)
            line.parent_session_id = None
            line.attachment_uuid = None
            continue

        # Child line: attach to the other session at the anchor message
        line.parent_session_id = anchor.session_id
        line.attachment_uuid = anchor_uuid
        junction = junctions.get(anchor_uuid)
        if junction is None:
            junction = JunctionPoint(uuid=anchor_uuid, session_id=anchor.session_id)
            junctions[anchor_uuid] = junction
        junction.target_sessions.append(sid)

    def started_at(sid: str) -> str:
        return sessions[sid].first_timestamp

    # Chronological order for roots and for each junction's targets
    root_ids.sort(key=started_at)
    for junction in junctions.values():
        junction.target_sessions.sort(key=started_at)

    return SessionTree(
        nodes=nodes,
        sessions=sessions,
        roots=root_ids,
        junction_points=junctions,
    )
def traverse_session_tree(tree: SessionTree) -> list[TranscriptEntry]:
    """Depth-first traversal of session tree producing rendering order.

    For each session: yields its DAG-line's entries in chain order.
    Children are visited in chronological order (by first_timestamp),
    immediately after the message they attach to.

    Args:
        tree: Session hierarchy. Only ``sessions``, ``nodes`` and ``roots``
            are read.

    Returns:
        Entries in rendering order. Each session is emitted at most once.
    """
    result: list[TranscriptEntry] = []
    visited_sessions: set[str] = set()

    # parent session id → attachment uuid → child session ids. Built once
    # up front instead of rescanning every session on each visit.
    attachments: dict[str, dict[str, list[str]]] = {}
    for sid, sline in tree.sessions.items():
        if sline.parent_session_id is not None and sline.attachment_uuid:
            attachments.setdefault(sline.parent_session_id, {}).setdefault(
                sline.attachment_uuid, []
            ).append(sid)
    for by_uuid in attachments.values():
        for child_sids in by_uuid.values():
            child_sids.sort(key=lambda sid: tree.sessions[sid].first_timestamp)

    def _visit_session(session_id: str) -> None:
        if session_id in visited_sessions:
            return
        visited_sessions.add(session_id)

        dag_line = tree.sessions.get(session_id)
        if dag_line is None:
            return

        children_at = attachments.get(session_id, {})

        # Emit entries, visiting child sessions at junction points
        # NOTE(review): a child whose attachment uuid is not in this line's
        # uuids is never reached from here — confirm that is intended.
        for uuid in dag_line.uuids:
            result.append(tree.nodes[uuid].entry)
            # After emitting this message, visit any child sessions
            # that attach here (in chronological order)
            for child_sid in children_at.get(uuid, ()):
                _visit_session(child_sid)

    # Visit root sessions in chronological order
    for root_sid in tree.roots:
        _visit_session(root_sid)

    return result
+ """ + nodes = build_message_index(entries) + build_dag(nodes, sidechain_uuids=sidechain_uuids) + sessions = extract_session_dag_lines(nodes) + return build_session_tree(nodes, sessions) diff --git a/claude_code_log/html/renderer.py b/claude_code_log/html/renderer.py index 8d22a37..806b6ff 100644 --- a/claude_code_log/html/renderer.py +++ b/claude_code_log/html/renderer.py @@ -5,6 +5,9 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Optional, Tuple, cast +if TYPE_CHECKING: + from ..dag import SessionTree + from ..cache import get_library_version from ..models import ( AssistantTextMessage, @@ -516,6 +519,7 @@ def generate( title: Optional[str] = None, combined_transcript_link: Optional[str] = None, output_dir: Optional[Path] = None, + session_tree: Optional["SessionTree"] = None, page_info: Optional[dict[str, Any]] = None, page_stats: Optional[dict[str, Any]] = None, ) -> str: @@ -528,6 +532,7 @@ def generate( output_dir: Optional output directory for referenced images. page_info: Optional pagination info (page_number, prev_link, next_link). page_stats: Optional page statistics (message_count, date_range, token_summary). + session_tree: Optional pre-built SessionTree (avoids rebuilding DAG). 
""" import time @@ -541,7 +546,9 @@ def generate( title = "Claude Transcript" # Get root messages (tree) and session navigation from format-neutral renderer - root_messages, session_nav, _ = generate_template_messages(messages) + root_messages, session_nav, _ = generate_template_messages( + messages, session_tree=session_tree + ) # Flatten tree via pre-order traversal, formatting content along the way with log_timing("Content formatting (pre-order)", t_start): @@ -579,6 +586,7 @@ def generate_session( title: Optional[str] = None, cache_manager: Optional["CacheManager"] = None, output_dir: Optional[Path] = None, + session_tree: Optional["SessionTree"] = None, ) -> str: """Generate HTML for a single session.""" # Filter messages for this session (SummaryTranscriptEntry.sessionId is always None) @@ -599,6 +607,7 @@ def generate_session( title or f"Session {session_id[:8]}", combined_transcript_link=combined_link, output_dir=output_dir, + session_tree=session_tree, ) def generate_projects_index( @@ -645,6 +654,7 @@ def generate_html( combined_transcript_link: Optional[str] = None, page_info: Optional[dict[str, Any]] = None, page_stats: Optional[dict[str, Any]] = None, + session_tree: Optional["SessionTree"] = None, ) -> str: """Generate HTML from transcript messages using Jinja2 templates. @@ -656,6 +666,7 @@ def generate_html( combined_transcript_link: Optional link to combined transcript. page_info: Optional pagination info (page_number, prev_link, next_link). page_stats: Optional page statistics (message_count, date_range, token_summary). + session_tree: Optional pre-built SessionTree (avoids rebuilding DAG). 
""" return HtmlRenderer().generate( messages, @@ -663,6 +674,7 @@ def generate_html( combined_transcript_link, page_info=page_info, page_stats=page_stats, + session_tree=session_tree, ) diff --git a/claude_code_log/html/system_formatters.py b/claude_code_log/html/system_formatters.py index 0880703..d5e01a1 100644 --- a/claude_code_log/html/system_formatters.py +++ b/claude_code_log/html/system_formatters.py @@ -88,6 +88,39 @@ def format_session_header_content(content: SessionHeaderMessage) -> str: HTML for the session header display """ escaped_title = html.escape(content.title) + if content.is_branch and content.parent_message_index is not None: + # Branch header: backlink to fork point with context + fork_label = "fork point" + if content.parent_session_summary: + escaped_summary = html.escape(content.parent_session_summary) + fork_label = escaped_summary + # Show original session ID for context + orig_id = "" + if content.original_session_id: + orig_id = html.escape(content.original_session_id[:8]) + link = ( + f'' + f"↳ branched from {fork_label}" + ) + return ( + f"{orig_id} {link}{escaped_title}" if orig_id else f"{link}{escaped_title}" + ) + if content.parent_session_id: + parent_label = content.parent_session_summary or content.parent_session_id[:8] + escaped_parent = html.escape(parent_label) + if content.parent_message_index is not None: + link = ( + f'↳ continues from ' + f"{escaped_parent}" + ) + else: + link = ( + f'↳ continues from ' + f"{escaped_parent}" + ) + return f"{link}{escaped_title}" return escaped_title diff --git a/claude_code_log/html/templates/components/global_styles.css b/claude_code_log/html/templates/components/global_styles.css index eed4503..f47a393 100644 --- a/claude_code_log/html/templates/components/global_styles.css +++ b/claude_code_log/html/templates/components/global_styles.css @@ -230,6 +230,21 @@ pre { bottom: 200px; } +.debug-toggle.floating-btn { + bottom: 260px; + border-radius: 6px; + width: 38px; + height: 28px; + 
font-size: 0.65em; + font-family: 'SFMono-Regular', Consolas, monospace; + font-weight: 600; +} + +.debug-toggle.floating-btn.active { + background-color: #d4e8f7; + color: #333; +} + @media (max-width: 1280px) { .header > span:first-child { flex: auto; diff --git a/claude_code_log/html/templates/components/message_styles.css b/claude_code_log/html/templates/components/message_styles.css index abdd30c..00236b4 100644 --- a/claude_code_log/html/templates/components/message_styles.css +++ b/claude_code_log/html/templates/components/message_styles.css @@ -565,6 +565,9 @@ font-size: 1.2em; } +/* Branch headers (within-session forks) — indent set via inline + style based on tree depth (margin-left: depth*2em). */ + .session-subtitle { font-size: 0.9em; color: var(--text-muted); @@ -892,6 +895,20 @@ details summary { .ansi-bg-cyan { background-color: #11a8cd; } .ansi-bg-white { background-color: #e5e5e5; } +/* Debug UUID info */ +.debug-info { + display: none; + font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; + font-size: 0.7em; + color: #999; + padding: 2px 0; + letter-spacing: 0.02em; +} + +.show-debug-info .debug-info { + display: block; +} + /* Bright background colors */ .ansi-bg-bright-black { background-color: #666666; } .ansi-bg-bright-red { background-color: #f14c4c; } diff --git a/claude_code_log/html/templates/components/session_nav.html b/claude_code_log/html/templates/components/session_nav.html index 143d4a8..4316b5b 100644 --- a/claude_code_log/html/templates/components/session_nav.html +++ b/claude_code_log/html/templates/components/session_nav.html @@ -12,26 +12,50 @@

Session Navigation

{% for session in sessions %} - - - - {% if session.first_user_message %} -
-                {{- session.first_user_message|e -}}
-            
+ {% if session.is_fork_point is defined and session.is_fork_point %} +
+ + ⑂ {{ session.first_user_message }} + +
+ {% elif session.is_branch is defined and session.is_branch %} +
+ + ↳ {{ session.first_user_message }} + +
+ {% else %} +
0 %}style='margin-left: {{ session.depth * 24 }}px'{% endif %}> + {% if session.parent_session_id and mode == "toc" and session.parent_message_index is defined and session.parent_message_index is not none %} + ↳ continues from {{ session.parent_session_id[:8] }} + {% elif session.parent_session_id %} + ↳ continues from {{ session.parent_session_id[:8] }} {% endif %} - + + + + {% if session.first_user_message %} +
+                    {{- session.first_user_message|e -}}
+                
+ {% endif %} +
+
+ {% endif %} {% endfor %}
diff --git a/claude_code_log/html/templates/components/session_nav_styles.css b/claude_code_log/html/templates/components/session_nav_styles.css index a69c4d3..f915b2a 100644 --- a/claude_code_log/html/templates/components/session_nav_styles.css +++ b/claude_code_log/html/templates/components/session_nav_styles.css @@ -24,6 +24,7 @@ } .session-link { + display: block; padding: 8px 12px; background-color: var(--white-dimmed); border: 1px solid #dee2e6; @@ -48,6 +49,25 @@ margin-top: 2px; } +/* Child session hierarchy */ +.session-nav-item.session-child { + border-left: 3px solid var(--accent-color, #6c757d); + padding-left: 8px; +} + +.session-backlink { + font-size: 0.75em; + color: #6c757d; + display: block; + margin-bottom: 2px; + text-decoration: none; +} + +a.session-backlink:hover { + color: var(--text-secondary); + text-decoration: underline; +} + /* Project-specific session navigation */ .project-sessions { margin-top: 15px; @@ -103,4 +123,60 @@ .session-preview { font-size: 0.75em; line-height: 1.3; +} + +/* Within-session fork styles */ +.junction-forward-links { + display: flex; + gap: 8px; + margin-top: 6px; + padding: 4px 8px; + font-size: 0.8em; +} + +.junction-link { + color: #6c757d; + text-decoration: none; + padding: 2px 6px; + border: 1px dashed #adb5bd; + border-radius: 3px; + transition: background-color 0.2s; +} + +.junction-link:hover { + background-color: #e9ecef; + color: var(--text-secondary); +} + +.branch-backlink { + border-left: 2px dashed #888; + padding-left: 6px; +} + +/* Fork point and branch nav items — lightweight text links, not cards */ +.session-fork-point, +.session-branch { + padding-left: 8px; +} + +.session-fork-point { + border-left: 2px dashed #888; +} + +.session-branch { + border-left: 2px dashed #666; +} + +.fork-link, +.branch-link { + display: block; + text-decoration: none; + color: #6c757d; + padding: 2px 0; + font-size: 0.85em; +} + +.fork-link:hover, +.branch-link:hover { + color: var(--text-secondary); } \ No 
newline at end of file diff --git a/claude_code_log/html/templates/transcript.html b/claude_code_log/html/templates/transcript.html index 5863b50..63348ba 100644 --- a/claude_code_log/html/templates/transcript.html +++ b/claude_code_log/html/templates/transcript.html @@ -99,8 +99,8 @@

🔍 Search & Filter

{% for message, message_title, html_content, formatted_timestamp in messages %} {% if is_session_header(message) %}
-
-
Session: {{ html_content }}
+
+
Session: {{ html_content|safe }}
{% if message.has_children %}
{% if message.immediate_children_count == message.total_descendants_count %} @@ -141,7 +141,15 @@

🔍 Search & Filter

{% endif %}
+ {% if message.meta %}
{{ message.meta.uuid[:12] }}{% if message.meta.parent_uuid %} → {{ message.meta.parent_uuid[:12] }}{% endif %}
{% endif %}
{{ html_content | safe }}
+ {% if message.junction_forward_links %} + + {% endif %} {% if message.has_children %}
{% if message.immediate_children_count == message.total_descendants_count %} @@ -170,6 +178,7 @@

🔍 Search & Filter

+ 🔝