diff --git a/frontends/stapp.py b/frontends/stapp.py index 83a6d33c..73549324 100644 --- a/frontends/stapp.py +++ b/frontends/stapp.py @@ -97,16 +97,17 @@ def _pet_hook(ctx): st.divider() if st.button("开始空闲自主行动"): st.session_state.last_reply_time = int(time.time()) - 1800 - st.toast("已将上次回复时间设为1800秒前"); st.rerun() + st.session_state.autonomous_enabled = True + st.toast("已将上次回复时间设为1800秒前,自主行动已激活"); st.rerun(scope="app") if st.session_state.autonomous_enabled: if st.button("⏸️ 禁止自主行动"): st.session_state.autonomous_enabled = False - st.toast("⏸️ 已禁止自主行动"); st.rerun() + st.toast("⏸️ 已禁止自主行动"); st.rerun(scope="app") st.caption("🟢 自主行动运行中,会在你离开它30分钟后自动进行") else: if st.button("▶️ 允许自主行动", type="primary"): st.session_state.autonomous_enabled = True - st.toast("✅ 已允许自主行动"); st.rerun() + st.toast("✅ 已允许自主行动"); st.rerun(scope="app") st.caption("🔴 自主行动已停止") with st.sidebar: render_sidebar() diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 00000000..84694d4c --- /dev/null +++ b/tools/__init__.py @@ -0,0 +1 @@ +# tools package - utility modules diff --git a/tools/learn_skill_from_cases/README.md b/tools/learn_skill_from_cases/README.md new file mode 100644 index 00000000..c0b186bf --- /dev/null +++ b/tools/learn_skill_from_cases/README.md @@ -0,0 +1,61 @@ +# learn_skill_from_cases — English-only Skill Learning CLI + +A streamlined skill learning tool. **English input only** — provide skill names in pure English. 
+ +## Usage + +```bash +# Learn a skill +python -m tools.learn_skill_from_cases "docker_compose_production" + +# List learned skills +python -m tools.learn_skill_from_cases --list + +# Show skill details +python -m tools.learn_skill_from_cases --show docker_compose_production + +# Dry run (preview without creating files) +python -m tools.learn_skill_from_cases "python_async" --dry-run + +# Force refresh (skip inheriting previous patterns and cases) +python -m tools.learn_skill_from_cases "neo4j_modeling" --force + +# Show version +python -m tools.learn_skill_from_cases --version +``` + +## Environment Variables + +| Variable | Default | Description | +|---|---|---| +| `SKILL_LLM_ENABLE` | `0` | Set to `1` to enable LLM enhancement | +| `LLM_API_BASE` | `http://localhost:11434/v1` | OpenAI-compatible API endpoint | +| `LLM_API_KEY` | — | API key if required | +| `LLM_MODEL` | `qwen2.5:7b` | Model name | +| `LLM_TIMEOUT` | `30` | HTTP timeout in seconds | + +## Output Structure + +``` +GA_ROOT/skills_learning/ + └── {skill_name}/ + ├── rev{N}/ + │ ├── meta.json + │ ├── cases/all_cases.json + │ ├── patterns/knowledge_patterns.json + │ ├── tools/assess.py + │ ├── reports/learning_report.md + │ ├── reports/skill_definition.json + │ └── practice/ + └── ... +``` + +## Phase Flow + +The tool runs a bootstrap step (Phase 0) followed by a 5-phase pipeline: + +0. **Bootstrap** — create version directory +1. **Define** — fetch skill definition +2. **Search** — collect cases (skill search + web search) +3. **Extract** — derive knowledge patterns +4. **Assess** — generate the `tools/assess.py` assessment script +5. 
# **Validate** — run assessment and score
# (the line above is the tail of item 5 of the README.md "Phase Flow" list)

# ---------------------------------------------------------------------------
# tools/learn_skill_from_cases/__init__.py  (new file)
# ---------------------------------------------------------------------------
"""learn_skill_from_cases — English-only skill learning from cases (simplified version)"""

# ---------------------------------------------------------------------------
# tools/learn_skill_from_cases/__main__.py  (new file)
# ---------------------------------------------------------------------------
"""
__main__.py — learn_skill_from_cases CLI entry point

Usage:
    python -m tools.learn_skill_from_cases "docker_compose_production"
    python -m tools.learn_skill_from_cases --list
    python -m tools.learn_skill_from_cases "python_async" --dry-run
    python -m tools.learn_skill_from_cases "neo4j_modeling" --force
    python -m tools.learn_skill_from_cases --version
    python -m tools.learn_skill_from_cases --show docker_compose_production
"""
import sys, argparse, re, json
from pathlib import Path

# Repository root: two levels above this package, put on sys.path so that the
# absolute ``tools.…`` imports below resolve when run via ``python -m``.
GA_ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(GA_ROOT))

from tools.learn_skill_from_cases import dir_manager


def validate_english_only(name: str):
    """Exit with status 1 unless *name* consists of ASCII characters only.

    The error text promises that Chinese, Japanese and mixed-language names are
    rejected.  The previous check only covered three CJK ranges, so kana,
    Cyrillic or accented names slipped through; testing for pure ASCII matches
    the documented "English only" contract.

    Args:
        name: Skill name as typed on the command line.

    Raises:
        SystemExit: With code 1 when *name* contains any non-ASCII character.
    """
    if name.isascii():
        return
    print("Error: Skill name must be in English only.")
    print(" Chinese characters, Japanese characters, and mixed-language inputs are not supported.")
    print(" Please provide a pure English skill name (e.g., 'docker_compose_production').")
    sys.exit(1)


def cmd_list():
    """Print every learned skill together with its latest revision number."""
    skills = dir_manager.get_all_skills()
    if not skills:
        print("No skills learned yet. Use:")
        print(' python -m tools.learn_skill_from_cases "your_skill_name"')
        return
    print(f"\nLearned skills ({len(skills)} total):")
    print("-" * 55)
    for skill in skills:
        versions = dir_manager.get_versions(skill)
        # get_versions() returns an ascending list, so the last item is newest.
        print(f" {skill:30s} rev{versions[-1] if versions else '--'}")


def cmd_show(skill_name: str):
    """Print the revisions of *skill_name* and the patterns stored in each.

    A revision whose ``knowledge_patterns.json`` is missing is listed without
    patterns.  An unreadable or malformed file is reported on stdout instead
    of being silently ignored, and the remaining revisions are still shown.

    Args:
        skill_name: Name of a previously learned skill.
    """
    skill_dir = dir_manager.get_skill_dir(skill_name)
    if not skill_dir.exists():
        print(f"Skill '{skill_name}' not found.")
        return
    versions = dir_manager.get_versions(skill_name)
    if not versions:
        print(f"Skill '{skill_name}' has no versions.")
        return
    print(f"\nSkill: {skill_name}")
    print("=" * 55)
    for v in versions:
        print(f" rev{v}")
        patterns_file = skill_dir / f"rev{v}" / "patterns" / "knowledge_patterns.json"
        if not patterns_file.exists():
            continue
        try:
            patterns = json.loads(patterns_file.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f" [!] Could not read {patterns_file.name}: {exc}")
            continue
        if not isinstance(patterns, list):
            continue
        for p in patterns:
            if isinstance(p, dict):
                print(f" [{p.get('level','?')}] {str(p.get('principle','?'))[:70]}")


def main():
    """Parse the command line and dispatch to the requested action.

    ``--version``, ``--list`` and ``--show`` are checked first, in that order,
    and return immediately.  Otherwise a skill name is required, validated with
    :func:`validate_english_only`, and handed to the learning engine.

    Raises:
        SystemExit: Code 1 when no skill name is given or the name is rejected.
    """
    parser = argparse.ArgumentParser(
        description="learn_skill_from_cases — English-only skill learning from cases (simplified)",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("skill_name", nargs="?", help="English skill name to learn (e.g., docker_compose_production)")
    parser.add_argument("--list", action="store_true", help="List all learned skills")
    parser.add_argument("--show", metavar="NAME", help="Show details of a learned skill")
    parser.add_argument("--dry-run", action="store_true", help="Preview without creating files")
    parser.add_argument("--force", action="store_true", help="Skip inherited patterns, start fresh")
    parser.add_argument("--version", action="store_true", help="Show version")

    args = parser.parse_args()

    # Handle special commands
    if args.version:
        print("learn_skill_from_cases v1.0.0 (simplified English-only version)")
        return

    if args.list:
        cmd_list()
        return

    if args.show:
        cmd_show(args.show)
        return

    # Must have a skill name
    if not args.skill_name:
        parser.print_help()
        print("\nError: Please provide a skill name or use --list.")
        sys.exit(1)

    # Validate: English only
    validate_english_only(args.skill_name)

    # Run the learning pipeline (imported lazily so --list/--show stay cheap)
    from tools.learn_skill_from_cases.engine import run
    ctx = run(args.skill_name, dry_run=args.dry_run, force=args.force)

    if args.dry_run:
        # A dry run executes no phase, so there is no score to comment on.
        return

    score = ctx.get("score", 0)
    if score >= 60:
        print(f"\n Learning score: {score:.1f}/100 — Good result!")
    elif score > 0:
        print(f"\n Learning score: {score:.1f}/100 — Consider adding more cases.")
    else:
        print("\n Score not available. Review the output above.")


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# tools/learn_skill_from_cases/dir_manager.py  (new file)
# ---------------------------------------------------------------------------
"""
dir_manager.py — Skill version directory management (simplified, English-only)

Responsibilities: detect existing versions, create revN directories, inherit previous patterns.
"""
import os, json, shutil, re
from datetime import date
from pathlib import Path

GA_ROOT = Path(__file__).resolve().parents[2]
SKILL_LEARN_ROOT = GA_ROOT / "skills_learning"

# A revision directory is exactly "rev" followed by ASCII digits.
_REV_DIR_RE = re.compile(r"rev(\d+)", re.ASCII)


def _sanitize_skill_name(skill_name: str) -> str:
    """Turn *skill_name* into a single safe path component.

    Every character that is not a word character or a hyphen (so ``/``, ``\\``
    and ``.`` included) becomes ``_``, which rules out path traversal.
    Leading and trailing underscores are stripped.  ``\\w`` is Unicode-aware,
    so non-ASCII letters are kept as they are.

    Returns:
        The sanitized name, or ``"unnamed_skill"`` when nothing is left.
    """
    sanitized = re.sub(r'[^\w\-]', '_', skill_name)
    sanitized = sanitized.strip('_')
    return sanitized or "unnamed_skill"


def _list_dirs(parent: Path) -> list[Path]:
    """Return the immediate sub-directories of *parent* ([] if it is missing)."""
    if not parent.exists():
        return []
    return [d for d in parent.iterdir() if d.is_dir()]


def get_versions(skill_name: str) -> list[int]:
    """Get existing version numbers for a skill, e.g. [1, 2, 3]

    Only directories named ``rev<digits>`` count; names such as ``rev-1`` or
    ``rev+2`` (which ``int()`` would accept) are ignored so that
    :func:`next_version` never builds on a bogus number.

    Returns:
        The version numbers in ascending order.
    """
    skill_dir = SKILL_LEARN_ROOT / _sanitize_skill_name(skill_name)
    matches = (_REV_DIR_RE.fullmatch(d.name) for d in _list_dirs(skill_dir))
    return sorted(int(m.group(1)) for m in matches if m)


def next_version(skill_name: str) -> int:
    """Return the next version number (1 when the skill has no revision yet)."""
    versions = get_versions(skill_name)
    return (max(versions) + 1) if versions else 1


def ensure_root_exists():
    """Ensure skills_learning/ root directory exists; print a note when created."""
    if not SKILL_LEARN_ROOT.exists():
        SKILL_LEARN_ROOT.mkdir(parents=True, exist_ok=True)
        print(" [OK] skills_learning/ root directory created")


def get_skill_dir(skill_name: str) -> Path:
    """Return skill directory (path injection protected)."""
    return SKILL_LEARN_ROOT / _sanitize_skill_name(skill_name)


def get_latest_revision_dir(skill_name: str) -> Path | None:
    """Return the newest revision directory that contains knowledge patterns.

    Revisions are scanned from newest to oldest.  When none of them has a
    ``patterns/knowledge_patterns.json`` the newest revision is returned
    anyway; ``None`` means the skill has no revision at all.
    """
    safe_name = _sanitize_skill_name(skill_name)
    versions = get_versions(safe_name)
    if not versions:
        return None
    skill_dir = SKILL_LEARN_ROOT / safe_name
    for v in reversed(versions):
        patterns_file = skill_dir / f"rev{v}" / "patterns" / "knowledge_patterns.json"
        if patterns_file.exists():
            return skill_dir / f"rev{v}"
    return skill_dir / f"rev{versions[-1]}"


def _load_json_list(path: Path) -> list[dict]:
    """Load *path* as JSON and return it as a list.

    A single non-list value is wrapped in a list; a missing, unreadable or
    malformed file yields ``[]`` so that inheritance stays best-effort.
    """
    if not path.exists():
        return []
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return []
    return data if isinstance(data, list) else [data]


def get_latest_patterns(skill_name: str) -> list[dict]:
    """Inherit knowledge patterns from the latest revision.

    Returns ``[]`` when there is no previous revision or its pattern file is
    missing or corrupt (the same tolerance :func:`get_latest_cases` applies).
    """
    latest = get_latest_revision_dir(skill_name)
    if latest is None:
        return []
    return _load_json_list(latest / "patterns" / "knowledge_patterns.json")


def get_latest_cases(skill_name: str) -> list[dict]:
    """Inherit cases from the latest revision ([] when unavailable or corrupt)."""
    latest = get_latest_revision_dir(skill_name)
    if not latest:
        return []
    return _load_json_list(latest / "cases" / "all_cases.json")


def create_revision_dir(skill_name: str, version: int) -> Path:
    """
    Create revN directory structure:
        revN/
        ├── meta.json
        ├── cases/
        ├── patterns/
        ├── tools/
        ├── reports/
        └── practice/

    ``meta.json`` records the original (unsanitized) skill name, the version,
    today's date in ISO format and the status ``"in_progress"``.

    Returns:
        Path of the revision directory.
    """
    rev_dir = SKILL_LEARN_ROOT / _sanitize_skill_name(skill_name) / f"rev{version}"
    subdirs = ["cases", "patterns", "tools", "practice", "reports"]
    for s in subdirs:
        (rev_dir / s).mkdir(parents=True, exist_ok=True)

    meta = {
        "skill": skill_name,
        "version": version,
        # Previously a hard-coded literal date; record the real creation day.
        "created_at": date.today().isoformat(),
        "status": "in_progress"
    }
    with open(rev_dir / "meta.json", "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)
    return rev_dir


def get_all_skills() -> list[str]:
    """Get all skill names under skills_learning/, sorted alphabetically."""
    # _list_dirs() already copes with a missing root and filters on is_dir().
    return sorted(d.name for d in _list_dirs(SKILL_LEARN_ROOT))

# ---------------------------------------------------------------------------
# tools/learn_skill_from_cases/eng_patterns_data.py  (new file)
# ---------------------------------------------------------------------------
"""
eng_patterns_data.py — Static pattern dictionaries for learn_skill_from_cases engine.

Extracted from engine.py to keep core logic lean and allow easy maintenance/expansion.
"""
# ============================================================
# Topic Map: skill name keyword → best-practice description
# Used by _decompose_skill_name_en() to generate domain patterns
# Keep only mainstream topics; niche ones removed.
# ============================================================
# Shared description so the "kubernetes" and "k8s" entries cannot drift apart.
_KUBERNETES_TOPIC = "Kubernetes cluster management & pod orchestration"

# (keyword, description) pairs. Order is significant: consumers iterate the
# resulting dict in insertion order.
_TOPIC_ENTRIES: tuple[tuple[str, str], ...] = (
    ("deploy", "Deployment automation & release management best practices"),
    ("production", "Production-ready configuration & environment management"),
    ("docker", "Containerization & Docker orchestration best practices"),
    ("kubernetes", _KUBERNETES_TOPIC),
    ("k8s", _KUBERNETES_TOPIC),
    ("api", "API design, versioning & documentation best practices"),
    ("rest", "RESTful API design & HTTP protocol best practices"),
    ("database", "Database schema design & query optimization"),
    ("sql", "SQL query optimization & relational data modeling"),
    ("python", "Python code organization & packaging best practices"),
    ("async", "Async programming patterns & concurrency management"),
    ("testing", "Test strategy & automation framework best practices"),
    ("monitor", "Monitoring & observability stack implementation"),
    ("security", "Security hardening & vulnerability management"),
    ("frontend", "Frontend architecture & component design patterns"),
    ("backend", "Backend service architecture & middleware patterns"),
    ("microservice", "Microservice decomposition & inter-service communication"),
    ("devops", "CI/CD pipeline design & infrastructure as code"),
    ("ci", "Continuous integration pipeline configuration"),
    ("cd", "Continuous deployment strategies & rollback patterns"),
    ("data", "Data pipeline architecture & ETL best practices"),
    ("machine", "Machine learning pipeline & model lifecycle management"),
    ("automation", "Workflow automation & task scheduling patterns"),
)

# Skill-name keyword -> one-line best-practice description.
TOPIC_MAP: dict[str, str] = dict(_TOPIC_ENTRIES)

# Keywords to scan from case titles (used by _decompose_skill_name_en)
CASE_SCAN_KEYWORDS: list[str] = (
    "deploy docker kubernetes monitoring testing "
    "security api database async microservice "
    "pipeline automation config devops ci cd"
).split()

# ============================================================
# Core Patterns:
# domain → best-practice principles   (continuation of the "Core Patterns:" banner)
# Used by _extract_patterns() to produce knowledge patterns
# Keep only high-impact, cross-domain patterns.
# ============================================================
# Each entry maps a category to:
#   "keywords":   substrings searched for in the collected case text
#   "principles": (principle text, pattern id, confidence) tuples
CORE_PATTERNS: dict[str, dict] = {
    "production": {
        "keywords": ["production", "deploy", "prod", "release"],
        "principles": [
            ("Use environment variables / config files to separate environments", "P_env_separation", 89),
            ("Pin dependency versions to avoid unexpected upgrades", "P_pin_version", 94),
            ("Set resource limits to prevent single service starvation", "P_resource_limits", 85),
        ]
    },
    "testing": {
        "keywords": ["test", "validate", "verify", "lint"],
        "principles": [
            ("Validate configuration files before deployment", "P_config_validation", 93),
            ("Write unit tests for core business logic", "P_unit_test", 87),
            ("Use integration tests to verify component interactions", "P_integration_test", 85),
        ]
    },
    "security": {
        "keywords": ["security", "auth", "encrypt", "secret", "permission"],
        "principles": [
            ("Never hardcode secrets; use secret management tools", "P_secret_mgmt", 95),
            ("Apply principle of least privilege for service accounts", "P_least_privilege", 90),
            ("Enable TLS/SSL for all service communications", "P_tls", 88),
        ]
    },
    "database": {
        "keywords": ["database", "query", "index", "schema", "migration"],
        "principles": [
            ("Use database migrations for schema changes", "P_db_migration", 90),
            ("Add indexes for frequently queried columns", "P_db_index", 88),
            ("Use connection pooling to manage database connections", "P_connection_pool", 85),
        ]
    },
}


# ============================================================
# Assessment Code Generator
# Renders the self-contained assess.py script at Phase 4
# ============================================================
# Static part of the generated script. It is a raw, non-f string so the code
# below is exactly what lands in assess.py. It relies on the constants
# VERSION, SKILL_NAME, CASE_COUNT, PATTERNS and QUESTIONS that
# render_assess_code() writes in front of it.
_ASSESS_SCRIPT_BODY = r'''
def run_knowledge_test():
    """Run knowledge test and compute score."""
    if not QUESTIONS:
        return 0, []
    per_q = 100.0 / len(QUESTIONS)
    score = 0
    results = []
    border = "-" * 50
    print(f"\n{border}")
    print(f" Knowledge Test ({len(QUESTIONS)} questions)")
    print(f"{border}")

    for qi, q in enumerate(QUESTIONS):
        p = PATTERNS[qi] if qi < len(PATTERNS) else {}
        level = p.get("level", "basic") if isinstance(p, dict) else "basic"
        confidence = p.get("confidence", 70) if isinstance(p, dict) else 70
        ok = level == "domain" or (isinstance(confidence, (int, float)) and confidence >= 75)
        if ok:
            print(f" [OK] Q{qi+1}: {q['q'][:60]}")
            print(f" -> {q.get('explain', '')[:60]}")
            score += per_q
            results.append(True)
        else:
            print(f" [!] Q{qi+1}: {q['q'][:60]}")
            print(f" -> SKIP (low confidence)")
            results.append(False)
    return score, results

def run_pattern_coverage():
    """Check which patterns are covered by cases."""
    covered = 0
    for p in PATTERNS:
        print(f" [{'OK' if p.get('level') != 'basic' else '??'}] {str(p.get('principle', '?'))[:60]}")
        if p.get('level') != 'basic':
            covered += 1
    total = len(PATTERNS) or 1
    return (covered / total) * 100

def main():
    print(f"\n{'='*55}")
    print(f" Assessment: rev{VERSION} -- {SKILL_NAME}")
    print(f"{'='*55}")
    print(f" Cases collected: {CASE_COUNT}")
    print(f" Patterns extracted: {len(PATTERNS)}")

    knowledge_score, _ = run_knowledge_test()
    coverage_score = run_pattern_coverage()
    overall = (knowledge_score * 0.6 + coverage_score * 0.4)

    print(f"\n{'='*55}")
    print(f" RESULTS")
    print(f"{'='*55}")
    print(f" Knowledge Test: {knowledge_score:.1f}/100")
    print(f" Pattern Coverage: {coverage_score:.1f}/100")
    print(f" Overall Score: {overall:.1f}/100")
    print(f"{'='*55}\n")
    return overall

if __name__ == "__main__":
    main()
'''


def render_assess_code(*, version: int, skill_name: str,
                       patterns: list, questions: list,
                       case_count: int) -> str:
    """Generate the assess.py script content as a string.

    The data is embedded so that the generated file is always valid Python:

    * ``patterns`` and ``questions`` are written as a JSON string and decoded
      with ``json.loads`` at run time.  Pasting JSON text in as a Python
      literal breaks on ``null``/``true``/``false`` and on surrogate escapes.
    * ``skill_name``, ``version`` and ``case_count`` are written with
      ``repr()`` as module constants, so quotes, braces or backslashes in a
      skill name cannot break the script or be evaluated by its f-strings.

    Args:
        version: Revision number shown in the script header and banner.
        skill_name: Skill name shown in the script header and banner.
        patterns: JSON-serializable list of pattern dicts.
        questions: JSON-serializable list of question dicts.
        case_count: Number of collected cases, printed by the script.

    Returns:
        Source code of the stand-alone assessment script.

    Raises:
        TypeError: If ``patterns`` or ``questions`` is not JSON-serializable.
    """
    import json
    patterns_json = json.dumps(patterns)
    questions_json = json.dumps(questions)
    # Escape the two characters that could terminate or corrupt the docstring.
    doc_name = skill_name.replace("\\", "\\\\").replace('"', '\\"')
    header = (
        "#!/usr/bin/env python3\n"
        f'"""learn_skill_from_cases rev{version} -- {doc_name} Assessment Tool\n'
        "Auto-generated | Knowledge test + Pattern coverage\n"
        '"""\n'
        "import json, sys, os, random\n"
        "from pathlib import Path\n"
        "\n"
        f"VERSION = {version!r}\n"
        f"SKILL_NAME = {skill_name!r}\n"
        f"CASE_COUNT = {case_count!r}\n"
        f"PATTERNS = json.loads({patterns_json!r})\n"
        f"QUESTIONS = json.loads({questions_json!r})\n"
    )
    return header + _ASSESS_SCRIPT_BODY

# ---------------------------------------------------------------------------
# tools/learn_skill_from_cases/engine.py  (new file)
# ---------------------------------------------------------------------------
"""
engine.py — Simplified skill learning engine (English-only)

5-phase flow:
    Phase 0: Bootstrap + directory creation
    Phase 1: Skill definition (skill_search lookup)
    Phase 2: Case collection (skill_search + web search)
    Phase 3: Pattern extraction & knowledge refinement
    Phase 4: Assessment tool generation
    Phase 5: Validation & report
"""
import sys, os, json, re, subprocess, importlib, random
from datetime import date
from pathlib import Path

GA_ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(GA_ROOT))

from tools.learn_skill_from_cases import dir_manager
from tools.learn_skill_from_cases.eng_patterns_data import TOPIC_MAP, CASE_SCAN_KEYWORDS, CORE_PATTERNS, render_assess_code


def _force_refresh(ctx: dict) -> bool:
    """Return True when inherited cases/patterns must be skipped.

    The flag comes from ``ctx["force"]`` (set by :func:`run`).  The
    ``SKILL_FORCE_REFRESH=1`` environment variable is still honoured as an
    external switch, but is no longer written by this module.
    """
    return bool(ctx.get("force")) or os.environ.get("SKILL_FORCE_REFRESH") == "1"


# ===============================================================
# Phase 0: Bootstrap
# ===============================================================
def _ensure_env(ctx: dict):
    """Phase 0 — Ensure environment is ready.

    Creates the root and the next ``revN`` directory for ``ctx["skill_name"]``
    and stores the result in ``ctx["version"]`` and ``ctx["rev_dir"]``.
    """
    print("\n" + ("=" * 55))
    print(" Phase 0: Bootstrap")
    print("=" * 55)
    dir_manager.ensure_root_exists()
    version = dir_manager.next_version(ctx["skill_name"])
    rev_dir = dir_manager.create_revision_dir(ctx["skill_name"], version)
    ctx["version"] = version
    ctx["rev_dir"] = rev_dir
    print(f" Skill: {ctx['skill_name']}")
    print(f" Version: rev{version}")
    print(f" Directory: {rev_dir}")
    print(" [OK] Environment ready")


# ===============================================================
# Phase 1: Skill Definition
# ===============================================================
def _import_skill_search():
    """Lazy import skill_search, return None if unavailable."""
    try:
        from skill_search import search
        return search
    except Exception:
        # Optional dependency: any import-time failure means "not available".
        return None


def _phase1_define(ctx: dict):
    """Phase 1 — Define the skill by looking up known knowledge.

    Fills ``ctx["skill_definition"]`` (from the best ``skill_search`` hit when
    one exists, otherwise an empty user-input stub) and writes it to
    ``reports/skill_definition.json`` inside the revision directory.
    """
    print(f"\n{'-' * 55}")
    print(" Phase 1: Skill Definition")
    print("-" * 55)

    ctx["skill_definition"] = {
        "name": ctx["skill_name"],
        "description": "",
        "tags": [],
        "source": "user_input"
    }

    search_fn = _import_skill_search()
    if search_fn:
        try:
            results = search_fn(ctx["skill_name"].replace("_", " "), top_k=5)
            if results:
                best = results[0]
                s = best.skill
                ctx["skill_definition"]["description"] = (s.description or "")[:500]
                ctx["skill_definition"]["tags"] = (s.tags or [])[:10]
                ctx["skill_definition"]["key"] = s.key
                ctx["skill_definition"]["source"] = "skill_search"
                print(f" Found: {s.key}")
                if s.description:
                    print(f" Description: {s.description[:100]}...")
            else:
                print(" No results from skill_search")
        except Exception as e:
            # The lookup is best-effort; report and continue with the stub.
            print(f" skill_search: [FAIL] {e}")
    else:
        print(" skill_search not available")

    # Write definition
    def_file = ctx["rev_dir"] / "reports" / "skill_definition.json"
    with open(def_file, "w", encoding="utf-8") as f:
        json.dump(ctx["skill_definition"], f, indent=2, ensure_ascii=False)
    print(" [OK] Definition saved")


# ===============================================================
# Phase 2: Case Collection
# ===============================================================
def _import_web_search():
    """Simple import of web search; return None if unavailable."""
    try:
        from tools.metaso_search import metaso_search as fn
        return fn
    except Exception:
        # Optional dependency: any import-time failure means "not available".
        return None


def _generate_search_queries(skill_name: str) -> list[str]:
    """Generate English search queries for a skill name."""
    name = skill_name.replace("_", " ").title()
    return [
        f"{name} tutorial",
        f"{name} how to use",
        f"{name} examples guide",
        f"{name} best practices",
        f"{name} getting started",
        f"learn {name}",
    ]


def _collect_web_cases(web_engine, skill_name: str) -> list[dict]:
    """Run every generated query and return de-duplicated web cases.

    A failing query is reported and skipped, so results gathered from the
    other queries are kept.
    """
    web_cases = []
    seen_urls = set()
    seen_titles = set()
    for q in _generate_search_queries(skill_name):
        try:
            for r in web_engine(q, size=5) or []:
                url = r.get("url", "")
                title = (r.get("title") or "").strip()
                if url and url not in seen_urls and title not in seen_titles:
                    seen_urls.add(url)
                    # Untitled hits are remembered by URL so they do not
                    # block each other through the empty title.
                    seen_titles.add(title or url)
                    web_cases.append({
                        "source": "web",
                        "type": "web_article",
                        "title": title,
                        "url": url,
                        "snippet": (r.get("snippet") or "")[:300]
                    })
        except Exception as e:
            print(f" Web Search: [FAIL] {e}")
    return web_cases


def _phase2_search(ctx: dict):
    """Phase 2 — Collect cases from skill_search + web search.

    Cases from the skill hub, from web search and (unless a forced refresh is
    requested) from the previous revision are merged, written to
    ``cases/all_cases.json`` and stored in ``ctx["cases"]``.
    """
    print(f"\n{'-' * 55}")
    print(" Phase 2: Case Collection")
    print("-" * 55)

    all_cases = []

    # Channel A: Skill Hub
    search_fn = _import_skill_search()
    if search_fn:
        try:
            results = search_fn(ctx["skill_name"].replace("_", " "), top_k=10)
            skill_cases = []
            for r in results:
                s = r.skill
                if hasattr(s, 'key') and not s.key.startswith("agentskill_skills/"):
                    skill_cases.append({
                        "source": "skill_hub", "type": "skill_def",
                        "key": s.key,
                        "description": (s.description[:300] if s.description else ""),
                        "tags": s.tags[:5] if s.tags else [],
                    })
            all_cases.extend(skill_cases)
            print(f" Skill Hub: {len(skill_cases)} results")
        except Exception as e:
            print(f" Skill Hub: [FAIL] {e}")

    # Channel B: Web Search
    web_engine = _import_web_search()
    if web_engine:
        web_cases = _collect_web_cases(web_engine, ctx["skill_name"])
        all_cases.extend(web_cases)
        print(f" Web Search: {len(web_cases)} unique results")
    else:
        print(" Web Search: engine unavailable")

    # Inherit previous cases
    if not _force_refresh(ctx):
        inherited = dir_manager.get_latest_cases(ctx["skill_name"])
        if inherited:
            seen_keys = {c.get("url") or c.get("key") or "" for c in all_cases}
            added = 0
            for c in inherited:
                if not isinstance(c, dict):
                    continue
                key = c.get("url") or c.get("key") or ""
                if key and key not in seen_keys:
                    all_cases.append(c)
                    seen_keys.add(key)
                    added += 1
            print(f" Inherited from prev revision: +{added} cases")

    # Save
    cases_file = ctx["rev_dir"] / "cases" / "all_cases.json"
    with open(cases_file, "w", encoding="utf-8") as f:
        json.dump(all_cases, f, indent=2, ensure_ascii=False)
    ctx["cases"] = all_cases
    print(f" Total cases: {len(all_cases)}")
    print(" [OK] Cases saved")


# ===============================================================
# Phase 3: Pattern Extraction (English only)
# ===============================================================
def _keyword_matches(keyword: str, word: str) -> bool:
    """Tell whether topic *keyword* applies to the lower-cased token *word*.

    Two-letter acronyms ("ci", "cd") must equal the token; a substring test
    would fire on unrelated words such as "decision".  Longer keywords keep
    the substring semantics ("deploy" matches "deployment").
    """
    if len(keyword) <= 2:
        return keyword == word
    return keyword in word


def _decompose_skill_name_en(skill_name: str, cases: list = None) -> list[tuple[str, int]]:
    """Generate sub-topic patterns from an English skill name.

    Topics found in the skill name get confidence 78; additional topics found
    in case titles/snippets get 72.  When nothing matches, three generic
    patterns with confidence 70 are produced.

    Args:
        skill_name: Skill name; ``_`` and ``-`` separate its words.
        cases: Optional case dicts whose ``title`` and ``snippet`` are scanned.

    Returns:
        At most six ``(pattern text, confidence)`` tuples, in a deterministic
        order (skill-name topics first, then case topics in
        ``CASE_SCAN_KEYWORDS`` order).
    """
    # Keep two-letter tokens: dropping them made "ci"/"cd" unreachable.
    words = [
        w.lower()
        for w in skill_name.replace("_", " ").replace("-", " ").split()
        if len(w) >= 2
    ]

    sub_patterns = []
    seen = set()
    for word in words:
        for keyword, pattern_text in TOPIC_MAP.items():
            if keyword not in seen and _keyword_matches(keyword, word):
                seen.add(keyword)
                sub_patterns.append((pattern_text, 78))

    # Extract keywords from case titles.  A list (not a set) keeps the order
    # stable between runs, which matters because the result is truncated.
    case_keywords_found = []
    for c in cases or []:
        text = ((c.get("title") or "") + " " + (c.get("snippet") or "")).lower()
        tokens = set(re.findall(r"[a-z0-9]+", text))
        for term in CASE_SCAN_KEYWORDS:
            if term in seen or term in case_keywords_found:
                continue
            hit = (term in tokens) if len(term) <= 2 else (term in text)
            if hit:
                case_keywords_found.append(term)

    for kw in case_keywords_found:
        display = TOPIC_MAP.get(kw, f"{kw.title()} related best practices ({skill_name})")
        sub_patterns.append((display, 72))
        seen.add(kw)

    if not sub_patterns:
        generic = [
            f"{skill_name} core concepts & terminology",
            f"{skill_name} common scenarios & solutions",
            f"{skill_name} toolchain & environment setup",
        ]
        sub_patterns = [(s, 70) for s in generic]

    return sub_patterns[:6]


def _extract_patterns(ctx: dict):
    """Phase 3 — Extract knowledge patterns from collected cases.

    Combines core patterns whose keywords occur in the case text, domain
    patterns derived from the skill name, and (unless a forced refresh is
    requested) patterns inherited from the previous revision.  The result is
    saved to ``patterns/knowledge_patterns.json`` and ``ctx["patterns"]``.
    """
    print(f"\n{'-' * 55}")
    print(" Phase 3: Pattern Extraction")
    print("-" * 55)

    cases = ctx.get("cases", [])
    skill_name = ctx["skill_name"]
    # Only string fields take part in keyword matching (tags lists do not).
    all_text = " ".join(
        str(v) for c in cases for v in c.values() if isinstance(v, str)
    ).lower()

    patterns = []
    seen_ids = set()

    # Match core patterns against case text: one keyword hit enables the
    # whole category.
    for category, info in CORE_PATTERNS.items():
        if not any(kw in all_text for kw in info["keywords"]):
            continue
        for principle, pid, conf in info["principles"]:
            if pid not in seen_ids:
                patterns.append({"id": pid, "principle": principle, "confidence": conf, "level": "basic"})
                seen_ids.add(pid)

    # Add domain patterns from skill name decomposition
    sub_ideas = _decompose_skill_name_en(skill_name, cases=cases)
    for i, (sub_name, conf) in enumerate(sub_ideas):
        pid = f"P_domain_{i+1}"
        if pid not in seen_ids:
            patterns.append({
                "id": pid,
                "principle": sub_name,
                "confidence": conf,
                "level": "domain"
            })
            seen_ids.add(pid)

    # Inherit patterns from previous version
    if not _force_refresh(ctx):
        inherited = dir_manager.get_latest_patterns(skill_name)
        if inherited:
            added = 0
            for p in inherited:
                # Skip malformed entries instead of aborting the phase.
                if not isinstance(p, dict) or "principle" not in p:
                    continue
                pid = p.get("id")
                if pid and pid not in seen_ids:
                    patterns.append({
                        "id": pid, "principle": p["principle"],
                        # Inherited knowledge decays by 5 points, floor 50.
                        "confidence": max(p.get("confidence", 50) - 5, 50),
                        "level": "inherited"
                    })
                    seen_ids.add(pid)
                    added += 1
            print(f" Inherited: +{added} patterns from prev revision")

    if not patterns:
        # Fallback: generate generic patterns
        patterns = [
            {"id": "P_generic_1", "principle": f"Core concepts of {skill_name}", "confidence": 70, "level": "basic"},
            {"id": "P_generic_2", "principle": f"Best practices for {skill_name} setup", "confidence": 70, "level": "basic"},
            {"id": "P_generic_3", "principle": f"Common pitfalls in {skill_name}", "confidence": 65, "level": "basic"},
        ]

    # Save
    patterns_file = ctx["rev_dir"] / "patterns" / "knowledge_patterns.json"
    with open(patterns_file, "w", encoding="utf-8") as f:
        json.dump(patterns, f, indent=2, ensure_ascii=False)
    ctx["patterns"] = patterns
    print(f" Patterns extracted: {len(patterns)}")
    for p in patterns:
        print(f" [{p['level']:>9}] {str(p['principle'])[:60]}")
    print(" [OK] Patterns saved")


# ===============================================================
# Phase 4: Generate Assessment Tool
# ===============================================================
def _generate_assessment(ctx: dict):
    """Phase 4 — Generate an inline assessment script.

    Builds one four-option question per pattern (the pattern's own principle
    is the correct answer; distractors are other principles, padded with
    generic fillers), renders ``tools/assess.py`` and records its path in
    ``ctx["assess_file"]``.  Option order is randomized.
    """
    print(f"\n{'-' * 55}")
    print(" Phase 4: Generate Assessment")
    print("-" * 55)

    patterns = ctx.get("patterns", [])
    case_count = len(ctx.get("cases", []))
    skill_name = ctx["skill_name"]
    version = ctx["version"]

    # Build questions from patterns
    questions = []
    pattern_texts = [str(p.get("principle", "?")) for p in patterns]
    n = len(pattern_texts)
    generic_fillers = [
        "Clean up temp files regularly to free disk space",
        "Use type annotations to improve code readability",
        "Add unit tests to ensure code quality",
        "Document API endpoints for team collaboration",
    ]
    labels = ["A", "B", "C", "D"]

    for i, p in enumerate(patterns):
        principle = str(p.get("principle", ""))
        # The scenario quotes the *next* pattern (wrapping around).
        scenario = pattern_texts[(i + 1) % n][:60] if n > 1 else principle[:60]
        correct_text = principle[:60]

        others = [pattern_texts[j][:60] for j in range(n) if j != i and j != (i + 1) % n]
        random.shuffle(others)
        wrongs = others[:3]
        while len(wrongs) < 3:
            wrongs.append(generic_fillers[len(wrongs) % len(generic_fillers)])

        options = wrongs + [correct_text]
        random.shuffle(options)
        correct_idx = options.index(correct_text)

        questions.append({
            "q": f"Which approach is best for: {scenario}?",
            "a": options[0], "b": options[1], "c": options[2], "d": options[3],
            "answer": labels[correct_idx],
            "explain": f"Best practice: {principle}"
        })

    # Generate assess.py via template
    assess_code = render_assess_code(
        version=version, skill_name=skill_name,
        patterns=patterns, questions=questions,
        case_count=case_count
    )

    assess_file = ctx["rev_dir"] / "tools" / "assess.py"
    with open(assess_file, "w", encoding="utf-8") as f:
        f.write(assess_code)

    ctx["assess_file"] = assess_file
    print(f" Generated: tools/assess.py ({len(questions)} questions)")
    print(" [OK] Assessment generated")


# ===============================================================
# Phase 5: Validation & Report
# ===============================================================
def _phase5_validate(ctx: dict):
    """Phase 5 — Run validation and generate learning report.

    Executes the generated ``assess.py`` in a subprocess (60 s timeout),
    parses the ``Overall Score:`` line into ``ctx["score"]`` (0 on any
    failure) and writes ``reports/learning_report.md`` dated today.
    """
    print(f"\n{'-' * 55}")
    print(" Phase 5: Validation & Report")
    print("-" * 55)

    assess_file = ctx.get("assess_file")
    if assess_file and assess_file.exists():
        try:
            result = subprocess.run(
                [sys.executable, str(assess_file)],
                capture_output=True, text=True, timeout=60,
                cwd=str(ctx["rev_dir"])
            )
            print(result.stdout)
            if result.stderr:
                print(f" [STDERR] {result.stderr[:200]}")

            # Parse overall score from output
            score = 0.0
            for line in result.stdout.split("\n"):
                if "Overall Score:" in line:
                    try:
                        score = float(line.split(":")[1].strip().split("/")[0])
                    except ValueError:
                        pass
            ctx["score"] = score
            print(f" Validation score: {score:.1f}/100")
        except subprocess.TimeoutExpired:
            print(" [FAIL] Validation timed out")
            ctx["score"] = 0
        except Exception as e:
            print(f" [FAIL] Validation error: {e}")
            ctx["score"] = 0
    else:
        print(" No assess.py found, skipping validation")
        ctx["score"] = 0

    # Generate learning report (the date used to be a hard-coded literal)
    report = f"""# Learning Report: {ctx['skill_name']} (rev{ctx['version']})

## Summary
- **Skill**: {ctx['skill_name']}
- **Version**: rev{ctx['version']}
- **Date**: {date.today().isoformat()}
- **Cases collected**: {len(ctx.get('cases', []))}
- **Patterns extracted**: {len(ctx.get('patterns', []))}
- **Validation score**: {ctx.get('score', 0):.1f}/100

## Patterns
"""
    report += "".join(
        f"- [{p.get('level', 'basic')}] {p.get('principle', '?')} (confidence: {p.get('confidence', 0)})\n"
        for p in ctx.get("patterns", [])
    )

    report += """
## Next Steps
1. Review extracted patterns and adjust confidence levels if needed
2. Add more targeted web searches for uncovered topics
3. Re-run learning with `--force` for a fresh start
4. Apply learned patterns in real projects
"""

    report_file = ctx["rev_dir"] / "reports" / "learning_report.md"
    with open(report_file, "w", encoding="utf-8") as f:
        f.write(report)
    print(" Report saved: reports/learning_report.md")
    print(f" [OK] rev{ctx['version']} complete!")


# ===============================================================
# Main Orchestrator
# ===============================================================
def run(skill_name: str, dry_run: bool = False, force: bool = False) -> dict:
    """
    Run the full 5-phase skill learning pipeline.

    Args:
        skill_name: English skill name to learn (e.g., "docker_compose_production")
        dry_run: If True, only show what would be done
        force: If True, skip inherited patterns/cases

    Returns:
        Context dict with all phase results

    The ``force`` flag travels in the context dict (``ctx["force"]``); the
    process environment is no longer modified, so one forced run does not
    silently force later runs in the same process.
    """
    ctx = {
        "skill_name": skill_name,
        "version": 0,
        "rev_dir": None,
        "cases": [],
        "patterns": [],
        "score": 0,
        "dry_run": dry_run,
        "force": force,
    }

    if dry_run:
        print(f"\n{'=' * 55}")
        print(f" DRY RUN: {skill_name}")
        print(f"{'=' * 55}")
        version = dir_manager.next_version(skill_name)
        rev_dir = dir_manager.get_skill_dir(skill_name) / f"rev{version}"
        print(f" Would create: {rev_dir}")
        print(" Would run: Phase 1-5 pipeline")
        print(" [OK] Dry run complete (no changes made)")
        return ctx

    _ensure_env(ctx)
    _phase1_define(ctx)
    _phase2_search(ctx)
    _extract_patterns(ctx)
    _generate_assessment(ctx)
    _phase5_validate(ctx)

    return ctx