-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathagent_evolution.sql
More file actions
77 lines (64 loc) · 3.58 KB
/
Copy pathagent_evolution.sql
File metadata and controls
77 lines (64 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
-- =============================================================================
-- Git4Data Tutorial — Part 15: Agent Self-Evolution (series capstone)
-- The full machine-driven loop: branch the agent's brain into candidates,
-- mutate each, evaluate offline, merge the winner, discard the rest — and
-- roll back if a release goes bad. No human in the loop.
-- Run: mysql -h 127.0.0.1 -P 6001 -u root -p111 < agent_evolution.sql
-- =============================================================================
-- Start from a clean slate so the script can be re-run after an interrupted
-- run: a leftover database or snapshot would make the CREATEs below fail.
DROP SNAPSHOT IF EXISTS brain_gen0;
DROP DATABASE IF EXISTS evolve_demo;
CREATE DATABASE evolve_demo;
USE evolve_demo;

-- The agent's "brain": skills with prompt templates and a fitness score.
CREATE TABLE brain (
    skill_id BIGINT PRIMARY KEY,
    skill    VARCHAR(32),
    prompt   VARCHAR(256),
    score    DECIMAL(5,2)   -- rolling eval score for this skill
);

-- Seed 200 skills with random starting scores between 50 and 80. The column
-- list is explicit so the load does not depend on the table's column order.
INSERT INTO brain (skill_id, skill, prompt, score)
SELECT
    result,
    concat('skill_', result),
    concat('prompt_v1_for_skill_', result),
    round(50 + rand()*30, 2)
FROM generate_series(1, 200) g;

-- Production brain pinned: the rollback point for this evolution round.
CREATE SNAPSHOT brain_gen0 FOR TABLE evolve_demo brain;
-- ============================================== 1. BRANCH: three candidates
-- One branch of brain per mutation strategy, each fully isolated. Every
-- candidate is created and mutated in one place so its strategy reads as a unit.

-- cand_sft: rewrite weak prompts (score below 60).
DATA BRANCH CREATE TABLE cand_sft FROM brain;
UPDATE cand_sft
SET prompt = concat('prompt_v2_refined_', skill_id)
WHERE score < 60;

-- cand_rl: amplify what already works (score above 70 gets a tagged prompt
-- and a 5-point score bump).
DATA BRANCH CREATE TABLE cand_rl FROM brain;
UPDATE cand_rl
SET prompt = concat(prompt, '_reinforced'),
    score = score + 5
WHERE score > 70;

-- cand_gov: prune what keeps failing (score below 55 is removed).
DATA BRANCH CREATE TABLE cand_gov FROM brain;
DELETE FROM cand_gov
WHERE score < 55;
-- ============================================== 2. EVALUATE: replay gating
-- Each candidate is evaluated offline (replay a fixed task suite against the
-- candidate brain; here the harness writes its results in):
CREATE TABLE eval_results (
    candidate  VARCHAR(16) PRIMARY KEY,
    eval_score DECIMAL(5,2)
);

-- Columns are named explicitly so the load does not depend on column order.
INSERT INTO eval_results (candidate, eval_score)
VALUES
    ('cand_sft', 71.40),
    ('cand_rl',  66.20),
    ('cand_gov', 64.80);

-- The gate: production baseline is 65.0; best candidate above it wins.
-- Sorted best-first, so the winner is the first row returned.
SELECT
    candidate,
    eval_score
FROM eval_results
WHERE eval_score > 65.0
ORDER BY eval_score DESC;
-- cand_sft 71.40 <- winner
-- cand_rl 66.20 <- passed but not best

-- What would the winner actually change? Reviewable before deploy:
DATA BRANCH DIFF cand_sft AGAINST brain OUTPUT SUMMARY;
-- ============================================== 3. MERGE the winner, DISCARD the rest
-- Fold the winning candidate back into the production brain.
-- NOTE(review): WHEN CONFLICT ACCEPT presumably resolves conflicting rows in
-- favour of the candidate branch — confirm against the DATA BRANCH MERGE docs.
DATA BRANCH MERGE cand_sft INTO brain WHEN CONFLICT ACCEPT;

-- The losing candidates are simply dropped; brain never saw their changes.
DROP TABLE cand_rl;
DROP TABLE cand_gov;

-- Sanity check: the refined prompts are now live in brain.
SELECT COUNT(*) FROM brain WHERE prompt LIKE 'prompt_v2_refined%'; -- mutations live

-- Pin the new generation. A snapshot of the same name left behind by an
-- interrupted earlier run would make CREATE SNAPSHOT fail, so clear it first.
DROP SNAPSHOT IF EXISTS brain_gen1;
CREATE SNAPSHOT brain_gen1 FOR TABLE evolve_demo brain;
-- ============================================== 4. The safety property
-- If generation 1 regresses in production, a single statement returns the
-- brain to generation 0:
-- RESTORE TABLE evolve_demo.brain {SNAPSHOT = brain_gen0};
-- The lineage between generations stays queryable too; this compares the
-- live brain with its gen-0 snapshot:
DATA BRANCH DIFF brain
    AGAINST brain {SNAPSHOT='brain_gen0'}
    OUTPUT SUMMARY;
-- i.e. exactly which skills changed between generations — evolution with
-- receipts.
-- Branch, mutate, evaluate, merge-or-discard, snapshot, repeat: this loop is
-- the agentic workflow the series opened with. Every step here was a machine
-- decision; version control is what made it SAFE to hand over.
-- ---------------------------------------------------------------- cleanup
-- Every drop is guarded with IF EXISTS so that a missing object (for example
-- after an earlier step failed) does not abort the rest of the cleanup.
DROP TABLE IF EXISTS eval_results;
DROP SNAPSHOT IF EXISTS brain_gen0;
DROP SNAPSHOT IF EXISTS brain_gen1;
DROP DATABASE IF EXISTS evolve_demo;