Skip to content

Commit 593b8d1

Browse files
jahoomaclaude
andauthored
Hourly freebuff bot-sweep dry-run endpoint (#527)
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 117f400 commit 593b8d1

9 files changed

Lines changed: 1206 additions & 0 deletions

File tree

.github/workflows/bot-sweep.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
name: Freebuff Bot Sweep

# Hourly dry-run sweep over active freebuff sessions. Calls the
# /api/admin/bot-sweep endpoint, which emails james@codebuff.com with a
# ranked list of suspects. No bans are issued — review and run
# scripts/ban-freebuff-bots.ts manually.

on:
  schedule:
    # Top of every hour (UTC).
    - cron: '0 * * * *'
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

# The job only makes an outbound HTTP request; it never checks out or writes
# to the repository, so the automatic GITHUB_TOKEN is given no scopes.
permissions: {}

jobs:
  sweep:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Trigger bot-sweep
        env:
          BOT_SWEEP_SECRET: ${{ secrets.BOT_SWEEP_SECRET }}
          # Repository variable override; falls back to the production endpoint.
          BOT_SWEEP_URL: ${{ vars.BOT_SWEEP_URL || 'https://www.codebuff.com/api/admin/bot-sweep' }}
        run: |
          set -euo pipefail
          # Without the secret configured, skip with a success status instead
          # of failing every scheduled run.
          if [ -z "$BOT_SWEEP_SECRET" ]; then
            echo "BOT_SWEEP_SECRET is not set — skipping."
            exit 0
          fi
          # Save the response body to a file and capture only the HTTP status
          # code on stdout so it can be checked below.
          status=$(curl -sS -o /tmp/resp.json -w '%{http_code}' \
            -X POST "$BOT_SWEEP_URL" \
            -H "Authorization: Bearer $BOT_SWEEP_SECRET" \
            -H "Content-Type: application/json" \
            --max-time 120)
          echo "HTTP $status"
          cat /tmp/resp.json
          echo
          # Anything other than 200 fails the job.
          if [ "$status" != "200" ]; then
            exit 1
          fi

common/src/constants/free-agents.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ import type { CostMode } from './model-config'
88
*/
99
export const FREE_COST_MODE = 'free' as const
1010

11+
/**
12+
* Root-orchestrator agent IDs counted as "a freebuff session" for abuse
13+
* detection and usage auditing. Subagents (file-picker, basher, etc.) are
14+
* excluded — they're spawned by the root, so counting them would inflate
15+
* every user's apparent activity.
16+
*/
17+
export const FREEBUFF_ROOT_AGENT_IDS = ['base2-free'] as const
18+
1119
/**
1220
* Agents that are allowed to run in FREE mode.
1321
* Only these specific agents (and their expected models) get 0 credits in FREE mode.

packages/internal/src/env-schema.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,18 @@ export const serverEnvSchema = clientEnvSchema.extend({
3333
DISCORD_BOT_TOKEN: z.string().min(1),
3434
DISCORD_APPLICATION_ID: z.string().min(1),
3535

36+
// Shared secret for the hourly bot-sweep GitHub Action. Callers must send
37+
// `Authorization: Bearer $BOT_SWEEP_SECRET` to /api/admin/bot-sweep.
38+
// Optional so dev environments can start without it; the endpoint returns
39+
// 503 if the secret isn't configured.
40+
BOT_SWEEP_SECRET: z.string().min(16).optional(),
41+
42+
// Optional GitHub PAT used by the bot-sweep to look up each suspect's
43+
// GitHub account age. Without it we fall back to unauthenticated API
44+
// calls (60 req/hr from the server IP) which is enough for a normal
45+
// sweep but risks rate-limiting.
46+
BOT_SWEEP_GITHUB_TOKEN: z.string().min(1).optional(),
47+
3648
// Freebuff waiting room. Defaults to OFF so the feature requires explicit
3749
// opt-in per environment — the CLI/SDK do not yet send
3850
// freebuff_instance_id, so enabling this before they ship would reject
@@ -90,6 +102,8 @@ export const serverProcessEnv: ServerInput = {
90102
DISCORD_PUBLIC_KEY: process.env.DISCORD_PUBLIC_KEY,
91103
DISCORD_BOT_TOKEN: process.env.DISCORD_BOT_TOKEN,
92104
DISCORD_APPLICATION_ID: process.env.DISCORD_APPLICATION_ID,
105+
BOT_SWEEP_SECRET: process.env.BOT_SWEEP_SECRET,
106+
BOT_SWEEP_GITHUB_TOKEN: process.env.BOT_SWEEP_GITHUB_TOKEN,
93107

94108
// Freebuff waiting room
95109
FREEBUFF_WAITING_ROOM_ENABLED: process.env.FREEBUFF_WAITING_ROOM_ENABLED,

scripts/inspect-freebuff-active.ts

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
/**
2+
* Inspect currently-active and queued freebuff users to spot bots / users
3+
* operating multiple accounts.
4+
*
5+
* Signals collected per free_session row:
6+
* - user profile (email, created_at, banned, discord_id, handle)
7+
* - recent message count (24h) on freebuff agent
8+
* - linked login provider (google / github / discord / etc.)
9+
* - linked device fingerprints + how many OTHER users share each fingerprint
10+
* - fingerprint sig_hashes + how many users (site-wide) share each sig_hash
11+
*
12+
* Heuristic red flags are printed next to each user.
13+
*
14+
* usage: bun scripts/inspect-freebuff-active.ts
15+
*/
16+
17+
import { FREEBUFF_ROOT_AGENT_IDS } from '@codebuff/common/constants/free-agents'
18+
import db from '@codebuff/internal/db'
19+
import * as schema from '@codebuff/internal/db/schema'
20+
import { sql, eq, inArray, desc, and, gte } from 'drizzle-orm'
21+
22+
const WINDOW_HOURS = 24
23+
24+
/**
 * Prints an abuse-signal report for every row in `free_session`.
 *
 * For each session's user it gathers, via separate queries:
 *   - message count, distinct clock hours, and first/last message time on the
 *     freebuff root agents within the last `WINDOW_HOURS` hours
 *   - lifetime message count on the freebuff root agents
 *   - linked login providers
 *   - fingerprint ids, how many distinct users share each fingerprint id, and
 *     how many distinct users share each fingerprint `sig_hash`
 *
 * It then prints one table row per session with heuristic flags, a summary of
 * flagged sessions, and the clusters of listed users that share a `sig_hash`.
 *
 * Returns early (after logging) when there are no `free_session` rows.
 */
async function main() {
  const cutoff = new Date(Date.now() - WINDOW_HOURS * 3600_000)

  // 1) All current free_session rows, joined with the owning user's profile.
  const sessions = await db
    .select({
      user_id: schema.freeSession.user_id,
      status: schema.freeSession.status,
      model: schema.freeSession.model,
      active_instance_id: schema.freeSession.active_instance_id,
      queued_at: schema.freeSession.queued_at,
      admitted_at: schema.freeSession.admitted_at,
      expires_at: schema.freeSession.expires_at,
      updated_at: schema.freeSession.updated_at,
      email: schema.user.email,
      name: schema.user.name,
      handle: schema.user.handle,
      discord_id: schema.user.discord_id,
      banned: schema.user.banned,
      user_created_at: schema.user.created_at,
    })
    .from(schema.freeSession)
    .leftJoin(schema.user, eq(schema.freeSession.user_id, schema.user.id))
    .orderBy(schema.freeSession.status, schema.freeSession.queued_at)

  if (sessions.length === 0) {
    console.log('No free_session rows found.')
    return
  }

  const userIds = sessions.map((s) => s.user_id)

  // 2) Message counts & hourly spread in the window for these users.
  const msgStats = await db
    .select({
      user_id: schema.message.user_id,
      count: sql<number>`COUNT(*)`,
      distinctHours: sql<number>`COUNT(DISTINCT EXTRACT(HOUR FROM ${schema.message.finished_at}))`,
      firstMsg: sql<string>`MIN(${schema.message.finished_at})`,
      lastMsg: sql<string>`MAX(${schema.message.finished_at})`,
    })
    .from(schema.message)
    .where(
      and(
        inArray(schema.message.user_id, userIds),
        inArray(schema.message.agent_id, FREEBUFF_ROOT_AGENT_IDS),
        gte(schema.message.finished_at, cutoff),
      ),
    )
    .groupBy(schema.message.user_id)
  const msgByUser = new Map(msgStats.map((m) => [m.user_id!, m]))

  // Lifetime freebuff message count (same filter, no time cutoff).
  const lifetime = await db
    .select({
      user_id: schema.message.user_id,
      count: sql<number>`COUNT(*)`,
    })
    .from(schema.message)
    .where(
      and(
        inArray(schema.message.user_id, userIds),
        inArray(schema.message.agent_id, FREEBUFF_ROOT_AGENT_IDS),
      ),
    )
    .groupBy(schema.message.user_id)
  const lifetimeByUser = new Map(
    lifetime.map((m) => [m.user_id!, Number(m.count)]),
  )

  // 3) Login providers
  const accounts = await db
    .select({
      userId: schema.account.userId,
      provider: schema.account.provider,
      providerAccountId: schema.account.providerAccountId,
    })
    .from(schema.account)
    .where(inArray(schema.account.userId, userIds))
  const providersByUser = new Map<string, string[]>()
  for (const a of accounts) {
    const list = providersByUser.get(a.userId)
    if (list) list.push(a.provider)
    else providersByUser.set(a.userId, [a.provider])
  }

  // 4) Fingerprints used by these users, and fp-sharing counts
  const sessRows = await db
    .select({
      userId: schema.session.userId,
      fingerprint_id: schema.session.fingerprint_id,
      type: schema.session.type,
    })
    .from(schema.session)
    .where(inArray(schema.session.userId, userIds))
  const fpsByUser = new Map<string, Set<string>>()
  const allFps = new Set<string>()
  for (const s of sessRows) {
    if (!s.fingerprint_id) continue
    allFps.add(s.fingerprint_id)
    let fps = fpsByUser.get(s.userId)
    if (!fps) {
      fps = new Set()
      fpsByUser.set(s.userId, fps)
    }
    fps.add(s.fingerprint_id)
  }

  // For each fingerprint, count how many distinct users have it (site-wide)
  let fpUserCounts = new Map<string, number>()
  let fpSigHash = new Map<string, string | null>()
  if (allFps.size > 0) {
    const fpShares = await db
      .select({
        fingerprint_id: schema.session.fingerprint_id,
        userCount: sql<number>`COUNT(DISTINCT ${schema.session.userId})`,
      })
      .from(schema.session)
      .where(inArray(schema.session.fingerprint_id, [...allFps]))
      .groupBy(schema.session.fingerprint_id)
    fpUserCounts = new Map(
      fpShares.map((r) => [r.fingerprint_id!, Number(r.userCount)]),
    )

    const fpRows = await db
      .select({
        id: schema.fingerprint.id,
        sig_hash: schema.fingerprint.sig_hash,
      })
      .from(schema.fingerprint)
      .where(inArray(schema.fingerprint.id, [...allFps]))
    fpSigHash = new Map(fpRows.map((f) => [f.id, f.sig_hash]))
  }

  // 5) sig_hash sharing across all users (to catch rotated fingerprints from same device)
  const sigHashes = [
    ...new Set([...fpSigHash.values()].filter((s): s is string => !!s)),
  ]
  let sigHashUserCounts = new Map<string, number>()
  if (sigHashes.length > 0) {
    const rows = await db
      .select({
        sig_hash: schema.fingerprint.sig_hash,
        userCount: sql<number>`COUNT(DISTINCT ${schema.session.userId})`,
      })
      .from(schema.session)
      .innerJoin(
        schema.fingerprint,
        eq(schema.session.fingerprint_id, schema.fingerprint.id),
      )
      .where(inArray(schema.fingerprint.sig_hash, sigHashes))
      .groupBy(schema.fingerprint.sig_hash)
    sigHashUserCounts = new Map(
      rows.map((r) => [r.sig_hash!, Number(r.userCount)]),
    )
  }

  // ---- Print ----

  const statusCounts: Record<string, number> = {}
  for (const s of sessions) {
    statusCounts[s.status] = (statusCounts[s.status] ?? 0) + 1
  }
  console.log(
    `\n${sessions.length} free_session rows: ` +
      Object.entries(statusCounts)
        .map(([k, v]) => `${k}=${v}`)
        .join(' '),
  )
  // Column names here match the header row printed below.
  console.log(`window for 'msgs24' and 'hrs24' = last ${WINDOW_HOURS}h\n`)

  console.log(
    [
      'status'.padEnd(7),
      'model'.padEnd(28),
      'email'.padEnd(36),
      'age_d'.padStart(6),
      'msgs24'.padStart(7),
      'hrs24'.padStart(5),
      'msgLT'.padStart(7),
      'providers'.padEnd(16),
      'fps'.padStart(4),
      'maxFpShare'.padStart(10),
      'maxSigShare'.padStart(11),
      'flags',
    ].join(' '),
  )
  console.log('-'.repeat(160))

  // Patterns for auto-generated looking emails/handles; hoisted out of the loop.
  const plusAliasRe = /\+[a-z0-9]{6,}@/i
  const emailDigitsRe = /^[a-z]{3,8}\d{4,}@/i
  const handleUserNRe = /^user[-_]?\d+/i

  // One reference time so every row's account age is measured consistently.
  const now = Date.now()
  const flaggedUsers: { email: string; reasons: string[] }[] = []

  for (const s of sessions) {
    // Unknown creation date => Infinity, so neither new-account flag fires.
    const ageDays = s.user_created_at
      ? (now - s.user_created_at.getTime()) / 86400_000
      : Infinity
    const stats = msgByUser.get(s.user_id)
    const msgs24 = Number(stats?.count ?? 0)
    const hrs24 = Number(stats?.distinctHours ?? 0)
    const msgLT = lifetimeByUser.get(s.user_id) ?? 0
    // Sort a copy so the array stored in providersByUser is not mutated.
    const providers = [...(providersByUser.get(s.user_id) ?? [])].sort()
    const fps = fpsByUser.get(s.user_id) ?? new Set<string>()
    const maxFpShare = Math.max(
      0,
      ...[...fps].map((fp) => fpUserCounts.get(fp) ?? 0),
    )
    const sigHashesForUser = [...fps]
      .map((fp) => fpSigHash.get(fp))
      .filter((h): h is string => !!h)
    const maxSigShare = Math.max(
      0,
      ...sigHashesForUser.map((h) => sigHashUserCounts.get(h) ?? 0),
    )

    const flags: string[] = []
    if (s.banned) flags.push('BANNED')
    if (maxFpShare >= 3) flags.push(`fp-shared-by-${maxFpShare}`)
    if (maxSigShare >= 3) flags.push(`sigHash-shared-by-${maxSigShare}`)
    if (ageDays < 1) flags.push('new-acct<1d')
    else if (ageDays < 7) flags.push('new-acct<7d')
    if (msgs24 >= 300) flags.push(`heavy-msgs:${msgs24}`)
    if (msgs24 >= 50 && hrs24 >= 20) flags.push('24-7-usage')
    if (providers.length === 0 && msgLT > 0) flags.push('no-oauth')
    // Auto-generated looking email/handle
    if (s.email && plusAliasRe.test(s.email)) flags.push('plus-alias')
    if (s.email && emailDigitsRe.test(s.email)) flags.push('email-digits')
    if (s.handle && handleUserNRe.test(s.handle)) flags.push('handle-userN')

    // Fall back to a short user-id prefix when the user row has no email.
    const email = s.email ?? s.user_id.slice(0, 8)
    if (flags.length) flaggedUsers.push({ email, reasons: flags })

    console.log(
      [
        s.status.padEnd(7),
        (s.model ?? '').slice(0, 27).padEnd(28),
        email.slice(0, 35).padEnd(36),
        (ageDays === Infinity ? '?' : ageDays.toFixed(1)).padStart(6),
        msgs24.toString().padStart(7),
        hrs24.toString().padStart(5),
        msgLT.toString().padStart(7),
        providers.join(',').slice(0, 15).padEnd(16),
        fps.size.toString().padStart(4),
        maxFpShare.toString().padStart(10),
        maxSigShare.toString().padStart(11),
        flags.join(' '),
      ].join(' '),
    )
  }

  console.log(`\n${flaggedUsers.length} sessions have at least one red flag.`)
  if (flaggedUsers.length > 0) {
    console.log('\nSuspicious summary:')
    for (const f of flaggedUsers) {
      console.log(` ${f.email} ${f.reasons.join(' ')}`)
    }
  }

  // Clusters of users sharing the same sig_hash.
  // Members are keyed by user_id: one user can own several fingerprint ids
  // that resolve to the same sig_hash, and must count once per cluster.
  const clusters = new Map<string, Map<string, string>>()
  for (const s of sessions) {
    const label = s.email ?? s.user_id.slice(0, 8)
    for (const fp of fpsByUser.get(s.user_id) ?? new Set<string>()) {
      const h = fpSigHash.get(fp)
      if (!h || (sigHashUserCounts.get(h) ?? 0) < 2) continue
      let members = clusters.get(h)
      if (!members) {
        members = new Map()
        clusters.set(h, members)
      }
      members.set(s.user_id, label)
    }
  }
  // Only report hashes shared by at least two of the users listed above.
  const sharedClusters = [...clusters].filter(
    ([, members]) => members.size >= 2,
  )
  if (sharedClusters.length > 0) {
    console.log(`\nClusters of active/queued freebuff users sharing a device sig_hash:`)
    for (const [h, members] of sharedClusters) {
      console.log(` sig_hash=${h.slice(0, 12)}… n=${members.size}`)
      for (const label of members.values()) console.log(` ${label}`)
    }
  }
}
293+
294+
// Script entry point: run the report, then exit explicitly — status 0 on
// success, or log the error and exit with status 1 on failure.
void (async () => {
  try {
    await main()
    process.exit(0)
  } catch (err) {
    console.error(err)
    process.exit(1)
  }
})()

0 commit comments

Comments
 (0)