Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
87 commits
Select commit Hold shift + click to select a range
5a1d1b7
feat(similarity): scaffold similarity_svc package with goja/xxhash deps
CodFrm Apr 13, 2026
b49dac1
feat(similarity): define core fingerprint types and TokenKind enum
CodFrm Apr 13, 2026
9320821
test(similarity): cover KindUnknown in TokenKind_String table
CodFrm Apr 13, 2026
eb8a092
feat(similarity): implement parseAndNormalize via goja AST walk
CodFrm Apr 13, 2026
b6ff6c8
fix(similarity): nil guards on Expression walks; walk ObjectLiteral p…
CodFrm Apr 13, 2026
cfad561
feat(similarity): implement k-gram xxhash64 sliding window
CodFrm Apr 13, 2026
eeb5f67
test(similarity): cover operator value distinction and zero/negative k
CodFrm Apr 13, 2026
4758845
feat(similarity): implement winnowing with monotonic deque
CodFrm Apr 13, 2026
4230617
test(similarity): add winnow brute-force invariant test; clarify comp…
CodFrm Apr 13, 2026
3e3586f
style(similarity): use range over int in property test loop
CodFrm Apr 13, 2026
aee6188
feat(similarity): wire ExtractFingerprints public API
CodFrm Apr 13, 2026
980e613
test(similarity): tighten ExtractFingerprints test contracts; documen…
CodFrm Apr 13, 2026
c9b9d66
feat(similarity): implement pure set-based Jaccard similarity
CodFrm Apr 13, 2026
f022d3d
test(similarity): add Jaccard symmetry and proper-subset cases
CodFrm Apr 13, 2026
179d61e
test(similarity): golden test for rename invariance
CodFrm Apr 13, 2026
acd98ce
test(similarity): golden test for code reorder similarity
CodFrm Apr 13, 2026
4c8b66d
test(similarity): golden test for unrelated code disjointness
CodFrm Apr 13, 2026
b4b8e4b
chore(similarity): lint-fix polish
CodFrm Apr 13, 2026
3e1e888
update .gitignore
CodFrm Apr 13, 2026
4b8462e
feat(similarity): add Phase 2 entities + gormigrate for six tables
CodFrm Apr 13, 2026
f9e1007
refactor(similarity): convert entity status enums to named types + po…
CodFrm Apr 13, 2026
68910c1
feat(similarity): add FingerprintRepo with upsert + parse-status helpers
CodFrm Apr 13, 2026
e8eaed2
refactor(similarity): apply repo conventions to FingerprintRepo
CodFrm Apr 13, 2026
f9084c3
feat(similarity): add SimilarPairRepo with normalized-pair upsert
CodFrm Apr 13, 2026
c2d9979
feat(similarity): add SuspectSummaryRepo with upsert
CodFrm Apr 13, 2026
b6fe291
feat(similarity): add SimilarityWhitelistRepo (pair-level)
CodFrm Apr 13, 2026
07843e6
feat(similarity): add IntegrityWhitelistRepo (script-level)
CodFrm Apr 13, 2026
e1f6e67
feat(similarity): add IntegrityReviewRepo with code-id upsert
CodFrm Apr 13, 2026
9e624c1
feat(similarity): add error codes + zh_CN i18n at 114000 range
CodFrm Apr 13, 2026
9d8e41f
feat(similarity): add similarity.* config keys + Validate() check
CodFrm Apr 13, 2026
443b726
fix(similarity): default ScanEnabled/IntegrityEnabled to true so omit…
CodFrm Apr 13, 2026
aeeac1a
feat(script): add ScriptCode.FindByIDIncludeDeleted for similarity ev…
CodFrm Apr 13, 2026
b7fecb4
feat(similarity): add FingerprintESRepo with bulk/search/agg + index …
CodFrm Apr 13, 2026
494a9ff
fix(similarity): harden FingerprintESRepo error handling + add body t…
CodFrm Apr 13, 2026
5e09d25
feat(similarity): add pending-warning context helper for script_svc h…
CodFrm Apr 13, 2026
87b9fcc
feat(similarity): add integrity signal detectors (Cat A/B/C/D)
CodFrm Apr 13, 2026
696fd95
feat(similarity): add IntegritySvc with Check/IsWhitelisted/RecordWar…
CodFrm Apr 13, 2026
4ba44e6
test(similarity): integrity golden tests cover normal/minified/obfusc…
CodFrm Apr 13, 2026
57bf27d
feat(similarity): add NSQ producers for similarity.scan + integrity.w…
CodFrm Apr 13, 2026
bfad8fb
feat(similarity): implement ScanSvc.Scan orchestration (lock + ES + p…
CodFrm Apr 13, 2026
c109a4d
refactor(similarity): inject Scan dependencies via function vars for …
CodFrm Apr 13, 2026
1af7bf2
test(similarity): cover ScanSvc.Scan branches with mocked repos
CodFrm Apr 13, 2026
ed23599
feat(similarity): NSQ consumer for similarity.scan -> ScanSvc.Scan
CodFrm Apr 13, 2026
b90ebe2
feat(similarity): NSQ consumer for integrity.warning -> IntegritySvc.…
CodFrm Apr 13, 2026
8026457
feat(similarity): crontab handler refreshes Redis stop-fp set from ES…
CodFrm Apr 13, 2026
6e9ab66
feat(similarity): integrate Integrity check + similarity scan publish…
CodFrm Apr 13, 2026
cba9ea5
feat(similarity): integrate Integrity check + similarity scan publish…
CodFrm Apr 13, 2026
4dd3aa1
feat(similarity): register NSQ consumers + stop-fp crontab
CodFrm Apr 13, 2026
d10c043
feat(similarity): register similarity repos, services, and ES index init
CodFrm Apr 13, 2026
a0111b6
chore(similarity): lint-fix polish
CodFrm Apr 13, 2026
d678858
fix(similarity): wire stop-fp crontab to similarity.stop_fp_refresh_s…
CodFrm Apr 13, 2026
1a10e4d
feat(similarity): declare Phase 3 admin + evidence API request/respon…
CodFrm Apr 13, 2026
4def300
feat(similarity): add list/find/resolve methods + ES position lookup …
CodFrm Apr 13, 2026
00e1cc9
feat(similarity): add RequireSimilarityPairAccess middleware for Phase 3
CodFrm Apr 13, 2026
6bce562
feat(similarity): scaffold AdminSvc interface with stub methods
CodFrm Apr 13, 2026
d0f6fd3
feat(similarity): implement AdminSvc.ListPairs + ListSuspects
CodFrm Apr 13, 2026
6b67087
feat(similarity): implement GetPairDetail + MatchSegments builder
CodFrm Apr 13, 2026
caafa47
feat(similarity): implement pair + integrity whitelist and review end…
CodFrm Apr 13, 2026
e7c612e
feat(similarity): wire Phase 3 admin + evidence routes
CodFrm Apr 13, 2026
ba2479e
chore(similarity): silence errcheck on ES resp.Body.Close deferred calls
CodFrm Apr 13, 2026
d8c62ce
feat(similarity): add DELETE /admin/similarity/whitelist/:id endpoint
CodFrm Apr 14, 2026
20367af
fix(similarity): silence errcheck on new ES resp.Body.Close deferred …
CodFrm Apr 14, 2026
38342bd
chore: exclude auto-generated docs/ dir from golangci-lint
CodFrm Apr 14, 2026
b3c2699
feat(similarity): Phase 4 patrol + backfill + stop-fp manual refresh
CodFrm Apr 14, 2026
ff96dda
fix(similarity): close four gaps against design spec
CodFrm Apr 14, 2026
9d5099a
fix(similarity): defer cancel in patrol context cancellation test
CodFrm Apr 14, 2026
1621b1d
perf(similarity): share scans across Integrity.Check signals
CodFrm Apr 14, 2026
c16fd7a
fix(similarity): retry fingerprint parse wrapped in async function
CodFrm Apr 14, 2026
62605f3
feat(similarity): make max_code_size=0 disable the fingerprint size gate
CodFrm Apr 14, 2026
cf2a665
feat(similarity): admin endpoint listing fingerprint parse failures
CodFrm Apr 14, 2026
33fcdf7
chore(similarity): silence gosec G304 on bench test fixture read
CodFrm Apr 14, 2026
273cd26
fix(similarity): make Reset backfill truly force a full rescan
CodFrm Apr 14, 2026
18d0896
chore(similarity): add debug logging to integrity check and match seg…
CodFrm Apr 14, 2026
8e8b388
fix(similarity): mark deleted scripts and purge stale pending pairs
CodFrm Apr 15, 2026
ed1c7e4
fix(similarity): cover ES6+ syntax in fingerprint walker
CodFrm Apr 15, 2026
ff40518
perf(similarity): 将完整性检查耗时信号移至异步扫描消费者
CodFrm Apr 16, 2026
2a79ba8
refactor(similarity): 移除 AAEncode 和 JJEncode 完整性信号
CodFrm Apr 16, 2026
074e812
style: fix gofmt formatting in integrity signals
CodFrm Apr 16, 2026
248fc59
fix(similarity): use Similarity().ScanEnabled in Validate() for consi…
CodFrm Apr 16, 2026
47dcd93
fix(similarity): propagate Redis errors in RunBackfill instead of swa…
CodFrm Apr 16, 2026
2549935
refactor(similarity): deduplicate stopFpRedisKey into single exported…
CodFrm Apr 16, 2026
8c81038
fix(similarity): clamp Jaccard score and document stop-fp approximation
CodFrm Apr 16, 2026
3cb51cf
fix(similarity): use format directive in integrity rejection message
CodFrm Apr 16, 2026
5274a96
fix(similarity): handle FindLatest and IsWhitelisted errors in Update…
CodFrm Apr 16, 2026
5485bb9
fix(similarity): skip integrity pre-check for auto-sync code updates
CodFrm Apr 16, 2026
e8bdeeb
refactor(similarity): move backfill state data access to repository l…
CodFrm Apr 16, 2026
4073a28
refactor(similarity): split admin.go into evidence and whitelist files
CodFrm Apr 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

.claude/settings.local.json
CLAUDE.md
.omc

# ip2region data files (download separately)
/data/*.xdb
Expand Down
4 changes: 4 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ linters:
misspell:
locale: US

exclusions:
paths:
- docs

formatters:
enable:
- gofmt
Expand Down
27 changes: 27 additions & 0 deletions cmd/app/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@ import (
"github.com/scriptscat/scriptlist/internal/repository/report_repo"
"github.com/scriptscat/scriptlist/internal/repository/resource_repo"
"github.com/scriptscat/scriptlist/internal/repository/script_repo"
"github.com/scriptscat/scriptlist/internal/repository/similarity_repo"
"github.com/scriptscat/scriptlist/internal/repository/statistics_repo"
"github.com/scriptscat/scriptlist/internal/repository/user_repo"
"github.com/scriptscat/scriptlist/internal/service/similarity_svc"
"github.com/scriptscat/scriptlist/internal/task/consumer"
"github.com/scriptscat/scriptlist/internal/task/crontab"
"github.com/scriptscat/scriptlist/internal/task/crontab/handler"
"github.com/scriptscat/scriptlist/migrations"
)

Expand Down Expand Up @@ -103,6 +106,24 @@ func main() {

announcement_repo.RegisterAnnouncement(announcement_repo.NewAnnouncement())

// similarity / integrity
similarity_repo.RegisterFingerprint(similarity_repo.NewFingerprintRepo())
similarity_repo.RegisterSimilarPair(similarity_repo.NewSimilarPairRepo())
similarity_repo.RegisterSuspectSummary(similarity_repo.NewSuspectSummaryRepo())
similarity_repo.RegisterSimilarityWhitelist(similarity_repo.NewSimilarityWhitelistRepo())
similarity_repo.RegisterIntegrityWhitelist(similarity_repo.NewIntegrityWhitelistRepo())
similarity_repo.RegisterIntegrityReview(similarity_repo.NewIntegrityReviewRepo())
similarity_repo.RegisterFingerprintES(similarity_repo.NewFingerprintESRepo())
similarity_repo.RegisterPatrolQuery(similarity_repo.NewPatrolQueryRepo())
similarity_svc.RegisterIntegrity(similarity_svc.NewIntegritySvc())
similarity_svc.RegisterScan(similarity_svc.NewScanSvc())
similarity_svc.RegisterAccess(similarity_svc.NewAccessSvc())
similarity_svc.RegisterAdmin(similarity_svc.NewAdminSvc())
// Wire the crontab handlers' long-running methods into admin_svc so
// the admin endpoints can invoke them without an import cycle.
similarity_svc.RegisterBackfillRunner(handler.NewSimilarityPatrolHandler().RunBackfill)
similarity_svc.RegisterStopFpRefresher(handler.NewSimilarityStopFpHandler().Refresh)

err = cago.New(ctx, cfg).
Registry(component.Core()).
Registry(db.Database()).
Expand All @@ -118,6 +139,12 @@ func main() {
return nil
})).
Registry(cago.FuncComponent(appconfigs.Validate)).
Registry(cago.FuncComponent(func(ctx context.Context, cfg *configs.Config) error {
if !appconfigs.Similarity().ScanEnabled {
return nil
}
return similarity_repo.EnsureFingerprintIndex(ctx)
})).
Registry(cago.FuncComponent(func(ctx context.Context, cfg *configs.Config) error {
v4Path := cfg.String(ctx, "ip2region.v4_xdb_path")
v6Path := cfg.String(ctx, "ip2region.v6_xdb_path")
Expand Down
137 changes: 137 additions & 0 deletions configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,148 @@ func QQMigrate() *QQMigrateConfig {
return cfg
}

// SimilarityConfig holds the settings of the similarity-detection system.
// Values are read from the `similarity` YAML section and may be overridden
// by DB rows; Similarity() applies the defaults mentioned on each field.
type SimilarityConfig struct {
	// ScanEnabled toggles similarity scanning. Similarity() defaults it to
	// true; when true, Validate requires elasticsearch.address to be set.
	ScanEnabled bool `yaml:"scan_enabled"`
	// JaccardThreshold defaults to 0.30 when left at zero.
	JaccardThreshold float64 `yaml:"jaccard_threshold"`
	// CoverageThreshold defaults to 0.50 when left at zero.
	CoverageThreshold float64 `yaml:"coverage_threshold"`
	// KGramSize defaults to 5 when left at zero.
	KGramSize int `yaml:"kgram_size"`
	// WinnowingWindow defaults to 10 when left at zero.
	WinnowingWindow int `yaml:"winnowing_window"`
	// MinFingerprints defaults to 20 when left at zero.
	MinFingerprints int `yaml:"min_fingerprints"`
	// MaxCodeSize has no default: zero means "unlimited" (the size gate is
	// only applied when the value is > 0, per the note in Similarity()).
	MaxCodeSize int `yaml:"max_code_size"`
	// StopFpDfCutoff defaults to 50 when left at zero.
	StopFpDfCutoff int `yaml:"stop_fp_df_cutoff"`
	// StopFpRefreshSec defaults to 3600 when left at zero (presumably
	// seconds, going by the field name).
	StopFpRefreshSec int `yaml:"stop_fp_refresh_sec"`
	// BackfillBatchSize defaults to 50 when left at zero.
	BackfillBatchSize int `yaml:"backfill_batch_size"`
	// BackfillSleepMs defaults to 200 when left at zero (presumably
	// milliseconds, going by the field name).
	BackfillSleepMs int `yaml:"backfill_sleep_ms"`
	// IntegrityEnabled toggles the integrity check. Similarity() defaults it
	// to true.
	IntegrityEnabled bool `yaml:"integrity_enabled"`
	// IntegrityWarnThreshold defaults to 0.5 when left at zero.
	IntegrityWarnThreshold float64 `yaml:"integrity_warn_threshold"`
	// IntegrityBlockThreshold defaults to 0.8 when left at zero.
	IntegrityBlockThreshold float64 `yaml:"integrity_block_threshold"`
	// IntegrityAsyncAutoArchive is a pointer so that "unset" (nil) can be
	// told apart from an explicit false; Similarity() replaces nil with true.
	IntegrityAsyncAutoArchive *bool `yaml:"integrity_async_auto_archive"`
}

// Similarity returns the effective similarity-system configuration.
//
// Sources are layered in this order, later layers winning:
//  1. the YAML file (configs/config.yaml) - the baseline at process start
//  2. the spec §6.1 defaults - fill in fields the YAML left undeclared
//  3. `similarity.*` rows of the pre_system_config table - runtime overrides
//     set from the admin backend
//
// The DB layer lets administrators tune thresholds and switches without a
// restart (spec §1.1: similarity, coverage and integrity thresholds are
// dynamically configurable). A DB row that is missing or fails to parse is
// skipped silently, leaving the YAML/default value in place.
func Similarity() *SimilarityConfig {
	// The bool defaults are pre-filled because the YAML scan only overwrites
	// fields that are explicitly declared; omitting the whole `similarity`
	// section therefore still yields true for both switches.
	cfg := &SimilarityConfig{ScanEnabled: true, IntegrityEnabled: true}
	if base := configs.Default(); base != nil {
		// Best effort: a failed scan simply leaves the defaults in place.
		_ = base.Scan(context.Background(), "similarity", cfg)
	}

	// For the fields below a zero value is the sentinel for "unset".
	fillFloat := func(dst *float64, def float64) {
		if *dst == 0 {
			*dst = def
		}
	}
	fillInt := func(dst *int, def int) {
		if *dst == 0 {
			*dst = def
		}
	}
	fillFloat(&cfg.JaccardThreshold, 0.30)
	fillFloat(&cfg.CoverageThreshold, 0.50)
	fillInt(&cfg.KGramSize, 5)
	fillInt(&cfg.WinnowingWindow, 10)
	fillInt(&cfg.MinFingerprints, 20)
	// MaxCodeSize is intentionally not defaulted: 0 means unlimited (subject
	// to the API-level 10MB cap on Code). scan.go gates on `MaxCodeSize > 0`,
	// so zero disables the guard.
	fillInt(&cfg.StopFpDfCutoff, 50)
	fillInt(&cfg.StopFpRefreshSec, 3600)
	fillInt(&cfg.BackfillBatchSize, 50)
	fillInt(&cfg.BackfillSleepMs, 200)
	fillFloat(&cfg.IntegrityWarnThreshold, 0.5)
	fillFloat(&cfg.IntegrityBlockThreshold, 0.8)
	if cfg.IntegrityAsyncAutoArchive == nil {
		enabled := true
		cfg.IntegrityAsyncAutoArchive = &enabled
	}

	// Without a DB provider there is nothing left to layer on top.
	if dbProvider == nil {
		return cfg
	}

	// DB overrides (admin-tunable at runtime per spec §1.1 / §6.1). Each
	// helper writes through only when the provider reports a usable value.
	ctx := context.Background()
	overrideBool := func(key string, dst *bool) {
		if val, found := dbProvider.GetBool(ctx, key); found {
			*dst = val
		}
	}
	overrideFloat := func(key string, dst *float64) {
		if val, found := dbProvider.GetFloat(ctx, key); found {
			*dst = val
		}
	}
	overrideInt := func(key string, dst *int) {
		if val, found := dbProvider.GetInt(ctx, key); found {
			*dst = val
		}
	}
	overrideBool("similarity.scan_enabled", &cfg.ScanEnabled)
	overrideFloat("similarity.jaccard_threshold", &cfg.JaccardThreshold)
	overrideFloat("similarity.coverage_threshold", &cfg.CoverageThreshold)
	overrideInt("similarity.kgram_size", &cfg.KGramSize)
	overrideInt("similarity.winnowing_window", &cfg.WinnowingWindow)
	overrideInt("similarity.min_fingerprints", &cfg.MinFingerprints)
	overrideInt("similarity.max_code_size", &cfg.MaxCodeSize)
	overrideInt("similarity.stop_fp_df_cutoff", &cfg.StopFpDfCutoff)
	overrideInt("similarity.stop_fp_refresh_sec", &cfg.StopFpRefreshSec)
	overrideInt("similarity.backfill_batch_size", &cfg.BackfillBatchSize)
	overrideInt("similarity.backfill_sleep_ms", &cfg.BackfillSleepMs)
	overrideBool("similarity.integrity_enabled", &cfg.IntegrityEnabled)
	overrideFloat("similarity.integrity_warn_threshold", &cfg.IntegrityWarnThreshold)
	overrideFloat("similarity.integrity_block_threshold", &cfg.IntegrityBlockThreshold)
	// The pointer field gets a fresh variable so it never aliases a default.
	if val, found := dbProvider.GetBool(ctx, "similarity.integrity_async_auto_archive"); found {
		cfg.IntegrityAsyncAutoArchive = &val
	}
	return cfg
}

// Validate 在服务启动时检查必要配置项(符合 CaGo FuncComponent 签名)
// 其余配置(turnstile、ai)可通过管理后台动态配置,不在启动时强制校验
func Validate(ctx context.Context, cfg *configs.Config) error {
if cfg.String(ctx, "website.url") == "" {
return fmt.Errorf("missing required config key: website.url")
}
// similarity.scan_enabled=true 需要 elasticsearch 地址(cago 读取 elasticsearch.address 列表)
if Similarity().ScanEnabled {
var esAddress []string
_ = cfg.Scan(ctx, "elasticsearch.address", &esAddress)
if len(esAddress) == 0 {
return fmt.Errorf("similarity.scan_enabled=true requires elasticsearch.address to be set")
}
}
Comment on lines +239 to +246
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Validate() checks cfg.Bool("similarity.scan_enabled") to decide whether Elasticsearch must be configured, but Similarity() defaults ScanEnabled=true even when the YAML key is absent. This can let the app start without elasticsearch.address while similarity scanning is effectively enabled (and main.go later calls EnsureFingerprintIndex based on Similarity().ScanEnabled). Consider basing this check on Similarity().ScanEnabled (or otherwise applying the same defaulting logic as Similarity()) so startup validation matches runtime behavior.

Copilot uses AI. Check for mistakes.
return nil
}
16 changes: 16 additions & 0 deletions configs/config.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,21 @@ qq_migrate:
ip2region:
v4_xdb_path: "data/ip2region_v4.xdb"
v6_xdb_path: "data/ip2region_v6.xdb"
similarity:
scan_enabled: true
jaccard_threshold: 0.30
coverage_threshold: 0.50
kgram_size: 5
winnowing_window: 10
min_fingerprints: 20
max_code_size: 0 # 0 = unlimited (API already caps Code at 10MB)
stop_fp_df_cutoff: 50
stop_fp_refresh_sec: 3600
backfill_batch_size: 50
backfill_sleep_ms: 200
integrity_enabled: true
integrity_warn_threshold: 0.5
integrity_block_threshold: 0.8
integrity_async_auto_archive: true
source: file
version: 2.0.0
46 changes: 46 additions & 0 deletions configs/db_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package configs

import (
	"context"
	"strconv"
	"strings"

	"github.com/scriptscat/scriptlist/internal/repository/system_config_repo"
)
Expand All @@ -25,6 +26,51 @@ func (p *DBConfigProvider) GetString(ctx context.Context, key string) (string, b
return cfg.ConfigValue, true
}

// GetBool returns the DB-stored value for key as a bool.
//
// Literals are matched case-insensitively: "true", "1", "yes" and "on" are
// truthy; "false", "0", "no" and "off" are falsy. The first result is the
// parsed value; the second reports whether a usable value was found. Missing
// rows and unrecognized values return (false, false) so callers can fall
// back to YAML.
func (p *DBConfigProvider) GetBool(ctx context.Context, key string) (bool, bool) {
	raw, ok := p.GetString(ctx, key)
	if !ok {
		return false, false
	}
	// Lowercase once so every casing ("True", "tRUE", "ON", ...) is accepted,
	// as the contract promises, rather than only a hand-picked few.
	switch strings.ToLower(raw) {
	case "true", "1", "yes", "on":
		return true, true
	case "false", "0", "no", "off":
		return false, true
	}
	return false, false
}

// GetFloat looks up key in the DB-backed configuration and parses the stored
// text as a float64. The second result reports whether a usable value was
// found; a missing row or a value strconv.ParseFloat rejects yields (0, false).
func (p *DBConfigProvider) GetFloat(ctx context.Context, key string) (float64, bool) {
	text, found := p.GetString(ctx, key)
	if !found {
		return 0, false
	}
	if parsed, err := strconv.ParseFloat(text, 64); err == nil {
		return parsed, true
	}
	return 0, false
}

// GetInt looks up key in the DB-backed configuration and parses the stored
// text as a base-10 int. The second result reports whether a usable value was
// found; a missing row or a value strconv.Atoi rejects yields (0, false).
func (p *DBConfigProvider) GetInt(ctx context.Context, key string) (int, bool) {
	text, found := p.GetString(ctx, key)
	if !found {
		return 0, false
	}
	if parsed, err := strconv.Atoi(text); err == nil {
		return parsed, true
	}
	return 0, false
}

func (p *DBConfigProvider) GetByPrefix(ctx context.Context, prefix string) (map[string]string, error) {
repo := system_config_repo.SystemConfig()
if repo == nil {
Expand Down
Loading