diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt b/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt index 96b25f3..76b9067 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt @@ -5,6 +5,7 @@ import org.jetbrains.exposed.sql.* import org.jetbrains.exposed.sql.transactions.experimental.newSuspendedTransaction import zed.rainxch.githubstore.model.RepoOwner import zed.rainxch.githubstore.model.RepoResponse +import zed.rainxch.githubstore.topics.TopicCodeMapper import zed.rainxch.githubstore.util.formatRecency import java.time.OffsetDateTime import java.time.temporal.ChronoUnit @@ -92,6 +93,7 @@ class RepoRepository { license = nestedLicense(this[Repos.licenseSpdxId], this[Repos.licenseName]), language = this[Repos.language], topics = this[Repos.topics], + topicCodes = TopicCodeMapper.resolve(this[Repos.topics]), releasesUrl = "${this[Repos.htmlUrl]}/releases", updatedAt = this[Repos.updatedAtGh]?.toString(), createdAt = this[Repos.createdAtGh]?.toString(), diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt b/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt index d55a4a8..0883b6c 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt @@ -5,6 +5,7 @@ import org.jetbrains.exposed.sql.transactions.TransactionManager import org.jetbrains.exposed.sql.transactions.experimental.newSuspendedTransaction import zed.rainxch.githubstore.model.RepoOwner import zed.rainxch.githubstore.model.RepoResponse +import zed.rainxch.githubstore.topics.TopicCodeMapper import zed.rainxch.githubstore.util.formatRecency import java.sql.Array as SqlArray import java.time.OffsetDateTime @@ -112,6 +113,7 @@ class SearchRepository { license = nestedLicense(rs.getString("license_spdx_id"), rs.getString("license_name")), language = rs.getString("language"), topics = topics, + topicCodes = TopicCodeMapper.resolve(topics), releasesUrl = "${rs.getString("html_url")}/releases", updatedAt = rs.getString("updated_at_gh"), createdAt = rs.getString("created_at_gh"), diff --git a/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt b/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt index 61769cc..d2fa28d 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt @@ -29,6 +29,7 @@ import zed.rainxch.githubstore.db.Repos import zed.rainxch.githubstore.model.RepoOwner import zed.rainxch.githubstore.model.RepoResponse import zed.rainxch.githubstore.ranking.SearchScore +import zed.rainxch.githubstore.topics.TopicCodeMapper import zed.rainxch.githubstore.util.FeatureFlags import zed.rainxch.githubstore.util.formatRecency import zed.rainxch.githubstore.util.queryHash @@ -623,6 +624,7 @@ class GitHubSearchClient( license = repo.license?.let { zed.rainxch.githubstore.model.RepoLicense(spdxId = it.spdxId, name = it.name) }, language = repo.language, topics = repo.topics, + topicCodes = TopicCodeMapper.resolve(repo.topics), releasesUrl = "${repo.htmlUrl}/releases", updatedAt = repo.updatedAt, createdAt = repo.createdAt, diff --git a/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt b/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt index 812e447..a6a9ef1 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt @@ -46,6 +46,12 @@ data class RepoResponse( val license: RepoLicense? = null, val language: String?, val topics: List, + // Canonical topic codes derived from raw topics via TopicCodeMapper. + // 15 possible codes (ai, privacy, security, networking, messaging, browser, + // social, launcher, notes, reader, audio, video, photo, backup, self-hosted). + // Empty when no raw topic matches the canonical set. Frontend renders up to 3 + // as TopicGlyph icons. Never stored in DB or Meili — computed at response time. + val topicCodes: List = emptyList(), val releasesUrl: String?, val updatedAt: String?, val createdAt: String?, diff --git a/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt b/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt index 46ec49b..45bba66 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt @@ -11,6 +11,7 @@ import zed.rainxch.githubstore.ingest.GitHubResourceClient import zed.rainxch.githubstore.match.ForgejoResourceClient import zed.rainxch.githubstore.model.RepoOwner import zed.rainxch.githubstore.model.RepoResponse +import zed.rainxch.githubstore.topics.TopicCodeMapper import zed.rainxch.githubstore.util.GitHubIdentifiers private val log = LoggerFactory.getLogger("RepoRoutes") @@ -140,6 +141,7 @@ internal fun GitHubRepo.toMetadataOnlyResponse(): RepoResponse = RepoResponse( license = license?.let { zed.rainxch.githubstore.model.RepoLicense(spdxId = it.spdxId, name = it.name) }, language = language, topics = topics, + topicCodes = TopicCodeMapper.resolve(topics), releasesUrl = "$htmlUrl/releases", updatedAt = updatedAt, createdAt = createdAt, diff --git a/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt b/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt index cf7f21b..76fdbf1 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt @@ -13,6 +13,7 @@ import zed.rainxch.githubstore.model.ExploreResponse import zed.rainxch.githubstore.model.RepoOwner import zed.rainxch.githubstore.model.RepoResponse import zed.rainxch.githubstore.model.SearchResponse +import zed.rainxch.githubstore.topics.TopicCodeMapper private val VALID_PLATFORMS = setOf("android", "windows", "macos", "linux") // `recent` kept for back-compat; `releases` is the public-facing alias. @@ -289,6 +290,7 @@ private fun zed.rainxch.githubstore.db.MeiliRepoHit.toRepoResponse() = RepoRespo license = zed.rainxch.githubstore.db.nestedLicense(license_spdx_id, license_name), language = language, topics = topics, + topicCodes = TopicCodeMapper.resolve(topics), releasesUrl = "$html_url/releases", updatedAt = null, createdAt = null, diff --git a/src/main/kotlin/zed/rainxch/githubstore/topics/TopicCodeMapper.kt b/src/main/kotlin/zed/rainxch/githubstore/topics/TopicCodeMapper.kt new file mode 100644 index 0000000..4265259 --- /dev/null +++ b/src/main/kotlin/zed/rainxch/githubstore/topics/TopicCodeMapper.kt @@ -0,0 +1,153 @@ +package zed.rainxch.githubstore.topics + +/** + * Maps raw GitHub topic strings to canonical topic codes. + * + * 15 codes, chosen from frequency analysis of 11k+ repos in our index + * plus F-Droid category taxonomy. Excludes programming languages, OS tags, + * and build tooling — only app-category concepts. + * + * Call [resolve] with a repo's raw topics list; returns all matching + * canonical codes in priority order (most distinctive first). The frontend + * renders up to 3 as TopicGlyph icons. + */ +object TopicCodeMapper { + + /** + * Returns canonical topic codes that match the given raw GitHub topics. + * Order is deterministic (priority order defined below), duplicates removed. + */ + fun resolve(topics: List): List { + if (topics.isEmpty()) return emptyList() + val lower = topics.mapTo(mutableSetOf()) { it.lowercase() } + return PRIORITY_ORDER.filter { code -> + MAPPINGS.getValue(code).any { it in lower } + } + } + + // ── Canonical codes → raw GitHub topic aliases ───────────────────────── + + private val MAPPINGS: Map> = mapOf( + + // User intent: protect identity / stop tracking — broader principle + "privacy" to setOf( + "privacy", "privacy-tools", "privacy-focused", "anonymity", + "no-telemetry", "tracking-protection", "degoogle", "anti-tracking", + "tracker-blocker", "ungoogled", "de-google", + ), + + // User intent: harden secrets / authenticate — specific mechanism + "security" to setOf( + "security", "encryption", "2fa", "totp", "otp", "pgp", "gpg", + "e2ee", "end-to-end-encryption", "password-manager", "authenticator", + "cryptography", "cipher", "keystore", "biometric", + ), + + // User intent: route / tunnel / block traffic at the network layer + "networking" to setOf( + "vpn", "proxy", "shadowsocks", "v2ray", "xray", "vless", "vmess", + "trojan", "sing-box", "clash", "hysteria", "wireguard", + "dns", "ad-blocker", "adblock", "adblocker", "firewall", + "p2p", "torrent", "downloader", "download-manager", "network", + "ssh", "socks5", "http-proxy", "tor", + ), + + // User intent: interact with AI models / agents + "ai" to setOf( + "ai", "artificial-intelligence", "chatgpt", "llm", "large-language-model", + "mcp", "agent", "ai-agent", "gemini", "deepseek", "openai", + "ollama", "claude", "copilot", "gpt", "local-llm", "on-device-ai", + ), + + // User intent: capture and organise ideas / tasks + "notes" to setOf( + "note-taking", "notes-app", "notes", "note", "note-app", + "markdown", "knowledge-base", "pkm", "second-brain", "zettelkasten", + "todo", "task-manager", "tasks", "to-do", "journal", "diary", + "writing", "text-editor", "notetaking", "productivity", + "local-first", "offline-first", + ), + + // User intent: listen to music / podcasts / radio + "audio" to setOf( + "music-player", "music", "podcast", "podcasts", "radio", + "audio", "audio-player", "mpd", "scrobbler", + ), + + // User intent: watch video / streams + "video" to setOf( + "video-player", "video", "streaming", "youtube", "iptv", + "media-player", "danmaku", "online-video", "video-streaming", + ), + + // User intent: manage or view images / camera + "photo" to setOf( + "photo", "photos", "gallery", "camera", "image-viewer", + "google-photos-alternative", "image-gallery", "photo-gallery", + "screenshots", + ), + + // User intent: read long-form content offline + "reader" to setOf( + "ebook", "e-reader", "epub", "pdf", "djvu", "cbz", "cbr", + "book", "manga", "comic", "comics", "rss", "rss-reader", + "feed-reader", "reading", + ), + + // User intent: send messages / calls to other people + "messaging" to setOf( + "messaging", "chat", "instant-messaging", "im", + "matrix", "xmpp", "email", "mail", "voip", "sip", + "sms", "telegram", "signal", "irc", "discord-alternative", + ), + + // User intent: browse the web + "browser" to setOf( + "browser", "web-browser", "firefox-fork", + ), + + // User intent: run services on own hardware + "self-hosted" to setOf( + "self-hosted", "self-hosting", "homeserver", "home-server", + "self-host", + ), + + // User intent: back up or sync files across devices + "backup" to setOf( + "backup", "sync", "synchronization", "file-sync", + "cloud-sync", "webdav", "nextcloud", "syncthing", + ), + + // User intent: interact with social / fediverse networks + "social" to setOf( + "social-network", "mastodon", "fediverse", "activitypub", + "bluesky", "twitter-alternative", "pleroma", "misskey", + "nostr", "lemmy", "pixelfed", + ), + + // User intent: customise Android home screen / input + "launcher" to setOf( + "launcher", "android-launcher", "home-screen", + ), + ) + + // Priority order: most distinctive for our FOSS audience first. + // A repo matching multiple codes shows the highest-priority ones. + private val PRIORITY_ORDER = listOf( + "ai", + "privacy", + "security", + "networking", + "messaging", + "browser", + "social", + "launcher", + "notes", + "reader", + "audio", + "video", + "photo", + "backup", + "self-hosted", + ) +}