Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import org.jetbrains.exposed.sql.*
import org.jetbrains.exposed.sql.transactions.experimental.newSuspendedTransaction
import zed.rainxch.githubstore.model.RepoOwner
import zed.rainxch.githubstore.model.RepoResponse
import zed.rainxch.githubstore.topics.TopicCodeMapper
import zed.rainxch.githubstore.util.formatRecency
import java.time.OffsetDateTime
import java.time.temporal.ChronoUnit
Expand Down Expand Up @@ -92,6 +93,7 @@ class RepoRepository {
license = nestedLicense(this[Repos.licenseSpdxId], this[Repos.licenseName]),
language = this[Repos.language],
topics = this[Repos.topics],
topicCodes = TopicCodeMapper.resolve(this[Repos.topics]),
releasesUrl = "${this[Repos.htmlUrl]}/releases",
updatedAt = this[Repos.updatedAtGh]?.toString(),
createdAt = this[Repos.createdAtGh]?.toString(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import org.jetbrains.exposed.sql.transactions.TransactionManager
import org.jetbrains.exposed.sql.transactions.experimental.newSuspendedTransaction
import zed.rainxch.githubstore.model.RepoOwner
import zed.rainxch.githubstore.model.RepoResponse
import zed.rainxch.githubstore.topics.TopicCodeMapper
import zed.rainxch.githubstore.util.formatRecency
import java.sql.Array as SqlArray
import java.time.OffsetDateTime
Expand Down Expand Up @@ -112,6 +113,7 @@ class SearchRepository {
license = nestedLicense(rs.getString("license_spdx_id"), rs.getString("license_name")),
language = rs.getString("language"),
topics = topics,
topicCodes = TopicCodeMapper.resolve(topics),
releasesUrl = "${rs.getString("html_url")}/releases",
updatedAt = rs.getString("updated_at_gh"),
createdAt = rs.getString("created_at_gh"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import zed.rainxch.githubstore.db.Repos
import zed.rainxch.githubstore.model.RepoOwner
import zed.rainxch.githubstore.model.RepoResponse
import zed.rainxch.githubstore.ranking.SearchScore
import zed.rainxch.githubstore.topics.TopicCodeMapper
import zed.rainxch.githubstore.util.FeatureFlags
import zed.rainxch.githubstore.util.formatRecency
import zed.rainxch.githubstore.util.queryHash
Expand Down Expand Up @@ -623,6 +624,7 @@ class GitHubSearchClient(
license = repo.license?.let { zed.rainxch.githubstore.model.RepoLicense(spdxId = it.spdxId, name = it.name) },
language = repo.language,
topics = repo.topics,
topicCodes = TopicCodeMapper.resolve(repo.topics),
releasesUrl = "${repo.htmlUrl}/releases",
updatedAt = repo.updatedAt,
createdAt = repo.createdAt,
Expand Down
6 changes: 6 additions & 0 deletions src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ data class RepoResponse(
val license: RepoLicense? = null,
val language: String?,
val topics: List<String>,
// Canonical topic codes derived from raw topics via TopicCodeMapper.
// 15 possible codes (ai, privacy, security, networking, messaging, browser,
// social, launcher, notes, reader, audio, video, photo, backup, self-hosted).
// Empty when no raw topic matches the canonical set. Frontend renders up to 3
// as TopicGlyph icons. Never stored in DB or Meili — computed at response time.
val topicCodes: List<String> = emptyList(),
val releasesUrl: String?,
val updatedAt: String?,
val createdAt: String?,
Expand Down
2 changes: 2 additions & 0 deletions src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import zed.rainxch.githubstore.ingest.GitHubResourceClient
import zed.rainxch.githubstore.match.ForgejoResourceClient
import zed.rainxch.githubstore.model.RepoOwner
import zed.rainxch.githubstore.model.RepoResponse
import zed.rainxch.githubstore.topics.TopicCodeMapper
import zed.rainxch.githubstore.util.GitHubIdentifiers

private val log = LoggerFactory.getLogger("RepoRoutes")
Expand Down Expand Up @@ -140,6 +141,7 @@ internal fun GitHubRepo.toMetadataOnlyResponse(): RepoResponse = RepoResponse(
license = license?.let { zed.rainxch.githubstore.model.RepoLicense(spdxId = it.spdxId, name = it.name) },
language = language,
topics = topics,
topicCodes = TopicCodeMapper.resolve(topics),
releasesUrl = "$htmlUrl/releases",
updatedAt = updatedAt,
createdAt = createdAt,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import zed.rainxch.githubstore.model.ExploreResponse
import zed.rainxch.githubstore.model.RepoOwner
import zed.rainxch.githubstore.model.RepoResponse
import zed.rainxch.githubstore.model.SearchResponse
import zed.rainxch.githubstore.topics.TopicCodeMapper

private val VALID_PLATFORMS = setOf("android", "windows", "macos", "linux")
// `recent` kept for back-compat; `releases` is the public-facing alias.
Expand Down Expand Up @@ -289,6 +290,7 @@ private fun zed.rainxch.githubstore.db.MeiliRepoHit.toRepoResponse() = RepoRespo
license = zed.rainxch.githubstore.db.nestedLicense(license_spdx_id, license_name),
language = language,
topics = topics,
topicCodes = TopicCodeMapper.resolve(topics),
releasesUrl = "$html_url/releases",
updatedAt = null,
createdAt = null,
Expand Down
153 changes: 153 additions & 0 deletions src/main/kotlin/zed/rainxch/githubstore/topics/TopicCodeMapper.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package zed.rainxch.githubstore.topics

/**
* Maps raw GitHub topic strings to canonical topic codes.
*
* 15 codes, chosen from frequency analysis of 11k+ repos in our index
* plus F-Droid category taxonomy. Excludes programming languages, OS tags,
* and build tooling — only app-category concepts.
*
* Call [resolve] with a repo's raw topics list; returns all matching
* canonical codes in priority order (most distinctive first). The frontend
* renders up to 3 as TopicGlyph icons.
*/
object TopicCodeMapper {

/**
* Resolves raw GitHub topics to the canonical topic codes they match.
*
* Matching is case-insensitive: every raw topic is lowercased before it is
* compared against the alias sets in [MAPPINGS]. A code is included when at
* least one of its aliases is present among the lowercased topics.
*
* @param topics raw topic strings for a single repo; may be empty.
* @return the matching codes in [PRIORITY_ORDER] order, each at most once;
* an empty list when [topics] is empty or no alias matches.
* @throws NoSuchElementException if a code listed in [PRIORITY_ORDER] has no
* entry in [MAPPINGS].
*/
fun resolve(topics: List<String>): List<String> {
// Fast path: nothing to match against.
if (topics.isEmpty()) return emptyList()
// Lowercase once into a set so the alias checks below are set lookups.
val lower = topics.mapTo(mutableSetOf()) { it.lowercase() }
return PRIORITY_ORDER.filter { code ->
// getValue fails fast if PRIORITY_ORDER and MAPPINGS drift out of sync.
MAPPINGS.getValue(code).any { it in lower }
}
Comment on lines +23 to +25
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 MAPPINGS.getValue(code) throws NoSuchElementException at runtime if a code appears in PRIORITY_ORDER but not in MAPPINGS. Currently both lists are in sync, but the failure mode is silent corruption of all topicCodes responses rather than a compile-time or unit-test error. Using get with a null check makes the contract more resilient to future edits.

Suggested change
return PRIORITY_ORDER.filter { code ->
MAPPINGS.getValue(code).any { it in lower }
}
return PRIORITY_ORDER.filter { code ->
MAPPINGS[code]?.any { it in lower } == true
}

Fix in Claude Code

}

// ── Canonical codes → raw GitHub topic aliases ─────────────────────────
//
// Keys are the canonical codes returned by resolve(); each value is the set
// of raw topic spellings that map to that code. Aliases must be written in
// lowercase: resolve() lowercases incoming topics before the membership test,
// so an alias containing an uppercase letter could never match.
//
// Keep the key set in sync with PRIORITY_ORDER: a code listed there without an
// entry here makes resolve() throw, and a key here that is not listed there is
// never returned. Entry order in this map does not affect output order.
private val MAPPINGS: Map<String, Set<String>> = mapOf(

    // User intent: protect identity / stop tracking — broader principle
    "privacy" to setOf(
        "privacy", "privacy-tools", "privacy-focused", "anonymity",
        "no-telemetry", "tracking-protection", "degoogle", "anti-tracking",
        "tracker-blocker", "ungoogled", "de-google",
    ),

    // User intent: harden secrets / authenticate — specific mechanism
    "security" to setOf(
        "security", "encryption", "2fa", "totp", "otp", "pgp", "gpg",
        "e2ee", "end-to-end-encryption", "password-manager", "authenticator",
        "cryptography", "cipher", "keystore", "biometric",
    ),

    // User intent: route / tunnel / block traffic at the network layer
    "networking" to setOf(
        "vpn", "proxy", "shadowsocks", "v2ray", "xray", "vless", "vmess",
        "trojan", "sing-box", "clash", "hysteria", "wireguard",
        "dns", "ad-blocker", "adblock", "adblocker", "firewall",
        "p2p", "torrent", "downloader", "download-manager", "network",
        // The code's own name is a real GitHub topic too; without it a repo
        // tagged exactly "networking" would not resolve to this code.
        "networking",
        "ssh", "socks5", "http-proxy", "tor",
    ),

    // User intent: interact with AI models / agents
    "ai" to setOf(
        "ai", "artificial-intelligence", "chatgpt", "llm", "large-language-model",
        "mcp", "agent", "ai-agent", "gemini", "deepseek", "openai",
        "ollama", "claude", "copilot", "gpt", "local-llm", "on-device-ai",
    ),

    // User intent: capture and organise ideas / tasks
    "notes" to setOf(
        "note-taking", "notes-app", "notes", "note", "note-app",
        "markdown", "knowledge-base", "pkm", "second-brain", "zettelkasten",
        "todo", "task-manager", "tasks", "to-do", "journal", "diary",
        "writing", "text-editor", "notetaking", "productivity",
        "local-first", "offline-first",
    ),

    // User intent: listen to music / podcasts / radio
    "audio" to setOf(
        "music-player", "music", "podcast", "podcasts", "radio",
        "audio", "audio-player", "mpd", "scrobbler",
    ),

    // User intent: watch video / streams
    "video" to setOf(
        "video-player", "video", "streaming", "youtube", "iptv",
        "media-player", "danmaku", "online-video", "video-streaming",
    ),

    // User intent: manage or view images / camera
    "photo" to setOf(
        "photo", "photos", "gallery", "camera", "image-viewer",
        "google-photos-alternative", "image-gallery", "photo-gallery",
        "screenshots",
    ),

    // User intent: read long-form content offline
    "reader" to setOf(
        "ebook", "e-reader", "epub", "pdf", "djvu", "cbz", "cbr",
        "book", "manga", "comic", "comics", "rss", "rss-reader",
        "feed-reader", "reading",
    ),

    // User intent: send messages / calls to other people
    "messaging" to setOf(
        "messaging", "chat", "instant-messaging", "im",
        "matrix", "xmpp", "email", "mail", "voip", "sip",
        "sms", "telegram", "signal", "irc", "discord-alternative",
    ),

    // User intent: browse the web
    "browser" to setOf(
        "browser", "web-browser", "firefox-fork",
    ),

    // User intent: run services on own hardware
    "self-hosted" to setOf(
        "self-hosted", "self-hosting", "homeserver", "home-server",
        "self-host",
    ),

    // User intent: back up or sync files across devices
    "backup" to setOf(
        "backup", "sync", "synchronization", "file-sync",
        "cloud-sync", "webdav", "nextcloud", "syncthing",
    ),

    // User intent: interact with social / fediverse networks
    "social" to setOf(
        "social-network", "mastodon", "fediverse", "activitypub",
        "bluesky", "twitter-alternative", "pleroma", "misskey",
        "nostr", "lemmy", "pixelfed",
    ),

    // User intent: customise Android home screen / input
    "launcher" to setOf(
        "launcher", "android-launcher", "home-screen",
    ),
)

// Output order for canonical codes, most distinctive for our FOSS audience
// first. resolve() walks this list front to back, so when a repo matches
// several codes the earlier entries appear first in its result.
private val PRIORITY_ORDER: List<String> = listOf(
    "ai", "privacy", "security", "networking", "messaging",
    "browser", "social", "launcher", "notes", "reader",
    "audio", "video", "photo", "backup", "self-hosted",
)
}
Loading